271 lines
8.8 KiB
Python

"""
This module contains some useful classes for interacting with the Github API.
The full documentation for the API can be found here: https://docs.github.com/en/rest
Mostly, this focusses on two areas, repo branches and repo packages, as the use case
is cleaning up container images which are no longer referred to.
"""
import functools
import logging
import re
import urllib.parse
from typing import Dict
from typing import List
from typing import Optional
import httpx
logger = logging.getLogger("github-api")
class _GithubApiBase:
"""
A base class for interacting with the Github API. It
will handle the session and setting authorization headers.
"""
def __init__(self, token: str) -> None:
self._token = token
self._client: Optional[httpx.Client] = None
def __enter__(self) -> "_GithubApiBase":
"""
Sets up the required headers for auth and response
type from the API
"""
self._client = httpx.Client()
self._client.headers.update(
{
"Accept": "application/vnd.github.v3+json",
"Authorization": f"token {self._token}",
},
)
return self
def __exit__(self, exc_type, exc_val, exc_tb):
"""
Ensures the authorization token is cleaned up no matter
the reason for the exit
"""
if "Accept" in self._client.headers:
del self._client.headers["Accept"]
if "Authorization" in self._client.headers:
del self._client.headers["Authorization"]
# Close the session as well
self._client.close()
self._client = None
def _read_all_pages(self, endpoint):
"""
Helper function to read all pages of an endpoint, utilizing the
next.url until exhausted. Assumes the endpoint returns a list
"""
internal_data = []
while True:
resp = self._client.get(endpoint)
if resp.status_code == 200:
internal_data += resp.json()
if "next" in resp.links:
endpoint = resp.links["next"]["url"]
else:
logger.debug("Exiting pagination loop")
break
else:
logger.warning(f"Request to {endpoint} return HTTP {resp.status_code}")
resp.raise_for_status()
return internal_data
class _EndpointResponse:
"""
For all endpoint JSON responses, store the full
response data, for ease of extending later, if need be.
"""
def __init__(self, data: Dict) -> None:
self._data = data
class GithubBranch(_EndpointResponse):
"""
Simple wrapper for a repository branch, only extracts name information
for now.
"""
def __init__(self, data: Dict) -> None:
super().__init__(data)
self.name = self._data["name"]
class GithubBranchApi(_GithubApiBase):
"""
Wrapper around branch API.
See https://docs.github.com/en/rest/branches/branches
"""
def __init__(self, token: str) -> None:
super().__init__(token)
self._ENDPOINT = "https://api.github.com/repos/{REPO}/branches"
def get_branches(self, repo: str) -> List[GithubBranch]:
"""
Returns all current branches of the given repository owned by the given
owner or organization.
"""
# The environment GITHUB_REPOSITORY already contains the owner in the correct location
endpoint = self._ENDPOINT.format(REPO=repo)
internal_data = self._read_all_pages(endpoint)
return [GithubBranch(branch) for branch in internal_data]
class ContainerPackage(_EndpointResponse):
"""
Data class wrapping the JSON response from the package related
endpoints
"""
def __init__(self, data: Dict):
super().__init__(data)
# This is a numerical ID, required for interactions with this
# specific package, including deletion of it or restoration
self.id: int = self._data["id"]
# A string name. This might be an actual name or it could be a
# digest string like "sha256:"
self.name: str = self._data["name"]
# URL to the package, including its ID, can be used for deletion
# or restoration without needing to build up a URL ourselves
self.url: str = self._data["url"]
# The list of tags applied to this image. Maybe an empty list
self.tags: List[str] = self._data["metadata"]["container"]["tags"]
@functools.cached_property
def untagged(self) -> bool:
"""
Returns True if the image has no tags applied to it, False otherwise
"""
return len(self.tags) == 0
@functools.cache
def tag_matches(self, pattern: str) -> bool:
"""
Returns True if the image has at least one tag which matches the given regex,
False otherwise
"""
return any(re.match(pattern, tag) is not None for tag in self.tags)
def __repr__(self):
return f"Package {self.name}"
class GithubContainerRegistryApi(_GithubApiBase):
"""
Class wrapper to deal with the Github packages API. This class only deals with
container type packages, the only type published by paperless-ngx.
"""
def __init__(self, token: str, owner_or_org: str) -> None:
super().__init__(token)
self._owner_or_org = owner_or_org
if self._owner_or_org == "paperless-ngx":
# https://docs.github.com/en/rest/packages#get-all-package-versions-for-a-package-owned-by-an-organization
self._PACKAGES_VERSIONS_ENDPOINT = "https://api.github.com/orgs/{ORG}/packages/{PACKAGE_TYPE}/{PACKAGE_NAME}/versions"
# https://docs.github.com/en/rest/packages#delete-package-version-for-an-organization
self._PACKAGE_VERSION_DELETE_ENDPOINT = "https://api.github.com/orgs/{ORG}/packages/{PACKAGE_TYPE}/{PACKAGE_NAME}/versions/{PACKAGE_VERSION_ID}"
else:
# https://docs.github.com/en/rest/packages#get-all-package-versions-for-a-package-owned-by-the-authenticated-user
self._PACKAGES_VERSIONS_ENDPOINT = "https://api.github.com/user/packages/{PACKAGE_TYPE}/{PACKAGE_NAME}/versions"
# https://docs.github.com/en/rest/packages#delete-a-package-version-for-the-authenticated-user
self._PACKAGE_VERSION_DELETE_ENDPOINT = "https://api.github.com/user/packages/{PACKAGE_TYPE}/{PACKAGE_NAME}/versions/{PACKAGE_VERSION_ID}"
self._PACKAGE_VERSION_RESTORE_ENDPOINT = (
f"{self._PACKAGE_VERSION_DELETE_ENDPOINT}/restore"
)
def get_active_package_versions(
self,
package_name: str,
) -> List[ContainerPackage]:
"""
Returns all the versions of a given package (container images) from
the API
"""
package_type: str = "container"
# Need to quote this for slashes in the name
package_name = urllib.parse.quote(package_name, safe="")
endpoint = self._PACKAGES_VERSIONS_ENDPOINT.format(
ORG=self._owner_or_org,
PACKAGE_TYPE=package_type,
PACKAGE_NAME=package_name,
)
pkgs = []
for data in self._read_all_pages(endpoint):
pkgs.append(ContainerPackage(data))
return pkgs
def get_deleted_package_versions(
self,
package_name: str,
) -> List[ContainerPackage]:
package_type: str = "container"
# Need to quote this for slashes in the name
package_name = urllib.parse.quote(package_name, safe="")
endpoint = (
self._PACKAGES_VERSIONS_ENDPOINT.format(
ORG=self._owner_or_org,
PACKAGE_TYPE=package_type,
PACKAGE_NAME=package_name,
)
+ "?state=deleted"
)
pkgs = []
for data in self._read_all_pages(endpoint):
pkgs.append(ContainerPackage(data))
return pkgs
def delete_package_version(self, package_data: ContainerPackage):
"""
Deletes the given package version from the GHCR
"""
resp = self._client.delete(package_data.url)
if resp.status_code != 204:
logger.warning(
f"Request to delete {package_data.url} returned HTTP {resp.status_code}",
)
def restore_package_version(
self,
package_name: str,
package_data: ContainerPackage,
):
package_type: str = "container"
endpoint = self._PACKAGE_VERSION_RESTORE_ENDPOINT.format(
ORG=self._owner_or_org,
PACKAGE_TYPE=package_type,
PACKAGE_NAME=package_name,
PACKAGE_VERSION_ID=package_data.id,
)
resp = self._client.post(endpoint)
if resp.status_code != 204:
logger.warning(
f"Request to delete {endpoint} returned HTTP {resp.status_code}",
)