From 0090e276999522119481eb52c364401f4829c437 Mon Sep 17 00:00:00 2001 From: Trenton Holmes Date: Thu, 16 Jun 2022 09:11:49 -0700 Subject: [PATCH 1/2] Adds a new workflow to cleanup image tags which no longer have an associated branch --- .github/scripts/cleanup-tags.py | 210 +++++++++++++++++++++++++++++ .github/scripts/common.py | 17 +++ .github/workflows/cleanup-tags.yml | 48 +++++++ 3 files changed, 275 insertions(+) create mode 100755 .github/scripts/cleanup-tags.py create mode 100644 .github/workflows/cleanup-tags.yml diff --git a/.github/scripts/cleanup-tags.py b/.github/scripts/cleanup-tags.py new file mode 100755 index 000000000..f34e16cb4 --- /dev/null +++ b/.github/scripts/cleanup-tags.py @@ -0,0 +1,210 @@ +#!/usr/bin/env python3 +""" +When a feature branch is created, a new GitHub container is built and tagged +with the feature branch name. When a feature branch is deleted, either through +a merge or deletion, the old image tag will still exist. + +Though this isn't a problem for storage size, etc, it does lead to a long list +of tags which are no longer relevant and the last released version is pushed + further and further down that list. + +This script utlizes the GitHub API (through the gh cli application) to list the +package versions (aka tags) and the repository branches. Then it removes feature +tags which have no matching branch + +This pruning is applied to the primary package, the frontend builder package and the +frontend build cache package. + +""" +import argparse +import logging +import os.path +import pprint +from typing import Dict +from typing import Final +from typing import List + +from common import get_log_level +from ghapi.all import GhApi +from ghapi.all import paged + + +def _get_feature_packages( + logger: logging.Logger, + api: GhApi, + is_org_repo: bool, + repo_owner: str, + package_name: str, +) -> Dict: + """ + Uses the GitHub packages API endpoint data filter to containers + which have a tag starting with "feature-" + """ + + # Get all package versions + pkg_versions = [] + if is_org_repo: + + for pkg_version in paged( + api.packages.get_all_package_versions_for_package_owned_by_org, + org=repo_owner, + package_type="container", + package_name=package_name, + ): + pkg_versions.extend(pkg_version) + else: + for pkg_version in paged( + api.packages.get_all_package_versions_for_package_owned_by_authenticated_user, # noqa: E501 + package_type="container", + package_name=package_name, + ): + pkg_versions.extend(pkg_version) + + logger.debug(f"Found {len(pkg_versions)} package versions for {package_name}") + + # Filter to just those containers tagged "feature-" + feature_versions = {} + + for item in pkg_versions: + is_feature_version = False + feature_tag_name = None + if ( + "metadata" in item + and "container" in item["metadata"] + and "tags" in item["metadata"]["container"] + ): + for tag in item["metadata"]["container"]["tags"]: + if tag.startswith("feature-"): + feature_tag_name = tag + is_feature_version = True + if is_feature_version: + logger.info( + f"Located feature tag: {feature_tag_name} for image {package_name}", + ) + # logger.debug(pprint.pformat(item, indent=2)) + feature_versions[feature_tag_name] = item + else: + logger.debug(f"Filtered {pprint.pformat(item, indent=2)}") + + logger.info( + f"Found {len(feature_versions)} package versions for" + f" {package_name} with feature tags", + ) + + return feature_versions + + +def _main(): + + parser = argparse.ArgumentParser( + description="Using the GitHub API locate and optionally delete container" + " tags which no longer have an associated feature branch", + ) + + parser.add_argument( + "--delete", + action="store_true", + default=False, + help="If provided, actually delete the container tags", + ) + + parser.add_argument( + "--loglevel", + default="info", + help="Configures the logging level", + ) + + args = parser.parse_args() + + logging.basicConfig( + level=get_log_level(args), + datefmt="%Y-%m-%d %H:%M:%S", + format="%(asctime)s %(levelname)-8s %(message)s", + ) + + logger = logging.getLogger("cleanup-tags") + + repo: Final[str] = os.environ["GITHUB_REPOSITORY"] + repo_owner: Final[str] = os.environ["GITHUB_REPOSITORY_OWNER"] + + is_org_repo: Final[bool] = repo_owner == "paperless-ngx" + dry_run: Final[bool] = not args.delete + + logger.debug(f"Org Repo? {is_org_repo}") + logger.debug(f"Dry Run? {dry_run}") + + api = GhApi( + owner=repo_owner, + repo=os.path.basename(repo), + token=os.environ["GITHUB_TOKEN"], + ) + + pkg_list: Final[List[str]] = [ + "paperless-ngx", + # TODO: It would be nice to cleanup additional packages, but we can't + # see https://github.com/fastai/ghapi/issues/84 + # "builder/frontend", + # "builder-frontend-cache", + ] + + # Get the list of current "feature-" branches + feature_branch_info = api.list_branches(prefix="feature-") + feature_branch_names = [] + for branch in feature_branch_info: + name_only = branch["ref"].removeprefix("refs/heads/") + logger.info(f"Located feature branch: {name_only}") + feature_branch_names.append(name_only) + + logger.info(f"Located {len(feature_branch_names)} feature branches") + + # TODO The deletion doesn't yet actually work + # See https://github.com/fastai/ghapi/issues/132 + # This would need to be updated to use gh cli app or requests or curl + # or something + if is_org_repo: + endpoint = ( + "https://api.github.com/orgs/{ORG}/packages/container/{name}/versions/{id}" + ) + else: + endpoint = "https://api.github.com/user/packages/container/{name}/{id}" + + for package_name in pkg_list: + + logger.info(f"Processing image {package_name}") + + # Get the list of images tagged with "feature-" + feature_packages = _get_feature_packages( + logger, + api, + is_org_repo, + repo_owner, + package_name, + ) + + # Get the set of container tags without matching feature branches + to_delete = list(set(feature_packages.keys()) - set(feature_branch_names)) + + for container_tag in to_delete: + container_info = feature_packages[container_tag] + + formatted_endpoint = endpoint.format( + ORG=repo_owner, + name=package_name, + id=container_info["id"], + ) + + if dry_run: + logger.info( + f"Would delete {package_name}:{container_tag} with" + f" id: {container_info['id']}", + ) + # logger.debug(formatted_endpoint) + else: + logger.info( + f"Deleting {package_name}:{container_tag} with" + f" id: {container_info['id']}", + ) + + +if __name__ == "__main__": + _main() diff --git a/.github/scripts/common.py b/.github/scripts/common.py index 3913c91cd..a64fa929a 100644 --- a/.github/scripts/common.py +++ b/.github/scripts/common.py @@ -1,4 +1,6 @@ #!/usr/bin/env python3 +import logging +from argparse import ArgumentError def get_image_tag( @@ -25,3 +27,18 @@ def get_cache_image_tag( rebuilds, generally almost instant for the same version """ return f"ghcr.io/{repo_name}/builder/cache/{pkg_name}:{pkg_version}" + + +def get_log_level(args) -> int: + levels = { + "critical": logging.CRITICAL, + "error": logging.ERROR, + "warn": logging.WARNING, + "warning": logging.WARNING, + "info": logging.INFO, + "debug": logging.DEBUG, + } + level = levels.get(args.loglevel.lower()) + if level is None: + raise ArgumentError(f"{args.loglevel} is not a valid level") + return level diff --git a/.github/workflows/cleanup-tags.yml b/.github/workflows/cleanup-tags.yml new file mode 100644 index 000000000..3f5fadaaf --- /dev/null +++ b/.github/workflows/cleanup-tags.yml @@ -0,0 +1,48 @@ +name: Cleanup Image Tags + +on: + schedule: + - cron: '0 0 * * SAT' + delete: + pull_request: + types: + - closed + push: + paths: + - ".github/workflows/cleanup-tags.yml" + - ".github/scripts/cleanup-tags.py" + - ".github/scripts/common.py" + +env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + +jobs: + cleanup: + name: Cleanup Image Tags + runs-on: ubuntu-20.04 + permissions: + packages: write + steps: + - + name: Checkout + uses: actions/checkout@v3 + - + name: Login to Github Container Registry + uses: docker/login-action@v1 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + - + name: Set up Python + uses: actions/setup-python@v3 + with: + python-version: "3.9" + - + name: Install fastai GitHub API + run: | + python -m pip install ghapi requests + - + name: Cleanup feature tags + run: | + python ${GITHUB_WORKSPACE}/.github/scripts/cleanup-tags.py --loglevel info From 499bd552a159a3148a241d28689f73719cd12d64 Mon Sep 17 00:00:00 2001 From: Trenton Holmes Date: Fri, 17 Jun 2022 08:02:34 -0700 Subject: [PATCH 2/2] Fine, I made my own GitHub API interface. With blackjack and ... --- .github/scripts/cleanup-tags.py | 376 +++++++++++++++++------------ .github/scripts/common.py | 2 +- .github/workflows/cleanup-tags.yml | 4 +- 3 files changed, 220 insertions(+), 162 deletions(-) mode change 100755 => 100644 .github/scripts/cleanup-tags.py diff --git a/.github/scripts/cleanup-tags.py b/.github/scripts/cleanup-tags.py old mode 100755 new mode 100644 index f34e16cb4..a77b5fad0 --- a/.github/scripts/cleanup-tags.py +++ b/.github/scripts/cleanup-tags.py @@ -1,102 +1,159 @@ -#!/usr/bin/env python3 -""" -When a feature branch is created, a new GitHub container is built and tagged -with the feature branch name. When a feature branch is deleted, either through -a merge or deletion, the old image tag will still exist. - -Though this isn't a problem for storage size, etc, it does lead to a long list -of tags which are no longer relevant and the last released version is pushed - further and further down that list. - -This script utlizes the GitHub API (through the gh cli application) to list the -package versions (aka tags) and the repository branches. Then it removes feature -tags which have no matching branch - -This pruning is applied to the primary package, the frontend builder package and the -frontend build cache package. - -""" -import argparse import logging -import os.path -import pprint -from typing import Dict +import os +from argparse import ArgumentParser from typing import Final from typing import List +from urllib.parse import quote +import requests from common import get_log_level -from ghapi.all import GhApi -from ghapi.all import paged + +logger = logging.getLogger("cleanup-tags") -def _get_feature_packages( - logger: logging.Logger, - api: GhApi, - is_org_repo: bool, - repo_owner: str, - package_name: str, -) -> Dict: - """ - Uses the GitHub packages API endpoint data filter to containers - which have a tag starting with "feature-" - """ - - # Get all package versions - pkg_versions = [] - if is_org_repo: - - for pkg_version in paged( - api.packages.get_all_package_versions_for_package_owned_by_org, - org=repo_owner, - package_type="container", - package_name=package_name, - ): - pkg_versions.extend(pkg_version) - else: - for pkg_version in paged( - api.packages.get_all_package_versions_for_package_owned_by_authenticated_user, # noqa: E501 - package_type="container", - package_name=package_name, - ): - pkg_versions.extend(pkg_version) - - logger.debug(f"Found {len(pkg_versions)} package versions for {package_name}") - - # Filter to just those containers tagged "feature-" - feature_versions = {} - - for item in pkg_versions: - is_feature_version = False - feature_tag_name = None - if ( - "metadata" in item - and "container" in item["metadata"] - and "tags" in item["metadata"]["container"] - ): - for tag in item["metadata"]["container"]["tags"]: - if tag.startswith("feature-"): - feature_tag_name = tag - is_feature_version = True - if is_feature_version: - logger.info( - f"Located feature tag: {feature_tag_name} for image {package_name}", - ) - # logger.debug(pprint.pformat(item, indent=2)) - feature_versions[feature_tag_name] = item +class GithubContainerRegistry: + def __init__( + self, + session: requests.Session, + token: str, + owner_or_org: str, + ): + self._session: requests.Session = session + self._token = token + self._owner_or_org = owner_or_org + self._BRANCHES_ENDPOINT = "https://api.github.com/repos/{OWNER}/{REPO}/branches" + if self._owner_or_org == "paperless-ngx": + self._PACKAGES_VERSIONS_ENDPOINT = "https://api.github.com/orgs/{ORG}/packages/{PACKAGE_TYPE}/{PACKAGE_NAME}/versions" + self._PACKAGE_VERSION_DELETE_ENDPOINT = "https://api.github.com/orgs/{ORG}/packages/{PACKAGE_TYPE}/{PACKAGE_NAME}/versions/{PACKAGE_VERSION_ID}" else: - logger.debug(f"Filtered {pprint.pformat(item, indent=2)}") + self._PACKAGES_VERSIONS_ENDPOINT = "https://api.github.com/user/packages/{PACKAGE_TYPE}/{PACKAGE_NAME}/versions" + self._PACKAGE_VERSION_DELETE_ENDPOINT = "https://api.github.com/user/packages/{PACKAGE_TYPE}/{PACKAGE_NAME}/versions/{PACKAGE_VERSION_ID}" - logger.info( - f"Found {len(feature_versions)} package versions for" - f" {package_name} with feature tags", - ) + def __enter__(self): + self._session.headers.update( + { + "Accept": "application/vnd.github.v3+json", + "Authorization": f"token {self._token}", + }, + ) + return self - return feature_versions + def __exit__(self, exc_type, exc_val, exc_tb): + if "Accept" in self._session.headers: + del self._session.headers["Accept"] + if "Authorization" in self._session.headers: + del self._session.headers["Authorization"] + + def _read_all_pages(self, endpoint): + internal_data = [] + + while True: + resp = self._session.get(endpoint) + if resp.status_code == 200: + internal_data += resp.json() + if "next" in resp.links: + endpoint = resp.links["next"]["url"] + else: + logger.debug("Exiting pagination loop") + break + else: + logger.warning(f"Request to {endpoint} return HTTP {resp.status_code}") + break + + return internal_data + + def get_branches(self, repo: str): + endpoint = self._BRANCHES_ENDPOINT.format(OWNER=self._owner_or_org, REPO=repo) + internal_data = self._read_all_pages(endpoint) + return internal_data + + def filter_branches_by_name_pattern(self, branch_data, pattern: str): + matches = {} + + for branch in branch_data: + if branch["name"].startswith(pattern): + matches[branch["name"]] = branch + + return matches + + def get_package_versions( + self, + package_name: str, + package_type: str = "container", + ) -> List: + package_name = quote(package_name, safe="") + endpoint = self._PACKAGES_VERSIONS_ENDPOINT.format( + ORG=self._owner_or_org, + PACKAGE_TYPE=package_type, + PACKAGE_NAME=package_name, + ) + + internal_data = self._read_all_pages(endpoint) + + return internal_data + + def filter_packages_by_tag_pattern(self, package_data, pattern: str): + matches = {} + + for package in package_data: + if "metadata" in package and "container" in package["metadata"]: + container_metadata = package["metadata"]["container"] + if "tags" in container_metadata: + container_tags = container_metadata["tags"] + for tag in container_tags: + if tag.startswith(pattern): + matches[tag] = package + break + + return matches + + def filter_packages_untagged(self, package_data): + matches = {} + + for package in package_data: + if "metadata" in package and "container" in package["metadata"]: + container_metadata = package["metadata"]["container"] + if "tags" in container_metadata: + container_tags = container_metadata["tags"] + if not len(container_tags): + matches[package["name"]] = package + + return matches + + def delete_package_version(self, package_name, package_data): + package_name = quote(package_name, safe="") + endpoint = self._PACKAGE_VERSION_DELETE_ENDPOINT.format( + ORG=self._owner_or_org, + PACKAGE_TYPE=package_data["metadata"]["package_type"], + PACKAGE_NAME=package_name, + PACKAGE_VERSION_ID=package_data["id"], + ) + resp = self._session.delete(endpoint) + if resp.status_code != 204: + logger.warning( + f"Request to delete {endpoint} returned HTTP {resp.status_code}", + ) + + +class DockerHubContainerRegistery: + def __init__(self): + pass + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + pass + + def get_image_versions(self) -> List: + return [] + + def delete_image_version(self): + pass def _main(): - - parser = argparse.ArgumentParser( + parser = ArgumentParser( description="Using the GitHub API locate and optionally delete container" " tags which no longer have an associated feature branch", ) @@ -108,6 +165,14 @@ def _main(): help="If provided, actually delete the container tags", ) + # TODO There's a lot of untagged images, do those need to stay for anything? + parser.add_argument( + "--untagged", + action="store_true", + default=False, + help="If provided, delete untagged containers as well", + ) + parser.add_argument( "--loglevel", default="info", @@ -122,89 +187,82 @@ def _main(): format="%(asctime)s %(levelname)-8s %(message)s", ) - logger = logging.getLogger("cleanup-tags") - - repo: Final[str] = os.environ["GITHUB_REPOSITORY"] repo_owner: Final[str] = os.environ["GITHUB_REPOSITORY_OWNER"] + repo: Final[str] = os.environ["GITHUB_REPOSITORY"] + gh_token: Final[str] = os.environ["GITHUB_TOKEN"] - is_org_repo: Final[bool] = repo_owner == "paperless-ngx" - dry_run: Final[bool] = not args.delete + with requests.session() as sess: + with GithubContainerRegistry(sess, gh_token, repo_owner) as gh_api: + all_branches = gh_api.get_branches("paperless-ngx") + logger.info(f"Located {len(all_branches)} branches of {repo_owner}/{repo} ") - logger.debug(f"Org Repo? {is_org_repo}") - logger.debug(f"Dry Run? {dry_run}") - - api = GhApi( - owner=repo_owner, - repo=os.path.basename(repo), - token=os.environ["GITHUB_TOKEN"], - ) - - pkg_list: Final[List[str]] = [ - "paperless-ngx", - # TODO: It would be nice to cleanup additional packages, but we can't - # see https://github.com/fastai/ghapi/issues/84 - # "builder/frontend", - # "builder-frontend-cache", - ] - - # Get the list of current "feature-" branches - feature_branch_info = api.list_branches(prefix="feature-") - feature_branch_names = [] - for branch in feature_branch_info: - name_only = branch["ref"].removeprefix("refs/heads/") - logger.info(f"Located feature branch: {name_only}") - feature_branch_names.append(name_only) - - logger.info(f"Located {len(feature_branch_names)} feature branches") - - # TODO The deletion doesn't yet actually work - # See https://github.com/fastai/ghapi/issues/132 - # This would need to be updated to use gh cli app or requests or curl - # or something - if is_org_repo: - endpoint = ( - "https://api.github.com/orgs/{ORG}/packages/container/{name}/versions/{id}" - ) - else: - endpoint = "https://api.github.com/user/packages/container/{name}/{id}" - - for package_name in pkg_list: - - logger.info(f"Processing image {package_name}") - - # Get the list of images tagged with "feature-" - feature_packages = _get_feature_packages( - logger, - api, - is_org_repo, - repo_owner, - package_name, - ) - - # Get the set of container tags without matching feature branches - to_delete = list(set(feature_packages.keys()) - set(feature_branch_names)) - - for container_tag in to_delete: - container_info = feature_packages[container_tag] - - formatted_endpoint = endpoint.format( - ORG=repo_owner, - name=package_name, - id=container_info["id"], + feature_branches = gh_api.filter_branches_by_name_pattern( + all_branches, + "feature-", ) + logger.info(f"Located {len(feature_branches)} feature branches") - if dry_run: + for package_name in ["paperless-ngx", "paperless-ngx/builder/cache/app"]: + + all_package_versions = gh_api.get_package_versions(package_name) logger.info( - f"Would delete {package_name}:{container_tag} with" - f" id: {container_info['id']}", + f"Located {len(all_package_versions)} versions of package {package_name}", + ) + + packages_tagged_feature = gh_api.filter_packages_by_tag_pattern( + all_package_versions, + "feature-", ) - # logger.debug(formatted_endpoint) - else: logger.info( - f"Deleting {package_name}:{container_tag} with" - f" id: {container_info['id']}", + f'Located {len(packages_tagged_feature)} versions of package {package_name} tagged "feature-"', ) + untagged_packages = gh_api.filter_packages_untagged( + all_package_versions, + ) + logger.info( + f"Located {len(untagged_packages)} untagged versions of package {package_name}", + ) + + to_delete = list( + set(packages_tagged_feature.keys()) - set(feature_branches.keys()), + ) + logger.info( + f"Located {len(to_delete)} versions of package {package_name} to delete", + ) + + for tag_to_delete in to_delete: + package_version_info = packages_tagged_feature[tag_to_delete] + + logger.info( + f"Deleting {tag_to_delete} (id {package_version_info['id']})", + ) + if args.delete: + gh_api.delete_package_version( + package_name, + package_version_info, + ) + + if args.untagged: + logger.info(f"Deleting untagged packages of {package_name}") + for to_delete_name in untagged_packages: + to_delete_version = untagged_packages[to_delete_name] + logger.info(f"Deleting id {to_delete_version['id']}") + if args.delete: + gh_api.delete_package_version( + package_name, + to_delete_version, + ) + + with DockerHubContainerRegistery() as dh_api: + docker_hub_image_version = dh_api.get_image_versions() + + # TODO + docker_hub_to_delete = [] + + for x in docker_hub_to_delete: + dh_api.delete_image_version() + if __name__ == "__main__": _main() diff --git a/.github/scripts/common.py b/.github/scripts/common.py index a64fa929a..f0302e79a 100644 --- a/.github/scripts/common.py +++ b/.github/scripts/common.py @@ -40,5 +40,5 @@ def get_log_level(args) -> int: } level = levels.get(args.loglevel.lower()) if level is None: - raise ArgumentError(f"{args.loglevel} is not a valid level") + level = logging.INFO return level diff --git a/.github/workflows/cleanup-tags.yml b/.github/workflows/cleanup-tags.yml index 3f5fadaaf..5342d040a 100644 --- a/.github/workflows/cleanup-tags.yml +++ b/.github/workflows/cleanup-tags.yml @@ -39,9 +39,9 @@ jobs: with: python-version: "3.9" - - name: Install fastai GitHub API + name: Install requests run: | - python -m pip install ghapi requests + python -m pip install requests - name: Cleanup feature tags run: |