diff --git a/.github/scripts/cleanup-tags.py b/.github/scripts/cleanup-tags.py index 592bd9e9f..ea8e69b6a 100644 --- a/.github/scripts/cleanup-tags.py +++ b/.github/scripts/cleanup-tags.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python3 import logging import os from argparse import ArgumentParser @@ -35,6 +36,10 @@ class GithubContainerRegistry: self._PACKAGE_VERSION_DELETE_ENDPOINT = "https://api.github.com/user/packages/{PACKAGE_TYPE}/{PACKAGE_NAME}/versions/{PACKAGE_VERSION_ID}" def __enter__(self): + """ + Sets up the required headers for auth and response + type from the API + """ self._session.headers.update( { "Accept": "application/vnd.github.v3+json", @@ -44,12 +49,20 @@ class GithubContainerRegistry: return self def __exit__(self, exc_type, exc_val, exc_tb): + """ + Ensures the authorization token is cleaned up no matter + the reason for the exit + """ if "Accept" in self._session.headers: del self._session.headers["Accept"] if "Authorization" in self._session.headers: del self._session.headers["Authorization"] def _read_all_pages(self, endpoint): + """ + Internal function to read all pages of an endpoint, utilizing the + next.url until exhausted + """ internal_data = [] while True: @@ -68,11 +81,18 @@ class GithubContainerRegistry: return internal_data def get_branches(self, repo: str): + """ + Returns all current branches of the given repository + """ endpoint = self._BRANCHES_ENDPOINT.format(OWNER=self._owner_or_org, REPO=repo) internal_data = self._read_all_pages(endpoint) return internal_data def filter_branches_by_name_pattern(self, branch_data, pattern: str): + """ + Filters the given list of branches to those which start with the given + pattern. Future enhancement could use regex patterns instead. + """ matches = {} for branch in branch_data: @@ -86,6 +106,10 @@ class GithubContainerRegistry: package_name: str, package_type: str = "container", ) -> List: + """ + Returns all the versions of a given package (container images) from + the API + """ package_name = quote(package_name, safe="") endpoint = self._PACKAGES_VERSIONS_ENDPOINT.format( ORG=self._owner_or_org, @@ -98,6 +122,10 @@ class GithubContainerRegistry: return internal_data def filter_packages_by_tag_pattern(self, package_data, pattern: str): + """ + Filters the given package version info to those where the tags of the image + containers at least 1 tag which starts with the given pattern. + """ matches = {} for package in package_data: @@ -113,6 +141,9 @@ class GithubContainerRegistry: return matches def filter_packages_untagged(self, package_data): + """ + Filters the given package data to those which have no tags at all + """ matches = {} for package in package_data: @@ -126,13 +157,10 @@ class GithubContainerRegistry: return matches def delete_package_version(self, package_name, package_data): - package_name = quote(package_name, safe="") - endpoint = self._PACKAGE_VERSION_DELETE_ENDPOINT.format( - ORG=self._owner_or_org, - PACKAGE_TYPE=package_data["metadata"]["package_type"], - PACKAGE_NAME=package_name, - PACKAGE_VERSION_ID=package_data["id"], - ) + """ + Deletes the given package version from the GHCR + """ + endpoint = package_data["url"] resp = self._session.delete(endpoint) if resp.status_code != 204: logger.warning( @@ -146,6 +174,7 @@ def _main(): " tags which no longer have an associated feature branch", ) + # Requires an affirmative command to actually do a delete parser.add_argument( "--delete", action="store_true", @@ -153,7 +182,8 @@ def _main(): help="If provided, actually delete the container tags", ) - # TODO There's a lot of untagged images, do those need to stay for anything? + # When a tagged image is updated, the previous version remains, but it no longer tagged + # Add this option to remove them as well parser.add_argument( "--untagged", action="store_true", @@ -161,6 +191,7 @@ def _main(): help="If provided, delete untagged containers as well", ) + # Allows configuration of log level for debugging parser.add_argument( "--loglevel", default="info", @@ -175,28 +206,34 @@ def _main(): format="%(asctime)s %(levelname)-8s %(message)s", ) + # Must be provided in the environment repo_owner: Final[str] = os.environ["GITHUB_REPOSITORY_OWNER"] repo: Final[str] = os.environ["GITHUB_REPOSITORY"] - gh_token: Final[str] = os.environ["GITHUB_TOKEN"] + gh_token: Final[str] = os.environ["TOKEN"] with requests.session() as sess: with GithubContainerRegistry(sess, gh_token, repo_owner) as gh_api: + # Step 1 - Locate all branches of the repo all_branches = gh_api.get_branches("paperless-ngx") logger.info(f"Located {len(all_branches)} branches of {repo_owner}/{repo} ") + # Step 2 - Filter branches to those starting with "feature-" feature_branches = gh_api.filter_branches_by_name_pattern( all_branches, "feature-", ) logger.info(f"Located {len(feature_branches)} feature branches") + # Step 3 - Deal with package information for package_name in ["paperless-ngx", "paperless-ngx/builder/cache/app"]: + # Step 3.1 - Location all versions of the given package all_package_versions = gh_api.get_package_versions(package_name) logger.info( f"Located {len(all_package_versions)} versions of package {package_name}", ) + # Step 3.2 - Location package versions which have a tag of "feature-" packages_tagged_feature = gh_api.filter_packages_by_tag_pattern( all_package_versions, "feature-", @@ -205,6 +242,8 @@ def _main(): f'Located {len(packages_tagged_feature)} versions of package {package_name} tagged "feature-"', ) + # Step 3.3 - Location package versions with no tags at all + # TODO: What exactly are these? Leftovers? untagged_packages = gh_api.filter_packages_untagged( all_package_versions, ) @@ -212,6 +251,7 @@ def _main(): f"Located {len(untagged_packages)} untagged versions of package {package_name}", ) + # Step 3.4 - Determine which package versions have no matching branch to_delete = list( set(packages_tagged_feature.keys()) - set(feature_branches.keys()), ) @@ -219,6 +259,7 @@ def _main(): f"Located {len(to_delete)} versions of package {package_name} to delete", ) + # Step 3.5 - Delete certain package versions for tag_to_delete in to_delete: package_version_info = packages_tagged_feature[tag_to_delete] @@ -236,16 +277,22 @@ def _main(): f"Would delete {tag_to_delete} (id {package_version_info['id']})", ) + # Step 3.6 - Delete untagged package versions if args.untagged: logger.info(f"Deleting untagged packages of {package_name}") for to_delete_name in untagged_packages: to_delete_version = untagged_packages[to_delete_name] - logger.info(f"Deleting id {to_delete_version['id']}") + if args.delete: + logger.info(f"Deleting id {to_delete_version['id']}") gh_api.delete_package_version( package_name, to_delete_version, ) + else: + logger.info( + f"Would delete {to_delete_name} (id {to_delete_version['id']})", + ) else: logger.info("Leaving untagged images untouched") diff --git a/.github/workflows/cleanup-tags.yml b/.github/workflows/cleanup-tags.yml index 5041bf0ee..97e0081d2 100644 --- a/.github/workflows/cleanup-tags.yml +++ b/.github/workflows/cleanup-tags.yml @@ -1,3 +1,8 @@ +# This workflow runs on certain conditions to check for and potentially +# delete container images from the GHCR which no longer have an associated +# code branch. +# Requires a PAT with the correct scope set in the secrets + name: Cleanup Image Tags on: @@ -13,15 +18,13 @@ on: - ".github/scripts/cleanup-tags.py" - ".github/scripts/common.py" -env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - jobs: cleanup: name: Cleanup Image Tags runs-on: ubuntu-20.04 - permissions: - packages: write + env: + # Requires a personal access token with the OAuth scope delete:packages + TOKEN: ${{ secrets.GHA_CONTAINER_DELETE_TOKEN }} steps: - name: Checkout @@ -44,5 +47,7 @@ jobs: python -m pip install requests - name: Cleanup feature tags + # Only run if the token is not empty + if: "${{ env.TOKEN != '' }}" run: | - python ${GITHUB_WORKSPACE}/.github/scripts/cleanup-tags.py --loglevel info --delete + python ${GITHUB_WORKSPACE}/.github/scripts/cleanup-tags.py --loglevel info --untagged --delete