Changes the cleanup images workflow so it uses an OAuth token with the correct scope (GITHUB_TOKEN is not enough). Also prevents running if the token is not defined and generally does commenting/cleanups

This commit is contained in:
Trenton Holmes 2022-07-26 15:41:57 -07:00
parent 173934258c
commit 0fdd3d56f4
2 changed files with 68 additions and 16 deletions

View File

@ -1,3 +1,4 @@
#!/usr/bin/env python3
import logging import logging
import os import os
from argparse import ArgumentParser from argparse import ArgumentParser
@ -35,6 +36,10 @@ class GithubContainerRegistry:
self._PACKAGE_VERSION_DELETE_ENDPOINT = "https://api.github.com/user/packages/{PACKAGE_TYPE}/{PACKAGE_NAME}/versions/{PACKAGE_VERSION_ID}" self._PACKAGE_VERSION_DELETE_ENDPOINT = "https://api.github.com/user/packages/{PACKAGE_TYPE}/{PACKAGE_NAME}/versions/{PACKAGE_VERSION_ID}"
def __enter__(self): def __enter__(self):
"""
Sets up the required headers for auth and response
type from the API
"""
self._session.headers.update( self._session.headers.update(
{ {
"Accept": "application/vnd.github.v3+json", "Accept": "application/vnd.github.v3+json",
@ -44,12 +49,20 @@ class GithubContainerRegistry:
return self return self
def __exit__(self, exc_type, exc_val, exc_tb): def __exit__(self, exc_type, exc_val, exc_tb):
"""
Ensures the authorization token is cleaned up no matter
the reason for the exit
"""
if "Accept" in self._session.headers: if "Accept" in self._session.headers:
del self._session.headers["Accept"] del self._session.headers["Accept"]
if "Authorization" in self._session.headers: if "Authorization" in self._session.headers:
del self._session.headers["Authorization"] del self._session.headers["Authorization"]
def _read_all_pages(self, endpoint): def _read_all_pages(self, endpoint):
"""
Internal function to read all pages of an endpoint, utilizing the
next.url until exhausted
"""
internal_data = [] internal_data = []
while True: while True:
@ -68,11 +81,18 @@ class GithubContainerRegistry:
return internal_data return internal_data
def get_branches(self, repo: str): def get_branches(self, repo: str):
"""
Returns all current branches of the given repository
"""
endpoint = self._BRANCHES_ENDPOINT.format(OWNER=self._owner_or_org, REPO=repo) endpoint = self._BRANCHES_ENDPOINT.format(OWNER=self._owner_or_org, REPO=repo)
internal_data = self._read_all_pages(endpoint) internal_data = self._read_all_pages(endpoint)
return internal_data return internal_data
def filter_branches_by_name_pattern(self, branch_data, pattern: str): def filter_branches_by_name_pattern(self, branch_data, pattern: str):
"""
Filters the given list of branches to those which start with the given
pattern. Future enhancement could use regex patterns instead.
"""
matches = {} matches = {}
for branch in branch_data: for branch in branch_data:
@ -86,6 +106,10 @@ class GithubContainerRegistry:
package_name: str, package_name: str,
package_type: str = "container", package_type: str = "container",
) -> List: ) -> List:
"""
Returns all the versions of a given package (container images) from
the API
"""
package_name = quote(package_name, safe="") package_name = quote(package_name, safe="")
endpoint = self._PACKAGES_VERSIONS_ENDPOINT.format( endpoint = self._PACKAGES_VERSIONS_ENDPOINT.format(
ORG=self._owner_or_org, ORG=self._owner_or_org,
@ -98,6 +122,10 @@ class GithubContainerRegistry:
return internal_data return internal_data
def filter_packages_by_tag_pattern(self, package_data, pattern: str): def filter_packages_by_tag_pattern(self, package_data, pattern: str):
"""
Filters the given package version info to those where the tags of the image
contain at least 1 tag which starts with the given pattern.
"""
matches = {} matches = {}
for package in package_data: for package in package_data:
@ -113,6 +141,9 @@ class GithubContainerRegistry:
return matches return matches
def filter_packages_untagged(self, package_data): def filter_packages_untagged(self, package_data):
"""
Filters the given package data to those which have no tags at all
"""
matches = {} matches = {}
for package in package_data: for package in package_data:
@ -126,13 +157,10 @@ class GithubContainerRegistry:
return matches return matches
def delete_package_version(self, package_name, package_data): def delete_package_version(self, package_name, package_data):
package_name = quote(package_name, safe="") """
endpoint = self._PACKAGE_VERSION_DELETE_ENDPOINT.format( Deletes the given package version from the GHCR
ORG=self._owner_or_org, """
PACKAGE_TYPE=package_data["metadata"]["package_type"], endpoint = package_data["url"]
PACKAGE_NAME=package_name,
PACKAGE_VERSION_ID=package_data["id"],
)
resp = self._session.delete(endpoint) resp = self._session.delete(endpoint)
if resp.status_code != 204: if resp.status_code != 204:
logger.warning( logger.warning(
@ -146,6 +174,7 @@ def _main():
" tags which no longer have an associated feature branch", " tags which no longer have an associated feature branch",
) )
# Requires an affirmative command to actually do a delete
parser.add_argument( parser.add_argument(
"--delete", "--delete",
action="store_true", action="store_true",
@ -153,7 +182,8 @@ def _main():
help="If provided, actually delete the container tags", help="If provided, actually delete the container tags",
) )
# TODO There's a lot of untagged images, do those need to stay for anything? # When a tagged image is updated, the previous version remains, but it is no longer tagged
# Add this option to remove them as well
parser.add_argument( parser.add_argument(
"--untagged", "--untagged",
action="store_true", action="store_true",
@ -161,6 +191,7 @@ def _main():
help="If provided, delete untagged containers as well", help="If provided, delete untagged containers as well",
) )
# Allows configuration of log level for debugging
parser.add_argument( parser.add_argument(
"--loglevel", "--loglevel",
default="info", default="info",
@ -175,28 +206,34 @@ def _main():
format="%(asctime)s %(levelname)-8s %(message)s", format="%(asctime)s %(levelname)-8s %(message)s",
) )
# Must be provided in the environment
repo_owner: Final[str] = os.environ["GITHUB_REPOSITORY_OWNER"] repo_owner: Final[str] = os.environ["GITHUB_REPOSITORY_OWNER"]
repo: Final[str] = os.environ["GITHUB_REPOSITORY"] repo: Final[str] = os.environ["GITHUB_REPOSITORY"]
gh_token: Final[str] = os.environ["GITHUB_TOKEN"] gh_token: Final[str] = os.environ["TOKEN"]
with requests.session() as sess: with requests.session() as sess:
with GithubContainerRegistry(sess, gh_token, repo_owner) as gh_api: with GithubContainerRegistry(sess, gh_token, repo_owner) as gh_api:
# Step 1 - Locate all branches of the repo
all_branches = gh_api.get_branches("paperless-ngx") all_branches = gh_api.get_branches("paperless-ngx")
logger.info(f"Located {len(all_branches)} branches of {repo_owner}/{repo} ") logger.info(f"Located {len(all_branches)} branches of {repo_owner}/{repo} ")
# Step 2 - Filter branches to those starting with "feature-"
feature_branches = gh_api.filter_branches_by_name_pattern( feature_branches = gh_api.filter_branches_by_name_pattern(
all_branches, all_branches,
"feature-", "feature-",
) )
logger.info(f"Located {len(feature_branches)} feature branches") logger.info(f"Located {len(feature_branches)} feature branches")
# Step 3 - Deal with package information
for package_name in ["paperless-ngx", "paperless-ngx/builder/cache/app"]: for package_name in ["paperless-ngx", "paperless-ngx/builder/cache/app"]:
# Step 3.1 - Locate all versions of the given package
all_package_versions = gh_api.get_package_versions(package_name) all_package_versions = gh_api.get_package_versions(package_name)
logger.info( logger.info(
f"Located {len(all_package_versions)} versions of package {package_name}", f"Located {len(all_package_versions)} versions of package {package_name}",
) )
# Step 3.2 - Locate package versions which have a tag of "feature-"
packages_tagged_feature = gh_api.filter_packages_by_tag_pattern( packages_tagged_feature = gh_api.filter_packages_by_tag_pattern(
all_package_versions, all_package_versions,
"feature-", "feature-",
@ -205,6 +242,8 @@ def _main():
f'Located {len(packages_tagged_feature)} versions of package {package_name} tagged "feature-"', f'Located {len(packages_tagged_feature)} versions of package {package_name} tagged "feature-"',
) )
# Step 3.3 - Locate package versions with no tags at all
# TODO: What exactly are these? Leftovers?
untagged_packages = gh_api.filter_packages_untagged( untagged_packages = gh_api.filter_packages_untagged(
all_package_versions, all_package_versions,
) )
@ -212,6 +251,7 @@ def _main():
f"Located {len(untagged_packages)} untagged versions of package {package_name}", f"Located {len(untagged_packages)} untagged versions of package {package_name}",
) )
# Step 3.4 - Determine which package versions have no matching branch
to_delete = list( to_delete = list(
set(packages_tagged_feature.keys()) - set(feature_branches.keys()), set(packages_tagged_feature.keys()) - set(feature_branches.keys()),
) )
@ -219,6 +259,7 @@ def _main():
f"Located {len(to_delete)} versions of package {package_name} to delete", f"Located {len(to_delete)} versions of package {package_name} to delete",
) )
# Step 3.5 - Delete certain package versions
for tag_to_delete in to_delete: for tag_to_delete in to_delete:
package_version_info = packages_tagged_feature[tag_to_delete] package_version_info = packages_tagged_feature[tag_to_delete]
@ -236,16 +277,22 @@ def _main():
f"Would delete {tag_to_delete} (id {package_version_info['id']})", f"Would delete {tag_to_delete} (id {package_version_info['id']})",
) )
# Step 3.6 - Delete untagged package versions
if args.untagged: if args.untagged:
logger.info(f"Deleting untagged packages of {package_name}") logger.info(f"Deleting untagged packages of {package_name}")
for to_delete_name in untagged_packages: for to_delete_name in untagged_packages:
to_delete_version = untagged_packages[to_delete_name] to_delete_version = untagged_packages[to_delete_name]
logger.info(f"Deleting id {to_delete_version['id']}")
if args.delete: if args.delete:
logger.info(f"Deleting id {to_delete_version['id']}")
gh_api.delete_package_version( gh_api.delete_package_version(
package_name, package_name,
to_delete_version, to_delete_version,
) )
else:
logger.info(
f"Would delete {to_delete_name} (id {to_delete_version['id']})",
)
else: else:
logger.info("Leaving untagged images untouched") logger.info("Leaving untagged images untouched")

View File

@ -1,3 +1,8 @@
# This workflow runs on certain conditions to check for and potentially
# delete container images from the GHCR which no longer have an associated
# code branch.
# Requires a PAT with the correct scope set in the secrets
name: Cleanup Image Tags name: Cleanup Image Tags
on: on:
@ -13,15 +18,13 @@ on:
- ".github/scripts/cleanup-tags.py" - ".github/scripts/cleanup-tags.py"
- ".github/scripts/common.py" - ".github/scripts/common.py"
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
jobs: jobs:
cleanup: cleanup:
name: Cleanup Image Tags name: Cleanup Image Tags
runs-on: ubuntu-20.04 runs-on: ubuntu-20.04
permissions: env:
packages: write # Requires a personal access token with the OAuth scope delete:packages
TOKEN: ${{ secrets.GHA_CONTAINER_DELETE_TOKEN }}
steps: steps:
- -
name: Checkout name: Checkout
@ -44,5 +47,7 @@ jobs:
python -m pip install requests python -m pip install requests
- -
name: Cleanup feature tags name: Cleanup feature tags
# Only run if the token is not empty
if: "${{ env.TOKEN != '' }}"
run: | run: |
python ${GITHUB_WORKSPACE}/.github/scripts/cleanup-tags.py --loglevel info --delete python ${GITHUB_WORKSPACE}/.github/scripts/cleanup-tags.py --loglevel info --untagged --delete