mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-10-22 03:16:15 -05:00 
			
		
		
		
	Moves to the new action for cleaning the published images
This commit is contained in:
		
							
								
								
									
										485
									
								
								.github/scripts/cleanup-tags.py
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										485
									
								
								.github/scripts/cleanup-tags.py
									
									
									
									
										vendored
									
									
								
							| @@ -1,485 +0,0 @@ | ||||
| import json | ||||
| import logging | ||||
| import os | ||||
| import shutil | ||||
| import subprocess | ||||
| from argparse import ArgumentParser | ||||
| from typing import Dict | ||||
| from typing import Final | ||||
| from typing import Iterator | ||||
| from typing import List | ||||
| from typing import Optional | ||||
|  | ||||
| from common import get_log_level | ||||
| from github import ContainerPackage | ||||
| from github import GithubBranchApi | ||||
| from github import GithubContainerRegistryApi | ||||
|  | ||||
| logger = logging.getLogger("cleanup-tags") | ||||
|  | ||||
|  | ||||
class ImageProperties:
    """
    Lightweight view over a single entry of an image index "manifests" list.

    The entry is a pointer to a platform specific image (its digest and the
    platform it targets).  It is NOT an actual image with layers, etc

    https://docs.docker.com/registry/spec/manifest-v2-2/
    https://github.com/opencontainers/image-spec/blob/main/manifest.md
    https://github.com/opencontainers/image-spec/blob/main/descriptor.md
    """

    def __init__(self, data: Dict) -> None:
        self._data = data

        # The "sha256:..." digest string.  For an untagged package, this
        # corresponds to the name reported by the GitHub API
        self.digest = data["digest"]

        platform_info = data["platform"]
        os_name = platform_info["os"]
        architecture = platform_info["architecture"]
        # The variant is optional and is appended straight onto the
        # architecture, without any separator
        variant = platform_info.get("variant", "")

        self.platform = f"{os_name}/{architecture}{variant}"
|  | ||||
|  | ||||
class ImageIndex:
    """
    Data class wrapping up logic for an OCI Image Index JSON data, as returned
    by "docker buildx imagetools inspect --raw".  Primary use is to access the
    manifests listing

    See https://github.com/opencontainers/image-spec/blob/main/image-index.md
    """

    def __init__(self, package_url: str, tag: str) -> None:
        """
        Fetches and parses the image index of "package_url:tag".

        Raises FileNotFoundError if no docker executable can be located and
        subprocess.CalledProcessError if the inspect command fails.
        """
        self.qualified_name = f"{package_url}:{tag}"
        logger.info(f"Getting image index for {self.qualified_name}")

        # shutil.which returns None when the executable is not found.  Handing
        # None on to subprocess.run only produces an unhelpful TypeError
        docker_exe = shutil.which("docker")
        if docker_exe is None:
            raise FileNotFoundError("Unable to locate the docker executable")

        try:
            proc = subprocess.run(
                [
                    docker_exe,
                    "buildx",
                    "imagetools",
                    "inspect",
                    "--raw",
                    self.qualified_name,
                ],
                capture_output=True,
                check=True,
            )
        except subprocess.CalledProcessError as e:
            logger.error(
                f"Failed to get image index for {self.qualified_name}: {e.stderr}",
            )
            # Bare raise keeps the original traceback intact
            raise

        self._data = json.loads(proc.stdout)

    @property
    def image_pointers(self) -> Iterator[ImageProperties]:
        """
        Yields an ImageProperties for each entry of the index "manifests" list
        """
        for manifest_data in self._data["manifests"]:
            yield ImageProperties(manifest_data)
|  | ||||
|  | ||||
class RegistryTagsCleaner:
    """
    This is the base class for the image registry cleaning.  Given a package
    name, it will keep all images which are tagged and all untagged images
    referred to by a manifest.  The only images removed are those which have
    been untagged and cannot be referenced except by their SHA.  None of these
    images should be referenced, so it is fine to delete them.
    """

    def __init__(
        self,
        package_name: str,
        repo_owner: str,
        repo_name: str,
        package_api: GithubContainerRegistryApi,
        branch_api: Optional[GithubBranchApi],
    ):
        # Dry run until a caller explicitly flips this after construction
        self.actually_delete = False
        self.package_api = package_api
        self.branch_api = branch_api
        self.package_name = package_name
        self.repo_owner = repo_owner
        self.repo_name = repo_name
        self.tags_to_delete: List[str] = []
        self.tags_to_keep: List[str] = []

        # Get the information about all versions of the given package
        # These are active, not deleted, the default returned from the API
        self.all_package_versions = self.package_api.get_active_package_versions(
            self.package_name,
        )

        # Get a mapping from a tag like "1.7.0" or "feature-xyz" to the ContainerPackage
        # tagged with it.  It makes certain lookups easy
        self.all_pkgs_tags_to_version: Dict[str, ContainerPackage] = {}
        for pkg in self.all_package_versions:
            for tag in pkg.tags:
                self.all_pkgs_tags_to_version[tag] = pkg
        logger.info(
            f"Located {len(self.all_package_versions)} versions of package {self.package_name}",
        )

        # Subclasses override this to select tags for deletion
        self.decide_what_tags_to_keep()

    def clean(self):
        """
        This method will delete image versions, based on the selected tags to delete.
        It behaves more like an unlinking than actual deletion.  Removing the tag
        simply removes a pointer to an image, but the actual image data remains accessible
        if one has the sha256 digest of it.

        When actually_delete is False, this only logs what would be deleted.
        """
        # Checked up front instead of with a for/else: the else clause of a
        # loop without a break also runs after a loop which did process tags
        if not self.tags_to_delete:
            logger.info("No tags to delete")
            return

        for tag_to_delete in self.tags_to_delete:
            # The package version carrying the tag is what gets deleted
            package_version_info = self.all_pkgs_tags_to_version[tag_to_delete]

            if self.actually_delete:
                logger.info(
                    f"Deleting {tag_to_delete} (id {package_version_info.id})",
                )
                self.package_api.delete_package_version(
                    package_version_info,
                )
            else:
                logger.info(
                    f"Would delete {tag_to_delete} (id {package_version_info.id})",
                )

    def clean_untagged(self, is_manifest_image: bool):
        """
        This method will delete untagged images, that is those which are not named.  It
        handles if the image tag is actually a manifest, which points to images that look otherwise
        untagged.
        """

        def _clean_untagged_manifest():
            """

            Handles the deletion of untagged images, but where the package is a manifest, ie a multi
            arch image, which means some "untagged" images need to exist still.

            Ok, bear with me, these are annoying.

            Our images are multi-arch, so the manifest is more like a pointer to a sha256 digest.
            These images are untagged, but pointed to, and so should not be removed (or every pull fails).

            So for each image getting kept, parse the manifest to find the digest(s) it points to.  Then
            remove those from the list of untagged images.  The final result is the untagged, not pointed to
            version which should be safe to remove.

            Example:
                Tag: ghcr.io/paperless-ngx/paperless-ngx:1.7.1 refers to
                    amd64: sha256:b9ed4f8753bbf5146547671052d7e91f68cdfc9ef049d06690b2bc866fec2690
                    armv7: sha256:81605222df4ba4605a2ba4893276e5d08c511231ead1d5da061410e1bbec05c3
                    arm64: sha256:374cd68db40734b844705bfc38faae84cc4182371de4bebd533a9a365d5e8f3b
                each of which appears as untagged image, but isn't really.

                So from the list of untagged packages, remove those digests.  Once all tags which
                are being kept are checked, the remaining untagged packages are actually untagged
                with no referrals in a manifest to them.
            """
            # Simplify the untagged data, mapping name (which is a digest) to the version
            # At the moment, these are the images which APPEAR untagged.
            untagged_versions = {
                version.name: version
                for version in self.all_package_versions
                if version.untagged
            }

            skips = 0

            # Parse manifests to locate digests pointed to
            for tag in sorted(self.tags_to_keep):
                try:
                    image_index = ImageIndex(
                        f"ghcr.io/{self.repo_owner}/{self.package_name}",
                        tag,
                    )
                    for manifest in image_index.image_pointers:
                        if manifest.digest in untagged_versions:
                            logger.info(
                                f"Skipping deletion of {manifest.digest},"
                                f" referred to by {image_index.qualified_name}"
                                f" for {manifest.platform}",
                            )
                            del untagged_versions[manifest.digest]
                            skips += 1

                except Exception as err:
                    # If any kept tag cannot be inspected, the set of referenced
                    # digests is incomplete.  Stop here and disable deletion
                    # rather than risk removing an image still pointed to
                    self.actually_delete = False
                    logger.exception(err)
                    return

            logger.info(
                f"Skipping deletion of {skips} packages referred to by a manifest",
            )

            # Delete the untagged and not pointed at packages
            logger.info(f"Deleting untagged packages of {self.package_name}")
            for to_delete_name, to_delete_version in untagged_versions.items():
                if self.actually_delete:
                    logger.info(
                        f"Deleting id {to_delete_version.id} named {to_delete_version.name}",
                    )
                    self.package_api.delete_package_version(
                        to_delete_version,
                    )
                else:
                    logger.info(
                        f"Would delete {to_delete_name} (id {to_delete_version.id})",
                    )

        def _clean_untagged_non_manifest():
            """
            If the package is not a multi-arch manifest, images without tags are safe to delete.
            """

            for package in self.all_package_versions:
                if package.untagged:
                    if self.actually_delete:
                        logger.info(
                            f"Deleting id {package.id} named {package.name}",
                        )
                        self.package_api.delete_package_version(
                            package,
                        )
                    else:
                        logger.info(
                            f"Would delete {package.name} (id {package.id})",
                        )
                else:
                    # Not untagged, so at least one tag exists to report
                    logger.info(
                        f"Not deleting tag {package.tags[0]} of package {self.package_name}",
                    )

        logger.info("Beginning untagged image cleaning")

        if is_manifest_image:
            _clean_untagged_manifest()
        else:
            _clean_untagged_non_manifest()

    def decide_what_tags_to_keep(self):
        """
        This method holds the logic to decide what tags to keep and therefore
        what tags to delete.

        By default, any image with at least 1 tag will be kept
        """
        # By default, keep anything which is tagged
        self.tags_to_keep = list(set(self.all_pkgs_tags_to_version.keys()))

    def check_remaining_tags_valid(self):
        """
        Checks the non-deleted tags are still valid.  The assumption is if the
        manifest can be inspected and each image manifest it points to can be
        inspected, the image will still pull.

        Every kept tag is checked before reporting.  Raises an Exception at
        the end if at least one inspection failed.

        https://github.com/opencontainers/image-spec/blob/main/image-index.md
        """
        logger.info("Beginning confirmation step")
        a_tag_failed = False
        for tag in sorted(self.tags_to_keep):
            try:
                image_index = ImageIndex(
                    f"ghcr.io/{self.repo_owner}/{self.package_name}",
                    tag,
                )
                for manifest in image_index.image_pointers:
                    logger.info(f"Checking {manifest.digest} for {manifest.platform}")

                    # This follows the pointer from the index to an actual image, layers and all
                    # Note the format is @
                    digest_name = f"ghcr.io/{self.repo_owner}/{self.package_name}@{manifest.digest}"

                    try:
                        subprocess.run(
                            [
                                shutil.which("docker"),
                                "buildx",
                                "imagetools",
                                "inspect",
                                "--raw",
                                digest_name,
                            ],
                            capture_output=True,
                            check=True,
                        )
                    except subprocess.CalledProcessError as e:
                        # Keep going, so every broken digest gets reported
                        logger.error(f"Failed to inspect digest: {e.stderr}")
                        a_tag_failed = True
            except subprocess.CalledProcessError as e:
                # The index of the tag itself could not be inspected
                a_tag_failed = True
                logger.error(f"Failed to inspect: {e.stderr}")

        if a_tag_failed:
            raise Exception("At least one image tag failed to inspect")
|  | ||||
|  | ||||
class MainImageTagsCleaner(RegistryTagsCleaner):
    """
    Cleaner which additionally selects "feature-" tagged images for deletion
    once no branch of the same name exists.
    """

    def decide_what_tags_to_keep(self):
        """
        Replaces the default keep-everything policy.  A tag of a "feature-"
        tagged package is selected for deletion when no feature branch of the
        same name exists.  Every other tag is kept.
        """

        # Begin from the default, where everything tagged is kept
        super().decide_what_tags_to_keep()

        # Gather the feature branches, keyed by the branch name
        feature_branches = {}
        for branch in self.branch_api.get_branches(repo=self.repo_name):
            if not branch.name.startswith("feature-"):
                continue
            logger.debug(f"Found feature branch {branch.name}")
            feature_branches[branch.name] = branch

        logger.info(f"Located {len(feature_branches)} feature branches")

        if not feature_branches:
            # With no feature branches located, nothing is selected for deletion
            return

        # Every tag of each package which has a feature-* tag, mapped to that package
        feature_tagged: Dict[str, ContainerPackage] = {
            tag: pkg
            for pkg in self.all_package_versions
            if pkg.tag_matches("feature-")
            for tag in pkg.tags
        }

        logger.info(
            f'Located {len(feature_tagged)} versions of package {self.package_name} tagged "feature-"',
        )

        every_tag = set(self.all_pkgs_tags_to_version)

        # Tags of feature packages, less the names of existing feature
        # branches, are the tags without a corresponding branch
        self.tags_to_delete = list(set(feature_tagged) - set(feature_branches))

        # Whatever is not going to be deleted stays around
        self.tags_to_keep = list(every_tag - set(self.tags_to_delete))

        logger.info(
            f"Located {len(self.tags_to_delete)} versions of package {self.package_name} to delete",
        )
|  | ||||
|  | ||||
class LibraryTagsCleaner(RegistryTagsCleaner):
    """
    Cleaner for the installer library images.  It adds nothing to the base
    class and only exists in case these images someday need logic of
    their own
    """
|  | ||||
|  | ||||
def _main():
    """
    Command line entry point.  Parses the arguments, builds the cleaner for
    the requested package and runs the tagged cleaning, the untagged cleaning
    and, for manifest packages, the check of the remaining tags.

    GITHUB_REPOSITORY_OWNER, GITHUB_REPOSITORY and TOKEN are read from the
    environment.
    """
    parser = ArgumentParser(
        description="Using the GitHub API locate and optionally delete container"
        " tags which no longer have an associated feature branch",
    )

    # Nothing is deleted unless this is explicitly asked for
    parser.add_argument(
        "--delete",
        action="store_true",
        default=False,
        help="If provided, actually delete the container tags",
    )

    # Updating a tagged image leaves the previous version behind, no longer
    # tagged.  This option removes those as well
    parser.add_argument(
        "--untagged",
        action="store_true",
        default=False,
        help="If provided, delete untagged containers as well",
    )

    # Marks the package as a multi-arch manifest.  Cache packages are
    # not multi-arch, all other types are
    parser.add_argument(
        "--is-manifest",
        action="store_true",
        default=False,
        help="If provided, the package is assumed to be a multi-arch manifest following schema v2",
    )

    # Log level selection, for debugging
    parser.add_argument(
        "--loglevel",
        default="info",
        help="Configures the logging level",
    )

    # The name of the package to work on during this run
    parser.add_argument(
        "package",
        help="The package to process",
    )

    args = parser.parse_args()

    logging.basicConfig(
        level=get_log_level(args),
        datefmt="%Y-%m-%d %H:%M:%S",
        format="%(asctime)s %(levelname)-8s %(message)s",
    )

    # All three must exist in the environment, a missing one is a KeyError
    repo_owner: Final[str] = os.environ["GITHUB_REPOSITORY_OWNER"]
    repo: Final[str] = os.environ["GITHUB_REPOSITORY"]
    gh_token: Final[str] = os.environ["TOKEN"]

    # Packages cleaned with the feature branch aware logic
    main_image_packages = {"paperless-ngx", "paperless-ngx/builder/cache/app"}

    # The branch API is only relevant to the main application, but it is
    # simpler to open it for all packages
    with GithubBranchApi(gh_token) as branch_api, GithubContainerRegistryApi(
        gh_token,
        repo_owner,
    ) as container_api:
        if args.package in main_image_packages:
            cleaner_class, cleaner_branch_api = MainImageTagsCleaner, branch_api
        else:
            cleaner_class, cleaner_branch_api = LibraryTagsCleaner, None

        cleaner = cleaner_class(
            args.package,
            repo_owner,
            repo,
            container_api,
            cleaner_branch_api,
        )

        # Real deletion vs dry run
        cleaner.actually_delete = args.delete

        # Tagged images first
        cleaner.clean()

        # Then the images without tags
        cleaner.clean_untagged(args.is_manifest)

        # Finally, confirm the remaining tags can still be inspected
        if args.is_manifest:
            cleaner.check_remaining_tags_valid()


if __name__ == "__main__":
    _main()
							
								
								
									
										270
									
								
								.github/scripts/github.py
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										270
									
								
								.github/scripts/github.py
									
									
									
									
										vendored
									
									
								
							| @@ -1,270 +0,0 @@ | ||||
| """ | ||||
| This module contains some useful classes for interacting with the Github API. | ||||
| The full documentation for the API can be found here: https://docs.github.com/en/rest | ||||
|  | ||||
| Mostly, this focusses on two areas, repo branches and repo packages, as the use case | ||||
| is cleaning up container images which are no longer referred to. | ||||
|  | ||||
| """ | ||||
| import functools | ||||
| import logging | ||||
| import re | ||||
| import urllib.parse | ||||
| from typing import Dict | ||||
| from typing import List | ||||
| from typing import Optional | ||||
|  | ||||
| import httpx | ||||
|  | ||||
| logger = logging.getLogger("github-api") | ||||
|  | ||||
|  | ||||
class _GithubApiBase:
    """
    A base class for interacting with the Github API.  It
    will handle the session and setting authorization headers.

    Intended to be used as a context manager: the HTTP client only exists
    between __enter__ and __exit__.
    """

    def __init__(self, token: str) -> None:
        self._token = token
        # Created in __enter__, reset to None in __exit__
        self._client: Optional[httpx.Client] = None

    def __enter__(self) -> "_GithubApiBase":
        """
        Sets up the required headers for auth and response
        type from the API
        """
        self._client = httpx.Client()
        self._client.headers.update(
            {
                "Accept": "application/vnd.github.v3+json",
                "Authorization": f"token {self._token}",
            },
        )
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """
        Ensures the authorization token is cleaned up no matter
        the reason for the exit
        """
        if "Accept" in self._client.headers:
            del self._client.headers["Accept"]
        if "Authorization" in self._client.headers:
            del self._client.headers["Authorization"]

        # Close the session as well
        self._client.close()
        self._client = None

    def _read_all_pages(self, endpoint):
        """
        Helper function to read all pages of an endpoint, following the
        "next" link of each response until exhausted.  Assumes the endpoint
        returns a list.

        Returns the concatenation of the lists of every page.  Any response
        other than HTTP 200 raises: an HTTP error through raise_for_status
        and a RuntimeError for any remaining status.
        """
        internal_data = []

        while True:
            resp = self._client.get(endpoint)
            if resp.status_code != 200:
                logger.warning(f"Request to {endpoint} return HTTP {resp.status_code}")
                resp.raise_for_status()
                # raise_for_status can return normally for a non-error status
                # (such as another 2xx).  Without raising here, the loop would
                # request the very same endpoint again, without end
                raise RuntimeError(
                    f"Unexpected HTTP {resp.status_code} from {endpoint}",
                )

            internal_data += resp.json()
            if "next" not in resp.links:
                logger.debug("Exiting pagination loop")
                break
            endpoint = resp.links["next"]["url"]

        return internal_data
|  | ||||
|  | ||||
| class _EndpointResponse: | ||||
|     """ | ||||
|     For all endpoint JSON responses, store the full | ||||
|     response data, for ease of extending later, if need be. | ||||
|     """ | ||||
|  | ||||
|     def __init__(self, data: Dict) -> None: | ||||
|         self._data = data | ||||
|  | ||||
|  | ||||
class GithubBranch(_EndpointResponse):
    """
    Thin wrapper over the data of one repository branch.  For now, the
    branch name is the only information extracted.
    """

    def __init__(self, data: Dict) -> None:
        super().__init__(data)
        # A response without a "name" key is a KeyError
        self.name = data["name"]
|  | ||||
|  | ||||
class GithubBranchApi(_GithubApiBase):
    """
    Access to the branches of a repository.

    See https://docs.github.com/en/rest/branches/branches

    """

    def __init__(self, token: str) -> None:
        super().__init__(token)

        # {REPO} is filled in per request
        self._ENDPOINT = "https://api.github.com/repos/{REPO}/branches"

    def get_branches(self, repo: str) -> List[GithubBranch]:
        """
        Returns all current branches of the given repository, reading every
        page of the listing.
        """
        # No owner handling is needed here, as the environment
        # GITHUB_REPOSITORY already contains the owner in the correct location
        branches_url = self._ENDPOINT.format(REPO=repo)
        return [GithubBranch(raw) for raw in self._read_all_pages(branches_url)]
|  | ||||
|  | ||||
class ContainerPackage(_EndpointResponse):
    """
    Data class wrapping the JSON response from the package related
    endpoints
    """

    def __init__(self, data: Dict):
        super().__init__(data)
        # This is a numerical ID, required for interactions with this
        # specific package, including deletion of it or restoration
        self.id: int = self._data["id"]

        # A string name.  This might be an actual name or it could be a
        # digest string like "sha256:"
        self.name: str = self._data["name"]

        # URL to the package, including its ID, can be used for deletion
        # or restoration without needing to build up a URL ourselves
        self.url: str = self._data["url"]

        # The list of tags applied to this image. Maybe an empty list
        self.tags: List[str] = self._data["metadata"]["container"]["tags"]

    @functools.cached_property
    def untagged(self) -> bool:
        """
        Returns True if the image has no tags applied to it, False otherwise
        """
        return len(self.tags) == 0

    # Deliberately not wrapped in functools.cache: on a method, the cache is
    # keyed on self and keeps every instance alive for as long as it exists
    def tag_matches(self, pattern: str) -> bool:
        """
        Returns True if the image has at least one tag which matches the given regex,
        False otherwise.  The regex is applied with re.match, so it must match
        from the start of the tag.
        """
        return any(re.match(pattern, tag) is not None for tag in self.tags)

    def __repr__(self):
        return f"Package {self.name}"
|  | ||||
|  | ||||
| class GithubContainerRegistryApi(_GithubApiBase): | ||||
|     """ | ||||
|     Class wrapper to deal with the Github packages API.  This class only deals with | ||||
|     container type packages, the only type published by paperless-ngx. | ||||
|     """ | ||||
|  | ||||
|     def __init__(self, token: str, owner_or_org: str) -> None: | ||||
|         super().__init__(token) | ||||
|         self._owner_or_org = owner_or_org | ||||
|         if self._owner_or_org == "paperless-ngx": | ||||
|             # https://docs.github.com/en/rest/packages#get-all-package-versions-for-a-package-owned-by-an-organization | ||||
|             self._PACKAGES_VERSIONS_ENDPOINT = "https://api.github.com/orgs/{ORG}/packages/{PACKAGE_TYPE}/{PACKAGE_NAME}/versions" | ||||
|             # https://docs.github.com/en/rest/packages#delete-package-version-for-an-organization | ||||
|             self._PACKAGE_VERSION_DELETE_ENDPOINT = "https://api.github.com/orgs/{ORG}/packages/{PACKAGE_TYPE}/{PACKAGE_NAME}/versions/{PACKAGE_VERSION_ID}" | ||||
|         else: | ||||
|             # https://docs.github.com/en/rest/packages#get-all-package-versions-for-a-package-owned-by-the-authenticated-user | ||||
|             self._PACKAGES_VERSIONS_ENDPOINT = "https://api.github.com/user/packages/{PACKAGE_TYPE}/{PACKAGE_NAME}/versions" | ||||
|             # https://docs.github.com/en/rest/packages#delete-a-package-version-for-the-authenticated-user | ||||
|             self._PACKAGE_VERSION_DELETE_ENDPOINT = "https://api.github.com/user/packages/{PACKAGE_TYPE}/{PACKAGE_NAME}/versions/{PACKAGE_VERSION_ID}" | ||||
|         self._PACKAGE_VERSION_RESTORE_ENDPOINT = ( | ||||
|             f"{self._PACKAGE_VERSION_DELETE_ENDPOINT}/restore" | ||||
|         ) | ||||
|  | ||||
|     def get_active_package_versions( | ||||
|         self, | ||||
|         package_name: str, | ||||
|     ) -> List[ContainerPackage]: | ||||
|         """ | ||||
|         Returns all the versions of a given package (container images) from | ||||
|         the API | ||||
|         """ | ||||
|  | ||||
|         package_type: str = "container" | ||||
|         # Need to quote this for slashes in the name | ||||
|         package_name = urllib.parse.quote(package_name, safe="") | ||||
|  | ||||
|         endpoint = self._PACKAGES_VERSIONS_ENDPOINT.format( | ||||
|             ORG=self._owner_or_org, | ||||
|             PACKAGE_TYPE=package_type, | ||||
|             PACKAGE_NAME=package_name, | ||||
|         ) | ||||
|  | ||||
|         pkgs = [] | ||||
|  | ||||
|         for data in self._read_all_pages(endpoint): | ||||
|             pkgs.append(ContainerPackage(data)) | ||||
|  | ||||
|         return pkgs | ||||
|  | ||||
|     def get_deleted_package_versions( | ||||
|         self, | ||||
|         package_name: str, | ||||
|     ) -> List[ContainerPackage]: | ||||
|         package_type: str = "container" | ||||
|         # Need to quote this for slashes in the name | ||||
|         package_name = urllib.parse.quote(package_name, safe="") | ||||
|  | ||||
|         endpoint = ( | ||||
|             self._PACKAGES_VERSIONS_ENDPOINT.format( | ||||
|                 ORG=self._owner_or_org, | ||||
|                 PACKAGE_TYPE=package_type, | ||||
|                 PACKAGE_NAME=package_name, | ||||
|             ) | ||||
|             + "?state=deleted" | ||||
|         ) | ||||
|  | ||||
|         pkgs = [] | ||||
|  | ||||
|         for data in self._read_all_pages(endpoint): | ||||
|             pkgs.append(ContainerPackage(data)) | ||||
|  | ||||
|         return pkgs | ||||
|  | ||||
|     def delete_package_version(self, package_data: ContainerPackage): | ||||
|         """ | ||||
|         Deletes the given package version from the GHCR | ||||
|         """ | ||||
|         resp = self._client.delete(package_data.url) | ||||
|         if resp.status_code != 204: | ||||
|             logger.warning( | ||||
|                 f"Request to delete {package_data.url} returned HTTP {resp.status_code}", | ||||
|             ) | ||||
|  | ||||
|     def restore_package_version( | ||||
|         self, | ||||
|         package_name: str, | ||||
|         package_data: ContainerPackage, | ||||
|     ): | ||||
|         package_type: str = "container" | ||||
|         endpoint = self._PACKAGE_VERSION_RESTORE_ENDPOINT.format( | ||||
|             ORG=self._owner_or_org, | ||||
|             PACKAGE_TYPE=package_type, | ||||
|             PACKAGE_NAME=package_name, | ||||
|             PACKAGE_VERSION_ID=package_data.id, | ||||
|         ) | ||||
|  | ||||
|         resp = self._client.post(endpoint) | ||||
|         if resp.status_code != 204: | ||||
|             logger.warning( | ||||
|                 f"Request to delete {endpoint} returned HTTP {resp.status_code}", | ||||
|             ) | ||||
							
								
								
									
										95
									
								
								.github/workflows/cleanup-tags.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										95
									
								
								.github/workflows/cleanup-tags.yml
									
									
									
									
										vendored
									
									
								
							| @@ -12,9 +12,6 @@ on: | ||||
|   push: | ||||
|     paths: | ||||
|       - ".github/workflows/cleanup-tags.yml" | ||||
|       - ".github/scripts/cleanup-tags.py" | ||||
|       - ".github/scripts/github.py" | ||||
|       - ".github/scripts/common.py" | ||||
|  | ||||
| concurrency: | ||||
|   group: registry-tags-cleanup | ||||
| @@ -22,62 +19,56 @@ concurrency: | ||||
|  | ||||
| jobs: | ||||
|   cleanup-images: | ||||
|     name: Cleanup Image Tags for ${{ matrix.primary-name }} | ||||
|     name: Cleanup Image Tags for paperless-ngx | ||||
|     if: github.repository_owner == 'paperless-ngx' | ||||
|     runs-on: ubuntu-22.04 | ||||
|     strategy: | ||||
|       matrix: | ||||
|         include: | ||||
|           - primary-name: "paperless-ngx" | ||||
|             cache-name: "paperless-ngx/builder/cache/app" | ||||
|  | ||||
|           - primary-name: "paperless-ngx/builder/qpdf" | ||||
|             cache-name: "paperless-ngx/builder/cache/qpdf" | ||||
|  | ||||
|           - primary-name: "paperless-ngx/builder/pikepdf" | ||||
|             cache-name: "paperless-ngx/builder/cache/pikepdf" | ||||
|  | ||||
|           - primary-name: "paperless-ngx/builder/jbig2enc" | ||||
|             cache-name: "paperless-ngx/builder/cache/jbig2enc" | ||||
|  | ||||
|           - primary-name: "paperless-ngx/builder/psycopg2" | ||||
|             cache-name: "paperless-ngx/builder/cache/psycopg2" | ||||
|     env: | ||||
|       # Requires a personal access token with the OAuth scope delete:packages | ||||
|       TOKEN: ${{ secrets.GHA_CONTAINER_DELETE_TOKEN }} | ||||
|     steps: | ||||
|       - | ||||
|         name: Checkout | ||||
|         uses: actions/checkout@v3 | ||||
|       - | ||||
|         name: Login to Github Container Registry | ||||
|         uses: docker/login-action@v2 | ||||
|         with: | ||||
|           registry: ghcr.io | ||||
|           username: ${{ github.actor }} | ||||
|           password: ${{ secrets.GITHUB_TOKEN }} | ||||
|       - | ||||
|         name: Set up Python | ||||
|         uses: actions/setup-python@v4 | ||||
|         with: | ||||
|           python-version: "3.10" | ||||
|       - | ||||
|         name: Install Python libraries | ||||
|         run: | | ||||
|           python -m pip install httpx docker | ||||
|       # | ||||
|       # Clean up primary package | ||||
|       # | ||||
|       - | ||||
|         name: Cleanup for package "${{ matrix.primary-name }}" | ||||
|         name: Clean temporary images | ||||
|         if: "${{ env.TOKEN != '' }}" | ||||
|         run: | | ||||
|           python ${GITHUB_WORKSPACE}/.github/scripts/cleanup-tags.py --untagged --is-manifest --delete "${{ matrix.primary-name }}" | ||||
|       # | ||||
|       # Clean up registry cache package | ||||
|       # | ||||
|         uses: stumpylog/image-cleaner-action/ephemeral@develop | ||||
|         with: | ||||
|           token: "${{ env.TOKEN }}" | ||||
|           owner: "${{ github.repository_owner }}" | ||||
|           is_org: "true" | ||||
|           package_name: "paperless-ngx" | ||||
|           scheme: "branch" | ||||
|           repo_name: "paperless-ngx" | ||||
|           match_regex: "feature-" | ||||
|  | ||||
|   cleanup-untagged-images: | ||||
|     name: Cleanup Untagged Images Tags for ${{ matrix.primary-name }} | ||||
|     if: github.repository_owner == 'paperless-ngx' | ||||
|     runs-on: ubuntu-22.04 | ||||
|     needs: | ||||
|       - cleanup-images | ||||
|     strategy: | ||||
|       fail-fast: false | ||||
|       matrix: | ||||
|         include: | ||||
|           - primary-name: "paperless-ngx" | ||||
|           - primary-name: "paperless-ngx/builder/cache/app" | ||||
|           - primary-name: "paperless-ngx/builder/qpdf" | ||||
|           - primary-name: "paperless-ngx/builder/cache/qpdf" | ||||
|           - primary-name: "paperless-ngx/builder/pikepdf" | ||||
|           - primary-name: "paperless-ngx/builder/cache/pikepdf" | ||||
|           - primary-name: "paperless-ngx/builder/jbig2enc" | ||||
|           - primary-name: "paperless-ngx/builder/cache/jbig2enc" | ||||
|           - primary-name: "paperless-ngx/builder/psycopg2" | ||||
|           - primary-name: "paperless-ngx/builder/cache/psycopg2" | ||||
|     env: | ||||
|       # Requires a personal access token with the OAuth scope delete:packages | ||||
|       TOKEN: ${{ secrets.GHA_CONTAINER_DELETE_TOKEN }} | ||||
|     steps: | ||||
|       - | ||||
|         name: Cleanup for package "${{ matrix.cache-name }}" | ||||
|         name: Clean untagged images | ||||
|         if: "${{ env.TOKEN != '' }}" | ||||
|         run: | | ||||
|           python ${GITHUB_WORKSPACE}/.github/scripts/cleanup-tags.py --untagged --delete "${{ matrix.cache-name }}" | ||||
|         uses: stumpylog/image-cleaner-action/untagged@develop | ||||
|         with: | ||||
|           token: "${{ env.TOKEN }}" | ||||
|           owner: "${{ github.repository_owner }}" | ||||
|           is_org: "true" | ||||
|           package_name: "${{ matrix.primary-name }}" | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Trenton H
					Trenton H