mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-11-03 03:16:10 -06:00 
			
		
		
		
	Moves to the new action for cleaning the published images
This commit is contained in:
		
							
								
								
									
										485
									
								
								.github/scripts/cleanup-tags.py
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										485
									
								
								.github/scripts/cleanup-tags.py
									
									
									
									
										vendored
									
									
								
							@@ -1,485 +0,0 @@
 | 
			
		||||
import json
 | 
			
		||||
import logging
 | 
			
		||||
import os
 | 
			
		||||
import shutil
 | 
			
		||||
import subprocess
 | 
			
		||||
from argparse import ArgumentParser
 | 
			
		||||
from typing import Dict
 | 
			
		||||
from typing import Final
 | 
			
		||||
from typing import Iterator
 | 
			
		||||
from typing import List
 | 
			
		||||
from typing import Optional
 | 
			
		||||
 | 
			
		||||
from common import get_log_level
 | 
			
		||||
from github import ContainerPackage
 | 
			
		||||
from github import GithubBranchApi
 | 
			
		||||
from github import GithubContainerRegistryApi
 | 
			
		||||
 | 
			
		||||
# Module-level logger used by every class and function in this script
logger = logging.getLogger("cleanup-tags")
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class ImageProperties:
    """
    Data class wrapping the properties of an entry in the image index
    manifests list.  It is NOT an actual image with layers, etc

    https://docs.docker.com/registry/spec/manifest-v2-2/
    https://github.com/opencontainers/image-spec/blob/main/manifest.md
    https://github.com/opencontainers/image-spec/blob/main/descriptor.md

    Attributes:
        digest: the "sha256:..." digest string of the entry
        platform: "<os>/<architecture><variant>", e.g. "linux/armv7"
    """

    def __init__(self, data: Dict) -> None:
        self._data = data
        # This is the sha256: digest string.  Corresponds to GitHub API name
        # if the package is an untagged package
        self.digest = self._data["digest"]

        # The "platform" object is optional on a descriptor, so do not let a
        # missing one hide the digest; fall back to "unknown" placeholders
        platform_data = self._data.get("platform") or {}
        platform_data_os = platform_data.get("os", "unknown")
        platform_arch = platform_data.get("architecture", "unknown")
        # The variant (e.g. "v7") only exists for some architectures
        platform_variant = platform_data.get("variant", "")
        self.platform = f"{platform_data_os}/{platform_arch}{platform_variant}"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class ImageIndex:
    """
    Data class wrapping up logic for an OCI Image Index
    JSON data.  Primary use is to access the manifests listing

    See https://github.com/opencontainers/image-spec/blob/main/image-index.md
    """

    def __init__(self, package_url: str, tag: str) -> None:
        """
        Fetches the raw index JSON for "<package_url>:<tag>" by running
        "docker buildx imagetools inspect --raw".

        Raises:
            FileNotFoundError: no docker executable could be located
            subprocess.CalledProcessError: the inspect command failed
        """
        self.qualified_name = f"{package_url}:{tag}"
        logger.info(f"Getting image index for {self.qualified_name}")

        # shutil.which returns None when docker is missing; passing None to
        # subprocess.run would only surface as an opaque TypeError
        docker_path = shutil.which("docker")
        if docker_path is None:
            raise FileNotFoundError("Unable to locate the docker executable")

        try:
            proc = subprocess.run(
                [
                    docker_path,
                    "buildx",
                    "imagetools",
                    "inspect",
                    "--raw",
                    self.qualified_name,
                ],
                capture_output=True,
                check=True,
            )
        except subprocess.CalledProcessError as e:
            logger.error(
                f"Failed to get image index for {self.qualified_name}: {e.stderr}",
            )
            # Bare raise keeps the original traceback intact
            raise

        self._data = json.loads(proc.stdout)

    @property
    def image_pointers(self) -> Iterator[ImageProperties]:
        """
        Yields one ImageProperties per entry of the index "manifests" list
        """
        for manifest_data in self._data["manifests"]:
            yield ImageProperties(manifest_data)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class RegistryTagsCleaner:
    """
    This is the base class for the image registry cleaning.  Given a package
    name, it will keep all images which are tagged and all untagged images
    referred to by a manifest.  As a result, the only images removed are those
    which have been untagged and cannot be referenced except by their SHA.
    None of these images should be referenced, so it is fine to delete them.

    Nothing is deleted unless ``actually_delete`` is set to True after
    construction; otherwise each step only logs what it would delete.
    """

    def __init__(
        self,
        package_name: str,
        repo_owner: str,
        repo_name: str,
        package_api: GithubContainerRegistryApi,
        branch_api: Optional[GithubBranchApi],
    ):
        # Dry run until the caller explicitly opts in to deleting
        self.actually_delete = False
        self.package_api = package_api
        self.branch_api = branch_api
        self.package_name = package_name
        self.repo_owner = repo_owner
        self.repo_name = repo_name
        self.tags_to_delete: List[str] = []
        self.tags_to_keep: List[str] = []

        # Get the information about all versions of the given package
        # These are active, not deleted, the default returned from the API
        self.all_package_versions = self.package_api.get_active_package_versions(
            self.package_name,
        )

        # Get a mapping from a tag like "1.7.0" or "feature-xyz" to the ContainerPackage
        # tagged with it.  It makes certain lookups easy
        self.all_pkgs_tags_to_version: Dict[str, ContainerPackage] = {}
        for pkg in self.all_package_versions:
            for tag in pkg.tags:
                self.all_pkgs_tags_to_version[tag] = pkg
        logger.info(
            f"Located {len(self.all_package_versions)} versions of package {self.package_name}",
        )

        # Subclasses override this hook to populate tags_to_delete/tags_to_keep
        self.decide_what_tags_to_keep()

    def clean(self):
        """
        This method will delete image versions, based on the selected tags to delete.
        It behaves more like an unlinking than actual deletion.  Removing the tag
        simply removes a pointer to an image, but the actual image data remains accessible
        if one has the sha256 digest of it.
        """
        # Guard clause instead of for/else: the else branch of a for loop
        # runs whenever the loop finishes without break, so the message was
        # logged even after tags had been deleted
        if not self.tags_to_delete:
            logger.info("No tags to delete")
            return

        for tag_to_delete in self.tags_to_delete:
            package_version_info = self.all_pkgs_tags_to_version[tag_to_delete]

            if self.actually_delete:
                logger.info(
                    f"Deleting {tag_to_delete} (id {package_version_info.id})",
                )
                self.package_api.delete_package_version(
                    package_version_info,
                )
            else:
                logger.info(
                    f"Would delete {tag_to_delete} (id {package_version_info.id})",
                )

    def clean_untagged(self, is_manifest_image: bool):
        """
        This method will delete untagged images, that is those which are not named.  It
        handles if the image tag is actually a manifest, which points to images that look otherwise
        untagged.

        Args:
            is_manifest_image: True if the package is a multi-arch manifest, in
                which case untagged versions referred to by a kept tag survive
        """

        def _clean_untagged_manifest():
            """
            Handles the deletion of untagged images, but where the package is a manifest, ie a multi
            arch image, which means some "untagged" images need to exist still.

            Ok, bear with me, these are annoying.

            Our images are multi-arch, so the manifest is more like a pointer to a sha256 digest.
            These images are untagged, but pointed to, and so should not be removed (or every pull fails).

            So for each image getting kept, parse the manifest to find the digest(s) it points to.  Then
            remove those from the list of untagged images.  The final result is the untagged, not pointed to
            version which should be safe to remove.

            Example:
                Tag: ghcr.io/paperless-ngx/paperless-ngx:1.7.1 refers to
                    amd64: sha256:b9ed4f8753bbf5146547671052d7e91f68cdfc9ef049d06690b2bc866fec2690
                    armv7: sha256:81605222df4ba4605a2ba4893276e5d08c511231ead1d5da061410e1bbec05c3
                    arm64: sha256:374cd68db40734b844705bfc38faae84cc4182371de4bebd533a9a365d5e8f3b
                each of which appears as untagged image, but isn't really.

                So from the list of untagged packages, remove those digests.  Once all tags which
                are being kept are checked, the remaining untagged packages are actually untagged
                with no referrals in a manifest to them.
            """
            # Simplify the untagged data, mapping name (which is a digest) to the version
            # At the moment, these are the images which APPEAR untagged.
            untagged_versions = {
                version.name: version
                for version in self.all_package_versions
                if version.untagged
            }

            skips = 0

            # Parse manifests to locate digests pointed to
            for tag in sorted(self.tags_to_keep):
                try:
                    image_index = ImageIndex(
                        f"ghcr.io/{self.repo_owner}/{self.package_name}",
                        tag,
                    )
                    for manifest in image_index.image_pointers:
                        if manifest.digest in untagged_versions:
                            logger.info(
                                f"Skipping deletion of {manifest.digest},"
                                f" referred to by {image_index.qualified_name}"
                                f" for {manifest.platform}",
                            )
                            del untagged_versions[manifest.digest]
                            skips += 1

                except Exception as err:
                    # If any kept tag cannot be inspected, the set of
                    # referenced digests is incomplete, so deleting is unsafe.
                    # Fall back to dry run and stop.
                    self.actually_delete = False
                    logger.exception(err)
                    return

            logger.info(
                f"Skipping deletion of {skips} packages referred to by a manifest",
            )

            # Delete the untagged and not pointed at packages
            logger.info(f"Deleting untagged packages of {self.package_name}")
            for to_delete_name, to_delete_version in untagged_versions.items():
                if self.actually_delete:
                    logger.info(
                        f"Deleting id {to_delete_version.id} named {to_delete_version.name}",
                    )
                    self.package_api.delete_package_version(
                        to_delete_version,
                    )
                else:
                    logger.info(
                        f"Would delete {to_delete_name} (id {to_delete_version.id})",
                    )

        def _clean_untagged_non_manifest():
            """
            If the package is not a multi-arch manifest, images without tags are safe to delete.
            """
            for package in self.all_package_versions:
                if package.untagged:
                    if self.actually_delete:
                        logger.info(
                            f"Deleting id {package.id} named {package.name}",
                        )
                        self.package_api.delete_package_version(
                            package,
                        )
                    else:
                        logger.info(
                            f"Would delete {package.name} (id {package.id})",
                        )
                else:
                    logger.info(
                        f"Not deleting tag {package.tags[0]} of package {self.package_name}",
                    )

        logger.info("Beginning untagged image cleaning")

        if is_manifest_image:
            _clean_untagged_manifest()
        else:
            _clean_untagged_non_manifest()

    def decide_what_tags_to_keep(self):
        """
        This method holds the logic to decide what tags to keep and therefore
        what tags to delete.

        By default, any image with at least 1 tag will be kept
        """
        # By default, keep anything which is tagged
        self.tags_to_keep = list(set(self.all_pkgs_tags_to_version.keys()))

    def check_remaining_tags_valid(self):
        """
        Checks the non-deleted tags are still valid.  The assumption is if the
        manifest can be inspected and each image manifest it points to can be
        inspected, the image will still pull.

        https://github.com/opencontainers/image-spec/blob/main/image-index.md

        Raises:
            Exception: at least one kept tag, or a digest it points to,
                failed to inspect
        """
        logger.info("Beginning confirmation step")
        a_tag_failed = False
        # Loop invariant, so look the executable up only once
        docker_path = shutil.which("docker")

        for tag in sorted(self.tags_to_keep):
            try:
                image_index = ImageIndex(
                    f"ghcr.io/{self.repo_owner}/{self.package_name}",
                    tag,
                )
                for manifest in image_index.image_pointers:
                    logger.info(f"Checking {manifest.digest} for {manifest.platform}")

                    # This follows the pointer from the index to an actual image, layers and all
                    # Note the format is @
                    digest_name = f"ghcr.io/{self.repo_owner}/{self.package_name}@{manifest.digest}"

                    try:
                        subprocess.run(
                            [
                                docker_path,
                                "buildx",
                                "imagetools",
                                "inspect",
                                "--raw",
                                digest_name,
                            ],
                            capture_output=True,
                            check=True,
                        )
                    except subprocess.CalledProcessError as e:
                        # Keep checking the other digests so every failure is logged
                        logger.error(f"Failed to inspect digest: {e.stderr}")
                        a_tag_failed = True
            except subprocess.CalledProcessError as e:
                # The index itself could not be fetched; move on to the next tag
                a_tag_failed = True
                logger.error(f"Failed to inspect: {e.stderr}")

        if a_tag_failed:
            raise Exception("At least one image tag failed to inspect")
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class MainImageTagsCleaner(RegistryTagsCleaner):
    def decide_what_tags_to_keep(self):
        """
        Overrides the default logic for deciding what images to keep.  Images tagged as "feature-"
        will be removed, if the corresponding branch no longer exists.

        A package version is only selected for deletion when every one of its
        tags is a "feature-" tag without a branch, because deleting a version
        removes all of the tags attached to it.
        """

        # Default to everything gets kept still
        super().decide_what_tags_to_keep()

        # Locate the feature branches
        feature_branches = {}
        for branch in self.branch_api.get_branches(
            repo=self.repo_name,
        ):
            if branch.name.startswith("feature-"):
                logger.debug(f"Found feature branch {branch.name}")
                feature_branches[branch.name] = branch

        logger.info(f"Located {len(feature_branches)} feature branches")

        if not feature_branches:
            # Our work here is done, delete nothing
            return

        # Filter to packages which are tagged with feature-*
        packages_tagged_feature: List[ContainerPackage] = [
            package
            for package in self.all_package_versions
            if package.tag_matches("feature-")
        ]

        # Map tags like "feature-xyz" to a ContainerPackage.  Only the
        # "feature-" tags themselves are recorded: a version may carry other
        # tags as well, and those must never become deletion candidates
        feature_pkgs_tags_to_versions: Dict[str, ContainerPackage] = {}
        for pkg in packages_tagged_feature:
            for tag in pkg.tags:
                if tag.startswith("feature-"):
                    feature_pkgs_tags_to_versions[tag] = pkg

        logger.info(
            f'Located {len(feature_pkgs_tags_to_versions)} versions of package {self.package_name} tagged "feature-"',
        )

        # All the feature tags minus all the feature branches leaves us feature tags
        # with no corresponding branch
        stale_tags = set(feature_pkgs_tags_to_versions.keys()) - set(
            feature_branches.keys(),
        )

        # Skip any version which also carries a tag that must be kept (a live
        # feature branch or a non-feature tag), as deleting it would take the
        # kept tag down with it.  Sorted for a stable processing order.
        self.tags_to_delete = sorted(
            tag
            for tag in stale_tags
            if all(
                other_tag in stale_tags
                for other_tag in feature_pkgs_tags_to_versions[tag].tags
            )
        )

        # All the tags minus the set of going to be deleted tags leaves us the
        # tags which will be kept around
        self.tags_to_keep = list(
            set(self.all_pkgs_tags_to_version.keys()) - set(self.tags_to_delete),
        )
        logger.info(
            f"Located {len(self.tags_to_delete)} versions of package {self.package_name} to delete",
        )
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class LibraryTagsCleaner(RegistryTagsCleaner):
    """
    Exists for the off chance that someday, the installer library images
    will need their own logic.  Until then it adds nothing to the base class
    behaviour.
    """
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def _main():
    """
    Command line entry point: parses the arguments, builds the cleaner for the
    requested package and runs the tagged clean, the untagged clean and, for
    manifest packages, the confirmation that the remaining tags still inspect.

    Reads GITHUB_REPOSITORY_OWNER, GITHUB_REPOSITORY and TOKEN from the
    environment; a missing one raises KeyError.
    """
    parser = ArgumentParser(
        description="Using the GitHub API locate and optionally delete container"
        " tags which no longer have an associated feature branch",
    )

    # Requires an affirmative command to actually do a delete
    parser.add_argument(
        "--delete",
        action="store_true",
        default=False,
        help="If provided, actually delete the container tags",
    )

    # When a tagged image is updated, the previous version remains, but it is no longer tagged
    # Add this option to remove them as well
    parser.add_argument(
        "--untagged",
        action="store_true",
        default=False,
        help="If provided, delete untagged containers as well",
    )

    # If given, the package is assumed to be a multi-arch manifest.  Cache packages are
    # not multi-arch, all other types are
    parser.add_argument(
        "--is-manifest",
        action="store_true",
        default=False,
        help="If provided, the package is assumed to be a multi-arch manifest following schema v2",
    )

    # Allows configuration of log level for debugging
    parser.add_argument(
        "--loglevel",
        default="info",
        help="Configures the logging level",
    )

    # Get the name of the package being processed this round
    parser.add_argument(
        "package",
        help="The package to process",
    )

    args = parser.parse_args()

    logging.basicConfig(
        level=get_log_level(args),
        datefmt="%Y-%m-%d %H:%M:%S",
        format="%(asctime)s %(levelname)-8s %(message)s",
    )

    # Must be provided in the environment
    repo_owner: Final[str] = os.environ["GITHUB_REPOSITORY_OWNER"]
    repo: Final[str] = os.environ["GITHUB_REPOSITORY"]
    gh_token: Final[str] = os.environ["TOKEN"]

    # Find all branches named feature-*
    # Note: Only relevant to the main application, but simpler to
    # leave in for all packages
    with GithubBranchApi(gh_token) as branch_api, GithubContainerRegistryApi(
        gh_token,
        repo_owner,
    ) as container_api:
        if args.package in {"paperless-ngx", "paperless-ngx/builder/cache/app"}:
            cleaner = MainImageTagsCleaner(
                args.package,
                repo_owner,
                repo,
                container_api,
                branch_api,
            )
        else:
            cleaner = LibraryTagsCleaner(
                args.package,
                repo_owner,
                repo,
                container_api,
                None,
            )

        # Set if actually doing a delete vs dry run
        cleaner.actually_delete = args.delete

        # Clean images with tags
        cleaner.clean()

        # Clean images which are untagged
        cleaner.clean_untagged(args.is_manifest)

        # Verify remaining tags still pull
        if args.is_manifest:
            cleaner.check_remaining_tags_valid()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Only run the cleaner when executed as a script, not when imported
if __name__ == "__main__":
    _main()
 | 
			
		||||
							
								
								
									
										270
									
								
								.github/scripts/github.py
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										270
									
								
								.github/scripts/github.py
									
									
									
									
										vendored
									
									
								
							@@ -1,270 +0,0 @@
 | 
			
		||||
"""
 | 
			
		||||
This module contains some useful classes for interacting with the Github API.
 | 
			
		||||
The full documentation for the API can be found here: https://docs.github.com/en/rest
 | 
			
		||||
 | 
			
		||||
Mostly, this focusses on two areas, repo branches and repo packages, as the use case
 | 
			
		||||
is cleaning up container images which are no longer referred to.
 | 
			
		||||
 | 
			
		||||
"""
 | 
			
		||||
import functools
 | 
			
		||||
import logging
 | 
			
		||||
import re
 | 
			
		||||
import urllib.parse
 | 
			
		||||
from typing import Dict
 | 
			
		||||
from typing import List
 | 
			
		||||
from typing import Optional
 | 
			
		||||
 | 
			
		||||
import httpx
 | 
			
		||||
 | 
			
		||||
# Module-level logger used by the API wrapper classes in this module
logger = logging.getLogger("github-api")
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class _GithubApiBase:
    """
    A base class for interacting with the Github API.  It
    will handle the session and setting authorization headers.

    Intended to be used as a context manager; the HTTP client only exists
    between __enter__ and __exit__.
    """

    def __init__(self, token: str) -> None:
        self._token = token
        # Created in __enter__, closed and reset to None in __exit__
        self._client: Optional[httpx.Client] = None

    def __enter__(self) -> "_GithubApiBase":
        """
        Sets up the required headers for auth and response
        type from the API
        """
        self._client = httpx.Client()
        self._client.headers.update(
            {
                "Accept": "application/vnd.github.v3+json",
                "Authorization": f"token {self._token}",
            },
        )
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """
        Ensures the authorization token is cleaned up no matter
        the reason for the exit
        """
        if "Accept" in self._client.headers:
            del self._client.headers["Accept"]
        if "Authorization" in self._client.headers:
            del self._client.headers["Authorization"]

        # Close the session as well
        self._client.close()
        self._client = None

    def _read_all_pages(self, endpoint):
        """
        Helper function to read all pages of an endpoint, utilizing the
        next.url until exhausted.  Assumes the endpoint returns a list

        Raises:
            httpx.HTTPStatusError: raised by raise_for_status() for an error
                response
            httpx.HTTPError: any other response which is not HTTP 200
        """
        internal_data = []

        while True:
            resp = self._client.get(endpoint)

            if resp.status_code != 200:
                logger.warning(f"Request to {endpoint} return HTTP {resp.status_code}")
                resp.raise_for_status()
                # raise_for_status() does not raise for other success codes
                # (e.g. 204).  Without this, the same endpoint would be
                # requested forever, and returning partial data is not safe
                # for callers which decide what to delete from it
                raise httpx.HTTPError(
                    f"Unexpected HTTP {resp.status_code} from {endpoint}",
                )

            internal_data += resp.json()

            if "next" not in resp.links:
                logger.debug("Exiting pagination loop")
                break
            # Follow the pagination link to the next page
            endpoint = resp.links["next"]["url"]

        return internal_data
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class _EndpointResponse:
 | 
			
		||||
    """
 | 
			
		||||
    For all endpoint JSON responses, store the full
 | 
			
		||||
    response data, for ease of extending later, if need be.
 | 
			
		||||
    """
 | 
			
		||||
 | 
			
		||||
    def __init__(self, data: Dict) -> None:
 | 
			
		||||
        self._data = data
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class GithubBranch(_EndpointResponse):
    """
    Simple wrapper for a repository branch, only extracts name information
    for now.
    """

    def __init__(self, data: Dict) -> None:
        super().__init__(data)
        # The branch name, taken from the "name" key of the response
        self.name = self._data["name"]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class GithubBranchApi(_GithubApiBase):
    """
    Wrapper around branch API.

    See https://docs.github.com/en/rest/branches/branches
    """

    def __init__(self, token: str) -> None:
        super().__init__(token)

        # {REPO} is filled with the "owner/name" form of the repository
        self._ENDPOINT = "https://api.github.com/repos/{REPO}/branches"

    def get_branches(self, repo: str) -> List[GithubBranch]:
        """
        Returns all current branches of the given repository.

        Args:
            repo: the repository, including its owner, as provided by the
                GITHUB_REPOSITORY environment variable
        """
        # The environment GITHUB_REPOSITORY already contains the owner in the correct location
        endpoint = self._ENDPOINT.format(REPO=repo)
        internal_data = self._read_all_pages(endpoint)
        return [GithubBranch(branch) for branch in internal_data]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class ContainerPackage(_EndpointResponse):
    """
    Data class wrapping the JSON response from the package related
    endpoints
    """

    def __init__(self, data: Dict) -> None:
        super().__init__(data)
        # This is a numerical ID, required for interactions with this
        # specific package, including deletion of it or restoration
        self.id: int = self._data["id"]

        # A string name.  This might be an actual name or it could be a
        # digest string like "sha256:"
        self.name: str = self._data["name"]

        # URL to the package, including its ID, can be used for deletion
        # or restoration without needing to build up a URL ourselves
        self.url: str = self._data["url"]

        # The list of tags applied to this image. Maybe an empty list
        self.tags: List[str] = self._data["metadata"]["container"]["tags"]

        # Per-instance memo of tag_matches() results, keyed by the pattern.
        # Kept on the instance (instead of functools.cache on the method) so
        # the cache does not hold a reference to every instance ever created.
        self._tag_match_cache: Dict[str, bool] = {}

    @functools.cached_property
    def untagged(self) -> bool:
        """
        Returns True if the image has no tags applied to it, False otherwise
        """
        return len(self.tags) == 0

    def tag_matches(self, pattern: str) -> bool:
        """
        Returns True if the image has at least one tag which matches the given regex,
        False otherwise.

        The pattern is applied with re.match, so it is anchored at the start
        of each tag.  Results are remembered per pattern for this instance.
        """
        result = self._tag_match_cache.get(pattern)
        if result is None:
            result = any(re.match(pattern, tag) is not None for tag in self.tags)
            self._tag_match_cache[pattern] = result
        return result

    def __repr__(self) -> str:
        return f"Package {self.name}"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class GithubContainerRegistryApi(_GithubApiBase):
    """
    Class wrapper to deal with the Github packages API.  This class only deals with
    container type packages, the only type published by paperless-ngx.
    """

    # The only package type this wrapper works with
    _PACKAGE_TYPE: str = "container"

    def __init__(self, token: str, owner_or_org: str) -> None:
        super().__init__(token)
        self._owner_or_org = owner_or_org
        # Only the "paperless-ngx" owner is treated as an organization; any
        # other value falls back to the authenticated-user endpoints
        if self._owner_or_org == "paperless-ngx":
            # https://docs.github.com/en/rest/packages#get-all-package-versions-for-a-package-owned-by-an-organization
            self._PACKAGES_VERSIONS_ENDPOINT = "https://api.github.com/orgs/{ORG}/packages/{PACKAGE_TYPE}/{PACKAGE_NAME}/versions"
            # https://docs.github.com/en/rest/packages#delete-package-version-for-an-organization
            self._PACKAGE_VERSION_DELETE_ENDPOINT = "https://api.github.com/orgs/{ORG}/packages/{PACKAGE_TYPE}/{PACKAGE_NAME}/versions/{PACKAGE_VERSION_ID}"
        else:
            # https://docs.github.com/en/rest/packages#get-all-package-versions-for-a-package-owned-by-the-authenticated-user
            self._PACKAGES_VERSIONS_ENDPOINT = "https://api.github.com/user/packages/{PACKAGE_TYPE}/{PACKAGE_NAME}/versions"
            # https://docs.github.com/en/rest/packages#delete-a-package-version-for-the-authenticated-user
            self._PACKAGE_VERSION_DELETE_ENDPOINT = "https://api.github.com/user/packages/{PACKAGE_TYPE}/{PACKAGE_NAME}/versions/{PACKAGE_VERSION_ID}"
        # The restore URL is the per-version URL with a "/restore" suffix
        self._PACKAGE_VERSION_RESTORE_ENDPOINT = (
            f"{self._PACKAGE_VERSION_DELETE_ENDPOINT}/restore"
        )

    def _versions_endpoint(self, package_name: str) -> str:
        """
        Builds the versions listing URL for the given (unquoted) package name
        """
        return self._PACKAGES_VERSIONS_ENDPOINT.format(
            ORG=self._owner_or_org,
            PACKAGE_TYPE=self._PACKAGE_TYPE,
            # Need to quote this for slashes in the name
            PACKAGE_NAME=urllib.parse.quote(package_name, safe=""),
        )

    def _list_package_versions(self, endpoint: str) -> List[ContainerPackage]:
        """
        Reads all pages of the given endpoint and wraps each entry
        """
        return [ContainerPackage(data) for data in self._read_all_pages(endpoint)]

    def get_active_package_versions(
        self,
        package_name: str,
    ) -> List[ContainerPackage]:
        """
        Returns all the versions of a given package (container images) from
        the API
        """
        return self._list_package_versions(self._versions_endpoint(package_name))

    def get_deleted_package_versions(
        self,
        package_name: str,
    ) -> List[ContainerPackage]:
        """
        Returns the versions of a given package which the API lists with
        state=deleted
        """
        return self._list_package_versions(
            self._versions_endpoint(package_name) + "?state=deleted",
        )

    def delete_package_version(self, package_data: ContainerPackage) -> None:
        """
        Deletes the given package version from the GHCR.

        A response other than HTTP 204 is only logged as a warning, nothing
        is raised.
        """
        resp = self._client.delete(package_data.url)
        if resp.status_code != 204:
            logger.warning(
                f"Request to delete {package_data.url} returned HTTP {resp.status_code}",
            )

    def restore_package_version(
        self,
        package_name: str,
        package_data: ContainerPackage,
    ) -> None:
        """
        Restores the given, previously deleted, version of the named package.

        A response other than HTTP 204 is only logged as a warning, nothing
        is raised.
        """
        endpoint = self._PACKAGE_VERSION_RESTORE_ENDPOINT.format(
            ORG=self._owner_or_org,
            PACKAGE_TYPE=self._PACKAGE_TYPE,
            # Quoted the same way as for the listing endpoints, as package
            # names may contain slashes which must not split the URL path
            PACKAGE_NAME=urllib.parse.quote(package_name, safe=""),
            PACKAGE_VERSION_ID=package_data.id,
        )

        resp = self._client.post(endpoint)
        if resp.status_code != 204:
            logger.warning(
                f"Request to restore {endpoint} returned HTTP {resp.status_code}",
            )
 | 
			
		||||
							
								
								
									
										95
									
								
								.github/workflows/cleanup-tags.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										95
									
								
								.github/workflows/cleanup-tags.yml
									
									
									
									
										vendored
									
									
								
							@@ -12,9 +12,6 @@ on:
 | 
			
		||||
  push:
 | 
			
		||||
    paths:
 | 
			
		||||
      - ".github/workflows/cleanup-tags.yml"
 | 
			
		||||
      - ".github/scripts/cleanup-tags.py"
 | 
			
		||||
      - ".github/scripts/github.py"
 | 
			
		||||
      - ".github/scripts/common.py"
 | 
			
		||||
 | 
			
		||||
concurrency:
 | 
			
		||||
  group: registry-tags-cleanup
 | 
			
		||||
@@ -22,62 +19,56 @@ concurrency:
 | 
			
		||||
 | 
			
		||||
jobs:
 | 
			
		||||
  cleanup-images:
 | 
			
		||||
    name: Cleanup Image Tags for ${{ matrix.primary-name }}
 | 
			
		||||
    name: Cleanup Image Tags for paperless-ngx
 | 
			
		||||
    if: github.repository_owner == 'paperless-ngx'
 | 
			
		||||
    runs-on: ubuntu-22.04
 | 
			
		||||
    strategy:
 | 
			
		||||
      matrix:
 | 
			
		||||
        include:
 | 
			
		||||
          - primary-name: "paperless-ngx"
 | 
			
		||||
            cache-name: "paperless-ngx/builder/cache/app"
 | 
			
		||||
 | 
			
		||||
          - primary-name: "paperless-ngx/builder/qpdf"
 | 
			
		||||
            cache-name: "paperless-ngx/builder/cache/qpdf"
 | 
			
		||||
 | 
			
		||||
          - primary-name: "paperless-ngx/builder/pikepdf"
 | 
			
		||||
            cache-name: "paperless-ngx/builder/cache/pikepdf"
 | 
			
		||||
 | 
			
		||||
          - primary-name: "paperless-ngx/builder/jbig2enc"
 | 
			
		||||
            cache-name: "paperless-ngx/builder/cache/jbig2enc"
 | 
			
		||||
 | 
			
		||||
          - primary-name: "paperless-ngx/builder/psycopg2"
 | 
			
		||||
            cache-name: "paperless-ngx/builder/cache/psycopg2"
 | 
			
		||||
    env:
 | 
			
		||||
      # Requires a personal access token with the OAuth scope delete:packages
 | 
			
		||||
      TOKEN: ${{ secrets.GHA_CONTAINER_DELETE_TOKEN }}
 | 
			
		||||
    steps:
 | 
			
		||||
      -
 | 
			
		||||
        name: Checkout
 | 
			
		||||
        uses: actions/checkout@v3
 | 
			
		||||
      -
 | 
			
		||||
        name: Login to Github Container Registry
 | 
			
		||||
        uses: docker/login-action@v2
 | 
			
		||||
        with:
 | 
			
		||||
          registry: ghcr.io
 | 
			
		||||
          username: ${{ github.actor }}
 | 
			
		||||
          password: ${{ secrets.GITHUB_TOKEN }}
 | 
			
		||||
      -
 | 
			
		||||
        name: Set up Python
 | 
			
		||||
        uses: actions/setup-python@v4
 | 
			
		||||
        with:
 | 
			
		||||
          python-version: "3.10"
 | 
			
		||||
      -
 | 
			
		||||
        name: Install Python libraries
 | 
			
		||||
        run: |
 | 
			
		||||
          python -m pip install httpx docker
 | 
			
		||||
      #
 | 
			
		||||
      # Clean up primary package
 | 
			
		||||
      #
 | 
			
		||||
      -
 | 
			
		||||
        name: Cleanup for package "${{ matrix.primary-name }}"
 | 
			
		||||
        name: Clean temporary images
 | 
			
		||||
        if: "${{ env.TOKEN != '' }}"
 | 
			
		||||
        run: |
 | 
			
		||||
          python ${GITHUB_WORKSPACE}/.github/scripts/cleanup-tags.py --untagged --is-manifest --delete "${{ matrix.primary-name }}"
 | 
			
		||||
      #
 | 
			
		||||
      # Clean up registry cache package
 | 
			
		||||
      #
 | 
			
		||||
        uses: stumpylog/image-cleaner-action/ephemeral@develop
 | 
			
		||||
        with:
 | 
			
		||||
          token: "${{ env.TOKEN }}"
 | 
			
		||||
          owner: "${{ github.repository_owner }}"
 | 
			
		||||
          is_org: "true"
 | 
			
		||||
          package_name: "paperless-ngx"
 | 
			
		||||
          scheme: "branch"
 | 
			
		||||
          repo_name: "paperless-ngx"
 | 
			
		||||
          match_regex: "feature-"
 | 
			
		||||
 | 
			
		||||
  cleanup-untagged-images:
 | 
			
		||||
    name: Cleanup Untagged Images Tags for ${{ matrix.primary-name }}
 | 
			
		||||
    if: github.repository_owner == 'paperless-ngx'
 | 
			
		||||
    runs-on: ubuntu-22.04
 | 
			
		||||
    needs:
 | 
			
		||||
      - cleanup-images
 | 
			
		||||
    strategy:
 | 
			
		||||
      fail-fast: false
 | 
			
		||||
      matrix:
 | 
			
		||||
        include:
 | 
			
		||||
          - primary-name: "paperless-ngx"
 | 
			
		||||
          - primary-name: "paperless-ngx/builder/cache/app"
 | 
			
		||||
          - primary-name: "paperless-ngx/builder/qpdf"
 | 
			
		||||
          - primary-name: "paperless-ngx/builder/cache/qpdf"
 | 
			
		||||
          - primary-name: "paperless-ngx/builder/pikepdf"
 | 
			
		||||
          - primary-name: "paperless-ngx/builder/cache/pikepdf"
 | 
			
		||||
          - primary-name: "paperless-ngx/builder/jbig2enc"
 | 
			
		||||
          - primary-name: "paperless-ngx/builder/cache/jbig2enc"
 | 
			
		||||
          - primary-name: "paperless-ngx/builder/psycopg2"
 | 
			
		||||
          - primary-name: "paperless-ngx/builder/cache/psycopg2"
 | 
			
		||||
    env:
 | 
			
		||||
      # Requires a personal access token with the OAuth scope delete:packages
 | 
			
		||||
      TOKEN: ${{ secrets.GHA_CONTAINER_DELETE_TOKEN }}
 | 
			
		||||
    steps:
 | 
			
		||||
      -
 | 
			
		||||
        name: Cleanup for package "${{ matrix.cache-name }}"
 | 
			
		||||
        name: Clean untagged images
 | 
			
		||||
        if: "${{ env.TOKEN != '' }}"
 | 
			
		||||
        run: |
 | 
			
		||||
          python ${GITHUB_WORKSPACE}/.github/scripts/cleanup-tags.py --untagged --delete "${{ matrix.cache-name }}"
 | 
			
		||||
        uses: stumpylog/image-cleaner-action/untagged@develop
 | 
			
		||||
        with:
 | 
			
		||||
          token: "${{ env.TOKEN }}"
 | 
			
		||||
          owner: "${{ github.repository_owner }}"
 | 
			
		||||
          is_org: "true"
 | 
			
		||||
          package_name: "${{ matrix.primary-name }}"
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user