mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Instead of pulling all images, inspect the manifest index and each digest index
This commit is contained in:
parent
8098ac6b15
commit
ee272da95f
172
.github/scripts/cleanup-tags.py
vendored
172
.github/scripts/cleanup-tags.py
vendored
@ -7,6 +7,7 @@ import subprocess
|
||||
from argparse import ArgumentParser
|
||||
from typing import Dict
|
||||
from typing import Final
|
||||
from typing import Iterator
|
||||
from typing import List
|
||||
from typing import Optional
|
||||
|
||||
@ -15,16 +16,17 @@ from github import ContainerPackage
|
||||
from github import GithubBranchApi
|
||||
from github import GithubContainerRegistryApi
|
||||
|
||||
import docker
|
||||
|
||||
logger = logging.getLogger("cleanup-tags")
|
||||
|
||||
|
||||
class DockerManifest2:
|
||||
class ImageProperties:
|
||||
"""
|
||||
Data class wrapping the Docker Image Manifest Version 2.
|
||||
Data class wrapping the properties of an entry in the image index
|
||||
manifests list. It is NOT an actual image with layers, etc
|
||||
|
||||
See https://docs.docker.com/registry/spec/manifest-v2-2/
|
||||
https://docs.docker.com/registry/spec/manifest-v2-2/
|
||||
https://github.com/opencontainers/image-spec/blob/main/manifest.md
|
||||
https://github.com/opencontainers/image-spec/blob/main/descriptor.md
|
||||
"""
|
||||
|
||||
def __init__(self, data: Dict) -> None:
|
||||
@ -41,6 +43,45 @@ class DockerManifest2:
|
||||
self.platform = f"{platform_data_os}/{platform_arch}{platform_variant}"
|
||||
|
||||
|
||||
class ImageIndex:
|
||||
"""
|
||||
Data class wrapping up logic for an OCI Image Index
|
||||
JSON data. Primary use is to access the manifests listing
|
||||
|
||||
See https://github.com/opencontainers/image-spec/blob/main/image-index.md
|
||||
"""
|
||||
|
||||
def __init__(self, package_url: str, tag: str) -> None:
|
||||
self.qualified_name = f"{package_url}:{tag}"
|
||||
logger.info(f"Getting image index for {self.qualified_name}")
|
||||
try:
|
||||
proc = subprocess.run(
|
||||
[
|
||||
shutil.which("docker"),
|
||||
"buildx",
|
||||
"imagetools",
|
||||
"inspect",
|
||||
"--raw",
|
||||
self.qualified_name,
|
||||
],
|
||||
capture_output=True,
|
||||
check=True,
|
||||
)
|
||||
|
||||
self._data = json.loads(proc.stdout)
|
||||
|
||||
except subprocess.CalledProcessError as e:
|
||||
logger.error(
|
||||
f"Failed to get image index for {self.qualified_name}: {e.stderr}",
|
||||
)
|
||||
raise e
|
||||
|
||||
@property
|
||||
def image_pointers(self) -> Iterator[ImageProperties]:
|
||||
for manifest_data in self._data["manifests"]:
|
||||
yield ImageProperties(manifest_data)
|
||||
|
||||
|
||||
class RegistryTagsCleaner:
|
||||
"""
|
||||
This is the base class for the image registry cleaning. Given a package
|
||||
@ -87,7 +128,10 @@ class RegistryTagsCleaner:
|
||||
|
||||
def clean(self):
|
||||
"""
|
||||
This method will delete image versions, based on the selected tags to delete
|
||||
This method will delete image versions, based on the selected tags to delete.
|
||||
It behaves more like an unlinking than actual deletion. Removing the tag
|
||||
simply removes a pointer to an image, but the actual image data remains accessible
|
||||
if one has the sha256 digest of it.
|
||||
"""
|
||||
for tag_to_delete in self.tags_to_delete:
|
||||
package_version_info = self.all_pkgs_tags_to_version[tag_to_delete]
|
||||
@ -151,31 +195,17 @@ class RegistryTagsCleaner:
|
||||
|
||||
# Parse manifests to locate digests pointed to
|
||||
for tag in sorted(self.tags_to_keep):
|
||||
full_name = f"ghcr.io/{self.repo_owner}/{self.package_name}:{tag}"
|
||||
logger.info(f"Checking manifest for {full_name}")
|
||||
# TODO: It would be nice to use RegistryData from docker
|
||||
# except the ID doesn't map to anything in the manifest
|
||||
try:
|
||||
proc = subprocess.run(
|
||||
[
|
||||
shutil.which("docker"),
|
||||
"buildx",
|
||||
"imagetools",
|
||||
"inspect",
|
||||
"--raw",
|
||||
full_name,
|
||||
],
|
||||
capture_output=True,
|
||||
image_index = ImageIndex(
|
||||
f"ghcr.io/{self.repo_owner}/{self.package_name}",
|
||||
tag,
|
||||
)
|
||||
|
||||
manifest_list = json.loads(proc.stdout)
|
||||
for manifest_data in manifest_list["manifests"]:
|
||||
manifest = DockerManifest2(manifest_data)
|
||||
for manifest in image_index.image_pointers:
|
||||
|
||||
if manifest.digest in untagged_versions:
|
||||
logger.info(
|
||||
f"Skipping deletion of {manifest.digest},"
|
||||
f" referred to by {full_name}"
|
||||
f" referred to by {image_index.qualified_name}"
|
||||
f" for {manifest.platform}",
|
||||
)
|
||||
del untagged_versions[manifest.digest]
|
||||
@ -247,64 +277,54 @@ class RegistryTagsCleaner:
|
||||
# By default, keep anything which is tagged
|
||||
self.tags_to_keep = list(set(self.all_pkgs_tags_to_version.keys()))
|
||||
|
||||
def check_tags_pull(self):
|
||||
def check_remaining_tags_valid(self):
|
||||
"""
|
||||
This method uses the Docker Python SDK to confirm all tags which were
|
||||
kept still pull, for all platforms.
|
||||
Checks the non-deleted tags are still valid. The assumption is if the
|
||||
manifest is can be inspected and each image manifest if points to can be
|
||||
inspected, the image will still pull.
|
||||
|
||||
TODO: This is much slower (although more comprehensive). Maybe a Pool?
|
||||
https://github.com/opencontainers/image-spec/blob/main/image-index.md
|
||||
"""
|
||||
logger.info("Beginning confirmation step")
|
||||
client = docker.from_env()
|
||||
imgs = []
|
||||
a_tag_failed = False
|
||||
for tag in sorted(self.tags_to_keep):
|
||||
repository = f"ghcr.io/{self.repo_owner}/{self.package_name}"
|
||||
for arch, variant in [("amd64", None), ("arm64", None), ("arm", "v7")]:
|
||||
# From 11.2.0 onwards, qpdf is cross compiled, so there is a single arch, amd64
|
||||
# skip others in this case
|
||||
if "qpdf" in self.package_name and arch != "amd64" and tag == "11.2.0":
|
||||
continue
|
||||
# Skip beta and release candidate tags
|
||||
elif "beta" in tag:
|
||||
continue
|
||||
|
||||
# Build the platform name
|
||||
if variant is not None:
|
||||
platform = f"linux/{arch}/{variant}"
|
||||
else:
|
||||
platform = f"linux/{arch}"
|
||||
try:
|
||||
image_index = ImageIndex(
|
||||
f"ghcr.io/{self.repo_owner}/{self.package_name}",
|
||||
tag,
|
||||
)
|
||||
for manifest in image_index.image_pointers:
|
||||
logger.info(f"Checking {manifest.digest} for {manifest.platform}")
|
||||
|
||||
try:
|
||||
logger.info(f"Pulling {repository}:{tag} for {platform}")
|
||||
image = client.images.pull(
|
||||
repository=repository,
|
||||
tag=tag,
|
||||
platform=platform,
|
||||
)
|
||||
imgs.append(image)
|
||||
except docker.errors.APIError as e:
|
||||
logger.error(
|
||||
f"Failed to pull {repository}:{tag}: {e}",
|
||||
)
|
||||
# This follows the pointer from the index to an actual image, layers and all
|
||||
# Note the format is @
|
||||
digest_name = f"ghcr.io/{self.repo_owner}/{self.package_name}@{manifest.digest}"
|
||||
|
||||
# Prevent out of space errors by removing after a few
|
||||
# pulls
|
||||
if len(imgs) > 50:
|
||||
for image in imgs:
|
||||
try:
|
||||
client.images.remove(image.id)
|
||||
except docker.errors.APIError as e:
|
||||
err_str = str(e)
|
||||
# Ignore attempts to remove images that are partly shared
|
||||
# Ignore images which are somehow gone already
|
||||
if (
|
||||
"must be forced" not in err_str
|
||||
and "No such image" not in err_str
|
||||
):
|
||||
logger.error(
|
||||
f"Remove image ghcr.io/{self.repo_owner}/{self.package_name}:{tag} failed: {e}",
|
||||
)
|
||||
imgs = []
|
||||
|
||||
subprocess.run(
|
||||
[
|
||||
shutil.which("docker"),
|
||||
"buildx",
|
||||
"imagetools",
|
||||
"inspect",
|
||||
"--raw",
|
||||
digest_name,
|
||||
],
|
||||
capture_output=True,
|
||||
check=True,
|
||||
)
|
||||
except subprocess.CalledProcessError as e:
|
||||
logger.error(f"Failed to inspect digest: {e.stderr}")
|
||||
a_tag_failed = True
|
||||
except subprocess.CalledProcessError as e:
|
||||
a_tag_failed = True
|
||||
logger.error(f"Failed to inspect: {e.stderr}")
|
||||
continue
|
||||
|
||||
if a_tag_failed:
|
||||
raise Exception("At least one image tag failed to inspect")
|
||||
|
||||
|
||||
class MainImageTagsCleaner(RegistryTagsCleaner):
|
||||
@ -366,7 +386,7 @@ class MainImageTagsCleaner(RegistryTagsCleaner):
|
||||
|
||||
class LibraryTagsCleaner(RegistryTagsCleaner):
|
||||
"""
|
||||
Exists for the off change that someday, the installer library images
|
||||
Exists for the off chance that someday, the installer library images
|
||||
will need their own logic
|
||||
"""
|
||||
|
||||
@ -464,7 +484,7 @@ def _main():
|
||||
|
||||
# Verify remaining tags still pull
|
||||
if args.is_manifest:
|
||||
cleaner.check_tags_pull()
|
||||
cleaner.check_remaining_tags_valid()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
Loading…
x
Reference in New Issue
Block a user