mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-05-01 11:19:32 -05:00
Instead of pulling all images, inspect the manifest index and each digest index
This commit is contained in:
parent
4f9885d6b6
commit
74b7c49e1b
168
.github/scripts/cleanup-tags.py
vendored
168
.github/scripts/cleanup-tags.py
vendored
@ -7,6 +7,7 @@ import subprocess
|
|||||||
from argparse import ArgumentParser
|
from argparse import ArgumentParser
|
||||||
from typing import Dict
|
from typing import Dict
|
||||||
from typing import Final
|
from typing import Final
|
||||||
|
from typing import Iterator
|
||||||
from typing import List
|
from typing import List
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
@ -15,16 +16,17 @@ from github import ContainerPackage
|
|||||||
from github import GithubBranchApi
|
from github import GithubBranchApi
|
||||||
from github import GithubContainerRegistryApi
|
from github import GithubContainerRegistryApi
|
||||||
|
|
||||||
import docker
|
|
||||||
|
|
||||||
logger = logging.getLogger("cleanup-tags")
|
logger = logging.getLogger("cleanup-tags")
|
||||||
|
|
||||||
|
|
||||||
class DockerManifest2:
|
class ImageProperties:
|
||||||
"""
|
"""
|
||||||
Data class wrapping the Docker Image Manifest Version 2.
|
Data class wrapping the properties of an entry in the image index
|
||||||
|
manifests list. It is NOT an actual image with layers, etc
|
||||||
|
|
||||||
See https://docs.docker.com/registry/spec/manifest-v2-2/
|
https://docs.docker.com/registry/spec/manifest-v2-2/
|
||||||
|
https://github.com/opencontainers/image-spec/blob/main/manifest.md
|
||||||
|
https://github.com/opencontainers/image-spec/blob/main/descriptor.md
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, data: Dict) -> None:
|
def __init__(self, data: Dict) -> None:
|
||||||
@ -41,6 +43,45 @@ class DockerManifest2:
|
|||||||
self.platform = f"{platform_data_os}/{platform_arch}{platform_variant}"
|
self.platform = f"{platform_data_os}/{platform_arch}{platform_variant}"
|
||||||
|
|
||||||
|
|
||||||
|
class ImageIndex:
|
||||||
|
"""
|
||||||
|
Data class wrapping up logic for an OCI Image Index
|
||||||
|
JSON data. Primary use is to access the manifests listing
|
||||||
|
|
||||||
|
See https://github.com/opencontainers/image-spec/blob/main/image-index.md
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, package_url: str, tag: str) -> None:
|
||||||
|
self.qualified_name = f"{package_url}:{tag}"
|
||||||
|
logger.info(f"Getting image index for {self.qualified_name}")
|
||||||
|
try:
|
||||||
|
proc = subprocess.run(
|
||||||
|
[
|
||||||
|
shutil.which("docker"),
|
||||||
|
"buildx",
|
||||||
|
"imagetools",
|
||||||
|
"inspect",
|
||||||
|
"--raw",
|
||||||
|
self.qualified_name,
|
||||||
|
],
|
||||||
|
capture_output=True,
|
||||||
|
check=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
self._data = json.loads(proc.stdout)
|
||||||
|
|
||||||
|
except subprocess.CalledProcessError as e:
|
||||||
|
logger.error(
|
||||||
|
f"Failed to get image index for {self.qualified_name}: {e.stderr}",
|
||||||
|
)
|
||||||
|
raise e
|
||||||
|
|
||||||
|
@property
|
||||||
|
def image_pointers(self) -> Iterator[ImageProperties]:
|
||||||
|
for manifest_data in self._data["manifests"]:
|
||||||
|
yield ImageProperties(manifest_data)
|
||||||
|
|
||||||
|
|
||||||
class RegistryTagsCleaner:
|
class RegistryTagsCleaner:
|
||||||
"""
|
"""
|
||||||
This is the base class for the image registry cleaning. Given a package
|
This is the base class for the image registry cleaning. Given a package
|
||||||
@ -87,7 +128,10 @@ class RegistryTagsCleaner:
|
|||||||
|
|
||||||
def clean(self):
|
def clean(self):
|
||||||
"""
|
"""
|
||||||
This method will delete image versions, based on the selected tags to delete
|
This method will delete image versions, based on the selected tags to delete.
|
||||||
|
It behaves more like an unlinking than actual deletion. Removing the tag
|
||||||
|
simply removes a pointer to an image, but the actual image data remains accessible
|
||||||
|
if one has the sha256 digest of it.
|
||||||
"""
|
"""
|
||||||
for tag_to_delete in self.tags_to_delete:
|
for tag_to_delete in self.tags_to_delete:
|
||||||
package_version_info = self.all_pkgs_tags_to_version[tag_to_delete]
|
package_version_info = self.all_pkgs_tags_to_version[tag_to_delete]
|
||||||
@ -151,31 +195,17 @@ class RegistryTagsCleaner:
|
|||||||
|
|
||||||
# Parse manifests to locate digests pointed to
|
# Parse manifests to locate digests pointed to
|
||||||
for tag in sorted(self.tags_to_keep):
|
for tag in sorted(self.tags_to_keep):
|
||||||
full_name = f"ghcr.io/{self.repo_owner}/{self.package_name}:{tag}"
|
|
||||||
logger.info(f"Checking manifest for {full_name}")
|
|
||||||
# TODO: It would be nice to use RegistryData from docker
|
|
||||||
# except the ID doesn't map to anything in the manifest
|
|
||||||
try:
|
try:
|
||||||
proc = subprocess.run(
|
image_index = ImageIndex(
|
||||||
[
|
f"ghcr.io/{self.repo_owner}/{self.package_name}",
|
||||||
shutil.which("docker"),
|
tag,
|
||||||
"buildx",
|
|
||||||
"imagetools",
|
|
||||||
"inspect",
|
|
||||||
"--raw",
|
|
||||||
full_name,
|
|
||||||
],
|
|
||||||
capture_output=True,
|
|
||||||
)
|
)
|
||||||
|
for manifest in image_index.image_pointers:
|
||||||
manifest_list = json.loads(proc.stdout)
|
|
||||||
for manifest_data in manifest_list["manifests"]:
|
|
||||||
manifest = DockerManifest2(manifest_data)
|
|
||||||
|
|
||||||
if manifest.digest in untagged_versions:
|
if manifest.digest in untagged_versions:
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Skipping deletion of {manifest.digest},"
|
f"Skipping deletion of {manifest.digest},"
|
||||||
f" referred to by {full_name}"
|
f" referred to by {image_index.qualified_name}"
|
||||||
f" for {manifest.platform}",
|
f" for {manifest.platform}",
|
||||||
)
|
)
|
||||||
del untagged_versions[manifest.digest]
|
del untagged_versions[manifest.digest]
|
||||||
@ -247,64 +277,54 @@ class RegistryTagsCleaner:
|
|||||||
# By default, keep anything which is tagged
|
# By default, keep anything which is tagged
|
||||||
self.tags_to_keep = list(set(self.all_pkgs_tags_to_version.keys()))
|
self.tags_to_keep = list(set(self.all_pkgs_tags_to_version.keys()))
|
||||||
|
|
||||||
def check_tags_pull(self):
|
def check_remaining_tags_valid(self):
|
||||||
"""
|
"""
|
||||||
This method uses the Docker Python SDK to confirm all tags which were
|
Checks the non-deleted tags are still valid. The assumption is if the
|
||||||
kept still pull, for all platforms.
|
manifest is can be inspected and each image manifest if points to can be
|
||||||
|
inspected, the image will still pull.
|
||||||
|
|
||||||
TODO: This is much slower (although more comprehensive). Maybe a Pool?
|
https://github.com/opencontainers/image-spec/blob/main/image-index.md
|
||||||
"""
|
"""
|
||||||
logger.info("Beginning confirmation step")
|
logger.info("Beginning confirmation step")
|
||||||
client = docker.from_env()
|
a_tag_failed = False
|
||||||
imgs = []
|
|
||||||
for tag in sorted(self.tags_to_keep):
|
for tag in sorted(self.tags_to_keep):
|
||||||
repository = f"ghcr.io/{self.repo_owner}/{self.package_name}"
|
|
||||||
for arch, variant in [("amd64", None), ("arm64", None), ("arm", "v7")]:
|
|
||||||
# From 11.2.0 onwards, qpdf is cross compiled, so there is a single arch, amd64
|
|
||||||
# skip others in this case
|
|
||||||
if "qpdf" in self.package_name and arch != "amd64" and tag == "11.2.0":
|
|
||||||
continue
|
|
||||||
# Skip beta and release candidate tags
|
|
||||||
elif "beta" in tag:
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Build the platform name
|
|
||||||
if variant is not None:
|
|
||||||
platform = f"linux/{arch}/{variant}"
|
|
||||||
else:
|
|
||||||
platform = f"linux/{arch}"
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
logger.info(f"Pulling {repository}:{tag} for {platform}")
|
image_index = ImageIndex(
|
||||||
image = client.images.pull(
|
f"ghcr.io/{self.repo_owner}/{self.package_name}",
|
||||||
repository=repository,
|
tag,
|
||||||
tag=tag,
|
|
||||||
platform=platform,
|
|
||||||
)
|
|
||||||
imgs.append(image)
|
|
||||||
except docker.errors.APIError as e:
|
|
||||||
logger.error(
|
|
||||||
f"Failed to pull {repository}:{tag}: {e}",
|
|
||||||
)
|
)
|
||||||
|
for manifest in image_index.image_pointers:
|
||||||
|
logger.info(f"Checking {manifest.digest} for {manifest.platform}")
|
||||||
|
|
||||||
|
# This follows the pointer from the index to an actual image, layers and all
|
||||||
|
# Note the format is @
|
||||||
|
digest_name = f"ghcr.io/{self.repo_owner}/{self.package_name}@{manifest.digest}"
|
||||||
|
|
||||||
# Prevent out of space errors by removing after a few
|
|
||||||
# pulls
|
|
||||||
if len(imgs) > 50:
|
|
||||||
for image in imgs:
|
|
||||||
try:
|
try:
|
||||||
client.images.remove(image.id)
|
|
||||||
except docker.errors.APIError as e:
|
subprocess.run(
|
||||||
err_str = str(e)
|
[
|
||||||
# Ignore attempts to remove images that are partly shared
|
shutil.which("docker"),
|
||||||
# Ignore images which are somehow gone already
|
"buildx",
|
||||||
if (
|
"imagetools",
|
||||||
"must be forced" not in err_str
|
"inspect",
|
||||||
and "No such image" not in err_str
|
"--raw",
|
||||||
):
|
digest_name,
|
||||||
logger.error(
|
],
|
||||||
f"Remove image ghcr.io/{self.repo_owner}/{self.package_name}:{tag} failed: {e}",
|
capture_output=True,
|
||||||
|
check=True,
|
||||||
)
|
)
|
||||||
imgs = []
|
except subprocess.CalledProcessError as e:
|
||||||
|
logger.error(f"Failed to inspect digest: {e.stderr}")
|
||||||
|
a_tag_failed = True
|
||||||
|
except subprocess.CalledProcessError as e:
|
||||||
|
a_tag_failed = True
|
||||||
|
logger.error(f"Failed to inspect: {e.stderr}")
|
||||||
|
continue
|
||||||
|
|
||||||
|
if a_tag_failed:
|
||||||
|
raise Exception("At least one image tag failed to inspect")
|
||||||
|
|
||||||
|
|
||||||
class MainImageTagsCleaner(RegistryTagsCleaner):
|
class MainImageTagsCleaner(RegistryTagsCleaner):
|
||||||
@ -366,7 +386,7 @@ class MainImageTagsCleaner(RegistryTagsCleaner):
|
|||||||
|
|
||||||
class LibraryTagsCleaner(RegistryTagsCleaner):
|
class LibraryTagsCleaner(RegistryTagsCleaner):
|
||||||
"""
|
"""
|
||||||
Exists for the off change that someday, the installer library images
|
Exists for the off chance that someday, the installer library images
|
||||||
will need their own logic
|
will need their own logic
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@ -464,7 +484,7 @@ def _main():
|
|||||||
|
|
||||||
# Verify remaining tags still pull
|
# Verify remaining tags still pull
|
||||||
if args.is_manifest:
|
if args.is_manifest:
|
||||||
cleaner.check_tags_pull()
|
cleaner.check_remaining_tags_valid()
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
Loading…
x
Reference in New Issue
Block a user