mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-17 10:13:56 -05:00
Fine, I made my own GitHub API interface. With blackjack and ...
This commit is contained in:
parent
0090e27699
commit
499bd552a1
376
.github/scripts/cleanup-tags.py
vendored
Executable file → Normal file
376
.github/scripts/cleanup-tags.py
vendored
Executable file → Normal file
@ -1,102 +1,159 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
"""
|
|
||||||
When a feature branch is created, a new GitHub container is built and tagged
|
|
||||||
with the feature branch name. When a feature branch is deleted, either through
|
|
||||||
a merge or deletion, the old image tag will still exist.
|
|
||||||
|
|
||||||
Though this isn't a problem for storage size, etc, it does lead to a long list
|
|
||||||
of tags which are no longer relevant and the last released version is pushed
|
|
||||||
further and further down that list.
|
|
||||||
|
|
||||||
This script utlizes the GitHub API (through the gh cli application) to list the
|
|
||||||
package versions (aka tags) and the repository branches. Then it removes feature
|
|
||||||
tags which have no matching branch
|
|
||||||
|
|
||||||
This pruning is applied to the primary package, the frontend builder package and the
|
|
||||||
frontend build cache package.
|
|
||||||
|
|
||||||
"""
|
|
||||||
import argparse
|
|
||||||
import logging
|
import logging
|
||||||
import os.path
|
import os
|
||||||
import pprint
|
from argparse import ArgumentParser
|
||||||
from typing import Dict
|
|
||||||
from typing import Final
|
from typing import Final
|
||||||
from typing import List
|
from typing import List
|
||||||
|
from urllib.parse import quote
|
||||||
|
|
||||||
|
import requests
|
||||||
from common import get_log_level
|
from common import get_log_level
|
||||||
from ghapi.all import GhApi
|
|
||||||
from ghapi.all import paged
|
logger = logging.getLogger("cleanup-tags")
|
||||||
|
|
||||||
|
|
||||||
def _get_feature_packages(
|
class GithubContainerRegistry:
|
||||||
logger: logging.Logger,
|
def __init__(
|
||||||
api: GhApi,
|
self,
|
||||||
is_org_repo: bool,
|
session: requests.Session,
|
||||||
repo_owner: str,
|
token: str,
|
||||||
package_name: str,
|
owner_or_org: str,
|
||||||
) -> Dict:
|
):
|
||||||
"""
|
self._session: requests.Session = session
|
||||||
Uses the GitHub packages API endpoint data filter to containers
|
self._token = token
|
||||||
which have a tag starting with "feature-"
|
self._owner_or_org = owner_or_org
|
||||||
"""
|
self._BRANCHES_ENDPOINT = "https://api.github.com/repos/{OWNER}/{REPO}/branches"
|
||||||
|
if self._owner_or_org == "paperless-ngx":
|
||||||
# Get all package versions
|
self._PACKAGES_VERSIONS_ENDPOINT = "https://api.github.com/orgs/{ORG}/packages/{PACKAGE_TYPE}/{PACKAGE_NAME}/versions"
|
||||||
pkg_versions = []
|
self._PACKAGE_VERSION_DELETE_ENDPOINT = "https://api.github.com/orgs/{ORG}/packages/{PACKAGE_TYPE}/{PACKAGE_NAME}/versions/{PACKAGE_VERSION_ID}"
|
||||||
if is_org_repo:
|
|
||||||
|
|
||||||
for pkg_version in paged(
|
|
||||||
api.packages.get_all_package_versions_for_package_owned_by_org,
|
|
||||||
org=repo_owner,
|
|
||||||
package_type="container",
|
|
||||||
package_name=package_name,
|
|
||||||
):
|
|
||||||
pkg_versions.extend(pkg_version)
|
|
||||||
else:
|
|
||||||
for pkg_version in paged(
|
|
||||||
api.packages.get_all_package_versions_for_package_owned_by_authenticated_user, # noqa: E501
|
|
||||||
package_type="container",
|
|
||||||
package_name=package_name,
|
|
||||||
):
|
|
||||||
pkg_versions.extend(pkg_version)
|
|
||||||
|
|
||||||
logger.debug(f"Found {len(pkg_versions)} package versions for {package_name}")
|
|
||||||
|
|
||||||
# Filter to just those containers tagged "feature-"
|
|
||||||
feature_versions = {}
|
|
||||||
|
|
||||||
for item in pkg_versions:
|
|
||||||
is_feature_version = False
|
|
||||||
feature_tag_name = None
|
|
||||||
if (
|
|
||||||
"metadata" in item
|
|
||||||
and "container" in item["metadata"]
|
|
||||||
and "tags" in item["metadata"]["container"]
|
|
||||||
):
|
|
||||||
for tag in item["metadata"]["container"]["tags"]:
|
|
||||||
if tag.startswith("feature-"):
|
|
||||||
feature_tag_name = tag
|
|
||||||
is_feature_version = True
|
|
||||||
if is_feature_version:
|
|
||||||
logger.info(
|
|
||||||
f"Located feature tag: {feature_tag_name} for image {package_name}",
|
|
||||||
)
|
|
||||||
# logger.debug(pprint.pformat(item, indent=2))
|
|
||||||
feature_versions[feature_tag_name] = item
|
|
||||||
else:
|
else:
|
||||||
logger.debug(f"Filtered {pprint.pformat(item, indent=2)}")
|
self._PACKAGES_VERSIONS_ENDPOINT = "https://api.github.com/user/packages/{PACKAGE_TYPE}/{PACKAGE_NAME}/versions"
|
||||||
|
self._PACKAGE_VERSION_DELETE_ENDPOINT = "https://api.github.com/user/packages/{PACKAGE_TYPE}/{PACKAGE_NAME}/versions/{PACKAGE_VERSION_ID}"
|
||||||
|
|
||||||
logger.info(
|
def __enter__(self):
|
||||||
f"Found {len(feature_versions)} package versions for"
|
self._session.headers.update(
|
||||||
f" {package_name} with feature tags",
|
{
|
||||||
)
|
"Accept": "application/vnd.github.v3+json",
|
||||||
|
"Authorization": f"token {self._token}",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
return self
|
||||||
|
|
||||||
return feature_versions
|
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||||
|
if "Accept" in self._session.headers:
|
||||||
|
del self._session.headers["Accept"]
|
||||||
|
if "Authorization" in self._session.headers:
|
||||||
|
del self._session.headers["Authorization"]
|
||||||
|
|
||||||
|
def _read_all_pages(self, endpoint):
|
||||||
|
internal_data = []
|
||||||
|
|
||||||
|
while True:
|
||||||
|
resp = self._session.get(endpoint)
|
||||||
|
if resp.status_code == 200:
|
||||||
|
internal_data += resp.json()
|
||||||
|
if "next" in resp.links:
|
||||||
|
endpoint = resp.links["next"]["url"]
|
||||||
|
else:
|
||||||
|
logger.debug("Exiting pagination loop")
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
logger.warning(f"Request to {endpoint} return HTTP {resp.status_code}")
|
||||||
|
break
|
||||||
|
|
||||||
|
return internal_data
|
||||||
|
|
||||||
|
def get_branches(self, repo: str):
|
||||||
|
endpoint = self._BRANCHES_ENDPOINT.format(OWNER=self._owner_or_org, REPO=repo)
|
||||||
|
internal_data = self._read_all_pages(endpoint)
|
||||||
|
return internal_data
|
||||||
|
|
||||||
|
def filter_branches_by_name_pattern(self, branch_data, pattern: str):
|
||||||
|
matches = {}
|
||||||
|
|
||||||
|
for branch in branch_data:
|
||||||
|
if branch["name"].startswith(pattern):
|
||||||
|
matches[branch["name"]] = branch
|
||||||
|
|
||||||
|
return matches
|
||||||
|
|
||||||
|
def get_package_versions(
|
||||||
|
self,
|
||||||
|
package_name: str,
|
||||||
|
package_type: str = "container",
|
||||||
|
) -> List:
|
||||||
|
package_name = quote(package_name, safe="")
|
||||||
|
endpoint = self._PACKAGES_VERSIONS_ENDPOINT.format(
|
||||||
|
ORG=self._owner_or_org,
|
||||||
|
PACKAGE_TYPE=package_type,
|
||||||
|
PACKAGE_NAME=package_name,
|
||||||
|
)
|
||||||
|
|
||||||
|
internal_data = self._read_all_pages(endpoint)
|
||||||
|
|
||||||
|
return internal_data
|
||||||
|
|
||||||
|
def filter_packages_by_tag_pattern(self, package_data, pattern: str):
|
||||||
|
matches = {}
|
||||||
|
|
||||||
|
for package in package_data:
|
||||||
|
if "metadata" in package and "container" in package["metadata"]:
|
||||||
|
container_metadata = package["metadata"]["container"]
|
||||||
|
if "tags" in container_metadata:
|
||||||
|
container_tags = container_metadata["tags"]
|
||||||
|
for tag in container_tags:
|
||||||
|
if tag.startswith(pattern):
|
||||||
|
matches[tag] = package
|
||||||
|
break
|
||||||
|
|
||||||
|
return matches
|
||||||
|
|
||||||
|
def filter_packages_untagged(self, package_data):
|
||||||
|
matches = {}
|
||||||
|
|
||||||
|
for package in package_data:
|
||||||
|
if "metadata" in package and "container" in package["metadata"]:
|
||||||
|
container_metadata = package["metadata"]["container"]
|
||||||
|
if "tags" in container_metadata:
|
||||||
|
container_tags = container_metadata["tags"]
|
||||||
|
if not len(container_tags):
|
||||||
|
matches[package["name"]] = package
|
||||||
|
|
||||||
|
return matches
|
||||||
|
|
||||||
|
def delete_package_version(self, package_name, package_data):
|
||||||
|
package_name = quote(package_name, safe="")
|
||||||
|
endpoint = self._PACKAGE_VERSION_DELETE_ENDPOINT.format(
|
||||||
|
ORG=self._owner_or_org,
|
||||||
|
PACKAGE_TYPE=package_data["metadata"]["package_type"],
|
||||||
|
PACKAGE_NAME=package_name,
|
||||||
|
PACKAGE_VERSION_ID=package_data["id"],
|
||||||
|
)
|
||||||
|
resp = self._session.delete(endpoint)
|
||||||
|
if resp.status_code != 204:
|
||||||
|
logger.warning(
|
||||||
|
f"Request to delete {endpoint} returned HTTP {resp.status_code}",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class DockerHubContainerRegistery:
|
||||||
|
def __init__(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def __enter__(self):
|
||||||
|
return self
|
||||||
|
|
||||||
|
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def get_image_versions(self) -> List:
|
||||||
|
return []
|
||||||
|
|
||||||
|
def delete_image_version(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
def _main():
|
def _main():
|
||||||
|
parser = ArgumentParser(
|
||||||
parser = argparse.ArgumentParser(
|
|
||||||
description="Using the GitHub API locate and optionally delete container"
|
description="Using the GitHub API locate and optionally delete container"
|
||||||
" tags which no longer have an associated feature branch",
|
" tags which no longer have an associated feature branch",
|
||||||
)
|
)
|
||||||
@ -108,6 +165,14 @@ def _main():
|
|||||||
help="If provided, actually delete the container tags",
|
help="If provided, actually delete the container tags",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# TODO There's a lot of untagged images, do those need to stay for anything?
|
||||||
|
parser.add_argument(
|
||||||
|
"--untagged",
|
||||||
|
action="store_true",
|
||||||
|
default=False,
|
||||||
|
help="If provided, delete untagged containers as well",
|
||||||
|
)
|
||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--loglevel",
|
"--loglevel",
|
||||||
default="info",
|
default="info",
|
||||||
@ -122,89 +187,82 @@ def _main():
|
|||||||
format="%(asctime)s %(levelname)-8s %(message)s",
|
format="%(asctime)s %(levelname)-8s %(message)s",
|
||||||
)
|
)
|
||||||
|
|
||||||
logger = logging.getLogger("cleanup-tags")
|
|
||||||
|
|
||||||
repo: Final[str] = os.environ["GITHUB_REPOSITORY"]
|
|
||||||
repo_owner: Final[str] = os.environ["GITHUB_REPOSITORY_OWNER"]
|
repo_owner: Final[str] = os.environ["GITHUB_REPOSITORY_OWNER"]
|
||||||
|
repo: Final[str] = os.environ["GITHUB_REPOSITORY"]
|
||||||
|
gh_token: Final[str] = os.environ["GITHUB_TOKEN"]
|
||||||
|
|
||||||
is_org_repo: Final[bool] = repo_owner == "paperless-ngx"
|
with requests.session() as sess:
|
||||||
dry_run: Final[bool] = not args.delete
|
with GithubContainerRegistry(sess, gh_token, repo_owner) as gh_api:
|
||||||
|
all_branches = gh_api.get_branches("paperless-ngx")
|
||||||
|
logger.info(f"Located {len(all_branches)} branches of {repo_owner}/{repo} ")
|
||||||
|
|
||||||
logger.debug(f"Org Repo? {is_org_repo}")
|
feature_branches = gh_api.filter_branches_by_name_pattern(
|
||||||
logger.debug(f"Dry Run? {dry_run}")
|
all_branches,
|
||||||
|
"feature-",
|
||||||
api = GhApi(
|
|
||||||
owner=repo_owner,
|
|
||||||
repo=os.path.basename(repo),
|
|
||||||
token=os.environ["GITHUB_TOKEN"],
|
|
||||||
)
|
|
||||||
|
|
||||||
pkg_list: Final[List[str]] = [
|
|
||||||
"paperless-ngx",
|
|
||||||
# TODO: It would be nice to cleanup additional packages, but we can't
|
|
||||||
# see https://github.com/fastai/ghapi/issues/84
|
|
||||||
# "builder/frontend",
|
|
||||||
# "builder-frontend-cache",
|
|
||||||
]
|
|
||||||
|
|
||||||
# Get the list of current "feature-" branches
|
|
||||||
feature_branch_info = api.list_branches(prefix="feature-")
|
|
||||||
feature_branch_names = []
|
|
||||||
for branch in feature_branch_info:
|
|
||||||
name_only = branch["ref"].removeprefix("refs/heads/")
|
|
||||||
logger.info(f"Located feature branch: {name_only}")
|
|
||||||
feature_branch_names.append(name_only)
|
|
||||||
|
|
||||||
logger.info(f"Located {len(feature_branch_names)} feature branches")
|
|
||||||
|
|
||||||
# TODO The deletion doesn't yet actually work
|
|
||||||
# See https://github.com/fastai/ghapi/issues/132
|
|
||||||
# This would need to be updated to use gh cli app or requests or curl
|
|
||||||
# or something
|
|
||||||
if is_org_repo:
|
|
||||||
endpoint = (
|
|
||||||
"https://api.github.com/orgs/{ORG}/packages/container/{name}/versions/{id}"
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
endpoint = "https://api.github.com/user/packages/container/{name}/{id}"
|
|
||||||
|
|
||||||
for package_name in pkg_list:
|
|
||||||
|
|
||||||
logger.info(f"Processing image {package_name}")
|
|
||||||
|
|
||||||
# Get the list of images tagged with "feature-"
|
|
||||||
feature_packages = _get_feature_packages(
|
|
||||||
logger,
|
|
||||||
api,
|
|
||||||
is_org_repo,
|
|
||||||
repo_owner,
|
|
||||||
package_name,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Get the set of container tags without matching feature branches
|
|
||||||
to_delete = list(set(feature_packages.keys()) - set(feature_branch_names))
|
|
||||||
|
|
||||||
for container_tag in to_delete:
|
|
||||||
container_info = feature_packages[container_tag]
|
|
||||||
|
|
||||||
formatted_endpoint = endpoint.format(
|
|
||||||
ORG=repo_owner,
|
|
||||||
name=package_name,
|
|
||||||
id=container_info["id"],
|
|
||||||
)
|
)
|
||||||
|
logger.info(f"Located {len(feature_branches)} feature branches")
|
||||||
|
|
||||||
if dry_run:
|
for package_name in ["paperless-ngx", "paperless-ngx/builder/cache/app"]:
|
||||||
|
|
||||||
|
all_package_versions = gh_api.get_package_versions(package_name)
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Would delete {package_name}:{container_tag} with"
|
f"Located {len(all_package_versions)} versions of package {package_name}",
|
||||||
f" id: {container_info['id']}",
|
)
|
||||||
|
|
||||||
|
packages_tagged_feature = gh_api.filter_packages_by_tag_pattern(
|
||||||
|
all_package_versions,
|
||||||
|
"feature-",
|
||||||
)
|
)
|
||||||
# logger.debug(formatted_endpoint)
|
|
||||||
else:
|
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Deleting {package_name}:{container_tag} with"
|
f'Located {len(packages_tagged_feature)} versions of package {package_name} tagged "feature-"',
|
||||||
f" id: {container_info['id']}",
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
untagged_packages = gh_api.filter_packages_untagged(
|
||||||
|
all_package_versions,
|
||||||
|
)
|
||||||
|
logger.info(
|
||||||
|
f"Located {len(untagged_packages)} untagged versions of package {package_name}",
|
||||||
|
)
|
||||||
|
|
||||||
|
to_delete = list(
|
||||||
|
set(packages_tagged_feature.keys()) - set(feature_branches.keys()),
|
||||||
|
)
|
||||||
|
logger.info(
|
||||||
|
f"Located {len(to_delete)} versions of package {package_name} to delete",
|
||||||
|
)
|
||||||
|
|
||||||
|
for tag_to_delete in to_delete:
|
||||||
|
package_version_info = packages_tagged_feature[tag_to_delete]
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
f"Deleting {tag_to_delete} (id {package_version_info['id']})",
|
||||||
|
)
|
||||||
|
if args.delete:
|
||||||
|
gh_api.delete_package_version(
|
||||||
|
package_name,
|
||||||
|
package_version_info,
|
||||||
|
)
|
||||||
|
|
||||||
|
if args.untagged:
|
||||||
|
logger.info(f"Deleting untagged packages of {package_name}")
|
||||||
|
for to_delete_name in untagged_packages:
|
||||||
|
to_delete_version = untagged_packages[to_delete_name]
|
||||||
|
logger.info(f"Deleting id {to_delete_version['id']}")
|
||||||
|
if args.delete:
|
||||||
|
gh_api.delete_package_version(
|
||||||
|
package_name,
|
||||||
|
to_delete_version,
|
||||||
|
)
|
||||||
|
|
||||||
|
with DockerHubContainerRegistery() as dh_api:
|
||||||
|
docker_hub_image_version = dh_api.get_image_versions()
|
||||||
|
|
||||||
|
# TODO
|
||||||
|
docker_hub_to_delete = []
|
||||||
|
|
||||||
|
for x in docker_hub_to_delete:
|
||||||
|
dh_api.delete_image_version()
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
_main()
|
_main()
|
||||||
|
2
.github/scripts/common.py
vendored
2
.github/scripts/common.py
vendored
@ -40,5 +40,5 @@ def get_log_level(args) -> int:
|
|||||||
}
|
}
|
||||||
level = levels.get(args.loglevel.lower())
|
level = levels.get(args.loglevel.lower())
|
||||||
if level is None:
|
if level is None:
|
||||||
raise ArgumentError(f"{args.loglevel} is not a valid level")
|
level = logging.INFO
|
||||||
return level
|
return level
|
||||||
|
4
.github/workflows/cleanup-tags.yml
vendored
4
.github/workflows/cleanup-tags.yml
vendored
@ -39,9 +39,9 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
python-version: "3.9"
|
python-version: "3.9"
|
||||||
-
|
-
|
||||||
name: Install fastai GitHub API
|
name: Install requests
|
||||||
run: |
|
run: |
|
||||||
python -m pip install ghapi requests
|
python -m pip install requests
|
||||||
-
|
-
|
||||||
name: Cleanup feature tags
|
name: Cleanup feature tags
|
||||||
run: |
|
run: |
|
||||||
|
Loading…
x
Reference in New Issue
Block a user