Compare commits


1 Commit

Author   SHA1        Message                                              Date
shamoon  3cfb0f5856  Add GitHub Actions workflow for release automation  2025-09-03 16:53:17 -07:00
7 changed files with 485 additions and 840 deletions


@@ -37,7 +37,7 @@ jobs:
   labels.push('bug');
 } else if (/^feature/i.test(title)) {
   labels.push('enhancement');
-} else if (!/^(dependabot)/i.test(title) && !/^(chore)/i.test(title)) {
+} else if (!/^(dependabot)/i.test(title) && /^(chore)/i.test(title)) {
   labels.push('enhancement'); // Default fallback
 }

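The only change in this hunk is the leading `!` on the chore check. A minimal Python sketch of the two variants of that fallback condition, with hypothetical names, to make the behavioural difference explicit (the removed line excludes chore-prefixed titles from the default label, the added line requires them):

    import re

    DEPENDABOT = re.compile(r"^(dependabot)", re.IGNORECASE)
    CHORE = re.compile(r"^(chore)", re.IGNORECASE)

    def default_enhancement(title: str, exclude_chore: bool) -> bool:
        """Return True when the fallback 'enhancement' label would be applied."""
        if exclude_chore:
            # Removed line: anything that is neither dependabot nor chore.
            return not DEPENDABOT.match(title) and not CHORE.match(title)
        # Added line: only chore titles that are not dependabot.
        return not DEPENDABOT.match(title) and bool(CHORE.match(title))

    # default_enhancement("Chore: tidy docs", exclude_chore=True)  -> False
    # default_enhancement("Chore: tidy docs", exclude_chore=False) -> True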
.github/workflows/release.yml (new file, 135 lines)

@@ -0,0 +1,135 @@
name: Paperless-ngx Release
on:
  workflow_dispatch:
    inputs:
      version:
        description: "Release version (e.g., 2.18.3)"
        required: true
        type: string
permissions:
  contents: write
  actions: read
concurrency:
  group: release-main
  cancel-in-progress: false
jobs:
  release:
    runs-on: ubuntu-24.04
    steps:
      - name: Checkout (full)
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Configure git
        run: |
          git config user.name "${{ github.actor }}"
          git config user.email "${{ github.actor }}@users.noreply.github.com"
      - name: Sanitize & validate input
        id: ver
        shell: bash
        run: |
          RAW="${{ github.event.inputs.version }}"
          # trim spaces + strip leading 'v' if present
          RAW="${RAW//[[:space:]]/}"
          RAW="${RAW#v}"
          # basic semver X.Y.Z
          if [[ ! "$RAW" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
            echo "❌ Invalid version: '$RAW' (expected X.Y.Z or vX.Y.Z)"; exit 1
          fi
          MAJOR="${RAW%%.*}"
          REST="${RAW#*.}"
          MINOR="${REST%%.*}"
          PATCH="${REST#*.}"
          echo "version=$RAW" >> "$GITHUB_OUTPUT"
          echo "major=$MAJOR" >> "$GITHUB_OUTPUT"
          echo "minor=$MINOR" >> "$GITHUB_OUTPUT"
          echo "patch=$PATCH" >> "$GITHUB_OUTPUT"
          echo "✅ Using version $RAW"
      - name: Ensure tag does not already exist
        run: |
          git fetch --tags
          if git rev-parse "v${{ steps.ver.outputs.version }}" >/dev/null 2>&1; then
            echo "❌ Tag v${{ steps.ver.outputs.version }} already exists"; exit 1
          fi
      - name: Update local branches
        run: |
          git fetch origin main dev
      - name: Fast-forward main to dev (no merge commits)
        run: |
          # Reset local main to remote, then try fast-forward to dev.
          git checkout main
          git reset --hard origin/main
          # --ff-only ensures the workflow fails if the branches diverged.
          git merge --ff-only origin/dev
          echo "✅ main fast-forwarded to dev at $(git rev-parse --short HEAD)"
      - name: Bump versions in files
        shell: bash
        run: |
          VER="${{ steps.ver.outputs.version }}"
          MAJ="${{ steps.ver.outputs.major }}"
          MIN="${{ steps.ver.outputs.minor }}"
          PAT="${{ steps.ver.outputs.patch }}"
          # 1) pyproject.toml: [project] version = "X.Y.Z"
          sed -i -E 's/^version = "[0-9]+\.[0-9]+\.[0-9]+"/version = "'"$VER"'"/' pyproject.toml
          # 2) src-ui/package.json: "version": "X.Y.Z"
          # Use jq if available; otherwise sed fallback.
          if command -v jq >/dev/null 2>&1; then
            tmp=$(mktemp)
            jq --arg v "$VER" '.version=$v' src-ui/package.json > "$tmp" && mv "$tmp" src-ui/package.json
          else
            sed -i -E 's/"version": "[0-9]+\.[0-9]+\.[0-9]+"/"version": "'"$VER"'"/' src-ui/package.json
          fi
          # 3) src-ui/src/environments/environment.prod.ts: version: 'X.Y.Z'
          sed -i -E "s/version: '[0-9]+\.[0-9]+\.[0-9]+'/version: '$VER'/" src-ui/src/environments/environment.prod.ts
          # 4) src/paperless/version.py: __version__ = (X, Y, Z)
          sed -i -E "s/__version__:\s*Final\[tuple\[int,\s*int,\s*int\]\]\s*=\s*\([0-9]+,\s*[0-9]+,\s*[0-9]+\)/__version__: Final[tuple[int, int, int]] = ($MAJ, $MIN, $PAT)/" src/paperless/version.py
          # 5) uv.lock: in the [[package]] name = "paperless-ngx" block, set version = "X.Y.Z"
          # This awk edits only the block for paperless-ngx.
          awk -v ver="$VER" '
            BEGIN{inpkg=0}
            /^\[\[package\]\]/{inpkg=0}
            /^\[\[package\]\]/{print; next}
            {print > "/dev/stdout"}
          ' uv.lock >/dev/null 2>&1 # noop to ensure awk exists
          # More robust in-place edit with awk:
          awk -v ver="$VER" '
            BEGIN{inpkg=0}
            /^\[\[package\]\]/{inpkg=0; print; next}
            /^name = "paperless-ngx"/{inpkg=1; print; next}
            inpkg && /^version = "/{
              sub(/version = "[0-9]+\.[0-9]+\.[0-9]+"/, "version = \"" ver "\"")
              print; next
            }
            {print}
          ' uv.lock > uv.lock.new && mv uv.lock.new uv.lock
          echo "✅ Files updated to $VER"
      - name: Commit bump (if changes)
        run: |
          if git diff --quiet; then
            echo "No changes to commit (versions may already match)"
          else
            git add pyproject.toml src-ui/package.json src-ui/src/environments/environment.prod.ts src/paperless/version.py uv.lock
            git commit -m "Bump version to ${{ steps.ver.outputs.version }}"
          fi
      - name: Push main
        run: |
          # Push branch (even if no commit, ensures remote main == local)
          git push origin HEAD:main
      - name: Create and push tag
        run: |
          VER="${{ steps.ver.outputs.version }}"
          git tag -a "v${VER}" -m "Release v${VER}"
          git push origin "v${VER}"
      - name: Done
        run: echo "🎉 Release v${{ steps.ver.outputs.version }} created and pushed."

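The two least obvious pieces of the workflow above are the version sanitisation and the scoped uv.lock edit. A rough Python equivalent of both, usable for a local dry run (function names are hypothetical, file paths as in the workflow):

    import re
    from pathlib import Path

    def parse_version(raw: str) -> tuple[int, int, int]:
        """Mirror the sanitize & validate step: strip whitespace and a leading 'v', require X.Y.Z."""
        cleaned = re.sub(r"\s+", "", raw).removeprefix("v")
        match = re.fullmatch(r"(\d+)\.(\d+)\.(\d+)", cleaned)
        if not match:
            raise ValueError(f"Invalid version: {raw!r} (expected X.Y.Z or vX.Y.Z)")
        major, minor, patch = (int(part) for part in match.groups())
        return major, minor, patch

    def bump_uv_lock(lock_path: Path, version: str) -> None:
        """Rewrite version = "X.Y.Z" only inside the paperless-ngx [[package]] block,
        the same scoping the awk script applies."""
        lines = lock_path.read_text().splitlines(keepends=True)
        in_pkg = False
        for i, line in enumerate(lines):
            if line.startswith("[[package]]"):
                in_pkg = False
            elif line.startswith('name = "paperless-ngx"'):
                in_pkg = True
            elif in_pkg and line.startswith('version = "'):
                lines[i] = f'version = "{version}"\n'
                in_pkg = False
        lock_path.write_text("".join(lines))

    major, minor, patch = parse_version(" v2.18.3 ")
    # bump_uv_lock(Path("uv.lock"), f"{major}.{minor}.{patch}")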

@@ -56,12 +56,12 @@
     "@playwright/test": "^1.55.0",
     "@types/jest": "^30.0.0",
     "@types/node": "^24.3.0",
-    "@typescript-eslint/eslint-plugin": "^8.41.0",
-    "@typescript-eslint/parser": "^8.41.0",
-    "@typescript-eslint/utils": "^8.41.0",
-    "eslint": "^9.34.0",
-    "jest": "30.1.3",
-    "jest-environment-jsdom": "^30.1.2",
+    "@typescript-eslint/eslint-plugin": "^8.38.0",
+    "@typescript-eslint/parser": "^8.38.0",
+    "@typescript-eslint/utils": "^8.38.0",
+    "eslint": "^9.32.0",
+    "jest": "30.0.5",
+    "jest-environment-jsdom": "^30.0.5",
     "jest-junit": "^16.0.0",
     "jest-preset-angular": "^15.0.0",
     "jest-websocket-mock": "^2.5.0",

src-ui/pnpm-lock.yaml (generated, 787 changed lines)

File diff suppressed because it is too large


@@ -2764,7 +2764,7 @@ class SystemStatusView(PassUserMixin):
         install_type = "docker"
         db_conn = connections["default"]
-        db_url = str(db_conn.settings_dict["NAME"])
+        db_url = db_conn.settings_dict["NAME"]
         db_error = None
         try:

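One side effect of dropping the str() call above: with SQLite, Django's settings_dict["NAME"] is commonly a pathlib.Path (e.g. built from a base directory), so db_url may no longer be a plain string. A small illustrative sketch (the settings values here are made up):

    from pathlib import Path

    # Illustrative only: a typical SQLite connection settings_dict.
    settings_dict = {"ENGINE": "django.db.backends.sqlite3", "NAME": Path("/data/db.sqlite3")}

    db_url_old = str(settings_dict["NAME"])  # always a str
    db_url_new = settings_dict["NAME"]       # whatever NAME is; here a Path

    assert isinstance(db_url_old, str)
    assert isinstance(db_url_new, Path)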

@@ -425,7 +425,7 @@ WHITENOISE_STATIC_PREFIX = "/static/"
 if machine().lower() == "aarch64":  # pragma: no cover
     _static_backend = "django.contrib.staticfiles.storage.StaticFilesStorage"
 else:
-    _static_backend = "paperless.staticfiles.DeduplicatedCompressedStaticFilesStorage"
+    _static_backend = "whitenoise.storage.CompressedStaticFilesStorage"
 STORAGES = {
     "staticfiles": {

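This hunk points the non-aarch64 branch back at WhiteNoise's stock compressed storage; the custom paperless.staticfiles backend it replaces is the module deleted at the end of this compare. The STORAGES dict is cut off by the hunk context, but a backend string like this is normally wired in along these lines (a sketch of the usual Django 4.2+ layout, not the file's exact contents):

    from platform import machine

    if machine().lower() == "aarch64":  # pragma: no cover
        _static_backend = "django.contrib.staticfiles.storage.StaticFilesStorage"
    else:
        _static_backend = "whitenoise.storage.CompressedStaticFilesStorage"

    STORAGES = {
        "staticfiles": {
            "BACKEND": _static_backend,
        },
    }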

@@ -1,385 +0,0 @@
import gzip
import hashlib
import logging
import os
import shutil
import threading
import time
from collections import defaultdict
from concurrent.futures import ThreadPoolExecutor
from concurrent.futures import as_completed
from dataclasses import dataclass
from pathlib import Path

import brotli
import humanize
from django.contrib.staticfiles.storage import StaticFilesStorage

logger = logging.getLogger(__name__)


@dataclass(slots=True)
class FileInfo:
    file_path_str: str
    file_path_path: Path
    checksum: str
    original_size: int
    gzip_size: int | None = None
    brotli_size: int | None = None


class DeduplicatedCompressedStaticFilesStorage(StaticFilesStorage):
    # File extensions that should be compressed
    COMPRESSIBLE_EXTENSIONS = {
        ".css",
        ".js",
        ".html",
        ".htm",
        ".xml",
        ".json",
        ".txt",
        ".svg",
        ".md",
        ".rst",
        ".csv",
        ".tsv",
        ".yaml",
        ".yml",
        ".map",
    }

    # Minimum file size to compress (bytes)
    MIN_COMPRESS_SIZE = 1024  # 1KB

    # Maximum number of threads for parallel processing
    MAX_WORKERS = min(32, (os.cpu_count() or 1) + 4)

    # Chunk size for file reading
    CHUNK_SIZE = 64 * 1024  # 64KB

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # --- MODIFIED: Added path_to_file_info for easy lookup ---
        self.hash_to_files: dict[str, list[FileInfo]] = defaultdict(list)
        self.path_to_file_info: dict[str, FileInfo] = {}
        self.linked_files: set[Path] = set()
        self.compression_stats = {
            "brotli": 0,
            "gzip": 0,
            "skipped_linked": 0,
            "skipped_other": 0,
            "errors": 0,
        }
        self._lock = threading.Lock()
    def post_process(self, paths: list[str], **options):
        """
        Post-process collected files: deduplicate first, then compress.
        Django 5.2 compatible with proper options handling.
        """
        start_time = time.time()

        # Step 1: Build hash map for deduplication (parallel)
        self._build_file_hash_map_parallel(paths)

        # Step 2: Create hard links for duplicate files
        self._create_hard_links()

        # Step 3: Compress files (parallel, skip linked duplicates)
        self._compress_files_parallel(paths)

        # Step 4: Provide user a summary of the compression
        self._log_compression_summary()

        processing_time = time.time() - start_time
        logger.info(f"Post-processing complete in {processing_time:.2f}s.")

        # Return list of processed files
        processed_files = []
        for path in paths:
            processed_files.append((path, path, True))
            # Add compressed variants
            file_path = self.path(path)
            if Path(file_path + ".br").exists():
                processed_files.append((path + ".br", path + ".br", True))
            if Path(file_path + ".gz").exists():
                processed_files.append((path + ".gz", path + ".gz", True))
        return processed_files
    def _build_file_hash_map_parallel(self, file_paths: list[str]):
        """Build a map of file hashes using parallel processing."""
        logger.info(
            f"Hashing {len(file_paths)} files with {self.MAX_WORKERS} workers...",
        )

        def hash_file(path: str):
            """Hash a single file."""
            try:
                file_path = Path(self.path(path))
                if not file_path.is_file():
                    return None, None, None
                file_hash = self._get_file_hash_fast(file_path)
                file_size = file_path.stat().st_size
                return path, file_hash, file_size
            except Exception as e:
                logger.warning(f"Error hashing file {path}: {e}")
                return path, None, None

        with ThreadPoolExecutor(max_workers=self.MAX_WORKERS) as executor:
            future_to_path = {
                executor.submit(hash_file, path): path for path in file_paths
            }
            for future in as_completed(future_to_path):
                path, file_hash, file_size = future.result()
                if path is not None and file_hash is not None and file_size is not None:
                    with self._lock:
                        file_info = FileInfo(
                            file_path_str=path,
                            file_path_path=Path(self.path(path)),
                            checksum=file_hash,
                            original_size=file_size,
                        )
                        self.hash_to_files[file_hash].append(file_info)
                        self.path_to_file_info[path] = file_info

        duplicates = sum(1 for files in self.hash_to_files.values() if len(files) > 1)
        logger.info(f"Found {duplicates} sets of duplicate files")
    def _get_file_hash_fast(self, file_path: Path):
        """Calculate SHA-256 hash of file content with optimized reading."""
        hash_sha256 = hashlib.sha256()
        try:
            with file_path.open("rb") as f:
                while chunk := f.read(self.CHUNK_SIZE):
                    hash_sha256.update(chunk)
        except OSError as e:
            logger.warning(f"Could not read file {file_path}: {e}")
            raise
        return hash_sha256.hexdigest()
    def _create_hard_links(self):
        """Create hard links for duplicate files."""
        logger.info("Creating hard links for duplicate files...")
        linked_count = 0

        for file_info_list in self.hash_to_files.values():
            if len(file_info_list) <= 1:
                continue

            # Sort by file size (desc) then path length (asc) to keep best original
            file_info_list.sort(key=lambda x: (-x.original_size, len(x.file_path_str)))
            original_file_info = file_info_list[0]
            duplicate_info = file_info_list[1:]

            for duplicate_file_info in duplicate_info:
                try:
                    # Remove duplicate file and create hard link
                    if duplicate_file_info.file_path_path.exists():
                        duplicate_file_info.file_path_path.unlink()
                    # Create hard link
                    os.link(
                        original_file_info.file_path_path,
                        duplicate_file_info.file_path_path,
                    )
                    with self._lock:
                        self.linked_files.add(duplicate_file_info.file_path_path)
                    linked_count += 1
                    logger.info(
                        f"Linked {duplicate_file_info.file_path_path} -> {original_file_info.file_path_path}",
                    )
                except OSError as e:
                    logger.error(
                        f"Hard link failed for {original_file_info.file_path_path}, copying instead: {e}",
                    )
                    # Fall back to copying the original over the duplicate if hard linking fails
                    try:
                        shutil.copy2(
                            original_file_info.file_path_path,
                            duplicate_file_info.file_path_path,
                        )
                        logger.error(
                            f"Copied {duplicate_file_info.file_path_path} (hard link failed)",
                        )
                    except Exception as copy_error:
                        logger.error(
                            f"Failed to copy {original_file_info.file_path_path}: {copy_error}",
                        )

        if linked_count > 0:
            logger.info(f"Created {linked_count} hard links")
    def _compress_files_parallel(self, file_paths: list[str]):
        """Compress files using parallel processing and update FileInfo objects."""
        # Identify files to compress, excluding hard links
        compressible_files = [
            self.path_to_file_info[path]
            for path in file_paths
            if self.path_to_file_info[path].file_path_path not in self.linked_files
            and self._should_compress_file(path)
        ]

        if not compressible_files:
            logger.info("No new files to compress")
            return

        logger.info(
            f"Compressing {len(compressible_files)} files with {self.MAX_WORKERS} workers...",
        )

        def compress_file(file_info: FileInfo):
            """Compress a single file and update its FileInfo by side-effect."""
            brotli_size = None
            gzip_size = None
            error = None
            try:
                brotli_size = self._compress_file_brotli(str(file_info.file_path_path))
                gzip_size = self._compress_file_gzip(str(file_info.file_path_path))
                # Store the compressed sizes
                file_info.brotli_size = brotli_size
                file_info.gzip_size = gzip_size
            except Exception as e:
                error = str(e)
                logger.warning(f"Error compressing {file_info.file_path_str}: {e}")
            return {
                "brotli": brotli_size is not None,
                "gzip": gzip_size is not None,
                "error": error,
            }

        with ThreadPoolExecutor(max_workers=self.MAX_WORKERS) as executor:
            future_to_info = {
                executor.submit(compress_file, info): info
                for info in compressible_files
            }
            for future in as_completed(future_to_info):
                result = future.result()
                with self._lock:
                    if result["brotli"]:
                        self.compression_stats["brotli"] += 1
                    if result["gzip"]:
                        self.compression_stats["gzip"] += 1
                    if result["error"]:
                        self.compression_stats["errors"] += 1
                    if (
                        not result["brotli"]
                        and not result["gzip"]
                        and not result["error"]
                    ):
                        self.compression_stats["skipped_other"] += 1

        self.compression_stats["skipped_linked"] = len(self.linked_files)
        logger.info(f"File count stats: {self.compression_stats}")
    def _should_compress_file(self, path: str):
        """Determine if a file should be compressed."""
        file_ext = Path(path).suffix.lower()
        if file_ext not in self.COMPRESSIBLE_EXTENSIONS:
            return False
        try:
            if Path(self.path(path)).stat().st_size < self.MIN_COMPRESS_SIZE:
                return False
        except OSError:
            return False
        return True
    def _compress_file_brotli(self, file_path: str) -> int | None:
        """Compress file using Brotli, returns compressed size or None."""
        brotli_path = Path(file_path + ".br")
        try:
            with Path(file_path).open("rb") as f_in:
                original_data = f_in.read()
            compressed_data = brotli.compress(
                original_data,
                quality=10,
                lgwin=22,  # Window size
                lgblock=0,  # Auto block size
            )
            if len(compressed_data) < len(original_data) * 0.95:
                with brotli_path.open("wb") as f_out:
                    f_out.write(compressed_data)
                return len(compressed_data)
            return None
        except Exception as e:
            logger.warning(f"Brotli compression failed for {file_path}: {e}")
            return None
    def _compress_file_gzip(self, file_path: str) -> int | None:
        """Compress file using GZip, returns compressed size or None."""
        gzip_path = Path(file_path + ".gz")
        file_path_path = Path(file_path)
        try:
            original_size = file_path_path.stat().st_size
            with (
                file_path_path.open("rb") as f_in,
                gzip.open(
                    gzip_path,
                    "wb",
                    compresslevel=7,
                ) as f_out,
            ):
                shutil.copyfileobj(f_in, f_out, length=self.CHUNK_SIZE)
            compressed_size = gzip_path.stat().st_size
            if compressed_size < original_size * 0.95:
                return compressed_size
            else:
                gzip_path.unlink()
                return None
        except Exception as e:
            logger.warning(f"GZip compression failed for {file_path}: {e}")
            if gzip_path.exists():
                try:
                    gzip_path.unlink()
                except OSError:
                    pass
            return None
    def _log_compression_summary(self):
        """Calculates and logs the total size savings from compression."""
        total_original_size = 0
        total_brotli_size = 0
        total_gzip_size = 0

        # Only consider the original files, not the duplicates, for size calculation
        unique_files = {
            file_list[0].checksum: file_list[0]
            for file_list in self.hash_to_files.values()
        }
        for file_info in unique_files.values():
            if self._should_compress_file(file_info.file_path_str):
                total_original_size += file_info.original_size
                if file_info.brotli_size:
                    total_brotli_size += file_info.brotli_size
                if file_info.gzip_size:
                    total_gzip_size += file_info.gzip_size

        def get_savings(original: int, compressed: int) -> str:
            if original == 0:
                return "0.00%"
            return f"{(1 - compressed / original) * 100:.2f}%"

        logger.info(
            f"Total Original Size (compressible files): {humanize.naturalsize(total_original_size)}",
        )
        if total_brotli_size > 0:
            logger.info(
                f"Total Brotli Size: {humanize.naturalsize(total_brotli_size)} "
                f"(Savings: {get_savings(total_original_size, total_brotli_size)})",
            )
        if total_gzip_size > 0:
            logger.info(
                f"Total Gzip Size: {humanize.naturalsize(total_gzip_size)} "
                f"(Savings: {get_savings(total_original_size, total_gzip_size)})",
            )