Compare commits


1 Commit

Author   SHA1        Message                                              Date
shamoon  3cfb0f5856  Add GitHub Actions workflow for release automation  2025-09-03 16:53:17 -07:00
7 changed files with 485 additions and 840 deletions


@@ -37,7 +37,7 @@ jobs:
   labels.push('bug');
 } else if (/^feature/i.test(title)) {
   labels.push('enhancement');
-} else if (!/^(dependabot)/i.test(title) && !/^(chore)/i.test(title)) {
+} else if (!/^(dependabot)/i.test(title) && /^(chore)/i.test(title)) {
   labels.push('enhancement'); // Default fallback
 }

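The only change in this hunk is the leading `!` on the chore check. A minimal Python sketch of the two variants of that fallback condition, with hypothetical names, to make the behavioural difference explicit (the removed line excludes chore-prefixed titles from the default label, the added line requires them):

    import re

    DEPENDABOT = re.compile(r"^(dependabot)", re.IGNORECASE)
    CHORE = re.compile(r"^(chore)", re.IGNORECASE)

    def default_enhancement(title: str, exclude_chore: bool) -> bool:
        """Return True when the fallback 'enhancement' label would be applied."""
        if exclude_chore:
            # Removed line: anything that is neither dependabot nor chore.
            return not DEPENDABOT.match(title) and not CHORE.match(title)
        # Added line: only chore titles that are not dependabot.
        return not DEPENDABOT.match(title) and bool(CHORE.match(title))

    # default_enhancement("Chore: tidy docs", exclude_chore=True)  -> False
    # default_enhancement("Chore: tidy docs", exclude_chore=False) -> True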
.github/workflows/release.yml (new file, 135 lines)

@@ -0,0 +1,135 @@
name: Paperless-ngx Release
on:
  workflow_dispatch:
    inputs:
      version:
        description: "Release version (e.g., 2.18.3)"
        required: true
        type: string
permissions:
  contents: write
  actions: read
concurrency:
  group: release-main
  cancel-in-progress: false
jobs:
  release:
    runs-on: ubuntu-24.04
    steps:
      - name: Checkout (full)
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Configure git
        run: |
          git config user.name "${{ github.actor }}"
          git config user.email "${{ github.actor }}@users.noreply.github.com"
      - name: Sanitize & validate input
        id: ver
        shell: bash
        run: |
          RAW="${{ github.event.inputs.version }}"
          # trim spaces + strip leading 'v' if present
          RAW="${RAW//[[:space:]]/}"
          RAW="${RAW#v}"
          # basic semver X.Y.Z
          if [[ ! "$RAW" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
            echo "❌ Invalid version: '$RAW' (expected X.Y.Z or vX.Y.Z)"; exit 1
          fi
          MAJOR="${RAW%%.*}"
          REST="${RAW#*.}"
          MINOR="${REST%%.*}"
          PATCH="${REST#*.}"
          echo "version=$RAW" >> "$GITHUB_OUTPUT"
          echo "major=$MAJOR" >> "$GITHUB_OUTPUT"
          echo "minor=$MINOR" >> "$GITHUB_OUTPUT"
          echo "patch=$PATCH" >> "$GITHUB_OUTPUT"
          echo "✅ Using version $RAW"
      - name: Ensure tag does not already exist
        run: |
          git fetch --tags
          if git rev-parse "v${{ steps.ver.outputs.version }}" >/dev/null 2>&1; then
            echo "❌ Tag v${{ steps.ver.outputs.version }} already exists"; exit 1
          fi
      - name: Update local branches
        run: |
          git fetch origin main dev
      - name: Fast-forward main to dev (no merge commits)
        run: |
          # Reset local main to remote, then try fast-forward to dev.
          git checkout main
          git reset --hard origin/main
          # --ff-only ensures the workflow fails if the branches diverged.
          git merge --ff-only origin/dev
          echo "✅ main fast-forwarded to dev at $(git rev-parse --short HEAD)"
      - name: Bump versions in files
        shell: bash
        run: |
          VER="${{ steps.ver.outputs.version }}"
          MAJ="${{ steps.ver.outputs.major }}"
          MIN="${{ steps.ver.outputs.minor }}"
          PAT="${{ steps.ver.outputs.patch }}"
          # 1) pyproject.toml: [project] version = "X.Y.Z"
          sed -i -E 's/^version = "[0-9]+\.[0-9]+\.[0-9]+"/version = "'"$VER"'"/' pyproject.toml
          # 2) src-ui/package.json: "version": "X.Y.Z"
          # Use jq if available; otherwise sed fallback.
          if command -v jq >/dev/null 2>&1; then
            tmp=$(mktemp)
            jq --arg v "$VER" '.version=$v' src-ui/package.json > "$tmp" && mv "$tmp" src-ui/package.json
          else
            sed -i -E 's/"version": "[0-9]+\.[0-9]+\.[0-9]+"/"version": "'"$VER"'"/' src-ui/package.json
          fi
          # 3) src-ui/src/environments/environment.prod.ts: version: 'X.Y.Z'
          sed -i -E "s/version: '[0-9]+\.[0-9]+\.[0-9]+'/version: '$VER'/" src-ui/src/environments/environment.prod.ts
          # 4) src/paperless/version.py: __version__ = (X, Y, Z)
          sed -i -E "s/__version__:\s*Final\[tuple\[int,\s*int,\s*int\]\]\s*=\s*\([0-9]+,\s*[0-9]+,\s*[0-9]+\)/__version__: Final[tuple[int, int, int]] = ($MAJ, $MIN, $PAT)/" src/paperless/version.py
          # 5) uv.lock: in the [[package]] name = "paperless-ngx" block, set version = "X.Y.Z"
          # This awk edits only the block for paperless-ngx.
          awk -v ver="$VER" '
            BEGIN{inpkg=0}
            /^\[\[package\]\]/{inpkg=0}
            /^\[\[package\]\]/{print; next}
            {print > "/dev/stdout"}
          ' uv.lock >/dev/null 2>&1 # noop to ensure awk exists
          # More robust in-place edit with awk:
          awk -v ver="$VER" '
            BEGIN{inpkg=0}
            /^\[\[package\]\]/{inpkg=0; print; next}
            /^name = "paperless-ngx"/{inpkg=1; print; next}
            inpkg && /^version = "/{
              sub(/version = "[0-9]+\.[0-9]+\.[0-9]+"/, "version = \"" ver "\"")
              print; next
            }
            {print}
          ' uv.lock > uv.lock.new && mv uv.lock.new uv.lock
          echo "✅ Files updated to $VER"
      - name: Commit bump (if changes)
        run: |
          if git diff --quiet; then
            echo "No changes to commit (versions may already match)"
          else
            git add pyproject.toml src-ui/package.json src-ui/src/environments/environment.prod.ts src/paperless/version.py uv.lock
            git commit -m "Bump version to ${{ steps.ver.outputs.version }}"
          fi
      - name: Push main
        run: |
          # Push branch (even if no commit, ensures remote main == local)
          git push origin HEAD:main
      - name: Create and push tag
        run: |
          VER="${{ steps.ver.outputs.version }}"
          git tag -a "v${VER}" -m "Release v${VER}"
          git push origin "v${VER}"
      - name: Done
        run: echo "🎉 Release v${{ steps.ver.outputs.version }} created and pushed."

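The two least obvious pieces of the workflow above are the version sanitisation and the scoped uv.lock edit. A rough Python equivalent of both, usable for a local dry run (function names are hypothetical, file paths as in the workflow):

    import re
    from pathlib import Path

    def parse_version(raw: str) -> tuple[int, int, int]:
        """Mirror the sanitize & validate step: strip whitespace and a leading 'v', require X.Y.Z."""
        cleaned = re.sub(r"\s+", "", raw).removeprefix("v")
        match = re.fullmatch(r"(\d+)\.(\d+)\.(\d+)", cleaned)
        if not match:
            raise ValueError(f"Invalid version: {raw!r} (expected X.Y.Z or vX.Y.Z)")
        major, minor, patch = (int(part) for part in match.groups())
        return major, minor, patch

    def bump_uv_lock(lock_path: Path, version: str) -> None:
        """Rewrite version = "X.Y.Z" only inside the paperless-ngx [[package]] block,
        the same scoping the awk script applies."""
        lines = lock_path.read_text().splitlines(keepends=True)
        in_pkg = False
        for i, line in enumerate(lines):
            if line.startswith("[[package]]"):
                in_pkg = False
            elif line.startswith('name = "paperless-ngx"'):
                in_pkg = True
            elif in_pkg and line.startswith('version = "'):
                lines[i] = f'version = "{version}"\n'
                in_pkg = False
        lock_path.write_text("".join(lines))

    major, minor, patch = parse_version(" v2.18.3 ")
    # bump_uv_lock(Path("uv.lock"), f"{major}.{minor}.{patch}")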

@@ -56,12 +56,12 @@
     "@playwright/test": "^1.55.0",
     "@types/jest": "^30.0.0",
     "@types/node": "^24.3.0",
-    "@typescript-eslint/eslint-plugin": "^8.41.0",
-    "@typescript-eslint/parser": "^8.41.0",
-    "@typescript-eslint/utils": "^8.41.0",
-    "eslint": "^9.34.0",
-    "jest": "30.1.3",
-    "jest-environment-jsdom": "^30.1.2",
+    "@typescript-eslint/eslint-plugin": "^8.38.0",
+    "@typescript-eslint/parser": "^8.38.0",
+    "@typescript-eslint/utils": "^8.38.0",
+    "eslint": "^9.32.0",
+    "jest": "30.0.5",
+    "jest-environment-jsdom": "^30.0.5",
     "jest-junit": "^16.0.0",
     "jest-preset-angular": "^15.0.0",
     "jest-websocket-mock": "^2.5.0",

src-ui/pnpm-lock.yaml (generated, 787 changed lines)

File diff suppressed because it is too large


@@ -2764,7 +2764,7 @@ class SystemStatusView(PassUserMixin):
         install_type = "docker"
         db_conn = connections["default"]
-        db_url = str(db_conn.settings_dict["NAME"])
+        db_url = db_conn.settings_dict["NAME"]
         db_error = None
         try:

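One side effect of dropping the str() call above: with SQLite, Django's settings_dict["NAME"] is commonly a pathlib.Path (e.g. built from a base directory), so db_url may no longer be a plain string. A small illustrative sketch (the settings values here are made up):

    from pathlib import Path

    # Illustrative only: a typical SQLite connection settings_dict.
    settings_dict = {"ENGINE": "django.db.backends.sqlite3", "NAME": Path("/data/db.sqlite3")}

    db_url_old = str(settings_dict["NAME"])  # always a str
    db_url_new = settings_dict["NAME"]       # whatever NAME is; here a Path

    assert isinstance(db_url_old, str)
    assert isinstance(db_url_new, Path)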

@@ -425,7 +425,7 @@ WHITENOISE_STATIC_PREFIX = "/static/"
 if machine().lower() == "aarch64":  # pragma: no cover
     _static_backend = "django.contrib.staticfiles.storage.StaticFilesStorage"
 else:
-    _static_backend = "paperless.staticfiles.DeduplicatedCompressedStaticFilesStorage"
+    _static_backend = "whitenoise.storage.CompressedStaticFilesStorage"
 STORAGES = {
     "staticfiles": {

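This hunk points the non-aarch64 branch back at WhiteNoise's stock compressed storage; the custom paperless.staticfiles backend it replaces is the module deleted at the end of this compare. The STORAGES dict is cut off by the hunk context, but a backend string like this is normally wired in along these lines (a sketch of the usual Django 4.2+ layout, not the file's exact contents):

    from platform import machine

    if machine().lower() == "aarch64":  # pragma: no cover
        _static_backend = "django.contrib.staticfiles.storage.StaticFilesStorage"
    else:
        _static_backend = "whitenoise.storage.CompressedStaticFilesStorage"

    STORAGES = {
        "staticfiles": {
            "BACKEND": _static_backend,
        },
    }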

@@ -1,385 +0,0 @@
import gzip
import hashlib
import logging
import os
import shutil
import threading
import time
from collections import defaultdict
from concurrent.futures import ThreadPoolExecutor
from concurrent.futures import as_completed
from dataclasses import dataclass
from pathlib import Path

import brotli
import humanize
from django.contrib.staticfiles.storage import StaticFilesStorage

logger = logging.getLogger(__name__)


@dataclass(slots=True)
class FileInfo:
    file_path_str: str
    file_path_path: Path
    checksum: str
    original_size: int
    gzip_size: int | None = None
    brotli_size: int | None = None


class DeduplicatedCompressedStaticFilesStorage(StaticFilesStorage):
    # File extensions that should be compressed
    COMPRESSIBLE_EXTENSIONS = {
        ".css",
        ".js",
        ".html",
        ".htm",
        ".xml",
        ".json",
        ".txt",
        ".svg",
        ".md",
        ".rst",
        ".csv",
        ".tsv",
        ".yaml",
        ".yml",
        ".map",
    }

    # Minimum file size to compress (bytes)
    MIN_COMPRESS_SIZE = 1024  # 1KB

    # Maximum number of threads for parallel processing
    MAX_WORKERS = min(32, (os.cpu_count() or 1) + 4)

    # Chunk size for file reading
    CHUNK_SIZE = 64 * 1024  # 64KB

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # --- MODIFIED: Added path_to_file_info for easy lookup ---
        self.hash_to_files: dict[str, list[FileInfo]] = defaultdict(list)
        self.path_to_file_info: dict[str, FileInfo] = {}
        self.linked_files: set[Path] = set()
        self.compression_stats = {
            "brotli": 0,
            "gzip": 0,
            "skipped_linked": 0,
            "skipped_other": 0,
            "errors": 0,
        }
        self._lock = threading.Lock()
    def post_process(self, paths: list[str], **options):
        """
        Post-process collected files: deduplicate first, then compress.
        Django 5.2 compatible with proper options handling.
        """
        start_time = time.time()

        # Step 1: Build hash map for deduplication (parallel)
        self._build_file_hash_map_parallel(paths)

        # Step 2: Create hard links for duplicate files
        self._create_hard_links()

        # Step 3: Compress files (parallel, skip linked duplicates)
        self._compress_files_parallel(paths)

        # Step 4: Provide user a summary of the compression
        self._log_compression_summary()

        processing_time = time.time() - start_time
        logger.info(f"Post-processing complete in {processing_time:.2f}s.")

        # Return list of processed files
        processed_files = []
        for path in paths:
            processed_files.append((path, path, True))
            # Add compressed variants
            file_path = self.path(path)
            if Path(file_path + ".br").exists():
                processed_files.append((path + ".br", path + ".br", True))
            if Path(file_path + ".gz").exists():
                processed_files.append((path + ".gz", path + ".gz", True))
        return processed_files
    def _build_file_hash_map_parallel(self, file_paths: list[str]):
        """Build a map of file hashes using parallel processing."""
        logger.info(
            f"Hashing {len(file_paths)} files with {self.MAX_WORKERS} workers...",
        )

        def hash_file(path: str):
            """Hash a single file."""
            try:
                file_path = Path(self.path(path))
                if not file_path.is_file():
                    return None, None, None
                file_hash = self._get_file_hash_fast(file_path)
                file_size = file_path.stat().st_size
                return path, file_hash, file_size
            except Exception as e:
                logger.warning(f"Error hashing file {path}: {e}")
                return path, None, None

        with ThreadPoolExecutor(max_workers=self.MAX_WORKERS) as executor:
            future_to_path = {
                executor.submit(hash_file, path): path for path in file_paths
            }
            for future in as_completed(future_to_path):
                path, file_hash, file_size = future.result()
                if path is not None and file_hash is not None and file_size is not None:
                    with self._lock:
                        file_info = FileInfo(
                            file_path_str=path,
                            file_path_path=Path(self.path(path)),
                            checksum=file_hash,
                            original_size=file_size,
                        )
                        self.hash_to_files[file_hash].append(file_info)
                        self.path_to_file_info[path] = file_info

        duplicates = sum(1 for files in self.hash_to_files.values() if len(files) > 1)
        logger.info(f"Found {duplicates} sets of duplicate files")
    def _get_file_hash_fast(self, file_path: Path):
        """Calculate SHA-256 hash of file content with optimized reading."""
        hash_sha256 = hashlib.sha256()
        try:
            with file_path.open("rb") as f:
                while chunk := f.read(self.CHUNK_SIZE):
                    hash_sha256.update(chunk)
        except OSError as e:
            logger.warning(f"Could not read file {file_path}: {e}")
            raise
        return hash_sha256.hexdigest()
    def _create_hard_links(self):
        """Create hard links for duplicate files."""
        logger.info("Creating hard links for duplicate files...")
        linked_count = 0

        for file_info_list in self.hash_to_files.values():
            if len(file_info_list) <= 1:
                continue

            # Sort by file size (desc) then path length (asc) to keep best original
            file_info_list.sort(key=lambda x: (-x.original_size, len(x.file_path_str)))
            original_file_info = file_info_list[0]
            duplicate_info = file_info_list[1:]

            for duplicate_file_info in duplicate_info:
                try:
                    # Remove duplicate file and create hard link
                    if duplicate_file_info.file_path_path.exists():
                        duplicate_file_info.file_path_path.unlink()
                    # Create hard link
                    os.link(
                        original_file_info.file_path_path,
                        duplicate_file_info.file_path_path,
                    )
                    with self._lock:
                        self.linked_files.add(duplicate_file_info.file_path_path)
                    linked_count += 1
                    logger.info(
                        f"Linked {duplicate_file_info.file_path_path} -> {original_file_info.file_path_path}",
                    )
                except OSError as e:
                    logger.error(
                        f"Hard link failed for {original_file_info.file_path_path}, copying instead: {e}",
                    )
                    # Fall back to copying the original over the duplicate if hard linking fails
                    try:
                        shutil.copy2(
                            original_file_info.file_path_path,
                            duplicate_file_info.file_path_path,
                        )
                        logger.error(
                            f"Copied {duplicate_file_info.file_path_path} (hard link failed)",
                        )
                    except Exception as copy_error:
                        logger.error(
                            f"Failed to copy {original_file_info.file_path_path}: {copy_error}",
                        )

        if linked_count > 0:
            logger.info(f"Created {linked_count} hard links")
    def _compress_files_parallel(self, file_paths: list[str]):
        """Compress files using parallel processing and update FileInfo objects."""
        # Identify files to compress, excluding hard links
        compressible_files = [
            self.path_to_file_info[path]
            for path in file_paths
            if self.path_to_file_info[path].file_path_path not in self.linked_files
            and self._should_compress_file(path)
        ]

        if not compressible_files:
            logger.info("No new files to compress")
            return

        logger.info(
            f"Compressing {len(compressible_files)} files with {self.MAX_WORKERS} workers...",
        )

        def compress_file(file_info: FileInfo):
            """Compress a single file and update its FileInfo by side-effect."""
            brotli_size = None
            gzip_size = None
            error = None
            try:
                brotli_size = self._compress_file_brotli(str(file_info.file_path_path))
                gzip_size = self._compress_file_gzip(str(file_info.file_path_path))
                # Store the compressed sizes
                file_info.brotli_size = brotli_size
                file_info.gzip_size = gzip_size
            except Exception as e:
                error = str(e)
                logger.warning(f"Error compressing {file_info.file_path_str}: {e}")
            return {
                "brotli": brotli_size is not None,
                "gzip": gzip_size is not None,
                "error": error,
            }

        with ThreadPoolExecutor(max_workers=self.MAX_WORKERS) as executor:
            future_to_info = {
                executor.submit(compress_file, info): info
                for info in compressible_files
            }
            for future in as_completed(future_to_info):
                result = future.result()
                with self._lock:
                    if result["brotli"]:
                        self.compression_stats["brotli"] += 1
                    if result["gzip"]:
                        self.compression_stats["gzip"] += 1
                    if result["error"]:
                        self.compression_stats["errors"] += 1
                    if (
                        not result["brotli"]
                        and not result["gzip"]
                        and not result["error"]
                    ):
                        self.compression_stats["skipped_other"] += 1

        self.compression_stats["skipped_linked"] = len(self.linked_files)
        logger.info(f"File count stats: {self.compression_stats}")
    def _should_compress_file(self, path: str):
        """Determine if a file should be compressed."""
        file_ext = Path(path).suffix.lower()
        if file_ext not in self.COMPRESSIBLE_EXTENSIONS:
            return False
        try:
            if Path(self.path(path)).stat().st_size < self.MIN_COMPRESS_SIZE:
                return False
        except OSError:
            return False
        return True
    def _compress_file_brotli(self, file_path: str) -> int | None:
        """Compress file using Brotli, returns compressed size or None."""
        brotli_path = Path(file_path + ".br")
        try:
            with Path(file_path).open("rb") as f_in:
                original_data = f_in.read()
            compressed_data = brotli.compress(
                original_data,
                quality=10,
                lgwin=22,  # Window size
                lgblock=0,  # Auto block size
            )
            if len(compressed_data) < len(original_data) * 0.95:
                with brotli_path.open("wb") as f_out:
                    f_out.write(compressed_data)
                return len(compressed_data)
            return None
        except Exception as e:
            logger.warning(f"Brotli compression failed for {file_path}: {e}")
            return None
    def _compress_file_gzip(self, file_path: str) -> int | None:
        """Compress file using GZip, returns compressed size or None."""
        gzip_path = Path(file_path + ".gz")
        file_path_path = Path(file_path)
        try:
            original_size = file_path_path.stat().st_size
            with (
                file_path_path.open("rb") as f_in,
                gzip.open(
                    gzip_path,
                    "wb",
                    compresslevel=7,
                ) as f_out,
            ):
                shutil.copyfileobj(f_in, f_out, length=self.CHUNK_SIZE)
            compressed_size = gzip_path.stat().st_size
            if compressed_size < original_size * 0.95:
                return compressed_size
            else:
                gzip_path.unlink()
                return None
        except Exception as e:
            logger.warning(f"GZip compression failed for {file_path}: {e}")
            if gzip_path.exists():
                try:
                    gzip_path.unlink()
                except OSError:
                    pass
            return None
    def _log_compression_summary(self):
        """Calculates and logs the total size savings from compression."""
        total_original_size = 0
        total_brotli_size = 0
        total_gzip_size = 0

        # Only consider the original files, not the duplicates, for size calculation
        unique_files = {
            file_list[0].checksum: file_list[0]
            for file_list in self.hash_to_files.values()
        }
        for file_info in unique_files.values():
            if self._should_compress_file(file_info.file_path_str):
                total_original_size += file_info.original_size
                if file_info.brotli_size:
                    total_brotli_size += file_info.brotli_size
                if file_info.gzip_size:
                    total_gzip_size += file_info.gzip_size

        def get_savings(original: int, compressed: int) -> str:
            if original == 0:
                return "0.00%"
            return f"{(1 - compressed / original) * 100:.2f}%"

        logger.info(
            f"Total Original Size (compressible files): {humanize.naturalsize(total_original_size)}",
        )
        if total_brotli_size > 0:
            logger.info(
                f"Total Brotli Size: {humanize.naturalsize(total_brotli_size)} "
                f"(Savings: {get_savings(total_original_size, total_brotli_size)})",
            )
        if total_gzip_size > 0:
            logger.info(
                f"Total Gzip Size: {humanize.naturalsize(total_gzip_size)} "
                f"(Savings: {get_savings(total_original_size, total_gzip_size)})",
            )