Compare commits

..

12 Commits

Author SHA1 Message Date
shamoon
8c01926be5 Performance script, available as management command
[ci skip]
2026-01-28 11:58:08 -08:00
shamoon
d9091aa42b Fix user checks in management scripts 2026-01-28 09:56:21 -08:00
shamoon
891f4a2faf Fix: correctly extract all ids for nested tags (#11888) 2026-01-26 09:12:03 -08:00
shamoon
2312314aa7 Performance: improve treenode inefficiencies (#11606) 2026-01-25 21:47:08 -08:00
shamoon
72e8b73108 Fix test 2026-01-25 17:08:15 -08:00
shamoon
5c9ff367e3 Fixhancement: change date calculation for 'this year' to include future documents (#11884) 2026-01-25 16:56:51 -08:00
Trenton H
94f6b8d36d Fixes the management scripts under a non-root install where the user ID is something besides 1000 (#11870) 2026-01-23 16:08:28 -08:00
shamoon
32d04e1fd3 Fix: use correct field id for overrides (#11869) 2026-01-23 15:49:22 -08:00
Trenton H
56c744fd56 Fixes the spelling of the commitish argument to the action 2026-01-23 15:49:00 -08:00
shamoon
d1aa76e4ce Narrow scope of these css rules 2026-01-20 12:30:06 -08:00
shamoon
5381bc5907 Fix: fix tag list horizontal scroll, again (#11839) 2026-01-20 12:30:06 -08:00
shamoon
771f3f150a Bump version to 2.20.5 2026-01-19 09:18:23 -08:00
34 changed files with 848 additions and 54 deletions

View File

@@ -617,7 +617,7 @@ jobs:
version: ${{ steps.get_version.outputs.version }}
prerelease: ${{ steps.get_version.outputs.prerelease }}
publish: true # ensures release is not marked as draft
committish: ${{ github.sha }}
commitish: ${{ github.sha }}
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Upload release archive

View File

@@ -19,7 +19,8 @@ for command in decrypt_documents \
manage_superuser \
convert_mariadb_uuid \
prune_audit_logs \
createsuperuser;
createsuperuser \
document_perf_benchmark;
do
echo "installing $command..."
sed "s/management_command/$command/g" management_script.sh >"$PWD/rootfs/usr/local/bin/$command"

View File

@@ -5,10 +5,13 @@ set -e
cd "${PAPERLESS_SRC_DIR}"
if [[ $(id -u) == 0 ]]; then
if [[ -n "${USER_IS_NON_ROOT}" ]]; then
python3 manage.py management_command "$@"
elif [[ $(id -u) == 0 ]]; then
s6-setuidgid paperless python3 manage.py management_command "$@"
elif [[ $(id -un) == "paperless" ]]; then
python3 manage.py management_command "$@"
else
echo "Unknown user."
exit 1
fi

View File

@@ -5,10 +5,13 @@ set -e
cd "${PAPERLESS_SRC_DIR}"
if [[ $(id -u) == 0 ]]; then
if [[ -n "${USER_IS_NON_ROOT}" ]]; then
python3 manage.py convert_mariadb_uuid "$@"
elif [[ $(id -u) == 0 ]]; then
s6-setuidgid paperless python3 manage.py convert_mariadb_uuid "$@"
elif [[ $(id -un) == "paperless" ]]; then
python3 manage.py convert_mariadb_uuid "$@"
else
echo "Unknown user."
exit 1
fi

View File

@@ -5,10 +5,13 @@ set -e
cd "${PAPERLESS_SRC_DIR}"
if [[ $(id -u) == 0 ]]; then
if [[ -n "${USER_IS_NON_ROOT}" ]]; then
python3 manage.py createsuperuser "$@"
elif [[ $(id -u) == 0 ]]; then
s6-setuidgid paperless python3 manage.py createsuperuser "$@"
elif [[ $(id -un) == "paperless" ]]; then
python3 manage.py createsuperuser "$@"
else
echo "Unknown user."
exit 1
fi

View File

@@ -5,10 +5,13 @@ set -e
cd "${PAPERLESS_SRC_DIR}"
if [[ $(id -u) == 0 ]]; then
if [[ -n "${USER_IS_NON_ROOT}" ]]; then
python3 manage.py decrypt_documents "$@"
elif [[ $(id -u) == 0 ]]; then
s6-setuidgid paperless python3 manage.py decrypt_documents "$@"
elif [[ $(id -un) == "paperless" ]]; then
python3 manage.py decrypt_documents "$@"
else
echo "Unknown user."
exit 1
fi

View File

@@ -5,10 +5,13 @@ set -e
cd "${PAPERLESS_SRC_DIR}"
if [[ $(id -u) == 0 ]]; then
if [[ -n "${USER_IS_NON_ROOT}" ]]; then
python3 manage.py document_archiver "$@"
elif [[ $(id -u) == 0 ]]; then
s6-setuidgid paperless python3 manage.py document_archiver "$@"
elif [[ $(id -un) == "paperless" ]]; then
python3 manage.py document_archiver "$@"
else
echo "Unknown user."
exit 1
fi

View File

@@ -5,10 +5,17 @@ set -e
cd "${PAPERLESS_SRC_DIR}"
if [[ $(id -u) == 0 ]]; then
if [[ -n "${USER_IS_NON_ROOT}" ]]; then
python3 manage.py document_create_classifier "$@"
elif [[ $(id -u) == 0 ]]; then
s6-setuidgid paperless python3 manage.py document_create_classifier "$@"
elif [[ $(id -un) == "paperless" ]]; then
python3 manage.py document_create_classifier "$@"
else
echo "Unknown user."
exit 1
fi
er "$@"
elif [[ $(id -un) == "paperless" ]]; then
s6-setuidgid paperless python3 manage.py document_create_classifier "$@"
fi

View File

@@ -5,10 +5,13 @@ set -e
cd "${PAPERLESS_SRC_DIR}"
if [[ $(id -u) == 0 ]]; then
if [[ -n "${USER_IS_NON_ROOT}" ]]; then
python3 manage.py document_exporter "$@"
elif [[ $(id -u) == 0 ]]; then
s6-setuidgid paperless python3 manage.py document_exporter "$@"
elif [[ $(id -un) == "paperless" ]]; then
python3 manage.py document_exporter "$@"
else
echo "Unknown user."
exit 1
fi

View File

@@ -5,10 +5,13 @@ set -e
cd "${PAPERLESS_SRC_DIR}"
if [[ $(id -u) == 0 ]]; then
if [[ -n "${USER_IS_NON_ROOT}" ]]; then
python3 manage.py document_fuzzy_match "$@"
elif [[ $(id -u) == 0 ]]; then
s6-setuidgid paperless python3 manage.py document_fuzzy_match "$@"
elif [[ $(id -un) == "paperless" ]]; then
python3 manage.py document_fuzzy_match "$@"
else
echo "Unknown user."
exit 1
fi

View File

@@ -5,10 +5,13 @@ set -e
cd "${PAPERLESS_SRC_DIR}"
if [[ $(id -u) == 0 ]]; then
if [[ -n "${USER_IS_NON_ROOT}" ]]; then
python3 manage.py document_importer "$@"
elif [[ $(id -u) == 0 ]]; then
s6-setuidgid paperless python3 manage.py document_importer "$@"
elif [[ $(id -un) == "paperless" ]]; then
python3 manage.py document_importer "$@"
else
echo "Unknown user."
exit 1
fi

View File

@@ -5,10 +5,13 @@ set -e
cd "${PAPERLESS_SRC_DIR}"
if [[ $(id -u) == 0 ]]; then
if [[ -n "${USER_IS_NON_ROOT}" ]]; then
python3 manage.py document_index "$@"
elif [[ $(id -u) == 0 ]]; then
s6-setuidgid paperless python3 manage.py document_index "$@"
elif [[ $(id -un) == "paperless" ]]; then
python3 manage.py document_index "$@"
else
echo "Unknown user."
exit 1
fi

View File

@@ -0,0 +1,17 @@
#!/command/with-contenv /usr/bin/bash
# shellcheck shell=bash
#
# Launcher for the `document_perf_benchmark` Django management command.
# Picks the right way to invoke manage.py depending on who is running it:
#   - non-root install (USER_IS_NON_ROOT set): run directly as current user
#   - root (uid 0): drop privileges to the `paperless` user via s6-setuidgid
#   - already the `paperless` user: run directly
#   - anything else: refuse, to avoid creating files with the wrong owner
set -e

cd "${PAPERLESS_SRC_DIR}"

if [[ -n "${USER_IS_NON_ROOT}" ]]; then
    # Explicitly marked non-root install; trust the current user.
    python3 manage.py document_perf_benchmark "$@"
elif [[ $(id -u) == 0 ]]; then
    # Running as root: re-exec as the paperless service user.
    s6-setuidgid paperless python3 manage.py document_perf_benchmark "$@"
elif [[ $(id -un) == "paperless" ]]; then
    python3 manage.py document_perf_benchmark "$@"
else
    echo "Unknown user."
    exit 1
fi

View File

@@ -5,10 +5,13 @@ set -e
cd "${PAPERLESS_SRC_DIR}"
if [[ $(id -u) == 0 ]]; then
if [[ -n "${USER_IS_NON_ROOT}" ]]; then
python3 manage.py document_renamer "$@"
elif [[ $(id -u) == 0 ]]; then
s6-setuidgid paperless python3 manage.py document_renamer "$@"
elif [[ $(id -un) == "paperless" ]]; then
python3 manage.py document_renamer "$@"
else
echo "Unknown user."
exit 1
fi

View File

@@ -5,10 +5,13 @@ set -e
cd "${PAPERLESS_SRC_DIR}"
if [[ $(id -u) == 0 ]]; then
if [[ -n "${USER_IS_NON_ROOT}" ]]; then
python3 manage.py document_retagger "$@"
elif [[ $(id -u) == 0 ]]; then
s6-setuidgid paperless python3 manage.py document_retagger "$@"
elif [[ $(id -un) == "paperless" ]]; then
python3 manage.py document_retagger "$@"
else
echo "Unknown user."
exit 1
fi

View File

@@ -5,10 +5,13 @@ set -e
cd "${PAPERLESS_SRC_DIR}"
if [[ $(id -u) == 0 ]]; then
if [[ -n "${USER_IS_NON_ROOT}" ]]; then
python3 manage.py document_sanity_checker "$@"
elif [[ $(id -u) == 0 ]]; then
s6-setuidgid paperless python3 manage.py document_sanity_checker "$@"
elif [[ $(id -un) == "paperless" ]]; then
python3 manage.py document_sanity_checker "$@"
else
echo "Unknown user."
exit 1
fi

View File

@@ -5,10 +5,13 @@ set -e
cd "${PAPERLESS_SRC_DIR}"
if [[ $(id -u) == 0 ]]; then
if [[ -n "${USER_IS_NON_ROOT}" ]]; then
python3 manage.py document_thumbnails "$@"
elif [[ $(id -u) == 0 ]]; then
s6-setuidgid paperless python3 manage.py document_thumbnails "$@"
elif [[ $(id -un) == "paperless" ]]; then
python3 manage.py document_thumbnails "$@"
else
echo "Unknown user."
exit 1
fi

View File

@@ -5,10 +5,13 @@ set -e
cd "${PAPERLESS_SRC_DIR}"
if [[ $(id -u) == 0 ]]; then
if [[ -n "${USER_IS_NON_ROOT}" ]]; then
python3 manage.py mail_fetcher "$@"
elif [[ $(id -u) == 0 ]]; then
s6-setuidgid paperless python3 manage.py mail_fetcher "$@"
elif [[ $(id -un) == "paperless" ]]; then
python3 manage.py mail_fetcher "$@"
else
echo "Unknown user."
exit 1
fi

View File

@@ -5,10 +5,13 @@ set -e
cd "${PAPERLESS_SRC_DIR}"
if [[ $(id -u) == 0 ]]; then
if [[ -n "${USER_IS_NON_ROOT}" ]]; then
python3 manage.py manage_superuser "$@"
elif [[ $(id -u) == 0 ]]; then
s6-setuidgid paperless python3 manage.py manage_superuser "$@"
elif [[ $(id -un) == "paperless" ]]; then
python3 manage.py manage_superuser "$@"
else
echo "Unknown user."
exit 1
fi

View File

@@ -5,10 +5,13 @@ set -e
cd "${PAPERLESS_SRC_DIR}"
if [[ $(id -u) == 0 ]]; then
if [[ -n "${USER_IS_NON_ROOT}" ]]; then
python3 manage.py prune_audit_logs "$@"
elif [[ $(id -u) == 0 ]]; then
s6-setuidgid paperless python3 manage.py prune_audit_logs "$@"
elif [[ $(id -un) == "paperless" ]]; then
python3 manage.py prune_audit_logs "$@"
else
echo "Unknown user."
exit 1
fi

View File

@@ -1,6 +1,6 @@
[project]
name = "paperless-ngx"
version = "2.20.4"
version = "2.20.5"
description = "A community-supported supercharged document management system: scan, index and archive all your physical documents"
readme = "README.md"
requires-python = ">=3.10"
@@ -238,7 +238,7 @@ lint.isort.force-single-line = true
[tool.codespell]
write-changes = true
ignore-words-list = "criterias,afterall,valeu,ureue,equest,ure,assertIn,Oktober"
ignore-words-list = "criterias,afterall,valeu,ureue,equest,ure,assertIn,Oktober,commitish"
skip = "src-ui/src/locale/*,src-ui/pnpm-lock.yaml,src-ui/e2e/*,src/paperless_mail/tests/samples/*,src/documents/tests/samples/*,*.po,*.json"
[tool.pytest.ini_options]

View File

@@ -1,6 +1,6 @@
{
"name": "paperless-ngx-ui",
"version": "2.20.4",
"version": "2.20.5",
"scripts": {
"preinstall": "npx only-allow pnpm",
"ng": "ng",

View File

@@ -22,8 +22,8 @@
}
// Dropdown hierarchy reveal for ng-select options
::ng-deep .ng-dropdown-panel .ng-option {
overflow-x: scroll !important;
:host ::ng-deep .ng-dropdown-panel .ng-option {
overflow-x: auto !important;
.tag-option-row {
font-size: 1rem;
@@ -41,12 +41,12 @@
}
}
::ng-deep .ng-dropdown-panel .ng-option:hover .hierarchy-reveal,
::ng-deep .ng-dropdown-panel .ng-option.ng-option-marked .hierarchy-reveal {
:host ::ng-deep .ng-dropdown-panel .ng-option:hover .hierarchy-reveal,
:host ::ng-deep .ng-dropdown-panel .ng-option.ng-option-marked .hierarchy-reveal {
max-width: 1000px;
}
::ng-deep .ng-dropdown-panel .ng-option:hover .hierarchy-indicator,
::ng-deep .ng-dropdown-panel .ng-option.ng-option-marked .hierarchy-indicator {
:host ::ng-deep .ng-dropdown-panel .ng-option.ng-option-marked .hierarchy-indicator {
background: transparent;
}

View File

@@ -229,6 +229,21 @@ describe('ManagementListComponent', () => {
expect(reloadSpy).toHaveBeenCalled()
})
it('should use the all list length for collection size when provided', fakeAsync(() => {
jest.spyOn(tagService, 'listFiltered').mockReturnValueOnce(
of({
count: 1,
all: [1, 2, 3],
results: tags.slice(0, 1),
})
)
component.reloadData()
tick(100)
expect(component.collectionSize).toBe(3)
}))
it('should support quick filter for objects', () => {
const qfSpy = jest.spyOn(documentListViewService, 'quickFilter')
const filterButton = fixture.debugElement.queryAll(By.css('button'))[9]

View File

@@ -171,7 +171,7 @@ export abstract class ManagementListComponent<T extends MatchingModel>
tap((c) => {
this.unfilteredData = c.results
this.data = this.filterData(c.results)
this.collectionSize = c.count
this.collectionSize = c.all?.length ?? c.count
}),
delay(100)
)

View File

@@ -6,7 +6,7 @@ export const environment = {
apiVersion: '9', // match src/paperless/settings.py
appTitle: 'Paperless-ngx',
tag: 'prod',
version: '2.20.4',
version: '2.20.5',
webSocketHost: window.location.host,
webSocketProtocol: window.location.protocol == 'https:' ? 'wss:' : 'ws:',
webSocketBaseUrl: base_url.pathname + 'ws/',

View File

@@ -115,7 +115,7 @@ class DocumentMetadataOverrides:
).values_list("id", flat=True),
)
overrides.custom_fields = {
custom_field.id: custom_field.value
custom_field.field.id: custom_field.value
for custom_field in doc.custom_fields.all()
}

View File

@@ -602,7 +602,7 @@ def rewrite_natural_date_keywords(query_string: str) -> str:
case "this year":
start = datetime(local_now.year, 1, 1, 0, 0, 0, tzinfo=tz)
end = datetime.combine(today, time.max, tzinfo=tz)
end = datetime(local_now.year, 12, 31, 23, 59, 59, tzinfo=tz)
case "previous week":
days_since_monday = local_now.weekday()

View File

@@ -0,0 +1,663 @@
import contextlib
import io
import logging
import math
import signal
import uuid
from time import perf_counter
from django.contrib.auth import get_user_model
from django.contrib.auth.models import Permission
from django.core.management.base import BaseCommand
from django.core.management.base import CommandError
from django.db import connections
from django.db import reset_queries
from django.db.models import Count
from django.db.models import Q
from django.db.models import Subquery
from guardian.shortcuts import assign_perm
from rest_framework.test import APIClient
from documents.models import CustomField
from documents.models import CustomFieldInstance
from documents.models import Document
from documents.models import Tag
from documents.permissions import get_objects_for_user_owner_aware
from documents.permissions import permitted_document_ids
class Command(BaseCommand):
    """Seed a synthetic document dataset and benchmark permission-filtered queries.

    Creates three benchmark users (superuser and non-superusers), bulk-creates
    documents/tags/custom fields with a recognizable title prefix, then times
    API list endpoints for each user. Designed to be re-runnable via
    ``--reuse-existing`` and reversible via ``--cleanup``.
    """

    # e.g. docker compose exec webserver / manage.py ...
    # document_perf_benchmark --reuse-existing --documents 500000 --chunk-size 5000 --tags 40 --tags-per-doc 3 --custom-fields 6 --custom-fields-per-doc 2
    help = (
        "Seed a synthetic dataset and benchmark permission-filtered document queries "
        "for superusers vs non-superusers."
    )

    def add_arguments(self, parser):
        """Register CLI options controlling dataset size/shape and benchmark runs."""
        parser.add_argument(
            "--documents",
            type=int,
            default=10000,
            help="Total documents to generate (default: 10,000)",
        )
        parser.add_argument(
            "--owner-ratio",
            type=float,
            default=0.6,
            help="Fraction owned by the benchmarked user (default: 0.6)",
        )
        parser.add_argument(
            "--unowned-ratio",
            type=float,
            default=0.1,
            help="Fraction of unowned documents (default: 0.1)",
        )
        parser.add_argument(
            "--shared-ratio",
            type=float,
            default=0.25,
            help=(
                "Fraction of other-user documents that are shared via object perms "
                "with the benchmarked user (default: 0.25)"
            ),
        )
        parser.add_argument(
            "--chunk-size",
            type=int,
            default=2000,
            help="Bulk create size for documents (default: 2000)",
        )
        parser.add_argument(
            "--iterations",
            type=int,
            default=3,
            help="Number of timing runs per query shape (default: 3)",
        )
        parser.add_argument(
            "--prefix",
            default="perf-benchmark",
            help="Title prefix used to mark generated documents (default: perf-benchmark)",
        )
        parser.add_argument(
            "--tags",
            type=int,
            default=0,
            help="Number of tags to create and assign (default: 0)",
        )
        parser.add_argument(
            "--tags-per-doc",
            type=int,
            default=1,
            help="How many tags to attach to each document (default: 1)",
        )
        parser.add_argument(
            "--custom-fields",
            type=int,
            default=0,
            help="Number of string custom fields to create (default: 0)",
        )
        parser.add_argument(
            "--custom-fields-per-doc",
            type=int,
            default=1,
            help="How many custom field instances per document (default: 1)",
        )
        parser.add_argument(
            "--skip-tags",
            action="store_true",
            help="Skip tag document_count benchmarks (useful for large datasets on Postgres)",
        )
        parser.add_argument(
            "--skip-custom-fields",
            action="store_true",
            help="Skip custom field document_count benchmarks",
        )
        parser.add_argument(
            "--reuse-existing",
            action="store_true",
            help="Keep previously generated documents with the given prefix instead of recreating",
        )
        parser.add_argument(
            "--cleanup",
            action="store_true",
            help="Delete previously generated documents with the given prefix and exit",
        )
        parser.add_argument(
            "--api-timeout",
            type=float,
            default=30.0,
            help="Per-request timeout (seconds) for API timings (default: 30s)",
        )

    def handle(self, *args, **options):
        """Entry point: clean up, seed (or reuse) the dataset, then run benchmarks."""
        # keep options for downstream checks
        self.options = options
        document_total = options["documents"]
        owner_ratio = options["owner_ratio"]
        unowned_ratio = options["unowned_ratio"]
        shared_ratio = options["shared_ratio"]
        chunk_size = options["chunk_size"]
        iterations = options["iterations"]
        prefix = options["prefix"]
        tags = options["tags"]
        tags_per_doc = options["tags_per_doc"]
        custom_fields = options["custom_fields"]
        custom_fields_per_doc = options["custom_fields_per_doc"]
        self._validate_ratios(owner_ratio, unowned_ratio)
        if tags_per_doc < 0 or custom_fields_per_doc < 0:
            raise CommandError("Per-document counts must be non-negative")
        target_user, other_user, superuser = self._ensure_users()
        skip_seed = False
        if options["cleanup"]:
            # Cleanup-only mode: delete generated rows and exit without benchmarking.
            removed = self._cleanup(prefix)
            self.stdout.write(
                self.style.SUCCESS(f"Removed {removed} generated documents"),
            )
            return
        if not options["reuse_existing"]:
            # Fresh run: wipe any leftovers from a previous invocation first.
            removed = self._cleanup(prefix)
            if removed:
                self.stdout.write(f"Removed existing generated documents: {removed}")
        else:
            existing = Document.objects.filter(title__startswith=prefix).count()
            if existing:
                skip_seed = True
                self.stdout.write(
                    f"Reusing existing dataset with prefix '{prefix}': {existing} docs",
                )
        if skip_seed:
            dataset_size = Document.objects.filter(title__startswith=prefix).count()
            self.stdout.write(
                self.style.SUCCESS(
                    f"Dataset ready (reused): {dataset_size} docs | prefix={prefix}",
                ),
            )
        else:
            self.stdout.write(
                f"Seeding {document_total} documents (owner_ratio={owner_ratio}, "
                f"unowned_ratio={unowned_ratio}, shared_ratio={shared_ratio})",
            )
            created_counts = self._seed_documents(
                total=document_total,
                owner_ratio=owner_ratio,
                unowned_ratio=unowned_ratio,
                shared_ratio=shared_ratio,
                chunk_size=chunk_size,
                prefix=prefix,
                target_user=target_user,
                other_user=other_user,
            )
            created_tags = []
            if tags:
                created_tags = self._seed_tags(prefix=prefix, count=tags)
            if tags_per_doc and created_tags:
                self._assign_tags_to_documents(
                    prefix=prefix,
                    tags=created_tags,
                    tags_per_doc=tags_per_doc,
                    chunk_size=chunk_size,
                )
            created_custom_fields = []
            if custom_fields:
                created_custom_fields = self._seed_custom_fields(prefix, custom_fields)
            if custom_fields_per_doc and created_custom_fields:
                self._seed_custom_field_instances(
                    prefix=prefix,
                    custom_fields=created_custom_fields,
                    per_doc=custom_fields_per_doc,
                    chunk_size=chunk_size,
                )
            dataset_size = Document.objects.filter(title__startswith=prefix).count()
            self.stdout.write(
                self.style.SUCCESS(
                    f"Dataset ready: {dataset_size} docs | owned by target {created_counts['owned']} | "
                    f"owned by other {created_counts['other_owned']} | unowned {created_counts['unowned']} | "
                    f"shared-perms {created_counts['shared']} | tags {len(created_tags)} | "
                    f"custom fields {len(created_custom_fields)}",
                ),
            )
        self.stdout.write("\nRunning benchmarks...\n")
        self._run_benchmarks(
            iterations=iterations,
            target_user=target_user,
            superuser=superuser,
            prefix=prefix,
        )

    def _validate_ratios(self, owner_ratio: float, unowned_ratio: float):
        """Reject negative ratios or a split that exceeds the whole dataset."""
        if owner_ratio < 0 or unowned_ratio < 0:
            raise CommandError("Ratios must be non-negative")
        if owner_ratio + unowned_ratio > 1:
            raise CommandError("owner-ratio + unowned-ratio cannot exceed 1.0")

    def _ensure_users(self):
        """Create/refresh the three benchmark accounts and return them.

        NOTE: passwords intentionally equal the usernames (perf_user/perf_owner/
        perf_admin) — these are throwaway benchmark accounts, not for production.
        Both non-superusers get every model-level permission so object-level
        filtering is the only differentiator.
        """
        User = get_user_model()
        target_user, _ = User.objects.get_or_create(
            username="perf_user",
            defaults={"email": "perf_user@example.com"},
        )
        target_user.set_password("perf_user")
        target_user.is_staff = True
        target_user.save()
        other_user, _ = User.objects.get_or_create(
            username="perf_owner",
            defaults={"email": "perf_owner@example.com"},
        )
        other_user.set_password("perf_owner")
        other_user.is_staff = True
        other_user.save()
        superuser, _ = User.objects.get_or_create(
            username="perf_admin",
            defaults={
                "email": "perf_admin@example.com",
                "is_staff": True,
                "is_superuser": True,
            },
        )
        superuser.set_password("perf_admin")
        superuser.save()
        perms = Permission.objects.all()
        target_user.user_permissions.set(perms)
        other_user.user_permissions.set(perms)
        return target_user, other_user, superuser

    def _cleanup(self, prefix: str) -> int:
        """Hard-delete all generated rows matching *prefix*; return total removed.

        Uses ``global_objects``/``hard_delete`` so soft-deleted (trashed) rows
        are also purged.
        """
        docs_qs = Document.global_objects.filter(title__startswith=prefix)
        doc_count = docs_qs.count()
        if doc_count:
            docs_qs.hard_delete()
        tag_count = Tag.objects.filter(name__startswith=prefix).count()
        if tag_count:
            Tag.objects.filter(name__startswith=prefix).delete()
        cf_qs = CustomField.objects.filter(name__startswith=prefix)
        cf_count = cf_qs.count()
        if cf_count:
            cf_qs.delete()
        cfi_qs = CustomFieldInstance.global_objects.filter(
            document__title__startswith=prefix,
        )
        cfi_count = cfi_qs.count()
        if cfi_count:
            cfi_qs.hard_delete()
        return doc_count + tag_count + cf_count + cfi_count

    def _seed_documents(
        self,
        *,
        total: int,
        owner_ratio: float,
        unowned_ratio: float,
        shared_ratio: float,
        chunk_size: int,
        prefix: str,
        target_user,
        other_user,
    ) -> dict[str, int]:
        """Bulk-create *total* documents split by ownership; return per-bucket counts.

        Layout by index: first ``owner_ratio`` share owned by *target_user*,
        then the remainder owned by *other_user*, and the final ``unowned_ratio``
        share with no owner. A ``shared_ratio`` fraction of the other-user docs
        gets an object-level view permission for *target_user*.
        """
        target_count = math.floor(total * owner_ratio)
        unowned_count = math.floor(total * unowned_ratio)
        other_count = total - target_count - unowned_count
        documents: list[Document] = []
        other_docs: list[Document] = []
        for idx in range(total):
            if idx < target_count:
                owner = target_user
            elif idx < target_count + other_count:
                owner = other_user
            else:
                owner = None
            doc = Document(
                owner=owner,
                title=f"{prefix}-{idx:07d}",
                mime_type="application/pdf",
                checksum=self._unique_checksum(idx),
                page_count=1,
            )
            if owner is other_user:
                other_docs.append(doc)
            documents.append(doc)
            # Flush in chunks to bound memory for very large datasets.
            if len(documents) >= chunk_size:
                Document.objects.bulk_create(documents, batch_size=chunk_size)
                documents.clear()
        if documents:
            Document.objects.bulk_create(documents, batch_size=chunk_size)
        # Grant per-object view perms one at a time (guardian shortcut).
        shared_target = math.floor(len(other_docs) * shared_ratio)
        for doc in other_docs[:shared_target]:
            assign_perm("documents.view_document", target_user, doc)
        return {
            "owned": target_count,
            "other_owned": other_count,
            "unowned": unowned_count,
            "shared": shared_target,
        }

    def _seed_tags(self, *, prefix: str, count: int) -> list[Tag]:
        """Create *count* prefixed tags (idempotent) and return all matching tags."""
        tags = [
            Tag(
                name=f"{prefix}-tag-{idx:03d}",
            )
            for idx in range(count)
        ]
        Tag.objects.bulk_create(tags, ignore_conflicts=True)
        # Re-query so we get primary keys even for rows skipped by conflicts.
        return list(Tag.objects.filter(name__startswith=prefix))

    def _assign_tags_to_documents(
        self,
        *,
        prefix: str,
        tags: list[Tag],
        tags_per_doc: int,
        chunk_size: int,
    ):
        """Attach *tags_per_doc* tags to each prefixed document via the M2M through table.

        Tags are assigned round-robin (rotating start offset per document) so
        tag usage is roughly uniform; duplicates within one document are skipped.
        """
        if not tags or tags_per_doc < 1:
            return
        rels = []
        through = Document.tags.through
        tag_ids = [t.id for t in tags]
        tag_count = len(tag_ids)
        iterator = (
            Document.objects.filter(title__startswith=prefix)
            .values_list(
                "id",
                flat=True,
            )
            .iterator()
        )
        for idx, doc_id in enumerate(iterator):
            start = idx % tag_count
            chosen = set()
            for offset in range(tags_per_doc):
                tag_id = tag_ids[(start + offset) % tag_count]
                if tag_id in chosen:
                    continue
                chosen.add(tag_id)
                rels.append(through(document_id=doc_id, tag_id=tag_id))
            if len(rels) >= chunk_size:
                through.objects.bulk_create(rels, ignore_conflicts=True)
                rels.clear()
        if rels:
            through.objects.bulk_create(rels, ignore_conflicts=True)

    def _seed_custom_fields(self, prefix: str, count: int) -> list[CustomField]:
        """Create *count* prefixed string custom fields (idempotent) and return them."""
        fields = [
            CustomField(
                name=f"{prefix}-cf-{idx:03d}",
                data_type=CustomField.FieldDataType.STRING,
            )
            for idx in range(count)
        ]
        CustomField.objects.bulk_create(fields, ignore_conflicts=True)
        return list(CustomField.objects.filter(name__startswith=prefix))

    def _seed_custom_field_instances(
        self,
        *,
        prefix: str,
        custom_fields: list[CustomField],
        per_doc: int,
        chunk_size: int,
    ):
        """Attach *per_doc* custom field values to each prefixed document.

        Same rotating round-robin assignment as tag seeding; values are
        synthetic strings derived from the document and field ids.
        """
        if not custom_fields or per_doc < 1:
            return
        instances = []
        cf_ids = [cf.id for cf in custom_fields]
        cf_count = len(cf_ids)
        iterator = (
            Document.objects.filter(title__startswith=prefix)
            .values_list(
                "id",
                flat=True,
            )
            .iterator()
        )
        for idx, doc_id in enumerate(iterator):
            start = idx % cf_count
            for offset in range(per_doc):
                cf_id = cf_ids[(start + offset) % cf_count]
                instances.append(
                    CustomFieldInstance(
                        document_id=doc_id,
                        field_id=cf_id,
                        value_text=f"val-{doc_id}-{cf_id}",
                    ),
                )
                if len(instances) >= chunk_size:
                    CustomFieldInstance.objects.bulk_create(
                        instances,
                        batch_size=chunk_size,
                        ignore_conflicts=True,
                    )
                    instances.clear()
        if instances:
            CustomFieldInstance.objects.bulk_create(
                instances,
                batch_size=chunk_size,
                ignore_conflicts=True,
            )

    def _run_benchmarks(self, *, iterations: int, target_user, superuser, prefix: str):
        """Time the documents/tags/custom_fields list APIs for admin and normal user.

        NOTE(review): only API timings run here — ``iterations``, ``target_user``,
        ``superuser`` and ``prefix`` are accepted but unused, and the DB-level
        helpers below (_count_with_*, _time_tag_counts, _time_custom_field_counts,
        _document_filter, _tag_queryset) are never invoked from this method.
        Confirm whether that is intentional or a pruned benchmark set.
        """
        self.stdout.write("-> API benchmarks")
        for user in ("perf_admin", "perf_user"):
            # Benchmark accounts use username == password (see _ensure_users).
            pwd = user
            self.stdout.write(f"-> API documents ({user})")
            self._time_api_get(
                label=f"{user} /api/documents/",
                username=user,
                password=pwd,
                path="/api/documents/",
            )
            self.stdout.write(f"-> API tags ({user})")
            self._time_api_get(
                label=f"{user} /api/tags/",
                username=user,
                password=pwd,
                path="/api/tags/",
            )
            self.stdout.write(f"-> API custom fields ({user})")
            self._time_api_get(
                label=f"{user} /api/custom_fields/",
                username=user,
                password=pwd,
                path="/api/custom_fields/",
            )

    def _count_with_values_list(self, user) -> int:
        """Count permitted documents using an ``id__in=values_list`` filter shape.

        Currently unreferenced by _run_benchmarks (kept for query-shape comparison).
        """
        qs = get_objects_for_user_owner_aware(
            user,
            "documents.view_document",
            Document,
        )
        return Document.objects.filter(id__in=qs.values_list("id", flat=True)).count()

    def _count_with_subquery(self, user) -> int:
        """Count permitted documents using an ``id__in=Subquery`` filter shape.

        Currently unreferenced by _run_benchmarks (kept for query-shape comparison).
        """
        qs = get_objects_for_user_owner_aware(
            user,
            "documents.view_document",
            Document,
        )
        subquery = Subquery(qs.values_list("id"))
        return Document.objects.filter(id__in=subquery).count()

    def _document_filter(self, user, *, use_subquery: bool):
        """Build the Q filter used for annotating document counts for *user*.

        Superusers (or no user) see every non-deleted document; others are
        restricted to their permitted document ids, expressed either as a
        Subquery or a plain values_list depending on *use_subquery*.
        """
        if user is None or getattr(user, "is_superuser", False):
            return Q(documents__deleted_at__isnull=True)
        qs = get_objects_for_user_owner_aware(
            user,
            "documents.view_document",
            Document,
        )
        ids = (
            Subquery(qs.values_list("id"))
            if use_subquery
            else qs.values_list("id", flat=True)
        )
        return Q(documents__deleted_at__isnull=True, documents__id__in=ids)

    def _tag_queryset(self, *, prefix: str, filter_q: Q):
        """Return prefixed tags annotated with a filtered ``document_count``."""
        return Tag.objects.filter(name__startswith=prefix).annotate(
            document_count=Count("documents", filter=filter_q),
        )

    def _time_tag_counts(self, *, iterations: int, prefix: str, user):
        """Time a grouped per-tag document count over the user's permitted docs."""
        if not Tag.objects.filter(name__startswith=prefix).exists():
            return
        self._time_query(
            label="tag document_count (grouped)",
            iterations=iterations,
            fn=lambda: list(
                Tag.documents.through.objects.filter(
                    document_id__in=Subquery(permitted_document_ids(user)),
                )
                .values("tag_id")
                .annotate(c=Count("document_id"))
                .values_list("tag_id", "c"),
            ),
        )

    def _time_custom_field_counts(
        self,
        *,
        iterations: int,
        prefix: str,
        user,
        superuser,
    ):
        """Time grouped per-field document counts: permitted vs superuser baseline."""
        if not CustomField.objects.filter(name__startswith=prefix).exists():
            return
        permitted = Subquery(permitted_document_ids(user))
        super_permitted = CustomFieldInstance.objects.filter(
            document__deleted_at__isnull=True,
        ).values_list("document_id")

        def _run(ids_subquery):
            # Shared query shape; only the permitted-ids subquery differs.
            return list(
                CustomFieldInstance.objects.filter(
                    document_id__in=ids_subquery,
                    field__name__startswith=prefix,
                )
                .values("field_id")
                .annotate(c=Count("document_id"))
                .values_list("field_id", "c"),
            )

        self._time_query(
            label="custom fields document_count (grouped permitted)",
            iterations=iterations,
            fn=lambda: _run(permitted),
        )
        self._time_query(
            label="custom fields document_count superuser baseline",
            iterations=iterations,
            fn=lambda: _run(super_permitted),
        )

    def _time_api_get(self, *, label: str, username: str, password: str, path: str):
        """Time one authenticated GET against *path* and print a one-line result.

        A SIGALRM-based watchdog aborts requests exceeding ``--api-timeout``
        (so this method only works on the main thread of a Unix process).
        django.request logging and stderr are suppressed for the duration so
        expected 500s/timeouts don't spam the console.
        """
        client = APIClient()
        client.raise_request_exception = False
        if not client.login(username=username, password=password):
            self.stdout.write(f"{label}: login failed")
            return
        timeout_s = float(self.options.get("api_timeout", 30.0))
        logger = logging.getLogger("django.request")
        prev_level = logger.level
        logger.setLevel(logging.CRITICAL)

        def _timeout_handler(signum, frame):  # pragma: no cover
            raise TimeoutError

        old_handler = signal.signal(signal.SIGALRM, _timeout_handler)
        signal.alarm(max(1, int(timeout_s)))
        try:
            reset_queries()
            start = perf_counter()
            with contextlib.redirect_stderr(io.StringIO()):
                resp = client.get(path, format="json")
            duration = perf_counter() - start
            size = None
            if resp.headers.get("Content-Type", "").startswith("application/json"):
                data = resp.json()
                if isinstance(data, dict) and "results" in data:
                    size = len(data.get("results", []))
                elif isinstance(data, list):
                    size = len(data)
            # Heuristic: a 500 arriving right at the deadline is treated as a
            # timeout that surfaced as a server error rather than TimeoutError.
            if resp.status_code == 500 and duration >= timeout_s - 0.1:
                self.stdout.write(
                    self.style.ERROR(f"{label}: TIMEOUT after {timeout_s:.1f}s"),
                )
            elif resp.status_code == 200:
                self.stdout.write(
                    self.style.SUCCESS(
                        f"{label}: status={resp.status_code} time={duration:.4f}s items={size}",
                    ),
                )
            else:
                self.stdout.write(
                    self.style.WARNING(
                        f"{label}: status={resp.status_code} time={duration:.4f}s",
                    ),
                )
        except TimeoutError:
            self.stdout.write(f"{label}: TIMEOUT after {timeout_s:.1f}s")
        except Exception as e:
            text = str(e)
            self.stdout.write(f"{label}: ERROR {text}")
        finally:
            # Always disarm the alarm and restore the prior handler/log level.
            signal.alarm(0)
            signal.signal(signal.SIGALRM, old_handler)
            logger.setLevel(prev_level)
            connections.close_all()

    def _time_query(self, *, label: str, iterations: int, fn):
        """Run *fn* *iterations* times and print min/avg/max wall-clock timings."""
        durations = []
        for _ in range(iterations):
            reset_queries()
            start = perf_counter()
            fn()
            durations.append(perf_counter() - start)
        avg = sum(durations) / len(durations)
        self.stdout.write(
            f"{label}: min={min(durations):.4f}s avg={avg:.4f}s max={max(durations):.4f}s",
        )

    def _unique_checksum(self, idx: int) -> str:
        """Return a unique 32-char checksum for a seeded document.

        NOTE(review): ``uuid4().hex`` is already 32 chars, so the appended
        ``{idx:08d}`` is always sliced off by ``[:32]`` — uniqueness rests on
        uuid4 randomness alone. Confirm whether the idx suffix was meant to
        contribute (e.g. as a prefix) or can be dropped.
        """
        return f"{uuid.uuid4().hex}{idx:08d}"[:32]

View File

@@ -580,30 +580,34 @@ class TagSerializer(MatchingModelSerializer, OwnedObjectSerializer):
),
)
def get_children(self, obj):
filter_q = self.context.get("document_count_filter")
request = self.context.get("request")
if filter_q is None:
user = getattr(request, "user", None) if request else None
filter_q = get_document_count_filter_for_user(user)
self.context["document_count_filter"] = filter_q
children_map = self.context.get("children_map")
if children_map is not None:
children = children_map.get(obj.pk, [])
else:
filter_q = self.context.get("document_count_filter")
request = self.context.get("request")
if filter_q is None:
user = getattr(request, "user", None) if request else None
filter_q = get_document_count_filter_for_user(user)
self.context["document_count_filter"] = filter_q
children_queryset = (
obj.get_children_queryset()
.select_related("owner")
.annotate(document_count=Count("documents", filter=filter_q))
)
children = (
obj.get_children_queryset()
.select_related("owner")
.annotate(document_count=Count("documents", filter=filter_q))
)
view = self.context.get("view")
ordering = (
OrderingFilter().get_ordering(request, children_queryset, view)
if request and view
else None
)
ordering = ordering or (Lower("name"),)
children_queryset = children_queryset.order_by(*ordering)
view = self.context.get("view")
ordering = (
OrderingFilter().get_ordering(request, children, view)
if request and view
else None
)
ordering = ordering or (Lower("name"),)
children = children.order_by(*ordering)
serializer = TagSerializer(
children_queryset,
children,
many=True,
user=self.user,
full_perms=self.full_perms,

View File

@@ -180,7 +180,7 @@ class TestRewriteNaturalDateKeywords(SimpleTestCase):
(
"added:this year",
datetime(2025, 7, 15, 12, 0, 0, tzinfo=timezone.utc),
("added:[20250101", "TO 20250715"),
("added:[20250101", "TO 20251231"),
),
(
"added:previous year",

View File

@@ -448,8 +448,47 @@ class TagViewSet(ModelViewSet, PermissionsAwareDocumentCountMixin):
def get_serializer_context(self):
context = super().get_serializer_context()
context["document_count_filter"] = self.get_document_count_filter()
if hasattr(self, "_children_map"):
context["children_map"] = self._children_map
return context
def list(self, request, *args, **kwargs):
"""
Build a children map once to avoid per-parent queries in the serializer.
"""
queryset = self.filter_queryset(self.get_queryset())
ordering = OrderingFilter().get_ordering(request, queryset, self) or (
Lower("name"),
)
queryset = queryset.order_by(*ordering)
all_tags = list(queryset)
descendant_pks = {pk for tag in all_tags for pk in tag.get_descendants_pks()}
if descendant_pks:
filter_q = self.get_document_count_filter()
children_source = list(
Tag.objects.filter(pk__in=descendant_pks | {t.pk for t in all_tags})
.select_related("owner")
.annotate(document_count=Count("documents", filter=filter_q))
.order_by(*ordering),
)
else:
children_source = all_tags
children_map = {}
for tag in children_source:
children_map.setdefault(tag.tn_parent_id, []).append(tag)
self._children_map = children_map
page = self.paginate_queryset(queryset)
serializer = self.get_serializer(page, many=True)
response = self.get_paginated_response(serializer.data)
if descendant_pks:
# Include children in the "all" field, if needed
response.data["all"] = [tag.pk for tag in children_source]
return response
def perform_update(self, serializer):
old_parent = self.get_object().get_parent()
tag = serializer.save()

View File

@@ -1,6 +1,6 @@
from typing import Final
__version__: Final[tuple[int, int, int]] = (2, 20, 4)
__version__: Final[tuple[int, int, int]] = (2, 20, 5)
# Version string like X.Y.Z
__full_version_str__: Final[str] = ".".join(map(str, __version__))
# Version string like X.Y

2
uv.lock generated
View File

@@ -2115,7 +2115,7 @@ wheels = [
[[package]]
name = "paperless-ngx"
version = "2.20.4"
version = "2.20.5"
source = { virtual = "." }
dependencies = [
{ name = "babel", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },