Compare commits

..

8 Commits

Author SHA1 Message Date
shamoon
1f432a3378 Merge branch 'release/v2.20.x' 2026-01-20 16:30:16 -08:00
shamoon
16cc704539 Merge branch 'release/v2.20.x' 2026-01-19 10:58:39 -08:00
github-actions[bot]
245d9fb4a1 Documentation: Add v2.20.5 changelog (#11824)
---------

Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: shamoon <4887959+shamoon@users.noreply.github.com>
2026-01-19 10:57:26 -08:00
github-actions[bot]
71ecdc528e Documentation: Add v2.20.4 changelog (#11772)
---------

Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: shamoon <4887959+shamoon@users.noreply.github.com>
2026-01-13 11:51:37 -08:00
shamoon
00ec8a577b Merge branch 'hotfix/v2.20.4' 2026-01-13 11:45:55 -08:00
Antoine Mérino
65aed2405c Documentation: update notes for DB pool size (#11600) 2025-12-30 13:06:21 -08:00
shamoon
985dc9be31 Documentation: default bool value consistency 2025-12-26 08:17:44 -08:00
github-actions[bot]
305d764805 Changelog v2.20.3 - GHA (#11623) 2025-12-18 08:12:05 -08:00
31 changed files with 111 additions and 845 deletions

View File

@@ -617,7 +617,7 @@ jobs:
version: ${{ steps.get_version.outputs.version }}
prerelease: ${{ steps.get_version.outputs.prerelease }}
publish: true # ensures release is not marked as draft
commitish: ${{ github.sha }}
committish: ${{ github.sha }}
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Upload release archive

View File

@@ -19,8 +19,7 @@ for command in decrypt_documents \
manage_superuser \
convert_mariadb_uuid \
prune_audit_logs \
createsuperuser \
document_perf_benchmark;
createsuperuser;
do
echo "installing $command..."
sed "s/management_command/$command/g" management_script.sh >"$PWD/rootfs/usr/local/bin/$command"

View File

@@ -5,13 +5,10 @@ set -e
cd "${PAPERLESS_SRC_DIR}"
if [[ -n "${USER_IS_NON_ROOT}" ]]; then
python3 manage.py management_command "$@"
elif [[ $(id -u) == 0 ]]; then
if [[ $(id -u) == 0 ]]; then
s6-setuidgid paperless python3 manage.py management_command "$@"
elif [[ $(id -un) == "paperless" ]]; then
python3 manage.py management_command "$@"
else
echo "Unknown user."
exit 1
fi

View File

@@ -5,13 +5,10 @@ set -e
cd "${PAPERLESS_SRC_DIR}"
if [[ -n "${USER_IS_NON_ROOT}" ]]; then
python3 manage.py convert_mariadb_uuid "$@"
elif [[ $(id -u) == 0 ]]; then
if [[ $(id -u) == 0 ]]; then
s6-setuidgid paperless python3 manage.py convert_mariadb_uuid "$@"
elif [[ $(id -un) == "paperless" ]]; then
python3 manage.py convert_mariadb_uuid "$@"
else
echo "Unknown user."
exit 1
fi

View File

@@ -5,13 +5,10 @@ set -e
cd "${PAPERLESS_SRC_DIR}"
if [[ -n "${USER_IS_NON_ROOT}" ]]; then
python3 manage.py createsuperuser "$@"
elif [[ $(id -u) == 0 ]]; then
if [[ $(id -u) == 0 ]]; then
s6-setuidgid paperless python3 manage.py createsuperuser "$@"
elif [[ $(id -un) == "paperless" ]]; then
python3 manage.py createsuperuser "$@"
else
echo "Unknown user."
exit 1
fi

View File

@@ -5,13 +5,10 @@ set -e
cd "${PAPERLESS_SRC_DIR}"
if [[ -n "${USER_IS_NON_ROOT}" ]]; then
python3 manage.py decrypt_documents "$@"
elif [[ $(id -u) == 0 ]]; then
if [[ $(id -u) == 0 ]]; then
s6-setuidgid paperless python3 manage.py decrypt_documents "$@"
elif [[ $(id -un) == "paperless" ]]; then
python3 manage.py decrypt_documents "$@"
else
echo "Unknown user."
exit 1
fi

View File

@@ -5,13 +5,10 @@ set -e
cd "${PAPERLESS_SRC_DIR}"
if [[ -n "${USER_IS_NON_ROOT}" ]]; then
python3 manage.py document_archiver "$@"
elif [[ $(id -u) == 0 ]]; then
if [[ $(id -u) == 0 ]]; then
s6-setuidgid paperless python3 manage.py document_archiver "$@"
elif [[ $(id -un) == "paperless" ]]; then
python3 manage.py document_archiver "$@"
else
echo "Unknown user."
exit 1
fi

View File

@@ -5,17 +5,10 @@ set -e
cd "${PAPERLESS_SRC_DIR}"
if [[ -n "${USER_IS_NON_ROOT}" ]]; then
python3 manage.py document_create_classifier "$@"
elif [[ $(id -u) == 0 ]]; then
if [[ $(id -u) == 0 ]]; then
s6-setuidgid paperless python3 manage.py document_create_classifier "$@"
elif [[ $(id -un) == "paperless" ]]; then
python3 manage.py document_create_classifier "$@"
else
echo "Unknown user."
exit 1
fi
er "$@"
elif [[ $(id -un) == "paperless" ]]; then
s6-setuidgid paperless python3 manage.py document_create_classifier "$@"
fi

View File

@@ -5,13 +5,10 @@ set -e
cd "${PAPERLESS_SRC_DIR}"
if [[ -n "${USER_IS_NON_ROOT}" ]]; then
python3 manage.py document_exporter "$@"
elif [[ $(id -u) == 0 ]]; then
if [[ $(id -u) == 0 ]]; then
s6-setuidgid paperless python3 manage.py document_exporter "$@"
elif [[ $(id -un) == "paperless" ]]; then
python3 manage.py document_exporter "$@"
else
echo "Unknown user."
exit 1
fi

View File

@@ -5,13 +5,10 @@ set -e
cd "${PAPERLESS_SRC_DIR}"
if [[ -n "${USER_IS_NON_ROOT}" ]]; then
python3 manage.py document_fuzzy_match "$@"
elif [[ $(id -u) == 0 ]]; then
if [[ $(id -u) == 0 ]]; then
s6-setuidgid paperless python3 manage.py document_fuzzy_match "$@"
elif [[ $(id -un) == "paperless" ]]; then
python3 manage.py document_fuzzy_match "$@"
else
echo "Unknown user."
exit 1
fi

View File

@@ -5,13 +5,10 @@ set -e
cd "${PAPERLESS_SRC_DIR}"
if [[ -n "${USER_IS_NON_ROOT}" ]]; then
python3 manage.py document_importer "$@"
elif [[ $(id -u) == 0 ]]; then
if [[ $(id -u) == 0 ]]; then
s6-setuidgid paperless python3 manage.py document_importer "$@"
elif [[ $(id -un) == "paperless" ]]; then
python3 manage.py document_importer "$@"
else
echo "Unknown user."
exit 1
fi

View File

@@ -5,13 +5,10 @@ set -e
cd "${PAPERLESS_SRC_DIR}"
if [[ -n "${USER_IS_NON_ROOT}" ]]; then
python3 manage.py document_index "$@"
elif [[ $(id -u) == 0 ]]; then
if [[ $(id -u) == 0 ]]; then
s6-setuidgid paperless python3 manage.py document_index "$@"
elif [[ $(id -un) == "paperless" ]]; then
python3 manage.py document_index "$@"
else
echo "Unknown user."
exit 1
fi

View File

@@ -1,17 +0,0 @@
#!/command/with-contenv /usr/bin/bash
# shellcheck shell=bash
set -e
cd "${PAPERLESS_SRC_DIR}"
if [[ -n "${USER_IS_NON_ROOT}" ]]; then
python3 manage.py document_perf_benchmark "$@"
elif [[ $(id -u) == 0 ]]; then
s6-setuidgid paperless python3 manage.py document_perf_benchmark "$@"
elif [[ $(id -un) == "paperless" ]]; then
python3 manage.py document_perf_benchmark "$@"
else
echo "Unknown user."
exit 1
fi

View File

@@ -5,13 +5,10 @@ set -e
cd "${PAPERLESS_SRC_DIR}"
if [[ -n "${USER_IS_NON_ROOT}" ]]; then
python3 manage.py document_renamer "$@"
elif [[ $(id -u) == 0 ]]; then
if [[ $(id -u) == 0 ]]; then
s6-setuidgid paperless python3 manage.py document_renamer "$@"
elif [[ $(id -un) == "paperless" ]]; then
python3 manage.py document_renamer "$@"
else
echo "Unknown user."
exit 1
fi

View File

@@ -5,13 +5,10 @@ set -e
cd "${PAPERLESS_SRC_DIR}"
if [[ -n "${USER_IS_NON_ROOT}" ]]; then
python3 manage.py document_retagger "$@"
elif [[ $(id -u) == 0 ]]; then
if [[ $(id -u) == 0 ]]; then
s6-setuidgid paperless python3 manage.py document_retagger "$@"
elif [[ $(id -un) == "paperless" ]]; then
python3 manage.py document_retagger "$@"
else
echo "Unknown user."
exit 1
fi

View File

@@ -5,13 +5,10 @@ set -e
cd "${PAPERLESS_SRC_DIR}"
if [[ -n "${USER_IS_NON_ROOT}" ]]; then
python3 manage.py document_sanity_checker "$@"
elif [[ $(id -u) == 0 ]]; then
if [[ $(id -u) == 0 ]]; then
s6-setuidgid paperless python3 manage.py document_sanity_checker "$@"
elif [[ $(id -un) == "paperless" ]]; then
python3 manage.py document_sanity_checker "$@"
else
echo "Unknown user."
exit 1
fi

View File

@@ -5,13 +5,10 @@ set -e
cd "${PAPERLESS_SRC_DIR}"
if [[ -n "${USER_IS_NON_ROOT}" ]]; then
python3 manage.py document_thumbnails "$@"
elif [[ $(id -u) == 0 ]]; then
if [[ $(id -u) == 0 ]]; then
s6-setuidgid paperless python3 manage.py document_thumbnails "$@"
elif [[ $(id -un) == "paperless" ]]; then
python3 manage.py document_thumbnails "$@"
else
echo "Unknown user."
exit 1
fi

View File

@@ -5,13 +5,10 @@ set -e
cd "${PAPERLESS_SRC_DIR}"
if [[ -n "${USER_IS_NON_ROOT}" ]]; then
python3 manage.py mail_fetcher "$@"
elif [[ $(id -u) == 0 ]]; then
if [[ $(id -u) == 0 ]]; then
s6-setuidgid paperless python3 manage.py mail_fetcher "$@"
elif [[ $(id -un) == "paperless" ]]; then
python3 manage.py mail_fetcher "$@"
else
echo "Unknown user."
exit 1
fi

View File

@@ -5,13 +5,10 @@ set -e
cd "${PAPERLESS_SRC_DIR}"
if [[ -n "${USER_IS_NON_ROOT}" ]]; then
python3 manage.py manage_superuser "$@"
elif [[ $(id -u) == 0 ]]; then
if [[ $(id -u) == 0 ]]; then
s6-setuidgid paperless python3 manage.py manage_superuser "$@"
elif [[ $(id -un) == "paperless" ]]; then
python3 manage.py manage_superuser "$@"
else
echo "Unknown user."
exit 1
fi

View File

@@ -5,13 +5,10 @@ set -e
cd "${PAPERLESS_SRC_DIR}"
if [[ -n "${USER_IS_NON_ROOT}" ]]; then
python3 manage.py prune_audit_logs "$@"
elif [[ $(id -u) == 0 ]]; then
if [[ $(id -u) == 0 ]]; then
s6-setuidgid paperless python3 manage.py prune_audit_logs "$@"
elif [[ $(id -un) == "paperless" ]]; then
python3 manage.py prune_audit_logs "$@"
else
echo "Unknown user."
exit 1
fi

View File

@@ -1,7 +1,60 @@
# Changelog
## paperless-ngx 2.20.5
### Bug Fixes
- Fix: ensure horizontal scroll for long tag names in list, wrap tags without parent [@shamoon](https://github.com/shamoon) ([#11811](https://github.com/paperless-ngx/paperless-ngx/pull/11811))
- Fix: use explicit order field for workflow actions [@shamoon](https://github.com/shamoon) [@stumpylog](https://github.com/stumpylog) ([#11781](https://github.com/paperless-ngx/paperless-ngx/pull/11781))
### All App Changes
<details>
<summary>2 changes</summary>
- Fix: ensure horizontal scroll for long tag names in list, wrap tags without parent [@shamoon](https://github.com/shamoon) ([#11811](https://github.com/paperless-ngx/paperless-ngx/pull/11811))
- Fix: use explicit order field for workflow actions [@shamoon](https://github.com/shamoon) [@stumpylog](https://github.com/stumpylog) ([#11781](https://github.com/paperless-ngx/paperless-ngx/pull/11781))
</details>
## paperless-ngx 2.20.4
### Security
- Resolve [GHSA-28cf-xvcf-hw6m](https://github.com/paperless-ngx/paperless-ngx/security/advisories/GHSA-28cf-xvcf-hw6m)
### Bug Fixes
- Fix: propagate metadata override created value [@shamoon](https://github.com/shamoon) ([#11659](https://github.com/paperless-ngx/paperless-ngx/pull/11659))
- Fix: support ordering by storage path name [@shamoon](https://github.com/shamoon) ([#11661](https://github.com/paperless-ngx/paperless-ngx/pull/11661))
- Fix: validate cf integer values within PostgreSQL range [@shamoon](https://github.com/shamoon) ([#11666](https://github.com/paperless-ngx/paperless-ngx/pull/11666))
- Fixhancement: add error handling and retry when opening index [@shamoon](https://github.com/shamoon) ([#11731](https://github.com/paperless-ngx/paperless-ngx/pull/11731))
- Fix: fix recurring workflow to respect latest run time [@shamoon](https://github.com/shamoon) ([#11735](https://github.com/paperless-ngx/paperless-ngx/pull/11735))
### All App Changes
<details>
<summary>5 changes</summary>
- Fix: propagate metadata override created value [@shamoon](https://github.com/shamoon) ([#11659](https://github.com/paperless-ngx/paperless-ngx/pull/11659))
- Fix: support ordering by storage path name [@shamoon](https://github.com/shamoon) ([#11661](https://github.com/paperless-ngx/paperless-ngx/pull/11661))
- Fix: validate cf integer values within PostgreSQL range [@shamoon](https://github.com/shamoon) ([#11666](https://github.com/paperless-ngx/paperless-ngx/pull/11666))
- Fixhancement: add error handling and retry when opening index [@shamoon](https://github.com/shamoon) ([#11731](https://github.com/paperless-ngx/paperless-ngx/pull/11731))
- Fix: fix recurring workflow to respect latest run time [@shamoon](https://github.com/shamoon) ([#11735](https://github.com/paperless-ngx/paperless-ngx/pull/11735))
</details>
## paperless-ngx 2.20.3
### Security
- Resolve [GHSA-7cq3-mhxq-w946](https://github.com/paperless-ngx/paperless-ngx/security/advisories/GHSA-7cq3-mhxq-w946)
## paperless-ngx 2.20.2
### Security
- Resolve [GHSA-6653-vcx4-69mc](https://github.com/paperless-ngx/paperless-ngx/security/advisories/GHSA-6653-vcx4-69mc)
- Resolve [GHSA-24x5-wp64-9fcc](https://github.com/paperless-ngx/paperless-ngx/security/advisories/GHSA-24x5-wp64-9fcc)
### Features / Enhancements
- Tweakhancement: dim inactive users in users-groups list [@shamoon](https://github.com/shamoon) ([#11537](https://github.com/paperless-ngx/paperless-ngx/pull/11537))

View File

@@ -170,11 +170,18 @@ Available options are `postgresql` and `mariadb`.
!!! note
A small pool is typically sufficient — for example, a size of 4.
Make sure your PostgreSQL server's max_connections setting is large enough to handle:
```(Paperless workers + Celery workers) × pool size + safety margin```
For example, with 4 Paperless workers and 2 Celery workers, and a pool size of 4:
(4 + 2) × 4 + 10 = 34 connections required.
A pool of 8-10 connections per worker is typically sufficient.
If you encounter error messages such as `couldn't get a connection`
or database connection timeouts, you probably need to increase the pool size.
!!! warning
Make sure your PostgreSQL `max_connections` setting is large enough to handle the connection pools:
`(NB_PAPERLESS_WORKERS + NB_CELERY_WORKERS) × POOL_SIZE + SAFETY_MARGIN`. For example, with
4 Paperless workers and 2 Celery workers, and a pool size of 8:``(4 + 2) × 8 + 10 = 58`,
so `max_connections = 60` (or even more) is appropriate.
This assumes only Paperless-ngx connects to your PostgreSQL instance. If you have other applications,
you should increase `max_connections` accordingly.
#### [`PAPERLESS_DB_READ_CACHE_ENABLED=<bool>`](#PAPERLESS_DB_READ_CACHE_ENABLED) {#PAPERLESS_DB_READ_CACHE_ENABLED}
@@ -1007,7 +1014,7 @@ still perform some basic text pre-processing before matching.
: See also `PAPERLESS_NLTK_DIR`.
Defaults to 1.
Defaults to true, enabling the feature.
#### [`PAPERLESS_DATE_PARSER_LANGUAGES=<lang>`](#PAPERLESS_DATE_PARSER_LANGUAGES) {#PAPERLESS_DATE_PARSER_LANGUAGES}
@@ -1074,7 +1081,7 @@ valid crontab(5) expression describing when to run.
: Enables compression of the responses from the webserver.
: Defaults to 1, enabling compression.
: Defaults to true, enabling compression.
!!! note

View File

@@ -238,7 +238,7 @@ lint.isort.force-single-line = true
[tool.codespell]
write-changes = true
ignore-words-list = "criterias,afterall,valeu,ureue,equest,ure,assertIn,Oktober,commitish"
ignore-words-list = "criterias,afterall,valeu,ureue,equest,ure,assertIn,Oktober"
skip = "src-ui/src/locale/*,src-ui/pnpm-lock.yaml,src-ui/e2e/*,src/paperless_mail/tests/samples/*,src/documents/tests/samples/*,*.po,*.json"
[tool.pytest.ini_options]

View File

@@ -229,21 +229,6 @@ describe('ManagementListComponent', () => {
expect(reloadSpy).toHaveBeenCalled()
})
it('should use the all list length for collection size when provided', fakeAsync(() => {
jest.spyOn(tagService, 'listFiltered').mockReturnValueOnce(
of({
count: 1,
all: [1, 2, 3],
results: tags.slice(0, 1),
})
)
component.reloadData()
tick(100)
expect(component.collectionSize).toBe(3)
}))
it('should support quick filter for objects', () => {
const qfSpy = jest.spyOn(documentListViewService, 'quickFilter')
const filterButton = fixture.debugElement.queryAll(By.css('button'))[9]

View File

@@ -171,7 +171,7 @@ export abstract class ManagementListComponent<T extends MatchingModel>
tap((c) => {
this.unfilteredData = c.results
this.data = this.filterData(c.results)
this.collectionSize = c.all?.length ?? c.count
this.collectionSize = c.count
}),
delay(100)
)

View File

@@ -115,7 +115,7 @@ class DocumentMetadataOverrides:
).values_list("id", flat=True),
)
overrides.custom_fields = {
custom_field.field.id: custom_field.value
custom_field.id: custom_field.value
for custom_field in doc.custom_fields.all()
}

View File

@@ -602,7 +602,7 @@ def rewrite_natural_date_keywords(query_string: str) -> str:
case "this year":
start = datetime(local_now.year, 1, 1, 0, 0, 0, tzinfo=tz)
end = datetime(local_now.year, 12, 31, 23, 59, 59, tzinfo=tz)
end = datetime.combine(today, time.max, tzinfo=tz)
case "previous week":
days_since_monday = local_now.weekday()

View File

@@ -1,663 +0,0 @@
import contextlib
import io
import logging
import math
import signal
import uuid
from time import perf_counter
from django.contrib.auth import get_user_model
from django.contrib.auth.models import Permission
from django.core.management.base import BaseCommand
from django.core.management.base import CommandError
from django.db import connections
from django.db import reset_queries
from django.db.models import Count
from django.db.models import Q
from django.db.models import Subquery
from guardian.shortcuts import assign_perm
from rest_framework.test import APIClient
from documents.models import CustomField
from documents.models import CustomFieldInstance
from documents.models import Document
from documents.models import Tag
from documents.permissions import get_objects_for_user_owner_aware
from documents.permissions import permitted_document_ids
class Command(BaseCommand):
# e.g. docker compose exec webserver / manage.py ...
# document_perf_benchmark --reuse-existing --documents 500000 --chunk-size 5000 --tags 40 --tags-per-doc 3 --custom-fields 6 --custom-fields-per-doc 2
help = (
"Seed a synthetic dataset and benchmark permission-filtered document queries "
"for superusers vs non-superusers."
)
def add_arguments(self, parser):
parser.add_argument(
"--documents",
type=int,
default=10000,
help="Total documents to generate (default: 10,000)",
)
parser.add_argument(
"--owner-ratio",
type=float,
default=0.6,
help="Fraction owned by the benchmarked user (default: 0.6)",
)
parser.add_argument(
"--unowned-ratio",
type=float,
default=0.1,
help="Fraction of unowned documents (default: 0.1)",
)
parser.add_argument(
"--shared-ratio",
type=float,
default=0.25,
help=(
"Fraction of other-user documents that are shared via object perms "
"with the benchmarked user (default: 0.25)"
),
)
parser.add_argument(
"--chunk-size",
type=int,
default=2000,
help="Bulk create size for documents (default: 2000)",
)
parser.add_argument(
"--iterations",
type=int,
default=3,
help="Number of timing runs per query shape (default: 3)",
)
parser.add_argument(
"--prefix",
default="perf-benchmark",
help="Title prefix used to mark generated documents (default: perf-benchmark)",
)
parser.add_argument(
"--tags",
type=int,
default=0,
help="Number of tags to create and assign (default: 0)",
)
parser.add_argument(
"--tags-per-doc",
type=int,
default=1,
help="How many tags to attach to each document (default: 1)",
)
parser.add_argument(
"--custom-fields",
type=int,
default=0,
help="Number of string custom fields to create (default: 0)",
)
parser.add_argument(
"--custom-fields-per-doc",
type=int,
default=1,
help="How many custom field instances per document (default: 1)",
)
parser.add_argument(
"--skip-tags",
action="store_true",
help="Skip tag document_count benchmarks (useful for large datasets on Postgres)",
)
parser.add_argument(
"--skip-custom-fields",
action="store_true",
help="Skip custom field document_count benchmarks",
)
parser.add_argument(
"--reuse-existing",
action="store_true",
help="Keep previously generated documents with the given prefix instead of recreating",
)
parser.add_argument(
"--cleanup",
action="store_true",
help="Delete previously generated documents with the given prefix and exit",
)
parser.add_argument(
"--api-timeout",
type=float,
default=30.0,
help="Per-request timeout (seconds) for API timings (default: 30s)",
)
def handle(self, *args, **options):
# keep options for downstream checks
self.options = options
document_total = options["documents"]
owner_ratio = options["owner_ratio"]
unowned_ratio = options["unowned_ratio"]
shared_ratio = options["shared_ratio"]
chunk_size = options["chunk_size"]
iterations = options["iterations"]
prefix = options["prefix"]
tags = options["tags"]
tags_per_doc = options["tags_per_doc"]
custom_fields = options["custom_fields"]
custom_fields_per_doc = options["custom_fields_per_doc"]
self._validate_ratios(owner_ratio, unowned_ratio)
if tags_per_doc < 0 or custom_fields_per_doc < 0:
raise CommandError("Per-document counts must be non-negative")
target_user, other_user, superuser = self._ensure_users()
skip_seed = False
if options["cleanup"]:
removed = self._cleanup(prefix)
self.stdout.write(
self.style.SUCCESS(f"Removed {removed} generated documents"),
)
return
if not options["reuse_existing"]:
removed = self._cleanup(prefix)
if removed:
self.stdout.write(f"Removed existing generated documents: {removed}")
else:
existing = Document.objects.filter(title__startswith=prefix).count()
if existing:
skip_seed = True
self.stdout.write(
f"Reusing existing dataset with prefix '{prefix}': {existing} docs",
)
if skip_seed:
dataset_size = Document.objects.filter(title__startswith=prefix).count()
self.stdout.write(
self.style.SUCCESS(
f"Dataset ready (reused): {dataset_size} docs | prefix={prefix}",
),
)
else:
self.stdout.write(
f"Seeding {document_total} documents (owner_ratio={owner_ratio}, "
f"unowned_ratio={unowned_ratio}, shared_ratio={shared_ratio})",
)
created_counts = self._seed_documents(
total=document_total,
owner_ratio=owner_ratio,
unowned_ratio=unowned_ratio,
shared_ratio=shared_ratio,
chunk_size=chunk_size,
prefix=prefix,
target_user=target_user,
other_user=other_user,
)
created_tags = []
if tags:
created_tags = self._seed_tags(prefix=prefix, count=tags)
if tags_per_doc and created_tags:
self._assign_tags_to_documents(
prefix=prefix,
tags=created_tags,
tags_per_doc=tags_per_doc,
chunk_size=chunk_size,
)
created_custom_fields = []
if custom_fields:
created_custom_fields = self._seed_custom_fields(prefix, custom_fields)
if custom_fields_per_doc and created_custom_fields:
self._seed_custom_field_instances(
prefix=prefix,
custom_fields=created_custom_fields,
per_doc=custom_fields_per_doc,
chunk_size=chunk_size,
)
dataset_size = Document.objects.filter(title__startswith=prefix).count()
self.stdout.write(
self.style.SUCCESS(
f"Dataset ready: {dataset_size} docs | owned by target {created_counts['owned']} | "
f"owned by other {created_counts['other_owned']} | unowned {created_counts['unowned']} | "
f"shared-perms {created_counts['shared']} | tags {len(created_tags)} | "
f"custom fields {len(created_custom_fields)}",
),
)
self.stdout.write("\nRunning benchmarks...\n")
self._run_benchmarks(
iterations=iterations,
target_user=target_user,
superuser=superuser,
prefix=prefix,
)
def _validate_ratios(self, owner_ratio: float, unowned_ratio: float):
if owner_ratio < 0 or unowned_ratio < 0:
raise CommandError("Ratios must be non-negative")
if owner_ratio + unowned_ratio > 1:
raise CommandError("owner-ratio + unowned-ratio cannot exceed 1.0")
def _ensure_users(self):
User = get_user_model()
target_user, _ = User.objects.get_or_create(
username="perf_user",
defaults={"email": "perf_user@example.com"},
)
target_user.set_password("perf_user")
target_user.is_staff = True
target_user.save()
other_user, _ = User.objects.get_or_create(
username="perf_owner",
defaults={"email": "perf_owner@example.com"},
)
other_user.set_password("perf_owner")
other_user.is_staff = True
other_user.save()
superuser, _ = User.objects.get_or_create(
username="perf_admin",
defaults={
"email": "perf_admin@example.com",
"is_staff": True,
"is_superuser": True,
},
)
superuser.set_password("perf_admin")
superuser.save()
perms = Permission.objects.all()
target_user.user_permissions.set(perms)
other_user.user_permissions.set(perms)
return target_user, other_user, superuser
def _cleanup(self, prefix: str) -> int:
docs_qs = Document.global_objects.filter(title__startswith=prefix)
doc_count = docs_qs.count()
if doc_count:
docs_qs.hard_delete()
tag_count = Tag.objects.filter(name__startswith=prefix).count()
if tag_count:
Tag.objects.filter(name__startswith=prefix).delete()
cf_qs = CustomField.objects.filter(name__startswith=prefix)
cf_count = cf_qs.count()
if cf_count:
cf_qs.delete()
cfi_qs = CustomFieldInstance.global_objects.filter(
document__title__startswith=prefix,
)
cfi_count = cfi_qs.count()
if cfi_count:
cfi_qs.hard_delete()
return doc_count + tag_count + cf_count + cfi_count
def _seed_documents(
self,
*,
total: int,
owner_ratio: float,
unowned_ratio: float,
shared_ratio: float,
chunk_size: int,
prefix: str,
target_user,
other_user,
) -> dict[str, int]:
target_count = math.floor(total * owner_ratio)
unowned_count = math.floor(total * unowned_ratio)
other_count = total - target_count - unowned_count
documents: list[Document] = []
other_docs: list[Document] = []
for idx in range(total):
if idx < target_count:
owner = target_user
elif idx < target_count + other_count:
owner = other_user
else:
owner = None
doc = Document(
owner=owner,
title=f"{prefix}-{idx:07d}",
mime_type="application/pdf",
checksum=self._unique_checksum(idx),
page_count=1,
)
if owner is other_user:
other_docs.append(doc)
documents.append(doc)
if len(documents) >= chunk_size:
Document.objects.bulk_create(documents, batch_size=chunk_size)
documents.clear()
if documents:
Document.objects.bulk_create(documents, batch_size=chunk_size)
shared_target = math.floor(len(other_docs) * shared_ratio)
for doc in other_docs[:shared_target]:
assign_perm("documents.view_document", target_user, doc)
return {
"owned": target_count,
"other_owned": other_count,
"unowned": unowned_count,
"shared": shared_target,
}
def _seed_tags(self, *, prefix: str, count: int) -> list[Tag]:
tags = [
Tag(
name=f"{prefix}-tag-{idx:03d}",
)
for idx in range(count)
]
Tag.objects.bulk_create(tags, ignore_conflicts=True)
return list(Tag.objects.filter(name__startswith=prefix))
def _assign_tags_to_documents(
self,
*,
prefix: str,
tags: list[Tag],
tags_per_doc: int,
chunk_size: int,
):
if not tags or tags_per_doc < 1:
return
rels = []
through = Document.tags.through
tag_ids = [t.id for t in tags]
tag_count = len(tag_ids)
iterator = (
Document.objects.filter(title__startswith=prefix)
.values_list(
"id",
flat=True,
)
.iterator()
)
for idx, doc_id in enumerate(iterator):
start = idx % tag_count
chosen = set()
for offset in range(tags_per_doc):
tag_id = tag_ids[(start + offset) % tag_count]
if tag_id in chosen:
continue
chosen.add(tag_id)
rels.append(through(document_id=doc_id, tag_id=tag_id))
if len(rels) >= chunk_size:
through.objects.bulk_create(rels, ignore_conflicts=True)
rels.clear()
if rels:
through.objects.bulk_create(rels, ignore_conflicts=True)
def _seed_custom_fields(self, prefix: str, count: int) -> list[CustomField]:
fields = [
CustomField(
name=f"{prefix}-cf-{idx:03d}",
data_type=CustomField.FieldDataType.STRING,
)
for idx in range(count)
]
CustomField.objects.bulk_create(fields, ignore_conflicts=True)
return list(CustomField.objects.filter(name__startswith=prefix))
def _seed_custom_field_instances(
self,
*,
prefix: str,
custom_fields: list[CustomField],
per_doc: int,
chunk_size: int,
):
if not custom_fields or per_doc < 1:
return
instances = []
cf_ids = [cf.id for cf in custom_fields]
cf_count = len(cf_ids)
iterator = (
Document.objects.filter(title__startswith=prefix)
.values_list(
"id",
flat=True,
)
.iterator()
)
for idx, doc_id in enumerate(iterator):
start = idx % cf_count
for offset in range(per_doc):
cf_id = cf_ids[(start + offset) % cf_count]
instances.append(
CustomFieldInstance(
document_id=doc_id,
field_id=cf_id,
value_text=f"val-{doc_id}-{cf_id}",
),
)
if len(instances) >= chunk_size:
CustomFieldInstance.objects.bulk_create(
instances,
batch_size=chunk_size,
ignore_conflicts=True,
)
instances.clear()
if instances:
CustomFieldInstance.objects.bulk_create(
instances,
batch_size=chunk_size,
ignore_conflicts=True,
)
def _run_benchmarks(self, *, iterations: int, target_user, superuser, prefix: str):
self.stdout.write("-> API benchmarks")
for user in ("perf_admin", "perf_user"):
pwd = user
self.stdout.write(f"-> API documents ({user})")
self._time_api_get(
label=f"{user} /api/documents/",
username=user,
password=pwd,
path="/api/documents/",
)
self.stdout.write(f"-> API tags ({user})")
self._time_api_get(
label=f"{user} /api/tags/",
username=user,
password=pwd,
path="/api/tags/",
)
self.stdout.write(f"-> API custom fields ({user})")
self._time_api_get(
label=f"{user} /api/custom_fields/",
username=user,
password=pwd,
path="/api/custom_fields/",
)
def _count_with_values_list(self, user) -> int:
qs = get_objects_for_user_owner_aware(
user,
"documents.view_document",
Document,
)
return Document.objects.filter(id__in=qs.values_list("id", flat=True)).count()
def _count_with_subquery(self, user) -> int:
qs = get_objects_for_user_owner_aware(
user,
"documents.view_document",
Document,
)
subquery = Subquery(qs.values_list("id"))
return Document.objects.filter(id__in=subquery).count()
def _document_filter(self, user, *, use_subquery: bool):
if user is None or getattr(user, "is_superuser", False):
return Q(documents__deleted_at__isnull=True)
qs = get_objects_for_user_owner_aware(
user,
"documents.view_document",
Document,
)
ids = (
Subquery(qs.values_list("id"))
if use_subquery
else qs.values_list("id", flat=True)
)
return Q(documents__deleted_at__isnull=True, documents__id__in=ids)
def _tag_queryset(self, *, prefix: str, filter_q: Q):
return Tag.objects.filter(name__startswith=prefix).annotate(
document_count=Count("documents", filter=filter_q),
)
def _time_tag_counts(self, *, iterations: int, prefix: str, user):
if not Tag.objects.filter(name__startswith=prefix).exists():
return
self._time_query(
label="tag document_count (grouped)",
iterations=iterations,
fn=lambda: list(
Tag.documents.through.objects.filter(
document_id__in=Subquery(permitted_document_ids(user)),
)
.values("tag_id")
.annotate(c=Count("document_id"))
.values_list("tag_id", "c"),
),
)
def _time_custom_field_counts(
self,
*,
iterations: int,
prefix: str,
user,
superuser,
):
if not CustomField.objects.filter(name__startswith=prefix).exists():
return
permitted = Subquery(permitted_document_ids(user))
super_permitted = CustomFieldInstance.objects.filter(
document__deleted_at__isnull=True,
).values_list("document_id")
def _run(ids_subquery):
return list(
CustomFieldInstance.objects.filter(
document_id__in=ids_subquery,
field__name__startswith=prefix,
)
.values("field_id")
.annotate(c=Count("document_id"))
.values_list("field_id", "c"),
)
self._time_query(
label="custom fields document_count (grouped permitted)",
iterations=iterations,
fn=lambda: _run(permitted),
)
self._time_query(
label="custom fields document_count superuser baseline",
iterations=iterations,
fn=lambda: _run(super_permitted),
)
def _time_api_get(self, *, label: str, username: str, password: str, path: str):
client = APIClient()
client.raise_request_exception = False
if not client.login(username=username, password=password):
self.stdout.write(f"{label}: login failed")
return
timeout_s = float(self.options.get("api_timeout", 30.0))
logger = logging.getLogger("django.request")
prev_level = logger.level
logger.setLevel(logging.CRITICAL)
def _timeout_handler(signum, frame): # pragma: no cover
raise TimeoutError
old_handler = signal.signal(signal.SIGALRM, _timeout_handler)
signal.alarm(max(1, int(timeout_s)))
try:
reset_queries()
start = perf_counter()
with contextlib.redirect_stderr(io.StringIO()):
resp = client.get(path, format="json")
duration = perf_counter() - start
size = None
if resp.headers.get("Content-Type", "").startswith("application/json"):
data = resp.json()
if isinstance(data, dict) and "results" in data:
size = len(data.get("results", []))
elif isinstance(data, list):
size = len(data)
if resp.status_code == 500 and duration >= timeout_s - 0.1:
self.stdout.write(
self.style.ERROR(f"{label}: TIMEOUT after {timeout_s:.1f}s"),
)
elif resp.status_code == 200:
self.stdout.write(
self.style.SUCCESS(
f"{label}: status={resp.status_code} time={duration:.4f}s items={size}",
),
)
else:
self.stdout.write(
self.style.WARNING(
f"{label}: status={resp.status_code} time={duration:.4f}s",
),
)
except TimeoutError:
self.stdout.write(f"{label}: TIMEOUT after {timeout_s:.1f}s")
except Exception as e:
text = str(e)
self.stdout.write(f"{label}: ERROR {text}")
finally:
signal.alarm(0)
signal.signal(signal.SIGALRM, old_handler)
logger.setLevel(prev_level)
connections.close_all()
def _time_query(self, *, label: str, iterations: int, fn):
durations = []
for _ in range(iterations):
reset_queries()
start = perf_counter()
fn()
durations.append(perf_counter() - start)
avg = sum(durations) / len(durations)
self.stdout.write(
f"{label}: min={min(durations):.4f}s avg={avg:.4f}s max={max(durations):.4f}s",
)
def _unique_checksum(self, idx: int) -> str:
return f"{uuid.uuid4().hex}{idx:08d}"[:32]

View File

@@ -580,34 +580,30 @@ class TagSerializer(MatchingModelSerializer, OwnedObjectSerializer):
),
)
def get_children(self, obj):
children_map = self.context.get("children_map")
if children_map is not None:
children = children_map.get(obj.pk, [])
else:
filter_q = self.context.get("document_count_filter")
request = self.context.get("request")
if filter_q is None:
user = getattr(request, "user", None) if request else None
filter_q = get_document_count_filter_for_user(user)
self.context["document_count_filter"] = filter_q
filter_q = self.context.get("document_count_filter")
request = self.context.get("request")
if filter_q is None:
user = getattr(request, "user", None) if request else None
filter_q = get_document_count_filter_for_user(user)
self.context["document_count_filter"] = filter_q
children = (
obj.get_children_queryset()
.select_related("owner")
.annotate(document_count=Count("documents", filter=filter_q))
)
children_queryset = (
obj.get_children_queryset()
.select_related("owner")
.annotate(document_count=Count("documents", filter=filter_q))
)
view = self.context.get("view")
ordering = (
OrderingFilter().get_ordering(request, children, view)
if request and view
else None
)
ordering = ordering or (Lower("name"),)
children = children.order_by(*ordering)
view = self.context.get("view")
ordering = (
OrderingFilter().get_ordering(request, children_queryset, view)
if request and view
else None
)
ordering = ordering or (Lower("name"),)
children_queryset = children_queryset.order_by(*ordering)
serializer = TagSerializer(
children,
children_queryset,
many=True,
user=self.user,
full_perms=self.full_perms,

View File

@@ -180,7 +180,7 @@ class TestRewriteNaturalDateKeywords(SimpleTestCase):
(
"added:this year",
datetime(2025, 7, 15, 12, 0, 0, tzinfo=timezone.utc),
("added:[20250101", "TO 20251231"),
("added:[20250101", "TO 20250715"),
),
(
"added:previous year",

View File

@@ -448,47 +448,8 @@ class TagViewSet(ModelViewSet, PermissionsAwareDocumentCountMixin):
def get_serializer_context(self):
context = super().get_serializer_context()
context["document_count_filter"] = self.get_document_count_filter()
if hasattr(self, "_children_map"):
context["children_map"] = self._children_map
return context
def list(self, request, *args, **kwargs):
"""
Build a children map once to avoid per-parent queries in the serializer.
"""
queryset = self.filter_queryset(self.get_queryset())
ordering = OrderingFilter().get_ordering(request, queryset, self) or (
Lower("name"),
)
queryset = queryset.order_by(*ordering)
all_tags = list(queryset)
descendant_pks = {pk for tag in all_tags for pk in tag.get_descendants_pks()}
if descendant_pks:
filter_q = self.get_document_count_filter()
children_source = list(
Tag.objects.filter(pk__in=descendant_pks | {t.pk for t in all_tags})
.select_related("owner")
.annotate(document_count=Count("documents", filter=filter_q))
.order_by(*ordering),
)
else:
children_source = all_tags
children_map = {}
for tag in children_source:
children_map.setdefault(tag.tn_parent_id, []).append(tag)
self._children_map = children_map
page = self.paginate_queryset(queryset)
serializer = self.get_serializer(page, many=True)
response = self.get_paginated_response(serializer.data)
if descendant_pks:
# Include children in the "all" field, if needed
response.data["all"] = [tag.pk for tag in children_source]
return response
def perform_update(self, serializer):
old_parent = self.get_object().get_parent()
tag = serializer.save()