Compare commits

..

3 Commits

Author SHA1 Message Date
shamoon
6f4cdb3875 A ChatGPT script for testing 2025-12-15 14:31:59 -08:00
shamoon
02a63144cf Optimize tag children retrieval 2025-12-15 14:31:58 -08:00
shamoon
917e3f3c60 Run Tag tree updates once per transaction 2025-12-15 14:31:58 -08:00
15 changed files with 279 additions and 422 deletions

View File

@@ -275,12 +275,8 @@ jobs:
tests-frontend-e2e:
name: "Frontend E2E Tests (Node ${{ matrix.node-version }} - ${{ matrix.shard-index }}/${{ matrix.shard-count }})"
runs-on: ubuntu-24.04
container: mcr.microsoft.com/playwright:v1.57.0-noble
needs:
- install-frontend-dependencies
env:
PLAYWRIGHT_BROWSERS_PATH: /ms-playwright
PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD: 1
strategy:
fail-fast: false
matrix:
@@ -309,8 +305,19 @@ jobs:
key: ${{ runner.os }}-frontenddeps-${{ hashFiles('src-ui/pnpm-lock.yaml') }}
- name: Re-link Angular cli
run: cd src-ui && pnpm link @angular/cli
- name: Cache Playwright browsers
uses: actions/cache@v4
with:
path: ~/.cache/ms-playwright
key: ${{ runner.os }}-playwright-${{ hashFiles('src-ui/pnpm-lock.yaml') }}
restore-keys: |
${{ runner.os }}-playwright-
- name: Install Playwright system dependencies
run: npx playwright install-deps
- name: Install dependencies
run: cd src-ui && pnpm install --no-frozen-lockfile
- name: Install Playwright
run: cd src-ui && pnpm exec playwright install
- name: Run Playwright e2e tests
run: cd src-ui && pnpm exec playwright test --shard ${{ matrix.shard-index }}/${{ matrix.shard-count }}
frontend-bundle-analysis:

139
scripts/tag_perf_probe.py Normal file
View File

@@ -0,0 +1,139 @@
# noqa: INP001
"""
Ad-hoc script to gauge Tag + treenode performance locally.
It bootstraps a fresh SQLite DB in a temp folder (or PAPERLESS_DATA_DIR),
uses the in-memory locmem:// backend in place of Redis to avoid external services, creates synthetic tags,
and measures:
- creation time
- query count and wall time for the Tag list view
Usage:
PAPERLESS_DEBUG=1 PAPERLESS_REDIS=locmem:// PYTHONPATH=src \
PAPERLESS_DATA_DIR=/tmp/paperless-tags-probe \
.venv/bin/python scripts/tag_perf_probe.py
"""
import os
import sys
import time
from collections.abc import Iterable
from contextlib import contextmanager
# Defaults for running the probe standalone. setdefault() never overwrites,
# so values already exported in the environment take precedence.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "paperless.settings")
os.environ.setdefault("PAPERLESS_DEBUG", "1")
os.environ.setdefault("PAPERLESS_REDIS", "locmem://")
os.environ.setdefault("PYTHONPATH", "src")
# The interpreter reads PYTHONPATH only at start-up, so setting it above does
# not change the import path of this process. Mirror its entries into
# sys.path (preserving their order) so the settings module is importable even
# when PYTHONPATH was not exported on the command line.
for _entry in reversed(os.environ["PYTHONPATH"].split(os.pathsep)):
    if _entry and _entry not in sys.path:
        sys.path.insert(0, _entry)
import django
django.setup()
from django.contrib.auth import get_user_model # noqa: E402
from django.core.management import call_command # noqa: E402
from django.db import connection # noqa: E402
from django.test.client import RequestFactory # noqa: E402
from rest_framework.test import force_authenticate # noqa: E402
from treenode.signals import no_signals # noqa: E402
from documents.models import Tag # noqa: E402
from documents.views import TagViewSet # noqa: E402
User = get_user_model()
@contextmanager
def count_queries():
    """Count the SQL statements executed on ``connection`` inside the block.

    Yields a zero-argument callable that returns the number of statements
    seen so far by the installed execute wrapper.
    """
    executed = [0]  # one-element list so the nested function can mutate it

    def _tally(execute, sql, params, many, context):
        # Record the statement, then hand it on unchanged.
        executed[0] += 1
        return execute(sql, params, many, context)

    with connection.execute_wrapper(_tally):
        yield lambda: executed[0]
def measure_list(tag_count: int, user) -> tuple[int, float]:
    """Render the Tag list once and report its cost.

    The request is built for ``/api/tags/`` with ``page_size=tag_count`` and
    authenticated as ``user``.

    Returns:
        A ``(queries, seconds)`` tuple: the number of SQL statements counted
        while the view ran and rendered, and the wall time it took.
    """
    list_view = TagViewSet.as_view({"get": "list"})
    request = RequestFactory().get("/api/tags/", {"page_size": tag_count})
    force_authenticate(request, user=user)

    with count_queries() as executed:
        began = time.perf_counter()
        # Rendering is part of the measurement, not just calling the view.
        list_view(request).render()
        elapsed = time.perf_counter() - began
        query_total = executed()
    return query_total, elapsed
def bulk_create_tags(count: int, parents: Iterable[Tag] | None = None) -> None:
    """Insert synthetic tags with a single ``bulk_create`` call.

    Without ``parents``, ``count`` flat tags named ``Flat <i>`` are created.
    With ``parents``, exactly one child per parent is created and ``count``
    is not used.
    """
    if parents is not None:
        batch = [Tag(name=f"Child {p.id}", tn_parent=p) for p in parents]
    else:
        batch = [Tag(name=f"Flat {i}") for i in range(count)]
    Tag.objects.bulk_create(batch)
def run():
    """Run the three probe scenarios and print one result line for each.

    Every scenario deletes all existing tags, creates a synthetic set while
    timing the creation, then measures the Tag list view for that many tags.
    """
    # Ensure tables exist when pointing at a fresh DATA_DIR.
    call_command("migrate", interactive=False, verbosity=0)
    user, _ = User.objects.get_or_create(
        username="admin",
        defaults={"is_superuser": True, "is_staff": True},
    )

    def report(label: str, created_in: float, tag_count: int) -> None:
        # Measure the list view for this scenario and print the summary line.
        queries, seconds = measure_list(tag_count=tag_count, user=user)
        print(f"{label} -> {created_in:.2f}s; list -> {queries} queries, {seconds:.2f}s")  # noqa: T201

    # Flat scenario
    Tag.objects.all().delete()
    began = time.perf_counter()
    bulk_create_tags(200)
    report("Flat create 200", time.perf_counter() - began, 200)

    # Nested scenario (parents + 2 children each => 600 total)
    Tag.objects.all().delete()
    began = time.perf_counter()
    with no_signals():  # avoid per-save tree rebuild; rebuild once
        roots = Tag.objects.bulk_create(
            [Tag(name=f"Parent {i}") for i in range(200)],
        )
        Tag.objects.bulk_create(
            [
                Tag(name=f"Child {p.id}-{j}", tn_parent=p)
                for p in roots
                for j in range(2)
            ],
        )
    # NOTE(review): the single rebuild is placed after the no_signals() block,
    # matching the "rebuild once" intent - confirm against the original layout.
    Tag.update_tree()
    report("Nested create 600", time.perf_counter() - began, 600)

    # Larger nested scenario (1 child per parent, 3000 total)
    Tag.objects.all().delete()
    began = time.perf_counter()
    with no_signals():
        roots = Tag.objects.bulk_create(
            [Tag(name=f"Parent {i}") for i in range(1500)],
        )
        bulk_create_tags(0, parents=roots)
    Tag.update_tree()
    report("Nested create 3000", time.perf_counter() - began, 3000)
if __name__ == "__main__":
    # Refuse to run when "runserver" is among the arguments: print the direct
    # invocation hint and exit with status 1; otherwise run the probe.
    if "runserver" not in sys.argv:
        run()
    else:
        print("Run directly: .venv/bin/python scripts/tag_perf_probe.py")  # noqa: T201
        sys.exit(1)

View File

@@ -416,9 +416,6 @@ describe('WorkflowEditDialogComponent', () => {
return newFilter
}
const correspondentAny = addFilterOfType(TriggerFilterType.CorrespondentAny)
correspondentAny.get('values').setValue([11])
const correspondentIs = addFilterOfType(TriggerFilterType.CorrespondentIs)
correspondentIs.get('values').setValue(1)
@@ -428,18 +425,12 @@ describe('WorkflowEditDialogComponent', () => {
const documentTypeIs = addFilterOfType(TriggerFilterType.DocumentTypeIs)
documentTypeIs.get('values').setValue(1)
const documentTypeAny = addFilterOfType(TriggerFilterType.DocumentTypeAny)
documentTypeAny.get('values').setValue([12])
const documentTypeNot = addFilterOfType(TriggerFilterType.DocumentTypeNot)
documentTypeNot.get('values').setValue([1])
const storagePathIs = addFilterOfType(TriggerFilterType.StoragePathIs)
storagePathIs.get('values').setValue(1)
const storagePathAny = addFilterOfType(TriggerFilterType.StoragePathAny)
storagePathAny.get('values').setValue([13])
const storagePathNot = addFilterOfType(TriggerFilterType.StoragePathNot)
storagePathNot.get('values').setValue([1])
@@ -454,13 +445,10 @@ describe('WorkflowEditDialogComponent', () => {
expect(formValues.triggers[0].filter_has_tags).toEqual([1])
expect(formValues.triggers[0].filter_has_all_tags).toEqual([2, 3])
expect(formValues.triggers[0].filter_has_not_tags).toEqual([4])
expect(formValues.triggers[0].filter_has_any_correspondents).toEqual([11])
expect(formValues.triggers[0].filter_has_correspondent).toEqual(1)
expect(formValues.triggers[0].filter_has_not_correspondents).toEqual([1])
expect(formValues.triggers[0].filter_has_any_document_types).toEqual([12])
expect(formValues.triggers[0].filter_has_document_type).toEqual(1)
expect(formValues.triggers[0].filter_has_not_document_types).toEqual([1])
expect(formValues.triggers[0].filter_has_any_storage_paths).toEqual([13])
expect(formValues.triggers[0].filter_has_storage_path).toEqual(1)
expect(formValues.triggers[0].filter_has_not_storage_paths).toEqual([1])
expect(formValues.triggers[0].filter_custom_field_query).toEqual(
@@ -523,22 +511,16 @@ describe('WorkflowEditDialogComponent', () => {
setFilter(TriggerFilterType.TagsAll, 11)
setFilter(TriggerFilterType.TagsNone, 12)
setFilter(TriggerFilterType.CorrespondentAny, 16)
setFilter(TriggerFilterType.CorrespondentNot, 13)
setFilter(TriggerFilterType.DocumentTypeAny, 17)
setFilter(TriggerFilterType.DocumentTypeNot, 14)
setFilter(TriggerFilterType.StoragePathAny, 18)
setFilter(TriggerFilterType.StoragePathNot, 15)
const formValues = component['getFormValues']()
expect(formValues.triggers[0].filter_has_all_tags).toEqual([11])
expect(formValues.triggers[0].filter_has_not_tags).toEqual([12])
expect(formValues.triggers[0].filter_has_any_correspondents).toEqual([16])
expect(formValues.triggers[0].filter_has_not_correspondents).toEqual([13])
expect(formValues.triggers[0].filter_has_any_document_types).toEqual([17])
expect(formValues.triggers[0].filter_has_not_document_types).toEqual([14])
expect(formValues.triggers[0].filter_has_any_storage_paths).toEqual([18])
expect(formValues.triggers[0].filter_has_not_storage_paths).toEqual([15])
})
@@ -662,11 +644,8 @@ describe('WorkflowEditDialogComponent', () => {
filter_has_tags: [],
filter_has_all_tags: [],
filter_has_not_tags: [],
filter_has_any_correspondents: [],
filter_has_not_correspondents: [],
filter_has_any_document_types: [],
filter_has_not_document_types: [],
filter_has_any_storage_paths: [],
filter_has_not_storage_paths: [],
filter_has_correspondent: null,
filter_has_document_type: null,
@@ -724,14 +703,11 @@ describe('WorkflowEditDialogComponent', () => {
trigger.filter_has_tags = [1]
trigger.filter_has_all_tags = [2, 3]
trigger.filter_has_not_tags = [4]
trigger.filter_has_any_correspondents = [10] as any
trigger.filter_has_correspondent = 5 as any
trigger.filter_has_not_correspondents = [6] as any
trigger.filter_has_document_type = 7 as any
trigger.filter_has_any_document_types = [11] as any
trigger.filter_has_not_document_types = [8] as any
trigger.filter_has_storage_path = 9 as any
trigger.filter_has_any_storage_paths = [12] as any
trigger.filter_has_not_storage_paths = [10] as any
trigger.filter_custom_field_query = JSON.stringify([
'AND',
@@ -742,8 +718,8 @@ describe('WorkflowEditDialogComponent', () => {
component.ngOnInit()
const triggerGroup = component.triggerFields.at(0) as FormGroup
const filters = component.getFiltersFormArray(triggerGroup)
expect(filters.length).toBe(13)
const customFieldFilter = filters.at(12) as FormGroup
expect(filters.length).toBe(10)
const customFieldFilter = filters.at(9) as FormGroup
expect(customFieldFilter.get('type').value).toBe(
TriggerFilterType.CustomFieldQuery
)
@@ -752,27 +728,12 @@ describe('WorkflowEditDialogComponent', () => {
})
it('should expose select metadata helpers', () => {
expect(component.isSelectMultiple(TriggerFilterType.CorrespondentAny)).toBe(
true
)
expect(component.isSelectMultiple(TriggerFilterType.CorrespondentNot)).toBe(
true
)
expect(component.isSelectMultiple(TriggerFilterType.CorrespondentIs)).toBe(
false
)
expect(component.isSelectMultiple(TriggerFilterType.DocumentTypeAny)).toBe(
true
)
expect(component.isSelectMultiple(TriggerFilterType.DocumentTypeIs)).toBe(
false
)
expect(component.isSelectMultiple(TriggerFilterType.StoragePathAny)).toBe(
true
)
expect(component.isSelectMultiple(TriggerFilterType.StoragePathIs)).toBe(
false
)
component.correspondents = [{ id: 1, name: 'C1' } as any]
component.documentTypes = [{ id: 2, name: 'DT' } as any]
@@ -784,15 +745,9 @@ describe('WorkflowEditDialogComponent', () => {
expect(
component.getFilterSelectItems(TriggerFilterType.DocumentTypeIs)
).toEqual(component.documentTypes)
expect(
component.getFilterSelectItems(TriggerFilterType.DocumentTypeAny)
).toEqual(component.documentTypes)
expect(
component.getFilterSelectItems(TriggerFilterType.StoragePathIs)
).toEqual(component.storagePaths)
expect(
component.getFilterSelectItems(TriggerFilterType.StoragePathAny)
).toEqual(component.storagePaths)
expect(component.getFilterSelectItems(TriggerFilterType.TagsAll)).toEqual(
[]
)

View File

@@ -145,13 +145,10 @@ export enum TriggerFilterType {
TagsAny = 'tags_any',
TagsAll = 'tags_all',
TagsNone = 'tags_none',
CorrespondentAny = 'correspondent_any',
CorrespondentIs = 'correspondent_is',
CorrespondentNot = 'correspondent_not',
DocumentTypeAny = 'document_type_any',
DocumentTypeIs = 'document_type_is',
DocumentTypeNot = 'document_type_not',
StoragePathAny = 'storage_path_any',
StoragePathIs = 'storage_path_is',
StoragePathNot = 'storage_path_not',
CustomFieldQuery = 'custom_field_query',
@@ -175,11 +172,8 @@ type TriggerFilterAggregate = {
filter_has_tags: number[]
filter_has_all_tags: number[]
filter_has_not_tags: number[]
filter_has_any_correspondents: number[]
filter_has_not_correspondents: number[]
filter_has_any_document_types: number[]
filter_has_not_document_types: number[]
filter_has_any_storage_paths: number[]
filter_has_not_storage_paths: number[]
filter_has_correspondent: number | null
filter_has_document_type: number | null
@@ -225,14 +219,6 @@ const TRIGGER_FILTER_DEFINITIONS: TriggerFilterDefinition[] = [
allowMultipleEntries: false,
allowMultipleValues: true,
},
{
id: TriggerFilterType.CorrespondentAny,
name: $localize`Has any of these correspondents`,
inputType: 'select',
allowMultipleEntries: false,
allowMultipleValues: true,
selectItems: 'correspondents',
},
{
id: TriggerFilterType.CorrespondentIs,
name: $localize`Has correspondent`,
@@ -257,14 +243,6 @@ const TRIGGER_FILTER_DEFINITIONS: TriggerFilterDefinition[] = [
allowMultipleValues: false,
selectItems: 'documentTypes',
},
{
id: TriggerFilterType.DocumentTypeAny,
name: $localize`Has any of these document types`,
inputType: 'select',
allowMultipleEntries: false,
allowMultipleValues: true,
selectItems: 'documentTypes',
},
{
id: TriggerFilterType.DocumentTypeNot,
name: $localize`Does not have document types`,
@@ -281,14 +259,6 @@ const TRIGGER_FILTER_DEFINITIONS: TriggerFilterDefinition[] = [
allowMultipleValues: false,
selectItems: 'storagePaths',
},
{
id: TriggerFilterType.StoragePathAny,
name: $localize`Has any of these storage paths`,
inputType: 'select',
allowMultipleEntries: false,
allowMultipleValues: true,
selectItems: 'storagePaths',
},
{
id: TriggerFilterType.StoragePathNot,
name: $localize`Does not have storage paths`,
@@ -336,15 +306,6 @@ const FILTER_HANDLERS: Record<TriggerFilterType, FilterHandler> = {
extract: (trigger) => trigger.filter_has_not_tags,
hasValue: (value) => Array.isArray(value) && value.length > 0,
},
[TriggerFilterType.CorrespondentAny]: {
apply: (aggregate, values) => {
aggregate.filter_has_any_correspondents = Array.isArray(values)
? [...values]
: [values]
},
extract: (trigger) => trigger.filter_has_any_correspondents,
hasValue: (value) => Array.isArray(value) && value.length > 0,
},
[TriggerFilterType.CorrespondentIs]: {
apply: (aggregate, values) => {
aggregate.filter_has_correspondent = Array.isArray(values)
@@ -372,15 +333,6 @@ const FILTER_HANDLERS: Record<TriggerFilterType, FilterHandler> = {
extract: (trigger) => trigger.filter_has_document_type,
hasValue: (value) => value !== null && value !== undefined,
},
[TriggerFilterType.DocumentTypeAny]: {
apply: (aggregate, values) => {
aggregate.filter_has_any_document_types = Array.isArray(values)
? [...values]
: [values]
},
extract: (trigger) => trigger.filter_has_any_document_types,
hasValue: (value) => Array.isArray(value) && value.length > 0,
},
[TriggerFilterType.DocumentTypeNot]: {
apply: (aggregate, values) => {
aggregate.filter_has_not_document_types = Array.isArray(values)
@@ -399,15 +351,6 @@ const FILTER_HANDLERS: Record<TriggerFilterType, FilterHandler> = {
extract: (trigger) => trigger.filter_has_storage_path,
hasValue: (value) => value !== null && value !== undefined,
},
[TriggerFilterType.StoragePathAny]: {
apply: (aggregate, values) => {
aggregate.filter_has_any_storage_paths = Array.isArray(values)
? [...values]
: [values]
},
extract: (trigger) => trigger.filter_has_any_storage_paths,
hasValue: (value) => Array.isArray(value) && value.length > 0,
},
[TriggerFilterType.StoragePathNot]: {
apply: (aggregate, values) => {
aggregate.filter_has_not_storage_paths = Array.isArray(values)
@@ -699,11 +642,8 @@ export class WorkflowEditDialogComponent
filter_has_tags: [],
filter_has_all_tags: [],
filter_has_not_tags: [],
filter_has_any_correspondents: [],
filter_has_not_correspondents: [],
filter_has_any_document_types: [],
filter_has_not_document_types: [],
filter_has_any_storage_paths: [],
filter_has_not_storage_paths: [],
filter_has_correspondent: null,
filter_has_document_type: null,
@@ -730,16 +670,10 @@ export class WorkflowEditDialogComponent
trigger.filter_has_tags = aggregate.filter_has_tags
trigger.filter_has_all_tags = aggregate.filter_has_all_tags
trigger.filter_has_not_tags = aggregate.filter_has_not_tags
trigger.filter_has_any_correspondents =
aggregate.filter_has_any_correspondents
trigger.filter_has_not_correspondents =
aggregate.filter_has_not_correspondents
trigger.filter_has_any_document_types =
aggregate.filter_has_any_document_types
trigger.filter_has_not_document_types =
aggregate.filter_has_not_document_types
trigger.filter_has_any_storage_paths =
aggregate.filter_has_any_storage_paths
trigger.filter_has_not_storage_paths =
aggregate.filter_has_not_storage_paths
trigger.filter_has_correspondent =
@@ -922,11 +856,8 @@ export class WorkflowEditDialogComponent
case TriggerFilterType.TagsAny:
case TriggerFilterType.TagsAll:
case TriggerFilterType.TagsNone:
case TriggerFilterType.CorrespondentAny:
case TriggerFilterType.CorrespondentNot:
case TriggerFilterType.DocumentTypeAny:
case TriggerFilterType.DocumentTypeNot:
case TriggerFilterType.StoragePathAny:
case TriggerFilterType.StoragePathNot:
return true
default:
@@ -1248,11 +1179,8 @@ export class WorkflowEditDialogComponent
filter_has_tags: [],
filter_has_all_tags: [],
filter_has_not_tags: [],
filter_has_any_correspondents: [],
filter_has_not_correspondents: [],
filter_has_any_document_types: [],
filter_has_not_document_types: [],
filter_has_any_storage_paths: [],
filter_has_not_storage_paths: [],
filter_custom_field_query: null,
filter_has_correspondent: null,

View File

@@ -44,16 +44,10 @@ export interface WorkflowTrigger extends ObjectWithId {
filter_has_not_tags?: number[] // Tag.id[]
filter_has_any_correspondents?: number[] // Correspondent.id[]
filter_has_not_correspondents?: number[] // Correspondent.id[]
filter_has_any_document_types?: number[] // DocumentType.id[]
filter_has_not_document_types?: number[] // DocumentType.id[]
filter_has_any_storage_paths?: number[] // StoragePath.id[]
filter_has_not_storage_paths?: number[] // StoragePath.id[]
filter_custom_field_query?: string

View File

@@ -1,5 +1,9 @@
from django.apps import AppConfig
from django.db.models.signals import post_delete
from django.db.models.signals import post_save
from django.utils.translation import gettext_lazy as _
from treenode.signals import post_delete_treenode
from treenode.signals import post_save_treenode
class DocumentsConfig(AppConfig):
@@ -8,12 +12,14 @@ class DocumentsConfig(AppConfig):
verbose_name = _("Documents")
def ready(self):
from documents.models import Tag
from documents.signals import document_consumption_finished
from documents.signals import document_updated
from documents.signals.handlers import add_inbox_tags
from documents.signals.handlers import add_to_index
from documents.signals.handlers import run_workflows_added
from documents.signals.handlers import run_workflows_updated
from documents.signals.handlers import schedule_tag_tree_update
from documents.signals.handlers import set_correspondent
from documents.signals.handlers import set_document_type
from documents.signals.handlers import set_storage_path
@@ -28,6 +34,29 @@ class DocumentsConfig(AppConfig):
document_consumption_finished.connect(run_workflows_added)
document_updated.connect(run_workflows_updated)
# treenode updates the entire tree on every save/delete via hooks
# so disconnect for Tags and run once-per-transaction.
post_save.disconnect(
post_save_treenode,
sender=Tag,
dispatch_uid="post_save_treenode",
)
post_delete.disconnect(
post_delete_treenode,
sender=Tag,
dispatch_uid="post_delete_treenode",
)
post_save.connect(
schedule_tag_tree_update,
sender=Tag,
dispatch_uid="paperless_tag_mark_dirty_save",
)
post_delete.connect(
schedule_tag_tree_update,
sender=Tag,
dispatch_uid="paperless_tag_mark_dirty_delete",
)
import documents.schema # noqa: F401
AppConfig.ready(self)

View File

@@ -403,18 +403,6 @@ def existing_document_matches_workflow(
f"Document tags {list(document.tags.all())} include excluded tags {list(trigger_has_not_tags_qs)}",
)
allowed_correspondent_ids = set(
trigger.filter_has_any_correspondents.values_list("id", flat=True),
)
if (
allowed_correspondent_ids
and document.correspondent_id not in allowed_correspondent_ids
):
return (
False,
f"Document correspondent {document.correspondent} is not one of {list(trigger.filter_has_any_correspondents.all())}",
)
# Document correspondent vs trigger has_correspondent
if (
trigger.filter_has_correspondent_id is not None
@@ -436,17 +424,6 @@ def existing_document_matches_workflow(
f"Document correspondent {document.correspondent} is excluded by {list(trigger.filter_has_not_correspondents.all())}",
)
allowed_document_type_ids = set(
trigger.filter_has_any_document_types.values_list("id", flat=True),
)
if allowed_document_type_ids and (
document.document_type_id not in allowed_document_type_ids
):
return (
False,
f"Document doc type {document.document_type} is not one of {list(trigger.filter_has_any_document_types.all())}",
)
# Document document_type vs trigger has_document_type
if (
trigger.filter_has_document_type_id is not None
@@ -468,17 +445,6 @@ def existing_document_matches_workflow(
f"Document doc type {document.document_type} is excluded by {list(trigger.filter_has_not_document_types.all())}",
)
allowed_storage_path_ids = set(
trigger.filter_has_any_storage_paths.values_list("id", flat=True),
)
if allowed_storage_path_ids and (
document.storage_path_id not in allowed_storage_path_ids
):
return (
False,
f"Document storage path {document.storage_path} is not one of {list(trigger.filter_has_any_storage_paths.all())}",
)
# Document storage_path vs trigger has_storage_path
if (
trigger.filter_has_storage_path_id is not None
@@ -566,10 +532,6 @@ def prefilter_documents_by_workflowtrigger(
# Correspondent, DocumentType, etc. filtering
if trigger.filter_has_any_correspondents.exists():
documents = documents.filter(
correspondent__in=trigger.filter_has_any_correspondents.all(),
)
if trigger.filter_has_correspondent is not None:
documents = documents.filter(
correspondent=trigger.filter_has_correspondent,
@@ -579,10 +541,6 @@ def prefilter_documents_by_workflowtrigger(
correspondent__in=trigger.filter_has_not_correspondents.all(),
)
if trigger.filter_has_any_document_types.exists():
documents = documents.filter(
document_type__in=trigger.filter_has_any_document_types.all(),
)
if trigger.filter_has_document_type is not None:
documents = documents.filter(
document_type=trigger.filter_has_document_type,
@@ -592,10 +550,6 @@ def prefilter_documents_by_workflowtrigger(
document_type__in=trigger.filter_has_not_document_types.all(),
)
if trigger.filter_has_any_storage_paths.exists():
documents = documents.filter(
storage_path__in=trigger.filter_has_any_storage_paths.all(),
)
if trigger.filter_has_storage_path is not None:
documents = documents.filter(
storage_path=trigger.filter_has_storage_path,
@@ -650,11 +604,8 @@ def document_matches_workflow(
"filter_has_tags",
"filter_has_all_tags",
"filter_has_not_tags",
"filter_has_any_document_types",
"filter_has_not_document_types",
"filter_has_any_correspondents",
"filter_has_not_correspondents",
"filter_has_any_storage_paths",
"filter_has_not_storage_paths",
)
)

View File

@@ -1,43 +0,0 @@
# Generated by Django 5.2.7 on 2025-12-17 22:25
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
    """Add three blank-able many-to-many filter fields to ``workflowtrigger``."""

    dependencies = [
        ("documents", "1074_workflowrun_deleted_at_workflowrun_restored_at_and_more"),
    ]

    # One AddField per row: (field name, related_name, target model, verbose name).
    operations = [
        migrations.AddField(
            model_name="workflowtrigger",
            name=field_name,
            field=models.ManyToManyField(
                blank=True,
                related_name=reverse_name,
                to=target_model,
                verbose_name=label,
            ),
        )
        for field_name, reverse_name, target_model, label in (
            (
                "filter_has_any_correspondents",
                "workflowtriggers_has_any_correspondent",
                "documents.correspondent",
                "has one of these correspondents",
            ),
            (
                "filter_has_any_document_types",
                "workflowtriggers_has_any_document_type",
                "documents.documenttype",
                "has one of these document types",
            ),
            (
                "filter_has_any_storage_paths",
                "workflowtriggers_has_any_storage_path",
                "documents.storagepath",
                "has one of these storage paths",
            ),
        )
    ]

View File

@@ -1087,13 +1087,6 @@ class WorkflowTrigger(models.Model):
verbose_name=_("has this document type"),
)
filter_has_any_document_types = models.ManyToManyField(
DocumentType,
blank=True,
related_name="workflowtriggers_has_any_document_type",
verbose_name=_("has one of these document types"),
)
filter_has_not_document_types = models.ManyToManyField(
DocumentType,
blank=True,
@@ -1116,13 +1109,6 @@ class WorkflowTrigger(models.Model):
verbose_name=_("does not have these correspondent(s)"),
)
filter_has_any_correspondents = models.ManyToManyField(
Correspondent,
blank=True,
related_name="workflowtriggers_has_any_correspondent",
verbose_name=_("has one of these correspondents"),
)
filter_has_storage_path = models.ForeignKey(
StoragePath,
null=True,
@@ -1131,13 +1117,6 @@ class WorkflowTrigger(models.Model):
verbose_name=_("has this storage path"),
)
filter_has_any_storage_paths = models.ManyToManyField(
StoragePath,
blank=True,
related_name="workflowtriggers_has_any_storage_path",
verbose_name=_("has one of these storage paths"),
)
filter_has_not_storage_paths = models.ManyToManyField(
StoragePath,
blank=True,

View File

@@ -578,30 +578,34 @@ class TagSerializer(MatchingModelSerializer, OwnedObjectSerializer):
),
)
def get_children(self, obj):
filter_q = self.context.get("document_count_filter")
request = self.context.get("request")
if filter_q is None:
user = getattr(request, "user", None) if request else None
filter_q = get_document_count_filter_for_user(user)
self.context["document_count_filter"] = filter_q
children_map = self.context.get("children_map")
if children_map is not None:
children = children_map.get(obj.pk, [])
else:
filter_q = self.context.get("document_count_filter")
request = self.context.get("request")
if filter_q is None:
user = getattr(request, "user", None) if request else None
filter_q = get_document_count_filter_for_user(user)
self.context["document_count_filter"] = filter_q
children_queryset = (
obj.get_children_queryset()
.select_related("owner")
.annotate(document_count=Count("documents", filter=filter_q))
)
children = (
obj.get_children_queryset()
.select_related("owner")
.annotate(document_count=Count("documents", filter=filter_q))
)
view = self.context.get("view")
ordering = (
OrderingFilter().get_ordering(request, children_queryset, view)
if request and view
else None
)
ordering = ordering or (Lower("name"),)
children_queryset = children_queryset.order_by(*ordering)
view = self.context.get("view")
ordering = (
OrderingFilter().get_ordering(request, children, view)
if request and view
else None
)
ordering = ordering or (Lower("name"),)
children = children.order_by(*ordering)
serializer = TagSerializer(
children_queryset,
children,
many=True,
user=self.user,
full_perms=self.full_perms,
@@ -2275,11 +2279,8 @@ class WorkflowTriggerSerializer(serializers.ModelSerializer):
"filter_has_all_tags",
"filter_has_not_tags",
"filter_custom_field_query",
"filter_has_any_correspondents",
"filter_has_not_correspondents",
"filter_has_any_document_types",
"filter_has_not_document_types",
"filter_has_any_storage_paths",
"filter_has_not_storage_paths",
"filter_has_correspondent",
"filter_has_document_type",
@@ -2517,26 +2518,14 @@ class WorkflowSerializer(serializers.ModelSerializer):
filter_has_tags = trigger.pop("filter_has_tags", None)
filter_has_all_tags = trigger.pop("filter_has_all_tags", None)
filter_has_not_tags = trigger.pop("filter_has_not_tags", None)
filter_has_any_correspondents = trigger.pop(
"filter_has_any_correspondents",
None,
)
filter_has_not_correspondents = trigger.pop(
"filter_has_not_correspondents",
None,
)
filter_has_any_document_types = trigger.pop(
"filter_has_any_document_types",
None,
)
filter_has_not_document_types = trigger.pop(
"filter_has_not_document_types",
None,
)
filter_has_any_storage_paths = trigger.pop(
"filter_has_any_storage_paths",
None,
)
filter_has_not_storage_paths = trigger.pop(
"filter_has_not_storage_paths",
None,
@@ -2553,26 +2542,14 @@ class WorkflowSerializer(serializers.ModelSerializer):
trigger_instance.filter_has_all_tags.set(filter_has_all_tags)
if filter_has_not_tags is not None:
trigger_instance.filter_has_not_tags.set(filter_has_not_tags)
if filter_has_any_correspondents is not None:
trigger_instance.filter_has_any_correspondents.set(
filter_has_any_correspondents,
)
if filter_has_not_correspondents is not None:
trigger_instance.filter_has_not_correspondents.set(
filter_has_not_correspondents,
)
if filter_has_any_document_types is not None:
trigger_instance.filter_has_any_document_types.set(
filter_has_any_document_types,
)
if filter_has_not_document_types is not None:
trigger_instance.filter_has_not_document_types.set(
filter_has_not_document_types,
)
if filter_has_any_storage_paths is not None:
trigger_instance.filter_has_any_storage_paths.set(
filter_has_any_storage_paths,
)
if filter_has_not_storage_paths is not None:
trigger_instance.filter_has_not_storage_paths.set(
filter_has_not_storage_paths,

View File

@@ -19,6 +19,7 @@ from django.db import DatabaseError
from django.db import close_old_connections
from django.db import connections
from django.db import models
from django.db import transaction
from django.db.models import Q
from django.dispatch import receiver
from django.utils import timezone
@@ -60,6 +61,8 @@ if TYPE_CHECKING:
logger = logging.getLogger("paperless.handlers")
_tag_tree_update_scheduled = False
def add_inbox_tags(sender, document: Document, logging_group=None, **kwargs):
if document.owner is not None:
@@ -944,3 +947,26 @@ def close_connection_pool_on_worker_init(**kwargs):
for conn in connections.all(initialized_only=True):
if conn.alias == "default" and hasattr(conn, "pool") and conn.pool:
conn.close_pool()
def _run_scheduled_tag_tree_update():
    """Rebuild the Tag tree; registered as the on-commit callback."""
    global _tag_tree_update_scheduled
    # The module flag is only True while the rebuild is running. It keeps any
    # Tag signal fired during the rebuild from scheduling another rebuild.
    _tag_tree_update_scheduled = True
    try:
        Tag.update_tree()
    finally:
        _tag_tree_update_scheduled = False


def schedule_tag_tree_update(**_kwargs):
    """
    Schedule a single Tag.update_tree() at transaction commit.

    Treenode's default post_save hooks rebuild the entire tree on every save,
    which is very slow for large tag sets so collapse to one update per
    transaction.

    De-duplication looks at the callbacks still pending on the default
    connection instead of a flag that is set at registration time. Django
    discards on-commit callbacks when a transaction (or savepoint) is rolled
    back, so such a flag would stay set forever after a rollback and silently
    suppress every later tree update in the process.
    """
    if _tag_tree_update_scheduled:
        # A rebuild is running right now; do not schedule another from within it.
        return

    # Entries are tuples whose second item is the callback
    # (NOTE(review): tuple layout is Django-internal - confirm on upgrade).
    pending = connections["default"].run_on_commit
    if any(entry[1] is _run_scheduled_tag_tree_update for entry in pending):
        return

    # Outside a transaction Django invokes the callback immediately.
    transaction.on_commit(_run_scheduled_tag_tree_update)

View File

@@ -186,11 +186,8 @@ class TestApiWorkflows(DirectoriesMixin, APITestCase):
"filter_has_tags": [self.t1.id],
"filter_has_all_tags": [self.t2.id],
"filter_has_not_tags": [self.t3.id],
"filter_has_any_correspondents": [self.c.id],
"filter_has_not_correspondents": [self.c2.id],
"filter_has_any_document_types": [self.dt.id],
"filter_has_not_document_types": [self.dt2.id],
"filter_has_any_storage_paths": [self.sp.id],
"filter_has_not_storage_paths": [self.sp2.id],
"filter_custom_field_query": json.dumps(
[
@@ -251,26 +248,14 @@ class TestApiWorkflows(DirectoriesMixin, APITestCase):
set(trigger.filter_has_not_tags.values_list("id", flat=True)),
{self.t3.id},
)
self.assertSetEqual(
set(trigger.filter_has_any_correspondents.values_list("id", flat=True)),
{self.c.id},
)
self.assertSetEqual(
set(trigger.filter_has_not_correspondents.values_list("id", flat=True)),
{self.c2.id},
)
self.assertSetEqual(
set(trigger.filter_has_any_document_types.values_list("id", flat=True)),
{self.dt.id},
)
self.assertSetEqual(
set(trigger.filter_has_not_document_types.values_list("id", flat=True)),
{self.dt2.id},
)
self.assertSetEqual(
set(trigger.filter_has_any_storage_paths.values_list("id", flat=True)),
{self.sp.id},
)
self.assertSetEqual(
set(trigger.filter_has_not_storage_paths.values_list("id", flat=True)),
{self.sp2.id},
@@ -434,11 +419,8 @@ class TestApiWorkflows(DirectoriesMixin, APITestCase):
"filter_has_tags": [self.t1.id],
"filter_has_all_tags": [self.t2.id],
"filter_has_not_tags": [self.t3.id],
"filter_has_any_correspondents": [self.c.id],
"filter_has_not_correspondents": [self.c2.id],
"filter_has_any_document_types": [self.dt.id],
"filter_has_not_document_types": [self.dt2.id],
"filter_has_any_storage_paths": [self.sp.id],
"filter_has_not_storage_paths": [self.sp2.id],
"filter_custom_field_query": json.dumps(
["AND", [[self.cf1.id, "exact", "value"]]],
@@ -468,26 +450,14 @@ class TestApiWorkflows(DirectoriesMixin, APITestCase):
workflow.triggers.first().filter_has_not_tags.first(),
self.t3,
)
self.assertEqual(
workflow.triggers.first().filter_has_any_correspondents.first(),
self.c,
)
self.assertEqual(
workflow.triggers.first().filter_has_not_correspondents.first(),
self.c2,
)
self.assertEqual(
workflow.triggers.first().filter_has_any_document_types.first(),
self.dt,
)
self.assertEqual(
workflow.triggers.first().filter_has_not_document_types.first(),
self.dt2,
)
self.assertEqual(
workflow.triggers.first().filter_has_any_storage_paths.first(),
self.sp,
)
self.assertEqual(
workflow.triggers.first().filter_has_not_storage_paths.first(),
self.sp2,

View File

@@ -250,3 +250,16 @@ class TestTagHierarchy(APITestCase):
row for row in response.data["results"] if row["id"] == self.parent.pk
)
assert any(child["id"] == self.child.pk for child in parent_entry["children"])
def test_tag_tree_deferred_update_runs_on_commit(self):
    """
    Tags created inside an atomic block get their tn_* fields populated
    once the block's on-commit callbacks have run.
    """
    from django.db import transaction

    # Django's TestCase (which APITestCase builds on) wraps every test in a
    # transaction that is never committed, so on_commit callbacks would not
    # fire by themselves; capture them and execute them when the block exits.
    with self.captureOnCommitCallbacks(execute=True), transaction.atomic():
        parent = Tag.objects.create(name="Parent 2")
        child = Tag.objects.create(name="Child 2", tn_parent=parent)

    # After the (simulated) commit, tn_* fields should be populated.
    parent.refresh_from_db()
    child.refresh_from_db()
    assert parent.tn_children_count == 1
    assert child.tn_ancestors_count == 1

View File

@@ -1276,76 +1276,6 @@ class TestWorkflows(
)
self.assertIn(expected_str, cm.output[1])
def test_document_added_any_filters(self):
    """
    Check the "has any" correspondent / document type / storage path
    filters of a DOCUMENT_ADDED trigger against existing documents.

    A document carrying all three configured values must match with no
    reason given; replacing exactly one of them must fail the match with a
    reason mentioning the filter that rejected it.
    """
    any_trigger = WorkflowTrigger.objects.create(
        type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,
    )
    any_trigger.filter_has_any_correspondents.set([self.c])
    any_trigger.filter_has_any_document_types.set([self.dt])
    any_trigger.filter_has_any_storage_paths.set([self.sp])

    # Attribute values accepted by every filter configured above.
    accepted = {
        "correspondent": self.c,
        "document_type": self.dt,
        "storage_path": self.sp,
    }

    # Everything lines up with the trigger -> match, no reason.
    doc_all_accepted = Document.objects.create(
        title="sample test",
        original_filename="sample.pdf",
        checksum="checksum-any-match",
        **accepted,
    )
    is_match, why = existing_document_matches_workflow(
        doc_all_accepted,
        any_trigger,
    )
    self.assertTrue(is_match)
    self.assertIsNone(why)

    # Correspondent not in the allowed set (created without a checksum).
    doc_bad_correspondent = Document.objects.create(
        title="wrong correspondent",
        original_filename="sample2.pdf",
        **{**accepted, "correspondent": self.c2},
    )
    is_match, why = existing_document_matches_workflow(
        doc_bad_correspondent,
        any_trigger,
    )
    self.assertFalse(is_match)
    self.assertIn("correspondent", why)

    # Document type not in the allowed set.
    unlisted_type = DocumentType.objects.create(name="Other")
    doc_bad_type = Document.objects.create(
        title="wrong doc type",
        original_filename="sample3.pdf",
        checksum="checksum-wrong-doc-type",
        **{**accepted, "document_type": unlisted_type},
    )
    is_match, why = existing_document_matches_workflow(
        doc_bad_type,
        any_trigger,
    )
    self.assertFalse(is_match)
    self.assertIn("doc type", why)

    # Storage path not in the allowed set.
    unlisted_path = StoragePath.objects.create(
        name="Other path",
        path="/other/",
    )
    doc_bad_path = Document.objects.create(
        title="wrong storage",
        original_filename="sample4.pdf",
        checksum="checksum-wrong-storage-path",
        **{**accepted, "storage_path": unlisted_path},
    )
    is_match, why = existing_document_matches_workflow(
        doc_bad_path,
        any_trigger,
    )
    self.assertFalse(is_match)
    self.assertIn("storage path", why)
def test_document_added_custom_field_query_no_match(self):
trigger = WorkflowTrigger.objects.create(
type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,
@@ -1454,39 +1384,6 @@ class TestWorkflows(
self.assertIn(doc1, filtered)
self.assertNotIn(doc2, filtered)
def test_prefilter_documents_any_filters(self):
    """
    Pre-filtering a document queryset by a trigger with "has any" filters
    keeps the document whose correspondent is listed and drops the one
    whose correspondent is not.
    """
    any_trigger = WorkflowTrigger.objects.create(
        type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,
    )
    any_trigger.filter_has_any_correspondents.set([self.c])
    any_trigger.filter_has_any_document_types.set([self.dt])
    any_trigger.filter_has_any_storage_paths.set([self.sp])

    # Both documents share type and storage path; only the correspondent differs.
    shared_attrs = {"document_type": self.dt, "storage_path": self.sp}

    kept = Document.objects.create(
        title="allowed",
        correspondent=self.c,
        original_filename="doc-allowed.pdf",
        checksum="checksum-any-allowed",
        **shared_attrs,
    )
    dropped = Document.objects.create(
        title="blocked",
        correspondent=self.c2,
        original_filename="doc-blocked.pdf",
        checksum="checksum-any-blocked",
        **shared_attrs,
    )

    candidates = Document.objects.all()
    remaining = prefilter_documents_by_workflowtrigger(candidates, any_trigger)

    self.assertIn(kept, remaining)
    self.assertNotIn(dropped, remaining)
def test_consumption_trigger_requires_filter_configuration(self):
serializer = WorkflowTriggerSerializer(
data={

View File

@@ -448,8 +448,43 @@ class TagViewSet(ModelViewSet, PermissionsAwareDocumentCountMixin):
def get_serializer_context(self):
    """
    Return the base serializer context extended with the document-count
    filter and, if a ``_children_map`` has been stored on this view
    instance, that map under ``children_map``.
    """
    ctx = super().get_serializer_context()
    ctx.update(document_count_filter=self.get_document_count_filter())

    # Nothing more to add when no children map was stored on the view.
    if not hasattr(self, "_children_map"):
        return ctx

    ctx["children_map"] = self._children_map
    return ctx
def list(self, request, *args, **kwargs):
    """
    List tags, building a children map once to avoid per-parent queries in
    the serializer.

    The map groups tags by ``tn_parent_id`` and is stored on
    ``self._children_map`` before the serializer is created. When any
    listed tag has descendants, the map is built from one query covering
    the listed tags plus all their descendants, annotated with
    ``document_count``; otherwise the already-loaded tags are reused.

    Returns the paginated response, or a plain response with the full
    result when no paginator is configured for the view.
    """
    from rest_framework.response import Response

    queryset = self.filter_queryset(self.get_queryset())
    # Fall back to case-insensitive name ordering when the request gives none.
    ordering = OrderingFilter().get_ordering(request, queryset, self) or (
        Lower("name"),
    )
    queryset = queryset.order_by(*ordering)

    all_tags = list(queryset)
    descendant_pks = {pk for tag in all_tags for pk in tag.get_descendants_pks()}

    if descendant_pks:
        listed_pks = {tag.pk for tag in all_tags}
        filter_q = self.get_document_count_filter()
        children_source = (
            Tag.objects.filter(pk__in=descendant_pks | listed_pks)
            .select_related("owner")
            .annotate(document_count=Count("documents", filter=filter_q))
            .order_by(*ordering)
        )
    else:
        # No hierarchy among the listed tags; reuse the loaded rows.
        children_source = all_tags

    # Plain dict on purpose: consumers may probe for missing parents.
    children_map = {}
    for tag in children_source:
        children_map.setdefault(tag.tn_parent_id, []).append(tag)
    self._children_map = children_map

    page = self.paginate_queryset(queryset)
    if page is None:
        # paginate_queryset returns None when pagination is not configured;
        # serializing None / calling get_paginated_response would fail.
        serializer = self.get_serializer(queryset, many=True)
        return Response(serializer.data)

    serializer = self.get_serializer(page, many=True)
    return self.get_paginated_response(serializer.data)
def perform_update(self, serializer):
old_parent = self.get_object().get_parent()
tag = serializer.save()