Run Tag tree updates once per transaction

This commit is contained in:
shamoon
2025-12-15 10:55:22 -08:00
parent 72fd05501b
commit d45dee6d39
3 changed files with 85 additions and 0 deletions

View File

@@ -1,5 +1,9 @@
from django.apps import AppConfig from django.apps import AppConfig
from django.db.models.signals import post_delete
from django.db.models.signals import post_save
from django.utils.translation import gettext_lazy as _ from django.utils.translation import gettext_lazy as _
from treenode.signals import post_delete_treenode
from treenode.signals import post_save_treenode
class DocumentsConfig(AppConfig): class DocumentsConfig(AppConfig):
@@ -8,12 +12,14 @@ class DocumentsConfig(AppConfig):
verbose_name = _("Documents") verbose_name = _("Documents")
def ready(self): def ready(self):
from documents.models import Tag
from documents.signals import document_consumption_finished from documents.signals import document_consumption_finished
from documents.signals import document_updated from documents.signals import document_updated
from documents.signals.handlers import add_inbox_tags from documents.signals.handlers import add_inbox_tags
from documents.signals.handlers import add_to_index from documents.signals.handlers import add_to_index
from documents.signals.handlers import run_workflows_added from documents.signals.handlers import run_workflows_added
from documents.signals.handlers import run_workflows_updated from documents.signals.handlers import run_workflows_updated
from documents.signals.handlers import schedule_tag_tree_update
from documents.signals.handlers import set_correspondent from documents.signals.handlers import set_correspondent
from documents.signals.handlers import set_document_type from documents.signals.handlers import set_document_type
from documents.signals.handlers import set_storage_path from documents.signals.handlers import set_storage_path
@@ -28,6 +34,29 @@ class DocumentsConfig(AppConfig):
document_consumption_finished.connect(run_workflows_added) document_consumption_finished.connect(run_workflows_added)
document_updated.connect(run_workflows_updated) document_updated.connect(run_workflows_updated)
# treenode updates the entire tree on every save/delete via hooks
# so disconnect for Tags and run once-per-transaction.
post_save.disconnect(
post_save_treenode,
sender=Tag,
dispatch_uid="post_save_treenode",
)
post_delete.disconnect(
post_delete_treenode,
sender=Tag,
dispatch_uid="post_delete_treenode",
)
post_save.connect(
schedule_tag_tree_update,
sender=Tag,
dispatch_uid="paperless_tag_mark_dirty_save",
)
post_delete.connect(
schedule_tag_tree_update,
sender=Tag,
dispatch_uid="paperless_tag_mark_dirty_delete",
)
import documents.schema # noqa: F401 import documents.schema # noqa: F401
AppConfig.ready(self) AppConfig.ready(self)

View File

@@ -19,6 +19,7 @@ from django.db import DatabaseError
from django.db import close_old_connections from django.db import close_old_connections
from django.db import connections from django.db import connections
from django.db import models from django.db import models
from django.db import transaction
from django.db.models import Q from django.db.models import Q
from django.dispatch import receiver from django.dispatch import receiver
from django.utils import timezone from django.utils import timezone
@@ -60,6 +61,8 @@ if TYPE_CHECKING:
logger = logging.getLogger("paperless.handlers") logger = logging.getLogger("paperless.handlers")
_tag_tree_update_scheduled = False
def add_inbox_tags(sender, document: Document, logging_group=None, **kwargs): def add_inbox_tags(sender, document: Document, logging_group=None, **kwargs):
if document.owner is not None: if document.owner is not None:
@@ -944,3 +947,26 @@ def close_connection_pool_on_worker_init(**kwargs):
for conn in connections.all(initialized_only=True): for conn in connections.all(initialized_only=True):
if conn.alias == "default" and hasattr(conn, "pool") and conn.pool: if conn.alias == "default" and hasattr(conn, "pool") and conn.pool:
conn.close_pool() conn.close_pool()
def schedule_tag_tree_update(**_kwargs):
"""
Schedule a single Tag.update_tree() at transaction commit.
Treenode's default post_save hooks rebuild the entire tree on every save,
which is very slow for large tag sets so collapse to one update per
transaction.
"""
global _tag_tree_update_scheduled
if _tag_tree_update_scheduled:
return
_tag_tree_update_scheduled = True
def _run():
global _tag_tree_update_scheduled
try:
Tag.update_tree()
finally:
_tag_tree_update_scheduled = False
transaction.on_commit(_run)

View File

@@ -1,6 +1,7 @@
from unittest import mock from unittest import mock
from django.contrib.auth.models import User from django.contrib.auth.models import User
from django.db import transaction
from rest_framework.test import APITestCase from rest_framework.test import APITestCase
from documents import bulk_edit from documents import bulk_edit
@@ -10,6 +11,7 @@ from documents.models import Workflow
from documents.models import WorkflowAction from documents.models import WorkflowAction
from documents.models import WorkflowTrigger from documents.models import WorkflowTrigger
from documents.serialisers import TagSerializer from documents.serialisers import TagSerializer
from documents.signals import handlers
from documents.signals.handlers import run_workflows from documents.signals.handlers import run_workflows
@@ -250,3 +252,31 @@ class TestTagHierarchy(APITestCase):
row for row in response.data["results"] if row["id"] == self.parent.pk row for row in response.data["results"] if row["id"] == self.parent.pk
) )
assert any(child["id"] == self.child.pk for child in parent_entry["children"]) assert any(child["id"] == self.child.pk for child in parent_entry["children"])
def test_tag_tree_deferred_update_runs_on_commit(self):
from django.db import transaction
# Create tags inside an explicit transaction and commit.
with transaction.atomic():
parent = Tag.objects.create(name="Parent 2")
child = Tag.objects.create(name="Child 2", tn_parent=parent)
# After commit, tn_* fields should be populated.
parent.refresh_from_db()
child.refresh_from_db()
assert parent.tn_children_count == 1
assert child.tn_ancestors_count == 1
def test_tag_tree_update_runs_once_per_transaction(self):
handlers._tag_tree_update_scheduled = False
with mock.patch("documents.signals.handlers.Tag.update_tree") as update_tree:
with self.captureOnCommitCallbacks(execute=True) as callbacks:
with transaction.atomic():
handlers.schedule_tag_tree_update()
handlers.schedule_tag_tree_update()
update_tree.assert_not_called()
assert handlers._tag_tree_update_scheduled is True
assert len(callbacks) == 1
update_tree.assert_called_once()
assert handlers._tag_tree_update_scheduled is False