diff --git a/src/documents/apps.py b/src/documents/apps.py
index f3b798c0b..5b5df42d2 100644
--- a/src/documents/apps.py
+++ b/src/documents/apps.py
@@ -1,5 +1,9 @@
 from django.apps import AppConfig
+from django.db.models.signals import post_delete
+from django.db.models.signals import post_save
 from django.utils.translation import gettext_lazy as _
+from treenode.signals import post_delete_treenode
+from treenode.signals import post_save_treenode
 
 
 class DocumentsConfig(AppConfig):
@@ -8,12 +12,14 @@ class DocumentsConfig(AppConfig):
     verbose_name = _("Documents")
 
     def ready(self):
+        from documents.models import Tag
         from documents.signals import document_consumption_finished
         from documents.signals import document_updated
         from documents.signals.handlers import add_inbox_tags
         from documents.signals.handlers import add_to_index
         from documents.signals.handlers import run_workflows_added
         from documents.signals.handlers import run_workflows_updated
+        from documents.signals.handlers import schedule_tag_tree_update
         from documents.signals.handlers import set_correspondent
         from documents.signals.handlers import set_document_type
         from documents.signals.handlers import set_storage_path
@@ -28,6 +34,32 @@ class DocumentsConfig(AppConfig):
         document_consumption_finished.connect(run_workflows_added)
         document_updated.connect(run_workflows_updated)
 
+        # treenode updates the entire tree on every save/delete via hooks,
+        # so disconnect them for Tag and rebuild once per transaction instead.
+        # NOTE(review): Signal.disconnect() only removes a receiver registered
+        # with the same sender and dispatch_uid. Confirm how treenode connects
+        # these hooks; if it used no sender, both calls disconnect nothing.
+        post_save.disconnect(
+            post_save_treenode,
+            sender=Tag,
+            dispatch_uid="post_save_treenode",
+        )
+        post_delete.disconnect(
+            post_delete_treenode,
+            sender=Tag,
+            dispatch_uid="post_delete_treenode",
+        )
+        post_save.connect(
+            schedule_tag_tree_update,
+            sender=Tag,
+            dispatch_uid="paperless_tag_mark_dirty_save",
+        )
+        post_delete.connect(
+            schedule_tag_tree_update,
+            sender=Tag,
+            dispatch_uid="paperless_tag_mark_dirty_delete",
+        )
+
         import documents.schema  # noqa: F401
 
         AppConfig.ready(self)
diff --git a/src/documents/signals/handlers.py b/src/documents/signals/handlers.py
index 5f2c8b4b2..7d59876f8 100644
--- a/src/documents/signals/handlers.py
+++ b/src/documents/signals/handlers.py
@@ -19,6 +19,7 @@ from django.db import DatabaseError
 from django.db import close_old_connections
 from django.db import connections
 from django.db import models
+from django.db import transaction
 from django.db.models import Q
 from django.dispatch import receiver
 from django.utils import timezone
@@ -60,6 +61,9 @@ if TYPE_CHECKING:
 
 logger = logging.getLogger("paperless.handlers")
 
+# Name of the per-connection attribute holding the deferred tag tree state.
+_TAG_TREE_STATE_ATTR = "_paperless_tag_tree_update_state"
+
 
 def add_inbox_tags(sender, document: Document, logging_group=None, **kwargs):
     if document.owner is not None:
@@ -944,3 +948,41 @@ def close_connection_pool_on_worker_init(**kwargs):
     for conn in connections.all(initialized_only=True):
         if conn.alias == "default" and hasattr(conn, "pool") and conn.pool:
             conn.close_pool()
+
+
+def _run_tag_tree_update():
+    """
+    Rebuild the tag tree if an update is still pending on this connection.
+
+    Registered through transaction.on_commit(); only the first callback run
+    after a commit does the work, the remaining ones find nothing pending.
+    """
+    conn = connections["default"]
+    if getattr(conn, _TAG_TREE_STATE_ATTR, None) != "dirty":
+        return
+    setattr(conn, _TAG_TREE_STATE_ATTR, "running")
+    try:
+        Tag.update_tree()
+    finally:
+        setattr(conn, _TAG_TREE_STATE_ATTR, None)
+
+
+def schedule_tag_tree_update(**_kwargs):
+    """
+    Schedule a single Tag.update_tree() at transaction commit.
+
+    Treenode's default post_save hooks rebuild the entire tree on every save,
+    which is very slow for large tag sets, so collapse to one update per
+    transaction.
+
+    State is kept on the database connection and a callback is registered for
+    every signal: on_commit callbacks are dropped when a transaction rolls
+    back, so a module-level "already scheduled" flag would never be reset
+    after a rollback and would suppress every later update.
+    """
+    conn = connections["default"]
+    if getattr(conn, _TAG_TREE_STATE_ATTR, None) == "running":
+        # Signals raised while the rebuild itself runs must not re-schedule it.
+        return
+    setattr(conn, _TAG_TREE_STATE_ATTR, "dirty")
+    transaction.on_commit(_run_tag_tree_update)
diff --git a/src/documents/tests/test_tag_hierarchy.py b/src/documents/tests/test_tag_hierarchy.py
index e748225cd..08d317f26 100644
--- a/src/documents/tests/test_tag_hierarchy.py
+++ b/src/documents/tests/test_tag_hierarchy.py
@@ -250,3 +250,19 @@ class TestTagHierarchy(APITestCase):
             row for row in response.data["results"] if row["id"] == self.parent.pk
         )
         assert any(child["id"] == self.child.pk for child in parent_entry["children"])
+
+    def test_tag_tree_deferred_update_runs_on_commit(self):
+        from django.db import transaction
+
+        # Django's TestCase never commits its wrapping transaction, so the
+        # on_commit callbacks only run when captured and executed explicitly.
+        with self.captureOnCommitCallbacks(execute=True) as callbacks:
+            with transaction.atomic():
+                parent = Tag.objects.create(name="Parent 2")
+                child = Tag.objects.create(name="Child 2", tn_parent=parent)
+        assert callbacks
+        # After the callbacks ran, tn_* fields should be populated.
+        parent.refresh_from_db()
+        child.refresh_from_db()
+        assert parent.tn_children_count == 1
+        assert child.tn_ancestors_count == 1