From df07b8a03e1d7c992e8b7e8df60337d223437aa0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20M=C3=A9rino?= Date: Mon, 26 Jan 2026 21:10:57 +0100 Subject: [PATCH] Performance: faster statistics panel on dashboard (#11760) --- .../0007_document_content_length.py | 25 ++++++++ src/documents/models.py | 13 +++- src/documents/tests/test_api_documents.py | 4 ++ .../tests/test_management_exporter.py | 4 ++ src/documents/views.py | 61 +++++++++---------- 5 files changed, 73 insertions(+), 34 deletions(-) create mode 100644 src/documents/migrations/0007_document_content_length.py diff --git a/src/documents/migrations/0007_document_content_length.py b/src/documents/migrations/0007_document_content_length.py new file mode 100644 index 000000000..c294afca5 --- /dev/null +++ b/src/documents/migrations/0007_document_content_length.py @@ -0,0 +1,25 @@ +# Generated by Django 5.2.6 on 2026-01-24 07:33 + +import django.db.models.functions.text +from django.db import migrations +from django.db import models + + +class Migration(migrations.Migration): + dependencies = [ + ("documents", "0006_alter_document_checksum_unique"), + ] + + operations = [ + migrations.AddField( + model_name="document", + name="content_length", + field=models.GeneratedField( + db_persist=True, + expression=django.db.models.functions.text.Length("content"), + null=False, + help_text="Length of the content field in characters. Automatically maintained by the database for faster statistics computation.", + output_field=models.PositiveIntegerField(default=0), + ), + ), + ] diff --git a/src/documents/models.py b/src/documents/models.py index fe41796bd..ad5e66fe9 100644 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -20,7 +20,9 @@ if settings.AUDIT_LOG_ENABLED: from auditlog.registry import auditlog from django.db.models import Case +from django.db.models import PositiveIntegerField from django.db.models.functions import Cast +from django.db.models.functions import Length from django.db.models.functions import Substr from django_softdelete.models import SoftDeleteModel @@ -192,6 +194,15 @@ class Document(SoftDeleteModel, ModelWithOwner): ), ) + content_length = models.GeneratedField( + expression=Length("content"), + output_field=PositiveIntegerField(default=0), + db_persist=True, + null=False, + serialize=False, + help_text="Length of the content field in characters. Automatically maintained by the database for faster statistics computation.", + ) + mime_type = models.CharField(_("mime type"), max_length=256, editable=False) tags = models.ManyToManyField( @@ -945,7 +956,7 @@ if settings.AUDIT_LOG_ENABLED: auditlog.register( Document, m2m_fields={"tags"}, - exclude_fields=["modified"], + exclude_fields=["content_length", "modified"], ) auditlog.register(Correspondent) auditlog.register(Tag) diff --git a/src/documents/tests/test_api_documents.py b/src/documents/tests/test_api_documents.py index f40ef157f..96d22dc2c 100644 --- a/src/documents/tests/test_api_documents.py +++ b/src/documents/tests/test_api_documents.py @@ -131,6 +131,10 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase): self.assertIn("content", results_full[0]) self.assertIn("id", results_full[0]) + # Content length is used internally for performance reasons. + # No need to expose this field. + self.assertNotIn("content_length", results_full[0]) + response = self.client.get("/api/documents/?fields=id", format="json") self.assertEqual(response.status_code, status.HTTP_200_OK) results = response.data["results"] diff --git a/src/documents/tests/test_management_exporter.py b/src/documents/tests/test_management_exporter.py index 81262779a..c2a1360ca 100644 --- a/src/documents/tests/test_management_exporter.py +++ b/src/documents/tests/test_management_exporter.py @@ -241,6 +241,10 @@ class TestExportImport( checksum = hashlib.md5(f.read()).hexdigest() self.assertEqual(checksum, element["fields"]["checksum"]) + # Generated field "content_length" should not be exported, + # it is automatically computed during import. + self.assertNotIn("content_length", element["fields"]) + if document_exporter.EXPORTER_ARCHIVE_NAME in element: fname = ( self.target / element[document_exporter.EXPORTER_ARCHIVE_NAME] diff --git a/src/documents/views.py b/src/documents/views.py index c9ac9c848..88c9c5cf7 100644 --- a/src/documents/views.py +++ b/src/documents/views.py @@ -35,7 +35,6 @@ from django.db.models import Model from django.db.models import Q from django.db.models import Sum from django.db.models import When -from django.db.models.functions import Length from django.db.models.functions import Lower from django.db.models.manager import Manager from django.http import FileResponse @@ -2326,23 +2325,19 @@ class StatisticsView(GenericAPIView): user = request.user if request.user is not None else None documents = ( - ( - Document.objects.all() - if user is None - else get_objects_for_user_owner_aware( - user, - "documents.view_document", - Document, - ) + Document.objects.all() + if user is None + else get_objects_for_user_owner_aware( + user, + "documents.view_document", + Document, ) - .only("mime_type", "content") - .prefetch_related("tags") ) tags = ( Tag.objects.all() if user is None else get_objects_for_user_owner_aware(user, "documents.view_tag", Tag) - ) + ).only("id", "is_inbox_tag") correspondent_count = ( Correspondent.objects.count() if user is None @@ -2371,31 +2366,33 @@ class StatisticsView(GenericAPIView): ).count() ) - documents_total = documents.count() - - inbox_tags = tags.filter(is_inbox_tag=True) + inbox_tag_pks = list( + tags.filter(is_inbox_tag=True).values_list("pk", flat=True), + ) documents_inbox = ( - documents.filter(tags__id__in=inbox_tags).distinct().count() - if inbox_tags.exists() + documents.filter(tags__id__in=inbox_tag_pks).values("id").distinct().count() + if inbox_tag_pks else None ) - document_file_type_counts = ( + # Single SQL request for document stats and mime type counts + mime_type_stats = list( documents.values("mime_type") - .annotate(mime_type_count=Count("mime_type")) - .order_by("-mime_type_count") - if documents_total > 0 - else [] + .annotate( + mime_type_count=Count("id"), + mime_type_chars=Sum("content_length"), + ) + .order_by("-mime_type_count"), ) - character_count = ( - documents.annotate( - characters=Length("content"), - ) - .aggregate(Sum("characters")) - .get("characters__sum") - ) + # Calculate totals from grouped results + documents_total = sum(row["mime_type_count"] for row in mime_type_stats) + character_count = sum(row["mime_type_chars"] or 0 for row in mime_type_stats) + document_file_type_counts = [ + {"mime_type": row["mime_type"], "mime_type_count": row["mime_type_count"]} + for row in mime_type_stats + ] current_asn = Document.objects.aggregate( Max("archive_serial_number", default=0), @@ -2408,11 +2405,9 @@ class StatisticsView(GenericAPIView): "documents_total": documents_total, "documents_inbox": documents_inbox, "inbox_tag": ( - inbox_tags.first().pk if inbox_tags.exists() else None + inbox_tag_pks[0] if inbox_tag_pks else None ), # backwards compatibility - "inbox_tags": ( - [tag.pk for tag in inbox_tags] if inbox_tags.exists() else None - ), + "inbox_tags": (inbox_tag_pks if inbox_tag_pks else None), "document_file_type_counts": document_file_type_counts, "character_count": character_count, "tag_count": len(tags),