Performance: faster statistics panel on dashboard (#11760)

This commit is contained in:
Antoine Mérino
2026-01-26 21:10:57 +01:00
committed by GitHub
parent cac1b721b9
commit df07b8a03e
5 changed files with 73 additions and 34 deletions

View File

@@ -0,0 +1,25 @@
# Generated by Django 5.2.6 on 2026-01-24 07:33
import django.db.models.functions.text
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
    """Add the ``content_length`` generated column to the ``document`` model.

    The column is a persisted ``GeneratedField`` whose value is
    ``Length("content")``; per its help text it exists so statistics can sum
    a stored integer instead of measuring every document's content.
    """

    dependencies = [
        ("documents", "0006_alter_document_checksum_unique"),
    ]

    operations = [
        migrations.AddField(
            model_name="document",
            name="content_length",
            field=models.GeneratedField(
                db_persist=True,
                expression=django.db.models.functions.text.Length("content"),
                null=False,
                # The model declares this field with serialize=False. Mirror
                # it here so the migration state matches the model state and
                # the autodetector does not report a pending AlterField.
                # This flag does not affect the database schema.
                serialize=False,
                help_text="Length of the content field in characters. Automatically maintained by the database for faster statistics computation.",
                output_field=models.PositiveIntegerField(default=0),
            ),
        ),
    ]

View File

@@ -20,7 +20,9 @@ if settings.AUDIT_LOG_ENABLED:
from auditlog.registry import auditlog from auditlog.registry import auditlog
from django.db.models import Case from django.db.models import Case
from django.db.models import PositiveIntegerField
from django.db.models.functions import Cast from django.db.models.functions import Cast
from django.db.models.functions import Length
from django.db.models.functions import Substr from django.db.models.functions import Substr
from django_softdelete.models import SoftDeleteModel from django_softdelete.models import SoftDeleteModel
@@ -192,6 +194,15 @@ class Document(SoftDeleteModel, ModelWithOwner):
), ),
) )
content_length = models.GeneratedField(
expression=Length("content"),
output_field=PositiveIntegerField(default=0),
db_persist=True,
null=False,
serialize=False,
help_text="Length of the content field in characters. Automatically maintained by the database for faster statistics computation.",
)
mime_type = models.CharField(_("mime type"), max_length=256, editable=False) mime_type = models.CharField(_("mime type"), max_length=256, editable=False)
tags = models.ManyToManyField( tags = models.ManyToManyField(
@@ -945,7 +956,7 @@ if settings.AUDIT_LOG_ENABLED:
auditlog.register( auditlog.register(
Document, Document,
m2m_fields={"tags"}, m2m_fields={"tags"},
exclude_fields=["modified"], exclude_fields=["content_length", "modified"],
) )
auditlog.register(Correspondent) auditlog.register(Correspondent)
auditlog.register(Tag) auditlog.register(Tag)

View File

@@ -131,6 +131,10 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
self.assertIn("content", results_full[0]) self.assertIn("content", results_full[0])
self.assertIn("id", results_full[0]) self.assertIn("id", results_full[0])
# Content length is used internally for performance reasons.
# No need to expose this field.
self.assertNotIn("content_length", results_full[0])
response = self.client.get("/api/documents/?fields=id", format="json") response = self.client.get("/api/documents/?fields=id", format="json")
self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEqual(response.status_code, status.HTTP_200_OK)
results = response.data["results"] results = response.data["results"]

View File

@@ -241,6 +241,10 @@ class TestExportImport(
checksum = hashlib.md5(f.read()).hexdigest() checksum = hashlib.md5(f.read()).hexdigest()
self.assertEqual(checksum, element["fields"]["checksum"]) self.assertEqual(checksum, element["fields"]["checksum"])
# Generated field "content_length" should not be exported,
# it is automatically computed during import.
self.assertNotIn("content_length", element["fields"])
if document_exporter.EXPORTER_ARCHIVE_NAME in element: if document_exporter.EXPORTER_ARCHIVE_NAME in element:
fname = ( fname = (
self.target / element[document_exporter.EXPORTER_ARCHIVE_NAME] self.target / element[document_exporter.EXPORTER_ARCHIVE_NAME]

View File

@@ -35,7 +35,6 @@ from django.db.models import Model
from django.db.models import Q from django.db.models import Q
from django.db.models import Sum from django.db.models import Sum
from django.db.models import When from django.db.models import When
from django.db.models.functions import Length
from django.db.models.functions import Lower from django.db.models.functions import Lower
from django.db.models.manager import Manager from django.db.models.manager import Manager
from django.http import FileResponse from django.http import FileResponse
@@ -2326,23 +2325,19 @@ class StatisticsView(GenericAPIView):
user = request.user if request.user is not None else None user = request.user if request.user is not None else None
documents = ( documents = (
( Document.objects.all()
Document.objects.all() if user is None
if user is None else get_objects_for_user_owner_aware(
else get_objects_for_user_owner_aware( user,
user, "documents.view_document",
"documents.view_document", Document,
Document,
)
) )
.only("mime_type", "content")
.prefetch_related("tags")
) )
tags = ( tags = (
Tag.objects.all() Tag.objects.all()
if user is None if user is None
else get_objects_for_user_owner_aware(user, "documents.view_tag", Tag) else get_objects_for_user_owner_aware(user, "documents.view_tag", Tag)
) ).only("id", "is_inbox_tag")
correspondent_count = ( correspondent_count = (
Correspondent.objects.count() Correspondent.objects.count()
if user is None if user is None
@@ -2371,31 +2366,33 @@ class StatisticsView(GenericAPIView):
).count() ).count()
) )
documents_total = documents.count() inbox_tag_pks = list(
tags.filter(is_inbox_tag=True).values_list("pk", flat=True),
inbox_tags = tags.filter(is_inbox_tag=True) )
documents_inbox = ( documents_inbox = (
documents.filter(tags__id__in=inbox_tags).distinct().count() documents.filter(tags__id__in=inbox_tag_pks).values("id").distinct().count()
if inbox_tags.exists() if inbox_tag_pks
else None else None
) )
document_file_type_counts = ( # Single SQL request for document stats and mime type counts
mime_type_stats = list(
documents.values("mime_type") documents.values("mime_type")
.annotate(mime_type_count=Count("mime_type")) .annotate(
.order_by("-mime_type_count") mime_type_count=Count("id"),
if documents_total > 0 mime_type_chars=Sum("content_length"),
else [] )
.order_by("-mime_type_count"),
) )
character_count = ( # Calculate totals from grouped results
documents.annotate( documents_total = sum(row["mime_type_count"] for row in mime_type_stats)
characters=Length("content"), character_count = sum(row["mime_type_chars"] or 0 for row in mime_type_stats)
) document_file_type_counts = [
.aggregate(Sum("characters")) {"mime_type": row["mime_type"], "mime_type_count": row["mime_type_count"]}
.get("characters__sum") for row in mime_type_stats
) ]
current_asn = Document.objects.aggregate( current_asn = Document.objects.aggregate(
Max("archive_serial_number", default=0), Max("archive_serial_number", default=0),
@@ -2408,11 +2405,9 @@ class StatisticsView(GenericAPIView):
"documents_total": documents_total, "documents_total": documents_total,
"documents_inbox": documents_inbox, "documents_inbox": documents_inbox,
"inbox_tag": ( "inbox_tag": (
inbox_tags.first().pk if inbox_tags.exists() else None inbox_tag_pks[0] if inbox_tag_pks else None
), # backwards compatibility ), # backwards compatibility
"inbox_tags": ( "inbox_tags": (inbox_tag_pks if inbox_tag_pks else None),
[tag.pk for tag in inbox_tags] if inbox_tags.exists() else None
),
"document_file_type_counts": document_file_type_counts, "document_file_type_counts": document_file_type_counts,
"character_count": character_count, "character_count": character_count,
"tag_count": len(tags), "tag_count": len(tags),