Performance: faster statistics panel on dashboard (#11760)

This commit is contained in:
Antoine Mérino
2026-01-26 21:10:57 +01:00
committed by GitHub
parent cac1b721b9
commit df07b8a03e
5 changed files with 73 additions and 34 deletions

View File

@@ -0,0 +1,25 @@
# Generated by Django 5.2.6 on 2026-01-24 07:33
import django.db.models.functions.text
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
    """Add the database-generated ``content_length`` column to ``Document``.

    The column is a persisted generated field whose value is
    ``Length("content")``; the database keeps it up to date so that
    statistics can sum the stored lengths instead of measuring every
    document's full text.
    """

    dependencies = [
        ("documents", "0006_alter_document_checksum_unique"),
    ]

    operations = [
        migrations.AddField(
            model_name="document",
            name="content_length",
            field=models.GeneratedField(
                db_persist=True,
                expression=django.db.models.functions.text.Length("content"),
                null=False,
                # Matches serialize=False on the Document model field so the
                # migration state equals the model definition; this keyword
                # only affects serialization, not the database schema.
                serialize=False,
                help_text="Length of the content field in characters. Automatically maintained by the database for faster statistics computation.",
                output_field=models.PositiveIntegerField(default=0),
            ),
        ),
    ]

View File

@@ -20,7 +20,9 @@ if settings.AUDIT_LOG_ENABLED:
from auditlog.registry import auditlog
from django.db.models import Case
from django.db.models import PositiveIntegerField
from django.db.models.functions import Cast
from django.db.models.functions import Length
from django.db.models.functions import Substr
from django_softdelete.models import SoftDeleteModel
@@ -192,6 +194,15 @@ class Document(SoftDeleteModel, ModelWithOwner):
),
)
# Character count of ``content``. The value is derived by the database from
# Length("content") and stored in its own column (db_persist=True), so
# aggregate queries can sum it directly instead of measuring the text.
# serialize=False is intended to keep this derived value out of serialized
# model output. Keep these arguments in sync with the migration that adds
# the column.
content_length = models.GeneratedField(
expression=Length("content"),
output_field=PositiveIntegerField(default=0),
db_persist=True,
null=False,
serialize=False,
help_text="Length of the content field in characters. Automatically maintained by the database for faster statistics computation.",
)
mime_type = models.CharField(_("mime type"), max_length=256, editable=False)
tags = models.ManyToManyField(
@@ -945,7 +956,7 @@ if settings.AUDIT_LOG_ENABLED:
auditlog.register(
Document,
m2m_fields={"tags"},
exclude_fields=["modified"],
exclude_fields=["content_length", "modified"],
)
auditlog.register(Correspondent)
auditlog.register(Tag)

View File

@@ -131,6 +131,10 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
self.assertIn("content", results_full[0])
self.assertIn("id", results_full[0])
# Content length is used internally for performance reasons.
# No need to expose this field.
self.assertNotIn("content_length", results_full[0])
response = self.client.get("/api/documents/?fields=id", format="json")
self.assertEqual(response.status_code, status.HTTP_200_OK)
results = response.data["results"]

View File

@@ -241,6 +241,10 @@ class TestExportImport(
checksum = hashlib.md5(f.read()).hexdigest()
self.assertEqual(checksum, element["fields"]["checksum"])
# The generated field "content_length" should not be exported;
# it is computed automatically during import.
self.assertNotIn("content_length", element["fields"])
if document_exporter.EXPORTER_ARCHIVE_NAME in element:
fname = (
self.target / element[document_exporter.EXPORTER_ARCHIVE_NAME]

View File

@@ -35,7 +35,6 @@ from django.db.models import Model
from django.db.models import Q
from django.db.models import Sum
from django.db.models import When
from django.db.models.functions import Length
from django.db.models.functions import Lower
from django.db.models.manager import Manager
from django.http import FileResponse
@@ -2326,23 +2325,19 @@ class StatisticsView(GenericAPIView):
user = request.user if request.user is not None else None
documents = (
(
Document.objects.all()
if user is None
else get_objects_for_user_owner_aware(
user,
"documents.view_document",
Document,
)
Document.objects.all()
if user is None
else get_objects_for_user_owner_aware(
user,
"documents.view_document",
Document,
)
.only("mime_type", "content")
.prefetch_related("tags")
)
tags = (
Tag.objects.all()
if user is None
else get_objects_for_user_owner_aware(user, "documents.view_tag", Tag)
)
).only("id", "is_inbox_tag")
correspondent_count = (
Correspondent.objects.count()
if user is None
@@ -2371,31 +2366,33 @@ class StatisticsView(GenericAPIView):
).count()
)
documents_total = documents.count()
inbox_tags = tags.filter(is_inbox_tag=True)
inbox_tag_pks = list(
tags.filter(is_inbox_tag=True).values_list("pk", flat=True),
)
documents_inbox = (
documents.filter(tags__id__in=inbox_tags).distinct().count()
if inbox_tags.exists()
documents.filter(tags__id__in=inbox_tag_pks).values("id").distinct().count()
if inbox_tag_pks
else None
)
document_file_type_counts = (
# Single SQL request for document stats and mime type counts
mime_type_stats = list(
documents.values("mime_type")
.annotate(mime_type_count=Count("mime_type"))
.order_by("-mime_type_count")
if documents_total > 0
else []
.annotate(
mime_type_count=Count("id"),
mime_type_chars=Sum("content_length"),
)
.order_by("-mime_type_count"),
)
character_count = (
documents.annotate(
characters=Length("content"),
)
.aggregate(Sum("characters"))
.get("characters__sum")
)
# Calculate totals from grouped results
documents_total = sum(row["mime_type_count"] for row in mime_type_stats)
character_count = sum(row["mime_type_chars"] or 0 for row in mime_type_stats)
document_file_type_counts = [
{"mime_type": row["mime_type"], "mime_type_count": row["mime_type_count"]}
for row in mime_type_stats
]
current_asn = Document.objects.aggregate(
Max("archive_serial_number", default=0),
@@ -2408,11 +2405,9 @@ class StatisticsView(GenericAPIView):
"documents_total": documents_total,
"documents_inbox": documents_inbox,
"inbox_tag": (
inbox_tags.first().pk if inbox_tags.exists() else None
inbox_tag_pks[0] if inbox_tag_pks else None
), # backwards compatibility
"inbox_tags": (
[tag.pk for tag in inbox_tags] if inbox_tags.exists() else None
),
"inbox_tags": (inbox_tag_pks if inbox_tag_pks else None),
"document_file_type_counts": document_file_type_counts,
"character_count": character_count,
"tag_count": len(tags),