mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-01-26 22:49:01 -06:00
Performance: faster statistics panel on dashboard (#11760)
This commit is contained in:
25
src/documents/migrations/0007_document_content_length.py
Normal file
25
src/documents/migrations/0007_document_content_length.py
Normal file
@@ -0,0 +1,25 @@
|
||||
# Generated by Django 5.2.6 on 2026-01-24 07:33
|
||||
|
||||
import django.db.models.functions.text
|
||||
from django.db import migrations
|
||||
from django.db import models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
dependencies = [
|
||||
("documents", "0006_alter_document_checksum_unique"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AddField(
|
||||
model_name="document",
|
||||
name="content_length",
|
||||
field=models.GeneratedField(
|
||||
db_persist=True,
|
||||
expression=django.db.models.functions.text.Length("content"),
|
||||
null=False,
|
||||
help_text="Length of the content field in characters. Automatically maintained by the database for faster statistics computation.",
|
||||
output_field=models.PositiveIntegerField(default=0),
|
||||
),
|
||||
),
|
||||
]
|
||||
@@ -20,7 +20,9 @@ if settings.AUDIT_LOG_ENABLED:
|
||||
from auditlog.registry import auditlog
|
||||
|
||||
from django.db.models import Case
|
||||
from django.db.models import PositiveIntegerField
|
||||
from django.db.models.functions import Cast
|
||||
from django.db.models.functions import Length
|
||||
from django.db.models.functions import Substr
|
||||
from django_softdelete.models import SoftDeleteModel
|
||||
|
||||
@@ -192,6 +194,15 @@ class Document(SoftDeleteModel, ModelWithOwner):
|
||||
),
|
||||
)
|
||||
|
||||
content_length = models.GeneratedField(
|
||||
expression=Length("content"),
|
||||
output_field=PositiveIntegerField(default=0),
|
||||
db_persist=True,
|
||||
null=False,
|
||||
serialize=False,
|
||||
help_text="Length of the content field in characters. Automatically maintained by the database for faster statistics computation.",
|
||||
)
|
||||
|
||||
mime_type = models.CharField(_("mime type"), max_length=256, editable=False)
|
||||
|
||||
tags = models.ManyToManyField(
|
||||
@@ -945,7 +956,7 @@ if settings.AUDIT_LOG_ENABLED:
|
||||
auditlog.register(
|
||||
Document,
|
||||
m2m_fields={"tags"},
|
||||
exclude_fields=["modified"],
|
||||
exclude_fields=["content_length", "modified"],
|
||||
)
|
||||
auditlog.register(Correspondent)
|
||||
auditlog.register(Tag)
|
||||
|
||||
@@ -131,6 +131,10 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
|
||||
self.assertIn("content", results_full[0])
|
||||
self.assertIn("id", results_full[0])
|
||||
|
||||
# Content length is used internally for performance reasons.
|
||||
# No need to expose this field.
|
||||
self.assertNotIn("content_length", results_full[0])
|
||||
|
||||
response = self.client.get("/api/documents/?fields=id", format="json")
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
results = response.data["results"]
|
||||
|
||||
@@ -241,6 +241,10 @@ class TestExportImport(
|
||||
checksum = hashlib.md5(f.read()).hexdigest()
|
||||
self.assertEqual(checksum, element["fields"]["checksum"])
|
||||
|
||||
# Generated field "content_length" should not be exported,
|
||||
# it is automatically computed during import.
|
||||
self.assertNotIn("content_length", element["fields"])
|
||||
|
||||
if document_exporter.EXPORTER_ARCHIVE_NAME in element:
|
||||
fname = (
|
||||
self.target / element[document_exporter.EXPORTER_ARCHIVE_NAME]
|
||||
|
||||
@@ -35,7 +35,6 @@ from django.db.models import Model
|
||||
from django.db.models import Q
|
||||
from django.db.models import Sum
|
||||
from django.db.models import When
|
||||
from django.db.models.functions import Length
|
||||
from django.db.models.functions import Lower
|
||||
from django.db.models.manager import Manager
|
||||
from django.http import FileResponse
|
||||
@@ -2326,23 +2325,19 @@ class StatisticsView(GenericAPIView):
|
||||
user = request.user if request.user is not None else None
|
||||
|
||||
documents = (
|
||||
(
|
||||
Document.objects.all()
|
||||
if user is None
|
||||
else get_objects_for_user_owner_aware(
|
||||
user,
|
||||
"documents.view_document",
|
||||
Document,
|
||||
)
|
||||
Document.objects.all()
|
||||
if user is None
|
||||
else get_objects_for_user_owner_aware(
|
||||
user,
|
||||
"documents.view_document",
|
||||
Document,
|
||||
)
|
||||
.only("mime_type", "content")
|
||||
.prefetch_related("tags")
|
||||
)
|
||||
tags = (
|
||||
Tag.objects.all()
|
||||
if user is None
|
||||
else get_objects_for_user_owner_aware(user, "documents.view_tag", Tag)
|
||||
)
|
||||
).only("id", "is_inbox_tag")
|
||||
correspondent_count = (
|
||||
Correspondent.objects.count()
|
||||
if user is None
|
||||
@@ -2371,31 +2366,33 @@ class StatisticsView(GenericAPIView):
|
||||
).count()
|
||||
)
|
||||
|
||||
documents_total = documents.count()
|
||||
|
||||
inbox_tags = tags.filter(is_inbox_tag=True)
|
||||
inbox_tag_pks = list(
|
||||
tags.filter(is_inbox_tag=True).values_list("pk", flat=True),
|
||||
)
|
||||
|
||||
documents_inbox = (
|
||||
documents.filter(tags__id__in=inbox_tags).distinct().count()
|
||||
if inbox_tags.exists()
|
||||
documents.filter(tags__id__in=inbox_tag_pks).values("id").distinct().count()
|
||||
if inbox_tag_pks
|
||||
else None
|
||||
)
|
||||
|
||||
document_file_type_counts = (
|
||||
# Single SQL request for document stats and mime type counts
|
||||
mime_type_stats = list(
|
||||
documents.values("mime_type")
|
||||
.annotate(mime_type_count=Count("mime_type"))
|
||||
.order_by("-mime_type_count")
|
||||
if documents_total > 0
|
||||
else []
|
||||
.annotate(
|
||||
mime_type_count=Count("id"),
|
||||
mime_type_chars=Sum("content_length"),
|
||||
)
|
||||
.order_by("-mime_type_count"),
|
||||
)
|
||||
|
||||
character_count = (
|
||||
documents.annotate(
|
||||
characters=Length("content"),
|
||||
)
|
||||
.aggregate(Sum("characters"))
|
||||
.get("characters__sum")
|
||||
)
|
||||
# Calculate totals from grouped results
|
||||
documents_total = sum(row["mime_type_count"] for row in mime_type_stats)
|
||||
character_count = sum(row["mime_type_chars"] or 0 for row in mime_type_stats)
|
||||
document_file_type_counts = [
|
||||
{"mime_type": row["mime_type"], "mime_type_count": row["mime_type_count"]}
|
||||
for row in mime_type_stats
|
||||
]
|
||||
|
||||
current_asn = Document.objects.aggregate(
|
||||
Max("archive_serial_number", default=0),
|
||||
@@ -2408,11 +2405,9 @@ class StatisticsView(GenericAPIView):
|
||||
"documents_total": documents_total,
|
||||
"documents_inbox": documents_inbox,
|
||||
"inbox_tag": (
|
||||
inbox_tags.first().pk if inbox_tags.exists() else None
|
||||
inbox_tag_pks[0] if inbox_tag_pks else None
|
||||
), # backwards compatibility
|
||||
"inbox_tags": (
|
||||
[tag.pk for tag in inbox_tags] if inbox_tags.exists() else None
|
||||
),
|
||||
"inbox_tags": (inbox_tag_pks if inbox_tag_pks else None),
|
||||
"document_file_type_counts": document_file_type_counts,
|
||||
"character_count": character_count,
|
||||
"tag_count": len(tags),
|
||||
|
||||
Reference in New Issue
Block a user