mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-01-30 23:08:59 -06:00
Optimize tag/custom-field counts with subqueries
This commit is contained in:
@@ -2,10 +2,17 @@ from django.contrib.auth.models import Group
|
|||||||
from django.contrib.auth.models import Permission
|
from django.contrib.auth.models import Permission
|
||||||
from django.contrib.auth.models import User
|
from django.contrib.auth.models import User
|
||||||
from django.contrib.contenttypes.models import ContentType
|
from django.contrib.contenttypes.models import ContentType
|
||||||
|
from django.db.models import Count
|
||||||
|
from django.db.models import IntegerField
|
||||||
|
from django.db.models import OuterRef
|
||||||
from django.db.models import Q
|
from django.db.models import Q
|
||||||
from django.db.models import QuerySet
|
from django.db.models import QuerySet
|
||||||
|
from django.db.models import Subquery
|
||||||
|
from django.db.models.functions import Cast
|
||||||
|
from django.db.models.functions import Coalesce
|
||||||
from guardian.core import ObjectPermissionChecker
|
from guardian.core import ObjectPermissionChecker
|
||||||
from guardian.models import GroupObjectPermission
|
from guardian.models import GroupObjectPermission
|
||||||
|
from guardian.models import UserObjectPermission
|
||||||
from guardian.shortcuts import assign_perm
|
from guardian.shortcuts import assign_perm
|
||||||
from guardian.shortcuts import get_objects_for_user
|
from guardian.shortcuts import get_objects_for_user
|
||||||
from guardian.shortcuts import get_users_with_perms
|
from guardian.shortcuts import get_users_with_perms
|
||||||
@@ -129,24 +136,90 @@ def set_permissions_for_object(permissions: dict, object, *, merge: bool = False
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def _permitted_document_ids(user):
    """
    Build a queryset of the ids of non-deleted documents that ``user`` may view.

    The result is a lazy ``values_list("id", flat=True)`` queryset, meant to be
    embedded as a subquery. ``get_objects_for_user`` is deliberately not used
    so the subquery stays small and index-friendly.

    Visibility rules:
      * ``None`` or unauthenticated user: only documents without an owner.
      * superuser: every non-deleted document.
      * anyone else: documents they own, documents without an owner, and
        documents for which a ``view_document`` object permission exists for
        the user directly or for one of the user's groups.
    """
    live_documents = Document.objects.filter(deleted_at__isnull=True)

    authenticated = user is not None and getattr(user, "is_authenticated", False)
    if not authenticated:
        return live_documents.filter(owner__isnull=True).values_list("id", flat=True)

    if getattr(user, "is_superuser", False):
        return live_documents.values_list("id", flat=True)

    view_permission = {
        "permission__codename": "view_document",
        "permission__content_type": ContentType.objects.get_for_model(Document),
    }

    def _granted_ids(grants):
        # object_pk is cast to an integer so it can be matched against
        # Document ids in the ``id__in`` lookup below.
        return grants.annotate(
            object_pk_int=Cast("object_pk", IntegerField()),
        ).values_list("object_pk_int", flat=True)

    direct_grants = _granted_ids(
        UserObjectPermission.objects.filter(user=user, **view_permission),
    )
    group_grants = _granted_ids(
        GroupObjectPermission.objects.filter(group__user=user, **view_permission),
    )
    granted_ids = direct_grants.union(group_grants)

    visible = Q(owner=user) | Q(owner__isnull=True) | Q(id__in=granted_ids)
    return live_documents.filter(visible).values_list("id", flat=True)
|
||||||
|
|
||||||
|
|
||||||
|
def get_document_count_filter_for_user(user, *, relation_prefix: str = "documents"):
    """
    Build the ``Q`` filter used when counting documents visible to ``user``.

    The filter is a single ``<relation_prefix>__id__in`` lookup against the
    subquery of permitted document ids, which keeps the generated SQL simple
    and avoids large OR clauses.

    Args:
        user: the user whose visible documents should be counted
        relation_prefix: name of the relation leading to Document from the
            model being annotated (defaults to ``"documents"``)
    """
    lookup = f"{relation_prefix}__id__in"
    return Q(**{lookup: _permitted_document_ids(user)})
|
||||||
|
|
||||||
|
|
||||||
|
def annotate_document_count_for_related_queryset(
    queryset,
    through_model,
    source_field: str,
    target_field: str,
    user=None,
):
    """
    Add a permissions-aware ``document_count`` annotation to ``queryset``.

    The count is computed by a correlated subquery over the relation table
    rather than by joining the documents table into the outer query.

    Args:
        queryset: base queryset to annotate (must contain pk)
        through_model: model representing the relation (e.g.,
            Document.tags.through or CustomFieldInstance)
        source_field: field on the relation pointing back to queryset pk
        target_field: field on the relation pointing to Document id
        user: the user for whom to filter permitted document ids

    Returns:
        ``queryset`` annotated with ``document_count``; rows with no matching
        relation entries get 0 instead of NULL.
    """
    relation_filter = {
        source_field: OuterRef("pk"),
        f"{target_field}__in": _permitted_document_ids(user),
    }
    # Group by the source field so the subquery yields one count per outer
    # row; the slice keeps it to a single-row, single-column result.
    per_object_count = (
        through_model.objects.filter(**relation_filter)
        .values(source_field)
        .annotate(c=Count(target_field))
        .values("c")[:1]
    )
    return queryset.annotate(
        document_count=Coalesce(Subquery(per_object_count), 0),
    )
|
||||||
|
|
||||||
|
|
||||||
def get_objects_for_user_owner_aware(user, perms, Model) -> QuerySet:
|
def get_objects_for_user_owner_aware(user, perms, Model) -> QuerySet:
|
||||||
objects_owned = Model.objects.filter(owner=user)
|
objects_owned = Model.objects.filter(owner=user)
|
||||||
|
|||||||
@@ -32,7 +32,6 @@ from django.db.models import Count
|
|||||||
from django.db.models import IntegerField
|
from django.db.models import IntegerField
|
||||||
from django.db.models import Max
|
from django.db.models import Max
|
||||||
from django.db.models import Model
|
from django.db.models import Model
|
||||||
from django.db.models import Q
|
|
||||||
from django.db.models import Sum
|
from django.db.models import Sum
|
||||||
from django.db.models import When
|
from django.db.models import When
|
||||||
from django.db.models.functions import Length
|
from django.db.models.functions import Length
|
||||||
@@ -128,6 +127,7 @@ from documents.matching import match_storage_paths
|
|||||||
from documents.matching import match_tags
|
from documents.matching import match_tags
|
||||||
from documents.models import Correspondent
|
from documents.models import Correspondent
|
||||||
from documents.models import CustomField
|
from documents.models import CustomField
|
||||||
|
from documents.models import CustomFieldInstance
|
||||||
from documents.models import Document
|
from documents.models import Document
|
||||||
from documents.models import DocumentType
|
from documents.models import DocumentType
|
||||||
from documents.models import Note
|
from documents.models import Note
|
||||||
@@ -147,6 +147,7 @@ from documents.permissions import PaperlessAdminPermissions
|
|||||||
from documents.permissions import PaperlessNotePermissions
|
from documents.permissions import PaperlessNotePermissions
|
||||||
from documents.permissions import PaperlessObjectPermissions
|
from documents.permissions import PaperlessObjectPermissions
|
||||||
from documents.permissions import ViewDocumentsPermissions
|
from documents.permissions import ViewDocumentsPermissions
|
||||||
|
from documents.permissions import annotate_document_count_for_related_queryset
|
||||||
from documents.permissions import get_document_count_filter_for_user
|
from documents.permissions import get_document_count_filter_for_user
|
||||||
from documents.permissions import get_objects_for_user_owner_aware
|
from documents.permissions import get_objects_for_user_owner_aware
|
||||||
from documents.permissions import has_perms_owner_aware
|
from documents.permissions import has_perms_owner_aware
|
||||||
@@ -429,6 +430,26 @@ class TagViewSet(ModelViewSet, PermissionsAwareDocumentCountMixin):
|
|||||||
Lower("name"),
|
Lower("name"),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def _with_document_counts(self, queryset):
    """
    Return ``queryset`` of tags annotated with a permissions-aware
    ``document_count``.

    Only the tag/document through table and a compact subquery of permitted
    document ids are involved, so PostgreSQL does not have to evaluate large
    OR clauses against the documents table for every tag.
    """
    return annotate_document_count_for_related_queryset(
        queryset,
        through_model=Document.tags.through,
        source_field="tag_id",
        target_field="document_id",
        # The request may not carry a user; fall back to None in that case.
        user=getattr(self.request, "user", None),
    )
|
||||||
|
|
||||||
|
def get_queryset(self):
    """Return a fresh copy of the class queryset annotated with document counts."""
    tags = self.queryset.all()
    return self._with_document_counts(tags)
|
||||||
|
|
||||||
def get_serializer_class(self, *args, **kwargs):
|
def get_serializer_class(self, *args, **kwargs):
|
||||||
if int(self.request.version) == 1:
|
if int(self.request.version) == 1:
|
||||||
return TagSerializerVersion1
|
return TagSerializerVersion1
|
||||||
@@ -466,12 +487,12 @@ class TagViewSet(ModelViewSet, PermissionsAwareDocumentCountMixin):
|
|||||||
descendant_pks = {pk for tag in all_tags for pk in tag.get_descendants_pks()}
|
descendant_pks = {pk for tag in all_tags for pk in tag.get_descendants_pks()}
|
||||||
|
|
||||||
if descendant_pks:
|
if descendant_pks:
|
||||||
filter_q = self.get_document_count_filter()
|
|
||||||
children_source = list(
|
children_source = list(
|
||||||
Tag.objects.filter(pk__in=descendant_pks | {t.pk for t in all_tags})
|
self._with_document_counts(
|
||||||
.select_related("owner")
|
Tag.objects.filter(pk__in=descendant_pks | {t.pk for t in all_tags})
|
||||||
.annotate(document_count=Count("documents", filter=filter_q))
|
.select_related("owner")
|
||||||
.order_by(*ordering),
|
.order_by(*ordering),
|
||||||
|
),
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
children_source = all_tags
|
children_source = all_tags
|
||||||
@@ -2874,31 +2895,26 @@ class CustomFieldViewSet(ModelViewSet):
|
|||||||
|
|
||||||
queryset = CustomField.objects.all().order_by("-created")
|
queryset = CustomField.objects.all().order_by("-created")
|
||||||
|
|
||||||
|
def _with_document_counts(self, queryset):
    """
    Return ``queryset`` of custom fields annotated with a permissions-aware
    ``document_count``.

    The count covers CustomFieldInstance rows whose document the current
    user may view. It is computed with a correlated subquery, avoiding the
    large joins that previously caused timeouts on big datasets.
    """
    return annotate_document_count_for_related_queryset(
        queryset,
        through_model=CustomFieldInstance,
        source_field="field_id",
        target_field="document_id",
        # The request may not carry a user; fall back to None in that case.
        user=getattr(self.request, "user", None),
    )
|
||||||
|
|
||||||
def get_queryset(self):
    """Return the parent viewset's queryset annotated with document counts."""
    return self._with_document_counts(super().get_queryset())
|
|
||||||
|
|
||||||
|
|
||||||
@extend_schema_view(
|
@extend_schema_view(
|
||||||
|
|||||||
Reference in New Issue
Block a user