diff --git a/src/documents/permissions.py b/src/documents/permissions.py index ac6d3f9ca..b8ec01d7e 100644 --- a/src/documents/permissions.py +++ b/src/documents/permissions.py @@ -2,10 +2,17 @@ from django.contrib.auth.models import Group from django.contrib.auth.models import Permission from django.contrib.auth.models import User from django.contrib.contenttypes.models import ContentType +from django.db.models import Count +from django.db.models import IntegerField +from django.db.models import OuterRef from django.db.models import Q from django.db.models import QuerySet +from django.db.models import Subquery +from django.db.models.functions import Cast +from django.db.models.functions import Coalesce from guardian.core import ObjectPermissionChecker from guardian.models import GroupObjectPermission +from guardian.models import UserObjectPermission from guardian.shortcuts import assign_perm from guardian.shortcuts import get_objects_for_user from guardian.shortcuts import get_users_with_perms @@ -129,24 +136,90 @@ def set_permissions_for_object(permissions: dict, object, *, merge: bool = False ) -def get_document_count_filter_for_user(user): +def _permitted_document_ids(user): """ - Return the Q object used to filter document counts for the given user. + Return a queryset of document IDs the user may view, limited to non-deleted + documents. This intentionally avoids ``get_objects_for_user`` to keep the + subquery small and index-friendly. """ + base_docs = Document.objects.filter(deleted_at__isnull=True) + if user is None or not getattr(user, "is_authenticated", False): - return Q(documents__deleted_at__isnull=True, documents__owner__isnull=True) + return base_docs.filter(owner__isnull=True).values_list("id", flat=True) + if getattr(user, "is_superuser", False): - return Q(documents__deleted_at__isnull=True) - return Q( - documents__deleted_at__isnull=True, - documents__id__in=get_objects_for_user_owner_aware( - user, - "documents.view_document", - Document, - ).values_list("id", flat=True), + return base_docs.values_list("id", flat=True) + + document_ct = ContentType.objects.get_for_model(Document) + perm_filter = { + "permission__codename": "view_document", + "permission__content_type": document_ct, + } + + user_perm_docs = ( + UserObjectPermission.objects.filter(user=user, **perm_filter) + .annotate(object_pk_int=Cast("object_pk", IntegerField())) + .values_list("object_pk_int", flat=True) ) + group_perm_docs = ( + GroupObjectPermission.objects.filter(group__user=user, **perm_filter) + .annotate(object_pk_int=Cast("object_pk", IntegerField())) + .values_list("object_pk_int", flat=True) + ) + + permitted_documents = user_perm_docs.union(group_perm_docs) + + return base_docs.filter( + Q(owner=user) | Q(owner__isnull=True) | Q(id__in=permitted_documents), + ).values_list("id", flat=True) + + +def get_document_count_filter_for_user(user, *, relation_prefix: str = "documents"): + """ + Return the Q object used to filter document counts for the given user. + + The filter is expressed as an ``id__in`` against a small subquery of permitted + document IDs to keep the generated SQL simple and avoid large OR clauses. + """ + + id_key = f"{relation_prefix}__id__in" + permitted_ids = _permitted_document_ids(user) + return Q(**{id_key: permitted_ids}) + + +def annotate_document_count_for_related_queryset( + queryset, + through_model, + source_field: str, + target_field: str, + user=None, +): + """ + Annotate a queryset with permissions-aware document counts using a subquery + against a relation table. + + Args: + queryset: base queryset to annotate (must contain pk) + through_model: model representing the relation (e.g., Document.tags.through + or CustomFieldInstance) + source_field: field on the relation pointing back to queryset pk + target_field: field on the relation pointing to Document id + user: the user for whom to filter permitted document ids + """ + + permitted_ids = _permitted_document_ids(user) + counts = ( + through_model.objects.filter( + **{source_field: OuterRef("pk"), f"{target_field}__in": permitted_ids}, + ) + .values(source_field) + .annotate(c=Count(target_field)) + .values("c") + ) + return queryset.annotate(document_count=Coalesce(Subquery(counts[:1]), 0)) + def get_objects_for_user_owner_aware(user, perms, Model) -> QuerySet: objects_owned = Model.objects.filter(owner=user) diff --git a/src/documents/views.py b/src/documents/views.py index f6bec1f0d..44d4d65fa 100644 --- a/src/documents/views.py +++ b/src/documents/views.py @@ -32,7 +32,6 @@ from django.db.models import Count from django.db.models import IntegerField from django.db.models import Max from django.db.models import Model -from django.db.models import Q from django.db.models import Sum from django.db.models import When from django.db.models.functions import Length @@ -128,6 +127,7 @@ from documents.matching import match_storage_paths from documents.matching import match_tags from documents.models import Correspondent from documents.models import CustomField +from documents.models import CustomFieldInstance from documents.models import Document from documents.models import DocumentType from documents.models import Note @@ -147,6 +147,7 @@ from documents.permissions import PaperlessAdminPermissions from documents.permissions import PaperlessNotePermissions from documents.permissions import PaperlessObjectPermissions from documents.permissions import ViewDocumentsPermissions +from documents.permissions import annotate_document_count_for_related_queryset from documents.permissions import get_document_count_filter_for_user from documents.permissions import get_objects_for_user_owner_aware from documents.permissions import has_perms_owner_aware @@ -429,6 +430,26 @@ class TagViewSet(ModelViewSet, PermissionsAwareDocumentCountMixin): Lower("name"), ) + def _with_document_counts(self, queryset): + """ + Annotate tags with a permissions-aware document_count using only the + through table plus a compact subquery of permitted document IDs. This + keeps PostgreSQL from evaluating large OR clauses against the documents + table for every tag. + """ + + user = getattr(self.request, "user", None) + return annotate_document_count_for_related_queryset( + queryset, + through_model=Document.tags.through, + source_field="tag_id", + target_field="document_id", + user=user, + ) + + def get_queryset(self): + return self._with_document_counts(self.queryset.all()) + def get_serializer_class(self, *args, **kwargs): if int(self.request.version) == 1: return TagSerializerVersion1 @@ -466,12 +487,12 @@ class TagViewSet(ModelViewSet, PermissionsAwareDocumentCountMixin): descendant_pks = {pk for tag in all_tags for pk in tag.get_descendants_pks()} if descendant_pks: - filter_q = self.get_document_count_filter() children_source = list( - Tag.objects.filter(pk__in=descendant_pks | {t.pk for t in all_tags}) - .select_related("owner") - .annotate(document_count=Count("documents", filter=filter_q)) - .order_by(*ordering), + self._with_document_counts( + Tag.objects.filter(pk__in=descendant_pks | {t.pk for t in all_tags}) + .select_related("owner") + .order_by(*ordering), + ), ) else: children_source = all_tags @@ -2874,31 +2895,26 @@ class CustomFieldViewSet(ModelViewSet): queryset = CustomField.objects.all().order_by("-created") + def _with_document_counts(self, queryset): + """ + Annotate custom fields with permissions-aware document_count by + counting CustomFieldInstance rows whose document is viewable by the + current user. Uses a correlated subquery to avoid large joins that + previously caused timeouts on big datasets. + """ + + user = getattr(self.request, "user", None) + return annotate_document_count_for_related_queryset( + queryset, + through_model=CustomFieldInstance, + source_field="field_id", + target_field="document_id", + user=user, + ) + def get_queryset(self): - filter = ( - Q(fields__document__deleted_at__isnull=True) - if self.request.user is None or self.request.user.is_superuser - else ( - Q( - fields__document__deleted_at__isnull=True, - fields__document__id__in=get_objects_for_user_owner_aware( - self.request.user, - "documents.view_document", - Document, - ).values_list("id", flat=True), - ) - ) - ) - return ( - super() - .get_queryset() - .annotate( - document_count=Count( - "fields", - filter=filter, - ), - ) - ) + base_qs = super().get_queryset() + return self._with_document_counts(base_qs) @extend_schema_view(