Performance fix: use subqueries to improve object retrieval in large installs (#11950)

This commit is contained in:
shamoon
2026-02-05 08:46:32 -08:00
committed by GitHub
parent 5b9bb147cf
commit 5b45b89d35
3 changed files with 131 additions and 50 deletions

View File

@@ -2,10 +2,17 @@ from django.contrib.auth.models import Group
from django.contrib.auth.models import Permission
from django.contrib.auth.models import User
from django.contrib.contenttypes.models import ContentType
from django.db.models import Count
from django.db.models import IntegerField
from django.db.models import OuterRef
from django.db.models import Q
from django.db.models import QuerySet
from django.db.models import Subquery
from django.db.models.functions import Cast
from django.db.models.functions import Coalesce
from guardian.core import ObjectPermissionChecker
from guardian.models import GroupObjectPermission
from guardian.models import UserObjectPermission
from guardian.shortcuts import assign_perm
from guardian.shortcuts import get_objects_for_user
from guardian.shortcuts import get_users_with_perms
@@ -129,23 +136,96 @@ def set_permissions_for_object(permissions: dict, object, *, merge: bool = False
)
def _permitted_document_ids(user):
    """
    Build a queryset of IDs of the non-deleted documents ``user`` may view.

    ``get_objects_for_user`` is deliberately not used here so that the
    resulting subquery stays small and index-friendly.

    Args:
        user: the requesting user; ``None`` or an unauthenticated user is
            treated as anonymous.

    Returns:
        A flat ``values_list`` queryset of document IDs:
        - anonymous: non-deleted documents without an owner
        - superuser: every non-deleted document
        - anyone else: non-deleted documents that are owned by the user, have
          no owner, or carry a ``view_document`` object permission granted to
          the user directly or through one of the user's groups
    """
    live_docs = Document.objects.filter(deleted_at__isnull=True).only("id", "owner")

    is_authenticated = user is not None and getattr(user, "is_authenticated", False)
    if not is_authenticated:
        # Just Anonymous user e.g. for drf-spectacular
        return live_docs.filter(owner__isnull=True).values_list("id", flat=True)

    if getattr(user, "is_superuser", False):
        return live_docs.values_list("id", flat=True)

    view_perm = {
        "permission__codename": "view_document",
        "permission__content_type": ContentType.objects.get_for_model(Document),
    }

    def _granted_object_pks(permission_model, **holder_lookup):
        # object_pk is cast to an integer so it can be matched against
        # Document ids in the ``id__in`` lookup below.
        return (
            permission_model.objects.filter(**holder_lookup, **view_perm)
            .annotate(object_pk_int=Cast("object_pk", IntegerField()))
            .values_list("object_pk_int", flat=True)
        )

    granted_ids = _granted_object_pks(UserObjectPermission, user=user).union(
        _granted_object_pks(GroupObjectPermission, group__user=user),
    )

    visible = Q(owner=user) | Q(owner__isnull=True) | Q(id__in=granted_ids)
    return live_docs.filter(visible).values_list("id", flat=True)
def get_document_count_filter_for_user(user):
    """
    Return the Q object used to filter document counts for the given user.

    The filter is expressed as an ``id__in`` against a small subquery of permitted
    document IDs to keep the generated SQL simple and avoid large OR clauses.

    The lookups are written against a ``documents`` relation, so the returned
    Q is intended for querysets of models that expose that relation.

    Args:
        user: the requesting user; ``None`` or an unauthenticated user is
            treated as anonymous.

    Returns:
        A ``Q`` that matches:
        - anonymous: non-deleted documents without an owner
        - superuser: every non-deleted document
        - anyone else: documents whose id is in ``_permitted_document_ids(user)``
    """
    if user is None or not getattr(user, "is_authenticated", False):
        return Q(documents__deleted_at__isnull=True, documents__owner__isnull=True)

    if getattr(user, "is_superuser", False):
        # Superuser: no permission filtering needed
        return Q(documents__deleted_at__isnull=True)

    # _permitted_document_ids() already restricts to non-deleted documents, so
    # no separate ``deleted_at`` condition is required on this branch.
    permitted_ids = _permitted_document_ids(user)
    return Q(documents__id__in=permitted_ids)
def annotate_document_count_for_related_queryset(
    queryset,
    through_model,
    related_object_field: str,
    target_field: str = "document_id",
    user=None,
):
    """
    Add a permissions-aware ``document_count`` annotation to ``queryset``.

    The count is computed by a correlated subquery over a relation table and
    only includes documents returned by ``_permitted_document_ids(user)``.
    Rows with no matching relation entries get a count of 0.

    Args:
        queryset: base queryset to annotate (must contain pk)
        through_model: model representing the relation (e.g.,
            Document.tags.through or CustomFieldInstance)
        related_object_field: field on the relation pointing back to the
            queryset pk
        target_field: field on the relation pointing to the Document id
        user: the user for whom to filter permitted document ids

    Returns:
        ``queryset`` annotated with ``document_count``.
    """
    visible_ids = _permitted_document_ids(user)

    # Correlate relation rows with the outer row and keep only visible documents.
    relation_filter = {
        related_object_field: OuterRef("pk"),
        f"{target_field}__in": visible_ids,
    }
    per_object_counts = (
        through_model.objects.filter(**relation_filter)
        .values(related_object_field)
        .annotate(c=Count(target_field))
        .values("c")
    )

    # The subquery yields no row when nothing matches; Coalesce turns that into 0.
    count_subquery = Subquery(per_object_counts[:1])
    return queryset.annotate(document_count=Coalesce(count_subquery, 0))
def get_objects_for_user_owner_aware(user, perms, Model) -> QuerySet: