From e0ff7244ab2341adf3b0e61358820652851ef26a Mon Sep 17 00:00:00 2001 From: shamoon <4887959+shamoon@users.noreply.github.com> Date: Fri, 30 Jan 2026 08:50:02 -0800 Subject: [PATCH] Update script to always report counts --- .../commands/document_perf_benchmark.py | 68 ++++++++++++++----- 1 file changed, 51 insertions(+), 17 deletions(-) diff --git a/src/documents/management/commands/document_perf_benchmark.py b/src/documents/management/commands/document_perf_benchmark.py index c3dc26154..0d975076c 100644 --- a/src/documents/management/commands/document_perf_benchmark.py +++ b/src/documents/management/commands/document_perf_benchmark.py @@ -8,13 +8,17 @@ from time import perf_counter from django.contrib.auth import get_user_model from django.contrib.auth.models import Permission +from django.contrib.contenttypes.models import ContentType from django.core.management.base import BaseCommand from django.core.management.base import CommandError from django.db import connections from django.db import reset_queries from django.db.models import Count +from django.db.models import IntegerField from django.db.models import Q from django.db.models import Subquery +from django.db.models.functions import Cast +from guardian.models import UserObjectPermission from guardian.shortcuts import assign_perm from rest_framework.test import APIClient @@ -28,7 +32,7 @@ from documents.permissions import get_objects_for_user_owner_aware class Command(BaseCommand): # e.g. docker compose exec webserver / manage.py ... - # document_perf_benchmark --reuse-existing --documents 500000 --chunk-size 5000 --tags 40 --tags-per-doc 3 --custom-fields 6 --custom-fields-per-doc 2 + # document_perf_benchmark --reuse-existing --documents 500000 --chunk-size 5000 --tags 40 --tags-per-doc 3 --correspondents 10 --correspondents-per-doc 1 --custom-fields 6 --custom-fields-per-doc 2 help = ( "Seed a synthetic dataset and benchmark permission-filtered document queries " "for superusers vs non-superusers." @@ -188,12 +192,39 @@ class Command(BaseCommand): ) if skip_seed: - dataset_size = Document.objects.filter(title__startswith=prefix).count() - self.stdout.write( - self.style.SUCCESS( - f"Dataset ready (reused): {dataset_size} docs | prefix={prefix}", - ), - ) + tags_dataset_size = Tag.objects.filter(name__startswith=prefix).count() + correspondents_dataset_size = Correspondent.objects.filter( + name__startswith=prefix, + ).count() + cfs_dataset_size = CustomField.objects.filter( + name__startswith=prefix, + ).count() + created_counts = { + "owned": Document.objects.filter( + title__startswith=prefix, + owner=target_user, + ).count(), + "other_owned": Document.objects.filter( + title__startswith=prefix, + owner=other_user, + ).count(), + "unowned": Document.objects.filter( + title__startswith=prefix, + owner__isnull=True, + ).count(), + "shared": Document.objects.filter( + title__startswith=prefix, + owner=other_user, + id__in=UserObjectPermission.objects.filter( + user=target_user, + content_type=ContentType.objects.get_for_model(Document), + permission__codename="view_document", + ) + .annotate(object_pk_int=Cast("object_pk", IntegerField())) + .values_list("object_pk_int", flat=True), + ).count(), + } + else: self.stdout.write( f"Seeding {document_total} documents (owner_ratio={owner_ratio}, " @@ -220,6 +251,7 @@ class Command(BaseCommand): tags_per_doc=tags_per_doc, chunk_size=chunk_size, ) + tags_dataset_size = len(created_tags) created_correspondents = [] if correspondents: @@ -234,6 +266,7 @@ class Command(BaseCommand): correspondents_per_doc=correspondents_per_doc, chunk_size=chunk_size, ) + correspondents_dataset_size = len(created_correspondents) created_custom_fields = [] if custom_fields: @@ -245,17 +278,18 @@ class Command(BaseCommand): per_doc=custom_fields_per_doc, chunk_size=chunk_size, ) + cfs_dataset_size = len(created_custom_fields) - dataset_size = Document.objects.filter(title__startswith=prefix).count() - self.stdout.write( - self.style.SUCCESS( - f"Dataset ready: {dataset_size} docs | owned by target {created_counts['owned']} | " - f"owned by other {created_counts['other_owned']} | unowned {created_counts['unowned']} | " - f"shared-perms {created_counts['shared']} | tags {len(created_tags)} | " - f"correspondents {len(created_correspondents)} | " - f"custom fields {len(created_custom_fields)}", - ), - ) + docs_dataset_size = Document.objects.filter(title__startswith=prefix).count() + self.stdout.write( + self.style.SUCCESS( + f"Dataset ready: {docs_dataset_size} docs | owned by target {created_counts['owned']} | " + f"owned by other {created_counts['other_owned']} | unowned {created_counts['unowned']} | " + f"shared-perms {created_counts['shared']} | tags {tags_dataset_size} | " + f"correspondents {correspondents_dataset_size} | " + f"custom fields {cfs_dataset_size}", + ), + ) self.stdout.write("\nRunning benchmarks...\n") self._run_benchmarks(