Update script to always report counts

This commit is contained in:
shamoon
2026-01-30 08:50:02 -08:00
parent e08af2f726
commit e0ff7244ab

View File

@@ -8,13 +8,17 @@ from time import perf_counter
from django.contrib.auth import get_user_model from django.contrib.auth import get_user_model
from django.contrib.auth.models import Permission from django.contrib.auth.models import Permission
from django.contrib.contenttypes.models import ContentType
from django.core.management.base import BaseCommand from django.core.management.base import BaseCommand
from django.core.management.base import CommandError from django.core.management.base import CommandError
from django.db import connections from django.db import connections
from django.db import reset_queries from django.db import reset_queries
from django.db.models import Count from django.db.models import Count
from django.db.models import IntegerField
from django.db.models import Q from django.db.models import Q
from django.db.models import Subquery from django.db.models import Subquery
from django.db.models.functions import Cast
from guardian.models import UserObjectPermission
from guardian.shortcuts import assign_perm from guardian.shortcuts import assign_perm
from rest_framework.test import APIClient from rest_framework.test import APIClient
@@ -28,7 +32,7 @@ from documents.permissions import get_objects_for_user_owner_aware
class Command(BaseCommand): class Command(BaseCommand):
# e.g. docker compose exec webserver / manage.py ... # e.g. docker compose exec webserver / manage.py ...
# document_perf_benchmark --reuse-existing --documents 500000 --chunk-size 5000 --tags 40 --tags-per-doc 3 --custom-fields 6 --custom-fields-per-doc 2 # document_perf_benchmark --reuse-existing --documents 500000 --chunk-size 5000 --tags 40 --tags-per-doc 3 --correspondents 10 --correspondents-per-doc 1 --custom-fields 6 --custom-fields-per-doc 2
help = ( help = (
"Seed a synthetic dataset and benchmark permission-filtered document queries " "Seed a synthetic dataset and benchmark permission-filtered document queries "
"for superusers vs non-superusers." "for superusers vs non-superusers."
@@ -188,12 +192,39 @@ class Command(BaseCommand):
) )
if skip_seed: if skip_seed:
dataset_size = Document.objects.filter(title__startswith=prefix).count() tags_dataset_size = Tag.objects.filter(name__startswith=prefix).count()
self.stdout.write( correspondents_dataset_size = Correspondent.objects.filter(
self.style.SUCCESS( name__startswith=prefix,
f"Dataset ready (reused): {dataset_size} docs | prefix={prefix}", ).count()
), cfs_dataset_size = CustomField.objects.filter(
) name__startswith=prefix,
).count()
created_counts = {
"owned": Document.objects.filter(
title__startswith=prefix,
owner=target_user,
).count(),
"other_owned": Document.objects.filter(
title__startswith=prefix,
owner=other_user,
).count(),
"unowned": Document.objects.filter(
title__startswith=prefix,
owner__isnull=True,
).count(),
"shared": Document.objects.filter(
title__startswith=prefix,
owner=other_user,
id__in=UserObjectPermission.objects.filter(
user=target_user,
content_type=ContentType.objects.get_for_model(Document),
permission__codename="view_document",
)
.annotate(object_pk_int=Cast("object_pk", IntegerField()))
.values_list("object_pk_int", flat=True),
).count(),
}
else: else:
self.stdout.write( self.stdout.write(
f"Seeding {document_total} documents (owner_ratio={owner_ratio}, " f"Seeding {document_total} documents (owner_ratio={owner_ratio}, "
@@ -220,6 +251,7 @@ class Command(BaseCommand):
tags_per_doc=tags_per_doc, tags_per_doc=tags_per_doc,
chunk_size=chunk_size, chunk_size=chunk_size,
) )
tags_dataset_size = len(created_tags)
created_correspondents = [] created_correspondents = []
if correspondents: if correspondents:
@@ -234,6 +266,7 @@ class Command(BaseCommand):
correspondents_per_doc=correspondents_per_doc, correspondents_per_doc=correspondents_per_doc,
chunk_size=chunk_size, chunk_size=chunk_size,
) )
correspondents_dataset_size = len(created_correspondents)
created_custom_fields = [] created_custom_fields = []
if custom_fields: if custom_fields:
@@ -245,17 +278,18 @@ class Command(BaseCommand):
per_doc=custom_fields_per_doc, per_doc=custom_fields_per_doc,
chunk_size=chunk_size, chunk_size=chunk_size,
) )
cfs_dataset_size = len(created_custom_fields)
dataset_size = Document.objects.filter(title__startswith=prefix).count() docs_dataset_size = Document.objects.filter(title__startswith=prefix).count()
self.stdout.write( self.stdout.write(
self.style.SUCCESS( self.style.SUCCESS(
f"Dataset ready: {dataset_size} docs | owned by target {created_counts['owned']} | " f"Dataset ready: {docs_dataset_size} docs | owned by target {created_counts['owned']} | "
f"owned by other {created_counts['other_owned']} | unowned {created_counts['unowned']} | " f"owned by other {created_counts['other_owned']} | unowned {created_counts['unowned']} | "
f"shared-perms {created_counts['shared']} | tags {len(created_tags)} | " f"shared-perms {created_counts['shared']} | tags {tags_dataset_size} | "
f"correspondents {len(created_correspondents)} | " f"correspondents {correspondents_dataset_size} | "
f"custom fields {len(created_custom_fields)}", f"custom fields {cfs_dataset_size}",
), ),
) )
self.stdout.write("\nRunning benchmarks...\n") self.stdout.write("\nRunning benchmarks...\n")
self._run_benchmarks( self._run_benchmarks(