Handcrafts SQL queries a little more to reduce the query count and/or the amount of returned data (#6489)

This commit is contained in:
Trenton H 2024-04-30 07:37:09 -07:00 committed by GitHub
parent 63e1f9f5d3
commit 7be7185418
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 188 additions and 135 deletions

View File

@ -330,10 +330,10 @@ class BarcodePlugin(ConsumeTaskPlugin):
break
if tag:
tag = Tag.objects.get_or_create(
tag, _ = Tag.objects.get_or_create(
name__iexact=tag,
defaults={"name": tag},
)[0]
)
logger.debug(
f"Found Tag Barcode '{raw}', substituted "

View File

@ -24,12 +24,17 @@ from documents.tasks import update_document_archive_file
logger = logging.getLogger("paperless.bulk_edit")
def set_correspondent(doc_ids, correspondent):
if correspondent:
correspondent = Correspondent.objects.get(id=correspondent)
def set_correspondent(doc_ids: list[int], correspondent):
qs = Document.objects.filter(Q(id__in=doc_ids) & ~Q(correspondent=correspondent))
affected_docs = [doc.id for doc in qs]
if correspondent:
correspondent = Correspondent.objects.only("pk").get(id=correspondent)
qs = (
Document.objects.filter(Q(id__in=doc_ids) & ~Q(correspondent=correspondent))
.select_related("correspondent")
.only("pk", "correspondent__id")
)
affected_docs = list(qs.values_list("pk", flat=True))
qs.update(correspondent=correspondent)
bulk_update_documents.delay(document_ids=affected_docs)
@ -37,14 +42,18 @@ def set_correspondent(doc_ids, correspondent):
return "OK"
def set_storage_path(doc_ids, storage_path):
def set_storage_path(doc_ids: list[int], storage_path):
if storage_path:
storage_path = StoragePath.objects.get(id=storage_path)
storage_path = StoragePath.objects.only("pk").get(id=storage_path)
qs = Document.objects.filter(
Q(id__in=doc_ids) & ~Q(storage_path=storage_path),
qs = (
Document.objects.filter(
Q(id__in=doc_ids) & ~Q(storage_path=storage_path),
)
.select_related("storage_path")
.only("pk", "storage_path__id")
)
affected_docs = [doc.id for doc in qs]
affected_docs = list(qs.values_list("pk", flat=True))
qs.update(storage_path=storage_path)
bulk_update_documents.delay(
@ -54,12 +63,16 @@ def set_storage_path(doc_ids, storage_path):
return "OK"
def set_document_type(doc_ids, document_type):
def set_document_type(doc_ids: list[int], document_type):
if document_type:
document_type = DocumentType.objects.get(id=document_type)
document_type = DocumentType.objects.only("pk").get(id=document_type)
qs = Document.objects.filter(Q(id__in=doc_ids) & ~Q(document_type=document_type))
affected_docs = [doc.id for doc in qs]
qs = (
Document.objects.filter(Q(id__in=doc_ids) & ~Q(document_type=document_type))
.select_related("document_type")
.only("pk", "document_type__id")
)
affected_docs = list(qs.values_list("pk", flat=True))
qs.update(document_type=document_type)
bulk_update_documents.delay(document_ids=affected_docs)
@ -67,9 +80,10 @@ def set_document_type(doc_ids, document_type):
return "OK"
def add_tag(doc_ids, tag):
qs = Document.objects.filter(Q(id__in=doc_ids) & ~Q(tags__id=tag))
affected_docs = [doc.id for doc in qs]
def add_tag(doc_ids: list[int], tag: int):
qs = Document.objects.filter(Q(id__in=doc_ids) & ~Q(tags__id=tag)).only("pk")
affected_docs = list(qs.values_list("pk", flat=True))
DocumentTagRelationship = Document.tags.through
@ -82,9 +96,10 @@ def add_tag(doc_ids, tag):
return "OK"
def remove_tag(doc_ids, tag):
qs = Document.objects.filter(Q(id__in=doc_ids) & Q(tags__id=tag))
affected_docs = [doc.id for doc in qs]
def remove_tag(doc_ids: list[int], tag: int):
qs = Document.objects.filter(Q(id__in=doc_ids) & Q(tags__id=tag)).only("pk")
affected_docs = list(qs.values_list("pk", flat=True))
DocumentTagRelationship = Document.tags.through
@ -97,9 +112,9 @@ def remove_tag(doc_ids, tag):
return "OK"
def modify_tags(doc_ids, add_tags, remove_tags):
qs = Document.objects.filter(id__in=doc_ids)
affected_docs = [doc.id for doc in qs]
def modify_tags(doc_ids: list[int], add_tags: list[int], remove_tags: list[int]):
qs = Document.objects.filter(id__in=doc_ids).only("pk")
affected_docs = list(qs.values_list("pk", flat=True))
DocumentTagRelationship = Document.tags.through
@ -121,9 +136,9 @@ def modify_tags(doc_ids, add_tags, remove_tags):
return "OK"
def modify_custom_fields(doc_ids, add_custom_fields, remove_custom_fields):
qs = Document.objects.filter(id__in=doc_ids)
affected_docs = [doc.id for doc in qs]
def modify_custom_fields(doc_ids: list[int], add_custom_fields, remove_custom_fields):
qs = Document.objects.filter(id__in=doc_ids).only("pk")
affected_docs = list(qs.values_list("pk", flat=True))
fields_to_add = []
for field in add_custom_fields:
@ -145,7 +160,7 @@ def modify_custom_fields(doc_ids, add_custom_fields, remove_custom_fields):
return "OK"
def delete(doc_ids):
def delete(doc_ids: list[int]):
Document.objects.filter(id__in=doc_ids).delete()
from documents import index
@ -157,7 +172,7 @@ def delete(doc_ids):
return "OK"
def redo_ocr(doc_ids):
def redo_ocr(doc_ids: list[int]):
for document_id in doc_ids:
update_document_archive_file.delay(
document_id=document_id,
@ -166,8 +181,8 @@ def redo_ocr(doc_ids):
return "OK"
def set_permissions(doc_ids, set_permissions, owner=None, merge=False):
qs = Document.objects.filter(id__in=doc_ids)
def set_permissions(doc_ids: list[int], set_permissions, owner=None, merge=False):
qs = Document.objects.filter(id__in=doc_ids).select_related("owner")
if merge:
# If merging, only set owner for documents that don't have an owner
@ -178,7 +193,7 @@ def set_permissions(doc_ids, set_permissions, owner=None, merge=False):
for doc in qs:
set_permissions_for_object(permissions=set_permissions, object=doc, merge=merge)
affected_docs = [doc.id for doc in qs]
affected_docs = list(qs.values_list("pk", flat=True))
bulk_update_documents.delay(document_ids=affected_docs)

View File

@ -131,7 +131,12 @@ def get_metadata_cache(document_id: int) -> Optional[MetadataCacheData]:
# The metadata exists in the cache
if doc_metadata is not None:
try:
doc = Document.objects.get(pk=document_id)
doc = Document.objects.only(
"pk",
"checksum",
"archive_checksum",
"archive_filename",
).get(pk=document_id)
# The original checksums match
# If it has one, the archive checksums match
# Then, we can use the metadata

View File

@ -16,9 +16,13 @@ def changed_password_check(app_configs, **kwargs):
from paperless.db import GnuPG
try:
encrypted_doc = Document.objects.filter(
storage_type=Document.STORAGE_TYPE_GPG,
).first()
encrypted_doc = (
Document.objects.filter(
storage_type=Document.STORAGE_TYPE_GPG,
)
.only("pk", "storage_type")
.first()
)
except (OperationalError, ProgrammingError, FieldError):
return [] # No documents table yet

View File

@ -155,8 +155,12 @@ class DocumentClassifier:
def train(self):
# Get non-inbox documents
docs_queryset = Document.objects.exclude(
tags__is_inbox_tag=True,
docs_queryset = (
Document.objects.exclude(
tags__is_inbox_tag=True,
)
.select_related("document_type", "correspondent", "storage_path")
.prefetch_related("tags")
)
# No documents exist to train against

View File

@ -73,7 +73,7 @@ def metadata_etag(request, pk: int) -> Optional[str]:
ETag
"""
try:
doc = Document.objects.get(pk=pk)
doc = Document.objects.only("checksum").get(pk=pk)
return doc.checksum
except Document.DoesNotExist: # pragma: no cover
return None
@ -87,7 +87,7 @@ def metadata_last_modified(request, pk: int) -> Optional[datetime]:
err on the side of being more cautious
"""
try:
doc = Document.objects.get(pk=pk)
doc = Document.objects.only("modified").get(pk=pk)
return doc.modified
except Document.DoesNotExist: # pragma: no cover
return None
@ -99,7 +99,7 @@ def preview_etag(request, pk: int) -> Optional[str]:
ETag for the document preview, using the original or archive checksum, depending on the request
"""
try:
doc = Document.objects.get(pk=pk)
doc = Document.objects.only("checksum", "archive_checksum").get(pk=pk)
use_original = (
"original" in request.query_params
and request.query_params["original"] == "true"
@ -116,7 +116,7 @@ def preview_last_modified(request, pk: int) -> Optional[datetime]:
speaking correct, but close enough and quick
"""
try:
doc = Document.objects.get(pk=pk)
doc = Document.objects.only("modified").get(pk=pk)
return doc.modified
except Document.DoesNotExist: # pragma: no cover
return None
@ -129,7 +129,7 @@ def thumbnail_last_modified(request, pk: int) -> Optional[datetime]:
Cache should be (slightly?) faster than filesystem
"""
try:
doc = Document.objects.get(pk=pk)
doc = Document.objects.only("storage_type").get(pk=pk)
if not doc.thumbnail_path.exists():
return None
doc_key = get_thumbnail_modified_key(pk)

View File

@ -2,6 +2,7 @@ from django.contrib.auth.models import Group
from django.contrib.auth.models import Permission
from django.contrib.auth.models import User
from django.contrib.contenttypes.models import ContentType
from django.db.models import QuerySet
from guardian.core import ObjectPermissionChecker
from guardian.models import GroupObjectPermission
from guardian.shortcuts import assign_perm
@ -122,7 +123,7 @@ def set_permissions_for_object(permissions: list[str], object, merge: bool = Fal
)
def get_objects_for_user_owner_aware(user, perms, Model):
def get_objects_for_user_owner_aware(user, perms, Model) -> QuerySet:
objects_owned = Model.objects.filter(owner=user)
objects_unowned = Model.objects.filter(owner__isnull=True)
objects_with_perms = get_objects_for_user(

View File

@ -12,7 +12,7 @@ from documents.models import Document
class SanityCheckMessages:
def __init__(self):
self._messages = defaultdict(list)
self._messages: dict[int, list[dict]] = defaultdict(list)
self.has_error = False
self.has_warning = False

View File

@ -53,9 +53,9 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
self.cf1 = CustomField.objects.create(name="cf1", data_type="text")
self.cf2 = CustomField.objects.create(name="cf2", data_type="text")
@mock.patch("documents.serialisers.bulk_edit.set_correspondent")
def test_api_set_correspondent(self, m):
m.return_value = "OK"
@mock.patch("documents.bulk_edit.bulk_update_documents.delay")
def test_api_set_correspondent(self, bulk_update_task_mock):
self.assertNotEqual(self.doc1.correspondent, self.c1)
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
@ -68,14 +68,16 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_200_OK)
m.assert_called_once()
args, kwargs = m.call_args
self.assertEqual(args[0], [self.doc1.id])
self.assertEqual(kwargs["correspondent"], self.c1.id)
self.doc1.refresh_from_db()
self.assertEqual(self.doc1.correspondent, self.c1)
bulk_update_task_mock.assert_called_once_with(document_ids=[self.doc1.pk])
@mock.patch("documents.bulk_edit.bulk_update_documents.delay")
def test_api_unset_correspondent(self, bulk_update_task_mock):
self.doc1.correspondent = self.c1
self.doc1.save()
self.assertIsNotNone(self.doc1.correspondent)
@mock.patch("documents.serialisers.bulk_edit.set_correspondent")
def test_api_unset_correspondent(self, m):
m.return_value = "OK"
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
@ -88,14 +90,13 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_200_OK)
m.assert_called_once()
args, kwargs = m.call_args
self.assertEqual(args[0], [self.doc1.id])
self.assertIsNone(kwargs["correspondent"])
bulk_update_task_mock.assert_called_once()
self.doc1.refresh_from_db()
self.assertIsNone(self.doc1.correspondent)
@mock.patch("documents.serialisers.bulk_edit.set_document_type")
def test_api_set_type(self, m):
m.return_value = "OK"
@mock.patch("documents.bulk_edit.bulk_update_documents.delay")
def test_api_set_type(self, bulk_update_task_mock):
self.assertNotEqual(self.doc1.document_type, self.dt1)
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
@ -108,14 +109,15 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_200_OK)
m.assert_called_once()
args, kwargs = m.call_args
self.assertEqual(args[0], [self.doc1.id])
self.assertEqual(kwargs["document_type"], self.dt1.id)
self.doc1.refresh_from_db()
self.assertEqual(self.doc1.document_type, self.dt1)
bulk_update_task_mock.assert_called_once_with(document_ids=[self.doc1.pk])
@mock.patch("documents.bulk_edit.bulk_update_documents.delay")
def test_api_unset_type(self, bulk_update_task_mock):
self.doc1.document_type = self.dt1
self.doc1.save()
@mock.patch("documents.serialisers.bulk_edit.set_document_type")
def test_api_unset_type(self, m):
m.return_value = "OK"
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
@ -128,14 +130,15 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_200_OK)
m.assert_called_once()
args, kwargs = m.call_args
self.assertEqual(args[0], [self.doc1.id])
self.assertIsNone(kwargs["document_type"])
self.doc1.refresh_from_db()
self.assertIsNone(self.doc1.document_type)
bulk_update_task_mock.assert_called_once_with(document_ids=[self.doc1.pk])
@mock.patch("documents.bulk_edit.bulk_update_documents.delay")
def test_api_add_tag(self, bulk_update_task_mock):
self.assertFalse(self.doc1.tags.filter(pk=self.t1.pk).exists())
@mock.patch("documents.serialisers.bulk_edit.add_tag")
def test_api_add_tag(self, m):
m.return_value = "OK"
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
@ -148,14 +151,16 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_200_OK)
m.assert_called_once()
args, kwargs = m.call_args
self.assertEqual(args[0], [self.doc1.id])
self.assertEqual(kwargs["tag"], self.t1.id)
self.doc1.refresh_from_db()
self.assertTrue(self.doc1.tags.filter(pk=self.t1.pk).exists())
bulk_update_task_mock.assert_called_once_with(document_ids=[self.doc1.pk])
@mock.patch("documents.bulk_edit.bulk_update_documents.delay")
def test_api_remove_tag(self, bulk_update_task_mock):
self.doc1.tags.add(self.t1)
@mock.patch("documents.serialisers.bulk_edit.remove_tag")
def test_api_remove_tag(self, m):
m.return_value = "OK"
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
@ -168,10 +173,8 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_200_OK)
m.assert_called_once()
args, kwargs = m.call_args
self.assertEqual(args[0], [self.doc1.id])
self.assertEqual(kwargs["tag"], self.t1.id)
self.doc1.refresh_from_db()
self.assertFalse(self.doc1.tags.filter(pk=self.t1.pk).exists())
@mock.patch("documents.serialisers.bulk_edit.modify_tags")
def test_api_modify_tags(self, m):

View File

@ -246,7 +246,8 @@ class CorrespondentViewSet(ModelViewSet, PermissionsAwareDocumentCountMixin):
model = Correspondent
queryset = (
Correspondent.objects.annotate(
Correspondent.objects.prefetch_related("documents")
.annotate(
last_correspondence=Max("documents__created"),
)
.select_related("owner")
@ -394,7 +395,7 @@ class DocumentViewSet(
)
def file_response(self, pk, request, disposition):
doc = Document.objects.get(id=pk)
doc = Document.objects.select_related("owner").get(id=pk)
if request.user is not None and not has_perms_owner_aware(
request.user,
"view_document",
@ -438,7 +439,7 @@ class DocumentViewSet(
)
def metadata(self, request, pk=None):
try:
doc = Document.objects.get(pk=pk)
doc = Document.objects.select_related("owner").get(pk=pk)
if request.user is not None and not has_perms_owner_aware(
request.user,
"view_document",
@ -498,7 +499,7 @@ class DocumentViewSet(
),
)
def suggestions(self, request, pk=None):
doc = get_object_or_404(Document, pk=pk)
doc = get_object_or_404(Document.objects.select_related("owner"), pk=pk)
if request.user is not None and not has_perms_owner_aware(
request.user,
"view_document",
@ -557,7 +558,7 @@ class DocumentViewSet(
@method_decorator(last_modified(thumbnail_last_modified))
def thumb(self, request, pk=None):
try:
doc = Document.objects.get(id=pk)
doc = Document.objects.select_related("owner").get(id=pk)
if request.user is not None and not has_perms_owner_aware(
request.user,
"view_document",
@ -583,7 +584,7 @@ class DocumentViewSet(
def getNotes(self, doc):
return [
{
"id": c.id,
"id": c.pk,
"note": c.note,
"created": c.created,
"user": {
@ -593,14 +594,31 @@ class DocumentViewSet(
"last_name": c.user.last_name,
},
}
for c in Note.objects.filter(document=doc).order_by("-created")
for c in Note.objects.select_related("user")
.only(
"pk",
"note",
"created",
"user__id",
"user__username",
"user__first_name",
"user__last_name",
)
.filter(document=doc)
.order_by("-created")
]
@action(methods=["get", "post", "delete"], detail=True)
def notes(self, request, pk=None):
currentUser = request.user
try:
doc = Document.objects.get(pk=pk)
doc = (
Document.objects.select_related("owner")
.prefetch_related("notes")
.only("pk", "owner__id")
.get(pk=pk)
)
if currentUser is not None and not has_perms_owner_aware(
currentUser,
"view_document",
@ -612,7 +630,8 @@ class DocumentViewSet(
if request.method == "GET":
try:
return Response(self.getNotes(doc))
notes = self.getNotes(doc)
return Response(notes)
except Exception as e:
logger.warning(f"An error occurred retrieving notes: {e!s}")
return Response(
@ -634,7 +653,6 @@ class DocumentViewSet(
note=request.data["note"],
user=currentUser,
)
c.save()
# If audit log is enabled make an entry in the log
# about this note change
if settings.AUDIT_LOG_ENABLED:
@ -653,9 +671,11 @@ class DocumentViewSet(
from documents import index
index.add_or_update_document(self.get_object())
index.add_or_update_document(doc)
return Response(self.getNotes(doc))
notes = self.getNotes(doc)
return Response(notes)
except Exception as e:
logger.warning(f"An error occurred saving note: {e!s}")
return Response(
@ -704,7 +724,7 @@ class DocumentViewSet(
def share_links(self, request, pk=None):
currentUser = request.user
try:
doc = Document.objects.get(pk=pk)
doc = Document.objects.select_related("owner").get(pk=pk)
if currentUser is not None and not has_perms_owner_aware(
currentUser,
"change_document",
@ -720,12 +740,13 @@ class DocumentViewSet(
now = timezone.now()
links = [
{
"id": c.id,
"id": c.pk,
"created": c.created,
"expiration": c.expiration,
"slug": c.slug,
}
for c in ShareLink.objects.filter(document=doc)
.only("pk", "created", "expiration", "slug")
.exclude(expiration__lt=now)
.order_by("-created")
]
@ -949,7 +970,9 @@ class BulkEditView(PassUserMixin):
documents = serializer.validated_data.get("documents")
if not user.is_superuser:
document_objs = Document.objects.filter(pk__in=documents)
document_objs = Document.objects.select_related("owner").filter(
pk__in=documents,
)
has_perms = (
all((doc.owner == user or doc.owner is None) for doc in document_objs)
if method
@ -1139,16 +1162,21 @@ class StatisticsView(APIView):
permission_classes = (IsAuthenticated,)
def get(self, request, format=None):
user = request.user if request.user is not None else None
documents = (
Document.objects.all()
if user is None
else get_objects_for_user_owner_aware(
user,
"documents.view_document",
Document,
(
Document.objects.all()
if user is None
else get_objects_for_user_owner_aware(
user,
"documents.view_document",
Document,
)
)
.only("mime_type", "content")
.prefetch_related("tags")
)
tags = (
Tag.objects.all()
@ -1158,35 +1186,29 @@ class StatisticsView(APIView):
correspondent_count = (
Correspondent.objects.count()
if user is None
else len(
get_objects_for_user_owner_aware(
user,
"documents.view_correspondent",
Correspondent,
),
)
else get_objects_for_user_owner_aware(
user,
"documents.view_correspondent",
Correspondent,
).count()
)
document_type_count = (
DocumentType.objects.count()
if user is None
else len(
get_objects_for_user_owner_aware(
user,
"documents.view_documenttype",
DocumentType,
),
)
else get_objects_for_user_owner_aware(
user,
"documents.view_documenttype",
DocumentType,
).count()
)
storage_path_count = (
StoragePath.objects.count()
if user is None
else len(
get_objects_for_user_owner_aware(
user,
"documents.view_storagepath",
StoragePath,
),
)
else get_objects_for_user_owner_aware(
user,
"documents.view_storagepath",
StoragePath,
).count()
)
documents_total = documents.count()
@ -1260,9 +1282,8 @@ class BulkDownloadView(GenericAPIView):
with zipfile.ZipFile(temp.name, "w", compression) as zipf:
strategy = strategy_class(zipf, follow_filename_format)
for id in ids:
doc = Document.objects.get(id=id)
strategy.add_document(doc)
for document in Document.objects.filter(pk__in=ids):
strategy.add_document(document)
with open(temp.name, "rb") as f:
response = HttpResponse(f, content_type="application/zip")
@ -1323,7 +1344,7 @@ class UiSettingsView(GenericAPIView):
serializer = self.get_serializer(data=request.data)
serializer.is_valid(raise_exception=True)
user = User.objects.get(pk=request.user.id)
user = User.objects.select_related("ui_settings").get(pk=request.user.id)
ui_settings = {}
if hasattr(user, "ui_settings"):
ui_settings = user.ui_settings.settings
@ -1545,7 +1566,7 @@ class BulkEditObjectsView(PassUserMixin):
object_class = serializer.get_object_class(object_type)
operation = serializer.validated_data.get("operation")
objs = object_class.objects.filter(pk__in=object_ids)
objs = object_class.objects.select_related("owner").filter(pk__in=object_ids)
if not user.is_superuser:
model_name = object_class._meta.verbose_name