diff --git a/src/documents/conditionals.py b/src/documents/conditionals.py index 1768525fc..b017f410c 100644 --- a/src/documents/conditionals.py +++ b/src/documents/conditionals.py @@ -13,59 +13,7 @@ from documents.caching import CLASSIFIER_VERSION_KEY from documents.caching import get_thumbnail_modified_key from documents.classifier import DocumentClassifier from documents.models import Document - - -def _resolve_effective_doc(pk: int, request) -> Document | None: - """ - Resolve which Document row should be considered for caching keys: - - If a version is requested, use that version - - If pk is a root doc, use its newest child version if present, else the root. - - Else, pk is a version, use that version. - Returns None if resolution fails (treat as no-cache). - """ - try: - request_doc = Document.objects.only("id", "root_document_id").get(pk=pk) - except Document.DoesNotExist: - return None - - root_doc = ( - request_doc - if request_doc.root_document_id is None - else Document.objects.only("id").get(id=request_doc.root_document_id) - ) - - version_param = ( - request.query_params.get("version") - if hasattr(request, "query_params") - else None - ) - if version_param: - try: - version_id = int(version_param) - candidate = Document.objects.only("id", "root_document_id").get( - id=version_id, - ) - if ( - candidate.id != root_doc.id - and candidate.root_document_id != root_doc.id - ): - return None - return candidate - except Exception: - return None - - # Default behavior: if pk is a root doc, prefer its newest child version - if request_doc.root_document_id is None: - latest = ( - Document.objects.filter(root_document=root_doc) - .only("id") - .order_by("id") - .last() - ) - return latest or root_doc - - # pk is already a version - return request_doc +from documents.versioning import resolve_effective_document_by_pk def suggestions_etag(request, pk: int) -> str | None: @@ -125,7 +73,7 @@ def metadata_etag(request, pk: int) -> str | None: Metadata is extracted from the original file, so use its checksum as the ETag """ - doc = _resolve_effective_doc(pk, request) + doc = resolve_effective_document_by_pk(pk, request).document if doc is None: return None return doc.checksum @@ -137,7 +85,7 @@ def metadata_last_modified(request, pk: int) -> datetime | None: not the modification of the original file, but of the database object, but might as well error on the side of more cautious """ - doc = _resolve_effective_doc(pk, request) + doc = resolve_effective_document_by_pk(pk, request).document if doc is None: return None return doc.modified @@ -147,7 +95,7 @@ def preview_etag(request, pk: int) -> str | None: """ ETag for the document preview, using the original or archive checksum, depending on the request """ - doc = _resolve_effective_doc(pk, request) + doc = resolve_effective_document_by_pk(pk, request).document if doc is None: return None use_original = ( @@ -163,7 +111,7 @@ def preview_last_modified(request, pk: int) -> datetime | None: Uses the documents modified time to set the Last-Modified header. Not strictly speaking correct, but close enough and quick """ - doc = _resolve_effective_doc(pk, request) + doc = resolve_effective_document_by_pk(pk, request).document if doc is None: return None return doc.modified @@ -175,7 +123,7 @@ def thumbnail_last_modified(request: Any, pk: int) -> datetime | None: Cache should be (slightly?) faster than filesystem """ try: - doc = _resolve_effective_doc(pk, request) + doc = resolve_effective_document_by_pk(pk, request).document if doc is None: return None if not doc.thumbnail_path.exists(): @@ -195,5 +143,5 @@ def thumbnail_last_modified(request: Any, pk: int) -> datetime | None: ) cache.set(doc_key, last_modified, CACHE_50_MINUTES) return last_modified - except Document.DoesNotExist: # pragma: no cover + except (Document.DoesNotExist, OSError): # pragma: no cover return None diff --git a/src/documents/index.py b/src/documents/index.py index cdc038b46..709e2bd21 100644 --- a/src/documents/index.py +++ b/src/documents/index.py @@ -50,6 +50,7 @@ from documents.models import CustomFieldInstance from documents.models import Document from documents.models import Note from documents.models import User +from documents.versioning import get_latest_version_for_root if TYPE_CHECKING: from django.db.models import QuerySet @@ -185,16 +186,11 @@ def update_document(writer: AsyncWriter, doc: Document) -> None: only_with_perms_in=["view_document"], ) viewer_ids: str = ",".join([str(u.id) for u in users_with_perms]) - effective_content = doc.content - if doc.root_document_id is None: - latest_version = ( - Document.objects.filter(root_document=doc) - .only("content") - .order_by("-id") - .first() - ) - if latest_version is not None: - effective_content = latest_version.content + effective_content = ( + get_latest_version_for_root(doc).content + if doc.root_document_id is None + else doc.content + ) writer.update_document( id=doc.pk, title=doc.title, diff --git a/src/documents/tests/test_version_conditionals.py b/src/documents/tests/test_version_conditionals.py index af82fe4f0..fd24a2a51 100644 --- a/src/documents/tests/test_version_conditionals.py +++ b/src/documents/tests/test_version_conditionals.py @@ -3,12 +3,12 @@ from unittest import mock from django.test import TestCase -from documents.conditionals import _resolve_effective_doc from documents.conditionals import metadata_etag from documents.conditionals import preview_etag from documents.conditionals import thumbnail_last_modified from documents.models import Document from documents.tests.utils import DirectoriesMixin +from documents.versioning import resolve_effective_document_by_pk class TestConditionals(DirectoriesMixin, TestCase): @@ -56,8 +56,12 @@ class TestConditionals(DirectoriesMixin, TestCase): query_params={"version": str(other_version.id)}, ) - self.assertIsNone(_resolve_effective_doc(root.id, invalid_request)) - self.assertIsNone(_resolve_effective_doc(root.id, unrelated_request)) + self.assertIsNone( + resolve_effective_document_by_pk(root.id, invalid_request).document, + ) + self.assertIsNone( + resolve_effective_document_by_pk(root.id, unrelated_request).document, + ) def test_thumbnail_last_modified_uses_effective_document_for_cache_key( self, diff --git a/src/documents/versioning.py b/src/documents/versioning.py new file mode 100644 index 000000000..c45715350 --- /dev/null +++ b/src/documents/versioning.py @@ -0,0 +1,120 @@ +from __future__ import annotations + +from dataclasses import dataclass +from enum import Enum +from typing import Any + +from documents.models import Document + + +class VersionResolutionError(str, Enum): + INVALID = "invalid" + NOT_FOUND = "not_found" + + +@dataclass(frozen=True) +class VersionResolution: + document: Document | None + error: VersionResolutionError | None = None + + +def _document_manager(*, include_deleted: bool) -> Any: + return Document.global_objects if include_deleted else Document.objects + + +def get_request_version_param(request: Any) -> str | None: + if hasattr(request, "query_params"): + return request.query_params.get("version") + return None + + +def get_root_document(doc: Document, *, include_deleted: bool = False) -> Document: + # Use root_document_id to avoid a query when this is already a root. + # If root_document isn't available, fall back to the document itself. + if doc.root_document_id is None: + return doc + if doc.root_document is not None: + return doc.root_document + + manager = _document_manager(include_deleted=include_deleted) + root_doc = manager.only("id").filter(id=doc.root_document_id).first() + return root_doc or doc + + +def get_latest_version_for_root( + root_doc: Document, + *, + include_deleted: bool = False, +) -> Document: + manager = _document_manager(include_deleted=include_deleted) + latest = manager.filter(root_document=root_doc).order_by("-id").first() + return latest or root_doc + + +def resolve_requested_version_for_root( + root_doc: Document, + request: Any, + *, + include_deleted: bool = False, +) -> VersionResolution: + version_param = get_request_version_param(request) + if not version_param: + return VersionResolution( + document=get_latest_version_for_root( + root_doc, + include_deleted=include_deleted, + ), + ) + + try: + version_id = int(version_param) + except (TypeError, ValueError): + return VersionResolution(document=None, error=VersionResolutionError.INVALID) + + manager = _document_manager(include_deleted=include_deleted) + candidate = manager.only("id", "root_document_id").filter(id=version_id).first() + if candidate is None: + return VersionResolution(document=None, error=VersionResolutionError.NOT_FOUND) + if candidate.id != root_doc.id and candidate.root_document_id != root_doc.id: + return VersionResolution(document=None, error=VersionResolutionError.NOT_FOUND) + return VersionResolution(document=candidate) + + +def resolve_effective_document( + request_doc: Document, + request: Any, + *, + include_deleted: bool = False, +) -> VersionResolution: + root_doc = get_root_document(request_doc, include_deleted=include_deleted) + if get_request_version_param(request) is not None: + return resolve_requested_version_for_root( + root_doc, + request, + include_deleted=include_deleted, + ) + if request_doc.root_document_id is None: + return VersionResolution( + document=get_latest_version_for_root( + root_doc, + include_deleted=include_deleted, + ), + ) + return VersionResolution(document=request_doc) + + +def resolve_effective_document_by_pk( + pk: int, + request: Any, + *, + include_deleted: bool = False, +) -> VersionResolution: + manager = _document_manager(include_deleted=include_deleted) + request_doc = manager.only("id", "root_document_id").filter(pk=pk).first() + if request_doc is None: + return VersionResolution(document=None, error=VersionResolutionError.NOT_FOUND) + return resolve_effective_document( + request_doc, + request, + include_deleted=include_deleted, + ) diff --git a/src/documents/views.py b/src/documents/views.py index 315da3b81..8a6401943 100644 --- a/src/documents/views.py +++ b/src/documents/views.py @@ -206,6 +206,11 @@ from documents.tasks import sanity_check from documents.tasks import train_classifier from documents.tasks import update_document_parent_tags from documents.utils import get_boolean +from documents.versioning import VersionResolutionError +from documents.versioning import get_latest_version_for_root +from documents.versioning import get_request_version_param +from documents.versioning import get_root_document +from documents.versioning import resolve_requested_version_for_root from paperless import version from paperless.celery import app as celery_app from paperless.config import AIConfig @@ -834,14 +839,6 @@ class DocumentViewSet( ) return super().get_serializer(*args, **kwargs) - @staticmethod - def _get_root_doc(doc: Document) -> Document: - # Use root_document_id to avoid a query when this is already a root. - # If root_document isn't available, fall back to the document itself. - if doc.root_document_id is None: - return doc - return doc.root_document or doc - @extend_schema( operation_id="documents_root", responses=inline_serializer( @@ -861,7 +858,7 @@ class DocumentViewSet( except Document.DoesNotExist: raise Http404 - root_doc = self._get_root_doc(doc) + root_doc = get_root_document(doc) if request.user is not None and not has_perms_owner_aware( request.user, "view_document", @@ -896,7 +893,7 @@ class DocumentViewSet( content_doc = ( self._resolve_file_doc(root_doc, request) if "version" in request.query_params - else self._get_latest_doc_for_root(root_doc) + else get_latest_version_for_root(root_doc) ) content_updated = "content" in request.data updated_content = request.data.get("content") if content_updated else None @@ -967,31 +964,18 @@ class DocumentViewSet( ) def _resolve_file_doc(self, root_doc: Document, request): - version_param = request.query_params.get("version") - if version_param: - try: - version_id = int(version_param) - except (TypeError, ValueError): + if get_request_version_param(request): + resolution = resolve_requested_version_for_root( + root_doc, + request, + include_deleted=True, + ) + if resolution.error == VersionResolutionError.INVALID: raise NotFound("Invalid version parameter") - try: - candidate = Document.global_objects.select_related("owner").get( - id=version_id, - ) - except Document.DoesNotExist: + if resolution.document is None: raise Http404 - if ( - candidate.id != root_doc.id - and candidate.root_document_id != root_doc.id - ): - raise Http404 - return candidate - latest = Document.objects.filter(root_document=root_doc).order_by("id").last() - return latest or root_doc - - @staticmethod - def _get_latest_doc_for_root(root_doc: Document) -> Document: - latest = Document.objects.filter(root_document=root_doc).order_by("-id").first() - return latest or root_doc + return resolution.document + return get_latest_version_for_root(root_doc) def _get_effective_file_doc( self, @@ -1015,7 +999,7 @@ class DocumentViewSet( "owner", "root_document", ).get(id=pk) - root_doc = self._get_root_doc(request_doc) + root_doc = get_root_document(request_doc) if request.user is not None and not has_perms_owner_aware( request.user, "view_document", @@ -1065,7 +1049,7 @@ class DocumentViewSet( "owner", "root_document", ).get(pk=pk) - root_doc = self._get_root_doc(request_doc) + root_doc = get_root_document(request_doc) if request.user is not None and not has_perms_owner_aware( request.user, "view_document", @@ -1251,7 +1235,7 @@ class DocumentViewSet( "owner", "root_document", ).get(id=pk) - root_doc = self._get_root_doc(request_doc) + root_doc = get_root_document(request_doc) if request.user is not None and not has_perms_owner_aware( request.user, "view_document", @@ -1279,7 +1263,7 @@ class DocumentViewSet( "owner", "root_document", ).get(id=pk) - root_doc = self._get_root_doc(request_doc) + root_doc = get_root_document(request_doc) if request.user is not None and not has_perms_owner_aware( request.user, "view_document", @@ -1672,7 +1656,7 @@ class DocumentViewSet( "owner", "root_document", ).get(pk=pk) - root_doc = self._get_root_doc(root_doc) + root_doc = get_root_document(root_doc) except Document.DoesNotExist: raise Http404