From 6a0fae67e9ef01c6a9ceb668a617e70c98d31d4f Mon Sep 17 00:00:00 2001 From: shamoon <4887959+shamoon@users.noreply.github.com> Date: Thu, 12 Feb 2026 10:20:47 -0800 Subject: [PATCH] Make content follow the version - store content per version - root doc retrieval returns latest content - updating content affects the latest version - load metadata per version --- .../document-detail.component.ts | 81 +++++++++++-------- .../services/rest/document.service.spec.ts | 8 ++ .../src/app/services/rest/document.service.ts | 11 ++- src/documents/consumer.py | 2 +- src/documents/filters.py | 32 +++++++- src/documents/index.py | 12 ++- src/documents/serialisers.py | 2 + .../tests/test_api_document_versions.py | 64 +++++++++++++++ src/documents/tests/test_consumer.py | 1 + src/documents/views.py | 54 ++++++++++++- 10 files changed, 224 insertions(+), 43 deletions(-) diff --git a/src-ui/src/app/components/document-detail/document-detail.component.ts b/src-ui/src/app/components/document-detail/document-detail.component.ts index 568072861..523974e6a 100644 --- a/src-ui/src/app/components/document-detail/document-detail.component.ts +++ b/src-ui/src/app/components/document-detail/document-detail.component.ts @@ -330,13 +330,19 @@ export class DocumentDetailComponent } get archiveContentRenderType(): ContentRenderType { - return this.document?.archived_file_name + const hasArchiveVersion = + this.metadata?.has_archive_version ?? !!this.document?.archived_file_name + return hasArchiveVersion ? this.getRenderType('application/pdf') - : this.getRenderType(this.document?.mime_type) + : this.getRenderType( + this.metadata?.original_mime_type || this.document?.mime_type + ) } get originalContentRenderType(): ContentRenderType { - return this.getRenderType(this.document?.mime_type) + return this.getRenderType( + this.metadata?.original_mime_type || this.document?.mime_type + ) } get showThumbnailOverlay(): boolean { @@ -372,6 +378,39 @@ export class DocumentDetailComponent } } + private loadMetadataForSelectedVersion() { + this.documentsService + .getMetadata(this.documentId, this.selectedVersionId) + .pipe( + first(), + takeUntil(this.unsubscribeNotifier), + takeUntil(this.docChangeNotifier) + ) + .subscribe({ + next: (result) => { + this.metadata = result + this.tiffURL = null + this.tiffError = null + if (this.archiveContentRenderType === ContentRenderType.TIFF) { + this.tryRenderTiff() + } + if ( + this.archiveContentRenderType !== ContentRenderType.PDF || + this.useNativePdfViewer + ) { + this.previewLoaded = true + } + }, + error: (error) => { + this.metadata = {} // allow display to fallback to tag + this.toastService.showError( + $localize`Error retrieving metadata`, + error + ) + }, + }) + } + get isRTL() { if (!this.metadata || !this.metadata.lang) return false else { @@ -724,36 +763,10 @@ export class DocumentDetailComponent this.selectedVersionId = versions.length ? Math.max(...versions.map((version) => version.id)) : doc.id + this.previewLoaded = false this.requiresPassword = false this.updateFormForCustomFields() - if (this.archiveContentRenderType === ContentRenderType.TIFF) { - this.tryRenderTiff() - } - this.documentsService - .getMetadata(doc.id) - .pipe( - first(), - takeUntil(this.unsubscribeNotifier), - takeUntil(this.docChangeNotifier) - ) - .subscribe({ - next: (result) => { - this.metadata = result - if ( - this.archiveContentRenderType !== ContentRenderType.PDF || - this.useNativePdfViewer - ) { - this.previewLoaded = true - } - }, - error: (error) => { - this.metadata = {} // allow display to fallback to tag - this.toastService.showError( - $localize`Error retrieving metadata`, - error - ) - }, - }) + this.loadMetadataForSelectedVersion() if ( this.permissionsService.currentUserHasObjectPermissions( PermissionAction.Change, @@ -785,6 +798,7 @@ export class DocumentDetailComponent // Update file preview and download target to a specific version (by document id) selectVersion(versionId: number) { this.selectedVersionId = versionId + this.previewLoaded = false this.previewUrl = this.documentsService.getPreviewUrl( this.documentId, false, @@ -792,6 +806,7 @@ export class DocumentDetailComponent ) this.updatePdfSource() this.thumbUrl = this.documentsService.getThumbUrl(this.selectedVersionId) + this.loadMetadataForSelectedVersion() // For text previews, refresh content this.http .get(this.previewUrl, { responseType: 'text' }) @@ -1840,7 +1855,7 @@ export class DocumentDetailComponent const modal = this.modalService.open(ShareLinksDialogComponent) modal.componentInstance.documentId = this.document.id modal.componentInstance.hasArchiveVersion = - !!this.document?.archived_file_name + this.metadata?.has_archive_version ?? !!this.document?.archived_file_name } get emailEnabled(): boolean { @@ -1853,7 +1868,7 @@ export class DocumentDetailComponent }) modal.componentInstance.documentIds = [this.document.id] modal.componentInstance.hasArchiveVersion = - !!this.document?.archived_file_name + this.metadata?.has_archive_version ?? !!this.document?.archived_file_name } private tryRenderTiff() { diff --git a/src-ui/src/app/services/rest/document.service.spec.ts b/src-ui/src/app/services/rest/document.service.spec.ts index 77a54dbc7..6ded03e81 100644 --- a/src-ui/src/app/services/rest/document.service.spec.ts +++ b/src-ui/src/app/services/rest/document.service.spec.ts @@ -165,6 +165,14 @@ describe(`DocumentService`, () => { expect(req.request.method).toEqual('GET') }) + it('should call appropriate api endpoint for versioned metadata', () => { + subscription = service.getMetadata(documents[0].id, 123).subscribe() + const req = httpTestingController.expectOne( + `${environment.apiBaseUrl}${endpoint}/${documents[0].id}/metadata/?version=123` + ) + expect(req.request.method).toEqual('GET') + }) + it('should call appropriate api endpoint for getting selection data', () => { const ids = [documents[0].id] subscription = service.getSelectionData(ids).subscribe() diff --git a/src-ui/src/app/services/rest/document.service.ts b/src-ui/src/app/services/rest/document.service.ts index 9792cdcec..21dece81a 100644 --- a/src-ui/src/app/services/rest/document.service.ts +++ b/src-ui/src/app/services/rest/document.service.ts @@ -242,8 +242,15 @@ export class DocumentService extends AbstractPaperlessService { ) } - getMetadata(id: number): Observable { - return this.http.get(this.getResourceUrl(id, 'metadata')) + getMetadata( + id: number, + versionID: number = null + ): Observable { + let url = new URL(this.getResourceUrl(id, 'metadata')) + if (versionID) { + url.searchParams.append('version', versionID.toString()) + } + return this.http.get(url.toString()) } bulkEdit(ids: number[], method: string, args: any) { diff --git a/src/documents/consumer.py b/src/documents/consumer.py index a896ffbb6..6730a1192 100644 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -520,7 +520,7 @@ class ConsumerPlugin( original_document.checksum = hashlib.md5( file_for_checksum.read_bytes(), ).hexdigest() - original_document.content = "" + original_document.content = text original_document.page_count = page_count original_document.mime_type = mime_type original_document.original_filename = self.filename diff --git a/src/documents/filters.py b/src/documents/filters.py index f1713882c..4cbf78819 100644 --- a/src/documents/filters.py +++ b/src/documents/filters.py @@ -8,6 +8,7 @@ from contextlib import contextmanager from typing import TYPE_CHECKING from django.contrib.contenttypes.models import ContentType +from django.core.exceptions import FieldError from django.db.models import Case from django.db.models import CharField from django.db.models import Count @@ -163,11 +164,34 @@ class TitleContentFilter(Filter): def filter(self, qs, value): value = value.strip() if isinstance(value, str) else value if value: - return qs.filter(Q(title__icontains=value) | Q(content__icontains=value)) + try: + return qs.filter( + Q(title__icontains=value) | Q(effective_content__icontains=value), + ) + except FieldError: + return qs.filter( + Q(title__icontains=value) | Q(content__icontains=value), + ) else: return qs +@extend_schema_field(serializers.CharField) +class EffectiveContentFilter(Filter): + def filter(self, qs, value): + value = value.strip() if isinstance(value, str) else value + if not value: + return qs + try: + return qs.filter( + **{f"effective_content__{self.lookup_expr}": value}, + ) + except FieldError: + return qs.filter( + **{f"content__{self.lookup_expr}": value}, + ) + + @extend_schema_field(serializers.BooleanField) class SharedByUser(Filter): def filter(self, qs, value): @@ -724,6 +748,11 @@ class DocumentFilterSet(FilterSet): title_content = TitleContentFilter() + content__istartswith = EffectiveContentFilter(lookup_expr="istartswith") + content__iendswith = EffectiveContentFilter(lookup_expr="iendswith") + content__icontains = EffectiveContentFilter(lookup_expr="icontains") + content__iexact = EffectiveContentFilter(lookup_expr="iexact") + owner__id__none = ObjectFilter(field_name="owner", exclude=True) custom_fields__icontains = CustomFieldsFilter() @@ -764,7 +793,6 @@ class DocumentFilterSet(FilterSet): fields = { "id": ID_KWARGS, "title": CHAR_KWARGS, - "content": CHAR_KWARGS, "archive_serial_number": INT_KWARGS, "created": DATE_KWARGS, "added": DATETIME_KWARGS, diff --git a/src/documents/index.py b/src/documents/index.py index be944b48b..cdc038b46 100644 --- a/src/documents/index.py +++ b/src/documents/index.py @@ -185,10 +185,20 @@ def update_document(writer: AsyncWriter, doc: Document) -> None: only_with_perms_in=["view_document"], ) viewer_ids: str = ",".join([str(u.id) for u in users_with_perms]) + effective_content = doc.content + if doc.root_document_id is None: + latest_version = ( + Document.objects.filter(root_document=doc) + .only("content") + .order_by("-id") + .first() + ) + if latest_version is not None: + effective_content = latest_version.content writer.update_document( id=doc.pk, title=doc.title, - content=doc.content, + content=effective_content, correspondent=doc.correspondent.name if doc.correspondent else None, correspondent_id=doc.correspondent.id if doc.correspondent else None, has_correspondent=doc.correspondent is not None, diff --git a/src/documents/serialisers.py b/src/documents/serialisers.py index 71953bd90..a2bc77bf4 100644 --- a/src/documents/serialisers.py +++ b/src/documents/serialisers.py @@ -1177,6 +1177,8 @@ class DocumentSerializer( def to_representation(self, instance): doc = super().to_representation(instance) + if "content" in self.fields and hasattr(instance, "effective_content"): + doc["content"] = getattr(instance, "effective_content") or "" if self.truncate_content and "content" in self.fields: doc["content"] = doc.get("content")[0:550] diff --git a/src/documents/tests/test_api_document_versions.py b/src/documents/tests/test_api_document_versions.py index 7f05d611f..a6b2b72f1 100644 --- a/src/documents/tests/test_api_document_versions.py +++ b/src/documents/tests/test_api_document_versions.py @@ -116,18 +116,21 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase): title="root", checksum="root", mime_type="application/pdf", + content="root-content", ) v1 = Document.objects.create( title="v1", checksum="v1", mime_type="application/pdf", root_document=root, + content="v1-content", ) v2 = Document.objects.create( title="v2", checksum="v2", mime_type="application/pdf", root_document=root, + content="v2-content", ) with mock.patch("documents.index.remove_document_from_index"): @@ -136,6 +139,8 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase): self.assertEqual(resp.status_code, status.HTTP_200_OK) self.assertFalse(Document.objects.filter(id=v2.id).exists()) self.assertEqual(resp.data["current_version_id"], v1.id) + root.refresh_from_db() + self.assertEqual(root.content, "root-content") with mock.patch("documents.index.remove_document_from_index"): resp = self.client.delete(f"/api/documents/{root.id}/versions/{v1.id}/") @@ -143,6 +148,8 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase): self.assertEqual(resp.status_code, status.HTTP_200_OK) self.assertFalse(Document.objects.filter(id=v1.id).exists()) self.assertEqual(resp.data["current_version_id"], root.id) + root.refresh_from_db() + self.assertEqual(root.content, "root-content") def test_delete_version_writes_audit_log_entry(self) -> None: root = Document.objects.create( @@ -454,3 +461,60 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase): ) self.assertEqual(resp.status_code, status.HTTP_400_BAD_REQUEST) + + def test_patch_content_updates_latest_version_content(self) -> None: + root = Document.objects.create( + title="root", + checksum="root", + mime_type="application/pdf", + content="root-content", + ) + v1 = Document.objects.create( + title="v1", + checksum="v1", + mime_type="application/pdf", + root_document=root, + content="v1-content", + ) + v2 = Document.objects.create( + title="v2", + checksum="v2", + mime_type="application/pdf", + root_document=root, + content="v2-content", + ) + + resp = self.client.patch( + f"/api/documents/{root.id}/", + {"content": "edited-content"}, + format="json", + ) + + self.assertEqual(resp.status_code, status.HTTP_200_OK) + self.assertEqual(resp.data["content"], "edited-content") + root.refresh_from_db() + v1.refresh_from_db() + v2.refresh_from_db() + self.assertEqual(v2.content, "edited-content") + self.assertEqual(root.content, "root-content") + self.assertEqual(v1.content, "v1-content") + + def test_retrieve_returns_latest_version_content(self) -> None: + root = Document.objects.create( + title="root", + checksum="root", + mime_type="application/pdf", + content="root-content", + ) + Document.objects.create( + title="v1", + checksum="v1", + mime_type="application/pdf", + root_document=root, + content="v1-content", + ) + + resp = self.client.get(f"/api/documents/{root.id}/") + + self.assertEqual(resp.status_code, status.HTTP_200_OK) + self.assertEqual(resp.data["content"], "v1-content") diff --git a/src/documents/tests/test_consumer.py b/src/documents/tests/test_consumer.py index f8c93c9ba..554bf18a5 100644 --- a/src/documents/tests/test_consumer.py +++ b/src/documents/tests/test_consumer.py @@ -745,6 +745,7 @@ class TestConsumer( assert version.original_filename is not None self.assertEqual(version.version_label, "v2") self.assertTrue(version.original_filename.endswith("_v0.pdf")) + self.assertTrue(bool(version.content)) @mock.patch("documents.consumer.load_classifier") def testClassifyDocument(self, m) -> None: diff --git a/src/documents/views.py b/src/documents/views.py index 49cf6713d..82639843e 100644 --- a/src/documents/views.py +++ b/src/documents/views.py @@ -30,12 +30,16 @@ from django.db.migrations.loader import MigrationLoader from django.db.migrations.recorder import MigrationRecorder from django.db.models import Case from django.db.models import Count +from django.db.models import F from django.db.models import IntegerField from django.db.models import Max from django.db.models import Model +from django.db.models import OuterRef from django.db.models import Q +from django.db.models import Subquery from django.db.models import Sum from django.db.models import When +from django.db.models.functions import Coalesce from django.db.models.functions import Lower from django.db.models.manager import Manager from django.db.models.query import QuerySet @@ -763,7 +767,7 @@ class DocumentViewSet( ObjectOwnedOrGrantedPermissionsFilter, ) filterset_class = DocumentFilterSet - search_fields = ("title", "correspondent__name", "content") + search_fields = ("title", "correspondent__name", "effective_content") ordering_fields = ( "id", "title", @@ -781,10 +785,16 @@ class DocumentViewSet( ) def get_queryset(self): + latest_version_content = Subquery( + Document.objects.filter(root_document=OuterRef("pk")) + .order_by("-id") + .values("content")[:1], + ) return ( Document.objects.filter(root_document__isnull=True) .distinct() .order_by("-created") + .annotate(effective_content=Coalesce(latest_version_content, F("content"))) .annotate(num_notes=Count("notes")) .select_related("correspondent", "storage_path", "document_type", "owner") .prefetch_related("tags", "custom_fields", "notes") @@ -847,14 +857,45 @@ class DocumentViewSet( return Response({"root_id": root_doc.id}) def update(self, request, *args, **kwargs): - response = super().update(request, *args, **kwargs) + partial = kwargs.pop("partial", False) + root_doc = self.get_object() + content_updated = "content" in request.data + updated_content = request.data.get("content") if content_updated else None + latest_doc = self._get_latest_doc_for_root(root_doc) + + data = request.data.copy() + serializer_partial = partial + if content_updated and latest_doc.id != root_doc.id: + if updated_content is None: + raise ValidationError({"content": ["This field may not be null."]}) + data.pop("content", None) + serializer_partial = True + + serializer = self.get_serializer( + root_doc, + data=data, + partial=serializer_partial, + ) + serializer.is_valid(raise_exception=True) + self.perform_update(serializer) + + if content_updated and latest_doc.id != root_doc.id: + latest_doc.content = updated_content + latest_doc.save(update_fields=["content", "modified"]) + + if getattr(root_doc, "_prefetched_objects_cache", None): + root_doc._prefetched_objects_cache = {} + + refreshed_doc = self.get_queryset().get(pk=root_doc.pk) + response = Response(self.get_serializer(refreshed_doc).data) + from documents import index - index.add_or_update_document(self.get_object()) + index.add_or_update_document(refreshed_doc) document_updated.send( sender=self.__class__, - document=self.get_object(), + document=refreshed_doc, ) return response @@ -904,6 +945,11 @@ class DocumentViewSet( latest = Document.objects.filter(root_document=root_doc).order_by("id").last() return latest or root_doc + @staticmethod + def _get_latest_doc_for_root(root_doc: Document) -> Document: + latest = Document.objects.filter(root_document=root_doc).order_by("-id").first() + return latest or root_doc + def file_response(self, pk, request, disposition): request_doc = Document.global_objects.select_related( "owner",