Make content follow the version

- store content per version
- root doc retrieval returns latest content
- updating content affects the latest version
- load metadata per version
This commit is contained in:
shamoon
2026-02-12 10:20:47 -08:00
parent 60e400fb68
commit 6a0fae67e9
10 changed files with 224 additions and 43 deletions

View File

@@ -330,13 +330,19 @@ export class DocumentDetailComponent
} }
get archiveContentRenderType(): ContentRenderType { get archiveContentRenderType(): ContentRenderType {
return this.document?.archived_file_name const hasArchiveVersion =
this.metadata?.has_archive_version ?? !!this.document?.archived_file_name
return hasArchiveVersion
? this.getRenderType('application/pdf') ? this.getRenderType('application/pdf')
: this.getRenderType(this.document?.mime_type) : this.getRenderType(
this.metadata?.original_mime_type || this.document?.mime_type
)
} }
get originalContentRenderType(): ContentRenderType { get originalContentRenderType(): ContentRenderType {
return this.getRenderType(this.document?.mime_type) return this.getRenderType(
this.metadata?.original_mime_type || this.document?.mime_type
)
} }
get showThumbnailOverlay(): boolean { get showThumbnailOverlay(): boolean {
@@ -372,6 +378,39 @@ export class DocumentDetailComponent
} }
} }
/**
 * Fetches metadata for the currently selected version and refreshes the
 * preview state derived from it (TIFF rendering, `previewLoaded`).
 *
 * The version id is captured when the request is issued. If the selection
 * has moved on to another version by the time the response arrives, the
 * response is considered stale and no component state is written, so a slow
 * request cannot overwrite the state of a newer selection.
 */
private loadMetadataForSelectedVersion() {
  const requestedVersionId = this.selectedVersionId
  // True once a different version has been selected since this request started.
  const isStale = () => this.selectedVersionId !== requestedVersionId

  this.documentsService
    .getMetadata(this.documentId, requestedVersionId)
    .pipe(
      first(),
      takeUntil(this.unsubscribeNotifier),
      takeUntil(this.docChangeNotifier)
    )
    .subscribe({
      next: (result) => {
        if (isStale()) {
          // A newer request owns the metadata / preview state now.
          return
        }
        this.metadata = result
        // Drop TIFF state left over from the previously displayed version.
        this.tiffURL = null
        this.tiffError = null
        if (this.archiveContentRenderType === ContentRenderType.TIFF) {
          this.tryRenderTiff()
        }
        if (
          this.archiveContentRenderType !== ContentRenderType.PDF ||
          this.useNativePdfViewer
        ) {
          this.previewLoaded = true
        }
      },
      error: (error) => {
        if (!isStale()) {
          this.metadata = {} // allow display to fallback to <object> tag
        }
        // The failure is still reported, even for a stale request.
        this.toastService.showError(
          $localize`Error retrieving metadata`,
          error
        )
      },
    })
}
get isRTL() { get isRTL() {
if (!this.metadata || !this.metadata.lang) return false if (!this.metadata || !this.metadata.lang) return false
else { else {
@@ -724,36 +763,10 @@ export class DocumentDetailComponent
this.selectedVersionId = versions.length this.selectedVersionId = versions.length
? Math.max(...versions.map((version) => version.id)) ? Math.max(...versions.map((version) => version.id))
: doc.id : doc.id
this.previewLoaded = false
this.requiresPassword = false this.requiresPassword = false
this.updateFormForCustomFields() this.updateFormForCustomFields()
if (this.archiveContentRenderType === ContentRenderType.TIFF) { this.loadMetadataForSelectedVersion()
this.tryRenderTiff()
}
this.documentsService
.getMetadata(doc.id)
.pipe(
first(),
takeUntil(this.unsubscribeNotifier),
takeUntil(this.docChangeNotifier)
)
.subscribe({
next: (result) => {
this.metadata = result
if (
this.archiveContentRenderType !== ContentRenderType.PDF ||
this.useNativePdfViewer
) {
this.previewLoaded = true
}
},
error: (error) => {
this.metadata = {} // allow display to fallback to <object> tag
this.toastService.showError(
$localize`Error retrieving metadata`,
error
)
},
})
if ( if (
this.permissionsService.currentUserHasObjectPermissions( this.permissionsService.currentUserHasObjectPermissions(
PermissionAction.Change, PermissionAction.Change,
@@ -785,6 +798,7 @@ export class DocumentDetailComponent
// Update file preview and download target to a specific version (by document id) // Update file preview and download target to a specific version (by document id)
selectVersion(versionId: number) { selectVersion(versionId: number) {
this.selectedVersionId = versionId this.selectedVersionId = versionId
this.previewLoaded = false
this.previewUrl = this.documentsService.getPreviewUrl( this.previewUrl = this.documentsService.getPreviewUrl(
this.documentId, this.documentId,
false, false,
@@ -792,6 +806,7 @@ export class DocumentDetailComponent
) )
this.updatePdfSource() this.updatePdfSource()
this.thumbUrl = this.documentsService.getThumbUrl(this.selectedVersionId) this.thumbUrl = this.documentsService.getThumbUrl(this.selectedVersionId)
this.loadMetadataForSelectedVersion()
// For text previews, refresh content // For text previews, refresh content
this.http this.http
.get(this.previewUrl, { responseType: 'text' }) .get(this.previewUrl, { responseType: 'text' })
@@ -1840,7 +1855,7 @@ export class DocumentDetailComponent
const modal = this.modalService.open(ShareLinksDialogComponent) const modal = this.modalService.open(ShareLinksDialogComponent)
modal.componentInstance.documentId = this.document.id modal.componentInstance.documentId = this.document.id
modal.componentInstance.hasArchiveVersion = modal.componentInstance.hasArchiveVersion =
!!this.document?.archived_file_name this.metadata?.has_archive_version ?? !!this.document?.archived_file_name
} }
get emailEnabled(): boolean { get emailEnabled(): boolean {
@@ -1853,7 +1868,7 @@ export class DocumentDetailComponent
}) })
modal.componentInstance.documentIds = [this.document.id] modal.componentInstance.documentIds = [this.document.id]
modal.componentInstance.hasArchiveVersion = modal.componentInstance.hasArchiveVersion =
!!this.document?.archived_file_name this.metadata?.has_archive_version ?? !!this.document?.archived_file_name
} }
private tryRenderTiff() { private tryRenderTiff() {

View File

@@ -165,6 +165,14 @@ describe(`DocumentService`, () => {
expect(req.request.method).toEqual('GET') expect(req.request.method).toEqual('GET')
}) })
it('should call appropriate api endpoint for versioned metadata', () => {
  // The version id passed to getMetadata must appear as the `version`
  // query parameter on the metadata endpoint.
  const expectedUrl = `${environment.apiBaseUrl}${endpoint}/${documents[0].id}/metadata/?version=123`
  subscription = service.getMetadata(documents[0].id, 123).subscribe()
  const pendingRequest = httpTestingController.expectOne(expectedUrl)
  expect(pendingRequest.request.method).toEqual('GET')
})
it('should call appropriate api endpoint for getting selection data', () => { it('should call appropriate api endpoint for getting selection data', () => {
const ids = [documents[0].id] const ids = [documents[0].id]
subscription = service.getSelectionData(ids).subscribe() subscription = service.getSelectionData(ids).subscribe()

View File

@@ -242,8 +242,15 @@ export class DocumentService extends AbstractPaperlessService<Document> {
) )
} }
getMetadata(id: number): Observable<DocumentMetadata> { getMetadata(
return this.http.get<DocumentMetadata>(this.getResourceUrl(id, 'metadata')) id: number,
versionID: number = null
): Observable<DocumentMetadata> {
let url = new URL(this.getResourceUrl(id, 'metadata'))
if (versionID) {
url.searchParams.append('version', versionID.toString())
}
return this.http.get<DocumentMetadata>(url.toString())
} }
bulkEdit(ids: number[], method: string, args: any) { bulkEdit(ids: number[], method: string, args: any) {

View File

@@ -520,7 +520,7 @@ class ConsumerPlugin(
original_document.checksum = hashlib.md5( original_document.checksum = hashlib.md5(
file_for_checksum.read_bytes(), file_for_checksum.read_bytes(),
).hexdigest() ).hexdigest()
original_document.content = "" original_document.content = text
original_document.page_count = page_count original_document.page_count = page_count
original_document.mime_type = mime_type original_document.mime_type = mime_type
original_document.original_filename = self.filename original_document.original_filename = self.filename

View File

@@ -8,6 +8,7 @@ from contextlib import contextmanager
from typing import TYPE_CHECKING from typing import TYPE_CHECKING
from django.contrib.contenttypes.models import ContentType from django.contrib.contenttypes.models import ContentType
from django.core.exceptions import FieldError
from django.db.models import Case from django.db.models import Case
from django.db.models import CharField from django.db.models import CharField
from django.db.models import Count from django.db.models import Count
@@ -163,11 +164,34 @@ class TitleContentFilter(Filter):
def filter(self, qs, value): def filter(self, qs, value):
value = value.strip() if isinstance(value, str) else value value = value.strip() if isinstance(value, str) else value
if value: if value:
return qs.filter(Q(title__icontains=value) | Q(content__icontains=value)) try:
return qs.filter(
Q(title__icontains=value) | Q(effective_content__icontains=value),
)
except FieldError:
return qs.filter(
Q(title__icontains=value) | Q(content__icontains=value),
)
else: else:
return qs return qs
@extend_schema_field(serializers.CharField)
class EffectiveContentFilter(Filter):
    """
    Content filter that applies this filter's ``lookup_expr`` to
    ``effective_content``, falling back to ``content`` when filtering on
    ``effective_content`` raises ``FieldError``.
    """

    def filter(self, qs, value):
        # Strings are stripped; any other value is used as given.
        term = value.strip() if isinstance(value, str) else value
        if term:
            suffix = self.lookup_expr
            try:
                filtered = qs.filter(**{f"effective_content__{suffix}": term})
            except FieldError:
                # effective_content could not be used on this queryset.
                filtered = qs.filter(**{f"content__{suffix}": term})
            return filtered
        # Empty / missing value: leave the queryset untouched.
        return qs
@extend_schema_field(serializers.BooleanField) @extend_schema_field(serializers.BooleanField)
class SharedByUser(Filter): class SharedByUser(Filter):
def filter(self, qs, value): def filter(self, qs, value):
@@ -724,6 +748,11 @@ class DocumentFilterSet(FilterSet):
title_content = TitleContentFilter() title_content = TitleContentFilter()
content__istartswith = EffectiveContentFilter(lookup_expr="istartswith")
content__iendswith = EffectiveContentFilter(lookup_expr="iendswith")
content__icontains = EffectiveContentFilter(lookup_expr="icontains")
content__iexact = EffectiveContentFilter(lookup_expr="iexact")
owner__id__none = ObjectFilter(field_name="owner", exclude=True) owner__id__none = ObjectFilter(field_name="owner", exclude=True)
custom_fields__icontains = CustomFieldsFilter() custom_fields__icontains = CustomFieldsFilter()
@@ -764,7 +793,6 @@ class DocumentFilterSet(FilterSet):
fields = { fields = {
"id": ID_KWARGS, "id": ID_KWARGS,
"title": CHAR_KWARGS, "title": CHAR_KWARGS,
"content": CHAR_KWARGS,
"archive_serial_number": INT_KWARGS, "archive_serial_number": INT_KWARGS,
"created": DATE_KWARGS, "created": DATE_KWARGS,
"added": DATETIME_KWARGS, "added": DATETIME_KWARGS,

View File

@@ -185,10 +185,20 @@ def update_document(writer: AsyncWriter, doc: Document) -> None:
only_with_perms_in=["view_document"], only_with_perms_in=["view_document"],
) )
viewer_ids: str = ",".join([str(u.id) for u in users_with_perms]) viewer_ids: str = ",".join([str(u.id) for u in users_with_perms])
effective_content = doc.content
if doc.root_document_id is None:
latest_version = (
Document.objects.filter(root_document=doc)
.only("content")
.order_by("-id")
.first()
)
if latest_version is not None:
effective_content = latest_version.content
writer.update_document( writer.update_document(
id=doc.pk, id=doc.pk,
title=doc.title, title=doc.title,
content=doc.content, content=effective_content,
correspondent=doc.correspondent.name if doc.correspondent else None, correspondent=doc.correspondent.name if doc.correspondent else None,
correspondent_id=doc.correspondent.id if doc.correspondent else None, correspondent_id=doc.correspondent.id if doc.correspondent else None,
has_correspondent=doc.correspondent is not None, has_correspondent=doc.correspondent is not None,

View File

@@ -1177,6 +1177,8 @@ class DocumentSerializer(
def to_representation(self, instance): def to_representation(self, instance):
doc = super().to_representation(instance) doc = super().to_representation(instance)
if "content" in self.fields and hasattr(instance, "effective_content"):
doc["content"] = getattr(instance, "effective_content") or ""
if self.truncate_content and "content" in self.fields: if self.truncate_content and "content" in self.fields:
doc["content"] = doc.get("content")[0:550] doc["content"] = doc.get("content")[0:550]

View File

@@ -116,18 +116,21 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase):
title="root", title="root",
checksum="root", checksum="root",
mime_type="application/pdf", mime_type="application/pdf",
content="root-content",
) )
v1 = Document.objects.create( v1 = Document.objects.create(
title="v1", title="v1",
checksum="v1", checksum="v1",
mime_type="application/pdf", mime_type="application/pdf",
root_document=root, root_document=root,
content="v1-content",
) )
v2 = Document.objects.create( v2 = Document.objects.create(
title="v2", title="v2",
checksum="v2", checksum="v2",
mime_type="application/pdf", mime_type="application/pdf",
root_document=root, root_document=root,
content="v2-content",
) )
with mock.patch("documents.index.remove_document_from_index"): with mock.patch("documents.index.remove_document_from_index"):
@@ -136,6 +139,8 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase):
self.assertEqual(resp.status_code, status.HTTP_200_OK) self.assertEqual(resp.status_code, status.HTTP_200_OK)
self.assertFalse(Document.objects.filter(id=v2.id).exists()) self.assertFalse(Document.objects.filter(id=v2.id).exists())
self.assertEqual(resp.data["current_version_id"], v1.id) self.assertEqual(resp.data["current_version_id"], v1.id)
root.refresh_from_db()
self.assertEqual(root.content, "root-content")
with mock.patch("documents.index.remove_document_from_index"): with mock.patch("documents.index.remove_document_from_index"):
resp = self.client.delete(f"/api/documents/{root.id}/versions/{v1.id}/") resp = self.client.delete(f"/api/documents/{root.id}/versions/{v1.id}/")
@@ -143,6 +148,8 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase):
self.assertEqual(resp.status_code, status.HTTP_200_OK) self.assertEqual(resp.status_code, status.HTTP_200_OK)
self.assertFalse(Document.objects.filter(id=v1.id).exists()) self.assertFalse(Document.objects.filter(id=v1.id).exists())
self.assertEqual(resp.data["current_version_id"], root.id) self.assertEqual(resp.data["current_version_id"], root.id)
root.refresh_from_db()
self.assertEqual(root.content, "root-content")
def test_delete_version_writes_audit_log_entry(self) -> None: def test_delete_version_writes_audit_log_entry(self) -> None:
root = Document.objects.create( root = Document.objects.create(
@@ -454,3 +461,60 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase):
) )
self.assertEqual(resp.status_code, status.HTTP_400_BAD_REQUEST) self.assertEqual(resp.status_code, status.HTTP_400_BAD_REQUEST)
def test_patch_content_updates_latest_version_content(self) -> None:
    # Arrange: a root document with two versions, created in id order.
    root = Document.objects.create(
        title="root",
        checksum="root",
        mime_type="application/pdf",
        content="root-content",
    )
    v1 = Document.objects.create(
        title="v1",
        checksum="v1",
        mime_type="application/pdf",
        root_document=root,
        content="v1-content",
    )
    v2 = Document.objects.create(
        title="v2",
        checksum="v2",
        mime_type="application/pdf",
        root_document=root,
        content="v2-content",
    )

    # Act: patch the content through the root document's endpoint.
    url = f"/api/documents/{root.id}/"
    payload = {"content": "edited-content"}
    resp = self.client.patch(url, payload, format="json")

    # Assert: the response carries the edited content ...
    self.assertEqual(resp.status_code, status.HTTP_200_OK)
    self.assertEqual(resp.data["content"], "edited-content")

    # ... and only the newest version (v2) was changed in the database.
    expectations = (
        (v2, "edited-content"),
        (root, "root-content"),
        (v1, "v1-content"),
    )
    for document, expected_content in expectations:
        document.refresh_from_db()
        self.assertEqual(document.content, expected_content)
def test_retrieve_returns_latest_version_content(self) -> None:
    # Arrange: a root document plus a single version with different content.
    root = Document.objects.create(
        title="root",
        checksum="root",
        mime_type="application/pdf",
        content="root-content",
    )
    Document.objects.create(
        title="v1",
        checksum="v1",
        mime_type="application/pdf",
        root_document=root,
        content="v1-content",
    )

    # Act: retrieve the root document.
    detail_url = f"/api/documents/{root.id}/"
    resp = self.client.get(detail_url)

    # Assert: the returned content is the version's, not the root's.
    self.assertEqual(resp.status_code, status.HTTP_200_OK)
    self.assertEqual(resp.data["content"], "v1-content")

View File

@@ -745,6 +745,7 @@ class TestConsumer(
assert version.original_filename is not None assert version.original_filename is not None
self.assertEqual(version.version_label, "v2") self.assertEqual(version.version_label, "v2")
self.assertTrue(version.original_filename.endswith("_v0.pdf")) self.assertTrue(version.original_filename.endswith("_v0.pdf"))
self.assertTrue(bool(version.content))
@mock.patch("documents.consumer.load_classifier") @mock.patch("documents.consumer.load_classifier")
def testClassifyDocument(self, m) -> None: def testClassifyDocument(self, m) -> None:

View File

@@ -30,12 +30,16 @@ from django.db.migrations.loader import MigrationLoader
from django.db.migrations.recorder import MigrationRecorder from django.db.migrations.recorder import MigrationRecorder
from django.db.models import Case from django.db.models import Case
from django.db.models import Count from django.db.models import Count
from django.db.models import F
from django.db.models import IntegerField from django.db.models import IntegerField
from django.db.models import Max from django.db.models import Max
from django.db.models import Model from django.db.models import Model
from django.db.models import OuterRef
from django.db.models import Q from django.db.models import Q
from django.db.models import Subquery
from django.db.models import Sum from django.db.models import Sum
from django.db.models import When from django.db.models import When
from django.db.models.functions import Coalesce
from django.db.models.functions import Lower from django.db.models.functions import Lower
from django.db.models.manager import Manager from django.db.models.manager import Manager
from django.db.models.query import QuerySet from django.db.models.query import QuerySet
@@ -763,7 +767,7 @@ class DocumentViewSet(
ObjectOwnedOrGrantedPermissionsFilter, ObjectOwnedOrGrantedPermissionsFilter,
) )
filterset_class = DocumentFilterSet filterset_class = DocumentFilterSet
search_fields = ("title", "correspondent__name", "content") search_fields = ("title", "correspondent__name", "effective_content")
ordering_fields = ( ordering_fields = (
"id", "id",
"title", "title",
@@ -781,10 +785,16 @@ class DocumentViewSet(
) )
def get_queryset(self): def get_queryset(self):
latest_version_content = Subquery(
Document.objects.filter(root_document=OuterRef("pk"))
.order_by("-id")
.values("content")[:1],
)
return ( return (
Document.objects.filter(root_document__isnull=True) Document.objects.filter(root_document__isnull=True)
.distinct() .distinct()
.order_by("-created") .order_by("-created")
.annotate(effective_content=Coalesce(latest_version_content, F("content")))
.annotate(num_notes=Count("notes")) .annotate(num_notes=Count("notes"))
.select_related("correspondent", "storage_path", "document_type", "owner") .select_related("correspondent", "storage_path", "document_type", "owner")
.prefetch_related("tags", "custom_fields", "notes") .prefetch_related("tags", "custom_fields", "notes")
@@ -847,14 +857,45 @@ class DocumentViewSet(
return Response({"root_id": root_doc.id}) return Response({"root_id": root_doc.id})
def update(self, request, *args, **kwargs): def update(self, request, *args, **kwargs):
response = super().update(request, *args, **kwargs) partial = kwargs.pop("partial", False)
root_doc = self.get_object()
content_updated = "content" in request.data
updated_content = request.data.get("content") if content_updated else None
latest_doc = self._get_latest_doc_for_root(root_doc)
data = request.data.copy()
serializer_partial = partial
if content_updated and latest_doc.id != root_doc.id:
if updated_content is None:
raise ValidationError({"content": ["This field may not be null."]})
data.pop("content", None)
serializer_partial = True
serializer = self.get_serializer(
root_doc,
data=data,
partial=serializer_partial,
)
serializer.is_valid(raise_exception=True)
self.perform_update(serializer)
if content_updated and latest_doc.id != root_doc.id:
latest_doc.content = updated_content
latest_doc.save(update_fields=["content", "modified"])
if getattr(root_doc, "_prefetched_objects_cache", None):
root_doc._prefetched_objects_cache = {}
refreshed_doc = self.get_queryset().get(pk=root_doc.pk)
response = Response(self.get_serializer(refreshed_doc).data)
from documents import index from documents import index
index.add_or_update_document(self.get_object()) index.add_or_update_document(refreshed_doc)
document_updated.send( document_updated.send(
sender=self.__class__, sender=self.__class__,
document=self.get_object(), document=refreshed_doc,
) )
return response return response
@@ -904,6 +945,11 @@ class DocumentViewSet(
latest = Document.objects.filter(root_document=root_doc).order_by("id").last() latest = Document.objects.filter(root_document=root_doc).order_by("id").last()
return latest or root_doc return latest or root_doc
@staticmethod
def _get_latest_doc_for_root(root_doc: Document) -> Document:
    """
    Return the document with the highest id among those whose
    ``root_document`` is ``root_doc``; if there is none, return ``root_doc``.
    """
    versions = Document.objects.filter(root_document=root_doc)
    newest = versions.order_by("-id").first()
    return newest or root_doc
def file_response(self, pk, request, disposition): def file_response(self, pk, request, disposition):
request_doc = Document.global_objects.select_related( request_doc = Document.global_objects.select_related(
"owner", "owner",