Make content follow the version

- store content per version
- root doc retrieval returns latest content
- updating content affects the latest version
- load metadata per version
This commit is contained in:
shamoon
2026-02-12 10:20:47 -08:00
parent 60e400fb68
commit 6a0fae67e9
10 changed files with 224 additions and 43 deletions

View File

@@ -330,13 +330,19 @@ export class DocumentDetailComponent
}
get archiveContentRenderType(): ContentRenderType {
return this.document?.archived_file_name
const hasArchiveVersion =
this.metadata?.has_archive_version ?? !!this.document?.archived_file_name
return hasArchiveVersion
? this.getRenderType('application/pdf')
: this.getRenderType(this.document?.mime_type)
: this.getRenderType(
this.metadata?.original_mime_type || this.document?.mime_type
)
}
get originalContentRenderType(): ContentRenderType {
return this.getRenderType(this.document?.mime_type)
return this.getRenderType(
this.metadata?.original_mime_type || this.document?.mime_type
)
}
get showThumbnailOverlay(): boolean {
@@ -372,6 +378,39 @@ export class DocumentDetailComponent
}
}
/**
 * Loads metadata for the currently selected version of the current document
 * and refreshes the preview state that is derived from it.
 *
 * On success the result is stored in `this.metadata`, the TIFF state is
 * reset (and TIFF rendering is started again when the archive render type
 * is TIFF), and `previewLoaded` is set unless the preview is a PDF shown by
 * the non-native viewer.
 *
 * On failure `this.metadata` is set to an empty object and an error toast
 * is shown.
 *
 * The request is dropped when either `unsubscribeNotifier` or
 * `docChangeNotifier` emits. Neither of those is emitted by this method, so
 * switching between versions quickly could otherwise let a slow response for
 * an earlier selection overwrite the state of the current one; responses
 * for a version that is no longer selected are therefore ignored.
 */
private loadMetadataForSelectedVersion() {
  // Remember which version this request was issued for.
  const requestedVersionId = this.selectedVersionId
  const isStale = () => this.selectedVersionId !== requestedVersionId

  this.documentsService
    .getMetadata(this.documentId, requestedVersionId)
    .pipe(
      first(),
      takeUntil(this.unsubscribeNotifier),
      takeUntil(this.docChangeNotifier)
    )
    .subscribe({
      next: (result) => {
        // A newer selection superseded this request; keep its state intact.
        if (isStale()) {
          return
        }
        this.metadata = result
        // Discard any TIFF rendering left over from a previous version.
        this.tiffURL = null
        this.tiffError = null
        if (this.archiveContentRenderType === ContentRenderType.TIFF) {
          this.tryRenderTiff()
        }
        if (
          this.archiveContentRenderType !== ContentRenderType.PDF ||
          this.useNativePdfViewer
        ) {
          this.previewLoaded = true
        }
      },
      error: (error) => {
        // Do not report or apply a failure for a version no longer shown.
        if (isStale()) {
          return
        }
        this.metadata = {} // allow display to fallback to <object> tag
        this.toastService.showError(
          $localize`Error retrieving metadata`,
          error
        )
      },
    })
}
get isRTL() {
if (!this.metadata || !this.metadata.lang) return false
else {
@@ -724,36 +763,10 @@ export class DocumentDetailComponent
this.selectedVersionId = versions.length
? Math.max(...versions.map((version) => version.id))
: doc.id
this.previewLoaded = false
this.requiresPassword = false
this.updateFormForCustomFields()
if (this.archiveContentRenderType === ContentRenderType.TIFF) {
this.tryRenderTiff()
}
this.documentsService
.getMetadata(doc.id)
.pipe(
first(),
takeUntil(this.unsubscribeNotifier),
takeUntil(this.docChangeNotifier)
)
.subscribe({
next: (result) => {
this.metadata = result
if (
this.archiveContentRenderType !== ContentRenderType.PDF ||
this.useNativePdfViewer
) {
this.previewLoaded = true
}
},
error: (error) => {
this.metadata = {} // allow display to fallback to <object> tag
this.toastService.showError(
$localize`Error retrieving metadata`,
error
)
},
})
this.loadMetadataForSelectedVersion()
if (
this.permissionsService.currentUserHasObjectPermissions(
PermissionAction.Change,
@@ -785,6 +798,7 @@ export class DocumentDetailComponent
// Update file preview and download target to a specific version (by document id)
selectVersion(versionId: number) {
this.selectedVersionId = versionId
this.previewLoaded = false
this.previewUrl = this.documentsService.getPreviewUrl(
this.documentId,
false,
@@ -792,6 +806,7 @@ export class DocumentDetailComponent
)
this.updatePdfSource()
this.thumbUrl = this.documentsService.getThumbUrl(this.selectedVersionId)
this.loadMetadataForSelectedVersion()
// For text previews, refresh content
this.http
.get(this.previewUrl, { responseType: 'text' })
@@ -1840,7 +1855,7 @@ export class DocumentDetailComponent
const modal = this.modalService.open(ShareLinksDialogComponent)
modal.componentInstance.documentId = this.document.id
modal.componentInstance.hasArchiveVersion =
!!this.document?.archived_file_name
this.metadata?.has_archive_version ?? !!this.document?.archived_file_name
}
get emailEnabled(): boolean {
@@ -1853,7 +1868,7 @@ export class DocumentDetailComponent
})
modal.componentInstance.documentIds = [this.document.id]
modal.componentInstance.hasArchiveVersion =
!!this.document?.archived_file_name
this.metadata?.has_archive_version ?? !!this.document?.archived_file_name
}
private tryRenderTiff() {

View File

@@ -165,6 +165,14 @@ describe(`DocumentService`, () => {
expect(req.request.method).toEqual('GET')
})
// Requesting metadata with a version id must hit the metadata endpoint with
// that id passed as the `version` query parameter.
it('should call appropriate api endpoint for versioned metadata', () => {
  const docId = documents[0].id
  const expectedUrl = `${environment.apiBaseUrl}${endpoint}/${docId}/metadata/?version=123`

  subscription = service.getMetadata(docId, 123).subscribe()

  const metadataRequest = httpTestingController.expectOne(expectedUrl)
  expect(metadataRequest.request.method).toEqual('GET')
})
it('should call appropriate api endpoint for getting selection data', () => {
const ids = [documents[0].id]
subscription = service.getSelectionData(ids).subscribe()

View File

@@ -242,8 +242,15 @@ export class DocumentService extends AbstractPaperlessService<Document> {
)
}
getMetadata(id: number): Observable<DocumentMetadata> {
return this.http.get<DocumentMetadata>(this.getResourceUrl(id, 'metadata'))
getMetadata(
id: number,
versionID: number = null
): Observable<DocumentMetadata> {
let url = new URL(this.getResourceUrl(id, 'metadata'))
if (versionID) {
url.searchParams.append('version', versionID.toString())
}
return this.http.get<DocumentMetadata>(url.toString())
}
bulkEdit(ids: number[], method: string, args: any) {

View File

@@ -520,7 +520,7 @@ class ConsumerPlugin(
original_document.checksum = hashlib.md5(
file_for_checksum.read_bytes(),
).hexdigest()
original_document.content = ""
original_document.content = text
original_document.page_count = page_count
original_document.mime_type = mime_type
original_document.original_filename = self.filename

View File

@@ -8,6 +8,7 @@ from contextlib import contextmanager
from typing import TYPE_CHECKING
from django.contrib.contenttypes.models import ContentType
from django.core.exceptions import FieldError
from django.db.models import Case
from django.db.models import CharField
from django.db.models import Count
@@ -163,11 +164,34 @@ class TitleContentFilter(Filter):
def filter(self, qs, value):
value = value.strip() if isinstance(value, str) else value
if value:
return qs.filter(Q(title__icontains=value) | Q(content__icontains=value))
try:
return qs.filter(
Q(title__icontains=value) | Q(effective_content__icontains=value),
)
except FieldError:
return qs.filter(
Q(title__icontains=value) | Q(content__icontains=value),
)
else:
return qs
@extend_schema_field(serializers.CharField)
class EffectiveContentFilter(Filter):
    """Filter documents on ``effective_content`` using ``self.lookup_expr``.

    String values are stripped first; an empty value leaves the queryset
    unchanged. If filtering on ``effective_content`` raises ``FieldError``
    (presumably because the queryset does not carry that annotation), the
    same lookup is applied to ``content`` instead.
    """

    def filter(self, qs, value):
        needle = value.strip() if isinstance(value, str) else value
        if needle:
            annotated_lookup = {f"effective_content__{self.lookup_expr}": needle}
            try:
                return qs.filter(**annotated_lookup)
            except FieldError:
                # Fall back to the content stored on the document row itself.
                stored_lookup = {f"content__{self.lookup_expr}": needle}
                return qs.filter(**stored_lookup)
        return qs
@extend_schema_field(serializers.BooleanField)
class SharedByUser(Filter):
def filter(self, qs, value):
@@ -724,6 +748,11 @@ class DocumentFilterSet(FilterSet):
title_content = TitleContentFilter()
content__istartswith = EffectiveContentFilter(lookup_expr="istartswith")
content__iendswith = EffectiveContentFilter(lookup_expr="iendswith")
content__icontains = EffectiveContentFilter(lookup_expr="icontains")
content__iexact = EffectiveContentFilter(lookup_expr="iexact")
owner__id__none = ObjectFilter(field_name="owner", exclude=True)
custom_fields__icontains = CustomFieldsFilter()
@@ -764,7 +793,6 @@ class DocumentFilterSet(FilterSet):
fields = {
"id": ID_KWARGS,
"title": CHAR_KWARGS,
"content": CHAR_KWARGS,
"archive_serial_number": INT_KWARGS,
"created": DATE_KWARGS,
"added": DATETIME_KWARGS,

View File

@@ -185,10 +185,20 @@ def update_document(writer: AsyncWriter, doc: Document) -> None:
only_with_perms_in=["view_document"],
)
viewer_ids: str = ",".join([str(u.id) for u in users_with_perms])
effective_content = doc.content
if doc.root_document_id is None:
latest_version = (
Document.objects.filter(root_document=doc)
.only("content")
.order_by("-id")
.first()
)
if latest_version is not None:
effective_content = latest_version.content
writer.update_document(
id=doc.pk,
title=doc.title,
content=doc.content,
content=effective_content,
correspondent=doc.correspondent.name if doc.correspondent else None,
correspondent_id=doc.correspondent.id if doc.correspondent else None,
has_correspondent=doc.correspondent is not None,

View File

@@ -1177,6 +1177,8 @@ class DocumentSerializer(
def to_representation(self, instance):
doc = super().to_representation(instance)
if "content" in self.fields and hasattr(instance, "effective_content"):
doc["content"] = getattr(instance, "effective_content") or ""
if self.truncate_content and "content" in self.fields:
doc["content"] = doc.get("content")[0:550]

View File

@@ -116,18 +116,21 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase):
title="root",
checksum="root",
mime_type="application/pdf",
content="root-content",
)
v1 = Document.objects.create(
title="v1",
checksum="v1",
mime_type="application/pdf",
root_document=root,
content="v1-content",
)
v2 = Document.objects.create(
title="v2",
checksum="v2",
mime_type="application/pdf",
root_document=root,
content="v2-content",
)
with mock.patch("documents.index.remove_document_from_index"):
@@ -136,6 +139,8 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase):
self.assertEqual(resp.status_code, status.HTTP_200_OK)
self.assertFalse(Document.objects.filter(id=v2.id).exists())
self.assertEqual(resp.data["current_version_id"], v1.id)
root.refresh_from_db()
self.assertEqual(root.content, "root-content")
with mock.patch("documents.index.remove_document_from_index"):
resp = self.client.delete(f"/api/documents/{root.id}/versions/{v1.id}/")
@@ -143,6 +148,8 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase):
self.assertEqual(resp.status_code, status.HTTP_200_OK)
self.assertFalse(Document.objects.filter(id=v1.id).exists())
self.assertEqual(resp.data["current_version_id"], root.id)
root.refresh_from_db()
self.assertEqual(root.content, "root-content")
def test_delete_version_writes_audit_log_entry(self) -> None:
root = Document.objects.create(
@@ -454,3 +461,60 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase):
)
self.assertEqual(resp.status_code, status.HTTP_400_BAD_REQUEST)
def test_patch_content_updates_latest_version_content(self) -> None:
    """PATCHing ``content`` via the root document edits only the newest version.

    The root and the older version must keep their stored content, while the
    response reflects the edited text.
    """
    pdf = {"mime_type": "application/pdf"}
    root = Document.objects.create(
        title="root",
        checksum="root",
        content="root-content",
        **pdf,
    )
    # Versions are created in order so that v2 is the most recent one.
    v1 = Document.objects.create(
        title="v1",
        checksum="v1",
        root_document=root,
        content="v1-content",
        **pdf,
    )
    v2 = Document.objects.create(
        title="v2",
        checksum="v2",
        root_document=root,
        content="v2-content",
        **pdf,
    )

    url = f"/api/documents/{root.id}/"
    payload = {"content": "edited-content"}
    resp = self.client.patch(url, payload, format="json")

    self.assertEqual(resp.status_code, status.HTTP_200_OK)
    self.assertEqual(resp.data["content"], "edited-content")

    for document in (root, v1, v2):
        document.refresh_from_db()

    self.assertEqual(v2.content, "edited-content")
    self.assertEqual(root.content, "root-content")
    self.assertEqual(v1.content, "v1-content")
def test_retrieve_returns_latest_version_content(self) -> None:
    """Retrieving the root document reports the content of its latest version."""
    pdf = {"mime_type": "application/pdf"}
    root = Document.objects.create(
        title="root",
        checksum="root",
        content="root-content",
        **pdf,
    )
    # The only version; its content is what the root endpoint should report.
    Document.objects.create(
        title="v1",
        checksum="v1",
        root_document=root,
        content="v1-content",
        **pdf,
    )

    url = f"/api/documents/{root.id}/"
    resp = self.client.get(url)

    self.assertEqual(resp.status_code, status.HTTP_200_OK)
    self.assertEqual(resp.data["content"], "v1-content")

View File

@@ -745,6 +745,7 @@ class TestConsumer(
assert version.original_filename is not None
self.assertEqual(version.version_label, "v2")
self.assertTrue(version.original_filename.endswith("_v0.pdf"))
self.assertTrue(bool(version.content))
@mock.patch("documents.consumer.load_classifier")
def testClassifyDocument(self, m) -> None:

View File

@@ -30,12 +30,16 @@ from django.db.migrations.loader import MigrationLoader
from django.db.migrations.recorder import MigrationRecorder
from django.db.models import Case
from django.db.models import Count
from django.db.models import F
from django.db.models import IntegerField
from django.db.models import Max
from django.db.models import Model
from django.db.models import OuterRef
from django.db.models import Q
from django.db.models import Subquery
from django.db.models import Sum
from django.db.models import When
from django.db.models.functions import Coalesce
from django.db.models.functions import Lower
from django.db.models.manager import Manager
from django.db.models.query import QuerySet
@@ -763,7 +767,7 @@ class DocumentViewSet(
ObjectOwnedOrGrantedPermissionsFilter,
)
filterset_class = DocumentFilterSet
search_fields = ("title", "correspondent__name", "content")
search_fields = ("title", "correspondent__name", "effective_content")
ordering_fields = (
"id",
"title",
@@ -781,10 +785,16 @@ class DocumentViewSet(
)
def get_queryset(self):
latest_version_content = Subquery(
Document.objects.filter(root_document=OuterRef("pk"))
.order_by("-id")
.values("content")[:1],
)
return (
Document.objects.filter(root_document__isnull=True)
.distinct()
.order_by("-created")
.annotate(effective_content=Coalesce(latest_version_content, F("content")))
.annotate(num_notes=Count("notes"))
.select_related("correspondent", "storage_path", "document_type", "owner")
.prefetch_related("tags", "custom_fields", "notes")
@@ -847,14 +857,45 @@ class DocumentViewSet(
return Response({"root_id": root_doc.id})
def update(self, request, *args, **kwargs):
response = super().update(request, *args, **kwargs)
partial = kwargs.pop("partial", False)
root_doc = self.get_object()
content_updated = "content" in request.data
updated_content = request.data.get("content") if content_updated else None
latest_doc = self._get_latest_doc_for_root(root_doc)
data = request.data.copy()
serializer_partial = partial
if content_updated and latest_doc.id != root_doc.id:
if updated_content is None:
raise ValidationError({"content": ["This field may not be null."]})
data.pop("content", None)
serializer_partial = True
serializer = self.get_serializer(
root_doc,
data=data,
partial=serializer_partial,
)
serializer.is_valid(raise_exception=True)
self.perform_update(serializer)
if content_updated and latest_doc.id != root_doc.id:
latest_doc.content = updated_content
latest_doc.save(update_fields=["content", "modified"])
if getattr(root_doc, "_prefetched_objects_cache", None):
root_doc._prefetched_objects_cache = {}
refreshed_doc = self.get_queryset().get(pk=root_doc.pk)
response = Response(self.get_serializer(refreshed_doc).data)
from documents import index
index.add_or_update_document(self.get_object())
index.add_or_update_document(refreshed_doc)
document_updated.send(
sender=self.__class__,
document=self.get_object(),
document=refreshed_doc,
)
return response
@@ -904,6 +945,11 @@ class DocumentViewSet(
latest = Document.objects.filter(root_document=root_doc).order_by("id").last()
return latest or root_doc
@staticmethod
def _get_latest_doc_for_root(root_doc: Document) -> Document:
    """Return the version of ``root_doc`` with the highest id.

    Falls back to ``root_doc`` itself when no document references it as its
    ``root_document``.
    """
    versions = Document.objects.filter(root_document=root_doc)
    newest = versions.order_by("-id").first()
    if newest:
        return newest
    return root_doc
def file_response(self, pk, request, disposition):
request_doc = Document.global_objects.select_related(
"owner",