Make content follow the version

- store content per version
- root doc retrieval returns latest content
- updating content affects the latest version
- load metadata per version
This commit is contained in:
shamoon
2026-02-12 10:20:47 -08:00
parent 60e400fb68
commit 6a0fae67e9
10 changed files with 224 additions and 43 deletions

View File

@@ -520,7 +520,7 @@ class ConsumerPlugin(
original_document.checksum = hashlib.md5(
file_for_checksum.read_bytes(),
).hexdigest()
original_document.content = ""
original_document.content = text
original_document.page_count = page_count
original_document.mime_type = mime_type
original_document.original_filename = self.filename

View File

@@ -8,6 +8,7 @@ from contextlib import contextmanager
from typing import TYPE_CHECKING
from django.contrib.contenttypes.models import ContentType
from django.core.exceptions import FieldError
from django.db.models import Case
from django.db.models import CharField
from django.db.models import Count
@@ -163,11 +164,34 @@ class TitleContentFilter(Filter):
# Narrow `qs` to rows whose title or content contains `value`
# (case-insensitive). String values are stripped first; an empty/falsy
# value returns `qs` unchanged.
def filter(self, qs, value):
value = value.strip() if isinstance(value, str) else value
if value:
# NOTE(review): the one-line return below and the try/except that follows
# look like the "before" and "after" sides of a diff whose +/- markers were
# lost; read literally, the try block would be unreachable. Confirm that
# only the try/except variant is meant to remain.
return qs.filter(Q(title__icontains=value) | Q(content__icontains=value))
# Match on `effective_content` first; if the queryset raises FieldError for
# that name, repeat the same title-or-content match against plain `content`.
# presumably `effective_content` is an annotation provided by the calling
# view's queryset — verify against caller.
try:
return qs.filter(
Q(title__icontains=value) | Q(effective_content__icontains=value),
)
except FieldError:
return qs.filter(
Q(title__icontains=value) | Q(content__icontains=value),
)
else:
return qs
# Filter that applies ``self.lookup_expr`` to ``effective_content`` and falls
# back to the plain ``content`` field when that name cannot be resolved.
@extend_schema_field(serializers.CharField)
class EffectiveContentFilter(Filter):
    def filter(self, qs, value):
        """Return ``qs`` narrowed by ``value``, or ``qs`` itself for empty input.

        String values are stripped before use. When filtering on
        ``effective_content`` raises ``FieldError``, the identical lookup is
        applied to ``content`` instead.
        """
        term = value.strip() if isinstance(value, str) else value
        if term:
            try:
                preferred = {f"effective_content__{self.lookup_expr}": term}
                return qs.filter(**preferred)
            except FieldError:
                fallback = {f"content__{self.lookup_expr}": term}
                return qs.filter(**fallback)
        return qs
@extend_schema_field(serializers.BooleanField)
class SharedByUser(Filter):
def filter(self, qs, value):
@@ -724,6 +748,11 @@ class DocumentFilterSet(FilterSet):
title_content = TitleContentFilter()
content__istartswith = EffectiveContentFilter(lookup_expr="istartswith")
content__iendswith = EffectiveContentFilter(lookup_expr="iendswith")
content__icontains = EffectiveContentFilter(lookup_expr="icontains")
content__iexact = EffectiveContentFilter(lookup_expr="iexact")
owner__id__none = ObjectFilter(field_name="owner", exclude=True)
custom_fields__icontains = CustomFieldsFilter()
@@ -764,7 +793,6 @@ class DocumentFilterSet(FilterSet):
fields = {
"id": ID_KWARGS,
"title": CHAR_KWARGS,
"content": CHAR_KWARGS,
"archive_serial_number": INT_KWARGS,
"created": DATE_KWARGS,
"added": DATETIME_KWARGS,

View File

@@ -185,10 +185,20 @@ def update_document(writer: AsyncWriter, doc: Document) -> None:
only_with_perms_in=["view_document"],
)
viewer_ids: str = ",".join([str(u.id) for u in users_with_perms])
effective_content = doc.content
if doc.root_document_id is None:
latest_version = (
Document.objects.filter(root_document=doc)
.only("content")
.order_by("-id")
.first()
)
if latest_version is not None:
effective_content = latest_version.content
writer.update_document(
id=doc.pk,
title=doc.title,
content=doc.content,
content=effective_content,
correspondent=doc.correspondent.name if doc.correspondent else None,
correspondent_id=doc.correspondent.id if doc.correspondent else None,
has_correspondent=doc.correspondent is not None,

View File

@@ -1177,6 +1177,8 @@ class DocumentSerializer(
def to_representation(self, instance):
doc = super().to_representation(instance)
if "content" in self.fields and hasattr(instance, "effective_content"):
doc["content"] = getattr(instance, "effective_content") or ""
if self.truncate_content and "content" in self.fields:
doc["content"] = doc.get("content")[0:550]

View File

@@ -116,18 +116,21 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase):
title="root",
checksum="root",
mime_type="application/pdf",
content="root-content",
)
v1 = Document.objects.create(
title="v1",
checksum="v1",
mime_type="application/pdf",
root_document=root,
content="v1-content",
)
v2 = Document.objects.create(
title="v2",
checksum="v2",
mime_type="application/pdf",
root_document=root,
content="v2-content",
)
with mock.patch("documents.index.remove_document_from_index"):
@@ -136,6 +139,8 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase):
self.assertEqual(resp.status_code, status.HTTP_200_OK)
self.assertFalse(Document.objects.filter(id=v2.id).exists())
self.assertEqual(resp.data["current_version_id"], v1.id)
root.refresh_from_db()
self.assertEqual(root.content, "root-content")
with mock.patch("documents.index.remove_document_from_index"):
resp = self.client.delete(f"/api/documents/{root.id}/versions/{v1.id}/")
@@ -143,6 +148,8 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase):
self.assertEqual(resp.status_code, status.HTTP_200_OK)
self.assertFalse(Document.objects.filter(id=v1.id).exists())
self.assertEqual(resp.data["current_version_id"], root.id)
root.refresh_from_db()
self.assertEqual(root.content, "root-content")
def test_delete_version_writes_audit_log_entry(self) -> None:
root = Document.objects.create(
@@ -454,3 +461,60 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase):
)
self.assertEqual(resp.status_code, status.HTTP_400_BAD_REQUEST)
def test_patch_content_updates_latest_version_content(self) -> None:
    """PATCHing ``content`` on the root rewrites only the newest version."""
    root = Document.objects.create(
        title="root",
        checksum="root",
        mime_type="application/pdf",
        content="root-content",
    )
    # Both versions share everything except their label and content.
    shared_version_fields = {
        "mime_type": "application/pdf",
        "root_document": root,
    }
    v1 = Document.objects.create(
        title="v1",
        checksum="v1",
        content="v1-content",
        **shared_version_fields,
    )
    v2 = Document.objects.create(
        title="v2",
        checksum="v2",
        content="v2-content",
        **shared_version_fields,
    )

    payload = {"content": "edited-content"}
    resp = self.client.patch(
        f"/api/documents/{root.id}/",
        payload,
        format="json",
    )

    self.assertEqual(resp.status_code, status.HTTP_200_OK)
    self.assertEqual(resp.data["content"], "edited-content")

    # Reload every row so the assertions see what was actually persisted.
    for stored in (root, v1, v2):
        stored.refresh_from_db()
    self.assertEqual(v2.content, "edited-content")
    self.assertEqual(root.content, "root-content")
    self.assertEqual(v1.content, "v1-content")
def test_retrieve_returns_latest_version_content(self) -> None:
    """GET on the root document reports the content stored on its version."""
    root_fields = {
        "title": "root",
        "checksum": "root",
        "mime_type": "application/pdf",
        "content": "root-content",
    }
    root = Document.objects.create(**root_fields)

    version_fields = {
        "title": "v1",
        "checksum": "v1",
        "mime_type": "application/pdf",
        "root_document": root,
        "content": "v1-content",
    }
    Document.objects.create(**version_fields)

    detail_url = f"/api/documents/{root.id}/"
    resp = self.client.get(detail_url)

    self.assertEqual(resp.status_code, status.HTTP_200_OK)
    self.assertEqual(resp.data["content"], "v1-content")

View File

@@ -745,6 +745,7 @@ class TestConsumer(
assert version.original_filename is not None
self.assertEqual(version.version_label, "v2")
self.assertTrue(version.original_filename.endswith("_v0.pdf"))
self.assertTrue(bool(version.content))
@mock.patch("documents.consumer.load_classifier")
def testClassifyDocument(self, m) -> None:

View File

@@ -30,12 +30,16 @@ from django.db.migrations.loader import MigrationLoader
from django.db.migrations.recorder import MigrationRecorder
from django.db.models import Case
from django.db.models import Count
from django.db.models import F
from django.db.models import IntegerField
from django.db.models import Max
from django.db.models import Model
from django.db.models import OuterRef
from django.db.models import Q
from django.db.models import Subquery
from django.db.models import Sum
from django.db.models import When
from django.db.models.functions import Coalesce
from django.db.models.functions import Lower
from django.db.models.manager import Manager
from django.db.models.query import QuerySet
@@ -763,7 +767,7 @@ class DocumentViewSet(
ObjectOwnedOrGrantedPermissionsFilter,
)
filterset_class = DocumentFilterSet
search_fields = ("title", "correspondent__name", "content")
search_fields = ("title", "correspondent__name", "effective_content")
ordering_fields = (
"id",
"title",
@@ -781,10 +785,16 @@ class DocumentViewSet(
)
def get_queryset(self):
latest_version_content = Subquery(
Document.objects.filter(root_document=OuterRef("pk"))
.order_by("-id")
.values("content")[:1],
)
return (
Document.objects.filter(root_document__isnull=True)
.distinct()
.order_by("-created")
.annotate(effective_content=Coalesce(latest_version_content, F("content")))
.annotate(num_notes=Count("notes"))
.select_related("correspondent", "storage_path", "document_type", "owner")
.prefetch_related("tags", "custom_fields", "notes")
@@ -847,14 +857,45 @@ class DocumentViewSet(
return Response({"root_id": root_doc.id})
# Update a root document. When the request carries "content" and the root has
# a newer version, the content is written to that latest version instead of
# the root; all other fields go through the serializer on the root. Afterwards
# the document is re-read via get_queryset(), re-indexed, and the
# document_updated signal is sent. Returns a Response with the re-serialized
# document.
def update(self, request, *args, **kwargs):
# NOTE(review): this super().update() call looks like the pre-change line of a
# diff whose +/- markers were lost (the body below performs the update itself
# and reassigns `response`) — confirm it is not meant to run as well.
response = super().update(request, *args, **kwargs)
partial = kwargs.pop("partial", False)
root_doc = self.get_object()
# Only treat content as "being updated" when the key is actually present.
content_updated = "content" in request.data
updated_content = request.data.get("content") if content_updated else None
latest_doc = self._get_latest_doc_for_root(root_doc)
# Work on a copy so the content key can be removed without touching request.data.
data = request.data.copy()
serializer_partial = partial
# A differing id means the helper returned a version rather than the root.
if content_updated and latest_doc.id != root_doc.id:
# Content is withheld from the serializer below, so null is rejected here.
if updated_content is None:
raise ValidationError({"content": ["This field may not be null."]})
data.pop("content", None)
# Content was removed from the payload, so validate as a partial update.
serializer_partial = True
serializer = self.get_serializer(
root_doc,
data=data,
partial=serializer_partial,
)
serializer.is_valid(raise_exception=True)
self.perform_update(serializer)
# Apply the new content to the latest version, saving only the listed fields.
if content_updated and latest_doc.id != root_doc.id:
latest_doc.content = updated_content
latest_doc.save(update_fields=["content", "modified"])
# Drop any prefetched relations cached on the instance.
if getattr(root_doc, "_prefetched_objects_cache", None):
root_doc._prefetched_objects_cache = {}
# Re-read through get_queryset() and serialize that fresh instance.
refreshed_doc = self.get_queryset().get(pk=root_doc.pk)
response = Response(self.get_serializer(refreshed_doc).data)
from documents import index
# NOTE(review): the two index calls below look like the before/after pair of
# the same diff line — confirm only the `refreshed_doc` call is intended.
index.add_or_update_document(self.get_object())
index.add_or_update_document(refreshed_doc)
document_updated.send(
sender=self.__class__,
# NOTE(review): `document=` appears twice, which is not valid in a single
# call; presumably the get_object() line is the removed side of the diff and
# `refreshed_doc` is the intended argument — confirm.
document=self.get_object(),
document=refreshed_doc,
)
return response
@@ -904,6 +945,11 @@ class DocumentViewSet(
latest = Document.objects.filter(root_document=root_doc).order_by("id").last()
return latest or root_doc
@staticmethod
def _get_latest_doc_for_root(root_doc: Document) -> Document:
    """Return the highest-id document whose ``root_document`` is ``root_doc``.

    Falls back to ``root_doc`` itself when no such document exists.
    """
    versions_newest_first = Document.objects.filter(
        root_document=root_doc,
    ).order_by("-id")
    newest = versions_newest_first.first()
    return newest or root_doc
def file_response(self, pk, request, disposition):
request_doc = Document.global_objects.select_related(
"owner",