mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-02-14 00:09:35 -06:00
Make content follow the version
- store content per version
- root doc retrieval returns latest content
- updating content affects the latest version
- load metadata per version
This commit is contained in:
@@ -520,7 +520,7 @@ class ConsumerPlugin(
|
||||
original_document.checksum = hashlib.md5(
|
||||
file_for_checksum.read_bytes(),
|
||||
).hexdigest()
|
||||
original_document.content = ""
|
||||
original_document.content = text
|
||||
original_document.page_count = page_count
|
||||
original_document.mime_type = mime_type
|
||||
original_document.original_filename = self.filename
|
||||
|
||||
@@ -8,6 +8,7 @@ from contextlib import contextmanager
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from django.contrib.contenttypes.models import ContentType
|
||||
from django.core.exceptions import FieldError
|
||||
from django.db.models import Case
|
||||
from django.db.models import CharField
|
||||
from django.db.models import Count
|
||||
@@ -163,11 +164,34 @@ class TitleContentFilter(Filter):
|
||||
def filter(self, qs, value):
    """
    Restrict *qs* to rows whose title or content contains *value*.

    String values are stripped first; an empty or falsy value leaves the
    queryset untouched. The content half of the match is tried against
    ``effective_content`` and, if the queryset raises ``FieldError`` for
    that name, against the plain ``content`` field instead.

    Returns the filtered (or unchanged) queryset.
    """
    value = value.strip() if isinstance(value, str) else value
    if not value:
        return qs
    try:
        return qs.filter(
            Q(title__icontains=value) | Q(effective_content__icontains=value),
        )
    except FieldError:
        # NOTE(review): presumably hit when the queryset was built without
        # the ``effective_content`` annotation -- confirm against callers.
        return qs.filter(
            Q(title__icontains=value) | Q(content__icontains=value),
        )
|
||||
|
||||
|
||||
@extend_schema_field(serializers.CharField)
class EffectiveContentFilter(Filter):
    """
    Content filter that applies this filter's ``lookup_expr``.

    The lookup is attempted on ``effective_content`` first; if the queryset
    raises ``FieldError`` for it, the same lookup is applied to ``content``.
    """

    def filter(self, qs, value):
        """Return *qs* narrowed by *value*, or *qs* unchanged when *value* is empty."""
        needle = value.strip() if isinstance(value, str) else value
        if needle:
            try:
                lookup = {f"effective_content__{self.lookup_expr}": needle}
                return qs.filter(**lookup)
            except FieldError:
                # NOTE(review): presumably the queryset lacks the
                # ``effective_content`` annotation here -- confirm.
                fallback = {f"content__{self.lookup_expr}": needle}
                return qs.filter(**fallback)
        return qs
|
||||
|
||||
|
||||
@extend_schema_field(serializers.BooleanField)
|
||||
class SharedByUser(Filter):
|
||||
def filter(self, qs, value):
|
||||
@@ -724,6 +748,11 @@ class DocumentFilterSet(FilterSet):
|
||||
|
||||
title_content = TitleContentFilter()
|
||||
|
||||
content__istartswith = EffectiveContentFilter(lookup_expr="istartswith")
|
||||
content__iendswith = EffectiveContentFilter(lookup_expr="iendswith")
|
||||
content__icontains = EffectiveContentFilter(lookup_expr="icontains")
|
||||
content__iexact = EffectiveContentFilter(lookup_expr="iexact")
|
||||
|
||||
owner__id__none = ObjectFilter(field_name="owner", exclude=True)
|
||||
|
||||
custom_fields__icontains = CustomFieldsFilter()
|
||||
@@ -764,7 +793,6 @@ class DocumentFilterSet(FilterSet):
|
||||
fields = {
|
||||
"id": ID_KWARGS,
|
||||
"title": CHAR_KWARGS,
|
||||
"content": CHAR_KWARGS,
|
||||
"archive_serial_number": INT_KWARGS,
|
||||
"created": DATE_KWARGS,
|
||||
"added": DATETIME_KWARGS,
|
||||
|
||||
@@ -185,10 +185,20 @@ def update_document(writer: AsyncWriter, doc: Document) -> None:
|
||||
only_with_perms_in=["view_document"],
|
||||
)
|
||||
viewer_ids: str = ",".join([str(u.id) for u in users_with_perms])
|
||||
effective_content = doc.content
|
||||
if doc.root_document_id is None:
|
||||
latest_version = (
|
||||
Document.objects.filter(root_document=doc)
|
||||
.only("content")
|
||||
.order_by("-id")
|
||||
.first()
|
||||
)
|
||||
if latest_version is not None:
|
||||
effective_content = latest_version.content
|
||||
writer.update_document(
|
||||
id=doc.pk,
|
||||
title=doc.title,
|
||||
content=doc.content,
|
||||
content=effective_content,
|
||||
correspondent=doc.correspondent.name if doc.correspondent else None,
|
||||
correspondent_id=doc.correspondent.id if doc.correspondent else None,
|
||||
has_correspondent=doc.correspondent is not None,
|
||||
|
||||
@@ -1177,6 +1177,8 @@ class DocumentSerializer(
|
||||
|
||||
def to_representation(self, instance):
|
||||
doc = super().to_representation(instance)
|
||||
if "content" in self.fields and hasattr(instance, "effective_content"):
|
||||
doc["content"] = getattr(instance, "effective_content") or ""
|
||||
if self.truncate_content and "content" in self.fields:
|
||||
doc["content"] = doc.get("content")[0:550]
|
||||
|
||||
|
||||
@@ -116,18 +116,21 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase):
|
||||
title="root",
|
||||
checksum="root",
|
||||
mime_type="application/pdf",
|
||||
content="root-content",
|
||||
)
|
||||
v1 = Document.objects.create(
|
||||
title="v1",
|
||||
checksum="v1",
|
||||
mime_type="application/pdf",
|
||||
root_document=root,
|
||||
content="v1-content",
|
||||
)
|
||||
v2 = Document.objects.create(
|
||||
title="v2",
|
||||
checksum="v2",
|
||||
mime_type="application/pdf",
|
||||
root_document=root,
|
||||
content="v2-content",
|
||||
)
|
||||
|
||||
with mock.patch("documents.index.remove_document_from_index"):
|
||||
@@ -136,6 +139,8 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase):
|
||||
self.assertEqual(resp.status_code, status.HTTP_200_OK)
|
||||
self.assertFalse(Document.objects.filter(id=v2.id).exists())
|
||||
self.assertEqual(resp.data["current_version_id"], v1.id)
|
||||
root.refresh_from_db()
|
||||
self.assertEqual(root.content, "root-content")
|
||||
|
||||
with mock.patch("documents.index.remove_document_from_index"):
|
||||
resp = self.client.delete(f"/api/documents/{root.id}/versions/{v1.id}/")
|
||||
@@ -143,6 +148,8 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase):
|
||||
self.assertEqual(resp.status_code, status.HTTP_200_OK)
|
||||
self.assertFalse(Document.objects.filter(id=v1.id).exists())
|
||||
self.assertEqual(resp.data["current_version_id"], root.id)
|
||||
root.refresh_from_db()
|
||||
self.assertEqual(root.content, "root-content")
|
||||
|
||||
def test_delete_version_writes_audit_log_entry(self) -> None:
|
||||
root = Document.objects.create(
|
||||
@@ -454,3 +461,60 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase):
|
||||
)
|
||||
|
||||
self.assertEqual(resp.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
|
||||
def test_patch_content_updates_latest_version_content(self) -> None:
    """PATCHing content on the root rewrites only the newest version's content."""
    pdf = {"mime_type": "application/pdf"}
    root = Document.objects.create(
        title="root",
        checksum="root",
        content="root-content",
        **pdf,
    )
    v1 = Document.objects.create(
        title="v1",
        checksum="v1",
        content="v1-content",
        root_document=root,
        **pdf,
    )
    v2 = Document.objects.create(
        title="v2",
        checksum="v2",
        content="v2-content",
        root_document=root,
        **pdf,
    )

    url = f"/api/documents/{root.id}/"
    resp = self.client.patch(url, {"content": "edited-content"}, format="json")

    self.assertEqual(resp.status_code, status.HTTP_200_OK)
    self.assertEqual(resp.data["content"], "edited-content")

    # Reload every row so the assertions see what is actually stored.
    for doc in (root, v1, v2):
        doc.refresh_from_db()
    self.assertEqual(v2.content, "edited-content")
    self.assertEqual(root.content, "root-content")
    self.assertEqual(v1.content, "v1-content")
|
||||
|
||||
def test_retrieve_returns_latest_version_content(self) -> None:
    """GET on the root document reports the content of its newest version."""
    pdf = {"mime_type": "application/pdf"}
    root = Document.objects.create(
        title="root",
        checksum="root",
        content="root-content",
        **pdf,
    )
    Document.objects.create(
        title="v1",
        checksum="v1",
        content="v1-content",
        root_document=root,
        **pdf,
    )

    url = f"/api/documents/{root.id}/"
    resp = self.client.get(url)

    self.assertEqual(resp.status_code, status.HTTP_200_OK)
    self.assertEqual(resp.data["content"], "v1-content")
|
||||
|
||||
@@ -745,6 +745,7 @@ class TestConsumer(
|
||||
assert version.original_filename is not None
|
||||
self.assertEqual(version.version_label, "v2")
|
||||
self.assertTrue(version.original_filename.endswith("_v0.pdf"))
|
||||
self.assertTrue(bool(version.content))
|
||||
|
||||
@mock.patch("documents.consumer.load_classifier")
|
||||
def testClassifyDocument(self, m) -> None:
|
||||
|
||||
@@ -30,12 +30,16 @@ from django.db.migrations.loader import MigrationLoader
|
||||
from django.db.migrations.recorder import MigrationRecorder
|
||||
from django.db.models import Case
|
||||
from django.db.models import Count
|
||||
from django.db.models import F
|
||||
from django.db.models import IntegerField
|
||||
from django.db.models import Max
|
||||
from django.db.models import Model
|
||||
from django.db.models import OuterRef
|
||||
from django.db.models import Q
|
||||
from django.db.models import Subquery
|
||||
from django.db.models import Sum
|
||||
from django.db.models import When
|
||||
from django.db.models.functions import Coalesce
|
||||
from django.db.models.functions import Lower
|
||||
from django.db.models.manager import Manager
|
||||
from django.db.models.query import QuerySet
|
||||
@@ -763,7 +767,7 @@ class DocumentViewSet(
|
||||
ObjectOwnedOrGrantedPermissionsFilter,
|
||||
)
|
||||
filterset_class = DocumentFilterSet
|
||||
search_fields = ("title", "correspondent__name", "content")
|
||||
search_fields = ("title", "correspondent__name", "effective_content")
|
||||
ordering_fields = (
|
||||
"id",
|
||||
"title",
|
||||
@@ -781,10 +785,16 @@ class DocumentViewSet(
|
||||
)
|
||||
|
||||
def get_queryset(self):
|
||||
latest_version_content = Subquery(
|
||||
Document.objects.filter(root_document=OuterRef("pk"))
|
||||
.order_by("-id")
|
||||
.values("content")[:1],
|
||||
)
|
||||
return (
|
||||
Document.objects.filter(root_document__isnull=True)
|
||||
.distinct()
|
||||
.order_by("-created")
|
||||
.annotate(effective_content=Coalesce(latest_version_content, F("content")))
|
||||
.annotate(num_notes=Count("notes"))
|
||||
.select_related("correspondent", "storage_path", "document_type", "owner")
|
||||
.prefetch_related("tags", "custom_fields", "notes")
|
||||
@@ -847,14 +857,45 @@ class DocumentViewSet(
|
||||
return Response({"root_id": root_doc.id})
|
||||
|
||||
def update(self, request, *args, **kwargs):
    """
    Update the root document, routing a ``content`` change to its latest version.

    When the request carries ``content`` and the root has a newer version,
    ``content`` is removed from the payload given to the root's serializer
    and written to the latest version instead. Every other field is
    validated and saved on the root through the serializer.

    Returns a ``Response`` serialized from a fresh ``get_queryset()`` lookup
    of the root document.

    Raises ``ValidationError`` if ``content`` is explicitly null while the
    root has a newer version.
    """
    partial = kwargs.pop("partial", False)
    root_doc = self.get_object()
    content_updated = "content" in request.data
    updated_content = request.data.get("content") if content_updated else None
    latest_doc = self._get_latest_doc_for_root(root_doc)
    # True only when the content belongs on a version other than the root.
    content_goes_to_version = content_updated and latest_doc.id != root_doc.id

    data = request.data.copy()
    serializer_partial = partial
    if content_goes_to_version:
        if updated_content is None:
            raise ValidationError({"content": ["This field may not be null."]})
        data.pop("content", None)
        # NOTE(review): presumably forced to partial so the serializer does
        # not reject the payload for the content we just removed -- confirm.
        serializer_partial = True

    serializer = self.get_serializer(
        root_doc,
        data=data,
        partial=serializer_partial,
    )
    serializer.is_valid(raise_exception=True)
    self.perform_update(serializer)

    if content_goes_to_version:
        latest_doc.content = updated_content
        latest_doc.save(update_fields=["content", "modified"])

    # Drop any prefetch cache on the instance that was just updated.
    if getattr(root_doc, "_prefetched_objects_cache", None):
        root_doc._prefetched_objects_cache = {}

    # Re-read through get_queryset() and use that one instance for the
    # response, the index update and the signal.
    refreshed_doc = self.get_queryset().get(pk=root_doc.pk)
    response = Response(self.get_serializer(refreshed_doc).data)

    from documents import index

    index.add_or_update_document(refreshed_doc)

    document_updated.send(
        sender=self.__class__,
        document=refreshed_doc,
    )

    return response
|
||||
@@ -904,6 +945,11 @@ class DocumentViewSet(
|
||||
latest = Document.objects.filter(root_document=root_doc).order_by("id").last()
|
||||
return latest or root_doc
|
||||
|
||||
@staticmethod
def _get_latest_doc_for_root(root_doc: Document) -> Document:
    """Return the version of *root_doc* with the highest id, or *root_doc* if none exist."""
    versions = Document.objects.filter(root_document=root_doc)
    newest = versions.order_by("-id").first()
    if not newest:
        return root_doc
    return newest
|
||||
|
||||
def file_response(self, pk, request, disposition):
|
||||
request_doc = Document.global_objects.select_related(
|
||||
"owner",
|
||||
|
||||
Reference in New Issue
Block a user