From 67c955936286b2e51a96af654f998e49512c44c4 Mon Sep 17 00:00:00 2001 From: shamoon <4887959+shamoon@users.noreply.github.com> Date: Wed, 5 Mar 2025 10:05:35 -0800 Subject: [PATCH] Feature: allow using archive version in merge --- src/documents/bulk_edit.py | 10 ++++++-- src/documents/serialisers.py | 5 ++++ src/documents/tests/test_bulk_edit.py | 37 +++++++++++++++++++++++++++ 3 files changed, 50 insertions(+), 2 deletions(-) diff --git a/src/documents/bulk_edit.py b/src/documents/bulk_edit.py index be4608e36..54f5c1a33 100644 --- a/src/documents/bulk_edit.py +++ b/src/documents/bulk_edit.py @@ -318,6 +318,7 @@ def merge( *, metadata_document_id: int | None = None, delete_originals: bool = False, + archive_fallback: bool = False, user: User | None = None, ) -> Literal["OK"]: logger.info( @@ -333,7 +334,12 @@ def merge( for doc_id in doc_ids: doc = qs.get(id=doc_id) try: - with pikepdf.open(str(doc.source_path)) as pdf: + doc_path = ( + doc.archive_path + if archive_fallback and doc.mime_type != "application/pdf" + else doc.source_path + ) + with pikepdf.open(str(doc_path)) as pdf: version = max(version, pdf.pdf_version) merged_pdf.pages.extend(pdf.pages) affected_docs.append(doc.id) @@ -349,7 +355,7 @@ def merge( Path( tempfile.mkdtemp(dir=settings.SCRATCH_DIR), ) - / f"{'_'.join([str(doc_id) for doc_id in doc_ids])[:100]}_merged.pdf" + / f"{'_'.join([str(doc_id) for doc_id in affected_docs])[:100]}_merged.pdf" ) merged_pdf.remove_unreferenced_resources() merged_pdf.save(filepath, min_version=version) diff --git a/src/documents/serialisers.py b/src/documents/serialisers.py index c0487b7b8..50cd23918 100644 --- a/src/documents/serialisers.py +++ b/src/documents/serialisers.py @@ -1446,6 +1446,11 @@ class BulkEditSerializer( raise serializers.ValidationError("delete_originals must be a boolean") else: parameters["delete_originals"] = False + if "archive_fallback" in parameters: + if not isinstance(parameters["archive_fallback"], bool): + raise serializers.ValidationError("archive_fallback must be a boolean") + else: + parameters["archive_fallback"] = False def validate(self, attrs): method = attrs["method"] diff --git a/src/documents/tests/test_bulk_edit.py b/src/documents/tests/test_bulk_edit.py index 4a7145d34..dd59a6217 100644 --- a/src/documents/tests/test_bulk_edit.py +++ b/src/documents/tests/test_bulk_edit.py @@ -514,12 +514,23 @@ class TestPDFActions(DirectoriesMixin, TestCase): Path(__file__).parent / "samples" / "simple.jpg", img_doc, ) + img_doc_archive = self.dirs.archive_dir / "sample_image.pdf" + shutil.copy( + Path(__file__).parent + / "samples" + / "documents" + / "originals" + / "0000001.pdf", + img_doc_archive, + ) self.img_doc = Document.objects.create( checksum="D", title="D", filename=img_doc, mime_type="image/jpeg", ) + self.img_doc.archive_filename = img_doc_archive + self.img_doc.save() @mock.patch("documents.tasks.consume_file.s") def test_merge(self, mock_consume_file): @@ -605,6 +616,32 @@ class TestPDFActions(DirectoriesMixin, TestCase): doc_ids, ) + @mock.patch("documents.tasks.consume_file.s") + def test_merge_with_archive_fallback(self, mock_consume_file): + """ + GIVEN: + - Existing documents + WHEN: + - Merge action is called with 2 documents, one of which is an image and archive_fallback is set to True + THEN: + - Image document should be included + """ + doc_ids = [self.doc2.id, self.img_doc.id] + + result = bulk_edit.merge(doc_ids, archive_fallback=True) + self.assertEqual(result, "OK") + + expected_filename = ( + f"{'_'.join([str(doc_id) for doc_id in doc_ids])[:100]}_merged.pdf" + ) + + mock_consume_file.assert_called() + consume_file_args, _ = mock_consume_file.call_args + self.assertEqual( + Path(consume_file_args[0].original_file).name, + expected_filename, + ) + @mock.patch("documents.tasks.consume_file.delay") @mock.patch("pikepdf.open") def test_merge_with_errors(self, mock_open_pdf, mock_consume_file):