Mirror of https://github.com/paperless-ngx/paperless-ngx.git

Feature: PDF actions - merge, split & rotate (#6094)
@@ -1,15 +1,27 @@
import hashlib
import itertools
import logging
import os
from typing import Optional

from celery import chord
from django.conf import settings
from django.db.models import Q

from documents.data_models import ConsumableDocument
from documents.data_models import DocumentMetadataOverrides
from documents.data_models import DocumentSource
from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
from documents.models import StoragePath
from documents.permissions import set_permissions_for_object
from documents.tasks import bulk_update_documents
from documents.tasks import consume_file
from documents.tasks import update_document_archive_file

logger = logging.getLogger("paperless.bulk_edit")


def set_correspondent(doc_ids, correspondent):
    if correspondent:
@@ -146,3 +158,137 @@ def set_permissions(doc_ids, set_permissions, owner=None, merge=False):
    bulk_update_documents.delay(document_ids=affected_docs)

    return "OK"


def rotate(doc_ids: list[int], degrees: int):
    logger.info(
        f"Attempting to rotate {len(doc_ids)} documents by {degrees} degrees.",
    )
    qs = Document.objects.filter(id__in=doc_ids)
    affected_docs = []
    import pikepdf

    rotate_tasks = []
    for doc in qs:
        if doc.mime_type != "application/pdf":
            logger.warning(
                f"Document {doc.id} is not a PDF, skipping rotation.",
            )
            continue
        try:
            with pikepdf.open(doc.source_path, allow_overwriting_input=True) as pdf:
                for page in pdf.pages:
                    page.rotate(degrees, relative=True)
                pdf.save()
                doc.checksum = hashlib.md5(doc.source_path.read_bytes()).hexdigest()
                doc.save()
                rotate_tasks.append(
                    update_document_archive_file.s(
                        document_id=doc.id,
                    ),
                )
                logger.info(
                    f"Rotated document {doc.id} by {degrees} degrees",
                )
                affected_docs.append(doc.id)
        except Exception as e:
            logger.exception(f"Error rotating document {doc.id}: {e}")

    if len(affected_docs) > 0:
        bulk_update_task = bulk_update_documents.s(document_ids=affected_docs)
        chord(header=rotate_tasks, body=bulk_update_task).delay()

    return "OK"


def merge(doc_ids: list[int], metadata_document_id: Optional[int] = None):
    logger.info(
        f"Attempting to merge {len(doc_ids)} documents into a single document.",
    )
    qs = Document.objects.filter(id__in=doc_ids)
    affected_docs = []
    import pikepdf

    merged_pdf = pikepdf.new()
    version = merged_pdf.pdf_version
    # use doc_ids to preserve order
    for doc_id in doc_ids:
        doc = qs.get(id=doc_id)
        try:
            with pikepdf.open(str(doc.source_path)) as pdf:
                version = max(version, pdf.pdf_version)
                merged_pdf.pages.extend(pdf.pages)
            affected_docs.append(doc.id)
        except Exception as e:
            logger.exception(
                f"Error merging document {doc.id}, it will not be included in the merge: {e}",
            )
    if len(affected_docs) == 0:
        logger.warning("No documents were merged")
        return "OK"

    filepath = os.path.join(
        settings.SCRATCH_DIR,
        f"{'_'.join([str(doc_id) for doc_id in doc_ids])[:100]}_merged.pdf",
    )
    merged_pdf.remove_unreferenced_resources()
    merged_pdf.save(filepath, min_version=version)
    merged_pdf.close()

    if metadata_document_id:
        metadata_document = qs.get(id=metadata_document_id)
        if metadata_document is not None:
            overrides = DocumentMetadataOverrides.from_document(metadata_document)
            overrides.title = metadata_document.title + " (merged)"
    else:
        overrides = DocumentMetadataOverrides()

    logger.info("Adding merged document to the task queue.")
    consume_file.delay(
        ConsumableDocument(
            source=DocumentSource.ConsumeFolder,
            original_file=filepath,
        ),
        overrides,
    )

    return "OK"


def split(doc_ids: list[int], pages: list[list[int]]):
    logger.info(
        f"Attempting to split document {doc_ids[0]} into {len(pages)} documents",
    )
    doc = Document.objects.get(id=doc_ids[0])
    import pikepdf

    try:
        with pikepdf.open(doc.source_path) as pdf:
            for idx, split_doc in enumerate(pages):
                dst = pikepdf.new()
                for page in split_doc:
                    dst.pages.append(pdf.pages[page - 1])
                filepath = os.path.join(
                    settings.SCRATCH_DIR,
                    f"{doc.id}_{split_doc[0]}-{split_doc[-1]}.pdf",
                )
                dst.remove_unreferenced_resources()
                dst.save(filepath)
                dst.close()

                overrides = DocumentMetadataOverrides.from_document(doc)
                overrides.title = f"{doc.title} (split {idx + 1})"
                logger.info(
                    f"Adding split document with pages {split_doc} to the task queue.",
                )
                consume_file.delay(
                    ConsumableDocument(
                        source=DocumentSource.ConsumeFolder,
                        original_file=filepath,
                    ),
                    overrides,
                )
    except Exception as e:
        logger.exception(f"Error splitting document {doc.id}: {e}")

    return "OK"
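
For reference, a minimal standalone sketch of the in-place rotation that rotate() performs, assuming only an installed pikepdf and a hypothetical file path:

import pikepdf

# Open the file for in-place editing; allow_overwriting_input permits
# save() to write back to the same path.
with pikepdf.open("document.pdf", allow_overwriting_input=True) as pdf:
    for page in pdf.pages:
        # relative=True adds the angle to the page's existing /Rotate value
        page.rotate(90, relative=True)
    pdf.save()
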
@@ -189,13 +189,21 @@ def refresh_metadata_cache(
    cache.touch(doc_key, timeout)


def clear_metadata_cache(document_id: int) -> None:
    doc_key = get_metadata_cache_key(document_id)
    cache.delete(doc_key)


def get_thumbnail_modified_key(document_id: int) -> str:
    """
    Builds the key to store a thumbnail's timestamp
    """
    return f"doc_{document_id}_thumbnail_modified"


def clear_document_caches(document_id: int) -> None:
    """
    Removes all cached items for the given document
    """
    cache.delete_many(
        [
            get_suggestion_cache_key(document_id),
            get_metadata_cache_key(document_id),
            get_thumbnail_modified_key(document_id),
        ],
    )

@@ -5,6 +5,8 @@ from pathlib import Path
from typing import Optional

import magic
from guardian.shortcuts import get_groups_with_perms
from guardian.shortcuts import get_users_with_perms


@dataclasses.dataclass
@@ -88,6 +90,44 @@ class DocumentMetadataOverrides:

        return self

    @staticmethod
    def from_document(doc) -> "DocumentMetadataOverrides":
        """
        Fills in the overrides from a document object
        """
        overrides = DocumentMetadataOverrides()
        overrides.title = doc.title
        overrides.correspondent_id = doc.correspondent.id if doc.correspondent else None
        overrides.document_type_id = doc.document_type.id if doc.document_type else None
        overrides.storage_path_id = doc.storage_path.id if doc.storage_path else None
        overrides.owner_id = doc.owner.id if doc.owner else None
        overrides.tag_ids = list(doc.tags.values_list("id", flat=True))

        overrides.view_users = get_users_with_perms(
            doc,
            only_with_perms_in=["view_document"],
        ).values_list("id", flat=True)
        overrides.change_users = get_users_with_perms(
            doc,
            only_with_perms_in=["change_document"],
        ).values_list("id", flat=True)
        overrides.custom_field_ids = list(
            doc.custom_fields.values_list("id", flat=True),
        )

        groups_with_perms = get_groups_with_perms(
            doc,
            attach_perms=True,
        )
        overrides.view_groups = [
            group.id for group, perms in groups_with_perms.items() if "view_document" in perms
        ]
        overrides.change_groups = [
            group.id for group, perms in groups_with_perms.items() if "change_document" in perms
        ]

        return overrides


class DocumentSource(IntEnum):
    """

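A hedged usage sketch of the new from_document() helper: copy an existing document's metadata into overrides for a file about to be consumed, as merge() and split() do above. The document id and file path are hypothetical:

from documents.data_models import ConsumableDocument
from documents.data_models import DocumentMetadataOverrides
from documents.data_models import DocumentSource
from documents.models import Document
from documents.tasks import consume_file

doc = Document.objects.get(id=42)  # hypothetical id
overrides = DocumentMetadataOverrides.from_document(doc)
overrides.title = f"{doc.title} (copy)"

# Queue the new file; the copied metadata is applied on consumption.
consume_file.delay(
    ConsumableDocument(
        source=DocumentSource.ConsumeFolder,
        original_file="/tmp/new.pdf",  # hypothetical path
    ),
    overrides,
)
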
@@ -869,6 +869,9 @@ class BulkEditSerializer(DocumentListSerializer, SetPermissionsMixin):
            "delete",
            "redo_ocr",
            "set_permissions",
            "rotate",
            "merge",
            "split",
        ],
        label="Method",
        write_only=True,
@@ -906,6 +909,12 @@ class BulkEditSerializer(DocumentListSerializer, SetPermissionsMixin):
            return bulk_edit.redo_ocr
        elif method == "set_permissions":
            return bulk_edit.set_permissions
        elif method == "rotate":
            return bulk_edit.rotate
        elif method == "merge":
            return bulk_edit.merge
        elif method == "split":
            return bulk_edit.split
        else:
            raise serializers.ValidationError("Unsupported method.")

@@ -984,6 +993,39 @@ class BulkEditSerializer(DocumentListSerializer, SetPermissionsMixin):
        if "merge" not in parameters:
            parameters["merge"] = False

    def _validate_parameters_rotate(self, parameters):
        try:
            if (
                "degrees" not in parameters
                or not float(parameters["degrees"]).is_integer()
            ):
                raise serializers.ValidationError("invalid rotation degrees")
        except ValueError:
            raise serializers.ValidationError("invalid rotation degrees")

    def _validate_parameters_split(self, parameters):
        if "pages" not in parameters:
            raise serializers.ValidationError("pages not specified")
        try:
            pages = []
            docs = parameters["pages"].split(",")
            for doc in docs:
                if "-" in doc:
                    pages.append(
                        [
                            x
                            for x in range(
                                int(doc.split("-")[0]),
                                int(doc.split("-")[1]) + 1,
                            )
                        ],
                    )
                else:
                    pages.append([int(doc)])
            parameters["pages"] = pages
        except ValueError:
            raise serializers.ValidationError("invalid pages specified")

    def validate(self, attrs):
        method = attrs["method"]
        parameters = attrs["parameters"]
@@ -1000,6 +1042,14 @@ class BulkEditSerializer(DocumentListSerializer, SetPermissionsMixin):
            self._validate_storage_path(parameters)
        elif method == bulk_edit.set_permissions:
            self._validate_parameters_set_permissions(parameters)
        elif method == bulk_edit.rotate:
            self._validate_parameters_rotate(parameters)
        elif method == bulk_edit.split:
            if len(attrs["documents"]) > 1:
                raise serializers.ValidationError(
                    "Split method only supports one document",
                )
            self._validate_parameters_split(parameters)

        return attrs

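To make the accepted "pages" syntax concrete, here is a standalone sketch of the expansion performed by _validate_parameters_split above: comma-separated entries, each either a single page or an inclusive dash range:

def parse_pages(spec: str) -> list[list[int]]:
    pages = []
    for part in spec.split(","):
        if "-" in part:
            start, end = int(part.split("-")[0]), int(part.split("-")[1])
            # Dash ranges are inclusive on both ends.
            pages.append(list(range(start, end + 1)))
        else:
            pages.append([int(part)])
    return pages

assert parse_pages("1,2-4,5-6,7") == [[1], [2, 3, 4], [5, 6], [7]]
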
@@ -23,7 +23,7 @@ from filelock import FileLock
from guardian.shortcuts import remove_perm

from documents import matching
from documents.caching import clear_metadata_cache
from documents.caching import clear_document_caches
from documents.classifier import DocumentClassifier
from documents.consumer import parse_doc_title_w_placeholders
from documents.file_handling import create_source_path_directory
@@ -439,7 +439,8 @@ def update_filename_and_move_files(sender, instance: Document, **kwargs):
                archive_filename=instance.archive_filename,
                modified=timezone.now(),
            )
            clear_metadata_cache(instance.pk)
            # Clear any caching for this document.  Slightly overkill, but not terrible
            clear_document_caches(instance.pk)

        except (OSError, DatabaseError, CannotMoveFilesException) as e:
            logger.warning(f"Exception during file handling: {e}")

@@ -18,6 +18,7 @@ from whoosh.writing import AsyncWriter
from documents import index
from documents import sanity_checker
from documents.barcodes import BarcodePlugin
from documents.caching import clear_document_caches
from documents.classifier import DocumentClassifier
from documents.classifier import load_classifier
from documents.consumer import Consumer
@@ -213,6 +214,7 @@ def bulk_update_documents(document_ids):
    ix = index.open_index()

    for doc in documents:
        clear_document_caches(doc.pk)
        document_updated.send(
            sender=None,
            document=doc,
@@ -305,6 +307,8 @@ def update_document_archive_file(document_id):
            with index.open_index_writer() as writer:
                index.update_document(writer, document)

            clear_document_caches(document.pk)

    except Exception:
        logger.exception(
            f"Error while parsing document {document} (ID: {document_id})",
        )

@@ -781,3 +781,153 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
        self.assertEqual(response.status_code, status.HTTP_200_OK)

        m.assert_called_once()

    @mock.patch("documents.serialisers.bulk_edit.rotate")
    def test_rotate(self, m):
        m.return_value = "OK"

        response = self.client.post(
            "/api/documents/bulk_edit/",
            json.dumps(
                {
                    "documents": [self.doc2.id, self.doc3.id],
                    "method": "rotate",
                    "parameters": {"degrees": 90},
                },
            ),
            content_type="application/json",
        )

        self.assertEqual(response.status_code, status.HTTP_200_OK)

        m.assert_called_once()
        args, kwargs = m.call_args
        self.assertCountEqual(args[0], [self.doc2.id, self.doc3.id])
        self.assertEqual(kwargs["degrees"], 90)

    @mock.patch("documents.serialisers.bulk_edit.rotate")
    def test_rotate_invalid_params(self, m):
        response = self.client.post(
            "/api/documents/bulk_edit/",
            json.dumps(
                {
                    "documents": [self.doc2.id, self.doc3.id],
                    "method": "rotate",
                    "parameters": {"degrees": "foo"},
                },
            ),
            content_type="application/json",
        )

        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)

        response = self.client.post(
            "/api/documents/bulk_edit/",
            json.dumps(
                {
                    "documents": [self.doc2.id, self.doc3.id],
                    "method": "rotate",
                    "parameters": {"degrees": 90.5},
                },
            ),
            content_type="application/json",
        )

        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)

        m.assert_not_called()

    @mock.patch("documents.serialisers.bulk_edit.merge")
    def test_merge(self, m):
        m.return_value = "OK"

        response = self.client.post(
            "/api/documents/bulk_edit/",
            json.dumps(
                {
                    "documents": [self.doc2.id, self.doc3.id],
                    "method": "merge",
                    "parameters": {"metadata_document_id": self.doc3.id},
                },
            ),
            content_type="application/json",
        )

        self.assertEqual(response.status_code, status.HTTP_200_OK)

        m.assert_called_once()
        args, kwargs = m.call_args
        self.assertCountEqual(args[0], [self.doc2.id, self.doc3.id])
        self.assertEqual(kwargs["metadata_document_id"], self.doc3.id)

    @mock.patch("documents.serialisers.bulk_edit.split")
    def test_split(self, m):
        m.return_value = "OK"

        response = self.client.post(
            "/api/documents/bulk_edit/",
            json.dumps(
                {
                    "documents": [self.doc2.id],
                    "method": "split",
                    "parameters": {"pages": "1,2-4,5-6,7"},
                },
            ),
            content_type="application/json",
        )

        self.assertEqual(response.status_code, status.HTTP_200_OK)

        m.assert_called_once()
        args, kwargs = m.call_args
        self.assertCountEqual(args[0], [self.doc2.id])
        self.assertEqual(kwargs["pages"], [[1], [2, 3, 4], [5, 6], [7]])

    def test_split_invalid_params(self):
        response = self.client.post(
            "/api/documents/bulk_edit/",
            json.dumps(
                {
                    "documents": [self.doc2.id],
                    "method": "split",
                    "parameters": {},  # pages not specified
                },
            ),
            content_type="application/json",
        )

        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
        self.assertIn(b"pages not specified", response.content)

        response = self.client.post(
            "/api/documents/bulk_edit/",
            json.dumps(
                {
                    "documents": [self.doc2.id],
                    "method": "split",
                    "parameters": {"pages": "1:7"},  # wrong format
                },
            ),
            content_type="application/json",
        )

        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
        self.assertIn(b"invalid pages specified", response.content)

        response = self.client.post(
            "/api/documents/bulk_edit/",
            json.dumps(
                {
                    "documents": [
                        self.doc1.id,
                        self.doc2.id,
                    ],  # only one document supported
                    "method": "split",
                    "parameters": {"pages": "1-2,3-7"},
                },
            ),
            content_type="application/json",
        )

        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
        self.assertIn(b"Split method only supports one document", response.content)

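Outside the test client, the same endpoint can be exercised over HTTP. A hedged sketch using requests; the host, credentials, and document ids are hypothetical:

import requests

resp = requests.post(
    "http://localhost:8000/api/documents/bulk_edit/",
    json={
        "documents": [2],
        "method": "split",
        "parameters": {"pages": "1,2-4,5-6,7"},
    },
    auth=("user", "pass"),  # hypothetical credentials
)
print(resp.status_code)  # 200 on success, 400 on invalid parameters
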
@@ -1,3 +1,5 @@
import shutil
from pathlib import Path
from unittest import mock

from django.contrib.auth.models import Group
@@ -275,3 +277,262 @@ class TestBulkEdit(DirectoriesMixin, TestCase):
            self.doc1,
        )
        self.assertEqual(groups_with_perms.count(), 2)


class TestPDFActions(DirectoriesMixin, TestCase):
    def setUp(self):
        super().setUp()
        sample1 = self.dirs.scratch_dir / "sample.pdf"
        shutil.copy(
            Path(__file__).parent
            / "samples"
            / "documents"
            / "originals"
            / "0000001.pdf",
            sample1,
        )
        sample1_archive = self.dirs.archive_dir / "sample_archive.pdf"
        shutil.copy(
            Path(__file__).parent
            / "samples"
            / "documents"
            / "originals"
            / "0000001.pdf",
            sample1_archive,
        )
        sample2 = self.dirs.scratch_dir / "sample2.pdf"
        shutil.copy(
            Path(__file__).parent
            / "samples"
            / "documents"
            / "originals"
            / "0000002.pdf",
            sample2,
        )
        sample2_archive = self.dirs.archive_dir / "sample2_archive.pdf"
        shutil.copy(
            Path(__file__).parent
            / "samples"
            / "documents"
            / "originals"
            / "0000002.pdf",
            sample2_archive,
        )
        sample3 = self.dirs.scratch_dir / "sample3.pdf"
        shutil.copy(
            Path(__file__).parent
            / "samples"
            / "documents"
            / "originals"
            / "0000003.pdf",
            sample3,
        )
        self.doc1 = Document.objects.create(
            checksum="A",
            title="A",
            filename=sample1,
            mime_type="application/pdf",
        )
        self.doc1.archive_filename = sample1_archive
        self.doc1.save()
        self.doc2 = Document.objects.create(
            checksum="B",
            title="B",
            filename=sample2,
            mime_type="application/pdf",
        )
        self.doc2.archive_filename = sample2_archive
        self.doc2.save()
        self.doc3 = Document.objects.create(
            checksum="C",
            title="C",
            filename=sample3,
            mime_type="application/pdf",
        )
        img_doc = self.dirs.scratch_dir / "sample_image.jpg"
        shutil.copy(
            Path(__file__).parent / "samples" / "simple.jpg",
            img_doc,
        )
        self.img_doc = Document.objects.create(
            checksum="D",
            title="D",
            filename=img_doc,
            mime_type="image/jpeg",
        )

    @mock.patch("documents.tasks.consume_file.delay")
    def test_merge(self, mock_consume_file):
        """
        GIVEN:
            - Existing documents
        WHEN:
            - Merge action is called with 3 documents
        THEN:
            - Consume file should be called
        """
        doc_ids = [self.doc1.id, self.doc2.id, self.doc3.id]
        metadata_document_id = self.doc1.id

        result = bulk_edit.merge(doc_ids)

        expected_filename = (
            f"{'_'.join([str(doc_id) for doc_id in doc_ids])[:100]}_merged.pdf"
        )

        mock_consume_file.assert_called()
        consume_file_args, _ = mock_consume_file.call_args
        self.assertEqual(
            Path(consume_file_args[0].original_file).name,
            expected_filename,
        )
        self.assertEqual(consume_file_args[1].title, None)

        # With metadata_document_id overrides
        result = bulk_edit.merge(doc_ids, metadata_document_id=metadata_document_id)
        consume_file_args, _ = mock_consume_file.call_args
        self.assertEqual(consume_file_args[1].title, "A (merged)")

        self.assertEqual(result, "OK")

    @mock.patch("documents.tasks.consume_file.delay")
    @mock.patch("pikepdf.open")
    def test_merge_with_errors(self, mock_open_pdf, mock_consume_file):
        """
        GIVEN:
            - Existing documents
        WHEN:
            - Merge action is called with 2 documents
            - Error occurs when opening both files
        THEN:
            - Consume file should not be called
        """
        mock_open_pdf.side_effect = Exception("Error opening PDF")
        doc_ids = [self.doc2.id, self.doc3.id]

        with self.assertLogs("paperless.bulk_edit", level="ERROR") as cm:
            bulk_edit.merge(doc_ids)
            error_str = cm.output[0]
            expected_str = (
                "Error merging document 2, it will not be included in the merge"
            )
            self.assertIn(expected_str, error_str)

        mock_consume_file.assert_not_called()

    @mock.patch("documents.tasks.consume_file.delay")
    def test_split(self, mock_consume_file):
        """
        GIVEN:
            - Existing documents
        WHEN:
            - Split action is called with 1 document and 2 page groups
        THEN:
            - Consume file should be called twice
        """
        doc_ids = [self.doc2.id]
        pages = [[1, 2], [3]]
        result = bulk_edit.split(doc_ids, pages)
        self.assertEqual(mock_consume_file.call_count, 2)
        consume_file_args, _ = mock_consume_file.call_args
        self.assertEqual(consume_file_args[1].title, "B (split 2)")

        self.assertEqual(result, "OK")

    @mock.patch("documents.tasks.consume_file.delay")
    @mock.patch("pikepdf.Pdf.save")
    def test_split_with_errors(self, mock_save_pdf, mock_consume_file):
        """
        GIVEN:
            - Existing documents
        WHEN:
            - Split action is called with 1 document and 2 page groups
            - Error occurs when saving the files
        THEN:
            - Consume file should not be called
        """
        mock_save_pdf.side_effect = Exception("Error saving PDF")
        doc_ids = [self.doc2.id]
        pages = [[1, 2], [3]]

        with self.assertLogs("paperless.bulk_edit", level="ERROR") as cm:
            bulk_edit.split(doc_ids, pages)
            error_str = cm.output[0]
            expected_str = "Error splitting document 2"
            self.assertIn(expected_str, error_str)

        mock_consume_file.assert_not_called()

    @mock.patch("documents.tasks.bulk_update_documents.s")
    @mock.patch("documents.tasks.update_document_archive_file.s")
    @mock.patch("celery.chord.delay")
    def test_rotate(self, mock_chord, mock_update_document, mock_update_documents):
        """
        GIVEN:
            - Existing documents
        WHEN:
            - Rotate action is called with 2 documents
        THEN:
            - Rotate action should be called twice
        """
        doc_ids = [self.doc1.id, self.doc2.id]
        result = bulk_edit.rotate(doc_ids, 90)
        self.assertEqual(mock_update_document.call_count, 2)
        mock_update_documents.assert_called_once()
        mock_chord.assert_called_once()
        self.assertEqual(result, "OK")

    @mock.patch("documents.tasks.bulk_update_documents.s")
    @mock.patch("documents.tasks.update_document_archive_file.s")
    @mock.patch("pikepdf.Pdf.save")
    def test_rotate_with_error(
        self,
        mock_pdf_save,
        mock_update_archive_file,
        mock_update_documents,
    ):
        """
        GIVEN:
            - Existing documents
        WHEN:
            - Rotate action is called with 2 documents
            - PikePDF raises an error
        THEN:
            - Rotate action should be called 0 times
        """
        mock_pdf_save.side_effect = Exception("Error saving PDF")
        doc_ids = [self.doc2.id, self.doc3.id]

        with self.assertLogs("paperless.bulk_edit", level="ERROR") as cm:
            bulk_edit.rotate(doc_ids, 90)
            error_str = cm.output[0]
            expected_str = "Error rotating document"
            self.assertIn(expected_str, error_str)
            mock_update_archive_file.assert_not_called()

    @mock.patch("documents.tasks.bulk_update_documents.s")
    @mock.patch("documents.tasks.update_document_archive_file.s")
    @mock.patch("celery.chord.delay")
    def test_rotate_non_pdf(
        self,
        mock_chord,
        mock_update_document,
        mock_update_documents,
    ):
        """
        GIVEN:
            - Existing documents
        WHEN:
            - Rotate action is called with 2 documents, one of which is not a PDF
        THEN:
            - Rotate action should be performed 1 time, with the non-PDF document skipped
        """
        with self.assertLogs("paperless.bulk_edit", level="INFO") as cm:
            result = bulk_edit.rotate([self.doc2.id, self.img_doc.id], 90)
            output_str = cm.output[1]
            expected_str = "Document 4 is not a PDF, skipping rotation"
            self.assertIn(expected_str, output_str)
            self.assertEqual(mock_update_document.call_count, 1)
            mock_update_documents.assert_called_once()
            mock_chord.assert_called_once()
            self.assertEqual(result, "OK")

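The rotate tests above mock the Celery chord that rotate() builds. As a minimal sketch of that pattern (a configured Celery worker is assumed), the per-document archive updates run in parallel as the header, and the index update runs once as the body after all of them finish:

from celery import chord

from documents.tasks import bulk_update_documents
from documents.tasks import update_document_archive_file

affected_docs = [1, 2]  # hypothetical document ids
header = [
    update_document_archive_file.s(document_id=doc_id) for doc_id in affected_docs
]
body = bulk_update_documents.s(document_ids=affected_docs)
# The body task fires only after every header task has completed.
chord(header=header, body=body).delay()
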
@@ -891,7 +891,8 @@ class BulkEditView(GenericAPIView, PassUserMixin):
            document_objs = Document.objects.filter(pk__in=documents)
            has_perms = (
                all((doc.owner == user or doc.owner is None) for doc in document_objs)
                if method == bulk_edit.set_permissions
                if method
                in [bulk_edit.set_permissions, bulk_edit.delete, bulk_edit.rotate]
                else all(
                    has_perms_owner_aware(user, "change_document", doc)
                    for doc in document_objs