Mirror of https://github.com/paperless-ngx/paperless-ngx.git, synced 2025-07-28 18:24:38 -05:00
Feature: PDF actions - merge, split & rotate (#6094)
@@ -1,15 +1,27 @@
import hashlib
import itertools
import logging
import os
from typing import Optional

from celery import chord
from django.conf import settings
from django.db.models import Q

from documents.data_models import ConsumableDocument
from documents.data_models import DocumentMetadataOverrides
from documents.data_models import DocumentSource
from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
from documents.models import StoragePath
from documents.permissions import set_permissions_for_object
from documents.tasks import bulk_update_documents
from documents.tasks import consume_file
from documents.tasks import update_document_archive_file

logger = logging.getLogger("paperless.bulk_edit")


def set_correspondent(doc_ids, correspondent):
    if correspondent:
@@ -146,3 +158,137 @@ def set_permissions(doc_ids, set_permissions, owner=None, merge=False):
    bulk_update_documents.delay(document_ids=affected_docs)

    return "OK"


def rotate(doc_ids: list[int], degrees: int):
    logger.info(
        f"Attempting to rotate {len(doc_ids)} documents by {degrees} degrees.",
    )
    qs = Document.objects.filter(id__in=doc_ids)
    affected_docs = []
    import pikepdf

    rotate_tasks = []
    for doc in qs:
        if doc.mime_type != "application/pdf":
            logger.warning(
                f"Document {doc.id} is not a PDF, skipping rotation.",
            )
            continue
        try:
            with pikepdf.open(doc.source_path, allow_overwriting_input=True) as pdf:
                for page in pdf.pages:
                    page.rotate(degrees, relative=True)
                pdf.save()
                doc.checksum = hashlib.md5(doc.source_path.read_bytes()).hexdigest()
                doc.save()
                rotate_tasks.append(
                    update_document_archive_file.s(
                        document_id=doc.id,
                    ),
                )
                logger.info(
                    f"Rotated document {doc.id} by {degrees} degrees",
                )
                affected_docs.append(doc.id)
        except Exception as e:
            logger.exception(f"Error rotating document {doc.id}: {e}")

    if len(affected_docs) > 0:
        bulk_update_task = bulk_update_documents.s(document_ids=affected_docs)
        chord(header=rotate_tasks, body=bulk_update_task).delay()

    return "OK"


def merge(doc_ids: list[int], metadata_document_id: Optional[int] = None):
    logger.info(
        f"Attempting to merge {len(doc_ids)} documents into a single document.",
    )
    qs = Document.objects.filter(id__in=doc_ids)
    affected_docs = []
    import pikepdf

    merged_pdf = pikepdf.new()
    version = merged_pdf.pdf_version
    # use doc_ids to preserve order
    for doc_id in doc_ids:
        doc = qs.get(id=doc_id)
        try:
            with pikepdf.open(str(doc.source_path)) as pdf:
                version = max(version, pdf.pdf_version)
                merged_pdf.pages.extend(pdf.pages)
            affected_docs.append(doc.id)
        except Exception as e:
            logger.exception(
                f"Error merging document {doc.id}, it will not be included in the merge: {e}",
            )
    if len(affected_docs) == 0:
        logger.warning("No documents were merged")
        return "OK"

    filepath = os.path.join(
        settings.SCRATCH_DIR,
        f"{'_'.join([str(doc_id) for doc_id in doc_ids])[:100]}_merged.pdf",
    )
    merged_pdf.remove_unreferenced_resources()
    merged_pdf.save(filepath, min_version=version)
    merged_pdf.close()

    if metadata_document_id:
        metadata_document = qs.get(id=metadata_document_id)
        if metadata_document is not None:
            overrides = DocumentMetadataOverrides.from_document(metadata_document)
            overrides.title = metadata_document.title + " (merged)"
    else:
        overrides = DocumentMetadataOverrides()

    logger.info("Adding merged document to the task queue.")
    consume_file.delay(
        ConsumableDocument(
            source=DocumentSource.ConsumeFolder,
            original_file=filepath,
        ),
        overrides,
    )

    return "OK"


def split(doc_ids: list[int], pages: list[list[int]]):
    logger.info(
        f"Attempting to split document {doc_ids[0]} into {len(pages)} documents",
    )
    doc = Document.objects.get(id=doc_ids[0])
    import pikepdf

    try:
        with pikepdf.open(doc.source_path) as pdf:
            for idx, split_doc in enumerate(pages):
                dst = pikepdf.new()
                for page in split_doc:
                    dst.pages.append(pdf.pages[page - 1])
                filepath = os.path.join(
                    settings.SCRATCH_DIR,
                    f"{doc.id}_{split_doc[0]}-{split_doc[-1]}.pdf",
                )
                dst.remove_unreferenced_resources()
                dst.save(filepath)
                dst.close()

                overrides = DocumentMetadataOverrides().from_document(doc)
                overrides.title = f"{doc.title} (split {idx + 1})"
                logger.info(
                    f"Adding split document with pages {split_doc} to the task queue.",
                )
                consume_file.delay(
                    ConsumableDocument(
                        source=DocumentSource.ConsumeFolder,
                        original_file=filepath,
                    ),
                    overrides,
                )
    except Exception as e:
        logger.exception(f"Error splitting document {doc.id}: {e}")

    return "OK"
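rotate() queues one update_document_archive_file task per rotated PDF and defers the index refresh (bulk_update_documents) until every one of those tasks has finished; Celery's chord primitive provides exactly that fan-out/fan-in shape. Below is a minimal standalone sketch of the same pattern, not Paperless code: the app, broker URL and task names are illustrative assumptions.

from celery import Celery, chord

# Illustrative app/broker configuration; any configured Celery app would do.
app = Celery("sketch", broker="redis://localhost:6379/0", backend="redis://localhost:6379/1")


@app.task
def regenerate_archive(document_id: int) -> int:
    # Stand-in for update_document_archive_file: one header task per document.
    return document_id


@app.task
def reindex(document_ids: list[int]) -> str:
    # Stand-in for bulk_update_documents: the chord body receives the header results.
    return f"reindexed {len(document_ids)} documents"


def rotate_fanout(doc_ids: list[int]):
    # Fan out one task per document, then run the body once all of them succeed.
    header = [regenerate_archive.s(doc_id) for doc_id in doc_ids]
    return chord(header=header, body=reindex.s()).delay()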
@@ -189,13 +189,21 @@ def refresh_metadata_cache(
    cache.touch(doc_key, timeout)


def clear_metadata_cache(document_id: int) -> None:
    doc_key = get_metadata_cache_key(document_id)
    cache.delete(doc_key)


def get_thumbnail_modified_key(document_id: int) -> str:
    """
    Builds the key to store a thumbnail's timestamp
    """
    return f"doc_{document_id}_thumbnail_modified"


def clear_document_caches(document_id: int) -> None:
    """
    Removes all cached items for the given document
    """
    cache.delete_many(
        [
            get_suggestion_cache_key(document_id),
            get_metadata_cache_key(document_id),
            get_thumbnail_modified_key(document_id),
        ],
    )
@@ -5,6 +5,8 @@ from pathlib import Path
from typing import Optional

import magic
from guardian.shortcuts import get_groups_with_perms
from guardian.shortcuts import get_users_with_perms


@dataclasses.dataclass
@@ -88,6 +90,44 @@ class DocumentMetadataOverrides:
        return self

    @staticmethod
    def from_document(doc) -> "DocumentMetadataOverrides":
        """
        Fills in the overrides from a document object
        """
        overrides = DocumentMetadataOverrides()
        overrides.title = doc.title
        overrides.correspondent_id = doc.correspondent.id if doc.correspondent else None
        overrides.document_type_id = doc.document_type.id if doc.document_type else None
        overrides.storage_path_id = doc.storage_path.id if doc.storage_path else None
        overrides.owner_id = doc.owner.id if doc.owner else None
        overrides.tag_ids = list(doc.tags.values_list("id", flat=True))

        overrides.view_users = get_users_with_perms(
            doc,
            only_with_perms_in=["view_document"],
        ).values_list("id", flat=True)
        overrides.change_users = get_users_with_perms(
            doc,
            only_with_perms_in=["change_document"],
        ).values_list("id", flat=True)
        overrides.custom_field_ids = list(
            doc.custom_fields.values_list("id", flat=True),
        )

        groups_with_perms = get_groups_with_perms(
            doc,
            attach_perms=True,
        )
        overrides.view_groups = [
            group.id for group, perms in groups_with_perms if "view_document" in perms
        ]
        overrides.change_groups = [
            group.id for group, perms in groups_with_perms if "change_document" in perms
        ]

        return overrides


class DocumentSource(IntEnum):
    """
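from_document() copies a document's title, correspondent, document type, storage path, owner, tags, custom fields and permissions into an overrides object, which is what merge and split rely on when re-queuing the generated PDFs. A small usage sketch follows; the document id and title tweak are illustrative, not part of this change:

doc = Document.objects.get(id=123)
overrides = DocumentMetadataOverrides.from_document(doc)
overrides.title = f"{doc.title} (copy)"
# The overrides are then passed to consume_file alongside a ConsumableDocument,
# exactly as merge() and split() do in bulk_edit.py above.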
@@ -869,6 +869,9 @@ class BulkEditSerializer(DocumentListSerializer, SetPermissionsMixin):
            "delete",
            "redo_ocr",
            "set_permissions",
            "rotate",
            "merge",
            "split",
        ],
        label="Method",
        write_only=True,
@@ -906,6 +909,12 @@ class BulkEditSerializer(DocumentListSerializer, SetPermissionsMixin):
            return bulk_edit.redo_ocr
        elif method == "set_permissions":
            return bulk_edit.set_permissions
        elif method == "rotate":
            return bulk_edit.rotate
        elif method == "merge":
            return bulk_edit.merge
        elif method == "split":
            return bulk_edit.split
        else:
            raise serializers.ValidationError("Unsupported method.")
@@ -984,6 +993,39 @@ class BulkEditSerializer(DocumentListSerializer, SetPermissionsMixin):
        if "merge" not in parameters:
            parameters["merge"] = False

    def _validate_parameters_rotate(self, parameters):
        try:
            if (
                "degrees" not in parameters
                or not float(parameters["degrees"]).is_integer()
            ):
                raise serializers.ValidationError("invalid rotation degrees")
        except ValueError:
            raise serializers.ValidationError("invalid rotation degrees")

    def _validate_parameters_split(self, parameters):
        if "pages" not in parameters:
            raise serializers.ValidationError("pages not specified")
        try:
            pages = []
            docs = parameters["pages"].split(",")
            for doc in docs:
                if "-" in doc:
                    pages.append(
                        [
                            x
                            for x in range(
                                int(doc.split("-")[0]),
                                int(doc.split("-")[1]) + 1,
                            )
                        ],
                    )
                else:
                    pages.append([int(doc)])
            parameters["pages"] = pages
        except ValueError:
            raise serializers.ValidationError("invalid pages specified")

    def validate(self, attrs):
        method = attrs["method"]
        parameters = attrs["parameters"]
@@ -1000,6 +1042,14 @@ class BulkEditSerializer(DocumentListSerializer, SetPermissionsMixin):
            self._validate_storage_path(parameters)
        elif method == bulk_edit.set_permissions:
            self._validate_parameters_set_permissions(parameters)
        elif method == bulk_edit.rotate:
            self._validate_parameters_rotate(parameters)
        elif method == bulk_edit.split:
            if len(attrs["documents"]) > 1:
                raise serializers.ValidationError(
                    "Split method only supports one document",
                )
            self._validate_parameters_split(parameters)

        return attrs
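The split parameter arrives as a comma-separated string of single pages and inclusive ranges; _validate_parameters_split expands that string into the list of page groups that bulk_edit.split expects. A standalone sketch of the same expansion, mirroring the serializer logic above:

def expand_pages(spec: str) -> list[list[int]]:
    # "1,2-4,5-6,7" -> [[1], [2, 3, 4], [5, 6], [7]]
    groups = []
    for part in spec.split(","):
        if "-" in part:
            start, end = part.split("-")
            groups.append(list(range(int(start), int(end) + 1)))
        else:
            groups.append([int(part)])
    return groups


assert expand_pages("1,2-4,5-6,7") == [[1], [2, 3, 4], [5, 6], [7]]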
@@ -23,7 +23,7 @@ from filelock import FileLock
from guardian.shortcuts import remove_perm

from documents import matching
-from documents.caching import clear_metadata_cache
+from documents.caching import clear_document_caches
from documents.classifier import DocumentClassifier
from documents.consumer import parse_doc_title_w_placeholders
from documents.file_handling import create_source_path_directory
@@ -439,7 +439,8 @@ def update_filename_and_move_files(sender, instance: Document, **kwargs):
                archive_filename=instance.archive_filename,
                modified=timezone.now(),
            )
-            clear_metadata_cache(instance.pk)
+            # Clear any caching for this document. Slightly overkill, but not terrible
+            clear_document_caches(instance.pk)

    except (OSError, DatabaseError, CannotMoveFilesException) as e:
        logger.warning(f"Exception during file handling: {e}")
@@ -18,6 +18,7 @@ from whoosh.writing import AsyncWriter
from documents import index
from documents import sanity_checker
from documents.barcodes import BarcodePlugin
from documents.caching import clear_document_caches
from documents.classifier import DocumentClassifier
from documents.classifier import load_classifier
from documents.consumer import Consumer
@@ -213,6 +214,7 @@ def bulk_update_documents(document_ids):
    ix = index.open_index()

    for doc in documents:
        clear_document_caches(doc.pk)
        document_updated.send(
            sender=None,
            document=doc,
@@ -305,6 +307,8 @@ def update_document_archive_file(document_id):
            with index.open_index_writer() as writer:
                index.update_document(writer, document)

            clear_document_caches(document.pk)

    except Exception:
        logger.exception(
            f"Error while parsing document {document} (ID: {document_id})",
@@ -781,3 +781,153 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
        self.assertEqual(response.status_code, status.HTTP_200_OK)

        m.assert_called_once()

    @mock.patch("documents.serialisers.bulk_edit.rotate")
    def test_rotate(self, m):
        m.return_value = "OK"

        response = self.client.post(
            "/api/documents/bulk_edit/",
            json.dumps(
                {
                    "documents": [self.doc2.id, self.doc3.id],
                    "method": "rotate",
                    "parameters": {"degrees": 90},
                },
            ),
            content_type="application/json",
        )

        self.assertEqual(response.status_code, status.HTTP_200_OK)

        m.assert_called_once()
        args, kwargs = m.call_args
        self.assertCountEqual(args[0], [self.doc2.id, self.doc3.id])
        self.assertEqual(kwargs["degrees"], 90)

    @mock.patch("documents.serialisers.bulk_edit.rotate")
    def test_rotate_invalid_params(self, m):
        response = self.client.post(
            "/api/documents/bulk_edit/",
            json.dumps(
                {
                    "documents": [self.doc2.id, self.doc3.id],
                    "method": "rotate",
                    "parameters": {"degrees": "foo"},
                },
            ),
            content_type="application/json",
        )

        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)

        response = self.client.post(
            "/api/documents/bulk_edit/",
            json.dumps(
                {
                    "documents": [self.doc2.id, self.doc3.id],
                    "method": "rotate",
                    "parameters": {"degrees": 90.5},
                },
            ),
            content_type="application/json",
        )

        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)

        m.assert_not_called()

    @mock.patch("documents.serialisers.bulk_edit.merge")
    def test_merge(self, m):
        m.return_value = "OK"

        response = self.client.post(
            "/api/documents/bulk_edit/",
            json.dumps(
                {
                    "documents": [self.doc2.id, self.doc3.id],
                    "method": "merge",
                    "parameters": {"metadata_document_id": self.doc3.id},
                },
            ),
            content_type="application/json",
        )

        self.assertEqual(response.status_code, status.HTTP_200_OK)

        m.assert_called_once()
        args, kwargs = m.call_args
        self.assertCountEqual(args[0], [self.doc2.id, self.doc3.id])
        self.assertEqual(kwargs["metadata_document_id"], self.doc3.id)

    @mock.patch("documents.serialisers.bulk_edit.split")
    def test_split(self, m):
        m.return_value = "OK"

        response = self.client.post(
            "/api/documents/bulk_edit/",
            json.dumps(
                {
                    "documents": [self.doc2.id],
                    "method": "split",
                    "parameters": {"pages": "1,2-4,5-6,7"},
                },
            ),
            content_type="application/json",
        )

        self.assertEqual(response.status_code, status.HTTP_200_OK)

        m.assert_called_once()
        args, kwargs = m.call_args
        self.assertCountEqual(args[0], [self.doc2.id])
        self.assertEqual(kwargs["pages"], [[1], [2, 3, 4], [5, 6], [7]])

    def test_split_invalid_params(self):
        response = self.client.post(
            "/api/documents/bulk_edit/",
            json.dumps(
                {
                    "documents": [self.doc2.id],
                    "method": "split",
                    "parameters": {},  # pages not specified
                },
            ),
            content_type="application/json",
        )

        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
        self.assertIn(b"pages not specified", response.content)

        response = self.client.post(
            "/api/documents/bulk_edit/",
            json.dumps(
                {
                    "documents": [self.doc2.id],
                    "method": "split",
                    "parameters": {"pages": "1:7"},  # wrong format
                },
            ),
            content_type="application/json",
        )

        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
        self.assertIn(b"invalid pages specified", response.content)

        response = self.client.post(
            "/api/documents/bulk_edit/",
            json.dumps(
                {
                    "documents": [
                        self.doc1.id,
                        self.doc2.id,
                    ],  # only one document supported
                    "method": "split",
                    "parameters": {"pages": "1-2,3-7"},
                },
            ),
            content_type="application/json",
        )

        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
        self.assertIn(b"Split method only supports one document", response.content)
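The tests above go through Django's test client; against a running instance the same payloads are POSTed to /api/documents/bulk_edit/. A hedged example using requests, where the base URL, token and document ids are placeholders (token auth is one of the schemes paperless-ngx accepts):

import requests

BASE_URL = "http://localhost:8000"  # placeholder instance
HEADERS = {"Authorization": "Token <api-token>"}  # placeholder credentials

# Rotate two documents by 90 degrees.
requests.post(
    f"{BASE_URL}/api/documents/bulk_edit/",
    json={"documents": [11, 12], "method": "rotate", "parameters": {"degrees": 90}},
    headers=HEADERS,
)

# Split one document into the page groups 1, 2-4, 5-6 and 7.
requests.post(
    f"{BASE_URL}/api/documents/bulk_edit/",
    json={"documents": [11], "method": "split", "parameters": {"pages": "1,2-4,5-6,7"}},
    headers=HEADERS,
)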
@@ -1,3 +1,5 @@
import shutil
from pathlib import Path
from unittest import mock

from django.contrib.auth.models import Group
@@ -275,3 +277,262 @@ class TestBulkEdit(DirectoriesMixin, TestCase):
            self.doc1,
        )
        self.assertEqual(groups_with_perms.count(), 2)


class TestPDFActions(DirectoriesMixin, TestCase):
    def setUp(self):
        super().setUp()
        sample1 = self.dirs.scratch_dir / "sample.pdf"
        shutil.copy(
            Path(__file__).parent
            / "samples"
            / "documents"
            / "originals"
            / "0000001.pdf",
            sample1,
        )
        sample1_archive = self.dirs.archive_dir / "sample_archive.pdf"
        shutil.copy(
            Path(__file__).parent
            / "samples"
            / "documents"
            / "originals"
            / "0000001.pdf",
            sample1_archive,
        )
        sample2 = self.dirs.scratch_dir / "sample2.pdf"
        shutil.copy(
            Path(__file__).parent
            / "samples"
            / "documents"
            / "originals"
            / "0000002.pdf",
            sample2,
        )
        sample2_archive = self.dirs.archive_dir / "sample2_archive.pdf"
        shutil.copy(
            Path(__file__).parent
            / "samples"
            / "documents"
            / "originals"
            / "0000002.pdf",
            sample2_archive,
        )
        sample3 = self.dirs.scratch_dir / "sample3.pdf"
        shutil.copy(
            Path(__file__).parent
            / "samples"
            / "documents"
            / "originals"
            / "0000003.pdf",
            sample3,
        )
        self.doc1 = Document.objects.create(
            checksum="A",
            title="A",
            filename=sample1,
            mime_type="application/pdf",
        )
        self.doc1.archive_filename = sample1_archive
        self.doc1.save()
        self.doc2 = Document.objects.create(
            checksum="B",
            title="B",
            filename=sample2,
            mime_type="application/pdf",
        )
        self.doc2.archive_filename = sample2_archive
        self.doc2.save()
        self.doc3 = Document.objects.create(
            checksum="C",
            title="C",
            filename=sample3,
            mime_type="application/pdf",
        )
        img_doc = self.dirs.scratch_dir / "sample_image.jpg"
        shutil.copy(
            Path(__file__).parent / "samples" / "simple.jpg",
            img_doc,
        )
        self.img_doc = Document.objects.create(
            checksum="D",
            title="D",
            filename=img_doc,
            mime_type="image/jpeg",
        )

    @mock.patch("documents.tasks.consume_file.delay")
    def test_merge(self, mock_consume_file):
        """
        GIVEN:
            - Existing documents
        WHEN:
            - Merge action is called with 3 documents
        THEN:
            - Consume file should be called
        """
        doc_ids = [self.doc1.id, self.doc2.id, self.doc3.id]
        metadata_document_id = self.doc1.id

        result = bulk_edit.merge(doc_ids)

        expected_filename = (
            f"{'_'.join([str(doc_id) for doc_id in doc_ids])[:100]}_merged.pdf"
        )

        mock_consume_file.assert_called()
        consume_file_args, _ = mock_consume_file.call_args
        self.assertEqual(
            Path(consume_file_args[0].original_file).name,
            expected_filename,
        )
        self.assertEqual(consume_file_args[1].title, None)

        # With metadata_document_id overrides
        result = bulk_edit.merge(doc_ids, metadata_document_id=metadata_document_id)
        consume_file_args, _ = mock_consume_file.call_args
        self.assertEqual(consume_file_args[1].title, "A (merged)")

        self.assertEqual(result, "OK")

    @mock.patch("documents.tasks.consume_file.delay")
    @mock.patch("pikepdf.open")
    def test_merge_with_errors(self, mock_open_pdf, mock_consume_file):
        """
        GIVEN:
            - Existing documents
        WHEN:
            - Merge action is called with 2 documents
            - Error occurs when opening both files
        THEN:
            - Consume file should not be called
        """
        mock_open_pdf.side_effect = Exception("Error opening PDF")
        doc_ids = [self.doc2.id, self.doc3.id]

        with self.assertLogs("paperless.bulk_edit", level="ERROR") as cm:
            bulk_edit.merge(doc_ids)
            error_str = cm.output[0]
            expected_str = (
                "Error merging document 2, it will not be included in the merge"
            )
            self.assertIn(expected_str, error_str)

        mock_consume_file.assert_not_called()

    @mock.patch("documents.tasks.consume_file.delay")
    def test_split(self, mock_consume_file):
        """
        GIVEN:
            - Existing documents
        WHEN:
            - Split action is called with 1 document and 2 pages
        THEN:
            - Consume file should be called twice
        """
        doc_ids = [self.doc2.id]
        pages = [[1, 2], [3]]
        result = bulk_edit.split(doc_ids, pages)
        self.assertEqual(mock_consume_file.call_count, 2)
        consume_file_args, _ = mock_consume_file.call_args
        self.assertEqual(consume_file_args[1].title, "B (split 2)")

        self.assertEqual(result, "OK")

    @mock.patch("documents.tasks.consume_file.delay")
    @mock.patch("pikepdf.Pdf.save")
    def test_split_with_errors(self, mock_save_pdf, mock_consume_file):
        """
        GIVEN:
            - Existing documents
        WHEN:
            - Split action is called with 1 document and 2 page groups
            - Error occurs when saving the files
        THEN:
            - Consume file should not be called
        """
        mock_save_pdf.side_effect = Exception("Error saving PDF")
        doc_ids = [self.doc2.id]
        pages = [[1, 2], [3]]

        with self.assertLogs("paperless.bulk_edit", level="ERROR") as cm:
            bulk_edit.split(doc_ids, pages)
            error_str = cm.output[0]
            expected_str = "Error splitting document 2"
            self.assertIn(expected_str, error_str)

        mock_consume_file.assert_not_called()

    @mock.patch("documents.tasks.bulk_update_documents.s")
    @mock.patch("documents.tasks.update_document_archive_file.s")
    @mock.patch("celery.chord.delay")
    def test_rotate(self, mock_chord, mock_update_document, mock_update_documents):
        """
        GIVEN:
            - Existing documents
        WHEN:
            - Rotate action is called with 2 documents
        THEN:
            - Rotate action should be called twice
        """
        doc_ids = [self.doc1.id, self.doc2.id]
        result = bulk_edit.rotate(doc_ids, 90)
        self.assertEqual(mock_update_document.call_count, 2)
        mock_update_documents.assert_called_once()
        mock_chord.assert_called_once()
        self.assertEqual(result, "OK")

    @mock.patch("documents.tasks.bulk_update_documents.s")
    @mock.patch("documents.tasks.update_document_archive_file.s")
    @mock.patch("pikepdf.Pdf.save")
    def test_rotate_with_error(
        self,
        mock_pdf_save,
        mock_update_archive_file,
        mock_update_documents,
    ):
        """
        GIVEN:
            - Existing documents
        WHEN:
            - Rotate action is called with 2 documents
            - PikePDF raises an error
        THEN:
            - Rotate action should be called 0 times
        """
        mock_pdf_save.side_effect = Exception("Error saving PDF")
        doc_ids = [self.doc2.id, self.doc3.id]

        with self.assertLogs("paperless.bulk_edit", level="ERROR") as cm:
            bulk_edit.rotate(doc_ids, 90)
            error_str = cm.output[0]
            expected_str = "Error rotating document"
            self.assertIn(expected_str, error_str)
            mock_update_archive_file.assert_not_called()

    @mock.patch("documents.tasks.bulk_update_documents.s")
    @mock.patch("documents.tasks.update_document_archive_file.s")
    @mock.patch("celery.chord.delay")
    def test_rotate_non_pdf(
        self,
        mock_chord,
        mock_update_document,
        mock_update_documents,
    ):
        """
        GIVEN:
            - Existing documents
        WHEN:
            - Rotate action is called with 2 documents, one of which is not a PDF
        THEN:
            - Rotate action should be performed 1 time, with the non-PDF document skipped
        """
        with self.assertLogs("paperless.bulk_edit", level="INFO") as cm:
            result = bulk_edit.rotate([self.doc2.id, self.img_doc.id], 90)
            output_str = cm.output[1]
            expected_str = "Document 4 is not a PDF, skipping rotation"
            self.assertIn(expected_str, output_str)
            self.assertEqual(mock_update_document.call_count, 1)
            mock_update_documents.assert_called_once()
            mock_chord.assert_called_once()
            self.assertEqual(result, "OK")
@@ -891,7 +891,8 @@ class BulkEditView(GenericAPIView, PassUserMixin):
        document_objs = Document.objects.filter(pk__in=documents)
        has_perms = (
            all((doc.owner == user or doc.owner is None) for doc in document_objs)
-            if method == bulk_edit.set_permissions
+            if method
+            in [bulk_edit.set_permissions, bulk_edit.delete, bulk_edit.rotate]
            else all(
                has_perms_owner_aware(user, "change_document", doc)
                for doc in document_objs
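In effect, delete and rotate are now gated like set_permissions: the requester must own the documents (or they must be unowned), while the remaining methods only need object-level change permission. A short restatement of that rule as a sketch; user, document_objs and has_perms_owner_aware are assumed from the surrounding view code:

OWNER_GATED = {bulk_edit.set_permissions, bulk_edit.delete, bulk_edit.rotate}

def bulk_edit_allowed(method, user, document_objs) -> bool:
    if method in OWNER_GATED:
        return all(doc.owner == user or doc.owner is None for doc in document_objs)
    return all(
        has_perms_owner_aware(user, "change_document", doc) for doc in document_objs
    )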