Enhancement: support delete originals after split / merge (#6935)

---------

Co-authored-by: shamoon <4887959+shamoon@users.noreply.github.com>
Author: Dominik Bruhn
Date: 2024-06-08 18:56:25 +02:00
Committed by: shamoon
Parent: 81e4092f53
Commit: d1ac15baa9
16 changed files with 354 additions and 32 deletions

View File

@@ -4,7 +4,10 @@ import logging
 import os
 from typing import Optional
+from celery import chain
+from celery import chord
+from celery import group
 from celery import shared_task
 from django.conf import settings
 from django.db.models import Q
@@ -153,6 +156,7 @@ def modify_custom_fields(doc_ids: list[int], add_custom_fields, remove_custom_fi
return "OK"
@shared_task
def delete(doc_ids: list[int]):
Document.objects.filter(id__in=doc_ids).delete()
@@ -234,7 +238,11 @@ def rotate(doc_ids: list[int], degrees: int):
return "OK"
def merge(doc_ids: list[int], metadata_document_id: Optional[int] = None):
def merge(
doc_ids: list[int],
metadata_document_id: Optional[int] = None,
delete_originals: bool = False,
):
logger.info(
f"Attempting to merge {len(doc_ids)} documents into a single document.",
)
@@ -277,7 +285,8 @@ def merge(doc_ids: list[int], metadata_document_id: Optional[int] = None):
         overrides = DocumentMetadataOverrides()
     logger.info("Adding merged document to the task queue.")
-    consume_file.delay(
+    consume_task = consume_file.s(
         ConsumableDocument(
             source=DocumentSource.ConsumeFolder,
             original_file=filepath,
@@ -285,16 +294,26 @@ def merge(doc_ids: list[int], metadata_document_id: Optional[int] = None):
         overrides,
     )
+
+    if delete_originals:
+        logger.info(
+            "Queueing removal of original documents after consumption of merged document",
+        )
+        chain(consume_task, delete.si(affected_docs)).delay()
+    else:
+        consume_task.delay()
+
     return "OK"

-def split(doc_ids: list[int], pages: list[list[int]]):
+def split(doc_ids: list[int], pages: list[list[int]], delete_originals: bool = False):
     logger.info(
         f"Attempting to split document {doc_ids[0]} into {len(pages)} documents",
     )
     doc = Document.objects.get(id=doc_ids[0])
     import pikepdf

+    consume_tasks = []
+
     try:
         with pikepdf.open(doc.source_path) as pdf:
             for idx, split_doc in enumerate(pages):
@@ -314,13 +333,24 @@ def split(doc_ids: list[int], pages: list[list[int]]):
                 logger.info(
                     f"Adding split document with pages {split_doc} to the task queue.",
                 )
-                consume_file.delay(
-                    ConsumableDocument(
-                        source=DocumentSource.ConsumeFolder,
-                        original_file=filepath,
+                consume_tasks.append(
+                    consume_file.s(
+                        ConsumableDocument(
+                            source=DocumentSource.ConsumeFolder,
+                            original_file=filepath,
+                        ),
+                        overrides,
                     ),
-                    overrides,
                 )
+
+        if delete_originals:
+            logger.info(
+                "Queueing removal of original document after consumption of the split documents",
+            )
+            chord(header=consume_tasks, body=delete.si([doc.id])).delay()
+        else:
+            group(consume_tasks).delay()
+
     except Exception as e:
         logger.exception(f"Error splitting document {doc.id}: {e}")

View File

@@ -1131,6 +1131,12 @@ class BulkEditSerializer(
         except ValueError:
             raise serializers.ValidationError("invalid pages specified")

+        if "delete_originals" in parameters:
+            if not isinstance(parameters["delete_originals"], bool):
+                raise serializers.ValidationError("delete_originals must be a boolean")
+        else:
+            parameters["delete_originals"] = False
+
     def _validate_parameters_delete_pages(self, parameters):
         if "pages" not in parameters:
             raise serializers.ValidationError("pages not specified")
@@ -1139,6 +1145,13 @@ class BulkEditSerializer(
         if not all(isinstance(i, int) for i in parameters["pages"]):
             raise serializers.ValidationError("pages must be a list of integers")

+    def _validate_parameters_merge(self, parameters):
+        if "delete_originals" in parameters:
+            if not isinstance(parameters["delete_originals"], bool):
+                raise serializers.ValidationError("delete_originals must be a boolean")
+        else:
+            parameters["delete_originals"] = False
+
     def validate(self, attrs):
         method = attrs["method"]
         parameters = attrs["parameters"]
@@ -1171,6 +1184,8 @@ class BulkEditSerializer(
"Delete pages method only supports one document",
)
self._validate_parameters_delete_pages(parameters)
elif method == bulk_edit.merge:
self._validate_parameters_merge(parameters)
return attrs
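For API callers, the net effect of the serializer change is that merge and split both accept an optional boolean delete_originals, defaulting to False and rejected with HTTP 400 when it is not a boolean. A sketch of the corresponding requests against the /api/documents/bulk_edit/ endpoint exercised by the API tests that follow (host, token, document ids and the page spec are placeholders, not values from this change):

# Sketch of bulk_edit requests with the new parameter; values are placeholders.
import requests

session = requests.Session()
session.headers["Authorization"] = "Token <api-token>"  # placeholder token
BASE = "http://localhost:8000"  # placeholder host

# Merge two documents and queue deletion of the originals afterwards.
resp = session.post(
    f"{BASE}/api/documents/bulk_edit/",
    json={
        "documents": [11, 12],
        "method": "merge",
        "parameters": {"metadata_document_id": 11, "delete_originals": True},
    },
)
print(resp.status_code)  # 200 if the caller is permitted

# Omitting delete_originals is fine (it defaults to False);
# a non-boolean value is rejected with HTTP 400.
resp = session.post(
    f"{BASE}/api/documents/bulk_edit/",
    json={
        "documents": [11],
        "method": "split",
        "parameters": {
            "pages": "1-2,3",  # page-spec string of the existing split action (format assumed)
            "delete_originals": "yes",  # not a boolean -> rejected
        },
    },
)
print(resp.status_code)  # 400: "delete_originals must be a boolean"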

View File

@@ -994,6 +994,82 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
         self.assertCountEqual(args[0], [self.doc2.id, self.doc3.id])
         self.assertEqual(kwargs["metadata_document_id"], self.doc3.id)

+    @mock.patch("documents.serialisers.bulk_edit.merge")
+    def test_merge_and_delete_insufficient_permissions(self, m):
+        self.doc1.owner = User.objects.get(username="temp_admin")
+        self.doc1.save()
+        user1 = User.objects.create(username="user1")
+        self.client.force_authenticate(user=user1)
+
+        m.return_value = "OK"
+
+        response = self.client.post(
+            "/api/documents/bulk_edit/",
+            json.dumps(
+                {
+                    "documents": [self.doc1.id, self.doc2.id],
+                    "method": "merge",
+                    "parameters": {
+                        "metadata_document_id": self.doc2.id,
+                        "delete_originals": True,
+                    },
+                },
+            ),
+            content_type="application/json",
+        )
+
+        self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
+        m.assert_not_called()
+        self.assertEqual(response.content, b"Insufficient permissions")
+
+        response = self.client.post(
+            "/api/documents/bulk_edit/",
+            json.dumps(
+                {
+                    "documents": [self.doc2.id, self.doc3.id],
+                    "method": "merge",
+                    "parameters": {
+                        "metadata_document_id": self.doc2.id,
+                        "delete_originals": True,
+                    },
+                },
+            ),
+            content_type="application/json",
+        )
+
+        self.assertEqual(response.status_code, status.HTTP_200_OK)
+        m.assert_called_once()
+
+    @mock.patch("documents.serialisers.bulk_edit.merge")
+    def test_merge_invalid_parameters(self, m):
+        """
+        GIVEN:
+            - API data for merging documents is called
+            - The parameters are invalid
+        WHEN:
+            - API is called
+        THEN:
+            - The API fails with a correct error code
+        """
+        m.return_value = "OK"
+        response = self.client.post(
+            "/api/documents/bulk_edit/",
+            json.dumps(
+                {
+                    "documents": [self.doc1.id, self.doc2.id],
+                    "method": "merge",
+                    "parameters": {
+                        "delete_originals": "not_boolean",
+                    },
+                },
+            ),
+            content_type="application/json",
+        )
+
+        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
+        m.assert_not_called()
+
     @mock.patch("documents.serialisers.bulk_edit.split")
     def test_split(self, m):
         m.return_value = "OK"
@@ -1066,6 +1142,24 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
         self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
         self.assertIn(b"Split method only supports one document", response.content)

+        response = self.client.post(
+            "/api/documents/bulk_edit/",
+            json.dumps(
+                {
+                    "documents": [self.doc2.id],
+                    "method": "split",
+                    "parameters": {
+                        "pages": "1",
+                        "delete_originals": "notabool",
+                    },  # not a bool
+                },
+            ),
+            content_type="application/json",
+        )
+        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
+        self.assertIn(b"delete_originals must be a boolean", response.content)
+
     @mock.patch("documents.serialisers.bulk_edit.delete_pages")
     def test_delete_pages(self, m):
         m.return_value = "OK"

View File

@@ -410,7 +410,7 @@ class TestPDFActions(DirectoriesMixin, TestCase):
             mime_type="image/jpeg",
         )

-    @mock.patch("documents.tasks.consume_file.delay")
+    @mock.patch("documents.tasks.consume_file.s")
     def test_merge(self, mock_consume_file):
         """
         GIVEN:
@@ -444,6 +444,50 @@ class TestPDFActions(DirectoriesMixin, TestCase):
         self.assertEqual(result, "OK")

+    @mock.patch("documents.bulk_edit.delete.si")
+    @mock.patch("documents.tasks.consume_file.s")
+    @mock.patch("documents.bulk_edit.chain")
+    def test_merge_and_delete_originals(
+        self,
+        mock_chain,
+        mock_consume_file,
+        mock_delete_documents,
+    ):
+        """
+        GIVEN:
+            - Existing documents
+        WHEN:
+            - Merge action with deleting documents is called with 3 documents
+        THEN:
+            - Consume file task should be called
+            - Document deletion task should be called
+        """
+        doc_ids = [self.doc1.id, self.doc2.id, self.doc3.id]
+
+        result = bulk_edit.merge(doc_ids, delete_originals=True)
+        self.assertEqual(result, "OK")
+
+        expected_filename = (
+            f"{'_'.join([str(doc_id) for doc_id in doc_ids])[:100]}_merged.pdf"
+        )
+
+        mock_consume_file.assert_called()
+        mock_delete_documents.assert_called()
+        mock_chain.assert_called_once()
+
+        consume_file_args, _ = mock_consume_file.call_args
+        self.assertEqual(
+            Path(consume_file_args[0].original_file).name,
+            expected_filename,
+        )
+        self.assertEqual(consume_file_args[1].title, None)
+
+        delete_documents_args, _ = mock_delete_documents.call_args
+        self.assertEqual(
+            delete_documents_args[0],
+            doc_ids,
+        )
+
     @mock.patch("documents.tasks.consume_file.delay")
     @mock.patch("pikepdf.open")
     def test_merge_with_errors(self, mock_open_pdf, mock_consume_file):
@@ -469,7 +513,7 @@ class TestPDFActions(DirectoriesMixin, TestCase):
         mock_consume_file.assert_not_called()

-    @mock.patch("documents.tasks.consume_file.delay")
+    @mock.patch("documents.tasks.consume_file.s")
     def test_split(self, mock_consume_file):
         """
         GIVEN:
@@ -488,6 +532,44 @@ class TestPDFActions(DirectoriesMixin, TestCase):
         self.assertEqual(result, "OK")

+    @mock.patch("documents.bulk_edit.delete.si")
+    @mock.patch("documents.tasks.consume_file.s")
+    @mock.patch("documents.bulk_edit.chord")
+    def test_split_and_delete_originals(
+        self,
+        mock_chord,
+        mock_consume_file,
+        mock_delete_documents,
+    ):
+        """
+        GIVEN:
+            - Existing documents
+        WHEN:
+            - Split action with deleting documents is called with 1 document and 2 page groups
+            - delete_originals is set to True
+        THEN:
+            - Consume file should be called twice
+            - Document deletion task should be called
+        """
+        doc_ids = [self.doc2.id]
+        pages = [[1, 2], [3]]
+        result = bulk_edit.split(doc_ids, pages, delete_originals=True)
+        self.assertEqual(result, "OK")
+
+        self.assertEqual(mock_consume_file.call_count, 2)
+        consume_file_args, _ = mock_consume_file.call_args
+        self.assertEqual(consume_file_args[1].title, "B (split 2)")
+
+        mock_delete_documents.assert_called()
+        mock_chord.assert_called_once()
+
+        delete_documents_args, _ = mock_delete_documents.call_args
+        self.assertEqual(
+            delete_documents_args[0],
+            doc_ids,
+        )
+
     @mock.patch("documents.tasks.consume_file.delay")
     @mock.patch("pikepdf.Pdf.save")
     def test_split_with_errors(self, mock_save_pdf, mock_consume_file):

View File

@@ -965,16 +965,31 @@ class BulkEditView(PassUserMixin):
         document_objs = Document.objects.select_related("owner").filter(
             pk__in=documents,
         )
+        user_is_owner_of_all_documents = all(
+            (doc.owner == user or doc.owner is None) for doc in document_objs
+        )
+
         has_perms = (
-            all((doc.owner == user or doc.owner is None) for doc in document_objs)
+            user_is_owner_of_all_documents
             if method
-            in [bulk_edit.set_permissions, bulk_edit.delete, bulk_edit.rotate]
+            in [
+                bulk_edit.set_permissions,
+                bulk_edit.delete,
+                bulk_edit.rotate,
+            ]
             else all(
                 has_perms_owner_aware(user, "change_document", doc)
                 for doc in document_objs
             )
         )
+
+        if (
+            method in [bulk_edit.merge, bulk_edit.split]
+            and parameters["delete_originals"]
+            and not user_is_owner_of_all_documents
+        ):
+            has_perms = False
+
         if not has_perms:
             return HttpResponseForbidden("Insufficient permissions")
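The view-level rule distilled from the hunk above: set_permissions, delete and rotate always require ownership of every selected document, other methods only require change permission, and merge/split additionally require ownership as soon as delete_originals is set, since the originals will be deleted after consumption. A condensed restatement (a sketch with hypothetical helper and parameter names, not the actual view code):

# Condensed restatement of the permission check above (sketch only).
def may_run_bulk_edit(
    method_name: str,
    delete_originals: bool,
    owns_all_documents: bool,
    can_change_all_documents: bool,
) -> bool:
    # Destructive methods always require ownership of every document.
    if method_name in {"set_permissions", "delete", "rotate"}:
        allowed = owns_all_documents
    else:
        # Everything else only needs change permission on every document.
        allowed = can_change_all_documents
    # merge/split become destructive once delete_originals is requested,
    # so they additionally require ownership of every document.
    if method_name in {"merge", "split"} and delete_originals and not owns_all_documents:
        allowed = False
    return allowed

This is the branch that produces the 403 asserted in test_merge_and_delete_insufficient_permissions above.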