mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-07-28 18:24:38 -05:00
Enhancement: support delete originals after split / merge (#6935)
--------- Co-authored-by: shamoon <4887959+shamoon@users.noreply.github.com>
This commit is contained in:
@@ -4,7 +4,10 @@ import logging
|
||||
import os
|
||||
from typing import Optional
|
||||
|
||||
from celery import chain
|
||||
from celery import chord
|
||||
from celery import group
|
||||
from celery import shared_task
|
||||
from django.conf import settings
|
||||
from django.db.models import Q
|
||||
|
||||
@@ -153,6 +156,7 @@ def modify_custom_fields(doc_ids: list[int], add_custom_fields, remove_custom_fi
|
||||
return "OK"
|
||||
|
||||
|
||||
@shared_task
|
||||
def delete(doc_ids: list[int]):
|
||||
Document.objects.filter(id__in=doc_ids).delete()
|
||||
|
||||
@@ -234,7 +238,11 @@ def rotate(doc_ids: list[int], degrees: int):
|
||||
return "OK"
|
||||
|
||||
|
||||
def merge(doc_ids: list[int], metadata_document_id: Optional[int] = None):
|
||||
def merge(
|
||||
doc_ids: list[int],
|
||||
metadata_document_id: Optional[int] = None,
|
||||
delete_originals: bool = False,
|
||||
):
|
||||
logger.info(
|
||||
f"Attempting to merge {len(doc_ids)} documents into a single document.",
|
||||
)
|
||||
@@ -277,7 +285,8 @@ def merge(doc_ids: list[int], metadata_document_id: Optional[int] = None):
|
||||
overrides = DocumentMetadataOverrides()
|
||||
|
||||
logger.info("Adding merged document to the task queue.")
|
||||
consume_file.delay(
|
||||
|
||||
consume_task = consume_file.s(
|
||||
ConsumableDocument(
|
||||
source=DocumentSource.ConsumeFolder,
|
||||
original_file=filepath,
|
||||
@@ -285,16 +294,26 @@ def merge(doc_ids: list[int], metadata_document_id: Optional[int] = None):
|
||||
overrides,
|
||||
)
|
||||
|
||||
if delete_originals:
|
||||
logger.info(
|
||||
"Queueing removal of original documents after consumption of merged document",
|
||||
)
|
||||
chain(consume_task, delete.si(affected_docs)).delay()
|
||||
else:
|
||||
consume_task.delay()
|
||||
|
||||
return "OK"
|
||||
|
||||
|
||||
def split(doc_ids: list[int], pages: list[list[int]]):
|
||||
def split(doc_ids: list[int], pages: list[list[int]], delete_originals: bool = False):
|
||||
logger.info(
|
||||
f"Attempting to split document {doc_ids[0]} into {len(pages)} documents",
|
||||
)
|
||||
doc = Document.objects.get(id=doc_ids[0])
|
||||
import pikepdf
|
||||
|
||||
consume_tasks = []
|
||||
|
||||
try:
|
||||
with pikepdf.open(doc.source_path) as pdf:
|
||||
for idx, split_doc in enumerate(pages):
|
||||
@@ -314,13 +333,24 @@ def split(doc_ids: list[int], pages: list[list[int]]):
|
||||
logger.info(
|
||||
f"Adding split document with pages {split_doc} to the task queue.",
|
||||
)
|
||||
consume_file.delay(
|
||||
ConsumableDocument(
|
||||
source=DocumentSource.ConsumeFolder,
|
||||
original_file=filepath,
|
||||
consume_tasks.append(
|
||||
consume_file.s(
|
||||
ConsumableDocument(
|
||||
source=DocumentSource.ConsumeFolder,
|
||||
original_file=filepath,
|
||||
),
|
||||
overrides,
|
||||
),
|
||||
overrides,
|
||||
)
|
||||
|
||||
if delete_originals:
|
||||
logger.info(
|
||||
"Queueing removal of original document after consumption of the split documents",
|
||||
)
|
||||
chord(header=consume_tasks, body=delete.si([doc.id])).delay()
|
||||
else:
|
||||
group(consume_tasks).delay()
|
||||
|
||||
except Exception as e:
|
||||
logger.exception(f"Error splitting document {doc.id}: {e}")
|
||||
|
||||
|
@@ -1131,6 +1131,12 @@ class BulkEditSerializer(
|
||||
except ValueError:
|
||||
raise serializers.ValidationError("invalid pages specified")
|
||||
|
||||
if "delete_originals" in parameters:
|
||||
if not isinstance(parameters["delete_originals"], bool):
|
||||
raise serializers.ValidationError("delete_originals must be a boolean")
|
||||
else:
|
||||
parameters["delete_originals"] = False
|
||||
|
||||
def _validate_parameters_delete_pages(self, parameters):
|
||||
if "pages" not in parameters:
|
||||
raise serializers.ValidationError("pages not specified")
|
||||
@@ -1139,6 +1145,13 @@ class BulkEditSerializer(
|
||||
if not all(isinstance(i, int) for i in parameters["pages"]):
|
||||
raise serializers.ValidationError("pages must be a list of integers")
|
||||
|
||||
def _validate_parameters_merge(self, parameters):
|
||||
if "delete_originals" in parameters:
|
||||
if not isinstance(parameters["delete_originals"], bool):
|
||||
raise serializers.ValidationError("delete_originals must be a boolean")
|
||||
else:
|
||||
parameters["delete_originals"] = False
|
||||
|
||||
def validate(self, attrs):
|
||||
method = attrs["method"]
|
||||
parameters = attrs["parameters"]
|
||||
@@ -1171,6 +1184,8 @@ class BulkEditSerializer(
|
||||
"Delete pages method only supports one document",
|
||||
)
|
||||
self._validate_parameters_delete_pages(parameters)
|
||||
elif method == bulk_edit.merge:
|
||||
self._validate_parameters_merge(parameters)
|
||||
|
||||
return attrs
|
||||
|
||||
|
@@ -994,6 +994,82 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
|
||||
self.assertCountEqual(args[0], [self.doc2.id, self.doc3.id])
|
||||
self.assertEqual(kwargs["metadata_document_id"], self.doc3.id)
|
||||
|
||||
@mock.patch("documents.serialisers.bulk_edit.merge")
|
||||
def test_merge_and_delete_insufficient_permissions(self, m):
|
||||
self.doc1.owner = User.objects.get(username="temp_admin")
|
||||
self.doc1.save()
|
||||
user1 = User.objects.create(username="user1")
|
||||
self.client.force_authenticate(user=user1)
|
||||
|
||||
m.return_value = "OK"
|
||||
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc1.id, self.doc2.id],
|
||||
"method": "merge",
|
||||
"parameters": {
|
||||
"metadata_document_id": self.doc2.id,
|
||||
"delete_originals": True,
|
||||
},
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
|
||||
m.assert_not_called()
|
||||
self.assertEqual(response.content, b"Insufficient permissions")
|
||||
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id, self.doc3.id],
|
||||
"method": "merge",
|
||||
"parameters": {
|
||||
"metadata_document_id": self.doc2.id,
|
||||
"delete_originals": True,
|
||||
},
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
m.assert_called_once()
|
||||
|
||||
@mock.patch("documents.serialisers.bulk_edit.merge")
|
||||
def test_merge_invalid_parameters(self, m):
|
||||
"""
|
||||
GIVEN:
|
||||
- API data for merging documents is called
|
||||
- The parameters are invalid
|
||||
WHEN:
|
||||
- API is called
|
||||
THEN:
|
||||
- The API fails with a correct error code
|
||||
"""
|
||||
m.return_value = "OK"
|
||||
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc1.id, self.doc2.id],
|
||||
"method": "merge",
|
||||
"parameters": {
|
||||
"delete_originals": "not_boolean",
|
||||
},
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
m.assert_not_called()
|
||||
|
||||
@mock.patch("documents.serialisers.bulk_edit.split")
|
||||
def test_split(self, m):
|
||||
m.return_value = "OK"
|
||||
@@ -1066,6 +1142,24 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertIn(b"Split method only supports one document", response.content)
|
||||
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"method": "split",
|
||||
"parameters": {
|
||||
"pages": "1",
|
||||
"delete_originals": "notabool",
|
||||
}, # not a bool
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertIn(b"delete_originals must be a boolean", response.content)
|
||||
|
||||
@mock.patch("documents.serialisers.bulk_edit.delete_pages")
|
||||
def test_delete_pages(self, m):
|
||||
m.return_value = "OK"
|
||||
|
@@ -410,7 +410,7 @@ class TestPDFActions(DirectoriesMixin, TestCase):
|
||||
mime_type="image/jpeg",
|
||||
)
|
||||
|
||||
@mock.patch("documents.tasks.consume_file.delay")
|
||||
@mock.patch("documents.tasks.consume_file.s")
|
||||
def test_merge(self, mock_consume_file):
|
||||
"""
|
||||
GIVEN:
|
||||
@@ -444,6 +444,50 @@ class TestPDFActions(DirectoriesMixin, TestCase):
|
||||
|
||||
self.assertEqual(result, "OK")
|
||||
|
||||
@mock.patch("documents.bulk_edit.delete.si")
|
||||
@mock.patch("documents.tasks.consume_file.s")
|
||||
@mock.patch("documents.bulk_edit.chain")
|
||||
def test_merge_and_delete_originals(
|
||||
self,
|
||||
mock_chain,
|
||||
mock_consume_file,
|
||||
mock_delete_documents,
|
||||
):
|
||||
"""
|
||||
GIVEN:
|
||||
- Existing documents
|
||||
WHEN:
|
||||
- Merge action with deleting documents is called with 3 documents
|
||||
THEN:
|
||||
- Consume file task should be called
|
||||
- Document deletion task should be called
|
||||
"""
|
||||
doc_ids = [self.doc1.id, self.doc2.id, self.doc3.id]
|
||||
|
||||
result = bulk_edit.merge(doc_ids, delete_originals=True)
|
||||
self.assertEqual(result, "OK")
|
||||
|
||||
expected_filename = (
|
||||
f"{'_'.join([str(doc_id) for doc_id in doc_ids])[:100]}_merged.pdf"
|
||||
)
|
||||
|
||||
mock_consume_file.assert_called()
|
||||
mock_delete_documents.assert_called()
|
||||
mock_chain.assert_called_once()
|
||||
|
||||
consume_file_args, _ = mock_consume_file.call_args
|
||||
self.assertEqual(
|
||||
Path(consume_file_args[0].original_file).name,
|
||||
expected_filename,
|
||||
)
|
||||
self.assertEqual(consume_file_args[1].title, None)
|
||||
|
||||
delete_documents_args, _ = mock_delete_documents.call_args
|
||||
self.assertEqual(
|
||||
delete_documents_args[0],
|
||||
doc_ids,
|
||||
)
|
||||
|
||||
@mock.patch("documents.tasks.consume_file.delay")
|
||||
@mock.patch("pikepdf.open")
|
||||
def test_merge_with_errors(self, mock_open_pdf, mock_consume_file):
|
||||
@@ -469,7 +513,7 @@ class TestPDFActions(DirectoriesMixin, TestCase):
|
||||
|
||||
mock_consume_file.assert_not_called()
|
||||
|
||||
@mock.patch("documents.tasks.consume_file.delay")
|
||||
@mock.patch("documents.tasks.consume_file.s")
|
||||
def test_split(self, mock_consume_file):
|
||||
"""
|
||||
GIVEN:
|
||||
@@ -488,6 +532,44 @@ class TestPDFActions(DirectoriesMixin, TestCase):
|
||||
|
||||
self.assertEqual(result, "OK")
|
||||
|
||||
@mock.patch("documents.bulk_edit.delete.si")
|
||||
@mock.patch("documents.tasks.consume_file.s")
|
||||
@mock.patch("documents.bulk_edit.chord")
|
||||
def test_split_and_delete_originals(
|
||||
self,
|
||||
mock_chord,
|
||||
mock_consume_file,
|
||||
mock_delete_documents,
|
||||
):
|
||||
"""
|
||||
GIVEN:
|
||||
- Existing documents
|
||||
WHEN:
|
||||
- Split action with deleting documents is called with 1 document and 2 page groups
|
||||
- delete_originals is set to True
|
||||
THEN:
|
||||
- Consume file should be called twice
|
||||
- Document deletion task should be called
|
||||
"""
|
||||
doc_ids = [self.doc2.id]
|
||||
pages = [[1, 2], [3]]
|
||||
|
||||
result = bulk_edit.split(doc_ids, pages, delete_originals=True)
|
||||
self.assertEqual(result, "OK")
|
||||
|
||||
self.assertEqual(mock_consume_file.call_count, 2)
|
||||
consume_file_args, _ = mock_consume_file.call_args
|
||||
self.assertEqual(consume_file_args[1].title, "B (split 2)")
|
||||
|
||||
mock_delete_documents.assert_called()
|
||||
mock_chord.assert_called_once()
|
||||
|
||||
delete_documents_args, _ = mock_delete_documents.call_args
|
||||
self.assertEqual(
|
||||
delete_documents_args[0],
|
||||
doc_ids,
|
||||
)
|
||||
|
||||
@mock.patch("documents.tasks.consume_file.delay")
|
||||
@mock.patch("pikepdf.Pdf.save")
|
||||
def test_split_with_errors(self, mock_save_pdf, mock_consume_file):
|
||||
|
@@ -965,16 +965,31 @@ class BulkEditView(PassUserMixin):
|
||||
document_objs = Document.objects.select_related("owner").filter(
|
||||
pk__in=documents,
|
||||
)
|
||||
user_is_owner_of_all_documents = all(
|
||||
(doc.owner == user or doc.owner is None) for doc in document_objs
|
||||
)
|
||||
|
||||
has_perms = (
|
||||
all((doc.owner == user or doc.owner is None) for doc in document_objs)
|
||||
user_is_owner_of_all_documents
|
||||
if method
|
||||
in [bulk_edit.set_permissions, bulk_edit.delete, bulk_edit.rotate]
|
||||
in [
|
||||
bulk_edit.set_permissions,
|
||||
bulk_edit.delete,
|
||||
bulk_edit.rotate,
|
||||
]
|
||||
else all(
|
||||
has_perms_owner_aware(user, "change_document", doc)
|
||||
for doc in document_objs
|
||||
)
|
||||
)
|
||||
|
||||
if (
|
||||
method in [bulk_edit.merge, bulk_edit.split]
|
||||
and parameters["delete_originals"]
|
||||
and not user_is_owner_of_all_documents
|
||||
):
|
||||
has_perms = False
|
||||
|
||||
if not has_perms:
|
||||
return HttpResponseForbidden("Insufficient permissions")
|
||||
|
||||
|
Reference in New Issue
Block a user