mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-07-28 18:24:38 -05:00
Enhancement: delete pages PDF action (#6772)
This commit is contained in:
@@ -325,3 +325,29 @@ def split(doc_ids: list[int], pages: list[list[int]]):
|
||||
logger.exception(f"Error splitting document {doc.id}: {e}")
|
||||
|
||||
return "OK"
|
||||
|
||||
|
||||
def delete_pages(doc_ids: list[int], pages: list[int]):
|
||||
logger.info(
|
||||
f"Attempting to delete pages {pages} from {len(doc_ids)} documents",
|
||||
)
|
||||
doc = Document.objects.get(id=doc_ids[0])
|
||||
pages = sorted(pages) # sort pages to avoid index issues
|
||||
import pikepdf
|
||||
|
||||
try:
|
||||
with pikepdf.open(doc.source_path, allow_overwriting_input=True) as pdf:
|
||||
offset = 1 # pages are 1-indexed
|
||||
for page_num in pages:
|
||||
pdf.pages.remove(pdf.pages[page_num - offset])
|
||||
offset += 1 # remove() changes the index of the pages
|
||||
pdf.remove_unreferenced_resources()
|
||||
pdf.save()
|
||||
doc.checksum = hashlib.md5(doc.source_path.read_bytes()).hexdigest()
|
||||
doc.save()
|
||||
update_document_archive_file.delay(document_id=doc.id)
|
||||
logger.info(f"Deleted pages {pages} from document {doc.id}")
|
||||
except Exception as e:
|
||||
logger.exception(f"Error deleting pages from document {doc.id}: {e}")
|
||||
|
||||
return "OK"
|
||||
|
@@ -944,6 +944,7 @@ class BulkEditSerializer(
|
||||
"rotate",
|
||||
"merge",
|
||||
"split",
|
||||
"delete_pages",
|
||||
],
|
||||
label="Method",
|
||||
write_only=True,
|
||||
@@ -1000,6 +1001,8 @@ class BulkEditSerializer(
|
||||
return bulk_edit.merge
|
||||
elif method == "split":
|
||||
return bulk_edit.split
|
||||
elif method == "delete_pages":
|
||||
return bulk_edit.delete_pages
|
||||
else:
|
||||
raise serializers.ValidationError("Unsupported method.")
|
||||
|
||||
@@ -1128,6 +1131,14 @@ class BulkEditSerializer(
|
||||
except ValueError:
|
||||
raise serializers.ValidationError("invalid pages specified")
|
||||
|
||||
def _validate_parameters_delete_pages(self, parameters):
|
||||
if "pages" not in parameters:
|
||||
raise serializers.ValidationError("pages not specified")
|
||||
if not isinstance(parameters["pages"], list):
|
||||
raise serializers.ValidationError("pages must be a list")
|
||||
if not all(isinstance(i, int) for i in parameters["pages"]):
|
||||
raise serializers.ValidationError("pages must be a list of integers")
|
||||
|
||||
def validate(self, attrs):
|
||||
method = attrs["method"]
|
||||
parameters = attrs["parameters"]
|
||||
@@ -1154,6 +1165,12 @@ class BulkEditSerializer(
|
||||
"Split method only supports one document",
|
||||
)
|
||||
self._validate_parameters_split(parameters)
|
||||
elif method == bulk_edit.delete_pages:
|
||||
if len(attrs["documents"]) > 1:
|
||||
raise serializers.ValidationError(
|
||||
"Delete pages method only supports one document",
|
||||
)
|
||||
self._validate_parameters_delete_pages(parameters)
|
||||
|
||||
return attrs
|
||||
|
||||
|
@@ -1065,3 +1065,95 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertIn(b"Split method only supports one document", response.content)
|
||||
|
||||
@mock.patch("documents.serialisers.bulk_edit.delete_pages")
|
||||
def test_delete_pages(self, m):
|
||||
m.return_value = "OK"
|
||||
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"method": "delete_pages",
|
||||
"parameters": {"pages": [1, 2, 3, 4]},
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
|
||||
m.assert_called_once()
|
||||
args, kwargs = m.call_args
|
||||
self.assertCountEqual(args[0], [self.doc2.id])
|
||||
self.assertEqual(kwargs["pages"], [1, 2, 3, 4])
|
||||
|
||||
def test_delete_pages_invalid_params(self):
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [
|
||||
self.doc1.id,
|
||||
self.doc2.id,
|
||||
], # only one document supported
|
||||
"method": "delete_pages",
|
||||
"parameters": {
|
||||
"pages": [1, 2, 3, 4],
|
||||
},
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertIn(
|
||||
b"Delete pages method only supports one document",
|
||||
response.content,
|
||||
)
|
||||
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"method": "delete_pages",
|
||||
"parameters": {}, # pages not specified
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertIn(b"pages not specified", response.content)
|
||||
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"method": "delete_pages",
|
||||
"parameters": {"pages": "1-3"}, # not a list
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertIn(b"pages must be a list", response.content)
|
||||
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"method": "delete_pages",
|
||||
"parameters": {"pages": ["1-3"]}, # not ints
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertIn(b"pages must be a list of integers", response.content)
|
||||
|
@@ -585,3 +585,46 @@ class TestPDFActions(DirectoriesMixin, TestCase):
|
||||
mock_update_documents.assert_called_once()
|
||||
mock_chord.assert_called_once()
|
||||
self.assertEqual(result, "OK")
|
||||
|
||||
@mock.patch("documents.tasks.update_document_archive_file.delay")
|
||||
@mock.patch("pikepdf.Pdf.save")
|
||||
def test_delete_pages(self, mock_pdf_save, mock_update_archive_file):
|
||||
"""
|
||||
GIVEN:
|
||||
- Existing documents
|
||||
WHEN:
|
||||
- Delete pages action is called with 1 document and 2 pages
|
||||
THEN:
|
||||
- Save should be called once
|
||||
- Archive file should be updated once
|
||||
"""
|
||||
doc_ids = [self.doc2.id]
|
||||
pages = [1, 3]
|
||||
result = bulk_edit.delete_pages(doc_ids, pages)
|
||||
mock_pdf_save.assert_called_once()
|
||||
mock_update_archive_file.assert_called_once()
|
||||
self.assertEqual(result, "OK")
|
||||
|
||||
@mock.patch("documents.tasks.update_document_archive_file.delay")
|
||||
@mock.patch("pikepdf.Pdf.save")
|
||||
def test_delete_pages_with_error(self, mock_pdf_save, mock_update_archive_file):
|
||||
"""
|
||||
GIVEN:
|
||||
- Existing documents
|
||||
WHEN:
|
||||
- Delete pages action is called with 1 document and 2 pages
|
||||
- PikePDF raises an error
|
||||
THEN:
|
||||
- Save should be called once
|
||||
- Archive file should not be updated
|
||||
"""
|
||||
mock_pdf_save.side_effect = Exception("Error saving PDF")
|
||||
doc_ids = [self.doc2.id]
|
||||
pages = [1, 3]
|
||||
|
||||
with self.assertLogs("paperless.bulk_edit", level="ERROR") as cm:
|
||||
bulk_edit.delete_pages(doc_ids, pages)
|
||||
error_str = cm.output[0]
|
||||
expected_str = "Error deleting pages from document"
|
||||
self.assertIn(expected_str, error_str)
|
||||
mock_update_archive_file.assert_not_called()
|
||||
|
Reference in New Issue
Block a user