Feature: PDF editor (#10318)

This commit is contained in:
shamoon
2025-08-11 10:29:48 -07:00
committed by GitHub
parent 6730896894
commit 6d72ee795f
23 changed files with 1066 additions and 662 deletions

View File

@@ -41,6 +41,7 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
title="B",
correspondent=self.c1,
document_type=self.dt1,
page_count=5,
)
self.doc3 = Document.objects.create(
checksum="C",
@@ -1369,6 +1370,218 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"pages must be a list of integers", response.content)
@mock.patch("documents.serialisers.bulk_edit.edit_pdf")
def test_edit_pdf(self, m):
self.setup_mock(m, "edit_pdf")
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"method": "edit_pdf",
"parameters": {"operations": [{"page": 1}]},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_200_OK)
m.assert_called_once()
args, kwargs = m.call_args
self.assertCountEqual(args[0], [self.doc2.id])
self.assertEqual(kwargs["operations"], [{"page": 1}])
self.assertEqual(kwargs["user"], self.user)
def test_edit_pdf_invalid_params(self):
# multiple documents
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id, self.doc3.id],
"method": "edit_pdf",
"parameters": {"operations": [{"page": 1}]},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"Edit PDF method only supports one document", response.content)
# no operations specified
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"method": "edit_pdf",
"parameters": {},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"operations not specified", response.content)
# operations not a list
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"method": "edit_pdf",
"parameters": {"operations": "not_a_list"},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"operations must be a list", response.content)
# invalid operation
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"method": "edit_pdf",
"parameters": {"operations": ["invalid_operation"]},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"invalid operation entry", response.content)
# page not an int
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"method": "edit_pdf",
"parameters": {"operations": [{"page": "not_an_int"}]},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"page must be an integer", response.content)
# rotate not an int
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"method": "edit_pdf",
"parameters": {"operations": [{"page": 1, "rotate": "not_an_int"}]},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"rotate must be an integer", response.content)
# doc not an int
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"method": "edit_pdf",
"parameters": {"operations": [{"page": 1, "doc": "not_an_int"}]},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"doc must be an integer", response.content)
# update_document not a boolean
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"method": "edit_pdf",
"parameters": {
"update_document": "not_a_bool",
"operations": [{"page": 1}],
},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"update_document must be a boolean", response.content)
# include_metadata not a boolean
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"method": "edit_pdf",
"parameters": {
"include_metadata": "not_a_bool",
"operations": [{"page": 1}],
},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"include_metadata must be a boolean", response.content)
# update_document True but output would be multiple documents
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"method": "edit_pdf",
"parameters": {
"update_document": True,
"operations": [{"page": 1, "doc": 1}, {"page": 2, "doc": 2}],
},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(
b"update_document only allowed with a single output document",
response.content,
)
@mock.patch("documents.serialisers.bulk_edit.edit_pdf")
def test_edit_pdf_page_out_of_bounds(self, m):
"""
GIVEN:
- API data for editing PDF is called
- The page number is out of bounds
WHEN:
- API is called
THEN:
- The API fails with a correct error code
"""
self.setup_mock(m, "edit_pdf")
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"method": "edit_pdf",
"parameters": {"operations": [{"page": 99}]},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"out of bounds", response.content)
@override_settings(AUDIT_LOG_ENABLED=True)
def test_bulk_edit_audit_log_enabled_simple_field(self):
"""

View File

@@ -909,3 +909,156 @@ class TestPDFActions(DirectoriesMixin, TestCase):
expected_str = "Error deleting pages from document"
self.assertIn(expected_str, error_str)
mock_update_archive_file.assert_not_called()
@mock.patch("documents.bulk_edit.group")
@mock.patch("documents.tasks.consume_file.s")
def test_edit_pdf_basic_operations(self, mock_consume_file, mock_group):
"""
GIVEN:
- Existing document
WHEN:
- edit_pdf is called with two operations to split the doc and rotate pages
THEN:
- A grouped task is generated and delay() is called
"""
mock_group.return_value.delay.return_value = None
doc_ids = [self.doc2.id]
operations = [{"page": 1, "doc": 0}, {"page": 2, "doc": 1, "rotate": 90}]
result = bulk_edit.edit_pdf(doc_ids, operations)
self.assertEqual(result, "OK")
mock_group.return_value.delay.assert_called_once()
@mock.patch("documents.bulk_edit.group")
@mock.patch("documents.tasks.consume_file.s")
def test_edit_pdf_with_user_override(self, mock_consume_file, mock_group):
"""
GIVEN:
- Existing document
WHEN:
- edit_pdf is called with user override
THEN:
- Task is created with user context
"""
mock_group.return_value.delay.return_value = None
doc_ids = [self.doc2.id]
operations = [{"page": 1, "doc": 0}, {"page": 2, "doc": 1}]
user = User.objects.create(username="editor")
result = bulk_edit.edit_pdf(doc_ids, operations, user=user)
self.assertEqual(result, "OK")
mock_group.return_value.delay.assert_called_once()
@mock.patch("documents.bulk_edit.chord")
@mock.patch("documents.tasks.consume_file.s")
def test_edit_pdf_with_delete_original(self, mock_consume_file, mock_chord):
"""
GIVEN:
- Existing document
WHEN:
- edit_pdf is called with delete_original=True
THEN:
- Task group is triggered
"""
mock_chord.return_value.delay.return_value = None
doc_ids = [self.doc2.id]
operations = [{"page": 1}, {"page": 2}]
result = bulk_edit.edit_pdf(doc_ids, operations, delete_original=True)
self.assertEqual(result, "OK")
mock_chord.assert_called_once()
@mock.patch("documents.tasks.update_document_content_maybe_archive_file.delay")
def test_edit_pdf_with_update_document(self, mock_update_document):
"""
GIVEN:
- A single existing PDF document
WHEN:
- edit_pdf is called with update_document=True and a single output
THEN:
- The original document is updated in-place
- The update_document_content_maybe_archive_file task is triggered
"""
doc_ids = [self.doc2.id]
operations = [{"page": 1}, {"page": 2}]
original_checksum = self.doc2.checksum
original_page_count = self.doc2.page_count
result = bulk_edit.edit_pdf(
doc_ids,
operations=operations,
update_document=True,
delete_original=False,
)
self.assertEqual(result, "OK")
self.doc2.refresh_from_db()
self.assertNotEqual(self.doc2.checksum, original_checksum)
self.assertNotEqual(self.doc2.page_count, original_page_count)
mock_update_document.assert_called_once_with(document_id=self.doc2.id)
@mock.patch("documents.bulk_edit.group")
@mock.patch("documents.tasks.consume_file.s")
def test_edit_pdf_without_metadata(self, mock_consume_file, mock_group):
"""
GIVEN:
- Existing document
WHEN:
- edit_pdf is called with include_metadata=False
THEN:
- Tasks are created with empty metadata
"""
mock_group.return_value.delay.return_value = None
doc_ids = [self.doc2.id]
operations = [{"page": 1}]
result = bulk_edit.edit_pdf(doc_ids, operations, include_metadata=False)
self.assertEqual(result, "OK")
mock_group.return_value.delay.assert_called_once()
@mock.patch("documents.bulk_edit.group")
@mock.patch("documents.tasks.consume_file.s")
def test_edit_pdf_open_failure(self, mock_consume_file, mock_group):
"""
GIVEN:
- Existing document
WHEN:
- edit_pdf fails to open PDF
THEN:
- Task group is not called
"""
doc_ids = [self.doc2.id]
operations = [
{"page": 9999}, # invalid page, forces error during PDF load
]
with self.assertLogs("paperless.bulk_edit", level="ERROR"):
with self.assertRaises(Exception):
bulk_edit.edit_pdf(doc_ids, operations)
mock_group.assert_not_called()
mock_consume_file.assert_not_called()
@mock.patch("documents.bulk_edit.group")
@mock.patch("documents.tasks.consume_file.s")
def test_edit_pdf_multiple_outputs_with_update_flag_errors(
self,
mock_consume_file,
mock_group,
):
"""
GIVEN:
- Existing document
WHEN:
- edit_pdf is called with multiple outputs and update_document=True
THEN:
- An error is logged and task group is not called
"""
doc_ids = [self.doc2.id]
operations = [
{"page": 1, "doc": 0},
{"page": 2, "doc": 1},
]
with self.assertLogs("paperless.bulk_edit", level="ERROR"):
with self.assertRaises(ValueError):
bulk_edit.edit_pdf(doc_ids, operations, update_document=True)
mock_group.assert_not_called()
mock_consume_file.assert_not_called()