mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-08-12 00:19:48 +00:00
Feature: auto-clean some invalid pdfs (#7651)
This commit is contained in:
BIN
src/documents/tests/samples/invalid_pdf.pdf
Normal file
BIN
src/documents/tests/samples/invalid_pdf.pdf
Normal file
Binary file not shown.
@@ -1402,6 +1402,27 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
|
||||
self.assertEqual(overrides.filename, "simple.pdf")
|
||||
self.assertEqual(overrides.custom_field_ids, [custom_field.id])
|
||||
|
||||
def test_upload_invalid_pdf(self):
    """
    GIVEN:
        - A file named "*.pdf" that is not a valid PDF, but whose MIME type
          is listed in settings.CONSUMER_PDF_RECOVERABLE_MIME_TYPES
    WHEN:
        - The file is uploaded through the post_document endpoint
    THEN:
        - The upload is not rejected (HTTP 200 is returned)
    """
    sample_path = os.path.join(
        os.path.dirname(__file__),
        "samples",
        "invalid_pdf.pdf",
    )

    # The mocked consume task has to hand back an AsyncResult with an id.
    task_id = str(uuid.uuid4())
    self.consume_file_mock.return_value = celery.result.AsyncResult(id=task_id)

    with open(sample_path, "rb") as sample:
        payload = {"document": sample}
        response = self.client.post("/api/documents/post_document/", payload)

    self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
|
||||
def test_get_metadata(self):
|
||||
doc = Document.objects.create(
|
||||
title="test",
|
||||
|
@@ -235,6 +235,8 @@ class FaultyGenericExceptionParser(_BaseTestParser):
|
||||
|
||||
def fake_magic_from_file(file, mime=False):
|
||||
if mime:
|
||||
if file.name.startswith("invalid_pdf"):
|
||||
return "application/octet-stream"
|
||||
if os.path.splitext(file)[1] == ".pdf":
|
||||
return "application/pdf"
|
||||
elif os.path.splitext(file)[1] == ".png":
|
||||
@@ -952,6 +954,27 @@ class TestConsumer(
|
||||
|
||||
sanity_check()
|
||||
|
||||
@mock.patch("documents.consumer.run_subprocess")
def test_try_to_clean_invalid_pdf(self, m):
    """
    GIVEN:
        - A copy of samples/invalid_pdf.pdf placed in the consumption directory
        - documents.consumer.run_subprocess replaced by a mock
    WHEN:
        - The consumer is run on that file
    THEN:
        - A ConsumerError is raised
        - run_subprocess was called exactly once, with a command that starts
          with "qpdf" followed by "--replace-input"
    """
    sample = Path(__file__).parent / "samples" / "invalid_pdf.pdf"
    target = settings.CONSUMPTION_DIR / "invalid_pdf.pdf"
    shutil.copy(sample, target)

    with self.get_consumer(target) as consumer:
        # Expected to fail: run_subprocess is mocked, so qpdf never actually
        # runs (presumably leaving the file unrepaired).
        with self.assertRaises(ConsumerError):
            consumer.run()

    m.assert_called_once()

    positional, _ = m.call_args
    command = positional[0]
    executable = command[0]
    first_flag = command[1]

    self.assertEqual(executable, "qpdf")
    self.assertEqual(first_flag, "--replace-input")
|
||||
|
||||
|
||||
@mock.patch("documents.consumer.magic.from_file", fake_magic_from_file)
|
||||
class TestConsumerCreatedDate(DirectoriesMixin, GetConsumerMixin, TestCase):
|
||||
|
Reference in New Issue
Block a user