Feature: auto-clean some invalid PDFs (#7651)

This commit is contained in:
shamoon
2024-09-25 08:57:20 -07:00
committed by GitHub
parent c92c3e224a
commit 5e687d9a93
7 changed files with 100 additions and 4 deletions

View File

@@ -235,6 +235,8 @@ class FaultyGenericExceptionParser(_BaseTestParser):
def fake_magic_from_file(file, mime=False):
if mime:
if file.name.startswith("invalid_pdf"):
return "application/octet-stream"
if os.path.splitext(file)[1] == ".pdf":
return "application/pdf"
elif os.path.splitext(file)[1] == ".png":
@@ -952,6 +954,27 @@ class TestConsumer(
sanity_check()
# Checks that consuming the "invalid_pdf.pdf" sample results in exactly one
# call to documents.consumer.run_subprocess, and that the command passed to
# it starts with "qpdf", "--replace-input".
# run_subprocess is replaced with a mock, which is injected as `m`.
@mock.patch("documents.consumer.run_subprocess")
def test_try_to_clean_invalid_pdf(self, m):
# Stage the sample (stored in "samples" next to this test module) inside the
# configured consumption directory.
shutil.copy(
Path(__file__).parent / "samples" / "invalid_pdf.pdf",
settings.CONSUMPTION_DIR / "invalid_pdf.pdf",
)
with self.get_consumer(
settings.CONSUMPTION_DIR / "invalid_pdf.pdf",
) as consumer:
# fails because no qpdf
# NOTE(review): presumably the mocked run_subprocess leaves the file
# unrepaired, so consumption still ends in ConsumerError — confirm.
self.assertRaises(ConsumerError, consumer.run)
# The clean-up attempt must have been made once and only once.
m.assert_called_once()
# call_args unpacks to (positional args, keyword args); only the positional
# part is inspected here.
args, _ = m.call_args
# The first positional argument is the command sequence.
command = args[0]
# Only the executable and its first flag are pinned; the remaining command
# elements are not asserted.
self.assertEqual(command[0], "qpdf")
self.assertEqual(command[1], "--replace-input")
@mock.patch("documents.consumer.magic.from_file", fake_magic_from_file)
class TestConsumerCreatedDate(DirectoriesMixin, GetConsumerMixin, TestCase):