fixes #631

2026-02-03 23:22:42 -06:00 · 2021-03-14 14:42:48 +01:00
parent 0ad2b05455
commit 40ce38254b
6 changed files with 63 additions and 30 deletions
--- a/src/paperless_tesseract/parsers.py
+++ b/src/paperless_tesseract/parsers.py
@@ -291,6 +291,7 @@ class RasterisedDocumentParser(DocumentParser):
                    f"No text was found in {document_path}, the content will "
                    f"be empty."
                )
+                self.text = ""


 def strip_excess_whitespace(text):
--- a/src/paperless_tesseract/tests/samples/encrypted.pdf
+++ b/src/paperless_tesseract/tests/samples/encrypted.pdf
--- a/src/paperless_tesseract/tests/samples/signed.pdf
+++ b/src/paperless_tesseract/tests/samples/signed.pdf
--- a/src/paperless_tesseract/tests/test_parser.py
+++ b/src/paperless_tesseract/tests/test_parser.py
@@ -81,8 +81,8 @@ class TestParser(DirectoriesMixin, TestCase):

    def test_thumbnail(self):
        parser = RasterisedDocumentParser(uuid.uuid4())
-        parser.get_thumbnail(os.path.join(self.SAMPLE_FILES, 'simple-digital.pdf'), "application/pdf")
-        # dont really know how to test it, just call it and assert that it does not raise anything.
+        thumb = parser.get_thumbnail(os.path.join(self.SAMPLE_FILES, 'simple-digital.pdf'), "application/pdf")
+        self.assertTrue(os.path.isfile(thumb))

    @mock.patch("documents.parsers.run_convert")
    def test_thumbnail_fallback(self, m):
@@ -96,8 +96,13 @@ class TestParser(DirectoriesMixin, TestCase):
        m.side_effect = call_convert

        parser = RasterisedDocumentParser(uuid.uuid4())
-        parser.get_thumbnail(os.path.join(self.SAMPLE_FILES, 'simple-digital.pdf'), "application/pdf")
-        # dont really know how to test it, just call it and assert that it does not raise anything.
+        thumb = parser.get_thumbnail(os.path.join(self.SAMPLE_FILES, 'simple-digital.pdf'), "application/pdf")
+        self.assertTrue(os.path.isfile(thumb))
+
+    def test_thumbnail_encrypted(self):
+        parser = RasterisedDocumentParser(uuid.uuid4())
+        thumb = parser.get_thumbnail(os.path.join(self.SAMPLE_FILES, 'encrypted.pdf'), "application/pdf")
+        self.assertTrue(os.path.isfile(thumb))

    def test_get_dpi(self):
        parser = RasterisedDocumentParser(None)
@@ -135,6 +140,15 @@ class TestParser(DirectoriesMixin, TestCase):
        self.assertIsNone(parser.archive_path)
        self.assertContainsStrings(parser.get_text(), ["Please enter your name in here:", "This is a PDF document with a form."])

+    @override_settings(OCR_MODE="skip")
+    def test_signed(self):
+        parser = RasterisedDocumentParser(None)
+
+        parser.parse(os.path.join(self.SAMPLE_FILES, "signed.pdf"), "application/pdf")
+
+        self.assertIsNone(parser.archive_path)
+        self.assertContainsStrings(parser.get_text(), ["This is a digitally signed PDF, created with Acrobat Pro for the Paperless project to enable", "automated testing of signed/encrypted PDFs"])
+
    @override_settings(OCR_MODE="skip")
    def test_encrypted(self):
        parser = RasterisedDocumentParser(None)
@@ -142,7 +156,8 @@ class TestParser(DirectoriesMixin, TestCase):
        parser.parse(os.path.join(self.SAMPLE_FILES, "encrypted.pdf"), "application/pdf")

        self.assertIsNone(parser.archive_path)
-        self.assertContainsStrings(parser.get_text(), ["This is a digitally signed PDF, created with Acrobat Pro for the Paperless project to enable", "automated testing of signed/encrypted PDFs"])
+        self.assertEqual(parser.get_text(), "")
+

    @override_settings(OCR_MODE="redo")
    def test_with_form_error_notext(self):