From e77774234c2453ee4cc885af82226347408b95eb Mon Sep 17 00:00:00 2001
From: Trenton H <797416+stumpylog@users.noreply.github.com>
Date: Mon, 4 Nov 2024 09:09:51 -0800
Subject: [PATCH] Fixes for a change in what OCRMyPDF reports when a signed and encrypted document is encountered

---
 src/paperless_tesseract/parsers.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/paperless_tesseract/parsers.py b/src/paperless_tesseract/parsers.py
index 95c1dbfcc..28b052614 100644
--- a/src/paperless_tesseract/parsers.py
+++ b/src/paperless_tesseract/parsers.py
@@ -365,6 +365,7 @@ class RasterisedDocumentParser(DocumentParser):
         from ocrmypdf import EncryptedPdfError
         from ocrmypdf import InputFileError
         from ocrmypdf import SubprocessOutputError
+        from ocrmypdf.exceptions import DigitalSignatureError
 
         archive_path = Path(os.path.join(self.tempdir, "archive.pdf"))
         sidecar_file = Path(os.path.join(self.tempdir, "sidecar.txt"))
@@ -387,9 +388,9 @@ class RasterisedDocumentParser(DocumentParser):
 
             if not self.text:
                 raise NoTextFoundException("No text was found in the original document")
-        except EncryptedPdfError:
+        except (DigitalSignatureError, EncryptedPdfError):
             self.log.warning(
-                "This file is encrypted, OCR is impossible. Using "
+                "This file is encrypted and/or signed, OCR is impossible. Using "
                 "any text present in the original file.",
             )
             if original_has_text: