Adds specific handling for CCITT Group 4 (/CCITTFaxDecode) images, which pikepdf decodes, but incorrectly

2025-12-14 01:21:14 -06:00 · 2022-10-05 19:58:40 -07:00
parent c888b3dfd3
commit ddef90d96e
3 changed files with 28 additions and 1 deletions
--- a/src/documents/barcodes.py
+++ b/src/documents/barcodes.py
@@ -20,6 +20,10 @@ from pyzbar import pyzbar
 logger = logging.getLogger("paperless.barcodes")


+class BarcodeImageFormatError(Exception):
+    pass
+
+
@lru_cache(maxsize=8)
 def supported_file_type(mime_type) -> bool:
    """
@@ -115,6 +119,9 @@ def scan_file_for_separating_barcodes(filepath: str) -> Tuple[Optional[str], Lis
                for image_key in page.images:
                    pdfimage = PdfImage(page.images[image_key])

+                    if "/CCITTFaxDecode" in pdfimage.filters:
+                        raise BarcodeImageFormatError()
+
                    # Not all images can be transcoded to a PIL image, which
                    # is what pyzbar expects to receive
                    pillow_img = pdfimage.as_pil_image()