Adds specific handling for CCITT Group 4, which pikepdf decodes, but not correctly

This commit is contained in:
Trenton Holmes
2022-10-05 19:58:40 -07:00
committed by Trenton H
parent c888b3dfd3
commit ddef90d96e
3 changed files with 28 additions and 1 deletions

View File

@@ -20,6 +20,10 @@ from pyzbar import pyzbar
logger = logging.getLogger("paperless.barcodes")
class BarcodeImageFormatError(Exception):
    """Signal that a PDF image is in a format barcode scanning cannot use.

    Raised without arguments by the barcode scanner when an embedded image
    lists the ``/CCITTFaxDecode`` filter (CCITT Group 4): pikepdf will
    decode such images, but not correctly, so the image is rejected up
    front rather than being converted and handed to pyzbar.
    """
@lru_cache(maxsize=8)
def supported_file_type(mime_type) -> bool:
"""
@@ -115,6 +119,9 @@ def scan_file_for_separating_barcodes(filepath: str) -> Tuple[Optional[str], Lis
for image_key in page.images:
pdfimage = PdfImage(page.images[image_key])
if "/CCITTFaxDecode" in pdfimage.filters:
raise BarcodeImageFormatError()
# Not all images can be transcoded to a PIL image, which
# is what pyzbar expects to receive
pillow_img = pdfimage.as_pil_image()