diff --git a/src/documents/barcodes.py b/src/documents/barcodes.py index 54db83c19..a4be126a5 100644 --- a/src/documents/barcodes.py +++ b/src/documents/barcodes.py @@ -20,6 +20,10 @@ from pyzbar import pyzbar logger = logging.getLogger("paperless.barcodes") +class BarcodeImageFormatError(Exception): + """Raised when an embedded PDF image has a rejected filter (e.g. /CCITTFaxDecode).""" + + @lru_cache(maxsize=8) def supported_file_type(mime_type) -> bool: """ @@ -115,6 +119,9 @@ def scan_file_for_separating_barcodes(filepath: str) -> Tuple[Optional[str], Lis for image_key in page.images: pdfimage = PdfImage(page.images[image_key]) + if "/CCITTFaxDecode" in pdfimage.filters: + raise BarcodeImageFormatError("Unsupported /CCITTFaxDecode image") + # Not all images can be transcoded to a PIL image, which # is what pyzbar expects to receive pillow_img = pdfimage.as_pil_image() diff --git a/src/documents/tests/samples/barcodes/barcode-fax-image.pdf b/src/documents/tests/samples/barcodes/barcode-fax-image.pdf new file mode 100644 index 000000000..2e248c82b Binary files /dev/null and b/src/documents/tests/samples/barcodes/barcode-fax-image.pdf differ diff --git a/src/documents/tests/test_barcodes.py b/src/documents/tests/test_barcodes.py index 0f16845d2..ee8df9f34 100644 --- a/src/documents/tests/test_barcodes.py +++ b/src/documents/tests/test_barcodes.py @@ -226,7 +226,7 @@ class TestBarcode(DirectoriesMixin, TestCase): WHEN: - The image tries to be transcoded to a PIL image, but fails THEN: - - The barcode reader is still called, as + - The barcode reader is still called """ def _build_device_n_pdf(self, save_path: str): @@ -279,6 +279,26 @@ class TestBarcode(DirectoriesMixin, TestCase): reader.assert_called() + def test_scan_file_for_separating_barcodes_fax_decode(self): + """ + GIVEN: + - A PDF containing an image compressed with CCITT Group 4 encoding + WHEN: + - Barcode processing happens with the file + THEN: + - The barcode is still detected + """ + test_file = os.path.join( + self.BARCODE_SAMPLE_DIR, + "barcode-fax-image.pdf", + ) + pdf_file, separator_page_numbers = 
barcodes.scan_file_for_separating_barcodes( + test_file, + ) + + self.assertEqual(pdf_file, test_file) + self.assertListEqual(separator_page_numbers, [1]) + def test_scan_file_for_separating_qr_barcodes(self): test_file = os.path.join( self.BARCODE_SAMPLE_DIR,