Adds specific handling for CCITT Group 4, which pikepdf decodes, but not correctly

This commit is contained in:
Trenton Holmes 2022-10-05 19:58:40 -07:00 committed by Trenton H
parent caf4b54bc7
commit 4cc2976614
3 changed files with 28 additions and 1 deletions

View File

@ -20,6 +20,10 @@ from pyzbar import pyzbar
logger = logging.getLogger("paperless.barcodes")
class BarcodeImageFormatError(Exception):
    """
    Signals that an image embedded in a PDF should not be decoded directly
    for barcode scanning.

    In this file it is raised for images whose filters include
    "/CCITTFaxDecode" (CCITT Group 4), which pikepdf decodes, but not
    correctly.
    """
@lru_cache(maxsize=8)
def supported_file_type(mime_type) -> bool:
"""
@ -115,6 +119,9 @@ def scan_file_for_separating_barcodes(filepath: str) -> Tuple[Optional[str], Lis
for image_key in page.images:
pdfimage = PdfImage(page.images[image_key])
if "/CCITTFaxDecode" in pdfimage.filters:
raise BarcodeImageFormatError()
# Not all images can be transcoded to a PIL image, which
# is what pyzbar expects to receive
pillow_img = pdfimage.as_pil_image()

View File

@ -226,7 +226,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
WHEN:
- The image tries to be transcoded to a PIL image, but fails
THEN:
- The barcode reader is still called, as
- The barcode reader is still called
"""
def _build_device_n_pdf(self, save_path: str):
@ -279,6 +279,26 @@ class TestBarcode(DirectoriesMixin, TestCase):
reader.assert_called()
def test_scan_file_for_separating_barcodes_fax_decode(self):
    """
    GIVEN:
        - A sample PDF whose embedded image uses CCITT Group 4 encoding
    WHEN:
        - The file is scanned for separating barcodes
    THEN:
        - The same file path is returned
        - The separator barcode is still detected
    """
    fax_pdf = os.path.join(self.BARCODE_SAMPLE_DIR, "barcode-fax-image.pdf")

    scan_result = barcodes.scan_file_for_separating_barcodes(fax_pdf)
    returned_path, separator_pages = scan_result

    # The scan must hand back the input path unchanged and report exactly
    # one separator entry with the value 1.
    self.assertEqual(returned_path, fax_pdf)
    self.assertListEqual(separator_pages, [1])
def test_scan_file_for_separating_qr_barcodes(self):
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,