mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Adds specific handling for CCITT Group 4, which pikepdf decodes, but not correctly
This commit is contained in:
parent
caf4b54bc7
commit
4cc2976614
@ -20,6 +20,10 @@ from pyzbar import pyzbar
|
||||
logger = logging.getLogger("paperless.barcodes")
|
||||
|
||||
|
||||
class BarcodeImageFormatError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
@lru_cache(maxsize=8)
|
||||
def supported_file_type(mime_type) -> bool:
|
||||
"""
|
||||
@ -115,6 +119,9 @@ def scan_file_for_separating_barcodes(filepath: str) -> Tuple[Optional[str], Lis
|
||||
for image_key in page.images:
|
||||
pdfimage = PdfImage(page.images[image_key])
|
||||
|
||||
if "/CCITTFaxDecode" in pdfimage.filters:
|
||||
raise BarcodeImageFormatError()
|
||||
|
||||
# Not all images can be transcoded to a PIL image, which
|
||||
# is what pyzbar expects to receive
|
||||
pillow_img = pdfimage.as_pil_image()
|
||||
|
BIN
src/documents/tests/samples/barcodes/barcode-fax-image.pdf
Normal file
BIN
src/documents/tests/samples/barcodes/barcode-fax-image.pdf
Normal file
Binary file not shown.
@ -226,7 +226,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
WHEN:
|
||||
- Transcoding the image to a PIL image is attempted, but fails
|
||||
THEN:
|
||||
- The barcode reader is still called, as
|
||||
- The barcode reader is still called
|
||||
"""
|
||||
|
||||
def _build_device_n_pdf(self, save_path: str):
|
||||
@ -279,6 +279,26 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
|
||||
reader.assert_called()
|
||||
|
||||
def test_scan_file_for_separating_barcodes_fax_decode(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- A PDF containing an image that uses CCITT Group 4 encoding
|
||||
WHEN:
|
||||
- Barcode processing happens with the file
|
||||
THEN:
|
||||
- The barcode is still detected
|
||||
"""
|
||||
test_file = os.path.join(
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"barcode-fax-image.pdf",
|
||||
)
|
||||
pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
|
||||
test_file,
|
||||
)
|
||||
|
||||
self.assertEqual(pdf_file, test_file)
|
||||
self.assertListEqual(separator_page_numbers, [1])
|
||||
|
||||
def test_scan_file_for_separating_qr_barcodes(self):
|
||||
test_file = os.path.join(
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
|
Loading…
x
Reference in New Issue
Block a user