mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Adds specific handling for CCITT Group 4 encoded images, which pikepdf can decode, but not correctly
This commit is contained in:
parent
caf4b54bc7
commit
4cc2976614
@ -20,6 +20,10 @@ from pyzbar import pyzbar
|
|||||||
logger = logging.getLogger("paperless.barcodes")
|
logger = logging.getLogger("paperless.barcodes")
|
||||||
|
|
||||||
|
|
||||||
|
class BarcodeImageFormatError(Exception):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
@lru_cache(maxsize=8)
|
@lru_cache(maxsize=8)
|
||||||
def supported_file_type(mime_type) -> bool:
|
def supported_file_type(mime_type) -> bool:
|
||||||
"""
|
"""
|
||||||
@ -115,6 +119,9 @@ def scan_file_for_separating_barcodes(filepath: str) -> Tuple[Optional[str], Lis
|
|||||||
for image_key in page.images:
|
for image_key in page.images:
|
||||||
pdfimage = PdfImage(page.images[image_key])
|
pdfimage = PdfImage(page.images[image_key])
|
||||||
|
|
||||||
|
if "/CCITTFaxDecode" in pdfimage.filters:
|
||||||
|
raise BarcodeImageFormatError()
|
||||||
|
|
||||||
# Not all images can be transcoded to a PIL image, which
|
# Not all images can be transcoded to a PIL image, which
|
||||||
# is what pyzbar expects to receive
|
# is what pyzbar expects to receive
|
||||||
pillow_img = pdfimage.as_pil_image()
|
pillow_img = pdfimage.as_pil_image()
|
||||||
|
BIN
src/documents/tests/samples/barcodes/barcode-fax-image.pdf
Normal file
BIN
src/documents/tests/samples/barcodes/barcode-fax-image.pdf
Normal file
Binary file not shown.
@ -226,7 +226,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
|||||||
WHEN:
|
WHEN:
|
||||||
- The image tries to be transcoded to a PIL image, but fails
|
- The image tries to be transcoded to a PIL image, but fails
|
||||||
THEN:
|
THEN:
|
||||||
- The barcode reader is still called, as
|
- The barcode reader is still called
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def _build_device_n_pdf(self, save_path: str):
|
def _build_device_n_pdf(self, save_path: str):
|
||||||
@ -279,6 +279,26 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
|||||||
|
|
||||||
reader.assert_called()
|
reader.assert_called()
|
||||||
|
|
||||||
|
def test_scan_file_for_separating_barcodes_fax_decode(self):
|
||||||
|
"""
|
||||||
|
GIVEN:
|
||||||
|
- A PDF containing an image with CCITT Group 4 encoding
|
||||||
|
WHEN:
|
||||||
|
- Barcode processing happens with the file
|
||||||
|
THEN:
|
||||||
|
- The barcode is still detected
|
||||||
|
"""
|
||||||
|
test_file = os.path.join(
|
||||||
|
self.BARCODE_SAMPLE_DIR,
|
||||||
|
"barcode-fax-image.pdf",
|
||||||
|
)
|
||||||
|
pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
|
||||||
|
test_file,
|
||||||
|
)
|
||||||
|
|
||||||
|
self.assertEqual(pdf_file, test_file)
|
||||||
|
self.assertListEqual(separator_page_numbers, [1])
|
||||||
|
|
||||||
def test_scan_file_for_separating_qr_barcodes(self):
|
def test_scan_file_for_separating_qr_barcodes(self):
|
||||||
test_file = os.path.join(
|
test_file = os.path.join(
|
||||||
self.BARCODE_SAMPLE_DIR,
|
self.BARCODE_SAMPLE_DIR,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user