Allows using pdf2image instead of pikepdf if desired

This commit is contained in:
Trenton H
2022-10-24 08:40:33 -07:00
parent 5841df5dd9
commit 6d2851c693
4 changed files with 70 additions and 12 deletions

View File

@@ -150,16 +150,20 @@ def scan_file_for_separating_barcodes(filepath: str) -> Tuple[Optional[str], Lis
if mime_type == "image/tiff":
pdf_filepath = convert_from_tiff_to_pdf(filepath)
try:
_pikepdf_barcode_scan(pdf_filepath)
except Exception as e:
logger.warning(
f"Exception using pikepdf for barcodes, falling back to pdf2image: {e}",
)
# Reset this incase pikepdf got part way through
separator_page_numbers = []
if settings.CONSUMER_USE_LEGACY_DETECTION:
_pdf2image_barcode_scan(pdf_filepath)
else:
try:
_pikepdf_barcode_scan(pdf_filepath)
except Exception as e:
logger.warning(
f"Exception using pikepdf for barcodes,"
f" falling back to pdf2image: {e}",
)
# Reset this incase pikepdf got part way through
separator_page_numbers = []
_pdf2image_barcode_scan(pdf_filepath)
else:
logger.warning(

View File

@@ -468,6 +468,41 @@ class TestBarcode(DirectoriesMixin, TestCase):
self.assertTrue(os.path.isfile(target_file1))
self.assertTrue(os.path.isfile(target_file2))
@override_settings(CONSUMER_USE_LEGACY_DETECTION=True)
def test_barcode_splitter_legacy_fallback(self):
"""
GIVEN:
- File containing barcode
- Legacy method of detection is enabled
WHEN:
- File is scanned for barcodes
THEN:
- Barcodes are properly detected
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"patch-code-t-middle.pdf",
)
tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR)
pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
test_file,
)
self.assertEqual(test_file, pdf_file)
self.assertTrue(len(separator_page_numbers) > 0)
document_list = barcodes.separate_pages(test_file, separator_page_numbers)
self.assertTrue(document_list)
for document in document_list:
barcodes.save_to_dir(document, target_dir=tempdir)
target_file1 = os.path.join(tempdir, "patch-code-t-middle_document_0.pdf")
target_file2 = os.path.join(tempdir, "patch-code-t-middle_document_1.pdf")
self.assertTrue(os.path.isfile(target_file1))
self.assertTrue(os.path.isfile(target_file2))
@override_settings(CONSUMER_ENABLE_BARCODES=True)
def test_consume_barcode_file(self):
test_file = os.path.join(

View File

@@ -558,15 +558,23 @@ CONSUMER_IGNORE_PATTERNS = list(
CONSUMER_SUBDIRS_AS_TAGS = __get_boolean("PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS")
CONSUMER_ENABLE_BARCODES = __get_boolean(
CONSUMER_ENABLE_BARCODES: Final[bool] = __get_boolean(
"PAPERLESS_CONSUMER_ENABLE_BARCODES",
)
CONSUMER_BARCODE_TIFF_SUPPORT = __get_boolean(
CONSUMER_BARCODE_TIFF_SUPPORT: Final[bool] = __get_boolean(
"PAPERLESS_CONSUMER_BARCODE_TIFF_SUPPORT",
)
CONSUMER_BARCODE_STRING = os.getenv("PAPERLESS_CONSUMER_BARCODE_STRING", "PATCHT")
CONSUMER_USE_LEGACY_DETECTION: Final[bool] = __get_boolean(
"PAPERLESS_CONSUMER_USE_LEGACY_DETECTION",
"NO",
)
CONSUMER_BARCODE_STRING: Final[str] = os.getenv(
"PAPERLESS_CONSUMER_BARCODE_STRING",
"PATCHT",
)
OCR_PAGES = int(os.getenv("PAPERLESS_OCR_PAGES", 0))