mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-09 09:58:20 -05:00
Allows using pdf2image instead of pikepdf if desired
This commit is contained in:
parent
0a19ad4edb
commit
f8ce6285df
@ -701,6 +701,17 @@ PAPERLESS_CONSUMER_ENABLE_BARCODES=<bool>
|
|||||||
|
|
||||||
Defaults to false.
|
Defaults to false.
|
||||||
|
|
||||||
|
PAPERLESS_CONSUMER_USE_LEGACY_DETECTION=<bool>
|
||||||
|
Enables the legacy method of detecting barcodes. By default, images are
|
||||||
|
extracted directly from the PDF structure for barcode detection. If this
|
||||||
|
configuration value is set, images of the whole PDF page will be used instead.
|
||||||
|
|
||||||
|
This is a slower and more memory intensive process, but may be required for
|
||||||
|
certain files, depending on how they are produced and how images are encoded.
|
||||||
|
|
||||||
|
Defaults to false.
|
||||||
|
|
||||||
|
|
||||||
PAPERLESS_CONSUMER_BARCODE_TIFF_SUPPORT=<bool>
|
PAPERLESS_CONSUMER_BARCODE_TIFF_SUPPORT=<bool>
|
||||||
Whether TIFF image files should be scanned for barcodes.
|
Whether TIFF image files should be scanned for barcodes.
|
||||||
This will automatically convert any TIFF image(s) to pdfs for later
|
This will automatically convert any TIFF image(s) to pdfs for later
|
||||||
|
@ -150,16 +150,20 @@ def scan_file_for_separating_barcodes(filepath: str) -> Tuple[Optional[str], Lis
|
|||||||
if mime_type == "image/tiff":
|
if mime_type == "image/tiff":
|
||||||
pdf_filepath = convert_from_tiff_to_pdf(filepath)
|
pdf_filepath = convert_from_tiff_to_pdf(filepath)
|
||||||
|
|
||||||
try:
|
if settings.CONSUMER_USE_LEGACY_DETECTION:
|
||||||
_pikepdf_barcode_scan(pdf_filepath)
|
|
||||||
except Exception as e:
|
|
||||||
|
|
||||||
logger.warning(
|
|
||||||
f"Exception using pikepdf for barcodes, falling back to pdf2image: {e}",
|
|
||||||
)
|
|
||||||
# Reset this incase pikepdf got part way through
|
|
||||||
separator_page_numbers = []
|
|
||||||
_pdf2image_barcode_scan(pdf_filepath)
|
_pdf2image_barcode_scan(pdf_filepath)
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
_pikepdf_barcode_scan(pdf_filepath)
|
||||||
|
except Exception as e:
|
||||||
|
|
||||||
|
logger.warning(
|
||||||
|
f"Exception using pikepdf for barcodes,"
|
||||||
|
f" falling back to pdf2image: {e}",
|
||||||
|
)
|
||||||
|
# Reset this in case pikepdf got part way through
|
||||||
|
separator_page_numbers = []
|
||||||
|
_pdf2image_barcode_scan(pdf_filepath)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
|
@ -468,6 +468,41 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
|||||||
self.assertTrue(os.path.isfile(target_file1))
|
self.assertTrue(os.path.isfile(target_file1))
|
||||||
self.assertTrue(os.path.isfile(target_file2))
|
self.assertTrue(os.path.isfile(target_file2))
|
||||||
|
|
||||||
|
@override_settings(CONSUMER_USE_LEGACY_DETECTION=True)
|
||||||
|
def test_barcode_splitter_legacy_fallback(self):
|
||||||
|
"""
|
||||||
|
GIVEN:
|
||||||
|
- File containing barcode
|
||||||
|
- Legacy method of detection is enabled
|
||||||
|
WHEN:
|
||||||
|
- File is scanned for barcodes
|
||||||
|
THEN:
|
||||||
|
- Barcodes are properly detected
|
||||||
|
"""
|
||||||
|
test_file = os.path.join(
|
||||||
|
self.BARCODE_SAMPLE_DIR,
|
||||||
|
"patch-code-t-middle.pdf",
|
||||||
|
)
|
||||||
|
tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR)
|
||||||
|
|
||||||
|
pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
|
||||||
|
test_file,
|
||||||
|
)
|
||||||
|
|
||||||
|
self.assertEqual(test_file, pdf_file)
|
||||||
|
self.assertTrue(len(separator_page_numbers) > 0)
|
||||||
|
|
||||||
|
document_list = barcodes.separate_pages(test_file, separator_page_numbers)
|
||||||
|
self.assertTrue(document_list)
|
||||||
|
for document in document_list:
|
||||||
|
barcodes.save_to_dir(document, target_dir=tempdir)
|
||||||
|
|
||||||
|
target_file1 = os.path.join(tempdir, "patch-code-t-middle_document_0.pdf")
|
||||||
|
target_file2 = os.path.join(tempdir, "patch-code-t-middle_document_1.pdf")
|
||||||
|
|
||||||
|
self.assertTrue(os.path.isfile(target_file1))
|
||||||
|
self.assertTrue(os.path.isfile(target_file2))
|
||||||
|
|
||||||
@override_settings(CONSUMER_ENABLE_BARCODES=True)
|
@override_settings(CONSUMER_ENABLE_BARCODES=True)
|
||||||
def test_consume_barcode_file(self):
|
def test_consume_barcode_file(self):
|
||||||
test_file = os.path.join(
|
test_file = os.path.join(
|
||||||
|
@ -558,15 +558,23 @@ CONSUMER_IGNORE_PATTERNS = list(
|
|||||||
|
|
||||||
CONSUMER_SUBDIRS_AS_TAGS = __get_boolean("PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS")
|
CONSUMER_SUBDIRS_AS_TAGS = __get_boolean("PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS")
|
||||||
|
|
||||||
CONSUMER_ENABLE_BARCODES = __get_boolean(
|
CONSUMER_ENABLE_BARCODES: Final[bool] = __get_boolean(
|
||||||
"PAPERLESS_CONSUMER_ENABLE_BARCODES",
|
"PAPERLESS_CONSUMER_ENABLE_BARCODES",
|
||||||
)
|
)
|
||||||
|
|
||||||
CONSUMER_BARCODE_TIFF_SUPPORT = __get_boolean(
|
CONSUMER_BARCODE_TIFF_SUPPORT: Final[bool] = __get_boolean(
|
||||||
"PAPERLESS_CONSUMER_BARCODE_TIFF_SUPPORT",
|
"PAPERLESS_CONSUMER_BARCODE_TIFF_SUPPORT",
|
||||||
)
|
)
|
||||||
|
|
||||||
CONSUMER_BARCODE_STRING = os.getenv("PAPERLESS_CONSUMER_BARCODE_STRING", "PATCHT")
|
CONSUMER_USE_LEGACY_DETECTION: Final[bool] = __get_boolean(
|
||||||
|
"PAPERLESS_CONSUMER_USE_LEGACY_DETECTION",
|
||||||
|
"NO",
|
||||||
|
)
|
||||||
|
|
||||||
|
CONSUMER_BARCODE_STRING: Final[str] = os.getenv(
|
||||||
|
"PAPERLESS_CONSUMER_BARCODE_STRING",
|
||||||
|
"PATCHT",
|
||||||
|
)
|
||||||
|
|
||||||
OCR_PAGES = int(os.getenv("PAPERLESS_OCR_PAGES", 0))
|
OCR_PAGES = int(os.getenv("PAPERLESS_OCR_PAGES", 0))
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user