mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-09 09:58:20 -05:00
add TIFF barcode support
Signed-off-by: Florian Brandes <florian.brandes@posteo.de>
This commit is contained in:
parent
cc93616019
commit
ad5188a280
@ -626,6 +626,12 @@ PAPERLESS_CONSUMER_ENABLE_BARCODES=<bool>
|
||||
|
||||
Defaults to false.
|
||||
|
||||
PAPERLESS_CONSUMER_BARCODE_TIFF_SUPPORT=<bool>
|
||||
Whether TIFF image files should be scanned for barcodes.
|
||||
This will automatically convert any TIFF image(s) to pdfs for later
|
||||
processing.
|
||||
|
||||
Defaults to false.
|
||||
|
||||
PAPERLESS_CONSUMER_BARCODE_STRING=PATCHT
|
||||
Defines the string to be detected as a separator barcode.
|
||||
|
@ -22,6 +22,8 @@ from documents.models import Tag
|
||||
from documents.sanity_checker import SanityCheckFailedException
|
||||
from pdf2image import convert_from_path
|
||||
from pikepdf import Pdf
|
||||
from PIL import Image
|
||||
from PIL import ImageSequence
|
||||
from pyzbar import pyzbar
|
||||
from whoosh.writing import AsyncWriter
|
||||
|
||||
@ -93,6 +95,41 @@ def barcode_reader(image) -> List[str]:
|
||||
return barcodes
|
||||
|
||||
|
||||
def convert_from_tiff_to_pdf(filepath: str) -> str:
    """
    Convert a TIFF image file to a PDF stored next to it.

    The PDF keeps the original path with only the trailing extension
    swapped to ".pdf". Every frame of a multi-page TIFF becomes one PDF
    page. Once the PDF has been written, the original TIFF is deleted.

    Returns the path of the PDF. Returns an empty string (and logs a
    warning) if the extension is not ".tif" or ".tiff"; the comparison is
    case-insensitive. If writing the PDF or deleting the TIFF fails with an
    OSError, a warning is logged and the intended PDF path is still
    returned, so callers must not assume the file exists.
    """
    base, file_extension = os.path.splitext(filepath)
    if file_extension.lower() not in (".tif", ".tiff"):
        logger.warning(f"Cannot convert from {str(file_extension)} to pdf.")
        return ""

    # Swap only the trailing extension: str.replace() would also rewrite a
    # ".tif" occurring earlier in the path (e.g. inside a directory name).
    newpath = base + ".pdf"

    # convert() yields independent, fully loaded copies of each frame, so
    # the source file can be closed before it is deleted below.
    with Image.open(filepath) as image:
        images = [page.convert("RGB") for page in ImageSequence.Iterator(image)]

    try:
        # An empty append_images list produces a single-page PDF.
        images[0].save(newpath, save_all=True, append_images=images[1:])
        os.unlink(filepath)
    except OSError as e:
        logger.warning(
            f"Could not save the file as pdf. "
            f"The original image file was not deleted. Error: "
            f"{str(e)}",
        )
    return newpath
|
||||
|
||||
|
||||
def scan_file_for_separating_barcodes(filepath: str) -> List[int]:
|
||||
"""
|
||||
Scan the provided file for page separating barcodes
|
||||
@ -195,42 +232,56 @@ def consume_file(
|
||||
if settings.CONSUMER_ENABLE_BARCODES:
|
||||
separators = []
|
||||
document_list = []
|
||||
separators = scan_file_for_separating_barcodes(path)
|
||||
if separators:
|
||||
logger.debug(f"Pages with separators found in: {str(path)}")
|
||||
document_list = separate_pages(path, separators)
|
||||
if document_list:
|
||||
for n, document in enumerate(document_list):
|
||||
# save to consumption dir
|
||||
# rename it to the original filename with number prefix
|
||||
if override_filename:
|
||||
newname = f"{str(n)}_" + override_filename
|
||||
else:
|
||||
newname = None
|
||||
save_to_dir(document, newname=newname)
|
||||
# if we got here, the document was successfully split
|
||||
# and can safely be deleted
|
||||
logger.debug("Deleting file {}".format(path))
|
||||
os.unlink(path)
|
||||
# notify the sender, otherwise the progress bar
|
||||
# in the UI stays stuck
|
||||
payload = {
|
||||
"filename": override_filename,
|
||||
"task_id": task_id,
|
||||
"current_progress": 100,
|
||||
"max_progress": 100,
|
||||
"status": "SUCCESS",
|
||||
"message": "finished",
|
||||
}
|
||||
try:
|
||||
async_to_sync(get_channel_layer().group_send)(
|
||||
"status_updates",
|
||||
{"type": "status_update", "data": payload},
|
||||
)
|
||||
except OSError as e:
|
||||
logger.warning("OSError. It could be, the broker cannot be reached.")
|
||||
logger.warning(str(e))
|
||||
return "File successfully split"
|
||||
if settings.CONSUMER_BARCODE_TIFF_SUPPORT:
|
||||
supported_extensions = [".pdf", ".tiff", ".tif"]
|
||||
else:
|
||||
supported_extensions = [".pdf"]
|
||||
file_extension = os.path.splitext(os.path.basename(path))[1]
|
||||
if file_extension not in supported_extensions:
|
||||
logger.warning(
|
||||
f"Unsupported file format for barcode reader: {str(file_extension)}",
|
||||
)
|
||||
else:
|
||||
if file_extension == ".tif" or file_extension == ".tiff":
|
||||
path = convert_from_tiff_to_pdf(path)
|
||||
separators = scan_file_for_separating_barcodes(path)
|
||||
if separators:
|
||||
logger.debug(f"Pages with separators found in: {str(path)}")
|
||||
document_list = separate_pages(path, separators)
|
||||
if document_list:
|
||||
for n, document in enumerate(document_list):
|
||||
# save to consumption dir
|
||||
# rename it to the original filename with number prefix
|
||||
if override_filename:
|
||||
newname = f"{str(n)}_" + override_filename
|
||||
else:
|
||||
newname = None
|
||||
save_to_dir(document, newname=newname)
|
||||
# if we got here, the document was successfully split
|
||||
# and can safely be deleted
|
||||
logger.debug("Deleting file {}".format(path))
|
||||
os.unlink(path)
|
||||
# notify the sender, otherwise the progress bar
|
||||
# in the UI stays stuck
|
||||
payload = {
|
||||
"filename": override_filename,
|
||||
"task_id": task_id,
|
||||
"current_progress": 100,
|
||||
"max_progress": 100,
|
||||
"status": "SUCCESS",
|
||||
"message": "finished",
|
||||
}
|
||||
try:
|
||||
async_to_sync(get_channel_layer().group_send)(
|
||||
"status_updates",
|
||||
{"type": "status_update", "data": payload},
|
||||
)
|
||||
except OSError as e:
|
||||
logger.warning(
|
||||
"OSError. It could be, the broker cannot be reached.",
|
||||
)
|
||||
logger.warning(str(e))
|
||||
return "File successfully split"
|
||||
|
||||
# continue with consumption if no barcode was found
|
||||
document = Consumer().try_consume_file(
|
||||
|
BIN
src/documents/tests/samples/barcodes/patch-code-t-middle.tiff
Normal file
BIN
src/documents/tests/samples/barcodes/patch-code-t-middle.tiff
Normal file
Binary file not shown.
BIN
src/documents/tests/samples/simple.tiff
Normal file
BIN
src/documents/tests/samples/simple.tiff
Normal file
Binary file not shown.
@ -204,6 +204,30 @@ class TestTasks(DirectoriesMixin, TestCase):
|
||||
img = Image.open(test_file)
|
||||
self.assertEqual(tasks.barcode_reader(img), ["CUSTOM BARCODE"])
|
||||
|
||||
def test_convert_from_tiff_to_pdf(self):
    """
    Converting a copy of the sample TIFF must yield an existing file
    whose extension is ".pdf".
    """
    sample = os.path.join(os.path.dirname(__file__), "samples", "simple.tiff")
    # Work on a scratch copy so the sample itself is left untouched.
    scratch_copy = os.path.join(settings.SCRATCH_DIR, "simple.tiff")
    shutil.copy(sample, scratch_copy)

    converted = tasks.convert_from_tiff_to_pdf(scratch_copy)

    self.assertTrue(os.path.isfile(converted))
    _, suffix = os.path.splitext(os.path.basename(converted))
    self.assertEqual(suffix, ".pdf")
|
||||
|
||||
def test_convert_error_from_pdf_to_pdf(self):
    """
    Passing a PDF to the TIFF converter must not return the path of an
    existing file.
    """
    sample = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
    scratch_copy = os.path.join(settings.SCRATCH_DIR, "simple.pdf")
    shutil.copy(sample, scratch_copy)

    result = tasks.convert_from_tiff_to_pdf(scratch_copy)

    self.assertFalse(os.path.isfile(result))
|
||||
|
||||
def test_scan_file_for_separating_barcodes(self):
|
||||
test_file = os.path.join(
|
||||
os.path.dirname(__file__),
|
||||
@ -400,7 +424,23 @@ class TestTasks(DirectoriesMixin, TestCase):
|
||||
"barcodes",
|
||||
"patch-code-t-middle.pdf",
|
||||
)
|
||||
dst = os.path.join(settings.SCRATCH_DIR, "patch-code-t-middle.pd")
|
||||
dst = os.path.join(settings.SCRATCH_DIR, "patch-code-t-middle.pdf")
|
||||
shutil.copy(test_file, dst)
|
||||
|
||||
self.assertEqual(tasks.consume_file(dst), "File successfully split")
|
||||
|
||||
@override_settings(
    CONSUMER_ENABLE_BARCODES=True,
    CONSUMER_BARCODE_TIFF_SUPPORT=True,
)
def test_consume_barcode_tiff_file(self):
    """
    With barcode and TIFF support enabled, consuming the sample TIFF that
    has a separator barcode in the middle must report a successful split.
    """
    sample_name = "patch-code-t-middle.tiff"
    source = os.path.join(
        os.path.dirname(__file__),
        "samples",
        "barcodes",
        sample_name,
    )
    scratch_copy = os.path.join(settings.SCRATCH_DIR, sample_name)
    shutil.copy(source, scratch_copy)

    outcome = tasks.consume_file(scratch_copy)

    self.assertEqual(outcome, "File successfully split")
|
||||
|
@ -502,6 +502,10 @@ CONSUMER_ENABLE_BARCODES = __get_boolean(
|
||||
"PAPERLESS_CONSUMER_ENABLE_BARCODES",
|
||||
)
|
||||
|
||||
# Opt-in switch read from the environment: when enabled, TIFF files are
# converted to PDF so they can be scanned for separator barcodes
# (documented default: false).
CONSUMER_BARCODE_TIFF_SUPPORT = __get_boolean("PAPERLESS_CONSUMER_BARCODE_TIFF_SUPPORT")
|
||||
|
||||
CONSUMER_BARCODE_STRING = os.getenv("PAPERLESS_CONSUMER_BARCODE_STRING", "PATCHT")
|
||||
|
||||
OPTIMIZE_THUMBNAILS = __get_boolean("PAPERLESS_OPTIMIZE_THUMBNAILS", "true")
|
||||
|
Loading…
x
Reference in New Issue
Block a user