mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
more improvements to tiff support
Signed-off-by: Florian Brandes <florian.brandes@posteo.de>
This commit is contained in:
parent
6d0fdc7510
commit
bf57b6e4a2
@ -97,15 +97,15 @@ def barcode_reader(image) -> List[str]:
|
|||||||
|
|
||||||
def convert_from_tiff_to_pdf(filepath: str) -> str:
|
def convert_from_tiff_to_pdf(filepath: str) -> str:
|
||||||
"""
|
"""
|
||||||
converts a given TIFF image file to pdf.
|
converts a given TIFF image file to a pdf stored in a temporary directory.
|
||||||
Returns the new pdf file.
|
Returns the new pdf file.
|
||||||
"""
|
"""
|
||||||
file_extension = os.path.splitext(os.path.basename(filepath))[1]
|
file_name = os.path.splitext(os.path.basename(filepath))[0]
|
||||||
|
file_extension = os.path.splitext(os.path.basename(filepath))[1].lower()
|
||||||
|
tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR)
|
||||||
# use old file name with pdf extension
|
# use old file name with pdf extension
|
||||||
if file_extension == ".tif":
|
if file_extension == ".tif" or file_extension == ".tiff":
|
||||||
newpath = filepath.replace(".tif", ".pdf")
|
newpath = os.path.join(tempdir, file_name + ".pdf")
|
||||||
elif file_extension == ".tiff":
|
|
||||||
newpath = filepath.replace(".tiff", ".pdf")
|
|
||||||
else:
|
else:
|
||||||
logger.warning(f"Cannot convert from {str(file_extension)} to pdf.")
|
logger.warning(f"Cannot convert from {str(file_extension)} to pdf.")
|
||||||
return ""
|
return ""
|
||||||
@ -121,12 +121,9 @@ def convert_from_tiff_to_pdf(filepath: str) -> str:
|
|||||||
images[0].save(newpath, save_all=True, append_images=images[1:])
|
images[0].save(newpath, save_all=True, append_images=images[1:])
|
||||||
except OSError as e:
|
except OSError as e:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
f"Could not save the file as pdf. "
|
f"Could not save the file as pdf. Error: {str(e)}",
|
||||||
f"The original image file was not deleted. Error: "
|
|
||||||
f"{str(e)}",
|
|
||||||
)
|
)
|
||||||
return ""
|
return ""
|
||||||
os.unlink(filepath)
|
|
||||||
image.close()
|
image.close()
|
||||||
return newpath
|
return newpath
|
||||||
|
|
||||||
@ -233,22 +230,35 @@ def consume_file(
|
|||||||
if settings.CONSUMER_ENABLE_BARCODES:
|
if settings.CONSUMER_ENABLE_BARCODES:
|
||||||
separators = []
|
separators = []
|
||||||
document_list = []
|
document_list = []
|
||||||
|
converted_tiff = None
|
||||||
if settings.CONSUMER_BARCODE_TIFF_SUPPORT:
|
if settings.CONSUMER_BARCODE_TIFF_SUPPORT:
|
||||||
supported_extensions = [".pdf", ".tiff", ".tif"]
|
supported_extensions = [".pdf", ".tiff", ".tif"]
|
||||||
else:
|
else:
|
||||||
supported_extensions = [".pdf"]
|
supported_extensions = [".pdf"]
|
||||||
file_extension = os.path.splitext(os.path.basename(path))[1]
|
file_extension = os.path.splitext(os.path.basename(path))[1].lower()
|
||||||
if file_extension not in supported_extensions:
|
if file_extension not in supported_extensions:
|
||||||
|
# if not supported, skip this routine
|
||||||
logger.warning(
|
logger.warning(
|
||||||
f"Unsupported file format for barcode reader: {str(file_extension)}",
|
f"Unsupported file format for barcode reader: {str(file_extension)}",
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
if file_extension == ".tif" or file_extension == ".tiff":
|
if file_extension == ".tif" or file_extension == ".tiff":
|
||||||
path = convert_from_tiff_to_pdf(path)
|
converted_tiff = convert_from_tiff_to_pdf(path)
|
||||||
separators = scan_file_for_separating_barcodes(path)
|
if converted_tiff:
|
||||||
|
separators = scan_file_for_separating_barcodes(converted_tiff)
|
||||||
|
else:
|
||||||
|
separators = scan_file_for_separating_barcodes(path)
|
||||||
if separators:
|
if separators:
|
||||||
logger.debug(f"Pages with separators found in: {str(path)}")
|
if converted_tiff:
|
||||||
document_list = separate_pages(path, separators)
|
logger.debug(
|
||||||
|
f"Pages with separators found in: {str(converted_tiff)}",
|
||||||
|
)
|
||||||
|
document_list = separate_pages(converted_tiff, separators)
|
||||||
|
else:
|
||||||
|
logger.debug(
|
||||||
|
f"Pages with separators found in: {str(path)}",
|
||||||
|
)
|
||||||
|
document_list = separate_pages(path, separators)
|
||||||
if document_list:
|
if document_list:
|
||||||
for n, document in enumerate(document_list):
|
for n, document in enumerate(document_list):
|
||||||
# save to consumption dir
|
# save to consumption dir
|
||||||
@ -260,6 +270,9 @@ def consume_file(
|
|||||||
save_to_dir(document, newname=newname)
|
save_to_dir(document, newname=newname)
|
||||||
# if we got here, the document was successfully split
|
# if we got here, the document was successfully split
|
||||||
# and can safely be deleted
|
# and can safely be deleted
|
||||||
|
if converted_tiff:
|
||||||
|
logger.debug("Deleting file {}".format(converted_tiff))
|
||||||
|
os.unlink(converted_tiff)
|
||||||
logger.debug("Deleting file {}".format(path))
|
logger.debug("Deleting file {}".format(path))
|
||||||
os.unlink(path)
|
os.unlink(path)
|
||||||
# notify the sender, otherwise the progress bar
|
# notify the sender, otherwise the progress bar
|
||||||
@ -282,6 +295,8 @@ def consume_file(
|
|||||||
"OSError. It could be, the broker cannot be reached.",
|
"OSError. It could be, the broker cannot be reached.",
|
||||||
)
|
)
|
||||||
logger.warning(str(e))
|
logger.warning(str(e))
|
||||||
|
# consuming stops here, since the original document with
|
||||||
|
# the barcodes has been split and will be consumed separately
|
||||||
return "File successfully split"
|
return "File successfully split"
|
||||||
|
|
||||||
# continue with consumption if no barcode was found
|
# continue with consumption if no barcode was found
|
||||||
|
Loading…
x
Reference in New Issue
Block a user