add split logic to consume_file

Signed-off-by: florian on nixos (Florian Brandes) <florian.brandes@posteo.de>
This commit is contained in:
Florian Brandes 2022-04-06 21:22:07 +02:00
parent b5f77fd6e7
commit b227427916
2 changed files with 20 additions and 3 deletions

View File

@@ -61,6 +61,7 @@ ocrmypdf==13.4.2
packaging==21.3; python_version >= '3.6'
pathvalidate==2.5.0
pdfminer.six==20220319
pdf2image==1.16.0
pikepdf==5.1.1
pillow==9.1.0
pluggy==1.0.0; python_version >= '3.6'

View File

@@ -177,10 +177,26 @@ def consume_file(
):
# check for separators in current document
separator_page_numbers = scan_file_for_separating_barcodes(path)
if separator_page_numbers != []:
logger.debug(f"Pages with separators found: {str(separator_page_numbers)}")
separators = scan_file_for_separating_barcodes(path)
document_list = []
if separators == []:
pass
else:
logger.debug(f"Pages with separators found in: {str(path)}")
document_list = separate_pages(path, separators)
if document_list == []:
pass
else:
for document in document_list:
# save to consumption dir
save_to_dir(document)
# if we got here, the document was successfully split
# and can safely be deleted
logger.debug("Deleting file {}".format(path))
os.unlink(path)
return "File successfully split"
# continue with consumption if no barcode was found
document = Consumer().try_consume_file(
path,
override_filename=override_filename,