add split logic to consume_file

Signed-off-by: florian on nixos (Florian Brandes) <florian.brandes@posteo.de>
This commit is contained in:
Florian Brandes 2022-04-06 21:22:07 +02:00
parent b5f77fd6e7
commit b227427916
2 changed files with 20 additions and 3 deletions

View File

@@ -61,6 +61,7 @@ ocrmypdf==13.4.2
packaging==21.3; python_version >= '3.6' packaging==21.3; python_version >= '3.6'
pathvalidate==2.5.0 pathvalidate==2.5.0
pdfminer.six==20220319 pdfminer.six==20220319
pdf2image==1.16.0
pikepdf==5.1.1 pikepdf==5.1.1
pillow==9.1.0 pillow==9.1.0
pluggy==1.0.0; python_version >= '3.6' pluggy==1.0.0; python_version >= '3.6'

View File

@@ -177,10 +177,26 @@ def consume_file(
): ):
# check for separators in current document # check for separators in current document
separator_page_numbers = scan_file_for_separating_barcodes(path) separators = scan_file_for_separating_barcodes(path)
if separator_page_numbers != []: document_list = []
logger.debug(f"Pages with separators found: {str(separator_page_numbers)}") if separators == []:
pass
else:
logger.debug(f"Pages with separators found in: {str(path)}")
document_list = separate_pages(path, separators)
if document_list == []:
pass
else:
for document in document_list:
# save to consumption dir
save_to_dir(document)
# if we got here, the document was successfully split
# and can safely be deleted
logger.debug("Deleting file {}".format(path))
os.unlink(path)
return "File successfully split"
# continue with consumption if no barcode was found
document = Consumer().try_consume_file( document = Consumer().try_consume_file(
path, path,
override_filename=override_filename, override_filename=override_filename,