From b22742791625370641de9a213186e63b56db3a2f Mon Sep 17 00:00:00 2001 From: Florian Brandes Date: Wed, 6 Apr 2022 21:22:07 +0200 Subject: [PATCH] add split logic to consume_file Signed-off-by: florian on nixos (Florian Brandes) --- requirements.txt | 1 + src/documents/tasks.py | 22 +++++++++++++++++++--- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index afc8b9f51..26ec003f8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -61,6 +61,7 @@ ocrmypdf==13.4.2 packaging==21.3; python_version >= '3.6' pathvalidate==2.5.0 pdfminer.six==20220319 +pdf2image==1.16.0 pikepdf==5.1.1 pillow==9.1.0 pluggy==1.0.0; python_version >= '3.6' diff --git a/src/documents/tasks.py b/src/documents/tasks.py index 5161fcc01..eff46436e 100644 --- a/src/documents/tasks.py +++ b/src/documents/tasks.py @@ -177,10 +177,26 @@ def consume_file( ): # check for separators in current document - separator_page_numbers = scan_file_for_separating_barcodes(path) - if separator_page_numbers != []: - logger.debug(f"Pages with separators found: {str(separator_page_numbers)}") + separators = scan_file_for_separating_barcodes(path) + document_list = [] + if separators == []: + pass + else: + logger.debug(f"Pages with separators found in: {str(path)}") + document_list = separate_pages(path, separators) + if document_list == []: + pass + else: + for document in document_list: + # save to consumption dir + save_to_dir(document) + # if we got here, the document was successfully split + # and can safely be deleted + logger.debug("Deleting file {}".format(path)) + os.unlink(path) + return "File successfully split" + # continue with consumption if no barcode was found document = Consumer().try_consume_file( path, override_filename=override_filename,