diff --git a/requirements.txt b/requirements.txt index afc8b9f51..26ec003f8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -61,6 +61,7 @@ ocrmypdf==13.4.2 packaging==21.3; python_version >= '3.6' pathvalidate==2.5.0 pdfminer.six==20220319 +pdf2image==1.16.0 pikepdf==5.1.1 pillow==9.1.0 pluggy==1.0.0; python_version >= '3.6' diff --git a/src/documents/tasks.py b/src/documents/tasks.py index 5161fcc01..eff46436e 100644 --- a/src/documents/tasks.py +++ b/src/documents/tasks.py @@ -177,10 +177,26 @@ def consume_file( ): # check for separators in current document - separator_page_numbers = scan_file_for_separating_barcodes(path) - if separator_page_numbers != []: - logger.debug(f"Pages with separators found: {str(separator_page_numbers)}") + separators = scan_file_for_separating_barcodes(path) + document_list = [] + if separators == []: + pass + else: + logger.debug(f"Pages with separators found in: {str(path)}") + document_list = separate_pages(path, separators) + if document_list == []: + pass + else: + for document in document_list: + # save to consumption dir + save_to_dir(document) + # if we got here, the document was successfully split + # and can safely be deleted + logger.debug("Deleting file {}".format(path)) + os.unlink(path) + return "File successfully split" + # continue with consumption if no barcode was found document = Consumer().try_consume_file( path, override_filename=override_filename,