diff --git a/docs/configuration.rst b/docs/configuration.rst index f53266481..a5db8fffa 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -588,6 +588,27 @@ PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS= Defaults to false. +PAPERLESS_CONSUMER_ENABLE_BARCODES= + Enables the scanning and page separation based on detected barcodes. + This allows for scanning and adding multiple documents per uploaded + file, which are separated by one or multiple barcode pages. + + For ease of use, it is suggested to use a standardized separation page, + e.g. `here <https://www.alliancegroup.co.uk/patch-codes.htm>`_. + + If no barcodes are detected in the uploaded file, no page separation + will happen. + + Defaults to true. + + +PAPERLESS_CONSUMER_BARCODE_STRING=PATCHT + Defines the string to be detected as a separator barcode. + If paperless is used with the PATCH-T separator pages, users + shouldn't change this. + + Defaults to "PATCHT". + PAPERLESS_CONVERT_MEMORY_LIMIT= On smaller systems, or even in the case of Very Large Documents, the consumer diff --git a/paperless.conf.example b/paperless.conf.example index de24bde74..cad13257c 100644 --- a/paperless.conf.example +++ b/paperless.conf.example @@ -60,6 +60,8 @@ #PAPERLESS_CONSUMER_RECURSIVE=false #PAPERLESS_CONSUMER_IGNORE_PATTERNS=[".DS_STORE/*", "._*", ".stfolder/*", ".stversions/*", ".localized/*", "desktop.ini"] #PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS=false +#PAPERLESS_CONSUMER_ENABLE_BARCODES=true +#PAPERLESS_CONSUMER_BARCODE_STRING=PATCHT #PAPERLESS_OPTIMIZE_THUMBNAILS=true #PAPERLESS_PRE_CONSUME_SCRIPT=/path/to/an/arbitrary/script.sh #PAPERLESS_POST_CONSUME_SCRIPT=/path/to/an/arbitrary/script.sh diff --git a/src/documents/tasks.py b/src/documents/tasks.py index e7d20eff8..3abb3227b 100644 --- a/src/documents/tasks.py +++ b/src/documents/tasks.py @@ -24,8 +24,6 @@ from pikepdf import Pdf from pyzbar import pyzbar from whoosh.writing import AsyncWriter -# barcode decoder - logger = logging.getLogger("paperless.tasks") @@ -100,12 +98,13 @@ def 
scan_file_for_separating_barcodes(filepath: str) -> list: Returns a list of pagenumbers, which separate the file """ separator_page_numbers = [] + separator_barcode = "b'" + str(settings.CONSUMER_BARCODE_STRING) + "'" # use a temporary directory in case the file os too big to handle in memory with tempfile.TemporaryDirectory() as path: pages_from_path = convert_from_path(filepath, output_folder=path) for current_page_number, page in enumerate(pages_from_path): current_barcodes = barcode_reader(page) - if "b'PATCHT'" in current_barcodes: + if separator_barcode in current_barcodes: separator_page_numbers = separator_page_numbers + [current_page_number] return separator_page_numbers @@ -163,13 +162,12 @@ def save_to_dir(filepath, newname=None, target_dir=settings.CONSUMPTION_DIR): Copies filepath to target_dir. Optionally rename the file. """ - logger.debug(f"filepath: {str(filepath)}") - logger.debug(f"newname: {str(newname)}") - logger.debug(f"target_dir: {str(target_dir)}") if os.path.isfile(filepath) and os.path.isdir(target_dir): dst = shutil.copy(filepath, target_dir) + logger.debug(f"saved {str(filepath)} to {str(dst)}") if newname: dst_new = os.path.join(target_dir, newname) + logger.debug(f"moving {str(dst)} to {str(dst_new)}") os.rename(dst, dst_new) else: logger.warning(f"{str(filepath)} or {str(target_dir)} don't exist.") @@ -186,7 +184,9 @@ def consume_file( ): # check for separators in current document - separators = scan_file_for_separating_barcodes(path) + separators = [] + if settings.CONSUMER_ENABLE_BARCODES: + separators = scan_file_for_separating_barcodes(path) document_list = [] if separators == []: pass diff --git a/src/documents/tests/test_tasks.py b/src/documents/tests/test_tasks.py index b5eabdd1a..3e019b51e 100644 --- a/src/documents/tests/test_tasks.py +++ b/src/documents/tests/test_tasks.py @@ -98,7 +98,8 @@ class TestTasks(DirectoriesMixin, TestCase): "patch-code-t.pbm", ) img = Image.open(test_file) - 
self.assertEqual(tasks.barcode_reader(img), ["b'PATCHT'"]) + separator_barcode = "b'" + str(settings.CONSUMER_BARCODE_STRING) + "'" + self.assertEqual(tasks.barcode_reader(img), [separator_barcode]) def test_barcode_reader2(self): test_file = os.path.join(os.path.dirname(__file__), "samples", "simple.png") diff --git a/src/paperless/settings.py b/src/paperless/settings.py index 39b850813..934d19910 100644 --- a/src/paperless/settings.py +++ b/src/paperless/settings.py @@ -462,6 +462,13 @@ CONSUMER_IGNORE_PATTERNS = list( CONSUMER_SUBDIRS_AS_TAGS = __get_boolean("PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS") +CONSUMER_ENABLE_BARCODES = __get_boolean( + "PAPERLESS_CONSUMER_ENABLE_BARCODES", + default="YES", +) + +CONSUMER_BARCODE_STRING = os.getenv("PAPERLESS_CONSUMER_BARCODE_STRING", "PATCHT") + OPTIMIZE_THUMBNAILS = __get_boolean("PAPERLESS_OPTIMIZE_THUMBNAILS", "true") OCR_PAGES = int(os.getenv("PAPERLESS_OCR_PAGES", 0))