add config options and documentation

Signed-off-by: florian on nixos (Florian Brandes) <florian.brandes@posteo.de>
This commit is contained in:
florian on nixos (Florian Brandes) 2022-03-26 10:16:23 +01:00 committed by Florian Brandes
parent 37b3fde4e1
commit c024b846c3
5 changed files with 39 additions and 8 deletions

View File

@ -588,6 +588,27 @@ PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS=<bool>
Defaults to false.
PAPERLESS_CONSUMER_ENABLE_BARCODES=<bool>
Enables scanning for barcodes and page separation based on the barcodes detected.
This allows for scanning and adding multiple documents per uploaded
file, which are separated by one or multiple barcode pages.
For ease of use, it is suggested to use a standardized separation page,
e.g. `here <https://www.alliancegroup.co.uk/patch-codes.htm>`_.
If no barcodes are detected in the uploaded file, no page separation
will happen.
Defaults to true.
PAPERLESS_CONSUMER_BARCODE_STRING=PATCHT
Defines the string to be detected as a separator barcode.
If paperless is used with the PATCH-T separator pages, users
shouldn't change this.
Defaults to "PATCHT".
PAPERLESS_CONVERT_MEMORY_LIMIT=<num>
On smaller systems, or even in the case of Very Large Documents, the consumer

View File

@ -60,6 +60,8 @@
#PAPERLESS_CONSUMER_RECURSIVE=false
#PAPERLESS_CONSUMER_IGNORE_PATTERNS=[".DS_STORE/*", "._*", ".stfolder/*", ".stversions/*", ".localized/*", "desktop.ini"]
#PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS=false
#PAPERLESS_CONSUMER_ENABLE_BARCODES=true
#PAPERLESS_CONSUMER_BARCODE_STRING=PATCHT
#PAPERLESS_OPTIMIZE_THUMBNAILS=true
#PAPERLESS_PRE_CONSUME_SCRIPT=/path/to/an/arbitrary/script.sh
#PAPERLESS_POST_CONSUME_SCRIPT=/path/to/an/arbitrary/script.sh

View File

@ -24,8 +24,6 @@ from pikepdf import Pdf
from pyzbar import pyzbar
from whoosh.writing import AsyncWriter
# barcode decoder
logger = logging.getLogger("paperless.tasks")
@ -100,12 +98,13 @@ def scan_file_for_separating_barcodes(filepath: str) -> list:
Returns a list of pagenumbers, which separate the file
"""
separator_page_numbers = []
separator_barcode = "b'" + str(settings.CONSUMER_BARCODE_STRING) + "'"
# use a temporary directory in case the file is too big to handle in memory
with tempfile.TemporaryDirectory() as path:
pages_from_path = convert_from_path(filepath, output_folder=path)
for current_page_number, page in enumerate(pages_from_path):
current_barcodes = barcode_reader(page)
if "b'PATCHT'" in current_barcodes:
if separator_barcode in current_barcodes:
separator_page_numbers = separator_page_numbers + [current_page_number]
return separator_page_numbers
@ -163,13 +162,12 @@ def save_to_dir(filepath, newname=None, target_dir=settings.CONSUMPTION_DIR):
Copies filepath to target_dir.
Optionally rename the file.
"""
logger.debug(f"filepath: {str(filepath)}")
logger.debug(f"newname: {str(newname)}")
logger.debug(f"target_dir: {str(target_dir)}")
if os.path.isfile(filepath) and os.path.isdir(target_dir):
dst = shutil.copy(filepath, target_dir)
logging.debug(f"saved {str(filepath)} to {str(dst)}")
if newname:
dst_new = os.path.join(target_dir, newname)
logger.debug(f"moving {str(dst)} to {str(dst_new)}")
os.rename(dst, dst_new)
else:
logger.warning(f"{str(filepath)} or {str(target_dir)} don't exist.")
@ -186,7 +184,9 @@ def consume_file(
):
# check for separators in current document
separators = scan_file_for_separating_barcodes(path)
separators = []
if settings.CONSUMER_ENABLE_BARCODES:
separators = scan_file_for_separating_barcodes(path)
document_list = []
if separators == []:
pass

View File

@ -98,7 +98,8 @@ class TestTasks(DirectoriesMixin, TestCase):
"patch-code-t.pbm",
)
img = Image.open(test_file)
self.assertEqual(tasks.barcode_reader(img), ["b'PATCHT'"])
separator_barcode = "b'" + str(settings.CONSUMER_BARCODE_STRING) + "'"
self.assertEqual(tasks.barcode_reader(img), [separator_barcode])
def test_barcode_reader2(self):
test_file = os.path.join(os.path.dirname(__file__), "samples", "simple.png")

View File

@ -462,6 +462,13 @@ CONSUMER_IGNORE_PATTERNS = list(
CONSUMER_SUBDIRS_AS_TAGS = __get_boolean("PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS")
CONSUMER_ENABLE_BARCODES = __get_boolean(
"PAPERLESS_CONSUMER_ENABLE_BARCODES",
default="YES",
)
CONSUMER_BARCODE_STRING = os.getenv("PAPERLESS_CONSUMER_BARCODE_STRING", "PATCHT")
OPTIMIZE_THUMBNAILS = __get_boolean("PAPERLESS_OPTIMIZE_THUMBNAILS", "true")
OCR_PAGES = int(os.getenv("PAPERLESS_OCR_PAGES", 0))