mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-09 09:58:20 -05:00
add config options and documentation
Signed-off-by: florian on nixos (Florian Brandes) <florian.brandes@posteo.de>
This commit is contained in:
parent
37b3fde4e1
commit
c024b846c3
@ -588,6 +588,27 @@ PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS=<bool>
|
|||||||
|
|
||||||
Defaults to false.
|
Defaults to false.
|
||||||
|
|
||||||
|
PAPERLESS_CONSUMER_ENABLE_BARCODES=<bool>
|
||||||
|
Enables the scanning and page separation based on detected barcodes.
|
||||||
|
This allows for scanning and adding multiple documents per uploaded
|
||||||
|
file, which are separated by one or multiple barcode pages.
|
||||||
|
|
||||||
|
For ease of use, it is suggested to use a standardized separation page,
|
||||||
|
e.g. `here <https://www.alliancegroup.co.uk/patch-codes.htm>`_.
|
||||||
|
|
||||||
|
If no barcodes are detected in the uploaded file, no page separation
|
||||||
|
will happen.
|
||||||
|
|
||||||
|
Defaults to true.
|
||||||
|
|
||||||
|
|
||||||
|
PAPERLESS_CONSUMER_BARCODE_STRING=PATCHT
|
||||||
|
Defines the string to be detected as a separator barcode.
|
||||||
|
If paperless is used with the PATCH-T separator pages, users
|
||||||
|
shouldn't change this.
|
||||||
|
|
||||||
|
Defaults to "PATCHT".
|
||||||
|
|
||||||
|
|
||||||
PAPERLESS_CONVERT_MEMORY_LIMIT=<num>
|
PAPERLESS_CONVERT_MEMORY_LIMIT=<num>
|
||||||
On smaller systems, or even in the case of Very Large Documents, the consumer
|
On smaller systems, or even in the case of Very Large Documents, the consumer
|
||||||
|
@ -60,6 +60,8 @@
|
|||||||
#PAPERLESS_CONSUMER_RECURSIVE=false
|
#PAPERLESS_CONSUMER_RECURSIVE=false
|
||||||
#PAPERLESS_CONSUMER_IGNORE_PATTERNS=[".DS_STORE/*", "._*", ".stfolder/*", ".stversions/*", ".localized/*", "desktop.ini"]
|
#PAPERLESS_CONSUMER_IGNORE_PATTERNS=[".DS_STORE/*", "._*", ".stfolder/*", ".stversions/*", ".localized/*", "desktop.ini"]
|
||||||
#PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS=false
|
#PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS=false
|
||||||
|
#PAPERLESS_CONSUMER_ENABLE_BARCODES=true
|
||||||
|
#PAPERLESS_CONSUMER_BARCODE_STRING=PATCHT
|
||||||
#PAPERLESS_OPTIMIZE_THUMBNAILS=true
|
#PAPERLESS_OPTIMIZE_THUMBNAILS=true
|
||||||
#PAPERLESS_PRE_CONSUME_SCRIPT=/path/to/an/arbitrary/script.sh
|
#PAPERLESS_PRE_CONSUME_SCRIPT=/path/to/an/arbitrary/script.sh
|
||||||
#PAPERLESS_POST_CONSUME_SCRIPT=/path/to/an/arbitrary/script.sh
|
#PAPERLESS_POST_CONSUME_SCRIPT=/path/to/an/arbitrary/script.sh
|
||||||
|
@ -24,8 +24,6 @@ from pikepdf import Pdf
|
|||||||
from pyzbar import pyzbar
|
from pyzbar import pyzbar
|
||||||
from whoosh.writing import AsyncWriter
|
from whoosh.writing import AsyncWriter
|
||||||
|
|
||||||
# barcode decoder
|
|
||||||
|
|
||||||
logger = logging.getLogger("paperless.tasks")
|
logger = logging.getLogger("paperless.tasks")
|
||||||
|
|
||||||
|
|
||||||
@ -100,12 +98,13 @@ def scan_file_for_separating_barcodes(filepath: str) -> list:
|
|||||||
Returns a list of pagenumbers, which separate the file
|
Returns a list of pagenumbers, which separate the file
|
||||||
"""
|
"""
|
||||||
separator_page_numbers = []
|
separator_page_numbers = []
|
||||||
|
separator_barcode = "b'" + str(settings.CONSUMER_BARCODE_STRING) + "'"
|
||||||
# use a temporary directory in case the file is too big to handle in memory
|
# use a temporary directory in case the file is too big to handle in memory
|
||||||
with tempfile.TemporaryDirectory() as path:
|
with tempfile.TemporaryDirectory() as path:
|
||||||
pages_from_path = convert_from_path(filepath, output_folder=path)
|
pages_from_path = convert_from_path(filepath, output_folder=path)
|
||||||
for current_page_number, page in enumerate(pages_from_path):
|
for current_page_number, page in enumerate(pages_from_path):
|
||||||
current_barcodes = barcode_reader(page)
|
current_barcodes = barcode_reader(page)
|
||||||
if "b'PATCHT'" in current_barcodes:
|
if separator_barcode in current_barcodes:
|
||||||
separator_page_numbers = separator_page_numbers + [current_page_number]
|
separator_page_numbers = separator_page_numbers + [current_page_number]
|
||||||
return separator_page_numbers
|
return separator_page_numbers
|
||||||
|
|
||||||
@ -163,13 +162,12 @@ def save_to_dir(filepath, newname=None, target_dir=settings.CONSUMPTION_DIR):
|
|||||||
Copies filepath to target_dir.
|
Copies filepath to target_dir.
|
||||||
Optionally rename the file.
|
Optionally rename the file.
|
||||||
"""
|
"""
|
||||||
logger.debug(f"filepath: {str(filepath)}")
|
|
||||||
logger.debug(f"newname: {str(newname)}")
|
|
||||||
logger.debug(f"target_dir: {str(target_dir)}")
|
|
||||||
if os.path.isfile(filepath) and os.path.isdir(target_dir):
|
if os.path.isfile(filepath) and os.path.isdir(target_dir):
|
||||||
dst = shutil.copy(filepath, target_dir)
|
dst = shutil.copy(filepath, target_dir)
|
||||||
|
logging.debug(f"saved {str(filepath)} to {str(dst)}")
|
||||||
if newname:
|
if newname:
|
||||||
dst_new = os.path.join(target_dir, newname)
|
dst_new = os.path.join(target_dir, newname)
|
||||||
|
logger.debug(f"moving {str(dst)} to {str(dst_new)}")
|
||||||
os.rename(dst, dst_new)
|
os.rename(dst, dst_new)
|
||||||
else:
|
else:
|
||||||
logger.warning(f"{str(filepath)} or {str(target_dir)} don't exist.")
|
logger.warning(f"{str(filepath)} or {str(target_dir)} don't exist.")
|
||||||
@ -186,6 +184,8 @@ def consume_file(
|
|||||||
):
|
):
|
||||||
|
|
||||||
# check for separators in current document
|
# check for separators in current document
|
||||||
|
separators = []
|
||||||
|
if settings.CONSUMER_ENABLE_BARCODES:
|
||||||
separators = scan_file_for_separating_barcodes(path)
|
separators = scan_file_for_separating_barcodes(path)
|
||||||
document_list = []
|
document_list = []
|
||||||
if separators == []:
|
if separators == []:
|
||||||
|
@ -98,7 +98,8 @@ class TestTasks(DirectoriesMixin, TestCase):
|
|||||||
"patch-code-t.pbm",
|
"patch-code-t.pbm",
|
||||||
)
|
)
|
||||||
img = Image.open(test_file)
|
img = Image.open(test_file)
|
||||||
self.assertEqual(tasks.barcode_reader(img), ["b'PATCHT'"])
|
separator_barcode = "b'" + str(settings.CONSUMER_BARCODE_STRING) + "'"
|
||||||
|
self.assertEqual(tasks.barcode_reader(img), [separator_barcode])
|
||||||
|
|
||||||
def test_barcode_reader2(self):
|
def test_barcode_reader2(self):
|
||||||
test_file = os.path.join(os.path.dirname(__file__), "samples", "simple.png")
|
test_file = os.path.join(os.path.dirname(__file__), "samples", "simple.png")
|
||||||
|
@ -462,6 +462,13 @@ CONSUMER_IGNORE_PATTERNS = list(
|
|||||||
|
|
||||||
CONSUMER_SUBDIRS_AS_TAGS = __get_boolean("PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS")
|
CONSUMER_SUBDIRS_AS_TAGS = __get_boolean("PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS")
|
||||||
|
|
||||||
|
CONSUMER_ENABLE_BARCODES = __get_boolean(
|
||||||
|
"PAPERLESS_CONSUMER_ENABLE_BARCODES",
|
||||||
|
default="YES",
|
||||||
|
)
|
||||||
|
|
||||||
|
CONSUMER_BARCODE_STRING = os.getenv("PAPERLESS_CONSUMER_BARCODE_STRING", "PATCHT")
|
||||||
|
|
||||||
OPTIMIZE_THUMBNAILS = __get_boolean("PAPERLESS_OPTIMIZE_THUMBNAILS", "true")
|
OPTIMIZE_THUMBNAILS = __get_boolean("PAPERLESS_OPTIMIZE_THUMBNAILS", "true")
|
||||||
|
|
||||||
OCR_PAGES = int(os.getenv("PAPERLESS_OCR_PAGES", 0))
|
OCR_PAGES = int(os.getenv("PAPERLESS_OCR_PAGES", 0))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user