From 20b2408dbbbb64972c09029757ba4f9f945302b4 Mon Sep 17 00:00:00 2001 From: Pit Kleyersburg Date: Sun, 14 Feb 2016 16:37:38 +0100 Subject: [PATCH] Ensure `OCR_THREADS` is integer, add documentation --- docs/setup.rst | 5 +++++ src/documents/consumer.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/docs/setup.rst b/docs/setup.rst index 1ca9a6ed3..24a9b9fa2 100644 --- a/docs/setup.rst +++ b/docs/setup.rst @@ -58,6 +58,11 @@ Standard (Bare Metal) passphrase from the environment, so if you don't set it to a static value here, you must set ``PAPERLESS_PASSPHRASE=some-secret-string`` on the command line whenever invoking the consumer or webserver. + * ``OCR_THREADS``: this is the number of threads the OCR process will spawn + to process document pages in parallel. The default value is read from + the environment variable ``PAPERLESS_OCR_THREADS``, which must be an + integer. If the variable is not set, Python determines the number of + CPU cores and uses that value. 4. Initialise the database with ``./manage.py migrate``. 5. Create a user for your Paperless instance with ``./manage.py createsuperuser``. Follow the prompts to create your user. diff --git a/src/documents/consumer.py b/src/documents/consumer.py index 3f3b9e9a3..2fa0ea016 100644 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -52,7 +52,7 @@ class Consumer(Renderable): SCRATCH = settings.SCRATCH_DIR CONVERT = settings.CONVERT_BINARY CONSUME = settings.CONSUMPTION_DIR - THREADS = settings.OCR_THREADS + THREADS = int(settings.OCR_THREADS) if settings.OCR_THREADS else None OCR = pyocr.get_available_tools()[0] DEFAULT_OCR_LANGUAGE = settings.OCR_LANGUAGE