From 1eb76a1827f7421062050dbb37816d75e638c30f Mon Sep 17 00:00:00 2001 From: Jonas Winkler Date: Thu, 12 Nov 2020 10:01:22 +0100 Subject: [PATCH] fixes #35 --- src/paperless/settings.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/paperless/settings.py b/src/paperless/settings.py index 06dfdcd84..38721c00f 100644 --- a/src/paperless/settings.py +++ b/src/paperless/settings.py @@ -13,6 +13,17 @@ elif os.path.exists("/etc/paperless.conf"): elif os.path.exists("/usr/local/etc/paperless.conf"): load_dotenv("/usr/local/etc/paperless.conf") +# There are multiple levels of concurrency in paperless: +# - Multiple consumers may be run in parallel. +# - Each consumer may process multiple pages in parallel. +# - Each Tesseract OCR run may spawn multiple threads to process a single page +# slightly faster. +# The performance gains from having tesseract use multiple threads are minimal. +# However, when multiple pages are processed in parallel, the total number of +# OCR threads may exceed the number of available CPU cores, which will +# dramatically slow down the consumption process. This setting limits each +# Tesseract process to one thread. +os.environ['OMP_THREAD_LIMIT'] = "1" def __get_boolean(key, default="NO"): """