From edaaedae36ee2bb99859b1ca22455b3b7381d0bd Mon Sep 17 00:00:00 2001 From: Trenton Holmes Date: Mon, 11 Jul 2022 13:54:04 -0700 Subject: [PATCH] Reduces webserver and task worker count to 1 by default --- docs/configuration.rst | 6 ++--- gunicorn.conf.py | 2 +- src/documents/tests/test_settings.py | 35 ---------------------------- src/paperless/settings.py | 22 +---------------- src/paperless/tests/test_settings.py | 26 +++++++++++++++++++++ 5 files changed, 31 insertions(+), 60 deletions(-) delete mode 100644 src/documents/tests/test_settings.py diff --git a/docs/configuration.rst b/docs/configuration.rst index a5db55927..c4203472c 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -536,6 +536,8 @@ PAPERLESS_TASK_WORKERS= maintain the automatic matching algorithm, check emails, consume documents, etc. This variable specifies how many things it will do in parallel. + Defaults to 1 + PAPERLESS_THREADS_PER_WORKER= Furthermore, paperless uses multiple threads when consuming documents to @@ -797,9 +799,7 @@ PAPERLESS_WEBSERVER_WORKERS= also loads the entire application into memory separately, so increasing this value will increase RAM usage. - Consider configuring this to 1 on low power devices with limited amount of RAM. - - Defaults to 2. + Defaults to 1. PAPERLESS_PORT= The port number the webserver will listen on inside the container. There are diff --git a/gunicorn.conf.py b/gunicorn.conf.py index 9c0e5be78..7193815b3 100644 --- a/gunicorn.conf.py +++ b/gunicorn.conf.py @@ -1,7 +1,7 @@ import os bind = f'[::]:{os.getenv("PAPERLESS_PORT", 8000)}' -workers = int(os.getenv("PAPERLESS_WEBSERVER_WORKERS", 2)) +workers = int(os.getenv("PAPERLESS_WEBSERVER_WORKERS", 1)) worker_class = "paperless.workers.ConfigurableWorker" timeout = 120 diff --git a/src/documents/tests/test_settings.py b/src/documents/tests/test_settings.py deleted file mode 100644 index 9b8edab27..000000000 --- a/src/documents/tests/test_settings.py +++ /dev/null @@ -1,35 +0,0 @@ -import logging -from unittest import mock - -from django.test import TestCase -from paperless.settings import default_task_workers -from paperless.settings import default_threads_per_worker - - -class TestSettings(TestCase): - @mock.patch("paperless.settings.multiprocessing.cpu_count") - def test_single_core(self, cpu_count): - cpu_count.return_value = 1 - - default_workers = default_task_workers() - - default_threads = default_threads_per_worker(default_workers) - - self.assertEqual(default_workers, 1) - self.assertEqual(default_threads, 1) - - def test_workers_threads(self): - for i in range(1, 64): - with mock.patch( - "paperless.settings.multiprocessing.cpu_count", - ) as cpu_count: - cpu_count.return_value = i - - default_workers = default_task_workers() - - default_threads = default_threads_per_worker(default_workers) - - self.assertTrue(default_workers >= 1) - self.assertTrue(default_threads >= 1) - - self.assertTrue(default_workers * default_threads <= i, f"{i}") diff --git a/src/paperless/settings.py b/src/paperless/settings.py index 8c8aa8482..bfb9507ba 100644 --- a/src/paperless/settings.py +++ b/src/paperless/settings.py @@ -425,27 +425,7 @@ LOGGING = { # Task queue # ############################################################################### - -# Sensible defaults for multitasking: -# use a fair balance between worker processes and threads epr worker so that -# both consuming many documents in parallel and consuming large documents is -# reasonably fast. -# Favors threads per worker on smaller systems and never exceeds cpu_count() -# in total. - - -def default_task_workers() -> int: - # always leave one core open - available_cores = max(multiprocessing.cpu_count(), 1) - try: - if available_cores < 4: - return available_cores - return max(math.floor(math.sqrt(available_cores)), 1) - except NotImplementedError: - return 1 - - -TASK_WORKERS = __get_int("PAPERLESS_TASK_WORKERS", default_task_workers()) +TASK_WORKERS = __get_int("PAPERLESS_TASK_WORKERS", 1) PAPERLESS_WORKER_TIMEOUT: Final[int] = __get_int("PAPERLESS_WORKER_TIMEOUT", 1800) diff --git a/src/paperless/tests/test_settings.py b/src/paperless/tests/test_settings.py index 57481df5b..fed4079e2 100644 --- a/src/paperless/tests/test_settings.py +++ b/src/paperless/tests/test_settings.py @@ -1,7 +1,9 @@ import datetime +from unittest import mock from unittest import TestCase from paperless.settings import _parse_ignore_dates +from paperless.settings import default_threads_per_worker class TestIgnoreDateParsing(TestCase): @@ -56,3 +58,27 @@ class TestIgnoreDateParsing(TestCase): ] self._parse_checker(test_cases) + + def test_workers_threads(self): + """ + GIVEN: + - Certain CPU counts + WHEN: + - Threads per worker is calculated + THEN: + - Threads per worker less than or equal to CPU count + - At least 1 thread per worker + """ + default_workers = 1 + + for i in range(1, 64): + with mock.patch( + "paperless.settings.multiprocessing.cpu_count", + ) as cpu_count: + cpu_count.return_value = i + + default_threads = default_threads_per_worker(default_workers) + + self.assertGreaterEqual(default_threads, 1) + + self.assertLessEqual(default_workers * default_threads, i)