Merge pull request #1227 from paperless-ngx/feature-reduce-worker-counts

Chore: Reduces webserver and task worker count to 1 by default
This commit is contained in:
shamoon 2022-07-12 09:56:21 -07:00 committed by GitHub
commit 8da7e505c0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 31 additions and 60 deletions

View File

@ -536,6 +536,8 @@ PAPERLESS_TASK_WORKERS=<num>
maintain the automatic matching algorithm, check emails, consume documents, maintain the automatic matching algorithm, check emails, consume documents,
etc. This variable specifies how many things it will do in parallel. etc. This variable specifies how many things it will do in parallel.
Defaults to 1.
PAPERLESS_THREADS_PER_WORKER=<num> PAPERLESS_THREADS_PER_WORKER=<num>
Furthermore, paperless uses multiple threads when consuming documents to Furthermore, paperless uses multiple threads when consuming documents to
@ -797,9 +799,7 @@ PAPERLESS_WEBSERVER_WORKERS=<num>
also loads the entire application into memory separately, so increasing this value also loads the entire application into memory separately, so increasing this value
will increase RAM usage. will increase RAM usage.
Consider configuring this to 1 on low power devices with limited amount of RAM. Defaults to 1.
Defaults to 2.
PAPERLESS_PORT=<port> PAPERLESS_PORT=<port>
The port number the webserver will listen on inside the container. There are The port number the webserver will listen on inside the container. There are

View File

@ -1,7 +1,7 @@
import os import os
bind = f'[::]:{os.getenv("PAPERLESS_PORT", 8000)}' bind = f'[::]:{os.getenv("PAPERLESS_PORT", 8000)}'
workers = int(os.getenv("PAPERLESS_WEBSERVER_WORKERS", 2)) workers = int(os.getenv("PAPERLESS_WEBSERVER_WORKERS", 1))
worker_class = "paperless.workers.ConfigurableWorker" worker_class = "paperless.workers.ConfigurableWorker"
timeout = 120 timeout = 120

View File

@ -1,35 +0,0 @@
import logging
from unittest import mock
from django.test import TestCase
from paperless.settings import default_task_workers
from paperless.settings import default_threads_per_worker
class TestSettings(TestCase):
    """
    Checks the defaults derived from the reported CPU count: the number of
    task workers and the number of threads per worker.
    """

    @mock.patch("paperless.settings.multiprocessing.cpu_count")
    def test_single_core(self, cpu_count):
        """A machine reporting one core gets one worker with one thread."""
        cpu_count.return_value = 1

        default_workers = default_task_workers()
        default_threads = default_threads_per_worker(default_workers)

        self.assertEqual(default_workers, 1)
        self.assertEqual(default_threads, 1)

    def test_workers_threads(self):
        """
        For every reported core count from 1 to 63, both defaults are at
        least 1 and workers * threads does not exceed the core count.
        """
        for i in range(1, 64):
            with mock.patch(
                "paperless.settings.multiprocessing.cpu_count",
            ) as cpu_count:
                cpu_count.return_value = i

                default_workers = default_task_workers()
                default_threads = default_threads_per_worker(default_workers)

                # Comparison assertions print both operands on failure; the
                # message carries the simulated core count.
                self.assertGreaterEqual(default_workers, 1, f"{i}")
                self.assertGreaterEqual(default_threads, 1, f"{i}")
                self.assertLessEqual(default_workers * default_threads, i, f"{i}")

View File

@ -425,27 +425,7 @@ LOGGING = {
# Task queue # # Task queue #
############################################################################### ###############################################################################
TASK_WORKERS = __get_int("PAPERLESS_TASK_WORKERS", 1)
# Sensible defaults for multitasking:
# use a fair balance between worker processes and threads per worker so that
# both consuming many documents in parallel and consuming large documents is
# reasonably fast.
# Favors threads per worker on smaller systems and never exceeds cpu_count()
# in total.
def default_task_workers() -> int:
    """
    Return the default number of task worker processes for this machine.

    With fewer than 4 reported cores, one worker per core is used. With 4 or
    more, the result is floor(sqrt(cores)). The result is always at least 1
    and never larger than the reported core count.

    Returns 1 when the core count cannot be determined
    (multiprocessing.cpu_count() raises NotImplementedError).
    """
    try:
        # cpu_count() is the call that can raise NotImplementedError, so it
        # has to live inside the try for the fallback to be reachable.
        available_cores = max(multiprocessing.cpu_count(), 1)
    except NotImplementedError:
        return 1

    if available_cores < 4:
        return available_cores
    return max(math.floor(math.sqrt(available_cores)), 1)
TASK_WORKERS = __get_int("PAPERLESS_TASK_WORKERS", default_task_workers())
PAPERLESS_WORKER_TIMEOUT: Final[int] = __get_int("PAPERLESS_WORKER_TIMEOUT", 1800) PAPERLESS_WORKER_TIMEOUT: Final[int] = __get_int("PAPERLESS_WORKER_TIMEOUT", 1800)

View File

@ -1,7 +1,9 @@
import datetime import datetime
from unittest import mock
from unittest import TestCase from unittest import TestCase
from paperless.settings import _parse_ignore_dates from paperless.settings import _parse_ignore_dates
from paperless.settings import default_threads_per_worker
class TestIgnoreDateParsing(TestCase): class TestIgnoreDateParsing(TestCase):
@ -56,3 +58,27 @@ class TestIgnoreDateParsing(TestCase):
] ]
self._parse_checker(test_cases) self._parse_checker(test_cases)
def test_workers_threads(self):
    """
    GIVEN:
        - Certain CPU counts
    WHEN:
        - Threads per worker is calculated
    THEN:
        - Threads per worker less than or equal to CPU count
        - At least 1 thread per worker
    """
    default_workers = 1

    for i in range(1, 64):
        # One sub-test per simulated core count: a failure is reported with
        # the offending count and the remaining counts still run.
        with self.subTest(cpu_count=i), mock.patch(
            "paperless.settings.multiprocessing.cpu_count",
        ) as cpu_count:
            cpu_count.return_value = i

            default_threads = default_threads_per_worker(default_workers)

            self.assertGreaterEqual(default_threads, 1)
            self.assertLessEqual(default_workers * default_threads, i)