Merge pull request #1227 from paperless-ngx/feature-reduce-worker-counts

Chore: Reduces webserver and task worker count to 1 by default
This commit is contained in:
shamoon 2022-07-12 09:56:21 -07:00 committed by GitHub
commit 8da7e505c0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 31 additions and 60 deletions

View File

@ -536,6 +536,8 @@ PAPERLESS_TASK_WORKERS=<num>
maintain the automatic matching algorithm, check emails, consume documents,
etc. This variable specifies how many things it will do in parallel.
Defaults to 1
PAPERLESS_THREADS_PER_WORKER=<num>
Furthermore, paperless uses multiple threads when consuming documents to
@ -797,9 +799,7 @@ PAPERLESS_WEBSERVER_WORKERS=<num>
also loads the entire application into memory separately, so increasing this value
will increase RAM usage.
Consider configuring this to 1 on low power devices with limited amount of RAM.
Defaults to 2.
Defaults to 1.
PAPERLESS_PORT=<port>
The port number the webserver will listen on inside the container. There are

View File

@ -1,7 +1,7 @@
import os
bind = f'[::]:{os.getenv("PAPERLESS_PORT", 8000)}'
workers = int(os.getenv("PAPERLESS_WEBSERVER_WORKERS", 2))
workers = int(os.getenv("PAPERLESS_WEBSERVER_WORKERS", 1))
worker_class = "paperless.workers.ConfigurableWorker"
timeout = 120

View File

@ -1,35 +0,0 @@
import logging
from unittest import mock
from django.test import TestCase
from paperless.settings import default_task_workers
from paperless.settings import default_threads_per_worker
class TestSettings(TestCase):
@mock.patch("paperless.settings.multiprocessing.cpu_count")
def test_single_core(self, cpu_count):
cpu_count.return_value = 1
default_workers = default_task_workers()
default_threads = default_threads_per_worker(default_workers)
self.assertEqual(default_workers, 1)
self.assertEqual(default_threads, 1)
def test_workers_threads(self):
for i in range(1, 64):
with mock.patch(
"paperless.settings.multiprocessing.cpu_count",
) as cpu_count:
cpu_count.return_value = i
default_workers = default_task_workers()
default_threads = default_threads_per_worker(default_workers)
self.assertTrue(default_workers >= 1)
self.assertTrue(default_threads >= 1)
self.assertTrue(default_workers * default_threads <= i, f"{i}")

View File

@ -425,27 +425,7 @@ LOGGING = {
# Task queue #
###############################################################################
# Sensible defaults for multitasking:
# use a fair balance between worker processes and threads epr worker so that
# both consuming many documents in parallel and consuming large documents is
# reasonably fast.
# Favors threads per worker on smaller systems and never exceeds cpu_count()
# in total.
def default_task_workers() -> int:
# always leave one core open
available_cores = max(multiprocessing.cpu_count(), 1)
try:
if available_cores < 4:
return available_cores
return max(math.floor(math.sqrt(available_cores)), 1)
except NotImplementedError:
return 1
TASK_WORKERS = __get_int("PAPERLESS_TASK_WORKERS", default_task_workers())
TASK_WORKERS = __get_int("PAPERLESS_TASK_WORKERS", 1)
PAPERLESS_WORKER_TIMEOUT: Final[int] = __get_int("PAPERLESS_WORKER_TIMEOUT", 1800)

View File

@ -1,7 +1,9 @@
import datetime
from unittest import mock
from unittest import TestCase
from paperless.settings import _parse_ignore_dates
from paperless.settings import default_threads_per_worker
class TestIgnoreDateParsing(TestCase):
@ -56,3 +58,27 @@ class TestIgnoreDateParsing(TestCase):
]
self._parse_checker(test_cases)
def test_workers_threads(self):
"""
GIVEN:
- Certain CPU counts
WHEN:
- Threads per worker is calculated
THEN:
- Threads per worker less than or equal to CPU count
- At least 1 thread per worker
"""
default_workers = 1
for i in range(1, 64):
with mock.patch(
"paperless.settings.multiprocessing.cpu_count",
) as cpu_count:
cpu_count.return_value = i
default_threads = default_threads_per_worker(default_workers)
self.assertGreaterEqual(default_threads, 1)
self.assertLessEqual(default_workers * default_threads, i)