Performance: add setting to enable DB connection pooling for PostgreSQL (#10354)

---------

Co-authored-by: Trenton H <797416+stumpylog@users.noreply.github.com>
This commit is contained in:
Antoine Mérino
2025-08-02 14:54:13 +02:00
committed by GitHub
parent f0b6e79d14
commit 0ea159683d
5 changed files with 66 additions and 3 deletions

View File

@@ -159,6 +159,23 @@ Available options are `postgresql` and `mariadb`.
Defaults to unset, which uses Djangos built-in defaults.
#### [`PAPERLESS_DB_POOLSIZE=<int>`](#PAPERLESS_DB_POOLSIZE) {#PAPERLESS_DB_POOLSIZE}
: Defines the maximum number of database connections to keep in the pool.
Only applies to PostgreSQL. This setting is ignored for other database engines.
The value must be greater than or equal to 1 to be used.
Defaults to unset, which disables connection pooling.
!!! note
A small pool is typically sufficient — for example, a size of 4.
Make sure your PostgreSQL server's max_connections setting is large enough to handle:
```(Paperless workers + Celery workers) × pool size + safety margin```
For example, with 4 Paperless workers and 2 Celery workers, and a pool size of 4:
(4 + 2) × 4 + 10 = 34 connections required.
#### [`PAPERLESS_DB_READ_CACHE_ENABLED=<bool>`](#PAPERLESS_DB_READ_CACHE_ENABLED) {#PAPERLESS_DB_READ_CACHE_ENABLED}
: Caches the database read query results into Redis. This can significantly improve application response times by caching database queries, at the cost of slightly increased memory usage.

View File

@@ -52,6 +52,7 @@ dependencies = [
"ocrmypdf~=16.10.0",
"pathvalidate~=3.3.1",
"pdf2image~=1.17.0",
"psycopg-pool",
"python-dateutil~=2.9.0",
"python-dotenv~=1.1.0",
"python-gnupg~=0.5.4",
@@ -74,9 +75,10 @@ optional-dependencies.mariadb = [
"mysqlclient~=2.2.7",
]
optional-dependencies.postgres = [
"psycopg[c]==3.2.9",
"psycopg[c,pool]==3.2.9",
# Direct dependency for proper resolution of the pre-built wheels
"psycopg-c==3.2.9",
"psycopg-pool==3.2.6",
]
optional-dependencies.webserver = [
"granian[uvloop]~=2.4.1",

View File

@@ -12,11 +12,13 @@ from celery.signals import before_task_publish
from celery.signals import task_failure
from celery.signals import task_postrun
from celery.signals import task_prerun
from celery.signals import worker_process_init
from django.conf import settings
from django.contrib.auth.models import Group
from django.contrib.auth.models import User
from django.db import DatabaseError
from django.db import close_old_connections
from django.db import connections
from django.db import models
from django.db.models import Q
from django.dispatch import receiver
@@ -1439,3 +1441,18 @@ def task_failure_handler(
task_instance.save()
except Exception: # pragma: no cover
logger.exception("Updating PaperlessTask failed")
@worker_process_init.connect
def close_connection_pool_on_worker_init(**kwargs):
"""
Close the DB connection pool for each Celery child process after it starts.
This is necessary because the parent process parse the Django configuration,
initializes connection pools then forks.
Closing these pools after forking ensures child processes have a valid connection.
"""
for conn in connections.all(initialized_only=True):
if conn.alias == "default" and hasattr(conn, "pool") and conn.pool:
conn.close_pool()

View File

@@ -703,6 +703,9 @@ def _parse_db_settings() -> dict:
# Leave room for future extensibility
if os.getenv("PAPERLESS_DBENGINE") == "mariadb":
engine = "django.db.backends.mysql"
# Contrary to Postgres, Django does not natively support connection pooling for MariaDB.
# However, since MariaDB uses threads instead of forks, establishing connections is significantly faster
# compared to PostgreSQL, so the lack of pooling is not an issue
options = {
"read_default_file": "/etc/mysql/my.cnf",
"charset": "utf8mb4",
@@ -722,6 +725,15 @@ def _parse_db_settings() -> dict:
"sslcert": os.getenv("PAPERLESS_DBSSLCERT", None),
"sslkey": os.getenv("PAPERLESS_DBSSLKEY", None),
}
if int(os.getenv("PAPERLESS_DB_POOLSIZE", 0)) > 0:
options.update(
{
"pool": {
"min_size": 1,
"max_size": int(os.getenv("PAPERLESS_DB_POOLSIZE")),
},
},
)
databases["default"]["ENGINE"] = engine
databases["default"]["OPTIONS"].update(options)

19
uv.lock generated
View File

@@ -1971,10 +1971,11 @@ mariadb = [
{ name = "mysqlclient", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
postgres = [
{ name = "psycopg", extra = ["c"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "psycopg", extra = ["c", "pool"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "psycopg-c", version = "3.2.9", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version != '3.12.*' and platform_machine == 'aarch64' and sys_platform == 'linux') or (python_full_version != '3.12.*' and platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or sys_platform == 'darwin'" },
{ name = "psycopg-c", version = "3.2.9", source = { url = "https://github.com/paperless-ngx/builder/releases/download/psycopg-3.2.9/psycopg_c-3.2.9-cp312-cp312-linux_aarch64.whl" }, marker = "python_full_version == '3.12.*' and platform_machine == 'aarch64' and sys_platform == 'linux'" },
{ name = "psycopg-c", version = "3.2.9", source = { url = "https://github.com/paperless-ngx/builder/releases/download/psycopg-3.2.9/psycopg_c-3.2.9-cp312-cp312-linux_x86_64.whl" }, marker = "python_full_version == '3.12.*' and platform_machine == 'x86_64' and sys_platform == 'linux'" },
{ name = "psycopg-pool", version = "3.2.6", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'linux' or sys_platform == 'darwin'" },
]
webserver = [
{ name = "granian", extra = ["uvloop"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -2079,10 +2080,11 @@ requires-dist = [
{ name = "ocrmypdf", specifier = "~=16.10.0" },
{ name = "pathvalidate", specifier = "~=3.3.1" },
{ name = "pdf2image", specifier = "~=1.17.0" },
{ name = "psycopg", extras = ["c"], marker = "extra == 'postgres'", specifier = "==3.2.9" },
{ name = "psycopg", extras = ["c", "pool"], marker = "extra == 'postgres'", specifier = "==3.2.9" },
{ name = "psycopg-c", marker = "python_full_version == '3.12.*' and platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'postgres'", url = "https://github.com/paperless-ngx/builder/releases/download/psycopg-3.2.9/psycopg_c-3.2.9-cp312-cp312-linux_aarch64.whl" },
{ name = "psycopg-c", marker = "python_full_version == '3.12.*' and platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'postgres'", url = "https://github.com/paperless-ngx/builder/releases/download/psycopg-3.2.9/psycopg_c-3.2.9-cp312-cp312-linux_x86_64.whl" },
{ name = "psycopg-c", marker = "(python_full_version != '3.12.*' and platform_machine == 'aarch64' and extra == 'postgres') or (python_full_version != '3.12.*' and platform_machine == 'x86_64' and extra == 'postgres') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'postgres') or (sys_platform != 'linux' and extra == 'postgres')", specifier = "==3.2.9" },
{ name = "psycopg-pool", marker = "extra == 'postgres'" },
{ name = "python-dateutil", specifier = "~=2.9.0" },
{ name = "python-dotenv", specifier = "~=1.1.0" },
{ name = "python-gnupg", specifier = "~=0.5.4" },
@@ -2433,6 +2435,9 @@ c = [
{ name = "psycopg-c", version = "3.2.9", source = { url = "https://github.com/paperless-ngx/builder/releases/download/psycopg-3.2.9/psycopg_c-3.2.9-cp312-cp312-linux_aarch64.whl" }, marker = "python_full_version == '3.12.*' and implementation_name != 'pypy' and platform_machine == 'aarch64' and sys_platform == 'linux'" },
{ name = "psycopg-c", version = "3.2.9", source = { url = "https://github.com/paperless-ngx/builder/releases/download/psycopg-3.2.9/psycopg_c-3.2.9-cp312-cp312-linux_x86_64.whl" }, marker = "python_full_version == '3.12.*' and implementation_name != 'pypy' and platform_machine == 'x86_64' and sys_platform == 'linux'" },
]
pool = [
{ name = "psycopg-pool", version = "3.2.6", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'linux' or sys_platform == 'darwin'" },
]
[[package]]
name = "psycopg-c"
@@ -2466,6 +2471,16 @@ wheels = [
{ url = "https://github.com/paperless-ngx/builder/releases/download/psycopg-3.2.9/psycopg_c-3.2.9-cp312-cp312-linux_x86_64.whl", hash = "sha256:250c357319242da102047b04c5cc78af872dbf85c2cb05abf114e1fb5f207917" },
]
[[package]]
name = "psycopg-pool"
version = "3.2.6"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/cf/13/1e7850bb2c69a63267c3dbf37387d3f71a00fd0e2fa55c5db14d64ba1af4/psycopg_pool-3.2.6.tar.gz", hash = "sha256:0f92a7817719517212fbfe2fd58b8c35c1850cdd2a80d36b581ba2085d9148e5", size = 29770, upload-time = "2025-02-26T12:03:47.129Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/47/fd/4feb52a55c1a4bd748f2acaed1903ab54a723c47f6d0242780f4d97104d4/psycopg_pool-3.2.6-py3-none-any.whl", hash = "sha256:5887318a9f6af906d041a0b1dc1c60f8f0dda8340c2572b74e10907b51ed5da7", size = 38252, upload-time = "2025-02-26T12:03:45.073Z" },
]
[[package]]
name = "pyasn1"
version = "0.6.1"