mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-08-12 00:19:48 +00:00
Enhancement: Add a database caching for improved performance (#9784)
--------- Co-authored-by: shamoon <4887959+shamoon@users.noreply.github.com>
This commit is contained in:
17
src/paperless/db_cache.py
Normal file
17
src/paperless/db_cache.py
Normal file
@@ -0,0 +1,17 @@
|
||||
from cachalot.api import invalidate as cachalot_invalidate
|
||||
from cachalot.utils import get_query_cache_key
|
||||
from cachalot.utils import get_table_cache_key
|
||||
|
||||
# Namespace prepended to every cachalot query/table cache key built in this module.
PREFIX = "pngx_cachalot_"
|
||||
|
||||
|
||||
def custom_get_query_cache_key(compiler):
    """
    Build the cache key of a query, namespaced with ``PREFIX``.

    The key is computed by cachalot's ``get_query_cache_key`` for the given
    ``compiler``; ``PREFIX`` is then put in front of it.
    """
    base_key = get_query_cache_key(compiler)
    return PREFIX + base_key
|
||||
|
||||
|
||||
def custom_get_table_cache_key(db_alias, table):
    """
    Build the cache key of a table, namespaced with ``PREFIX``.

    The key is computed by cachalot's ``get_table_cache_key`` for the given
    ``db_alias`` and ``table``; ``PREFIX`` is then put in front of it.
    """
    base_key = get_table_cache_key(db_alias, table)
    return PREFIX + base_key
|
||||
|
||||
|
||||
def invalidate_db_cache():
    """
    Ask cachalot to invalidate the cache registered under the ``read-cache`` alias.

    Returns whatever cachalot's ``invalidate`` returns.
    """
    outcome = cachalot_invalidate(cache_alias="read-cache")
    return outcome
|
@@ -433,6 +433,7 @@ STORAGES = {
|
||||
_CELERY_REDIS_URL, _CHANNELS_REDIS_URL = _parse_redis_url(
|
||||
os.getenv("PAPERLESS_REDIS", None),
|
||||
)
|
||||
# Redis key prefix, read once and reused by the channels layer, the Celery
# broker transport options and the Django caches configured in this module.
_REDIS_KEY_PREFIX = os.getenv("PAPERLESS_REDIS_PREFIX", "")
|
||||
|
||||
TEMPLATES = [
|
||||
{
|
||||
@@ -458,7 +459,7 @@ CHANNEL_LAYERS = {
|
||||
"hosts": [_CHANNELS_REDIS_URL],
|
||||
"capacity": 2000, # default 100
|
||||
"expiry": 15, # default 60
|
||||
"prefix": os.getenv("PAPERLESS_REDIS_PREFIX", ""),
|
||||
"prefix": _REDIS_KEY_PREFIX,
|
||||
},
|
||||
},
|
||||
}
|
||||
@@ -882,7 +883,7 @@ CELERY_SEND_TASK_SENT_EVENT = True
|
||||
CELERY_BROKER_CONNECTION_RETRY = True
|
||||
CELERY_BROKER_CONNECTION_RETRY_ON_STARTUP = True
|
||||
CELERY_BROKER_TRANSPORT_OPTIONS = {
|
||||
"global_keyprefix": os.getenv("PAPERLESS_REDIS_PREFIX", ""),
|
||||
"global_keyprefix": _REDIS_KEY_PREFIX,
|
||||
}
|
||||
|
||||
CELERY_TASK_TRACK_STARTED = True
|
||||
@@ -903,22 +904,69 @@ CELERY_BEAT_SCHEDULE = _parse_beat_schedule()
|
||||
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#beat-schedule-filename
|
||||
CELERY_BEAT_SCHEDULE_FILENAME = str(DATA_DIR / "celerybeat-schedule.db")
|
||||
|
||||
# django setting.
|
||||
CACHES = {
|
||||
"default": {
|
||||
"BACKEND": os.environ.get(
|
||||
"PAPERLESS_CACHE_BACKEND",
|
||||
"django.core.cache.backends.redis.RedisCache",
|
||||
),
|
||||
"LOCATION": _CHANNELS_REDIS_URL,
|
||||
"KEY_PREFIX": os.getenv("PAPERLESS_REDIS_PREFIX", ""),
|
||||
},
|
||||
}
|
||||
|
||||
if DEBUG and os.getenv("PAPERLESS_CACHE_BACKEND") is None:
|
||||
CACHES["default"]["BACKEND"] = (
|
||||
"django.core.cache.backends.locmem.LocMemCache" # pragma: no cover
|
||||
# Cachalot: Database read cache.
|
||||
def _parse_cachalot_settings():
    """
    Build the cachalot (database read cache) settings from the environment.

    Environment variables read:
      - PAPERLESS_READ_CACHE_TTL: cache timeout, default 3600. Values of
        0 or less fall back to 3600; larger values are capped at
        31536000 (one year).
      - PAPERLESS_READ_CACHE_REDIS_URL: passed to ``_parse_redis_url`` to
        obtain the Redis URL of the read cache.
      - PAPERLESS_DB_READ_CACHE_ENABLED: whether the read cache is enabled,
        disabled by default.

    Side effect: when the read cache is enabled, "cachalot" is added to
    ``INSTALLED_APPS`` if it is not already listed.

    Returns a dict with the keys CACHALOT_CACHE, CACHALOT_ENABLED,
    CACHALOT_FINAL_SQL_CHECK, CACHALOT_QUERY_KEYGEN, CACHALOT_TABLE_KEYGEN,
    CACHALOT_REDIS_URL and CACHALOT_TIMEOUT.
    """
    ttl = __get_int("PAPERLESS_READ_CACHE_TTL", 3600)
    ttl = min(ttl, 31536000) if ttl > 0 else 3600
    _, redis_url = _parse_redis_url(
        os.getenv("PAPERLESS_READ_CACHE_REDIS_URL", None),
    )
    result = {
        "CACHALOT_CACHE": "read-cache",
        "CACHALOT_ENABLED": __get_boolean(
            "PAPERLESS_DB_READ_CACHE_ENABLED",
            default="no",
        ),
        "CACHALOT_FINAL_SQL_CHECK": True,
        "CACHALOT_QUERY_KEYGEN": "paperless.db_cache.custom_get_query_cache_key",
        "CACHALOT_TABLE_KEYGEN": "paperless.db_cache.custom_get_table_cache_key",
        "CACHALOT_REDIS_URL": redis_url,
        "CACHALOT_TIMEOUT": ttl,
    }
    # This function runs more than once (at settings import and again from
    # _parse_caches), so the registration has to be idempotent: Django refuses
    # to start when the same application label is listed twice.
    # INSTALLED_APPS is mutated in place, hence no ``global`` statement.
    if result["CACHALOT_ENABLED"] and "cachalot" not in INSTALLED_APPS:
        INSTALLED_APPS.append("cachalot")
    return result
|
||||
|
||||
|
||||
_cachalot_settings = _parse_cachalot_settings()

# Settings driven by the environment
CACHALOT_ENABLED = _cachalot_settings["CACHALOT_ENABLED"]
CACHALOT_TIMEOUT = _cachalot_settings["CACHALOT_TIMEOUT"]

# Settings with a fixed value
CACHALOT_CACHE = _cachalot_settings["CACHALOT_CACHE"]
CACHALOT_FINAL_SQL_CHECK = _cachalot_settings["CACHALOT_FINAL_SQL_CHECK"]
CACHALOT_QUERY_KEYGEN = _cachalot_settings["CACHALOT_QUERY_KEYGEN"]
CACHALOT_TABLE_KEYGEN = _cachalot_settings["CACHALOT_TABLE_KEYGEN"]
|
||||
|
||||
|
||||
# Django default & Cachalot cache configuration
|
||||
# Backend used by both the "default" and the "read-cache" caches.
# PAPERLESS_CACHE_BACKEND wins when set; otherwise the Redis backend is used,
# except when DEBUG is on, where the local-memory backend is used instead.
_CACHE_BACKEND = os.getenv(
    "PAPERLESS_CACHE_BACKEND",
    (
        "django.core.cache.backends.redis.RedisCache"
        if not DEBUG
        else "django.core.cache.backends.locmem.LocMemCache"
    ),
)
|
||||
|
||||
|
||||
def _parse_caches():
    """
    Assemble the value of the Django ``CACHES`` setting.

    Two caches are declared, both using ``_CACHE_BACKEND`` and the
    ``_REDIS_KEY_PREFIX`` key prefix:
      - "default": located at ``_CHANNELS_REDIS_URL``
      - "read-cache": located at the Redis URL returned by
        ``_parse_cachalot_settings``
    """
    default_cache = {
        "BACKEND": _CACHE_BACKEND,
        "LOCATION": _CHANNELS_REDIS_URL,
        "KEY_PREFIX": _REDIS_KEY_PREFIX,
    }
    read_cache = {
        "BACKEND": _CACHE_BACKEND,
        "LOCATION": _parse_cachalot_settings()["CACHALOT_REDIS_URL"],
        "KEY_PREFIX": _REDIS_KEY_PREFIX,
    }
    return {"default": default_cache, "read-cache": read_cache}
|
||||
|
||||
|
||||
# Django cache configuration: the "default" cache and the "read-cache".
CACHES = _parse_caches()
|
||||
|
||||
|
||||
# The individual CACHALOT_* values were copied out above; drop the temporary dict.
del _cachalot_settings
|
||||
|
||||
|
||||
def default_threads_per_worker(task_workers) -> int:
|
||||
|
162
src/paperless/tests/test_db_cache.py
Normal file
162
src/paperless/tests/test_db_cache.py
Normal file
@@ -0,0 +1,162 @@
|
||||
import os
|
||||
import time
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
from cachalot.settings import cachalot_settings
|
||||
from django.conf import settings
|
||||
from django.db import connection
|
||||
from django.test import override_settings
|
||||
from django.test.utils import CaptureQueriesContext
|
||||
|
||||
from documents.models import Tag
|
||||
from paperless.db_cache import invalidate_db_cache
|
||||
from paperless.settings import _parse_cachalot_settings
|
||||
from paperless.settings import _parse_caches
|
||||
|
||||
|
||||
def test_all_redis_caches_have_same_custom_prefix(monkeypatch):
    """
    A custom Redis key prefix must end up on every configured cache:
    the Django default cache as well as the read cache.
    """
    from paperless import settings as paperless_settings

    custom_prefix = "test_a_custom_key_prefix"
    monkeypatch.setattr(paperless_settings, "_REDIS_KEY_PREFIX", custom_prefix)

    caches = _parse_caches()

    for alias in ("read-cache", "default"):
        assert caches[alias]["KEY_PREFIX"] == custom_prefix
|
||||
|
||||
|
||||
class TestDbCacheSettings:
    """
    Checks of the values produced by ``_parse_cachalot_settings`` and
    ``_parse_caches`` for default and custom environments.
    """

    @staticmethod
    def _assert_fixed_settings(parsed):
        """
        Assert the cachalot settings that do not depend on the environment.
        """
        assert parsed["CACHALOT_CACHE"] == "read-cache"
        assert (
            parsed["CACHALOT_QUERY_KEYGEN"]
            == "paperless.db_cache.custom_get_query_cache_key"
        )
        assert (
            parsed["CACHALOT_TABLE_KEYGEN"]
            == "paperless.db_cache.custom_get_table_cache_key"
        )
        assert parsed["CACHALOT_FINAL_SQL_CHECK"] is True

    def test_cachalot_default_settings(self):
        # With the read cache left disabled (the default), cachalot is
        # expected to be absent from the installed apps
        assert "cachalot" not in settings.INSTALLED_APPS
        parsed = _parse_cachalot_settings()
        read_cache = _parse_caches()["read-cache"]

        # Default settings
        assert not parsed["CACHALOT_ENABLED"]
        assert parsed["CACHALOT_TIMEOUT"] == 3600
        assert read_cache["KEY_PREFIX"] == ""
        assert read_cache["LOCATION"] == "redis://localhost:6379"

        # Fixed settings
        self._assert_fixed_settings(parsed)

    @patch.dict(
        os.environ,
        {
            "PAPERLESS_DB_READ_CACHE_ENABLED": "true",
            "PAPERLESS_READ_CACHE_REDIS_URL": "redis://localhost:6380/7",
            "PAPERLESS_READ_CACHE_TTL": "7200",
        },
    )
    def test_cachalot_custom_settings(self):
        parsed = _parse_cachalot_settings()
        # Enabling the read cache registers the cachalot app
        assert "cachalot" in settings.INSTALLED_APPS
        read_cache = _parse_caches()["read-cache"]

        # Modifiable settings
        assert parsed["CACHALOT_ENABLED"]
        assert parsed["CACHALOT_TIMEOUT"] == 7200
        assert read_cache["LOCATION"] == "redis://localhost:6380/7"

        # Fixed settings
        self._assert_fixed_settings(parsed)

    @pytest.mark.parametrize(
        ("env_var_ttl", "expected_cachalot_timeout"),
        [
            # 0 or less is ignored and the default TTL is used instead
            ("0", 3600),
            ("-1", 3600),
            ("-500000", 3600),
            # Any positive value is accepted, up to a maximum of one year
            ("1", 1),
            ("7524", 7524),
            ("99999999999999", 31536000),
        ],
    )
    def test_cachalot_ttl_parsing(
        self,
        env_var_ttl: str,
        expected_cachalot_timeout: int,
    ):
        ttl_env = {"PAPERLESS_READ_CACHE_TTL": str(env_var_ttl)}
        with patch.dict(os.environ, ttl_env):
            parsed = _parse_cachalot_settings()
            assert parsed["CACHALOT_TIMEOUT"] == expected_cachalot_timeout
|
||||
|
||||
|
||||
@override_settings(
    CACHALOT_ENABLED=True,
    CACHALOT_TIMEOUT=1,
)
@pytest.mark.django_db(transaction=True)
def test_cache_hit_when_enabled():
    """
    With cachalot enabled and a one second timeout: a read after an
    invalidation hits the database, the same read repeated right away does
    not, and the read hits the database again once the timeout has elapsed.
    """

    def read_tag_ids():
        # Force evaluation of the queryset
        return list(Tag.objects.values_list("id", flat=True))

    def queries_issued_by_read():
        # Number of database queries issued by a single read of the table
        with CaptureQueriesContext(connection) as captured:
            read_tag_ids()
        return len(captured)

    cachalot_settings.reload()

    assert cachalot_settings.CACHALOT_ENABLED
    assert cachalot_settings.CACHALOT_TIMEOUT == 1
    assert settings.CACHALOT_TIMEOUT == 1

    # Populate the cache with a first read
    read_tag_ids()

    # After an invalidation, the read must go to the database
    invalidate_db_cache()
    assert queries_issued_by_read()

    # The very same read again must be served without any database query
    assert not queries_issued_by_read()

    # Wait for the one second TTL to elapse; the extra 2 ms is a margin for
    # the Redis expiry accuracy (expected to be between 0 and 1 ms)
    time.sleep(1.002)

    # The cached entry has expired, so the database is hit again
    assert queries_issued_by_read()

    # Leave a clean cache behind
    invalidate_db_cache()
|
||||
|
||||
|
||||
@pytest.mark.django_db(transaction=True)
def test_cache_is_disabled_by_default():
    """
    Without enabling cachalot, every read of the table must hit the database.
    """
    cachalot_settings.reload()
    # Start from an invalidated cache, just in case
    invalidate_db_cache()

    # Several identical reads in a row: each one must issue a database query
    attempts = 3
    while attempts:
        with CaptureQueriesContext(connection) as captured:
            list(Tag.objects.values_list("id", flat=True))
        assert len(captured)
        attempts -= 1

    # Leave a clean cache behind
    invalidate_db_cache()
|
Reference in New Issue
Block a user