Compare commits

...

3 Commits

Author SHA1 Message Date
Trenton H
b79edfd271 Adds the checks for depracated env vars 2026-02-13 16:14:51 -08:00
Trenton H
8f0a06c2da Moves to the new parser module 2026-02-13 14:34:20 -08:00
Trenton H
3ceb62f098 Renames the settings to a module for the next steps 2026-02-13 14:02:25 -08:00
6 changed files with 1073 additions and 281 deletions

View File

@@ -202,3 +202,51 @@ def audit_log_check(app_configs, **kwargs):
)
return result
@register()
def check_deprecated_db_settings(app_configs, **kwargs) -> list[Warning]:
"""Check for deprecated database environment variables.
Detects legacy advanced options that should be migrated to
PAPERLESS_DB_OPTIONS.
Returns:
List of Django Warning instances for any deprecated vars found.
"""
deprecated_vars = {
"PAPERLESS_DB_TIMEOUT": "timeout (or connect_timeout for Postgres/MariaDB)",
"PAPERLESS_DB_POOLSIZE": "pool.min_size,pool.max_size",
"PAPERLESS_DBSSLMODE": "sslmode (or ssl_mode for MariaDB)",
"PAPERLESS_DBSSLROOTCERT": "sslrootcert (or ssl.ca for MariaDB)",
"PAPERLESS_DBSSLCERT": "sslcert (or ssl.cert for MariaDB)",
"PAPERLESS_DBSSLKEY": "sslkey (or ssl.key for MariaDB)",
}
found_vars = []
for var_name in deprecated_vars:
if os.getenv(var_name):
found_vars.append(var_name)
if not found_vars:
return []
# Build migration example
examples = []
for var in found_vars:
examples.append(f"{var} -> PAPERLESS_DB_OPTIONS={deprecated_vars[var]}=<value>")
return [
Warning(
"Deprecated database environment variables detected",
# TODO: Need to check this URL
hint=(
f"Found: {', '.join(found_vars)}. "
"These will be removed in v3.2. "
"Migrate to PAPERLESS_DB_OPTIONS instead. "
f"Examples: {'; '.join(examples[:3])}. "
"See https://docs.paperless-ngx.com/migration/"
),
id="paperless.W001",
),
]

View File

@@ -6,7 +6,6 @@ import math
import multiprocessing
import os
import tempfile
from os import PathLike
from pathlib import Path
from typing import Final
from urllib.parse import urlparse
@@ -17,6 +16,13 @@ from dateparser.languages.loader import LocaleDataLoader
from django.utils.translation import gettext_lazy as _
from dotenv import load_dotenv
from paperless.settings.custom import parse_db_settings
from paperless.settings.parsers import get_bool_from_env
from paperless.settings.parsers import get_float_from_env
from paperless.settings.parsers import get_int_from_env
from paperless.settings.parsers import get_list_from_env
from paperless.settings.parsers import get_path_from_env
logger = logging.getLogger("paperless.settings")
# Tap paperless.conf if it's available
@@ -43,76 +49,6 @@ for path in [
os.environ["OMP_THREAD_LIMIT"] = "1"
def __get_boolean(key: str, default: str = "NO") -> bool:
"""
Return a boolean value based on whatever the user has supplied in the
environment based on whether the value "looks like" it's True or not.
"""
return bool(os.getenv(key, default).lower() in ("yes", "y", "1", "t", "true"))
def __get_int(key: str, default: int) -> int:
"""
Return an integer value based on the environment variable or a default
"""
return int(os.getenv(key, default))
def __get_optional_int(key: str) -> int | None:
"""
Returns None if the environment key is not present, otherwise an integer
"""
if key in os.environ:
return __get_int(key, -1) # pragma: no cover
return None
def __get_float(key: str, default: float) -> float:
"""
Return an integer value based on the environment variable or a default
"""
return float(os.getenv(key, default))
def __get_path(
key: str,
default: PathLike | str,
) -> Path:
"""
Return a normalized, absolute path based on the environment variable or a default,
if provided
"""
if key in os.environ:
return Path(os.environ[key]).resolve()
return Path(default).resolve()
def __get_optional_path(key: str) -> Path | None:
"""
Returns None if the environment key is not present, otherwise a fully resolved Path
"""
if key in os.environ:
return __get_path(key, "")
return None
def __get_list(
key: str,
default: list[str] | None = None,
sep: str = ",",
) -> list[str]:
"""
Return a list of elements from the environment, as separated by the given
string, or the default if the key does not exist
"""
if key in os.environ:
return list(filter(None, os.environ[key].split(sep)))
elif default is not None:
return default
else:
return []
def _parse_redis_url(env_redis: str | None) -> tuple[str, str]:
"""
Gets the Redis information from the environment or a default and handles
@@ -275,7 +211,7 @@ def _parse_beat_schedule() -> dict:
# NEVER RUN WITH DEBUG IN PRODUCTION.
DEBUG = __get_boolean("PAPERLESS_DEBUG", "NO")
DEBUG = get_bool_from_env("PAPERLESS_DEBUG", "NO")
###############################################################################
@@ -284,21 +220,21 @@ DEBUG = __get_boolean("PAPERLESS_DEBUG", "NO")
BASE_DIR: Path = Path(__file__).resolve().parent.parent
STATIC_ROOT = __get_path("PAPERLESS_STATICDIR", BASE_DIR.parent / "static")
STATIC_ROOT = get_path_from_env("PAPERLESS_STATICDIR", BASE_DIR.parent / "static")
MEDIA_ROOT = __get_path("PAPERLESS_MEDIA_ROOT", BASE_DIR.parent / "media")
MEDIA_ROOT = get_path_from_env("PAPERLESS_MEDIA_ROOT", BASE_DIR.parent / "media")
ORIGINALS_DIR = MEDIA_ROOT / "documents" / "originals"
ARCHIVE_DIR = MEDIA_ROOT / "documents" / "archive"
THUMBNAIL_DIR = MEDIA_ROOT / "documents" / "thumbnails"
SHARE_LINK_BUNDLE_DIR = MEDIA_ROOT / "documents" / "share_link_bundles"
DATA_DIR = __get_path("PAPERLESS_DATA_DIR", BASE_DIR.parent / "data")
DATA_DIR = get_path_from_env("PAPERLESS_DATA_DIR", BASE_DIR.parent / "data")
NLTK_DIR = __get_path("PAPERLESS_NLTK_DIR", "/usr/share/nltk_data")
NLTK_DIR = get_path_from_env("PAPERLESS_NLTK_DIR", "/usr/share/nltk_data")
# Check deprecated setting first
EMPTY_TRASH_DIR = (
__get_path("PAPERLESS_TRASH_DIR", os.getenv("PAPERLESS_EMPTY_TRASH_DIR"))
get_path_from_env("PAPERLESS_TRASH_DIR", os.getenv("PAPERLESS_EMPTY_TRASH_DIR"))
if os.getenv("PAPERLESS_TRASH_DIR") or os.getenv("PAPERLESS_EMPTY_TRASH_DIR")
else None
)
@@ -307,21 +243,21 @@ EMPTY_TRASH_DIR = (
# threads.
MEDIA_LOCK = MEDIA_ROOT / "media.lock"
INDEX_DIR = DATA_DIR / "index"
MODEL_FILE = __get_path(
MODEL_FILE = get_path_from_env(
"PAPERLESS_MODEL_FILE",
DATA_DIR / "classification_model.pickle",
)
LLM_INDEX_DIR = DATA_DIR / "llm_index"
LOGGING_DIR = __get_path("PAPERLESS_LOGGING_DIR", DATA_DIR / "log")
LOGGING_DIR = get_path_from_env("PAPERLESS_LOGGING_DIR", DATA_DIR / "log")
CONSUMPTION_DIR = __get_path(
CONSUMPTION_DIR = get_path_from_env(
"PAPERLESS_CONSUMPTION_DIR",
BASE_DIR.parent / "consume",
)
# This will be created if it doesn't exist
SCRATCH_DIR = __get_path(
SCRATCH_DIR = get_path_from_env(
"PAPERLESS_SCRATCH_DIR",
Path(tempfile.gettempdir()) / "paperless",
)
@@ -330,7 +266,7 @@ SCRATCH_DIR = __get_path(
# Application Definition #
###############################################################################
env_apps = __get_list("PAPERLESS_APPS")
env_apps = get_list_from_env("PAPERLESS_APPS")
INSTALLED_APPS = [
"whitenoise.runserver_nostatic",
@@ -403,7 +339,7 @@ MIDDLEWARE = [
]
# Optional to enable compression
if __get_boolean("PAPERLESS_ENABLE_COMPRESSION", "yes"): # pragma: no cover
if get_bool_from_env("PAPERLESS_ENABLE_COMPRESSION", "yes"): # pragma: no cover
MIDDLEWARE.insert(0, "compression_middleware.middleware.CompressionMiddleware")
# Workaround to not compress streaming responses (e.g. chat).
@@ -512,8 +448,8 @@ EMAIL_PORT: Final[int] = int(os.getenv("PAPERLESS_EMAIL_PORT", 25))
EMAIL_HOST_USER: Final[str] = os.getenv("PAPERLESS_EMAIL_HOST_USER", "")
EMAIL_HOST_PASSWORD: Final[str] = os.getenv("PAPERLESS_EMAIL_HOST_PASSWORD", "")
DEFAULT_FROM_EMAIL: Final[str] = os.getenv("PAPERLESS_EMAIL_FROM", EMAIL_HOST_USER)
EMAIL_USE_TLS: Final[bool] = __get_boolean("PAPERLESS_EMAIL_USE_TLS")
EMAIL_USE_SSL: Final[bool] = __get_boolean("PAPERLESS_EMAIL_USE_SSL")
EMAIL_USE_TLS: Final[bool] = get_bool_from_env("PAPERLESS_EMAIL_USE_TLS")
EMAIL_USE_SSL: Final[bool] = get_bool_from_env("PAPERLESS_EMAIL_USE_SSL")
EMAIL_SUBJECT_PREFIX: Final[str] = "[Paperless-ngx] "
EMAIL_TIMEOUT = 30.0
EMAIL_ENABLED = EMAIL_HOST != "localhost" or EMAIL_HOST_USER != ""
@@ -538,20 +474,22 @@ ACCOUNT_DEFAULT_HTTP_PROTOCOL = os.getenv(
)
ACCOUNT_ADAPTER = "paperless.adapter.CustomAccountAdapter"
ACCOUNT_ALLOW_SIGNUPS = __get_boolean("PAPERLESS_ACCOUNT_ALLOW_SIGNUPS")
ACCOUNT_DEFAULT_GROUPS = __get_list("PAPERLESS_ACCOUNT_DEFAULT_GROUPS")
ACCOUNT_ALLOW_SIGNUPS = get_bool_from_env("PAPERLESS_ACCOUNT_ALLOW_SIGNUPS")
ACCOUNT_DEFAULT_GROUPS = get_list_from_env("PAPERLESS_ACCOUNT_DEFAULT_GROUPS")
SOCIALACCOUNT_ADAPTER = "paperless.adapter.CustomSocialAccountAdapter"
SOCIALACCOUNT_ALLOW_SIGNUPS = __get_boolean(
SOCIALACCOUNT_ALLOW_SIGNUPS = get_bool_from_env(
"PAPERLESS_SOCIALACCOUNT_ALLOW_SIGNUPS",
"yes",
)
SOCIALACCOUNT_AUTO_SIGNUP = __get_boolean("PAPERLESS_SOCIAL_AUTO_SIGNUP")
SOCIALACCOUNT_AUTO_SIGNUP = get_bool_from_env("PAPERLESS_SOCIAL_AUTO_SIGNUP")
SOCIALACCOUNT_PROVIDERS = json.loads(
os.getenv("PAPERLESS_SOCIALACCOUNT_PROVIDERS", "{}"),
)
SOCIAL_ACCOUNT_DEFAULT_GROUPS = __get_list("PAPERLESS_SOCIAL_ACCOUNT_DEFAULT_GROUPS")
SOCIAL_ACCOUNT_SYNC_GROUPS = __get_boolean("PAPERLESS_SOCIAL_ACCOUNT_SYNC_GROUPS")
SOCIAL_ACCOUNT_DEFAULT_GROUPS = get_list_from_env(
"PAPERLESS_SOCIAL_ACCOUNT_DEFAULT_GROUPS",
)
SOCIAL_ACCOUNT_SYNC_GROUPS = get_bool_from_env("PAPERLESS_SOCIAL_ACCOUNT_SYNC_GROUPS")
SOCIAL_ACCOUNT_SYNC_GROUPS_CLAIM: Final[str] = os.getenv(
"PAPERLESS_SOCIAL_ACCOUNT_SYNC_GROUPS_CLAIM",
"groups",
@@ -563,8 +501,8 @@ MFA_TOTP_ISSUER = "Paperless-ngx"
ACCOUNT_EMAIL_SUBJECT_PREFIX = "[Paperless-ngx] "
DISABLE_REGULAR_LOGIN = __get_boolean("PAPERLESS_DISABLE_REGULAR_LOGIN")
REDIRECT_LOGIN_TO_SSO = __get_boolean("PAPERLESS_REDIRECT_LOGIN_TO_SSO")
DISABLE_REGULAR_LOGIN = get_bool_from_env("PAPERLESS_DISABLE_REGULAR_LOGIN")
REDIRECT_LOGIN_TO_SSO = get_bool_from_env("PAPERLESS_REDIRECT_LOGIN_TO_SSO")
AUTO_LOGIN_USERNAME = os.getenv("PAPERLESS_AUTO_LOGIN_USERNAME")
@@ -577,12 +515,15 @@ ACCOUNT_EMAIL_VERIFICATION = (
)
)
ACCOUNT_EMAIL_UNKNOWN_ACCOUNTS = __get_boolean(
ACCOUNT_EMAIL_UNKNOWN_ACCOUNTS = get_bool_from_env(
"PAPERLESS_ACCOUNT_EMAIL_UNKNOWN_ACCOUNTS",
"True",
)
ACCOUNT_SESSION_REMEMBER = __get_boolean("PAPERLESS_ACCOUNT_SESSION_REMEMBER", "True")
ACCOUNT_SESSION_REMEMBER = get_bool_from_env(
"PAPERLESS_ACCOUNT_SESSION_REMEMBER",
"True",
)
SESSION_EXPIRE_AT_BROWSER_CLOSE = not ACCOUNT_SESSION_REMEMBER
SESSION_COOKIE_AGE = int(
os.getenv("PAPERLESS_SESSION_COOKIE_AGE", 60 * 60 * 24 * 7 * 3),
@@ -599,8 +540,8 @@ if AUTO_LOGIN_USERNAME:
def _parse_remote_user_settings() -> str:
global MIDDLEWARE, AUTHENTICATION_BACKENDS, REST_FRAMEWORK
enable = __get_boolean("PAPERLESS_ENABLE_HTTP_REMOTE_USER")
enable_api = __get_boolean("PAPERLESS_ENABLE_HTTP_REMOTE_USER_API")
enable = get_bool_from_env("PAPERLESS_ENABLE_HTTP_REMOTE_USER")
enable_api = get_bool_from_env("PAPERLESS_ENABLE_HTTP_REMOTE_USER_API")
if enable or enable_api:
MIDDLEWARE.append("paperless.auth.HttpRemoteUserMiddleware")
AUTHENTICATION_BACKENDS.insert(
@@ -628,16 +569,16 @@ HTTP_REMOTE_USER_HEADER_NAME = _parse_remote_user_settings()
X_FRAME_OPTIONS = "SAMEORIGIN"
# The next 3 settings can also be set using just PAPERLESS_URL
CSRF_TRUSTED_ORIGINS = __get_list("PAPERLESS_CSRF_TRUSTED_ORIGINS")
CSRF_TRUSTED_ORIGINS = get_list_from_env("PAPERLESS_CSRF_TRUSTED_ORIGINS")
if DEBUG:
# Allow access from the angular development server during debugging
CSRF_TRUSTED_ORIGINS.append("http://localhost:4200")
# We allow CORS from localhost:8000
CORS_ALLOWED_ORIGINS = __get_list(
CORS_ALLOWED_ORIGINS = get_list_from_env(
"PAPERLESS_CORS_ALLOWED_HOSTS",
["http://localhost:8000"],
default=["http://localhost:8000"],
)
if DEBUG:
@@ -650,7 +591,7 @@ CORS_EXPOSE_HEADERS = [
"Content-Disposition",
]
ALLOWED_HOSTS = __get_list("PAPERLESS_ALLOWED_HOSTS", ["*"])
ALLOWED_HOSTS = get_list_from_env("PAPERLESS_ALLOWED_HOSTS", default=["*"])
if ALLOWED_HOSTS != ["*"]:
# always allow localhost. Necessary e.g. for healthcheck in docker.
ALLOWED_HOSTS.append("localhost")
@@ -670,10 +611,10 @@ def _parse_paperless_url():
PAPERLESS_URL = _parse_paperless_url()
# For use with trusted proxies
TRUSTED_PROXIES = __get_list("PAPERLESS_TRUSTED_PROXIES")
TRUSTED_PROXIES = get_list_from_env("PAPERLESS_TRUSTED_PROXIES")
USE_X_FORWARDED_HOST = __get_boolean("PAPERLESS_USE_X_FORWARD_HOST", "false")
USE_X_FORWARDED_PORT = __get_boolean("PAPERLESS_USE_X_FORWARD_PORT", "false")
USE_X_FORWARDED_HOST = get_bool_from_env("PAPERLESS_USE_X_FORWARD_HOST", "false")
USE_X_FORWARDED_PORT = get_bool_from_env("PAPERLESS_USE_X_FORWARD_PORT", "false")
SECURE_PROXY_SSL_HEADER = (
tuple(json.loads(os.environ["PAPERLESS_PROXY_SSL_HEADER"]))
if "PAPERLESS_PROXY_SSL_HEADER" in os.environ
@@ -716,98 +657,15 @@ CSRF_COOKIE_NAME = f"{COOKIE_PREFIX}csrftoken"
SESSION_COOKIE_NAME = f"{COOKIE_PREFIX}sessionid"
LANGUAGE_COOKIE_NAME = f"{COOKIE_PREFIX}django_language"
EMAIL_CERTIFICATE_FILE = __get_optional_path("PAPERLESS_EMAIL_CERTIFICATE_LOCATION")
EMAIL_CERTIFICATE_FILE = get_path_from_env("PAPERLESS_EMAIL_CERTIFICATE_LOCATION", None)
###############################################################################
# Database #
###############################################################################
def _parse_db_settings() -> dict:
databases = {
"default": {
"ENGINE": "django.db.backends.sqlite3",
"NAME": DATA_DIR / "db.sqlite3",
"OPTIONS": {},
},
}
if os.getenv("PAPERLESS_DBHOST"):
# Have sqlite available as a second option for management commands
# This is important when migrating to/from sqlite
databases["sqlite"] = databases["default"].copy()
DATABASES = parse_db_settings(DATA_DIR)
databases["default"] = {
"HOST": os.getenv("PAPERLESS_DBHOST"),
"NAME": os.getenv("PAPERLESS_DBNAME", "paperless"),
"USER": os.getenv("PAPERLESS_DBUSER", "paperless"),
"PASSWORD": os.getenv("PAPERLESS_DBPASS", "paperless"),
"OPTIONS": {},
}
if os.getenv("PAPERLESS_DBPORT"):
databases["default"]["PORT"] = os.getenv("PAPERLESS_DBPORT")
# Leave room for future extensibility
if os.getenv("PAPERLESS_DBENGINE") == "mariadb":
engine = "django.db.backends.mysql"
# Contrary to Postgres, Django does not natively support connection pooling for MariaDB.
# However, since MariaDB uses threads instead of forks, establishing connections is significantly faster
# compared to PostgreSQL, so the lack of pooling is not an issue
options = {
"read_default_file": "/etc/mysql/my.cnf",
"charset": "utf8mb4",
"ssl_mode": os.getenv("PAPERLESS_DBSSLMODE", "PREFERRED"),
"ssl": {
"ca": os.getenv("PAPERLESS_DBSSLROOTCERT", None),
"cert": os.getenv("PAPERLESS_DBSSLCERT", None),
"key": os.getenv("PAPERLESS_DBSSLKEY", None),
},
}
else: # Default to PostgresDB
engine = "django.db.backends.postgresql"
options = {
"sslmode": os.getenv("PAPERLESS_DBSSLMODE", "prefer"),
"sslrootcert": os.getenv("PAPERLESS_DBSSLROOTCERT", None),
"sslcert": os.getenv("PAPERLESS_DBSSLCERT", None),
"sslkey": os.getenv("PAPERLESS_DBSSLKEY", None),
}
if int(os.getenv("PAPERLESS_DB_POOLSIZE", 0)) > 0:
options.update(
{
"pool": {
"min_size": 1,
"max_size": int(os.getenv("PAPERLESS_DB_POOLSIZE")),
},
},
)
databases["default"]["ENGINE"] = engine
databases["default"]["OPTIONS"].update(options)
if os.getenv("PAPERLESS_DB_TIMEOUT") is not None:
if databases["default"]["ENGINE"] == "django.db.backends.sqlite3":
databases["default"]["OPTIONS"].update(
{"timeout": int(os.getenv("PAPERLESS_DB_TIMEOUT"))},
)
else:
databases["default"]["OPTIONS"].update(
{"connect_timeout": int(os.getenv("PAPERLESS_DB_TIMEOUT"))},
)
databases["sqlite"]["OPTIONS"].update(
{"timeout": int(os.getenv("PAPERLESS_DB_TIMEOUT"))},
)
return databases
DATABASES = _parse_db_settings()
if os.getenv("PAPERLESS_DBENGINE") == "mariadb":
# Silence Django error on old MariaDB versions.
# VARCHAR can support > 255 in modern versions
# https://docs.djangoproject.com/en/4.1/ref/checks/#database
# https://mariadb.com/kb/en/innodb-system-variables/#innodb_large_prefix
SILENCED_SYSTEM_CHECKS = ["mysql.W003"]
DEFAULT_AUTO_FIELD = "django.db.models.AutoField"
DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField"
###############################################################################
# Internationalization #
@@ -942,7 +800,7 @@ CELERY_BROKER_URL = _CELERY_REDIS_URL
CELERY_TIMEZONE = TIME_ZONE
CELERY_WORKER_HIJACK_ROOT_LOGGER = False
CELERY_WORKER_CONCURRENCY: Final[int] = __get_int("PAPERLESS_TASK_WORKERS", 1)
CELERY_WORKER_CONCURRENCY: Final[int] = get_int_from_env("PAPERLESS_TASK_WORKERS", 1)
TASK_WORKERS = CELERY_WORKER_CONCURRENCY
CELERY_WORKER_MAX_TASKS_PER_CHILD = 1
CELERY_WORKER_SEND_TASK_EVENTS = True
@@ -955,7 +813,7 @@ CELERY_BROKER_TRANSPORT_OPTIONS = {
}
CELERY_TASK_TRACK_STARTED = True
CELERY_TASK_TIME_LIMIT: Final[int] = __get_int("PAPERLESS_WORKER_TIMEOUT", 1800)
CELERY_TASK_TIME_LIMIT: Final[int] = get_int_from_env("PAPERLESS_WORKER_TIMEOUT", 1800)
CELERY_RESULT_EXTENDED = True
CELERY_RESULT_BACKEND = "django-db"
@@ -975,14 +833,14 @@ CELERY_BEAT_SCHEDULE_FILENAME = str(DATA_DIR / "celerybeat-schedule.db")
# Cachalot: Database read cache.
def _parse_cachalot_settings():
ttl = __get_int("PAPERLESS_READ_CACHE_TTL", 3600)
ttl = get_int_from_env("PAPERLESS_READ_CACHE_TTL", 3600)
ttl = min(ttl, 31536000) if ttl > 0 else 3600
_, redis_url = _parse_redis_url(
os.getenv("PAPERLESS_READ_CACHE_REDIS_URL", _CHANNELS_REDIS_URL),
)
result = {
"CACHALOT_CACHE": "read-cache",
"CACHALOT_ENABLED": __get_boolean(
"CACHALOT_ENABLED": get_bool_from_env(
"PAPERLESS_DB_READ_CACHE_ENABLED",
default="no",
),
@@ -1067,9 +925,9 @@ CONSUMER_POLLING_INTERVAL = float(os.getenv("PAPERLESS_CONSUMER_POLLING_INTERVAL
CONSUMER_STABILITY_DELAY = float(os.getenv("PAPERLESS_CONSUMER_STABILITY_DELAY", 5))
CONSUMER_DELETE_DUPLICATES = __get_boolean("PAPERLESS_CONSUMER_DELETE_DUPLICATES")
CONSUMER_DELETE_DUPLICATES = get_bool_from_env("PAPERLESS_CONSUMER_DELETE_DUPLICATES")
CONSUMER_RECURSIVE = __get_boolean("PAPERLESS_CONSUMER_RECURSIVE")
CONSUMER_RECURSIVE = get_bool_from_env("PAPERLESS_CONSUMER_RECURSIVE")
# Ignore regex patterns, matched against filename only
CONSUMER_IGNORE_PATTERNS = list(
@@ -1091,13 +949,13 @@ CONSUMER_IGNORE_DIRS = list(
),
)
CONSUMER_SUBDIRS_AS_TAGS = __get_boolean("PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS")
CONSUMER_SUBDIRS_AS_TAGS = get_bool_from_env("PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS")
CONSUMER_ENABLE_BARCODES: Final[bool] = __get_boolean(
CONSUMER_ENABLE_BARCODES: Final[bool] = get_bool_from_env(
"PAPERLESS_CONSUMER_ENABLE_BARCODES",
)
CONSUMER_BARCODE_TIFF_SUPPORT: Final[bool] = __get_boolean(
CONSUMER_BARCODE_TIFF_SUPPORT: Final[bool] = get_bool_from_env(
"PAPERLESS_CONSUMER_BARCODE_TIFF_SUPPORT",
)
@@ -1106,7 +964,7 @@ CONSUMER_BARCODE_STRING: Final[str] = os.getenv(
"PATCHT",
)
CONSUMER_ENABLE_ASN_BARCODE: Final[bool] = __get_boolean(
CONSUMER_ENABLE_ASN_BARCODE: Final[bool] = get_bool_from_env(
"PAPERLESS_CONSUMER_ENABLE_ASN_BARCODE",
)
@@ -1115,23 +973,26 @@ CONSUMER_ASN_BARCODE_PREFIX: Final[str] = os.getenv(
"ASN",
)
CONSUMER_BARCODE_UPSCALE: Final[float] = __get_float(
CONSUMER_BARCODE_UPSCALE: Final[float] = get_float_from_env(
"PAPERLESS_CONSUMER_BARCODE_UPSCALE",
0.0,
)
CONSUMER_BARCODE_DPI: Final[int] = __get_int("PAPERLESS_CONSUMER_BARCODE_DPI", 300)
CONSUMER_BARCODE_DPI: Final[int] = get_int_from_env(
"PAPERLESS_CONSUMER_BARCODE_DPI",
300,
)
CONSUMER_BARCODE_MAX_PAGES: Final[int] = __get_int(
CONSUMER_BARCODE_MAX_PAGES: Final[int] = get_int_from_env(
"PAPERLESS_CONSUMER_BARCODE_MAX_PAGES",
0,
)
CONSUMER_BARCODE_RETAIN_SPLIT_PAGES = __get_boolean(
CONSUMER_BARCODE_RETAIN_SPLIT_PAGES = get_bool_from_env(
"PAPERLESS_CONSUMER_BARCODE_RETAIN_SPLIT_PAGES",
)
CONSUMER_ENABLE_TAG_BARCODE: Final[bool] = __get_boolean(
CONSUMER_ENABLE_TAG_BARCODE: Final[bool] = get_bool_from_env(
"PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE",
)
@@ -1144,11 +1005,11 @@ CONSUMER_TAG_BARCODE_MAPPING = dict(
),
)
CONSUMER_TAG_BARCODE_SPLIT: Final[bool] = __get_boolean(
CONSUMER_TAG_BARCODE_SPLIT: Final[bool] = get_bool_from_env(
"PAPERLESS_CONSUMER_TAG_BARCODE_SPLIT",
)
CONSUMER_ENABLE_COLLATE_DOUBLE_SIDED: Final[bool] = __get_boolean(
CONSUMER_ENABLE_COLLATE_DOUBLE_SIDED: Final[bool] = get_bool_from_env(
"PAPERLESS_CONSUMER_ENABLE_COLLATE_DOUBLE_SIDED",
)
@@ -1157,13 +1018,13 @@ CONSUMER_COLLATE_DOUBLE_SIDED_SUBDIR_NAME: Final[str] = os.getenv(
"double-sided",
)
CONSUMER_COLLATE_DOUBLE_SIDED_TIFF_SUPPORT: Final[bool] = __get_boolean(
CONSUMER_COLLATE_DOUBLE_SIDED_TIFF_SUPPORT: Final[bool] = get_bool_from_env(
"PAPERLESS_CONSUMER_COLLATE_DOUBLE_SIDED_TIFF_SUPPORT",
)
CONSUMER_PDF_RECOVERABLE_MIME_TYPES = ("application/octet-stream",)
OCR_PAGES = __get_optional_int("PAPERLESS_OCR_PAGES")
OCR_PAGES = get_int_from_env("PAPERLESS_OCR_PAGES", None)
# The default language that tesseract will attempt to use when parsing
# documents. It should be a 3-letter language code consistent with ISO 639.
@@ -1177,21 +1038,22 @@ OCR_MODE = os.getenv("PAPERLESS_OCR_MODE", "skip")
OCR_SKIP_ARCHIVE_FILE = os.getenv("PAPERLESS_OCR_SKIP_ARCHIVE_FILE", "never")
OCR_IMAGE_DPI = __get_optional_int("PAPERLESS_OCR_IMAGE_DPI")
OCR_IMAGE_DPI = get_int_from_env("PAPERLESS_OCR_IMAGE_DPI", None)
OCR_CLEAN = os.getenv("PAPERLESS_OCR_CLEAN", "clean")
OCR_DESKEW: Final[bool] = __get_boolean("PAPERLESS_OCR_DESKEW", "true")
OCR_DESKEW: Final[bool] = get_bool_from_env("PAPERLESS_OCR_DESKEW", "true")
OCR_ROTATE_PAGES: Final[bool] = __get_boolean("PAPERLESS_OCR_ROTATE_PAGES", "true")
OCR_ROTATE_PAGES: Final[bool] = get_bool_from_env("PAPERLESS_OCR_ROTATE_PAGES", "true")
OCR_ROTATE_PAGES_THRESHOLD: Final[float] = __get_float(
OCR_ROTATE_PAGES_THRESHOLD: Final[float] = get_float_from_env(
"PAPERLESS_OCR_ROTATE_PAGES_THRESHOLD",
12.0,
)
OCR_MAX_IMAGE_PIXELS: Final[int | None] = __get_optional_int(
OCR_MAX_IMAGE_PIXELS: Final[int | None] = get_int_from_env(
"PAPERLESS_OCR_MAX_IMAGE_PIXELS",
None,
)
OCR_COLOR_CONVERSION_STRATEGY = os.getenv(
@@ -1201,8 +1063,9 @@ OCR_COLOR_CONVERSION_STRATEGY = os.getenv(
OCR_USER_ARGS = os.getenv("PAPERLESS_OCR_USER_ARGS")
MAX_IMAGE_PIXELS: Final[int | None] = __get_optional_int(
MAX_IMAGE_PIXELS: Final[int | None] = get_int_from_env(
"PAPERLESS_MAX_IMAGE_PIXELS",
None,
)
# GNUPG needs a home directory for some reason
@@ -1216,7 +1079,7 @@ CONVERT_MEMORY_LIMIT = os.getenv("PAPERLESS_CONVERT_MEMORY_LIMIT")
GS_BINARY = os.getenv("PAPERLESS_GS_BINARY", "gs")
# Fallback layout for .eml consumption
EMAIL_PARSE_DEFAULT_LAYOUT = __get_int(
EMAIL_PARSE_DEFAULT_LAYOUT = get_int_from_env(
"PAPERLESS_EMAIL_PARSE_DEFAULT_LAYOUT",
1, # MailRule.PdfLayout.TEXT_HTML but that can't be imported here
)
@@ -1257,7 +1120,7 @@ DATE_PARSER_LANGUAGES = (
# Maximum number of dates taken from document start to end to show as suggestions for
# `created` date in the frontend. Duplicates are removed, which can result in
# fewer dates shown.
NUMBER_OF_SUGGESTED_DATES = __get_int("PAPERLESS_NUMBER_OF_SUGGESTED_DATES", 3)
NUMBER_OF_SUGGESTED_DATES = get_int_from_env("PAPERLESS_NUMBER_OF_SUGGESTED_DATES", 3)
# Specify the filename format for out files
FILENAME_FORMAT = os.getenv("PAPERLESS_FILENAME_FORMAT")
@@ -1265,7 +1128,7 @@ FILENAME_FORMAT = os.getenv("PAPERLESS_FILENAME_FORMAT")
# If this is enabled, variables in filename format will resolve to
# empty-string instead of 'none'.
# Directories with 'empty names' are omitted, too.
FILENAME_FORMAT_REMOVE_NONE = __get_boolean(
FILENAME_FORMAT_REMOVE_NONE = get_bool_from_env(
"PAPERLESS_FILENAME_FORMAT_REMOVE_NONE",
"NO",
)
@@ -1276,7 +1139,7 @@ THUMBNAIL_FONT_NAME = os.getenv(
)
# Tika settings
TIKA_ENABLED = __get_boolean("PAPERLESS_TIKA_ENABLED", "NO")
TIKA_ENABLED = get_bool_from_env("PAPERLESS_TIKA_ENABLED", "NO")
TIKA_ENDPOINT = os.getenv("PAPERLESS_TIKA_ENDPOINT", "http://localhost:9998")
TIKA_GOTENBERG_ENDPOINT = os.getenv(
"PAPERLESS_TIKA_GOTENBERG_ENDPOINT",
@@ -1286,7 +1149,7 @@ TIKA_GOTENBERG_ENDPOINT = os.getenv(
if TIKA_ENABLED:
INSTALLED_APPS.append("paperless_tika.apps.PaperlessTikaConfig")
AUDIT_LOG_ENABLED = __get_boolean("PAPERLESS_AUDIT_LOG_ENABLED", "true")
AUDIT_LOG_ENABLED = get_bool_from_env("PAPERLESS_AUDIT_LOG_ENABLED", "true")
if AUDIT_LOG_ENABLED:
INSTALLED_APPS.append("auditlog")
MIDDLEWARE.append("auditlog.middleware.AuditlogMiddleware")
@@ -1331,7 +1194,7 @@ if os.getenv("PAPERLESS_IGNORE_DATES") is not None:
ENABLE_UPDATE_CHECK = os.getenv("PAPERLESS_ENABLE_UPDATE_CHECK", "default")
if ENABLE_UPDATE_CHECK != "default":
ENABLE_UPDATE_CHECK = __get_boolean("PAPERLESS_ENABLE_UPDATE_CHECK")
ENABLE_UPDATE_CHECK = get_bool_from_env("PAPERLESS_ENABLE_UPDATE_CHECK")
APP_TITLE = os.getenv("PAPERLESS_APP_TITLE", None)
APP_LOGO = os.getenv("PAPERLESS_APP_LOGO", None)
@@ -1376,7 +1239,7 @@ def _get_nltk_language_setting(ocr_lang: str) -> str | None:
return iso_code_to_nltk.get(ocr_lang)
NLTK_ENABLED: Final[bool] = __get_boolean("PAPERLESS_ENABLE_NLTK", "yes")
NLTK_ENABLED: Final[bool] = get_bool_from_env("PAPERLESS_ENABLE_NLTK", "yes")
NLTK_LANGUAGE: str | None = _get_nltk_language_setting(OCR_LANGUAGE)
@@ -1385,7 +1248,7 @@ NLTK_LANGUAGE: str | None = _get_nltk_language_setting(OCR_LANGUAGE)
###############################################################################
EMAIL_GNUPG_HOME: Final[str | None] = os.getenv("PAPERLESS_EMAIL_GNUPG_HOME")
EMAIL_ENABLE_GPG_DECRYPTOR: Final[bool] = __get_boolean(
EMAIL_ENABLE_GPG_DECRYPTOR: Final[bool] = get_bool_from_env(
"PAPERLESS_ENABLE_GPG_DECRYPTOR",
)
@@ -1393,7 +1256,7 @@ EMAIL_ENABLE_GPG_DECRYPTOR: Final[bool] = __get_boolean(
###############################################################################
# Soft Delete #
###############################################################################
EMPTY_TRASH_DELAY = max(__get_int("PAPERLESS_EMPTY_TRASH_DELAY", 30), 1)
EMPTY_TRASH_DELAY = max(get_int_from_env("PAPERLESS_EMPTY_TRASH_DELAY", 30), 1)
###############################################################################
@@ -1420,19 +1283,19 @@ OUTLOOK_OAUTH_ENABLED = bool(
###############################################################################
WEBHOOKS_ALLOWED_SCHEMES = set(
s.lower()
for s in __get_list(
for s in get_list_from_env(
"PAPERLESS_WEBHOOKS_ALLOWED_SCHEMES",
["http", "https"],
default=["http", "https"],
)
)
WEBHOOKS_ALLOWED_PORTS = set(
int(p)
for p in __get_list(
for p in get_list_from_env(
"PAPERLESS_WEBHOOKS_ALLOWED_PORTS",
[],
default=[],
)
)
WEBHOOKS_ALLOW_INTERNAL_REQUESTS = __get_boolean(
WEBHOOKS_ALLOW_INTERNAL_REQUESTS = get_bool_from_env(
"PAPERLESS_WEBHOOKS_ALLOW_INTERNAL_REQUESTS",
"true",
)
@@ -1447,7 +1310,7 @@ REMOTE_OCR_ENDPOINT = os.getenv("PAPERLESS_REMOTE_OCR_ENDPOINT")
################################################################################
# AI Settings #
################################################################################
AI_ENABLED = __get_boolean("PAPERLESS_AI_ENABLED", "NO")
AI_ENABLED = get_bool_from_env("PAPERLESS_AI_ENABLED", "NO")
LLM_EMBEDDING_BACKEND = os.getenv(
"PAPERLESS_AI_LLM_EMBEDDING_BACKEND",
) # "huggingface" or "openai"

View File

@@ -0,0 +1,278 @@
import os
from pathlib import Path
from typing import TypeAlias
from celery.schedules import crontab
from paperless.settings.parsers import get_choice_from_env
from paperless.settings.parsers import get_int_from_env
from paperless.settings.parsers import parse_dict_from_str
# Covers: ENGINE/NAME/HOST/USER/PASSWORD (str), PORT (int), OPTIONS (dict)
DatabaseConfig: TypeAlias = dict[str, str | int | dict[str, str | int | dict | None]]
def parse_hosting_settings() -> tuple[str | None, str, str, str, str]:
script_name = os.getenv("PAPERLESS_FORCE_SCRIPT_NAME")
base_url = (script_name or "") + "/"
login_url = base_url + "accounts/login/"
login_redirect_url = base_url + "dashboard"
logout_redirect_url = os.getenv(
"PAPERLESS_LOGOUT_REDIRECT_URL",
login_url + "?loggedout=1",
)
return script_name, base_url, login_url, login_redirect_url, logout_redirect_url
def parse_redis_url(env_redis: str | None) -> tuple[str, str]:
"""
Gets the Redis information from the environment or a default and handles
converting from incompatible django_channels and celery formats.
Returns a tuple of (celery_url, channels_url)
"""
# Not set, return a compatible default
if env_redis is None:
return ("redis://localhost:6379", "redis://localhost:6379")
if "unix" in env_redis.lower():
# channels_redis socket format, looks like:
# "unix:///path/to/redis.sock"
_, path = env_redis.split(":")
# Optionally setting a db number
if "?db=" in env_redis:
path, number = path.split("?db=")
return (f"redis+socket:{path}?virtual_host={number}", env_redis)
else:
return (f"redis+socket:{path}", env_redis)
elif "+socket" in env_redis.lower():
# celery socket style, looks like:
# "redis+socket:///path/to/redis.sock"
_, path = env_redis.split(":")
if "?virtual_host=" in env_redis:
# Virtual host (aka db number)
path, number = path.split("?virtual_host=")
return (env_redis, f"unix:{path}?db={number}")
else:
return (env_redis, f"unix:{path}")
# Not a socket
return (env_redis, env_redis)
def parse_beat_schedule() -> dict:
"""
Configures the scheduled tasks, according to default or
environment variables. Task expiration is configured so the task will
expire (and not run), shortly before the default frequency will put another
of the same task into the queue
https://docs.celeryq.dev/en/stable/userguide/periodic-tasks.html#beat-entries
https://docs.celeryq.dev/en/latest/userguide/calling.html#expiration
"""
schedule = {}
tasks = [
{
"name": "Check all e-mail accounts",
"env_key": "PAPERLESS_EMAIL_TASK_CRON",
# Default every ten minutes
"env_default": "*/10 * * * *",
"task": "paperless_mail.tasks.process_mail_accounts",
"options": {
# 1 minute before default schedule sends again
"expires": 9.0 * 60.0,
},
},
{
"name": "Train the classifier",
"env_key": "PAPERLESS_TRAIN_TASK_CRON",
# Default hourly at 5 minutes past the hour
"env_default": "5 */1 * * *",
"task": "documents.tasks.train_classifier",
"options": {
# 1 minute before default schedule sends again
"expires": 59.0 * 60.0,
},
},
{
"name": "Optimize the index",
"env_key": "PAPERLESS_INDEX_TASK_CRON",
# Default daily at midnight
"env_default": "0 0 * * *",
"task": "documents.tasks.index_optimize",
"options": {
# 1 hour before default schedule sends again
"expires": 23.0 * 60.0 * 60.0,
},
},
{
"name": "Perform sanity check",
"env_key": "PAPERLESS_SANITY_TASK_CRON",
# Default Sunday at 00:30
"env_default": "30 0 * * sun",
"task": "documents.tasks.sanity_check",
"options": {
# 1 hour before default schedule sends again
"expires": ((7.0 * 24.0) - 1.0) * 60.0 * 60.0,
},
},
{
"name": "Empty trash",
"env_key": "PAPERLESS_EMPTY_TRASH_TASK_CRON",
# Default daily at 01:00
"env_default": "0 1 * * *",
"task": "documents.tasks.empty_trash",
"options": {
# 1 hour before default schedule sends again
"expires": 23.0 * 60.0 * 60.0,
},
},
{
"name": "Check and run scheduled workflows",
"env_key": "PAPERLESS_WORKFLOW_SCHEDULED_TASK_CRON",
# Default hourly at 5 minutes past the hour
"env_default": "5 */1 * * *",
"task": "documents.tasks.check_scheduled_workflows",
"options": {
# 1 minute before default schedule sends again
"expires": 59.0 * 60.0,
},
},
]
for task in tasks:
# Either get the environment setting or use the default
value = os.getenv(task["env_key"], task["env_default"])
# Don't add disabled tasks to the schedule
if value == "disable":
continue
# I find https://crontab.guru/ super helpful
# crontab(5) format
# - five time-and-date fields
# - separated by at least one blank
minute, hour, day_month, month, day_week = value.split(" ")
schedule[task["name"]] = {
"task": task["task"],
"schedule": crontab(minute, hour, day_week, day_month, month),
"options": task["options"],
}
return schedule
def parse_db_settings(data_dir: Path) -> dict[str, DatabaseConfig]:
"""Parse database settings from environment variables.
Core connection variables (no deprecation):
- PAPERLESS_DBENGINE (sqlite/postgresql/mariadb)
- PAPERLESS_DBHOST, PAPERLESS_DBPORT
- PAPERLESS_DBNAME, PAPERLESS_DBUSER, PAPERLESS_DBPASS
Advanced options can be set via:
- Legacy individual env vars (deprecated in v3.0, removed in v3.2)
- PAPERLESS_DB_OPTIONS (recommended v3+ approach)
Args:
data_dir: The data directory path for SQLite database location.
Returns:
A databases dict suitable for Django DATABASES setting.
"""
engine = get_choice_from_env(
"PAPERLESS_DBENGINE",
{"sqlite", "postgresql", "mariadb"},
default="sqlite",
)
match engine:
case "sqlite":
db_config = {
"ENGINE": "django.db.backends.sqlite3",
"NAME": str((data_dir / "db.sqlite3").resolve()),
}
base_options = {}
case "postgresql":
db_config = {
"ENGINE": "django.db.backends.postgresql",
"HOST": os.getenv("PAPERLESS_DBHOST"),
"NAME": os.getenv("PAPERLESS_DBNAME", "paperless"),
"USER": os.getenv("PAPERLESS_DBUSER", "paperless"),
"PASSWORD": os.getenv("PAPERLESS_DBPASS", "paperless"),
}
base_options = {
"sslmode": os.getenv("PAPERLESS_DBSSLMODE", "prefer"),
"sslrootcert": os.getenv("PAPERLESS_DBSSLROOTCERT"),
"sslcert": os.getenv("PAPERLESS_DBSSLCERT"),
"sslkey": os.getenv("PAPERLESS_DBSSLKEY"),
}
if (pool_size := get_int_from_env("PAPERLESS_DB_POOLSIZE")) is not None:
base_options["pool"] = {
"min_size": 1,
"max_size": pool_size,
}
case "mariadb":
db_config = {
"ENGINE": "django.db.backends.mysql",
"HOST": os.getenv("PAPERLESS_DBHOST"),
"NAME": os.getenv("PAPERLESS_DBNAME", "paperless"),
"USER": os.getenv("PAPERLESS_DBUSER", "paperless"),
"PASSWORD": os.getenv("PAPERLESS_DBPASS", "paperless"),
}
base_options = {
"read_default_file": "/etc/mysql/my.cnf",
"charset": "utf8mb4",
"collation": "utf8mb4_unicode_ci",
"ssl_mode": os.getenv("PAPERLESS_DBSSLMODE", "PREFERRED"),
"ssl": {
"ca": os.getenv("PAPERLESS_DBSSLROOTCERT"),
"cert": os.getenv("PAPERLESS_DBSSLCERT"),
"key": os.getenv("PAPERLESS_DBSSLKEY"),
},
}
# Handle port setting for external databases
if (
engine in ("postgresql", "mariadb")
and (port := get_int_from_env("PAPERLESS_DBPORT")) is not None
):
db_config["PORT"] = port
# Handle timeout setting (common across all engines, different key names)
if (timeout := get_int_from_env("PAPERLESS_DB_TIMEOUT")) is not None:
timeout_key = "timeout" if engine == "sqlite" else "connect_timeout"
base_options[timeout_key] = timeout
# Apply PAPERLESS_DB_OPTIONS overrides
db_config["OPTIONS"] = parse_dict_from_str(
os.getenv("PAPERLESS_DB_OPTIONS"),
defaults=base_options,
type_map={
# SQLite options
"timeout": int,
# Postgres/MariaDB options
"connect_timeout": int,
"pool.min_size": int,
"pool.max_size": int,
},
)
databases = {"default": db_config}
# Add SQLite fallback for PostgreSQL/MariaDB
# TODO: Is this really useful/used?
if engine in ("postgresql", "mariadb"):
databases["sqlite"] = {
"ENGINE": "django.db.backends.sqlite3",
"NAME": str((data_dir / "db.sqlite3").resolve()),
"OPTIONS": {},
}
return databases

View File

@@ -0,0 +1,294 @@
import copy
import os
from collections.abc import Callable
from collections.abc import Mapping
from pathlib import Path
from typing import Any
from typing import TypeVar
from typing import overload
T = TypeVar("T")
def str_to_bool(value: str) -> bool:
"""
Converts a string representation of truth to a boolean value.
Recognizes 'true', '1', 't', 'y', 'yes' as True, and
'false', '0', 'f', 'n', 'no' as False. Case-insensitive.
Args:
value: The string to convert.
Returns:
The boolean representation of the string.
Raises:
ValueError: If the string is not a recognized boolean value.
"""
val_lower = value.strip().lower()
if val_lower in ("true", "1", "t", "y", "yes"):
return True
elif val_lower in ("false", "0", "f", "n", "no"):
return False
raise ValueError(f"Cannot convert '{value}' to a boolean.")
def parse_dict_from_str(
env_str: str | None,
defaults: dict[str, Any] | None = None,
type_map: Mapping[str, Callable[[str], Any]] | None = None,
separator: str = ",",
) -> dict[str, Any]:
"""
Parses a key-value string into a dictionary, applying defaults and casting types.
Supports nested keys via dot-notation, e.g.:
"database.host=localhost,database.port=5432"
Args:
env_str: The string from the environment variable (e.g., "port=9090,debug=true").
defaults: A dictionary of default values (can contain nested dicts).
type_map: A dictionary mapping keys (dot-notation allowed) to a type or a parsing
function (e.g., {'port': int, 'debug': bool, 'database.port': int}).
The special `bool` type triggers custom boolean parsing.
separator: The character used to separate key-value pairs. Defaults to ','.
Returns:
A dictionary with the parsed and correctly-typed settings.
Raises:
ValueError: If a value cannot be cast to its specified type.
"""
def _set_nested(d: dict, keys: list[str], value: Any) -> None:
"""Set a nested value, creating intermediate dicts as needed."""
cur = d
for k in keys[:-1]:
if k not in cur or not isinstance(cur[k], dict):
cur[k] = {}
cur = cur[k]
cur[keys[-1]] = value
def _get_nested(d: dict, keys: list[str]) -> Any:
"""Get nested value or raise KeyError if not present."""
cur = d
for k in keys:
if not isinstance(cur, dict) or k not in cur:
raise KeyError
cur = cur[k]
return cur
def _has_nested(d: dict, keys: list[str]) -> bool:
try:
_get_nested(d, keys)
return True
except KeyError:
return False
settings: dict[str, Any] = copy.deepcopy(defaults) if defaults else {}
_type_map = type_map if type_map else {}
if not env_str:
return settings
# Parse the environment string using the specified separator
pairs = [p.strip() for p in env_str.split(separator) if p.strip()]
for pair in pairs:
if "=" not in pair:
# ignore malformed pairs
continue
key, val = pair.split("=", 1)
key = key.strip()
val = val.strip()
if not key:
continue
parts = key.split(".")
_set_nested(settings, parts, val)
# Apply type casting to the updated settings (supports nested keys in type_map)
for key, caster in _type_map.items():
key_parts = key.split(".")
if _has_nested(settings, key_parts):
raw_val = _get_nested(settings, key_parts)
# Only cast if it's a string (i.e. from env parsing). If defaults already provided
# a different type we leave it as-is.
if isinstance(raw_val, str):
try:
if caster is bool:
parsed = str_to_bool(raw_val)
elif caster is Path:
parsed = Path(raw_val).resolve()
else:
parsed = caster(raw_val)
except (ValueError, TypeError) as e:
caster_name = getattr(caster, "__name__", repr(caster))
raise ValueError(
f"Error casting key '{key}' with value '{raw_val}' "
f"to type '{caster_name}'",
) from e
_set_nested(settings, key_parts, parsed)
return settings
def get_bool_from_env(key: str, default: str = "NO") -> bool:
"""
Return a boolean value based on whatever the user has supplied in the
environment based on whether the value "looks like" it's True or not.
"""
return str_to_bool(os.getenv(key, default))
@overload
def get_int_from_env(key: str) -> int | None: ...
@overload
def get_int_from_env(key: str, default: None) -> int | None: ...
@overload
def get_int_from_env(key: str, default: int) -> int: ...
def get_int_from_env(key: str, default: int | None = None) -> int | None:
"""
Return an integer value based on the environment variable.
If default is provided, returns that value when key is missing.
If default is None, returns None when key is missing.
"""
if key not in os.environ:
return default
return int(os.environ[key])
@overload
def get_float_from_env(key: str) -> float | None: ...
@overload
def get_float_from_env(key: str, default: None) -> float | None: ...
@overload
def get_float_from_env(key: str, default: float) -> float: ...
def get_float_from_env(key: str, default: float | None = None) -> float | None:
"""
Return a float value based on the environment variable.
If default is provided, returns that value when key is missing.
If default is None, returns None when key is missing.
"""
if key not in os.environ:
return default
return float(os.environ[key])
@overload
def get_path_from_env(key: str) -> Path | None: ...
@overload
def get_path_from_env(key: str, default: None) -> Path | None: ...
@overload
def get_path_from_env(key: str, default: Path | str) -> Path: ...
def get_path_from_env(key: str, default: Path | str | None = None) -> Path | None:
"""
Return a Path object based on the environment variable.
If default is provided, returns that value when key is missing.
If default is None, returns None when key is missing.
"""
if key not in os.environ:
return default if default is None else Path(default).resolve()
return Path(os.environ[key]).resolve()
def get_list_from_env(
key: str,
separator: str = ",",
default: list[T] | None = None,
*,
strip_whitespace: bool = True,
remove_empty: bool = True,
required: bool = False,
) -> list[str] | list[T]:
"""
Get and parse a list from an environment variable or return a default.
Args:
key: Environment variable name
separator: Character(s) to split on (default: ',')
default: Default value to return if env var is not set or empty
strip_whitespace: Whether to strip whitespace from each element
remove_empty: Whether to remove empty strings from the result
required: If True, raise an error when the env var is missing and no default provided
Returns:
List of strings, the default if env var is empty/None or an empty list
Raises:
ValueError: If required=True and env var is missing and there is no default
"""
# Get the environment variable value
env_value = os.environ.get(key)
# Handle required environment variables
if required and env_value is None and default is None:
raise ValueError(f"Required environment variable '{key}' is not set")
if env_value:
items = env_value.split(separator)
if strip_whitespace:
items = [item.strip() for item in items]
if remove_empty:
items = [item for item in items if item]
return items
elif default is not None:
return default
else:
return []
def get_choice_from_env(
env_key: str,
choices: set[str],
default: str | None = None,
) -> str:
"""
Gets and validates an environment variable against a set of allowed choices.
Args:
env_key: The environment variable key to validate
choices: Set of valid choices for the environment variable
default: Optional default value if environment variable is not set
Returns:
The validated environment variable value
Raises:
ValueError: If the environment variable value is not in choices
or if no default is provided and env var is missing
"""
value = os.environ.get(env_key, default)
if value is None:
raise ValueError(
f"Environment variable '{env_key}' is required but not set.",
)
if value not in choices:
raise ValueError(
f"Environment variable '{env_key}' has invalid value '{value}'. "
f"Valid choices are: {', '.join(sorted(choices))}",
)
return value

View File

@@ -2,13 +2,16 @@ import os
from pathlib import Path
from unittest import mock
import pytest
from django.test import TestCase
from django.test import override_settings
from pytest_mock import MockerFixture
from documents.tests.utils import DirectoriesMixin
from documents.tests.utils import FileSystemAssertsMixin
from paperless.checks import audit_log_check
from paperless.checks import binaries_check
from paperless.checks import check_deprecated_db_settings
from paperless.checks import debug_mode_check
from paperless.checks import paths_check
from paperless.checks import settings_values_check
@@ -237,3 +240,67 @@ class TestAuditLogChecks(TestCase):
("auditlog table was found but audit log is disabled."),
msg.msg,
)
class TestDeprecatedDbSettings:
"""Test suite for deprecated database settings system check."""
def test_no_deprecated_vars_no_warning(
self,
mocker: MockerFixture,
) -> None:
"""Test that no warning is raised when no deprecated vars are set."""
mocker.patch.dict(os.environ, {}, clear=True)
warnings = check_deprecated_db_settings(None)
assert warnings == []
@pytest.mark.parametrize(
("env_var", "expected_hint_fragment"),
[
("PAPERLESS_DB_TIMEOUT", "timeout"),
("PAPERLESS_DB_POOLSIZE", "pool.min_size,pool.max_size"),
("PAPERLESS_DBSSLMODE", "sslmode"),
("PAPERLESS_DBSSLROOTCERT", "sslrootcert"),
("PAPERLESS_DBSSLCERT", "sslcert"),
("PAPERLESS_DBSSLKEY", "sslkey"),
],
)
def test_deprecated_var_triggers_warning(
self,
mocker: MockerFixture,
env_var: str,
expected_hint_fragment: str,
) -> None:
"""Test that each deprecated var triggers appropriate warning."""
mocker.patch.dict(os.environ, {env_var: "some_value"}, clear=True)
warnings = check_deprecated_db_settings(None)
assert len(warnings) == 1
assert warnings[0].id == "paperless.W001"
assert env_var in warnings[0].hint
assert expected_hint_fragment in warnings[0].hint
assert "v3.2" in warnings[0].hint
def test_multiple_deprecated_vars(
self,
mocker: MockerFixture,
) -> None:
"""Test that multiple deprecated vars are all listed in warning."""
mocker.patch.dict(
os.environ,
{
"PAPERLESS_DB_TIMEOUT": "30",
"PAPERLESS_DB_POOLSIZE": "10",
"PAPERLESS_DBSSLMODE": "require",
},
clear=True,
)
warnings = check_deprecated_db_settings(None)
assert len(warnings) == 1
assert "PAPERLESS_DB_TIMEOUT" in warnings[0].hint
assert "PAPERLESS_DB_POOLSIZE" in warnings[0].hint
assert "PAPERLESS_DBSSLMODE" in warnings[0].hint

View File

@@ -1,19 +1,21 @@
import datetime
import os
from pathlib import Path
from unittest import TestCase
from unittest import mock
import pytest
from celery.schedules import crontab
from pytest_mock import MockerFixture
from paperless.settings import _parse_base_paths
from paperless.settings import _parse_beat_schedule
from paperless.settings import _parse_dateparser_languages
from paperless.settings import _parse_db_settings
from paperless.settings import _parse_ignore_dates
from paperless.settings import _parse_paperless_url
from paperless.settings import _parse_redis_url
from paperless.settings import default_threads_per_worker
from paperless.settings.custom import parse_db_settings
class TestIgnoreDateParsing(TestCase):
@@ -378,62 +380,302 @@ class TestCeleryScheduleParsing(TestCase):
)
class TestDBSettings(TestCase):
def test_db_timeout_with_sqlite(self) -> None:
"""
GIVEN:
- PAPERLESS_DB_TIMEOUT is set
WHEN:
- Settings are parsed
THEN:
- PAPERLESS_DB_TIMEOUT set for sqlite
"""
with mock.patch.dict(
os.environ,
class TestParseDbSettings:
"""Test suite for parse_db_settings function."""
@pytest.mark.parametrize(
("env_vars", "expected_database_settings"),
[
pytest.param(
{},
{
"PAPERLESS_DB_TIMEOUT": "10",
"default": {
"ENGINE": "django.db.backends.sqlite3",
"NAME": None, # Will be replaced with tmp_path
"OPTIONS": {},
},
},
id="default-sqlite",
),
pytest.param(
{
"PAPERLESS_DBENGINE": "sqlite",
"PAPERLESS_DB_OPTIONS": "timeout=30",
},
{
"default": {
"ENGINE": "django.db.backends.sqlite3",
"NAME": None, # Will be replaced with tmp_path
"OPTIONS": {
"timeout": 30,
},
},
},
id="sqlite-with-timeout-override",
),
pytest.param(
{
"PAPERLESS_DBENGINE": "postgresql",
"PAPERLESS_DBHOST": "localhost",
},
{
"default": {
"ENGINE": "django.db.backends.postgresql",
"HOST": "localhost",
"NAME": "paperless",
"USER": "paperless",
"PASSWORD": "paperless",
"OPTIONS": {
"sslmode": "prefer",
"sslrootcert": None,
"sslcert": None,
"sslkey": None,
},
},
"sqlite": {
"ENGINE": "django.db.backends.sqlite3",
"NAME": None, # Will be replaced with tmp_path
"OPTIONS": {},
},
},
id="postgresql-defaults",
),
pytest.param(
{
"PAPERLESS_DBENGINE": "postgresql",
"PAPERLESS_DBHOST": "paperless-db-host",
"PAPERLESS_DBPORT": "1111",
"PAPERLESS_DBNAME": "customdb",
"PAPERLESS_DBUSER": "customuser",
"PAPERLESS_DBPASS": "custompass",
"PAPERLESS_DB_OPTIONS": "pool.max_size=50,pool.min_size=2,sslmode=require",
},
{
"default": {
"ENGINE": "django.db.backends.postgresql",
"HOST": "paperless-db-host",
"PORT": 1111,
"NAME": "customdb",
"USER": "customuser",
"PASSWORD": "custompass",
"OPTIONS": {
"sslmode": "require",
"sslrootcert": None,
"sslcert": None,
"sslkey": None,
"pool": {
"min_size": 2,
"max_size": 50,
},
},
},
"sqlite": {
"ENGINE": "django.db.backends.sqlite3",
"NAME": None, # Will be replaced with tmp_path
"OPTIONS": {},
},
},
id="postgresql-overrides",
),
pytest.param(
{
"PAPERLESS_DBENGINE": "postgresql",
"PAPERLESS_DBHOST": "pghost",
"PAPERLESS_DB_POOLSIZE": "10",
},
{
"default": {
"ENGINE": "django.db.backends.postgresql",
"HOST": "pghost",
"NAME": "paperless",
"USER": "paperless",
"PASSWORD": "paperless",
"OPTIONS": {
"sslmode": "prefer",
"sslrootcert": None,
"sslcert": None,
"sslkey": None,
"pool": {
"min_size": 1,
"max_size": 10,
},
},
},
"sqlite": {
"ENGINE": "django.db.backends.sqlite3",
"NAME": None, # Will be replaced with tmp_path
"OPTIONS": {},
},
},
id="postgresql-legacy-poolsize",
),
pytest.param(
{
"PAPERLESS_DBENGINE": "postgresql",
"PAPERLESS_DBHOST": "pghost",
"PAPERLESS_DBSSLMODE": "require",
"PAPERLESS_DBSSLROOTCERT": "/certs/ca.crt",
"PAPERLESS_DB_TIMEOUT": "30",
},
{
"default": {
"ENGINE": "django.db.backends.postgresql",
"HOST": "pghost",
"NAME": "paperless",
"USER": "paperless",
"PASSWORD": "paperless",
"OPTIONS": {
"sslmode": "require",
"sslrootcert": "/certs/ca.crt",
"sslcert": None,
"sslkey": None,
"connect_timeout": 30,
},
},
"sqlite": {
"ENGINE": "django.db.backends.sqlite3",
"NAME": None, # Will be replaced with tmp_path
"OPTIONS": {},
},
},
id="postgresql-legacy-ssl-and-timeout",
),
pytest.param(
{
"PAPERLESS_DBENGINE": "mariadb",
"PAPERLESS_DBHOST": "localhost",
},
{
"default": {
"ENGINE": "django.db.backends.mysql",
"HOST": "localhost",
"NAME": "paperless",
"USER": "paperless",
"PASSWORD": "paperless",
"OPTIONS": {
"read_default_file": "/etc/mysql/my.cnf",
"charset": "utf8mb4",
"collation": "utf8mb4_unicode_ci",
"ssl_mode": "PREFERRED",
"ssl": {
"ca": None,
"cert": None,
"key": None,
},
},
},
"sqlite": {
"ENGINE": "django.db.backends.sqlite3",
"NAME": None, # Will be replaced with tmp_path
"OPTIONS": {},
},
},
id="mariadb-defaults",
),
pytest.param(
{
"PAPERLESS_DBENGINE": "mariadb",
"PAPERLESS_DBHOST": "paperless-mariadb-host",
"PAPERLESS_DBPORT": "5555",
"PAPERLESS_DBUSER": "my-cool-user",
"PAPERLESS_DBPASS": "my-secure-password",
"PAPERLESS_DB_OPTIONS": "ssl.ca=/path/to/ca.pem,ssl_mode=REQUIRED",
},
{
"default": {
"ENGINE": "django.db.backends.mysql",
"HOST": "paperless-mariadb-host",
"PORT": 5555,
"NAME": "paperless",
"USER": "my-cool-user",
"PASSWORD": "my-secure-password",
"OPTIONS": {
"read_default_file": "/etc/mysql/my.cnf",
"charset": "utf8mb4",
"collation": "utf8mb4_unicode_ci",
"ssl_mode": "REQUIRED",
"ssl": {
"ca": "/path/to/ca.pem",
"cert": None,
"key": None,
},
},
},
"sqlite": {
"ENGINE": "django.db.backends.sqlite3",
"NAME": None, # Will be replaced with tmp_path
"OPTIONS": {},
},
},
id="mariadb-overrides",
),
pytest.param(
{
"PAPERLESS_DBENGINE": "mariadb",
"PAPERLESS_DBHOST": "mariahost",
"PAPERLESS_DBSSLMODE": "REQUIRED",
"PAPERLESS_DBSSLROOTCERT": "/certs/ca.pem",
"PAPERLESS_DBSSLCERT": "/certs/client.pem",
"PAPERLESS_DBSSLKEY": "/certs/client.key",
"PAPERLESS_DB_TIMEOUT": "25",
},
{
"default": {
"ENGINE": "django.db.backends.mysql",
"HOST": "mariahost",
"NAME": "paperless",
"USER": "paperless",
"PASSWORD": "paperless",
"OPTIONS": {
"read_default_file": "/etc/mysql/my.cnf",
"charset": "utf8mb4",
"collation": "utf8mb4_unicode_ci",
"ssl_mode": "REQUIRED",
"ssl": {
"ca": "/certs/ca.pem",
"cert": "/certs/client.pem",
"key": "/certs/client.key",
},
"connect_timeout": 25,
},
},
"sqlite": {
"ENGINE": "django.db.backends.sqlite3",
"NAME": None, # Will be replaced with tmp_path
"OPTIONS": {},
},
},
id="mariadb-legacy-ssl-and-timeout",
),
],
)
def test_parse_db_settings(
self,
tmp_path: Path,
mocker: MockerFixture,
env_vars: dict[str, str],
expected_database_settings: dict[str, dict],
) -> None:
"""Test various database configurations with defaults and overrides."""
# Clear environment and set test vars
mocker.patch.dict(os.environ, env_vars, clear=True)
# Update expected paths with actual tmp_path
if (
"default" in expected_database_settings
and expected_database_settings["default"]["NAME"] is None
):
databases = _parse_db_settings()
self.assertDictEqual(
{
"timeout": 10.0,
},
databases["default"]["OPTIONS"],
expected_database_settings["default"]["NAME"] = str(
tmp_path / "db.sqlite3",
)
if "sqlite" in expected_database_settings:
expected_database_settings["sqlite"]["NAME"] = str(
tmp_path / "db.sqlite3",
)
def test_db_timeout_with_not_sqlite(self) -> None:
"""
GIVEN:
- PAPERLESS_DB_TIMEOUT is set but db is not sqlite
WHEN:
- Settings are parsed
THEN:
- PAPERLESS_DB_TIMEOUT set correctly in non-sqlite db & for fallback sqlite db
"""
with mock.patch.dict(
os.environ,
{
"PAPERLESS_DBHOST": "127.0.0.1",
"PAPERLESS_DB_TIMEOUT": "10",
},
):
databases = _parse_db_settings()
settings = parse_db_settings(tmp_path)
self.assertDictEqual(
databases["default"]["OPTIONS"],
databases["default"]["OPTIONS"]
| {
"connect_timeout": 10.0,
},
)
self.assertDictEqual(
{
"timeout": 10.0,
},
databases["sqlite"]["OPTIONS"],
)
assert settings == expected_database_settings
class TestPaperlessURLSettings(TestCase):