mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
1265 lines
43 KiB
Python
1265 lines
43 KiB
Python
import datetime
|
|
import json
|
|
import math
|
|
import multiprocessing
|
|
import os
|
|
import tempfile
|
|
from os import PathLike
|
|
from pathlib import Path
|
|
from platform import machine
|
|
from typing import Final
|
|
from urllib.parse import urlparse
|
|
|
|
from celery.schedules import crontab
|
|
from concurrent_log_handler.queue import setup_logging_queues
|
|
from django.utils.translation import gettext_lazy as _
|
|
from dotenv import load_dotenv
|
|
|
|
# Tap paperless.conf if it's available. The file named by
# PAPERLESS_CONFIGURATION_PATH is tried first, then a fixed list of fallback
# locations; only the first file that exists is loaded.
configuration_path = os.getenv("PAPERLESS_CONFIGURATION_PATH")
if configuration_path and os.path.exists(configuration_path):
    load_dotenv(configuration_path)
# Relative path: resolved against the current working directory
elif os.path.exists("../paperless.conf"):
    load_dotenv("../paperless.conf")
elif os.path.exists("/etc/paperless.conf"):
    load_dotenv("/etc/paperless.conf")
elif os.path.exists("/usr/local/etc/paperless.conf"):
    load_dotenv("/usr/local/etc/paperless.conf")

# There are multiple levels of concurrency in paperless:
#  - Multiple consumers may be run in parallel.
#  - Each consumer may process multiple pages in parallel.
#  - Each Tesseract OCR run may spawn multiple threads to process a single page
#    slightly faster.
# The performance gains from having tesseract use multiple threads are minimal.
# However, when multiple pages are processed in parallel, the total number of
# OCR threads may exceed the number of available cpu cores, which will
# dramatically slow down the consumption process. This setting limits each
# Tesseract process to one thread.
os.environ["OMP_THREAD_LIMIT"] = "1"
|
|
|
|
|
|
def __get_boolean(key: str, default: str = "NO") -> bool:
    """
    Interpret the environment variable (or the default when it is unset) as a
    boolean: any of "yes", "y", "1", "t" or "true", in any letter case, is
    True and every other value is False.
    """
    truthy = {"yes", "y", "1", "t", "true"}
    raw_value = os.environ.get(key, default)
    return raw_value.lower() in truthy
|
|
|
|
|
|
def __get_int(key: str, default: int) -> int:
    """
    Read the environment variable as an integer, falling back to the default
    when the variable is not set
    """
    raw_value = os.environ.get(key)
    if raw_value is None:
        return int(default)
    return int(raw_value)
|
|
|
|
|
|
def __get_optional_int(key: str) -> int | None:
    """
    Look up an optional integer setting: None when the environment key is
    absent, otherwise its value converted to an integer
    """
    if key not in os.environ:
        return None
    # The -1 fallback is never used because the key is known to be present
    return __get_int(key, -1)  # pragma: no cover
|
|
|
|
|
|
def __get_float(key: str, default: float) -> float:
    """
    Read the environment variable as a float, falling back to the default
    when the variable is not set
    """
    raw_value = os.environ.get(key)
    return float(default if raw_value is None else raw_value)
|
|
|
|
|
|
def __get_path(
|
|
key: str,
|
|
default: PathLike | str,
|
|
) -> Path:
|
|
"""
|
|
Return a normalized, absolute path based on the environment variable or a default,
|
|
if provided
|
|
"""
|
|
if key in os.environ:
|
|
return Path(os.environ[key]).resolve()
|
|
return Path(default).resolve()
|
|
|
|
|
|
def __get_optional_path(key: str) -> Path | None:
    """
    Look up an optional path setting: None when the environment key is
    absent, otherwise the value as a fully resolved Path
    """
    if key not in os.environ:
        return None
    # The empty default is never used because the key is known to be present
    return __get_path(key, "")
|
|
|
|
|
|
def __get_list(
|
|
key: str,
|
|
default: list[str] | None = None,
|
|
sep: str = ",",
|
|
) -> list[str]:
|
|
"""
|
|
Return a list of elements from the environment, as separated by the given
|
|
string, or the default if the key does not exist
|
|
"""
|
|
if key in os.environ:
|
|
return list(filter(None, os.environ[key].split(sep)))
|
|
elif default is not None:
|
|
return default
|
|
else:
|
|
return []
|
|
|
|
|
|
def _parse_redis_url(env_redis: str | None) -> tuple[str, str]:
|
|
"""
|
|
Gets the Redis information from the environment or a default and handles
|
|
converting from incompatible django_channels and celery formats.
|
|
|
|
Returns a tuple of (celery_url, channels_url)
|
|
"""
|
|
|
|
# Not set, return a compatible default
|
|
if env_redis is None:
|
|
return ("redis://localhost:6379", "redis://localhost:6379")
|
|
|
|
if "unix" in env_redis.lower():
|
|
# channels_redis socket format, looks like:
|
|
# "unix:///path/to/redis.sock"
|
|
_, path = env_redis.split(":")
|
|
# Optionally setting a db number
|
|
if "?db=" in env_redis:
|
|
path, number = path.split("?db=")
|
|
return (f"redis+socket:{path}?virtual_host={number}", env_redis)
|
|
else:
|
|
return (f"redis+socket:{path}", env_redis)
|
|
|
|
elif "+socket" in env_redis.lower():
|
|
# celery socket style, looks like:
|
|
# "redis+socket:///path/to/redis.sock"
|
|
_, path = env_redis.split(":")
|
|
if "?virtual_host=" in env_redis:
|
|
# Virtual host (aka db number)
|
|
path, number = path.split("?virtual_host=")
|
|
return (env_redis, f"unix:{path}?db={number}")
|
|
else:
|
|
return (env_redis, f"unix:{path}")
|
|
|
|
# Not a socket
|
|
return (env_redis, env_redis)
|
|
|
|
|
|
def _parse_beat_schedule() -> dict:
    """
    Configures the scheduled tasks, according to default or
    environment variables. Task expiration is configured so the task will
    expire (and not run), shortly before the default frequency will put another
    of the same task into the queue

    Each task reads its crontab(5) expression from its own environment
    variable. The special value "disable" leaves the task out of the schedule.
    The five fields may be separated by any amount of whitespace.

    Returns a dict keyed by task name, each value holding the "task" path,
    its crontab "schedule" and the "options" passed along with it.

    https://docs.celeryq.dev/en/stable/userguide/periodic-tasks.html#beat-entries
    https://docs.celeryq.dev/en/latest/userguide/calling.html#expiration
    """
    schedule = {}
    tasks = [
        {
            "name": "Check all e-mail accounts",
            "env_key": "PAPERLESS_EMAIL_TASK_CRON",
            # Default every ten minutes
            "env_default": "*/10 * * * *",
            "task": "paperless_mail.tasks.process_mail_accounts",
            "options": {
                # 1 minute before default schedule sends again
                "expires": 9.0 * 60.0,
            },
        },
        {
            "name": "Train the classifier",
            "env_key": "PAPERLESS_TRAIN_TASK_CRON",
            # Default hourly at 5 minutes past the hour
            "env_default": "5 */1 * * *",
            "task": "documents.tasks.train_classifier",
            "options": {
                # 1 minute before default schedule sends again
                "expires": 59.0 * 60.0,
            },
        },
        {
            "name": "Optimize the index",
            "env_key": "PAPERLESS_INDEX_TASK_CRON",
            # Default daily at midnight
            "env_default": "0 0 * * *",
            "task": "documents.tasks.index_optimize",
            "options": {
                # 1 hour before default schedule sends again
                "expires": 23.0 * 60.0 * 60.0,
            },
        },
        {
            "name": "Perform sanity check",
            "env_key": "PAPERLESS_SANITY_TASK_CRON",
            # Default Sunday at 00:30
            "env_default": "30 0 * * sun",
            "task": "documents.tasks.sanity_check",
            "options": {
                # 1 hour before default schedule sends again
                "expires": ((7.0 * 24.0) - 1.0) * 60.0 * 60.0,
            },
        },
        {
            "name": "Empty trash",
            "env_key": "PAPERLESS_EMPTY_TRASH_TASK_CRON",
            # Default daily at 01:00
            "env_default": "0 1 * * *",
            "task": "documents.tasks.empty_trash",
            "options": {
                # 1 hour before default schedule sends again
                "expires": 23.0 * 60.0 * 60.0,
            },
        },
        {
            "name": "Check and run scheduled workflows",
            "env_key": "PAPERLESS_WORKFLOW_SCHEDULED_TASK_CRON",
            # Default hourly at 5 minutes past the hour
            "env_default": "5 */1 * * *",
            "task": "documents.tasks.check_scheduled_workflows",
            "options": {
                # 1 minute before default schedule sends again
                "expires": 59.0 * 60.0,
            },
        },
    ]
    for task in tasks:
        # Either get the environment setting or use the default
        value = os.getenv(task["env_key"], task["env_default"])
        # Don't add disabled tasks to the schedule
        if value == "disable":
            continue
        # I find https://crontab.guru/ super helpful
        # crontab(5) format
        #  - five time-and-date fields
        #  - separated by at least one blank
        # split() without an argument collapses runs of whitespace, so extra
        # blanks between fields do not produce empty fields
        minute, hour, day_month, month, day_week = value.split()

        schedule[task["name"]] = {
            "task": task["task"],
            # crontab(5) orders the fields day-of-month, month, day-of-week;
            # keyword arguments keep that mapping explicit
            "schedule": crontab(
                minute=minute,
                hour=hour,
                day_of_week=day_week,
                day_of_month=day_month,
                month_of_year=month,
            ),
            "options": task["options"],
        }

    return schedule
|
|
|
|
|
|
# NEVER RUN WITH DEBUG IN PRODUCTION.
DEBUG = __get_boolean("PAPERLESS_DEBUG", "NO")


###############################################################################
# Directories                                                                 #
###############################################################################

# Two directory levels above this settings file
BASE_DIR: Path = Path(__file__).resolve().parent.parent

STATIC_ROOT = __get_path("PAPERLESS_STATICDIR", BASE_DIR.parent / "static")

# Document storage; originals, archive versions and thumbnails each live in
# their own sub-directory of MEDIA_ROOT/documents
MEDIA_ROOT = __get_path("PAPERLESS_MEDIA_ROOT", BASE_DIR.parent / "media")
ORIGINALS_DIR = MEDIA_ROOT / "documents" / "originals"
ARCHIVE_DIR = MEDIA_ROOT / "documents" / "archive"
THUMBNAIL_DIR = MEDIA_ROOT / "documents" / "thumbnails"

DATA_DIR = __get_path("PAPERLESS_DATA_DIR", BASE_DIR.parent / "data")

NLTK_DIR = __get_path("PAPERLESS_NLTK_DIR", "/usr/share/nltk_data")

# Check deprecated setting first: PAPERLESS_TRASH_DIR wins when both are set.
# Unlike the other directories this stays a plain string (or None when
# neither variable is set); it is not resolved to a Path.
EMPTY_TRASH_DIR = os.getenv(
    "PAPERLESS_TRASH_DIR",
    os.getenv("PAPERLESS_EMPTY_TRASH_DIR"),
)

# Lock file for synchronizing changes to the MEDIA directory across multiple
# threads.
MEDIA_LOCK = MEDIA_ROOT / "media.lock"
INDEX_DIR = DATA_DIR / "index"
MODEL_FILE = __get_path(
    "PAPERLESS_MODEL_FILE",
    DATA_DIR / "classification_model.pickle",
)

LOGGING_DIR = __get_path("PAPERLESS_LOGGING_DIR", DATA_DIR / "log")

CONSUMPTION_DIR = __get_path(
    "PAPERLESS_CONSUMPTION_DIR",
    BASE_DIR.parent / "consume",
)

# This will be created if it doesn't exist
SCRATCH_DIR = __get_path(
    "PAPERLESS_SCRATCH_DIR",
    Path(tempfile.gettempdir()) / "paperless",
)
|
|
|
|
###############################################################################
# Application Definition                                                      #
###############################################################################

# Additional Django apps named by the user as a comma-separated list
env_apps = __get_list("PAPERLESS_APPS")

INSTALLED_APPS = [
    "whitenoise.runserver_nostatic",
    "django.contrib.auth",
    "django.contrib.contenttypes",
    "django.contrib.sessions",
    "django.contrib.messages",
    "django.contrib.staticfiles",
    "corsheaders",
    "django_extensions",
    "paperless",
    "documents.apps.DocumentsConfig",
    "paperless_tesseract.apps.PaperlessTesseractConfig",
    "paperless_text.apps.PaperlessTextConfig",
    "paperless_mail.apps.PaperlessMailConfig",
    "django.contrib.admin",
    "rest_framework",
    "rest_framework.authtoken",
    "django_filters",
    "django_celery_results",
    "guardian",
    "allauth",
    "allauth.account",
    "allauth.socialaccount",
    "allauth.mfa",
    "drf_spectacular",
    "drf_spectacular_sidecar",
    # Apps from PAPERLESS_APPS are registered after all built-in ones
    *env_apps,
]

# The "channels" app is only registered when DEBUG is enabled
if DEBUG:
    INSTALLED_APPS.append("channels")
|
|
|
|
# Django REST framework configuration: authentication classes, header-based
# API versioning and the OpenAPI schema generator
REST_FRAMEWORK = {
    "DEFAULT_AUTHENTICATION_CLASSES": [
        "paperless.auth.PaperlessBasicAuthentication",
        "rest_framework.authentication.TokenAuthentication",
        "rest_framework.authentication.SessionAuthentication",
    ],
    "DEFAULT_VERSIONING_CLASS": "rest_framework.versioning.AcceptHeaderVersioning",
    "DEFAULT_VERSION": "7",
    # Make sure these are ordered and that the most recent version appears
    # last. See api.md#api-versioning when adding new versions.
    "ALLOWED_VERSIONS": ["1", "2", "3", "4", "5", "6", "7"],
    # DRF Spectacular default schema
    "DEFAULT_SCHEMA_CLASS": "drf_spectacular.openapi.AutoSchema",
}

# DRF Spectacular settings
SPECTACULAR_SETTINGS = {
    "TITLE": "Paperless-ngx REST API",
    "DESCRIPTION": "OpenAPI Spec for Paperless-ngx",
    # NOTE(review): this says 6.0.0 while DEFAULT_VERSION above is "7" —
    # confirm whether the two are meant to track each other
    "VERSION": "6.0.0",
    "SERVE_INCLUDE_SCHEMA": False,
    "SWAGGER_UI_DIST": "SIDECAR",
    "COMPONENT_SPLIT_REQUEST": True,
    "EXTERNAL_DOCS": {
        "description": "Paperless-ngx API Documentation",
        "url": "https://docs.paperless-ngx.com/api/",
    },
    "ENUM_NAME_OVERRIDES": {
        "MatchingAlgorithm": "documents.models.MatchingModel.MATCHING_ALGORITHMS",
    },
}

# In debug mode an additional authentication class is appended after the
# regular ones
if DEBUG:
    REST_FRAMEWORK["DEFAULT_AUTHENTICATION_CLASSES"].append(
        "paperless.auth.AngularApiAuthenticationOverride",
    )
|
|
|
|
# Request/response middleware, in the order listed. Later code in this
# module inserts further entries relative to
# "django.contrib.auth.middleware.AuthenticationMiddleware", so that exact
# string must stay in the list.
MIDDLEWARE = [
    "django.middleware.security.SecurityMiddleware",
    "whitenoise.middleware.WhiteNoiseMiddleware",
    "django.contrib.sessions.middleware.SessionMiddleware",
    "corsheaders.middleware.CorsMiddleware",
    "django.middleware.locale.LocaleMiddleware",
    "django.middleware.common.CommonMiddleware",
    "django.middleware.csrf.CsrfViewMiddleware",
    "paperless.middleware.ApiVersionMiddleware",
    "django.contrib.auth.middleware.AuthenticationMiddleware",
    "django.contrib.messages.middleware.MessageMiddleware",
    "django.middleware.clickjacking.XFrameOptionsMiddleware",
    "allauth.account.middleware.AccountMiddleware",
]

# Optional to enable compression. It is on by default and, when enabled, the
# compression middleware is placed first in the list.
if __get_boolean("PAPERLESS_ENABLE_COMPRESSION", "yes"):  # pragma: no cover
    MIDDLEWARE.insert(0, "compression_middleware.middleware.CompressionMiddleware")

ROOT_URLCONF = "paperless.urls"
|
|
|
|
|
|
def _parse_base_paths() -> tuple[str, str, str, str, str]:
    """
    Derive the URL prefixes that depend on PAPERLESS_FORCE_SCRIPT_NAME.

    Returns a tuple of (script_name, base_url, login_url, login_redirect_url,
    logout_redirect_url). script_name is returned exactly as read from the
    environment, so it is None when the variable is unset. The logout
    redirect can be overridden with PAPERLESS_LOGOUT_REDIRECT_URL.
    """
    forced_prefix = os.getenv("PAPERLESS_FORCE_SCRIPT_NAME")
    # An unset or empty prefix leaves the base URL at the site root
    root = f"{forced_prefix or ''}/"
    login = f"{root}accounts/login/"
    after_login = f"{root}dashboard"
    after_logout = os.getenv(
        "PAPERLESS_LOGOUT_REDIRECT_URL",
        f"{login}?loggedout=1",
    )
    return forced_prefix, root, login, after_login, after_logout
|
|
|
|
|
|
# URL prefixes derived from PAPERLESS_FORCE_SCRIPT_NAME
FORCE_SCRIPT_NAME, BASE_URL, LOGIN_URL, LOGIN_REDIRECT_URL, LOGOUT_REDIRECT_URL = (
    _parse_base_paths()
)

WSGI_APPLICATION = "paperless.wsgi.application"
ASGI_APPLICATION = "paperless.asgi.application"

# Static files are served below BASE_URL unless PAPERLESS_STATIC_URL overrides it
STATIC_URL = os.getenv("PAPERLESS_STATIC_URL", BASE_URL + "static/")
WHITENOISE_STATIC_PREFIX = "/static/"

# On aarch64 the plain static files storage is used instead of whitenoise's
# compressed storage
if machine().lower() == "aarch64":  # pragma: no cover
    _static_backend = "django.contrib.staticfiles.storage.StaticFilesStorage"
else:
    _static_backend = "whitenoise.storage.CompressedStaticFilesStorage"

STORAGES = {
    "staticfiles": {
        "BACKEND": _static_backend,
    },
    "default": {"BACKEND": "django.core.files.storage.FileSystemStorage"},
}

# The same Redis instance expressed in the URL format each consumer expects
_CELERY_REDIS_URL, _CHANNELS_REDIS_URL = _parse_redis_url(
    os.getenv("PAPERLESS_REDIS", None),
)

TEMPLATES = [
    {
        "BACKEND": "django.template.backends.django.DjangoTemplates",
        "DIRS": [],
        "APP_DIRS": True,
        "OPTIONS": {
            "context_processors": [
                "django.template.context_processors.debug",
                "django.template.context_processors.request",
                "django.contrib.auth.context_processors.auth",
                "django.contrib.messages.context_processors.messages",
                "documents.context_processors.settings",
            ],
        },
    },
]

CHANNEL_LAYERS = {
    "default": {
        "BACKEND": "channels_redis.pubsub.RedisPubSubChannelLayer",
        "CONFIG": {
            "hosts": [_CHANNELS_REDIS_URL],
            "capacity": 2000,  # default 100
            "expiry": 15,  # default 60
            # Same PAPERLESS_REDIS_PREFIX that the Celery broker and the
            # cache read further down in this module
            "prefix": os.getenv("PAPERLESS_REDIS_PREFIX", ""),
        },
    },
}
|
|
|
|
###############################################################################
# Security                                                                    #
###############################################################################

AUTHENTICATION_BACKENDS = [
    "guardian.backends.ObjectPermissionBackend",
    "django.contrib.auth.backends.ModelBackend",
    "allauth.account.auth_backends.AuthenticationBackend",
]

# Account settings (ACCOUNT_* / SOCIALACCOUNT_* / MFA_*)
ACCOUNT_LOGOUT_ON_GET = True
ACCOUNT_DEFAULT_HTTP_PROTOCOL = os.getenv(
    "PAPERLESS_ACCOUNT_DEFAULT_HTTP_PROTOCOL",
    "https",
)

ACCOUNT_ADAPTER = "paperless.adapter.CustomAccountAdapter"
# Regular account signups are off unless explicitly enabled
ACCOUNT_ALLOW_SIGNUPS = __get_boolean("PAPERLESS_ACCOUNT_ALLOW_SIGNUPS")
ACCOUNT_DEFAULT_GROUPS = __get_list("PAPERLESS_ACCOUNT_DEFAULT_GROUPS")

SOCIALACCOUNT_ADAPTER = "paperless.adapter.CustomSocialAccountAdapter"
# Social account signups are on unless explicitly disabled
SOCIALACCOUNT_ALLOW_SIGNUPS = __get_boolean(
    "PAPERLESS_SOCIALACCOUNT_ALLOW_SIGNUPS",
    "yes",
)
SOCIALACCOUNT_AUTO_SIGNUP = __get_boolean("PAPERLESS_SOCIAL_AUTO_SIGNUP")
# Provider configuration is supplied as a JSON object; invalid JSON raises
# while this module is being imported
SOCIALACCOUNT_PROVIDERS = json.loads(
    os.getenv("PAPERLESS_SOCIALACCOUNT_PROVIDERS", "{}"),
)
SOCIAL_ACCOUNT_DEFAULT_GROUPS = __get_list("PAPERLESS_SOCIAL_ACCOUNT_DEFAULT_GROUPS")
SOCIAL_ACCOUNT_SYNC_GROUPS = __get_boolean("PAPERLESS_SOCIAL_ACCOUNT_SYNC_GROUPS")

MFA_TOTP_ISSUER = "Paperless-ngx"

ACCOUNT_EMAIL_SUBJECT_PREFIX = "[Paperless-ngx] "

DISABLE_REGULAR_LOGIN = __get_boolean("PAPERLESS_DISABLE_REGULAR_LOGIN")
REDIRECT_LOGIN_TO_SSO = __get_boolean("PAPERLESS_REDIRECT_LOGIN_TO_SSO")

AUTO_LOGIN_USERNAME = os.getenv("PAPERLESS_AUTO_LOGIN_USERNAME")

ACCOUNT_EMAIL_VERIFICATION = os.getenv(
    "PAPERLESS_ACCOUNT_EMAIL_VERIFICATION",
    "optional",
)

ACCOUNT_SESSION_REMEMBER = __get_boolean("PAPERLESS_ACCOUNT_SESSION_REMEMBER", "True")
# Sessions end with the browser exactly when they are not remembered
SESSION_EXPIRE_AT_BROWSER_CLOSE = not ACCOUNT_SESSION_REMEMBER
# Session lifetime in seconds; the default is three weeks
SESSION_COOKIE_AGE = int(
    os.getenv("PAPERLESS_SESSION_COOKIE_AGE", 60 * 60 * 24 * 7 * 3),
)
# https://docs.djangoproject.com/en/5.1/ref/settings/#std-setting-SESSION_ENGINE
SESSION_ENGINE = "django.contrib.sessions.backends.cached_db"

if AUTO_LOGIN_USERNAME:
    _index = MIDDLEWARE.index("django.contrib.auth.middleware.AuthenticationMiddleware")
    # This overrides everything the auth middleware is doing but still allows
    # regular login in case the provided user does not exist.
    MIDDLEWARE.insert(_index + 1, "paperless.auth.AutoLoginMiddleware")
|
|
|
|
|
|
def _parse_remote_user_settings() -> str:
    """
    Wire up HTTP remote-user authentication when it is enabled.

    If either PAPERLESS_ENABLE_HTTP_REMOTE_USER or
    PAPERLESS_ENABLE_HTTP_REMOTE_USER_API is set, the remote user middleware
    is appended to MIDDLEWARE and the remote user backend is put first in
    AUTHENTICATION_BACKENDS. The API flag additionally puts the remote user
    authentication class first in the REST framework's list.

    Returns the name of the header that carries the remote user.
    """
    global MIDDLEWARE, AUTHENTICATION_BACKENDS, REST_FRAMEWORK
    remote_user_enabled = __get_boolean("PAPERLESS_ENABLE_HTTP_REMOTE_USER")
    remote_user_api_enabled = __get_boolean("PAPERLESS_ENABLE_HTTP_REMOTE_USER_API")

    if remote_user_enabled or remote_user_api_enabled:
        MIDDLEWARE.append("paperless.auth.HttpRemoteUserMiddleware")
        AUTHENTICATION_BACKENDS.insert(
            0,
            "django.contrib.auth.backends.RemoteUserBackend",
        )

    if remote_user_api_enabled:
        api_auth_classes = REST_FRAMEWORK["DEFAULT_AUTHENTICATION_CLASSES"]
        api_auth_classes.insert(
            0,
            "paperless.auth.PaperlessRemoteUserAuthentication",
        )

    return os.getenv(
        "PAPERLESS_HTTP_REMOTE_USER_HEADER_NAME",
        "HTTP_REMOTE_USER",
    )
|
|
|
|
|
|
# Calling this also registers the remote-user middleware and backends when
# the corresponding environment flags are set
HTTP_REMOTE_USER_HEADER_NAME = _parse_remote_user_settings()

# X-Frame options for embedded PDF display:
X_FRAME_OPTIONS = "SAMEORIGIN"

# The next 3 settings can also be set using just PAPERLESS_URL
CSRF_TRUSTED_ORIGINS = __get_list("PAPERLESS_CSRF_TRUSTED_ORIGINS")

# We allow CORS from localhost:8000
CORS_ALLOWED_ORIGINS = __get_list(
    "PAPERLESS_CORS_ALLOWED_HOSTS",
    ["http://localhost:8000"],
)

if DEBUG:
    # Allow access from the angular development server during debugging
    CORS_ALLOWED_ORIGINS.append("http://localhost:4200")

# Every host is allowed unless PAPERLESS_ALLOWED_HOSTS restricts the list
ALLOWED_HOSTS = __get_list("PAPERLESS_ALLOWED_HOSTS", ["*"])
if ALLOWED_HOSTS != ["*"]:
    # always allow localhost. Necessary e.g. for healthcheck in docker.
    ALLOWED_HOSTS.append("localhost")
|
|
|
|
|
|
def _parse_paperless_url():
|
|
global CSRF_TRUSTED_ORIGINS, CORS_ALLOWED_ORIGINS, ALLOWED_HOSTS
|
|
url = os.getenv("PAPERLESS_URL")
|
|
if url:
|
|
CSRF_TRUSTED_ORIGINS.append(url)
|
|
CORS_ALLOWED_ORIGINS.append(url)
|
|
ALLOWED_HOSTS.append(urlparse(url).hostname)
|
|
|
|
return url
|
|
|
|
|
|
# Calling this also extends the CSRF, CORS and allowed-host lists above when
# PAPERLESS_URL is set
PAPERLESS_URL = _parse_paperless_url()

# For use with trusted proxies
TRUSTED_PROXIES = __get_list("PAPERLESS_TRUSTED_PROXIES")

# NOTE: the environment variables are spelled "X_FORWARD_..." while the
# Django settings are "X_FORWARDED_..."
USE_X_FORWARDED_HOST = __get_boolean("PAPERLESS_USE_X_FORWARD_HOST", "false")
USE_X_FORWARDED_PORT = __get_boolean("PAPERLESS_USE_X_FORWARD_PORT", "false")
# Supplied as a JSON array and converted to a tuple; None when unset
SECURE_PROXY_SSL_HEADER = (
    tuple(json.loads(os.environ["PAPERLESS_PROXY_SSL_HEADER"]))
    if "PAPERLESS_PROXY_SSL_HEADER" in os.environ
    else None
)

# The secret key has a default that should be fine so long as you're hosting
# Paperless on a closed network.  However, if you're putting this anywhere
# public, you should change the key to something unique and verbose.
SECRET_KEY = os.getenv(
    "PAPERLESS_SECRET_KEY",
    "e11fl1oa-*ytql8p)(06fbj4ukrlo+n7k&q5+$1md7i+mge=ee",
)

AUTH_PASSWORD_VALIDATORS = [
    {
        "NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator",
    },
    {
        "NAME": "django.contrib.auth.password_validation.MinimumLengthValidator",
    },
    {
        "NAME": "django.contrib.auth.password_validation.CommonPasswordValidator",
    },
    {
        "NAME": "django.contrib.auth.password_validation.NumericPasswordValidator",
    },
]

# Disable Django's artificial limit on the number of form fields to submit at
# once. This is a protection against overloading the server, but since this is
# a self-hosted sort of gig, the benefits of being able to mass-delete a ton
# of log entries outweigh the benefits of such a safeguard.

DATA_UPLOAD_MAX_NUMBER_FIELDS = None

# Optional prefix applied to the names of the three cookies below
COOKIE_PREFIX = os.getenv("PAPERLESS_COOKIE_PREFIX", "")

CSRF_COOKIE_NAME = f"{COOKIE_PREFIX}csrftoken"
SESSION_COOKIE_NAME = f"{COOKIE_PREFIX}sessionid"
LANGUAGE_COOKIE_NAME = f"{COOKIE_PREFIX}django_language"

# None unless PAPERLESS_EMAIL_CERTIFICATE_LOCATION is set
EMAIL_CERTIFICATE_FILE = __get_optional_path("PAPERLESS_EMAIL_CERTIFICATE_LOCATION")
|
|
|
|
|
|
###############################################################################
|
|
# Database #
|
|
###############################################################################
|
|
def _parse_db_settings() -> dict:
    """
    Assemble the Django DATABASES mapping from the environment.

    Without PAPERLESS_DBHOST the only entry is a SQLite database inside
    DATA_DIR. With it, "default" becomes PostgreSQL (or MariaDB when
    PAPERLESS_DBENGINE is "mariadb") and the SQLite entry stays available
    under the "sqlite" key. PAPERLESS_DB_TIMEOUT, when set, is applied as
    "timeout" for SQLite and "connect_timeout" for the other engines.
    """
    sqlite_settings = {
        "ENGINE": "django.db.backends.sqlite3",
        "NAME": os.path.join(DATA_DIR, "db.sqlite3"),
        "OPTIONS": {},
    }
    databases = {"default": sqlite_settings}

    db_host = os.getenv("PAPERLESS_DBHOST")
    if db_host:
        # Have sqlite available as a second option for management commands
        # This is important when migrating to/from sqlite
        databases["sqlite"] = sqlite_settings.copy()

        server_settings = {
            "HOST": db_host,
            "NAME": os.getenv("PAPERLESS_DBNAME", "paperless"),
            "USER": os.getenv("PAPERLESS_DBUSER", "paperless"),
            "PASSWORD": os.getenv("PAPERLESS_DBPASS", "paperless"),
            "OPTIONS": {},
        }
        databases["default"] = server_settings

        db_port = os.getenv("PAPERLESS_DBPORT")
        if db_port:
            server_settings["PORT"] = db_port

        ssl_root_cert = os.getenv("PAPERLESS_DBSSLROOTCERT", None)
        ssl_cert = os.getenv("PAPERLESS_DBSSLCERT", None)
        ssl_key = os.getenv("PAPERLESS_DBSSLKEY", None)

        # Leave room for future extensibility
        if os.getenv("PAPERLESS_DBENGINE") == "mariadb":
            server_settings["ENGINE"] = "django.db.backends.mysql"
            engine_options = {
                "read_default_file": "/etc/mysql/my.cnf",
                "charset": "utf8mb4",
                "ssl_mode": os.getenv("PAPERLESS_DBSSLMODE", "PREFERRED"),
                "ssl": {
                    "ca": ssl_root_cert,
                    "cert": ssl_cert,
                    "key": ssl_key,
                },
            }
        else:  # Default to PostgresDB
            server_settings["ENGINE"] = "django.db.backends.postgresql"
            engine_options = {
                "sslmode": os.getenv("PAPERLESS_DBSSLMODE", "prefer"),
                "sslrootcert": ssl_root_cert,
                "sslcert": ssl_cert,
                "sslkey": ssl_key,
            }
        server_settings["OPTIONS"].update(engine_options)

    raw_timeout = os.getenv("PAPERLESS_DB_TIMEOUT")
    if raw_timeout is None:
        return databases

    timeout = int(raw_timeout)
    default_settings = databases["default"]
    if default_settings["ENGINE"] == "django.db.backends.sqlite3":
        default_settings["OPTIONS"]["timeout"] = timeout
    else:
        # The server connection and the secondary sqlite entry name the
        # option differently
        default_settings["OPTIONS"]["connect_timeout"] = timeout
        databases["sqlite"]["OPTIONS"]["timeout"] = timeout
    return databases
|
|
|
|
|
|
# Database connections, built from the PAPERLESS_DB* environment variables
DATABASES = _parse_db_settings()

if os.getenv("PAPERLESS_DBENGINE") == "mariadb":
    # Silence Django error on old MariaDB versions.
    # VARCHAR can support > 255 in modern versions
    # https://docs.djangoproject.com/en/4.1/ref/checks/#database
    # https://mariadb.com/kb/en/innodb-system-variables/#innodb_large_prefix
    SILENCED_SYSTEM_CHECKS = ["mysql.W003"]

DEFAULT_AUTO_FIELD = "django.db.models.AutoField"
|
|
|
|
###############################################################################
# Internationalization                                                        #
###############################################################################

LANGUAGE_CODE = "en-us"

# Languages offered by the application, as (language code, translatable
# display name) pairs
LANGUAGES = [
    ("en-us", _("English (US)")),  # needs to be first to act as fallback language
    ("ar-ar", _("Arabic")),
    ("af-za", _("Afrikaans")),
    ("be-by", _("Belarusian")),
    ("bg-bg", _("Bulgarian")),
    ("ca-es", _("Catalan")),
    ("cs-cz", _("Czech")),
    ("da-dk", _("Danish")),
    ("de-de", _("German")),
    ("el-gr", _("Greek")),
    ("en-gb", _("English (GB)")),
    ("es-es", _("Spanish")),
    ("fi-fi", _("Finnish")),
    ("fr-fr", _("French")),
    ("hu-hu", _("Hungarian")),
    ("it-it", _("Italian")),
    ("ja-jp", _("Japanese")),
    ("ko-kr", _("Korean")),
    ("lb-lu", _("Luxembourgish")),
    ("no-no", _("Norwegian")),
    ("nl-nl", _("Dutch")),
    ("pl-pl", _("Polish")),
    ("pt-br", _("Portuguese (Brazil)")),
    ("pt-pt", _("Portuguese")),
    ("ro-ro", _("Romanian")),
    ("ru-ru", _("Russian")),
    ("sk-sk", _("Slovak")),
    ("sl-si", _("Slovenian")),
    ("sr-cs", _("Serbian")),
    ("sv-se", _("Swedish")),
    ("tr-tr", _("Turkish")),
    ("uk-ua", _("Ukrainian")),
    ("zh-cn", _("Chinese Simplified")),
    ("zh-tw", _("Chinese Traditional")),
]

# Translation catalogs live in the "locale" directory below BASE_DIR
LOCALE_PATHS = [os.path.join(BASE_DIR, "locale")]

TIME_ZONE = os.getenv("PAPERLESS_TIME_ZONE", "UTC")

USE_I18N = True

# NOTE(review): newer Django releases deprecate this setting — confirm
# whether it is still needed for the Django version in use
USE_L10N = True

USE_TZ = True
|
|
|
|
###############################################################################
|
|
# Logging #
|
|
###############################################################################
|
|
|
|
setup_logging_queues()

# The log directory must exist before the file handlers below are configured
LOGGING_DIR.mkdir(parents=True, exist_ok=True)

# Log rotation limits: maximum size of one log file in bytes and the number
# of rotated files to keep. Values read from the environment are strings, so
# they are parsed as integers here; the rotating file handlers receive them
# as maxBytes / backupCount and compare them numerically.
LOGROTATE_MAX_SIZE = __get_int("PAPERLESS_LOGROTATE_MAX_SIZE", 1024 * 1024)
LOGROTATE_MAX_BACKUPS = __get_int("PAPERLESS_LOGROTATE_MAX_BACKUPS", 20)
|
|
|
|
# Logging configuration: one console handler on the root logger plus three
# rotating log files (paperless.log, mail.log, celery.log) in LOGGING_DIR
LOGGING = {
    "version": 1,
    "disable_existing_loggers": False,
    "formatters": {
        "verbose": {
            "format": "[{asctime}] [{levelname}] [{name}] {message}",
            "style": "{",
        },
        "simple": {
            "format": "{levelname} {message}",
            "style": "{",
        },
    },
    "handlers": {
        "console": {
            # Console output is more verbose in debug mode
            "level": "DEBUG" if DEBUG else "INFO",
            "class": "logging.StreamHandler",
            "formatter": "verbose",
        },
        "file_paperless": {
            "class": "concurrent_log_handler.ConcurrentRotatingFileHandler",
            "formatter": "verbose",
            "filename": os.path.join(LOGGING_DIR, "paperless.log"),
            "maxBytes": LOGROTATE_MAX_SIZE,
            "backupCount": LOGROTATE_MAX_BACKUPS,
        },
        "file_mail": {
            "class": "concurrent_log_handler.ConcurrentRotatingFileHandler",
            "formatter": "verbose",
            "filename": os.path.join(LOGGING_DIR, "mail.log"),
            "maxBytes": LOGROTATE_MAX_SIZE,
            "backupCount": LOGROTATE_MAX_BACKUPS,
        },
        "file_celery": {
            "class": "concurrent_log_handler.ConcurrentRotatingFileHandler",
            "formatter": "verbose",
            "filename": os.path.join(LOGGING_DIR, "celery.log"),
            "maxBytes": LOGROTATE_MAX_SIZE,
            "backupCount": LOGROTATE_MAX_BACKUPS,
        },
    },
    "root": {"handlers": ["console"]},
    # Which named loggers write to which log file
    "loggers": {
        "paperless": {"handlers": ["file_paperless"], "level": "DEBUG"},
        "paperless_mail": {"handlers": ["file_mail"], "level": "DEBUG"},
        "ocrmypdf": {"handlers": ["file_paperless"], "level": "INFO"},
        "celery": {"handlers": ["file_celery"], "level": "DEBUG"},
        "kombu": {"handlers": ["file_celery"], "level": "DEBUG"},
        "_granian": {"handlers": ["file_paperless"], "level": "DEBUG"},
        "granian.access": {"handlers": ["file_paperless"], "level": "DEBUG"},
    },
}
|
|
|
|
###############################################################################
# Task queue                                                                  #
###############################################################################

# https://docs.celeryq.dev/en/stable/userguide/configuration.html

CELERY_BROKER_URL = _CELERY_REDIS_URL
CELERY_TIMEZONE = TIME_ZONE

CELERY_WORKER_HIJACK_ROOT_LOGGER = False
CELERY_WORKER_CONCURRENCY: Final[int] = __get_int("PAPERLESS_TASK_WORKERS", 1)
# Alias of the worker concurrency under a shorter name
TASK_WORKERS = CELERY_WORKER_CONCURRENCY
CELERY_WORKER_MAX_TASKS_PER_CHILD = 1
CELERY_WORKER_SEND_TASK_EVENTS = True
CELERY_TASK_SEND_SENT_EVENT = True
CELERY_SEND_TASK_SENT_EVENT = True
CELERY_BROKER_CONNECTION_RETRY = True
CELERY_BROKER_CONNECTION_RETRY_ON_STARTUP = True
CELERY_BROKER_TRANSPORT_OPTIONS = {
    "global_keyprefix": os.getenv("PAPERLESS_REDIS_PREFIX", ""),
}

CELERY_TASK_TRACK_STARTED = True
# Task time limit from PAPERLESS_WORKER_TIMEOUT; the default 1800 is
# 30 minutes if the unit is seconds — TODO confirm against the Celery docs
CELERY_TASK_TIME_LIMIT: Final[int] = __get_int("PAPERLESS_WORKER_TIMEOUT", 1800)

CELERY_RESULT_EXTENDED = True
CELERY_RESULT_BACKEND = "django-db"
CELERY_CACHE_BACKEND = "default"

# https://docs.celeryq.dev/en/stable/userguide/configuration.html#task-serializer
# NOTE(review): pickle can execute arbitrary code when untrusted data is
# deserialized, so anything able to write to the broker should be treated
# as trusted — confirm the broker is not reachable by untrusted parties.
CELERY_TASK_SERIALIZER = "pickle"
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#std-setting-accept_content
CELERY_ACCEPT_CONTENT = ["application/json", "application/x-python-serialize"]

# https://docs.celeryq.dev/en/stable/userguide/configuration.html#beat-schedule
CELERY_BEAT_SCHEDULE = _parse_beat_schedule()

# https://docs.celeryq.dev/en/stable/userguide/configuration.html#beat-schedule-filename
CELERY_BEAT_SCHEDULE_FILENAME = os.path.join(DATA_DIR, "celerybeat-schedule.db")

# django setting.
CACHES = {
    "default": {
        "BACKEND": os.environ.get(
            "PAPERLESS_CACHE_BACKEND",
            "django.core.cache.backends.redis.RedisCache",
        ),
        "LOCATION": _CHANNELS_REDIS_URL,
        "KEY_PREFIX": os.getenv("PAPERLESS_REDIS_PREFIX", ""),
    },
}

# In debug mode, fall back to the in-process cache unless a cache backend
# was chosen explicitly
if DEBUG and os.getenv("PAPERLESS_CACHE_BACKEND") is None:
    CACHES["default"]["BACKEND"] = (
        "django.core.cache.backends.locmem.LocMemCache"  # pragma: no cover
    )
|
|
|
|
|
|
def default_threads_per_worker(task_workers) -> int:
    """
    Return the default number of threads each task worker may use.

    The detected CPU cores are divided evenly between the workers (rounded
    down); the result is never lower than 1.

    Args:
        task_workers: The number of task workers sharing the CPU cores.

    Returns:
        int: The per-worker thread count. This is 1 when task_workers is not
            positive or when the number of cores cannot be determined.
    """
    if task_workers <= 0:
        # Guard against division by zero; non-positive counts fall back to 1
        return 1
    try:
        # cpu_count() is the call that may raise NotImplementedError, so it
        # has to be evaluated inside the try block for the fallback to apply
        available_cores = max(multiprocessing.cpu_count(), 1)
    except NotImplementedError:
        return 1
    return max(math.floor(available_cores / task_workers), 1)
|
|
|
|
|
|
# Number of threads each worker may use. os.getenv() returns a str when the
# variable is set but the computed default is an int, so the value is
# normalised to int to give consumers a single type.
THREADS_PER_WORKER = int(
    os.getenv(
        "PAPERLESS_THREADS_PER_WORKER",
        default_threads_per_worker(CELERY_WORKER_CONCURRENCY),
    ),
)
|
|
|
|
###############################################################################
# Paperless Specific Settings                                                 #
###############################################################################

# Consumer polling values, read as integers (defaults: 0, 5 and 5).
CONSUMER_POLLING = int(os.environ.get("PAPERLESS_CONSUMER_POLLING", 0))
CONSUMER_POLLING_DELAY = int(os.environ.get("PAPERLESS_CONSUMER_POLLING_DELAY", 5))
CONSUMER_POLLING_RETRY_COUNT = int(
    os.environ.get("PAPERLESS_CONSUMER_POLLING_RETRY_COUNT", 5),
)

CONSUMER_INOTIFY_DELAY: Final[float] = __get_float(
    "PAPERLESS_CONSUMER_INOTIFY_DELAY",
    0.5,
)

CONSUMER_DELETE_DUPLICATES = __get_boolean("PAPERLESS_CONSUMER_DELETE_DUPLICATES")
CONSUMER_RECURSIVE = __get_boolean("PAPERLESS_CONSUMER_RECURSIVE")

# Glob patterns to ignore, relative to PAPERLESS_CONSUMPTION_DIR. The
# environment value, like the default, is a JSON encoded list.
CONSUMER_IGNORE_PATTERNS = list(
    json.loads(
        os.environ.get(
            "PAPERLESS_CONSUMER_IGNORE_PATTERNS",
            '[".DS_Store", ".DS_STORE", "._*", ".stfolder/*", ".stversions/*", ".localized/*", "desktop.ini", "@eaDir/*", "Thumbs.db"]',
        ),
    ),
)

CONSUMER_SUBDIRS_AS_TAGS = __get_boolean("PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS")
|
|
|
|
# Barcode handling; the boolean switches use the helper's default.
CONSUMER_ENABLE_BARCODES: Final[bool] = __get_boolean(
    "PAPERLESS_CONSUMER_ENABLE_BARCODES",
)
CONSUMER_BARCODE_TIFF_SUPPORT: Final[bool] = __get_boolean(
    "PAPERLESS_CONSUMER_BARCODE_TIFF_SUPPORT",
)

CONSUMER_BARCODE_STRING: Final[str] = os.environ.get(
    "PAPERLESS_CONSUMER_BARCODE_STRING",
    "PATCHT",
)

# The scanner name is normalised to upper case.
CONSUMER_BARCODE_SCANNER: Final[str] = os.environ.get(
    "PAPERLESS_CONSUMER_BARCODE_SCANNER",
    "PYZBAR",
).upper()

# Archive serial number (ASN) barcodes
CONSUMER_ENABLE_ASN_BARCODE: Final[bool] = __get_boolean(
    "PAPERLESS_CONSUMER_ENABLE_ASN_BARCODE",
)
CONSUMER_ASN_BARCODE_PREFIX: Final[str] = os.environ.get(
    "PAPERLESS_CONSUMER_ASN_BARCODE_PREFIX",
    "ASN",
)

# Numeric tuning values (defaults: 0.0, 300 and 0)
CONSUMER_BARCODE_UPSCALE: Final[float] = __get_float(
    "PAPERLESS_CONSUMER_BARCODE_UPSCALE",
    0.0,
)
CONSUMER_BARCODE_DPI: Final[int] = __get_int("PAPERLESS_CONSUMER_BARCODE_DPI", 300)
CONSUMER_BARCODE_MAX_PAGES: Final[int] = __get_int(
    "PAPERLESS_CONSUMER_BARCODE_MAX_PAGES",
    0,
)

CONSUMER_BARCODE_RETAIN_SPLIT_PAGES = __get_boolean(
    "PAPERLESS_CONSUMER_BARCODE_RETAIN_SPLIT_PAGES",
)

# Tag barcodes. The mapping is a JSON object; in the default, the key is a
# pattern with one capture group and the value a "\g<1>" back-reference.
CONSUMER_ENABLE_TAG_BARCODE: Final[bool] = __get_boolean(
    "PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE",
)
CONSUMER_TAG_BARCODE_MAPPING = dict(
    json.loads(
        os.environ.get(
            "PAPERLESS_CONSUMER_TAG_BARCODE_MAPPING",
            '{"TAG:(.*)": "\\\\g<1>"}',
        ),
    ),
)
|
|
|
|
# Double-sided collation: switch, sub-directory name and TIFF support
CONSUMER_ENABLE_COLLATE_DOUBLE_SIDED: Final[bool] = __get_boolean(
    "PAPERLESS_CONSUMER_ENABLE_COLLATE_DOUBLE_SIDED",
)
CONSUMER_COLLATE_DOUBLE_SIDED_SUBDIR_NAME: Final[str] = os.environ.get(
    "PAPERLESS_CONSUMER_COLLATE_DOUBLE_SIDED_SUBDIR_NAME",
    "double-sided",
)
CONSUMER_COLLATE_DOUBLE_SIDED_TIFF_SUPPORT: Final[bool] = __get_boolean(
    "PAPERLESS_CONSUMER_COLLATE_DOUBLE_SIDED_TIFF_SUPPORT",
)

# Fixed value, not configurable through the environment
CONSUMER_PDF_RECOVERABLE_MIME_TYPES = ("application/octet-stream",)
|
|
|
|
OCR_PAGES = __get_optional_int("PAPERLESS_OCR_PAGES")

# The default language that tesseract will attempt to use when parsing
# documents. It should be a 3-letter language code consistent with ISO 639.
OCR_LANGUAGE = os.environ.get("PAPERLESS_OCR_LANGUAGE", "eng")

# OCRmyPDF --output-type options are available.
OCR_OUTPUT_TYPE = os.environ.get("PAPERLESS_OCR_OUTPUT_TYPE", "pdfa")

# skip, redo, force
OCR_MODE = os.environ.get("PAPERLESS_OCR_MODE", "skip")

OCR_SKIP_ARCHIVE_FILE = os.environ.get("PAPERLESS_OCR_SKIP_ARCHIVE_FILE", "never")

OCR_IMAGE_DPI = __get_optional_int("PAPERLESS_OCR_IMAGE_DPI")

OCR_CLEAN = os.environ.get("PAPERLESS_OCR_CLEAN", "clean")

# Deskewing and page rotation are both enabled by default
OCR_DESKEW: Final[bool] = __get_boolean("PAPERLESS_OCR_DESKEW", "true")
OCR_ROTATE_PAGES: Final[bool] = __get_boolean("PAPERLESS_OCR_ROTATE_PAGES", "true")
OCR_ROTATE_PAGES_THRESHOLD: Final[float] = __get_float(
    "PAPERLESS_OCR_ROTATE_PAGES_THRESHOLD",
    12.0,
)

OCR_MAX_IMAGE_PIXELS: Final[int | None] = __get_optional_int(
    "PAPERLESS_OCR_MAX_IMAGE_PIXELS",
)

OCR_COLOR_CONVERSION_STRATEGY = os.environ.get(
    "PAPERLESS_OCR_COLOR_CONVERSION_STRATEGY",
    "RGB",
)

# No default: None unless the variable is set
OCR_USER_ARGS = os.environ.get("PAPERLESS_OCR_USER_ARGS")

MAX_IMAGE_PIXELS: Final[int | None] = __get_optional_int(
    "PAPERLESS_MAX_IMAGE_PIXELS",
)
|
|
|
|
# GNUPG needs a home directory for some reason; falls back to /tmp when HOME
# is not set
GNUPG_HOME = os.environ.get("HOME", "/tmp")

# Convert is part of the ImageMagick package. The temporary directory and
# memory limit are None unless configured.
CONVERT_BINARY = os.environ.get("PAPERLESS_CONVERT_BINARY", "convert")
CONVERT_TMPDIR = os.environ.get("PAPERLESS_CONVERT_TMPDIR")
CONVERT_MEMORY_LIMIT = os.environ.get("PAPERLESS_CONVERT_MEMORY_LIMIT")

GS_BINARY = os.environ.get("PAPERLESS_GS_BINARY", "gs")

# Fallback layout for .eml consumption
EMAIL_PARSE_DEFAULT_LAYOUT = __get_int(
    "PAPERLESS_EMAIL_PARSE_DEFAULT_LAYOUT",
    1,  # MailRule.PdfLayout.TEXT_HTML but that can't be imported here
)
|
|
|
|
# Pre-2.x versions of Paperless stored your documents locally with GPG
# encryption, but that is no longer the default. This behaviour is still
# available, but it must be explicitly enabled by setting
# `PAPERLESS_PASSPHRASE` in your environment or config file. The default is to
# store these files unencrypted.
#
# Translation:
# * If you're a new user, you can safely ignore this setting.
# * If you're upgrading from 1.x, this must be set, OR you can run
#   `./manage.py change_storage_type gpg unencrypted` to decrypt your files,
#   after which you can unset this value.
PASSPHRASE = os.environ.get("PAPERLESS_PASSPHRASE")

# Optional pre- and post-consumption scripts; None unless configured
PRE_CONSUME_SCRIPT = os.environ.get("PAPERLESS_PRE_CONSUME_SCRIPT")
POST_CONSUME_SCRIPT = os.environ.get("PAPERLESS_POST_CONSUME_SCRIPT")

# Specify the default date order (for autodetected dates). The filename date
# order has no default.
DATE_ORDER = os.environ.get("PAPERLESS_DATE_ORDER", "DMY")
FILENAME_DATE_ORDER = os.environ.get("PAPERLESS_FILENAME_DATE_ORDER")

# Maximum number of dates taken from document start to end to show as
# suggestions for `created` date in the frontend. Duplicates are removed,
# which can result in fewer dates shown.
NUMBER_OF_SUGGESTED_DATES = __get_int("PAPERLESS_NUMBER_OF_SUGGESTED_DATES", 3)

# Specify the filename format (None unless configured)
FILENAME_FORMAT = os.environ.get("PAPERLESS_FILENAME_FORMAT")

# If this is enabled, variables in filename format will resolve to
# empty-string instead of 'none'.
# Directories with 'empty names' are omitted, too.
FILENAME_FORMAT_REMOVE_NONE = __get_boolean(
    "PAPERLESS_FILENAME_FORMAT_REMOVE_NONE",
    "NO",
)

THUMBNAIL_FONT_NAME = os.environ.get(
    "PAPERLESS_THUMBNAIL_FONT_NAME",
    "/usr/share/fonts/liberation/LiberationSerif-Regular.ttf",
)
|
|
|
|
# Tika settings
TIKA_ENABLED = __get_boolean("PAPERLESS_TIKA_ENABLED", "NO")
TIKA_ENDPOINT = os.environ.get("PAPERLESS_TIKA_ENDPOINT", "http://localhost:9998")
TIKA_GOTENBERG_ENDPOINT = os.environ.get(
    "PAPERLESS_TIKA_GOTENBERG_ENDPOINT",
    "http://localhost:3000",
)

# The Tika app is only registered when explicitly enabled
if TIKA_ENABLED:
    INSTALLED_APPS.append("paperless_tika.apps.PaperlessTikaConfig")

# Audit logging defaults to enabled and registers both its app and middleware
AUDIT_LOG_ENABLED = __get_boolean("PAPERLESS_AUDIT_LOG_ENABLED", "true")
if AUDIT_LOG_ENABLED:
    INSTALLED_APPS.append("auditlog")
    MIDDLEWARE.append("auditlog.middleware.AuditlogMiddleware")
|
|
|
|
|
|
def _parse_ignore_dates(
    env_ignore: str,
    date_order: str = DATE_ORDER,
) -> set[datetime.date]:
    """
    Parse the comma separated PAPERLESS_IGNORE_DATES value into dates.

    Entries which are blank or which dateparser cannot interpret are skipped.

    Args:
        env_ignore (str): The value of the environment variable, comma
            separated dates
        date_order (str, optional): The order of day, month and year in the
            date strings, handed to dateparser as its DATE_ORDER setting.
            Defaults to DATE_ORDER.

    Returns:
        set[datetime.date]: The set of parsed date objects
    """
    import dateparser

    ignored_dates: set[datetime.date] = set()
    for raw_date in env_ignore.split(","):
        candidate = raw_date.strip()
        if not candidate:
            # e.g. a trailing comma or an empty variable; nothing to parse
            continue
        parsed = dateparser.parse(
            candidate,
            settings={
                "DATE_ORDER": date_order,
            },
        )
        if parsed:
            # Only the calendar date is kept, the time component is dropped
            ignored_dates.add(parsed.date())
    return ignored_dates
|
|
|
|
|
|
# List dates that should be ignored when trying to parse date from document
# text. Stays empty unless PAPERLESS_IGNORE_DATES is set.
IGNORE_DATES: set[datetime.date] = set()

if "PAPERLESS_IGNORE_DATES" in os.environ:
    IGNORE_DATES = _parse_ignore_dates(os.environ["PAPERLESS_IGNORE_DATES"])

# Either the literal string "default" (variable unset or set to "default")
# or the boolean interpretation of the configured value.
ENABLE_UPDATE_CHECK = (
    __get_boolean("PAPERLESS_ENABLE_UPDATE_CHECK")
    if os.environ.get("PAPERLESS_ENABLE_UPDATE_CHECK", "default") != "default"
    else "default"
)

# Optional branding overrides; None unless configured
APP_TITLE = os.environ.get("PAPERLESS_APP_TITLE")
APP_LOGO = os.environ.get("PAPERLESS_APP_LOGO")
|
|
|
|
###############################################################################
|
|
# Machine Learning #
|
|
###############################################################################
|
|
|
|
|
|
def _get_nltk_language_setting(ocr_lang: str) -> str | None:
|
|
"""
|
|
Maps an ISO-639-1 language code supported by Tesseract into
|
|
an optional NLTK language name. This is the set of common supported
|
|
languages for all the NLTK data used.
|
|
|
|
Assumption: The primary language is first
|
|
|
|
NLTK Languages:
|
|
- https://www.nltk.org/api/nltk.stem.snowball.html#nltk.stem.snowball.SnowballStemmer
|
|
- https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/tokenizers/punkt.zip
|
|
- https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/stopwords.zip
|
|
|
|
The common intersection between all languages in those 3 is handled here
|
|
|
|
"""
|
|
ocr_lang = ocr_lang.split("+")[0]
|
|
iso_code_to_nltk = {
|
|
"dan": "danish",
|
|
"nld": "dutch",
|
|
"eng": "english",
|
|
"fin": "finnish",
|
|
"fra": "french",
|
|
"deu": "german",
|
|
"ita": "italian",
|
|
"nor": "norwegian",
|
|
"por": "portuguese",
|
|
"rus": "russian",
|
|
"spa": "spanish",
|
|
"swe": "swedish",
|
|
}
|
|
|
|
return iso_code_to_nltk.get(ocr_lang)
|
|
|
|
|
|
# NLTK is enabled by default
NLTK_ENABLED: Final[bool] = __get_boolean("PAPERLESS_ENABLE_NLTK", "yes")

# Derived from the OCR language; None when that language has no NLTK mapping
NLTK_LANGUAGE: str | None = _get_nltk_language_setting(OCR_LANGUAGE)
|
|
|
|
###############################################################################
# Email (SMTP) Backend                                                        #
###############################################################################

# Server and credentials
EMAIL_HOST: Final[str] = os.environ.get("PAPERLESS_EMAIL_HOST", "localhost")
EMAIL_PORT: Final[int] = int(os.environ.get("PAPERLESS_EMAIL_PORT", 25))
EMAIL_HOST_USER: Final[str] = os.environ.get("PAPERLESS_EMAIL_HOST_USER", "")
EMAIL_HOST_PASSWORD: Final[str] = os.environ.get("PAPERLESS_EMAIL_HOST_PASSWORD", "")
# The sender address falls back to the SMTP user name
DEFAULT_FROM_EMAIL: Final[str] = os.environ.get(
    "PAPERLESS_EMAIL_FROM",
    EMAIL_HOST_USER,
)
EMAIL_USE_TLS: Final[bool] = __get_boolean("PAPERLESS_EMAIL_USE_TLS")
EMAIL_USE_SSL: Final[bool] = __get_boolean("PAPERLESS_EMAIL_USE_SSL")
EMAIL_SUBJECT_PREFIX: Final[str] = "[Paperless-ngx] "
EMAIL_TIMEOUT = 30.0
# Email counts as enabled once the host or the user differs from its default
EMAIL_ENABLED = not (EMAIL_HOST == "localhost" and EMAIL_HOST_USER == "")
if DEBUG:  # pragma: no cover
    # Debug mode uses the file based backend, writing under BASE_DIR
    EMAIL_BACKEND = "django.core.mail.backends.filebased.EmailBackend"
    EMAIL_FILE_PATH = BASE_DIR / "sent_emails"
|
|
|
|
###############################################################################
# Email Preprocessors                                                         #
###############################################################################

# None unless PAPERLESS_EMAIL_GNUPG_HOME is set
EMAIL_GNUPG_HOME: Final[str | None] = os.environ.get("PAPERLESS_EMAIL_GNUPG_HOME")
EMAIL_ENABLE_GPG_DECRYPTOR: Final[bool] = __get_boolean(
    "PAPERLESS_ENABLE_GPG_DECRYPTOR",
)
|
|
|
|
|
|
###############################################################################
# Soft Delete                                                                 #
###############################################################################
# Defaults to 30; configured values below 1 are raised to 1
EMPTY_TRASH_DELAY = max(1, __get_int("PAPERLESS_EMPTY_TRASH_DELAY", 30))
|
|
|
|
|
|
###############################################################################
# Oauth Email                                                                 #
###############################################################################
OAUTH_CALLBACK_BASE_URL = os.environ.get("PAPERLESS_OAUTH_CALLBACK_BASE_URL")

# A provider is enabled only when a callback base URL (or PAPERLESS_URL) as
# well as the client id and client secret are all configured.
GMAIL_OAUTH_CLIENT_ID = os.environ.get("PAPERLESS_GMAIL_OAUTH_CLIENT_ID")
GMAIL_OAUTH_CLIENT_SECRET = os.environ.get("PAPERLESS_GMAIL_OAUTH_CLIENT_SECRET")
GMAIL_OAUTH_ENABLED = all(
    (
        OAUTH_CALLBACK_BASE_URL or PAPERLESS_URL,
        GMAIL_OAUTH_CLIENT_ID,
        GMAIL_OAUTH_CLIENT_SECRET,
    ),
)
OUTLOOK_OAUTH_CLIENT_ID = os.environ.get("PAPERLESS_OUTLOOK_OAUTH_CLIENT_ID")
OUTLOOK_OAUTH_CLIENT_SECRET = os.environ.get("PAPERLESS_OUTLOOK_OAUTH_CLIENT_SECRET")
OUTLOOK_OAUTH_ENABLED = all(
    (
        OAUTH_CALLBACK_BASE_URL or PAPERLESS_URL,
        OUTLOOK_OAUTH_CLIENT_ID,
        OUTLOOK_OAUTH_CLIENT_SECRET,
    ),
)
|