mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-07-28 18:24:38 -05:00
Transitions the backend to celery and celery beat
This commit is contained in:

committed by
Trenton H

parent
74c1a99545
commit
09287701ae
@@ -1,5 +1,11 @@
|
||||
from .celery import app as celery_app
|
||||
from .checks import binaries_check
|
||||
from .checks import paths_check
|
||||
from .checks import settings_values_check
|
||||
|
||||
__all__ = ["binaries_check", "paths_check", "settings_values_check"]
|
||||
__all__ = [
|
||||
"celery_app",
|
||||
"binaries_check",
|
||||
"paths_check",
|
||||
"settings_values_check",
|
||||
]
|
||||
|
17
src/paperless/celery.py
Normal file
17
src/paperless/celery.py
Normal file
@@ -0,0 +1,17 @@
|
||||
import os
|
||||
|
||||
from celery import Celery
|
||||
|
||||
# Set the default Django settings module for the 'celery' program.
|
||||
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "paperless.settings")
|
||||
|
||||
app = Celery("paperless")
|
||||
|
||||
# Using a string here means the worker doesn't have to serialize
|
||||
# the configuration object to child processes.
|
||||
# - namespace='CELERY' means all celery-related configuration keys
|
||||
# should have a `CELERY_` prefix.
|
||||
app.config_from_object("django.conf:settings", namespace="CELERY")
|
||||
|
||||
# Load task modules from all registered Django apps.
|
||||
app.autodiscover_tasks()
|
@@ -10,6 +10,7 @@ from typing import Optional
|
||||
from typing import Set
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from celery.schedules import crontab
|
||||
from concurrent_log_handler.queue import setup_logging_queues
|
||||
from django.utils.translation import gettext_lazy as _
|
||||
from dotenv import load_dotenv
|
||||
@@ -128,7 +129,7 @@ INSTALLED_APPS = [
|
||||
"rest_framework",
|
||||
"rest_framework.authtoken",
|
||||
"django_filters",
|
||||
"django_q",
|
||||
"django_celery_results",
|
||||
] + env_apps
|
||||
|
||||
if DEBUG:
|
||||
@@ -179,6 +180,8 @@ ASGI_APPLICATION = "paperless.asgi.application"
|
||||
STATIC_URL = os.getenv("PAPERLESS_STATIC_URL", BASE_URL + "static/")
|
||||
WHITENOISE_STATIC_PREFIX = "/static/"
|
||||
|
||||
_REDIS_URL = os.getenv("PAPERLESS_REDIS", "redis://localhost:6379")
|
||||
|
||||
# TODO: what is this used for?
|
||||
TEMPLATES = [
|
||||
{
|
||||
@@ -200,7 +203,7 @@ CHANNEL_LAYERS = {
|
||||
"default": {
|
||||
"BACKEND": "channels_redis.core.RedisChannelLayer",
|
||||
"CONFIG": {
|
||||
"hosts": [os.getenv("PAPERLESS_REDIS", "redis://localhost:6379")],
|
||||
"hosts": [_REDIS_URL],
|
||||
"capacity": 2000, # default 100
|
||||
"expiry": 15, # default 60
|
||||
},
|
||||
@@ -458,24 +461,48 @@ TASK_WORKERS = __get_int("PAPERLESS_TASK_WORKERS", 1)
|
||||
|
||||
WORKER_TIMEOUT: Final[int] = __get_int("PAPERLESS_WORKER_TIMEOUT", 1800)
|
||||
|
||||
# Per django-q docs, timeout must be smaller than retry
|
||||
# We default retry to 10s more than the timeout to silence the
|
||||
# warning, as retry functionality isn't used.
|
||||
WORKER_RETRY: Final[int] = __get_int(
|
||||
"PAPERLESS_WORKER_RETRY",
|
||||
WORKER_TIMEOUT + 10,
|
||||
)
|
||||
CELERY_BROKER_URL = _REDIS_URL
|
||||
CELERY_TIMEZONE = TIME_ZONE
|
||||
CELERY_WORKER_HIJACK_ROOT_LOGGER = False
|
||||
CELERY_WORKER_CONCURRENCY = TASK_WORKERS
|
||||
CELERY_WORKER_MAX_TASKS_PER_CHILD = 1
|
||||
CELERY_TASK_TRACK_STARTED = True
|
||||
CELERY_RESULT_EXTENDED = True
|
||||
CELERY_SEND_TASK_SENT_EVENT = True
|
||||
CELERY_TASK_TIME_LIMIT = WORKER_TIMEOUT
|
||||
CELERY_RESULT_BACKEND = "django-db"
|
||||
CELERY_CACHE_BACKEND = "default"
|
||||
|
||||
Q_CLUSTER = {
|
||||
"name": "paperless",
|
||||
"guard_cycle": 5,
|
||||
"catch_up": False,
|
||||
"recycle": 1,
|
||||
"retry": WORKER_RETRY,
|
||||
"timeout": WORKER_TIMEOUT,
|
||||
"workers": TASK_WORKERS,
|
||||
"redis": os.getenv("PAPERLESS_REDIS", "redis://localhost:6379"),
|
||||
"log_level": "DEBUG" if DEBUG else "INFO",
|
||||
CELERY_BEAT_SCHEDULE = {
|
||||
# Every ten minutes
|
||||
"Check all e-mail accounts": {
|
||||
"task": "paperless_mail.tasks.process_mail_accounts",
|
||||
"schedule": crontab(minute="*/10"),
|
||||
},
|
||||
# Hourly at 5 minutes past the hour
|
||||
"Train the classifier": {
|
||||
"task": "documents.tasks.train_classifier",
|
||||
"schedule": crontab(minute="5", hour="*/1"),
|
||||
},
|
||||
# Daily at midnight
|
||||
"Optimize the index": {
|
||||
"task": "documents.tasks.index_optimize",
|
||||
"schedule": crontab(minute=0, hour=0),
|
||||
},
|
||||
# Weekly, Sunday at 00:30
|
||||
"Perform sanity check": {
|
||||
"task": "documents.tasks.sanity_check",
|
||||
"schedule": crontab(minute=30, hour=0, day_of_week="sun"),
|
||||
},
|
||||
}
|
||||
CELERY_BEAT_SCHEDULE_FILENAME = os.path.join(DATA_DIR, "celerybeat-schedule.db")
|
||||
|
||||
# django setting.
|
||||
CACHES = {
|
||||
"default": {
|
||||
"BACKEND": "django.core.cache.backends.redis.RedisCache",
|
||||
"LOCATION": _REDIS_URL,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user