From eb8f37d8462b43a8277633d89301f316ee68dddc Mon Sep 17 00:00:00 2001 From: Trenton Holmes <797416+stumpylog@users.noreply.github.com> Date: Sat, 31 Dec 2022 13:16:07 -0800 Subject: [PATCH] Allows scheduling tasks via cron --- src/paperless/settings.py | 71 +++++++++++++++++++++++++++------------ 1 file changed, 49 insertions(+), 22 deletions(-) diff --git a/src/paperless/settings.py b/src/paperless/settings.py index cc1b9e096..55c95018d 100644 --- a/src/paperless/settings.py +++ b/src/paperless/settings.py @@ -5,6 +5,7 @@ import multiprocessing import os import re import tempfile +from typing import Dict from typing import Final from typing import Optional from typing import Set @@ -107,6 +108,51 @@ def _parse_redis_url(env_redis: Optional[str]) -> Tuple[str]: return (env_redis, env_redis) +def _parse_beat_schedule() -> Dict: + schedule = {} + tasks = [ + { + "name": "Check all e-mail accounts", + "env_key": "PAPERLESS_EMAIL_TASK_CRON", + # Default every ten minutes + "env_default": "*/10 * * * *", + "task": "paperless_mail.tasks.process_mail_accounts", + }, + { + "name": "Train the classifier", + "env_key": "PAPERLESS_TRAIN_TASK_CRON", + # Default hourly at 5 minutes past the hour + "env_default": "5 */1 * * *", + "task": "documents.tasks.train_classifier", + }, + { + "name": "Optimize the index", + "env_key": "PAPERLESS_INDEX_TASK_CRON", + # Default daily at midnight + "env_default": "0 0 * * *", + "task": "documents.tasks.index_optimize", + }, + { + "name": "Perform sanity check", + "env_key": "PAPERLESS_SANITY_TASK_CRON", + # Default Sunday at 00:30 + "env_default": "30 0 * * sun", + "task": "documents.tasks.sanity_check", + }, + ] + for task in tasks: + value = os.getenv(task["env_key"], task["env_default"]) + if value == "disable": + continue + minute, hour, day_month, month, day_week = value.split(" ") + schedule[task["name"]] = { + "task": task["task"], + "schedule": crontab(minute, hour, day_week, day_month, month), + } + + return schedule + + # NEVER RUN WITH DEBUG IN PRODUCTION. DEBUG = __get_boolean("PAPERLESS_DEBUG", "NO") @@ -530,29 +576,10 @@ CELERY_RESULT_EXTENDED = True CELERY_RESULT_BACKEND = "django-db" CELERY_CACHE_BACKEND = "default" +# https://docs.celeryq.dev/en/stable/userguide/configuration.html#beat-schedule +CELERY_BEAT_SCHEDULE = _parse_beat_schedule() -CELERY_BEAT_SCHEDULE = { - # Every ten minutes - "Check all e-mail accounts": { - "task": "paperless_mail.tasks.process_mail_accounts", - "schedule": crontab(minute="*/10"), - }, - # Hourly at 5 minutes past the hour - "Train the classifier": { - "task": "documents.tasks.train_classifier", - "schedule": crontab(minute="5", hour="*/1"), - }, - # Daily at midnight - "Optimize the index": { - "task": "documents.tasks.index_optimize", - "schedule": crontab(minute=0, hour=0), - }, - # Weekly, Sunday at 00:30 - "Perform sanity check": { - "task": "documents.tasks.sanity_check", - "schedule": crontab(minute=30, hour=0, day_of_week="sun"), - }, -} +# https://docs.celeryq.dev/en/stable/userguide/configuration.html#beat-schedule-filename CELERY_BEAT_SCHEDULE_FILENAME = os.path.join(DATA_DIR, "celerybeat-schedule.db") # django setting.