Allows scheduling tasks via cron

This commit is contained in:
Trenton Holmes 2022-12-31 13:16:07 -08:00 committed by Trenton H
parent 5c9e2d7070
commit eb8f37d846

View File

@ -5,6 +5,7 @@ import multiprocessing
import os import os
import re import re
import tempfile import tempfile
from typing import Dict
from typing import Final from typing import Final
from typing import Optional from typing import Optional
from typing import Set from typing import Set
@ -107,6 +108,51 @@ def _parse_redis_url(env_redis: Optional[str]) -> Tuple[str]:
return (env_redis, env_redis) return (env_redis, env_redis)
def _parse_beat_schedule() -> Dict:
schedule = {}
tasks = [
{
"name": "Check all e-mail accounts",
"env_key": "PAPERLESS_EMAIL_TASK_CRON",
# Default every ten minutes
"env_default": "*/10 * * * *",
"task": "paperless_mail.tasks.process_mail_accounts",
},
{
"name": "Train the classifier",
"env_key": "PAPERLESS_TRAIN_TASK_CRON",
# Default hourly at 5 minutes past the hour
"env_default": "5 */1 * * *",
"task": "documents.tasks.train_classifier",
},
{
"name": "Optimize the index",
"env_key": "PAPERLESS_INDEX_TASK_CRON",
# Default daily at midnight
"env_default": "0 0 * * *",
"task": "documents.tasks.index_optimize",
},
{
"name": "Perform sanity check",
"env_key": "PAPERLESS_SANITY_TASK_CRON",
# Default Sunday at 00:30
"env_default": "30 0 * * sun",
"task": "documents.tasks.sanity_check",
},
]
for task in tasks:
value = os.getenv(task["env_key"], task["env_default"])
if value == "disable":
continue
minute, hour, day_month, month, day_week = value.split(" ")
schedule[task["name"]] = {
"task": task["task"],
"schedule": crontab(minute, hour, day_week, day_month, month),
}
return schedule
# NEVER RUN WITH DEBUG IN PRODUCTION. # NEVER RUN WITH DEBUG IN PRODUCTION.
DEBUG = __get_boolean("PAPERLESS_DEBUG", "NO") DEBUG = __get_boolean("PAPERLESS_DEBUG", "NO")
@ -530,29 +576,10 @@ CELERY_RESULT_EXTENDED = True
CELERY_RESULT_BACKEND = "django-db" CELERY_RESULT_BACKEND = "django-db"
CELERY_CACHE_BACKEND = "default" CELERY_CACHE_BACKEND = "default"
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#beat-schedule
CELERY_BEAT_SCHEDULE = _parse_beat_schedule()
CELERY_BEAT_SCHEDULE = { # https://docs.celeryq.dev/en/stable/userguide/configuration.html#beat-schedule-filename
# Every ten minutes
"Check all e-mail accounts": {
"task": "paperless_mail.tasks.process_mail_accounts",
"schedule": crontab(minute="*/10"),
},
# Hourly at 5 minutes past the hour
"Train the classifier": {
"task": "documents.tasks.train_classifier",
"schedule": crontab(minute="5", hour="*/1"),
},
# Daily at midnight
"Optimize the index": {
"task": "documents.tasks.index_optimize",
"schedule": crontab(minute=0, hour=0),
},
# Weekly, Sunday at 00:30
"Perform sanity check": {
"task": "documents.tasks.sanity_check",
"schedule": crontab(minute=30, hour=0, day_of_week="sun"),
},
}
CELERY_BEAT_SCHEDULE_FILENAME = os.path.join(DATA_DIR, "celerybeat-schedule.db") CELERY_BEAT_SCHEDULE_FILENAME = os.path.join(DATA_DIR, "celerybeat-schedule.db")
# django setting. # django setting.