mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-07-30 18:27:45 -05:00
Merge branch 'dev' into feature-permissions
This commit is contained in:
@@ -5,6 +5,7 @@ import multiprocessing
|
||||
import os
|
||||
import re
|
||||
import tempfile
|
||||
from typing import Dict
|
||||
from typing import Final
|
||||
from typing import Optional
|
||||
from typing import Set
|
||||
@@ -107,6 +108,57 @@ def _parse_redis_url(env_redis: Optional[str]) -> Tuple[str]:
|
||||
return (env_redis, env_redis)
|
||||
|
||||
|
||||
def _parse_beat_schedule() -> Dict:
|
||||
schedule = {}
|
||||
tasks = [
|
||||
{
|
||||
"name": "Check all e-mail accounts",
|
||||
"env_key": "PAPERLESS_EMAIL_TASK_CRON",
|
||||
# Default every ten minutes
|
||||
"env_default": "*/10 * * * *",
|
||||
"task": "paperless_mail.tasks.process_mail_accounts",
|
||||
},
|
||||
{
|
||||
"name": "Train the classifier",
|
||||
"env_key": "PAPERLESS_TRAIN_TASK_CRON",
|
||||
# Default hourly at 5 minutes past the hour
|
||||
"env_default": "5 */1 * * *",
|
||||
"task": "documents.tasks.train_classifier",
|
||||
},
|
||||
{
|
||||
"name": "Optimize the index",
|
||||
"env_key": "PAPERLESS_INDEX_TASK_CRON",
|
||||
# Default daily at midnight
|
||||
"env_default": "0 0 * * *",
|
||||
"task": "documents.tasks.index_optimize",
|
||||
},
|
||||
{
|
||||
"name": "Perform sanity check",
|
||||
"env_key": "PAPERLESS_SANITY_TASK_CRON",
|
||||
# Default Sunday at 00:30
|
||||
"env_default": "30 0 * * sun",
|
||||
"task": "documents.tasks.sanity_check",
|
||||
},
|
||||
]
|
||||
for task in tasks:
|
||||
# Either get the environment setting or use the default
|
||||
value = os.getenv(task["env_key"], task["env_default"])
|
||||
# Don't add disabled tasks to the schedule
|
||||
if value == "disable":
|
||||
continue
|
||||
# I find https://crontab.guru/ super helpful
|
||||
# crontab(5) format
|
||||
# - five time-and-date fields
|
||||
# - separated by at least one blank
|
||||
minute, hour, day_month, month, day_week = value.split(" ")
|
||||
schedule[task["name"]] = {
|
||||
"task": task["task"],
|
||||
"schedule": crontab(minute, hour, day_week, day_month, month),
|
||||
}
|
||||
|
||||
return schedule
|
||||
|
||||
|
||||
# NEVER RUN WITH DEBUG IN PRODUCTION.
|
||||
DEBUG = __get_boolean("PAPERLESS_DEBUG", "NO")
|
||||
|
||||
@@ -126,7 +178,7 @@ THUMBNAIL_DIR = os.path.join(MEDIA_ROOT, "documents", "thumbnails")
|
||||
|
||||
DATA_DIR = __get_path("PAPERLESS_DATA_DIR", os.path.join(BASE_DIR, "..", "data"))
|
||||
|
||||
NLTK_DIR = __get_path("PAPERLESS_NLTK_DIR", "/usr/local/share/nltk_data")
|
||||
NLTK_DIR = __get_path("PAPERLESS_NLTK_DIR", "/usr/share/nltk_data")
|
||||
|
||||
TRASH_DIR = os.getenv("PAPERLESS_TRASH_DIR")
|
||||
|
||||
@@ -533,29 +585,10 @@ CELERY_RESULT_EXTENDED = True
|
||||
CELERY_RESULT_BACKEND = "django-db"
|
||||
CELERY_CACHE_BACKEND = "default"
|
||||
|
||||
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#beat-schedule
|
||||
CELERY_BEAT_SCHEDULE = _parse_beat_schedule()
|
||||
|
||||
CELERY_BEAT_SCHEDULE = {
|
||||
# Every ten minutes
|
||||
"Check all e-mail accounts": {
|
||||
"task": "paperless_mail.tasks.process_mail_accounts",
|
||||
"schedule": crontab(minute="*/10"),
|
||||
},
|
||||
# Hourly at 5 minutes past the hour
|
||||
"Train the classifier": {
|
||||
"task": "documents.tasks.train_classifier",
|
||||
"schedule": crontab(minute="5", hour="*/1"),
|
||||
},
|
||||
# Daily at midnight
|
||||
"Optimize the index": {
|
||||
"task": "documents.tasks.index_optimize",
|
||||
"schedule": crontab(minute=0, hour=0),
|
||||
},
|
||||
# Weekly, Sunday at 00:30
|
||||
"Perform sanity check": {
|
||||
"task": "documents.tasks.sanity_check",
|
||||
"schedule": crontab(minute=30, hour=0, day_of_week="sun"),
|
||||
},
|
||||
}
|
||||
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#beat-schedule-filename
|
||||
CELERY_BEAT_SCHEDULE_FILENAME = os.path.join(DATA_DIR, "celerybeat-schedule.db")
|
||||
|
||||
# django setting.
|
||||
|
@@ -1,7 +1,10 @@
|
||||
import datetime
|
||||
import os
|
||||
from unittest import mock
|
||||
from unittest import TestCase
|
||||
|
||||
from celery.schedules import crontab
|
||||
from paperless.settings import _parse_beat_schedule
|
||||
from paperless.settings import _parse_ignore_dates
|
||||
from paperless.settings import _parse_redis_url
|
||||
from paperless.settings import default_threads_per_worker
|
||||
@@ -60,6 +63,8 @@ class TestIgnoreDateParsing(TestCase):
|
||||
|
||||
self._parse_checker(test_cases)
|
||||
|
||||
|
||||
class TestThreadCalculation(TestCase):
|
||||
def test_workers_threads(self):
|
||||
"""
|
||||
GIVEN:
|
||||
@@ -84,6 +89,8 @@ class TestIgnoreDateParsing(TestCase):
|
||||
|
||||
self.assertLessEqual(default_workers * default_threads, i)
|
||||
|
||||
|
||||
class TestRedisSocketConversion(TestCase):
|
||||
def test_redis_socket_parsing(self):
|
||||
"""
|
||||
GIVEN:
|
||||
@@ -139,3 +146,132 @@ class TestIgnoreDateParsing(TestCase):
|
||||
]:
|
||||
result = _parse_redis_url(input)
|
||||
self.assertTupleEqual(expected, result)
|
||||
|
||||
|
||||
class TestCeleryScheduleParsing(TestCase):
|
||||
def test_schedule_configuration_default(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- No configured task schedules
|
||||
WHEN:
|
||||
- The celery beat schedule is built
|
||||
THEN:
|
||||
- The default schedule is returned
|
||||
"""
|
||||
schedule = _parse_beat_schedule()
|
||||
|
||||
self.assertDictEqual(
|
||||
{
|
||||
"Check all e-mail accounts": {
|
||||
"task": "paperless_mail.tasks.process_mail_accounts",
|
||||
"schedule": crontab(minute="*/10"),
|
||||
},
|
||||
"Train the classifier": {
|
||||
"task": "documents.tasks.train_classifier",
|
||||
"schedule": crontab(minute="5", hour="*/1"),
|
||||
},
|
||||
"Optimize the index": {
|
||||
"task": "documents.tasks.index_optimize",
|
||||
"schedule": crontab(minute=0, hour=0),
|
||||
},
|
||||
"Perform sanity check": {
|
||||
"task": "documents.tasks.sanity_check",
|
||||
"schedule": crontab(minute=30, hour=0, day_of_week="sun"),
|
||||
},
|
||||
},
|
||||
schedule,
|
||||
)
|
||||
|
||||
def test_schedule_configuration_changed(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Email task is configured non-default
|
||||
WHEN:
|
||||
- The celery beat schedule is built
|
||||
THEN:
|
||||
- The email task is configured per environment
|
||||
- The default schedule is returned for other tasks
|
||||
"""
|
||||
with mock.patch.dict(
|
||||
os.environ,
|
||||
{"PAPERLESS_EMAIL_TASK_CRON": "*/50 * * * mon"},
|
||||
):
|
||||
schedule = _parse_beat_schedule()
|
||||
|
||||
self.assertDictEqual(
|
||||
{
|
||||
"Check all e-mail accounts": {
|
||||
"task": "paperless_mail.tasks.process_mail_accounts",
|
||||
"schedule": crontab(minute="*/50", day_of_week="mon"),
|
||||
},
|
||||
"Train the classifier": {
|
||||
"task": "documents.tasks.train_classifier",
|
||||
"schedule": crontab(minute="5", hour="*/1"),
|
||||
},
|
||||
"Optimize the index": {
|
||||
"task": "documents.tasks.index_optimize",
|
||||
"schedule": crontab(minute=0, hour=0),
|
||||
},
|
||||
"Perform sanity check": {
|
||||
"task": "documents.tasks.sanity_check",
|
||||
"schedule": crontab(minute=30, hour=0, day_of_week="sun"),
|
||||
},
|
||||
},
|
||||
schedule,
|
||||
)
|
||||
|
||||
def test_schedule_configuration_disabled(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Search index task is disabled
|
||||
WHEN:
|
||||
- The celery beat schedule is built
|
||||
THEN:
|
||||
- The search index task is not present
|
||||
- The default schedule is returned for other tasks
|
||||
"""
|
||||
with mock.patch.dict(os.environ, {"PAPERLESS_INDEX_TASK_CRON": "disable"}):
|
||||
schedule = _parse_beat_schedule()
|
||||
|
||||
self.assertDictEqual(
|
||||
{
|
||||
"Check all e-mail accounts": {
|
||||
"task": "paperless_mail.tasks.process_mail_accounts",
|
||||
"schedule": crontab(minute="*/10"),
|
||||
},
|
||||
"Train the classifier": {
|
||||
"task": "documents.tasks.train_classifier",
|
||||
"schedule": crontab(minute="5", hour="*/1"),
|
||||
},
|
||||
"Perform sanity check": {
|
||||
"task": "documents.tasks.sanity_check",
|
||||
"schedule": crontab(minute=30, hour=0, day_of_week="sun"),
|
||||
},
|
||||
},
|
||||
schedule,
|
||||
)
|
||||
|
||||
def test_schedule_configuration_disabled_all(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- All tasks are disabled
|
||||
WHEN:
|
||||
- The celery beat schedule is built
|
||||
THEN:
|
||||
- No tasks are scheduled
|
||||
"""
|
||||
with mock.patch.dict(
|
||||
os.environ,
|
||||
{
|
||||
"PAPERLESS_EMAIL_TASK_CRON": "disable",
|
||||
"PAPERLESS_TRAIN_TASK_CRON": "disable",
|
||||
"PAPERLESS_SANITY_TASK_CRON": "disable",
|
||||
"PAPERLESS_INDEX_TASK_CRON": "disable",
|
||||
},
|
||||
):
|
||||
schedule = _parse_beat_schedule()
|
||||
|
||||
self.assertDictEqual(
|
||||
{},
|
||||
schedule,
|
||||
)
|
||||
|
@@ -158,7 +158,7 @@ urlpatterns = [
|
||||
|
||||
|
||||
websocket_urlpatterns = [
|
||||
re_path(r"ws/status/$", StatusConsumer.as_asgi()),
|
||||
path(settings.BASE_URL.lstrip("/") + "ws/status/", StatusConsumer.as_asgi()),
|
||||
]
|
||||
|
||||
# Text in each page's <h1> (and above login form).
|
||||
|
Reference in New Issue
Block a user