Merge branch 'dev' into feature-permissions

This commit is contained in:
shamoon
2023-01-16 15:59:25 -08:00
committed by GitHub
29 changed files with 923 additions and 482 deletions

View File

@@ -5,6 +5,7 @@ import multiprocessing
import os
import re
import tempfile
from typing import Dict
from typing import Final
from typing import Optional
from typing import Set
@@ -107,6 +108,57 @@ def _parse_redis_url(env_redis: Optional[str]) -> Tuple[str]:
return (env_redis, env_redis)
def _parse_beat_schedule() -> Dict:
    """
    Build the Celery beat schedule from the environment.

    Every periodic task reads a crontab(5) style expression from its own
    environment variable and falls back to a built-in default when the
    variable is unset.  The value "disable" leaves the task out of the
    schedule entirely.

    Returns:
        A dict keyed by the task's display name.  Each value is a dict with
        the dotted "task" path and the crontab "schedule" to run it on.

    Raises:
        ValueError: if a configured expression does not consist of exactly
            five whitespace separated fields.
    """
    tasks = [
        {
            "name": "Check all e-mail accounts",
            "env_key": "PAPERLESS_EMAIL_TASK_CRON",
            # Default every ten minutes
            "env_default": "*/10 * * * *",
            "task": "paperless_mail.tasks.process_mail_accounts",
        },
        {
            "name": "Train the classifier",
            "env_key": "PAPERLESS_TRAIN_TASK_CRON",
            # Default hourly at 5 minutes past the hour
            "env_default": "5 */1 * * *",
            "task": "documents.tasks.train_classifier",
        },
        {
            "name": "Optimize the index",
            "env_key": "PAPERLESS_INDEX_TASK_CRON",
            # Default daily at midnight
            "env_default": "0 0 * * *",
            "task": "documents.tasks.index_optimize",
        },
        {
            "name": "Perform sanity check",
            "env_key": "PAPERLESS_SANITY_TASK_CRON",
            # Default Sunday at 00:30
            "env_default": "30 0 * * sun",
            "task": "documents.tasks.sanity_check",
        },
    ]

    schedule = {}
    for task in tasks:
        # Either get the environment setting or use the default
        value = os.getenv(task["env_key"], task["env_default"])

        # crontab(5) format
        #   - five time-and-date fields
        #   - separated by at least one blank
        # Splitting on any run of whitespace (rather than on a single space)
        # tolerates repeated blanks, tabs and leading/trailing whitespace.
        fields = value.split()

        # Don't add disabled tasks to the schedule
        if fields == ["disable"]:
            continue

        if len(fields) != 5:
            raise ValueError(
                f"{task['env_key']} must be \"disable\" or a crontab expression "
                f"with five fields, got {value!r}",
            )

        # https://crontab.guru/ is helpful for checking an expression
        minute, hour, day_month, month, day_week = fields

        schedule[task["name"]] = {
            "task": task["task"],
            # Keywords are used because the crontab(5) field order differs
            # from the positional order of celery's crontab().
            "schedule": crontab(
                minute=minute,
                hour=hour,
                day_of_week=day_week,
                day_of_month=day_month,
                month_of_year=month,
            ),
        }

    return schedule
# NEVER RUN WITH DEBUG IN PRODUCTION.
DEBUG = __get_boolean("PAPERLESS_DEBUG", "NO")
@@ -126,7 +178,7 @@ THUMBNAIL_DIR = os.path.join(MEDIA_ROOT, "documents", "thumbnails")
DATA_DIR = __get_path("PAPERLESS_DATA_DIR", os.path.join(BASE_DIR, "..", "data"))
NLTK_DIR = __get_path("PAPERLESS_NLTK_DIR", "/usr/local/share/nltk_data")
NLTK_DIR = __get_path("PAPERLESS_NLTK_DIR", "/usr/share/nltk_data")
TRASH_DIR = os.getenv("PAPERLESS_TRASH_DIR")
@@ -533,29 +585,10 @@ CELERY_RESULT_EXTENDED = True
CELERY_RESULT_BACKEND = "django-db"
CELERY_CACHE_BACKEND = "default"
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#beat-schedule
CELERY_BEAT_SCHEDULE = _parse_beat_schedule()
CELERY_BEAT_SCHEDULE = {
# Every ten minutes
"Check all e-mail accounts": {
"task": "paperless_mail.tasks.process_mail_accounts",
"schedule": crontab(minute="*/10"),
},
# Hourly at 5 minutes past the hour
"Train the classifier": {
"task": "documents.tasks.train_classifier",
"schedule": crontab(minute="5", hour="*/1"),
},
# Daily at midnight
"Optimize the index": {
"task": "documents.tasks.index_optimize",
"schedule": crontab(minute=0, hour=0),
},
# Weekly, Sunday at 00:30
"Perform sanity check": {
"task": "documents.tasks.sanity_check",
"schedule": crontab(minute=30, hour=0, day_of_week="sun"),
},
}
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#beat-schedule-filename
CELERY_BEAT_SCHEDULE_FILENAME = os.path.join(DATA_DIR, "celerybeat-schedule.db")
# django setting.

View File

@@ -1,7 +1,10 @@
import datetime
import os
from unittest import mock
from unittest import TestCase
from celery.schedules import crontab
from paperless.settings import _parse_beat_schedule
from paperless.settings import _parse_ignore_dates
from paperless.settings import _parse_redis_url
from paperless.settings import default_threads_per_worker
@@ -60,6 +63,8 @@ class TestIgnoreDateParsing(TestCase):
self._parse_checker(test_cases)
class TestThreadCalculation(TestCase):
def test_workers_threads(self):
"""
GIVEN:
@@ -84,6 +89,8 @@ class TestIgnoreDateParsing(TestCase):
self.assertLessEqual(default_workers * default_threads, i)
class TestRedisSocketConversion(TestCase):
def test_redis_socket_parsing(self):
"""
GIVEN:
@@ -139,3 +146,132 @@ class TestIgnoreDateParsing(TestCase):
]:
result = _parse_redis_url(input)
self.assertTupleEqual(expected, result)
class TestCeleryScheduleParsing(TestCase):
    """
    Exercises _parse_beat_schedule with different task cron environment
    settings and compares the result against the expected schedule.
    """

    @staticmethod
    def _default_schedule():
        """
        Return a fresh copy of the schedule expected when nothing is
        configured, so each test can adjust it without affecting others.
        """
        return {
            "Check all e-mail accounts": {
                "task": "paperless_mail.tasks.process_mail_accounts",
                "schedule": crontab(minute="*/10"),
            },
            "Train the classifier": {
                "task": "documents.tasks.train_classifier",
                "schedule": crontab(minute="5", hour="*/1"),
            },
            "Optimize the index": {
                "task": "documents.tasks.index_optimize",
                "schedule": crontab(minute=0, hour=0),
            },
            "Perform sanity check": {
                "task": "documents.tasks.sanity_check",
                "schedule": crontab(minute=30, hour=0, day_of_week="sun"),
            },
        }

    def test_schedule_configuration_default(self):
        """
        GIVEN:
            - No task schedule has been configured
        WHEN:
            - The celery beat schedule is built
        THEN:
            - Every task uses its default schedule
        """
        actual = _parse_beat_schedule()

        self.assertDictEqual(self._default_schedule(), actual)

    def test_schedule_configuration_changed(self):
        """
        GIVEN:
            - The e-mail task has a non-default schedule configured
        WHEN:
            - The celery beat schedule is built
        THEN:
            - The e-mail task follows the configured schedule
            - All other tasks keep their default schedule
        """
        overrides = {"PAPERLESS_EMAIL_TASK_CRON": "*/50 * * * mon"}
        with mock.patch.dict(os.environ, overrides):
            actual = _parse_beat_schedule()

        expected = self._default_schedule()
        expected["Check all e-mail accounts"]["schedule"] = crontab(
            minute="*/50",
            day_of_week="mon",
        )

        self.assertDictEqual(expected, actual)

    def test_schedule_configuration_disabled(self):
        """
        GIVEN:
            - The search index task is disabled
        WHEN:
            - The celery beat schedule is built
        THEN:
            - The search index task is missing from the schedule
            - All other tasks keep their default schedule
        """
        overrides = {"PAPERLESS_INDEX_TASK_CRON": "disable"}
        with mock.patch.dict(os.environ, overrides):
            actual = _parse_beat_schedule()

        expected = self._default_schedule()
        del expected["Optimize the index"]

        self.assertDictEqual(expected, actual)

    def test_schedule_configuration_disabled_all(self):
        """
        GIVEN:
            - Every task is disabled
        WHEN:
            - The celery beat schedule is built
        THEN:
            - The schedule is empty
        """
        overrides = dict.fromkeys(
            (
                "PAPERLESS_EMAIL_TASK_CRON",
                "PAPERLESS_TRAIN_TASK_CRON",
                "PAPERLESS_SANITY_TASK_CRON",
                "PAPERLESS_INDEX_TASK_CRON",
            ),
            "disable",
        )
        with mock.patch.dict(os.environ, overrides):
            actual = _parse_beat_schedule()

        self.assertDictEqual({}, actual)

View File

@@ -158,7 +158,7 @@ urlpatterns = [
websocket_urlpatterns = [
re_path(r"ws/status/$", StatusConsumer.as_asgi()),
path(settings.BASE_URL.lstrip("/") + "ws/status/", StatusConsumer.as_asgi()),
]
# Text in each page's <h1> (and above login form).