mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-09 09:58:20 -05:00
272 lines
8.7 KiB
Python
272 lines
8.7 KiB
Python
import datetime
|
|
import os
|
|
from unittest import mock
|
|
from unittest import TestCase
|
|
|
|
from celery.schedules import crontab
|
|
from paperless.settings import _parse_beat_schedule
|
|
from paperless.settings import _parse_ignore_dates
|
|
from paperless.settings import _parse_redis_url
|
|
from paperless.settings import default_threads_per_worker
|
|
|
|
|
|
class TestIgnoreDateParsing(TestCase):
    """
    Tests the parsing of settings values.

    Despite the class name, this covers more than the PAPERLESS_IGNORE_DATES
    setting: it also exercises the default threads-per-worker calculation,
    Redis URL parsing and the construction of the celery beat schedule.
    """

    def _parse_checker(self, test_cases):
        """
        Helper function to check ignore date parsing

        Args:
            test_cases: iterable of (env_str, date_format, expected_date_set)
                tuples. Each env_str is parsed with the given date_format and
                the result must equal expected_date_set.
        """
        for env_str, date_format, expected_date_set in test_cases:
            # subTest reports which input failed and lets the remaining
            # cases run instead of stopping at the first failure.
            with self.subTest(env_str=env_str, date_format=date_format):
                self.assertSetEqual(
                    _parse_ignore_dates(env_str, date_format),
                    expected_date_set,
                )

    def test_no_ignore_dates_set(self):
        """
        GIVEN:
            - No ignore dates are set
        THEN:
            - No ignore dates are parsed
        """
        self.assertSetEqual(_parse_ignore_dates(""), set())

    def test_single_ignore_dates_set(self):
        """
        GIVEN:
            - Ignore dates are set per certain inputs
        THEN:
            - All ignore dates are parsed
        """
        test_cases = [
            ("1985-05-01", "YMD", {datetime.date(1985, 5, 1)}),
            (
                "1985-05-01,1991-12-05",
                "YMD",
                {datetime.date(1985, 5, 1), datetime.date(1991, 12, 5)},
            ),
            ("2010-12-13", "YMD", {datetime.date(2010, 12, 13)}),
            ("11.01.10", "DMY", {datetime.date(2010, 1, 11)}),
            (
                "11.01.2001,15-06-1996",
                "DMY",
                {datetime.date(2001, 1, 11), datetime.date(1996, 6, 15)},
            ),
        ]

        self._parse_checker(test_cases)

    def test_workers_threads(self):
        """
        GIVEN:
            - Certain CPU counts
        WHEN:
            - Threads per worker is calculated
        THEN:
            - Threads per worker less than or equal to CPU count
            - At least 1 thread per worker
        """
        default_workers = 1

        for cpus in range(1, 64):
            with self.subTest(cpu_count=cpus):
                with mock.patch(
                    "paperless.settings.multiprocessing.cpu_count",
                ) as cpu_count:
                    cpu_count.return_value = cpus

                    # Must be computed while the patch is active so the
                    # mocked CPU count is the one that is observed.
                    default_threads = default_threads_per_worker(default_workers)

                    self.assertGreaterEqual(default_threads, 1)

                    self.assertLessEqual(default_workers * default_threads, cpus)

    def test_redis_socket_parsing(self):
        """
        GIVEN:
            - Various Redis connection URI formats
        WHEN:
            - The URI is parsed
        THEN:
            - Socket based URIs are translated
            - Non-socket URIs are unchanged
            - None provided uses default
        """
        # Each case is (URL given to the parser, expected result tuple).
        # In the socket cases the expected pair is the celery style URL
        # followed by the redis-py / channels-redis style URL.
        test_cases = [
            # Nothing is set
            (None, ("redis://localhost:6379", "redis://localhost:6379")),
            # celery style
            (
                "redis+socket:///run/redis/redis.sock",
                (
                    "redis+socket:///run/redis/redis.sock",
                    "unix:///run/redis/redis.sock",
                ),
            ),
            # redis-py / channels-redis style
            (
                "unix:///run/redis/redis.sock",
                (
                    "redis+socket:///run/redis/redis.sock",
                    "unix:///run/redis/redis.sock",
                ),
            ),
            # celery style with db
            (
                "redis+socket:///run/redis/redis.sock?virtual_host=5",
                (
                    "redis+socket:///run/redis/redis.sock?virtual_host=5",
                    "unix:///run/redis/redis.sock?db=5",
                ),
            ),
            # redis-py / channels-redis style with db
            (
                "unix:///run/redis/redis.sock?db=10",
                (
                    "redis+socket:///run/redis/redis.sock?virtual_host=10",
                    "unix:///run/redis/redis.sock?db=10",
                ),
            ),
            # Just a host with a port
            (
                "redis://myredishost:6379",
                ("redis://myredishost:6379", "redis://myredishost:6379"),
            ),
        ]

        # The loop variable is deliberately not called ``input`` so the
        # builtin of that name is not shadowed.
        for redis_url, expected in test_cases:
            with self.subTest(redis_url=redis_url):
                result = _parse_redis_url(redis_url)
                self.assertTupleEqual(expected, result)

    def test_schedule_configuration_default(self):
        """
        GIVEN:
            - No configured task schedules
        WHEN:
            - The celery beat schedule is built
        THEN:
            - The default schedule is returned
        """
        schedule = _parse_beat_schedule()

        self.assertDictEqual(
            {
                "Check all e-mail accounts": {
                    "task": "paperless_mail.tasks.process_mail_accounts",
                    "schedule": crontab(minute="*/10"),
                },
                "Train the classifier": {
                    "task": "documents.tasks.train_classifier",
                    "schedule": crontab(minute="5", hour="*/1"),
                },
                "Optimize the index": {
                    "task": "documents.tasks.index_optimize",
                    "schedule": crontab(minute=0, hour=0),
                },
                "Perform sanity check": {
                    "task": "documents.tasks.sanity_check",
                    "schedule": crontab(minute=30, hour=0, day_of_week="sun"),
                },
            },
            schedule,
        )

    def test_schedule_configuration_changed(self):
        """
        GIVEN:
            - Email task is configured non-default
        WHEN:
            - The celery beat schedule is built
        THEN:
            - The email task is configured per environment
            - The default schedule is returned for other tasks
        """
        # patch.dict restores os.environ when the block exits, so the
        # override cannot leak into other tests.
        with mock.patch.dict(
            os.environ,
            {"PAPERLESS_EMAIL_TASK_CRON": "*/50 * * * mon"},
        ):
            schedule = _parse_beat_schedule()

        self.assertDictEqual(
            {
                "Check all e-mail accounts": {
                    "task": "paperless_mail.tasks.process_mail_accounts",
                    "schedule": crontab(minute="*/50", day_of_week="mon"),
                },
                "Train the classifier": {
                    "task": "documents.tasks.train_classifier",
                    "schedule": crontab(minute="5", hour="*/1"),
                },
                "Optimize the index": {
                    "task": "documents.tasks.index_optimize",
                    "schedule": crontab(minute=0, hour=0),
                },
                "Perform sanity check": {
                    "task": "documents.tasks.sanity_check",
                    "schedule": crontab(minute=30, hour=0, day_of_week="sun"),
                },
            },
            schedule,
        )

    def test_schedule_configuration_disabled(self):
        """
        GIVEN:
            - Search index task is disabled
        WHEN:
            - The celery beat schedule is built
        THEN:
            - The search index task is not present
            - The default schedule is returned for other tasks
        """
        with mock.patch.dict(os.environ, {"PAPERLESS_INDEX_TASK_CRON": "disable"}):
            schedule = _parse_beat_schedule()

        # "Optimize the index" is intentionally absent from the expectation.
        self.assertDictEqual(
            {
                "Check all e-mail accounts": {
                    "task": "paperless_mail.tasks.process_mail_accounts",
                    "schedule": crontab(minute="*/10"),
                },
                "Train the classifier": {
                    "task": "documents.tasks.train_classifier",
                    "schedule": crontab(minute="5", hour="*/1"),
                },
                "Perform sanity check": {
                    "task": "documents.tasks.sanity_check",
                    "schedule": crontab(minute=30, hour=0, day_of_week="sun"),
                },
            },
            schedule,
        )

    def test_schedule_configuration_disabled_all(self):
        """
        GIVEN:
            - All tasks are disabled
        WHEN:
            - The celery beat schedule is built
        THEN:
            - No tasks are scheduled
        """
        with mock.patch.dict(
            os.environ,
            {
                "PAPERLESS_EMAIL_TASK_CRON": "disable",
                "PAPERLESS_TRAIN_TASK_CRON": "disable",
                "PAPERLESS_SANITY_TASK_CRON": "disable",
                "PAPERLESS_INDEX_TASK_CRON": "disable",
            },
        ):
            schedule = _parse_beat_schedule()

        self.assertDictEqual(
            {},
            schedule,
        )
|