mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-10-30 03:56:23 -05:00 
			
		
		
		
	Expire the scheduled tasks shortly before a new one will be added to the queue by default
This commit is contained in:
		| @@ -109,6 +109,16 @@ def _parse_redis_url(env_redis: Optional[str]) -> Tuple[str]: | |||||||
|  |  | ||||||
|  |  | ||||||
| def _parse_beat_schedule() -> Dict: | def _parse_beat_schedule() -> Dict: | ||||||
|  |     """ | ||||||
|  |     Configures the scheduled tasks, according to default or | ||||||
|  |     environment variables.  Task expiration is configured so the task will | ||||||
|  |     expire (and not run), shortly before the default frequency will put another | ||||||
|  |     of the same task into the queue | ||||||
|  |  | ||||||
|  |  | ||||||
|  |     https://docs.celeryq.dev/en/stable/userguide/periodic-tasks.html#beat-entries | ||||||
|  |     https://docs.celeryq.dev/en/latest/userguide/calling.html#expiration | ||||||
|  |     """ | ||||||
|     schedule = {} |     schedule = {} | ||||||
|     tasks = [ |     tasks = [ | ||||||
|         { |         { | ||||||
| @@ -117,6 +127,11 @@ def _parse_beat_schedule() -> Dict: | |||||||
|             # Default every ten minutes |             # Default every ten minutes | ||||||
|             "env_default": "*/10 * * * *", |             "env_default": "*/10 * * * *", | ||||||
|             "task": "paperless_mail.tasks.process_mail_accounts", |             "task": "paperless_mail.tasks.process_mail_accounts", | ||||||
|  |             "options": { | ||||||
|  |                 # 1 minute before default schedule sends again | ||||||
|  |                 "expires": 9.0 | ||||||
|  |                 * 60.0, | ||||||
|  |             }, | ||||||
|         }, |         }, | ||||||
|         { |         { | ||||||
|             "name": "Train the classifier", |             "name": "Train the classifier", | ||||||
| @@ -124,6 +139,11 @@ def _parse_beat_schedule() -> Dict: | |||||||
|             # Default hourly at 5 minutes past the hour |             # Default hourly at 5 minutes past the hour | ||||||
|             "env_default": "5 */1 * * *", |             "env_default": "5 */1 * * *", | ||||||
|             "task": "documents.tasks.train_classifier", |             "task": "documents.tasks.train_classifier", | ||||||
|  |             "options": { | ||||||
|  |                 # 1 minute before default schedule sends again | ||||||
|  |                 "expires": 59.0 | ||||||
|  |                 * 60.0, | ||||||
|  |             }, | ||||||
|         }, |         }, | ||||||
|         { |         { | ||||||
|             "name": "Optimize the index", |             "name": "Optimize the index", | ||||||
| @@ -131,6 +151,12 @@ def _parse_beat_schedule() -> Dict: | |||||||
|             # Default daily at midnight |             # Default daily at midnight | ||||||
|             "env_default": "0 0 * * *", |             "env_default": "0 0 * * *", | ||||||
|             "task": "documents.tasks.index_optimize", |             "task": "documents.tasks.index_optimize", | ||||||
|  |             "options": { | ||||||
|  |                 # 1 hour before default schedule sends again | ||||||
|  |                 "expires": 23.0 | ||||||
|  |                 * 60.0 | ||||||
|  |                 * 60.0, | ||||||
|  |             }, | ||||||
|         }, |         }, | ||||||
|         { |         { | ||||||
|             "name": "Perform sanity check", |             "name": "Perform sanity check", | ||||||
| @@ -138,6 +164,13 @@ def _parse_beat_schedule() -> Dict: | |||||||
|             # Default Sunday at 00:30 |             # Default Sunday at 00:30 | ||||||
|             "env_default": "30 0 * * sun", |             "env_default": "30 0 * * sun", | ||||||
|             "task": "documents.tasks.sanity_check", |             "task": "documents.tasks.sanity_check", | ||||||
|  |             "options": { | ||||||
|  |                 # 1 hour before default schedule sends again | ||||||
|  |                 "expires": 7.0 | ||||||
|  |                 * 23.0 | ||||||
|  |                 * 60.0 | ||||||
|  |                 * 60.0, | ||||||
|  |             }, | ||||||
|         }, |         }, | ||||||
|     ] |     ] | ||||||
|     for task in tasks: |     for task in tasks: | ||||||
| @@ -151,9 +184,11 @@ def _parse_beat_schedule() -> Dict: | |||||||
|         #   - five time-and-date fields |         #   - five time-and-date fields | ||||||
|         #   - separated by at least one blank |         #   - separated by at least one blank | ||||||
|         minute, hour, day_month, month, day_week = value.split(" ") |         minute, hour, day_month, month, day_week = value.split(" ") | ||||||
|  |  | ||||||
|         schedule[task["name"]] = { |         schedule[task["name"]] = { | ||||||
|             "task": task["task"], |             "task": task["task"], | ||||||
|             "schedule": crontab(minute, hour, day_week, day_month, month), |             "schedule": crontab(minute, hour, day_week, day_month, month), | ||||||
|  |             "options": task["options"], | ||||||
|         } |         } | ||||||
|  |  | ||||||
|     return schedule |     return schedule | ||||||
| @@ -561,22 +596,21 @@ LOGGING = { | |||||||
| # Task queue                                                                  # | # Task queue                                                                  # | ||||||
| ############################################################################### | ############################################################################### | ||||||
|  |  | ||||||
| TASK_WORKERS = __get_int("PAPERLESS_TASK_WORKERS", 1) | # https://docs.celeryq.dev/en/stable/userguide/configuration.html | ||||||
|  |  | ||||||
| WORKER_TIMEOUT: Final[int] = __get_int("PAPERLESS_WORKER_TIMEOUT", 1800) |  | ||||||
|  |  | ||||||
| CELERY_BROKER_URL = _CELERY_REDIS_URL | CELERY_BROKER_URL = _CELERY_REDIS_URL | ||||||
| CELERY_TIMEZONE = TIME_ZONE | CELERY_TIMEZONE = TIME_ZONE | ||||||
|  |  | ||||||
| CELERY_WORKER_HIJACK_ROOT_LOGGER = False | CELERY_WORKER_HIJACK_ROOT_LOGGER = False | ||||||
| CELERY_WORKER_CONCURRENCY = TASK_WORKERS | CELERY_WORKER_CONCURRENCY: Final[int] = __get_int("PAPERLESS_TASK_WORKERS", 1) | ||||||
|  | TASK_WORKERS = CELERY_WORKER_CONCURRENCY | ||||||
| CELERY_WORKER_MAX_TASKS_PER_CHILD = 1 | CELERY_WORKER_MAX_TASKS_PER_CHILD = 1 | ||||||
| CELERY_WORKER_SEND_TASK_EVENTS = True | CELERY_WORKER_SEND_TASK_EVENTS = True | ||||||
|  | CELERY_TASK_SEND_SENT_EVENT = True | ||||||
| CELERY_SEND_TASK_SENT_EVENT = True | CELERY_SEND_TASK_SENT_EVENT = True | ||||||
|  |  | ||||||
| CELERY_TASK_TRACK_STARTED = True | CELERY_TASK_TRACK_STARTED = True | ||||||
| CELERY_TASK_TIME_LIMIT = WORKER_TIMEOUT | CELERY_TASK_TIME_LIMIT: Final[int] = __get_int("PAPERLESS_WORKER_TIMEOUT", 1800) | ||||||
|  |  | ||||||
| CELERY_RESULT_EXTENDED = True | CELERY_RESULT_EXTENDED = True | ||||||
| CELERY_RESULT_BACKEND = "django-db" | CELERY_RESULT_BACKEND = "django-db" | ||||||
| @@ -608,7 +642,7 @@ def default_threads_per_worker(task_workers) -> int: | |||||||
|  |  | ||||||
| THREADS_PER_WORKER = os.getenv( | THREADS_PER_WORKER = os.getenv( | ||||||
|     "PAPERLESS_THREADS_PER_WORKER", |     "PAPERLESS_THREADS_PER_WORKER", | ||||||
|     default_threads_per_worker(TASK_WORKERS), |     default_threads_per_worker(CELERY_WORKER_CONCURRENCY), | ||||||
| ) | ) | ||||||
|  |  | ||||||
| ############################################################################### | ############################################################################### | ||||||
|   | |||||||
| @@ -149,6 +149,11 @@ class TestRedisSocketConversion(TestCase): | |||||||
|  |  | ||||||
|  |  | ||||||
| class TestCeleryScheduleParsing(TestCase): | class TestCeleryScheduleParsing(TestCase): | ||||||
|  |     MAIL_EXPIRE_TIME = 9.0 * 60.0 | ||||||
|  |     CLASSIFIER_EXPIRE_TIME = 59.0 * 60.0 | ||||||
|  |     INDEX_EXPIRE_TIME = 23.0 * 60.0 * 60.0 | ||||||
|  |     SANITY_EXPIRE_TIME = 7.0 * 23.0 * 60.0 * 60.0 | ||||||
|  |  | ||||||
|     def test_schedule_configuration_default(self): |     def test_schedule_configuration_default(self): | ||||||
|         """ |         """ | ||||||
|         GIVEN: |         GIVEN: | ||||||
| @@ -165,18 +170,22 @@ class TestCeleryScheduleParsing(TestCase): | |||||||
|                 "Check all e-mail accounts": { |                 "Check all e-mail accounts": { | ||||||
|                     "task": "paperless_mail.tasks.process_mail_accounts", |                     "task": "paperless_mail.tasks.process_mail_accounts", | ||||||
|                     "schedule": crontab(minute="*/10"), |                     "schedule": crontab(minute="*/10"), | ||||||
|  |                     "options": {"expires": self.MAIL_EXPIRE_TIME}, | ||||||
|                 }, |                 }, | ||||||
|                 "Train the classifier": { |                 "Train the classifier": { | ||||||
|                     "task": "documents.tasks.train_classifier", |                     "task": "documents.tasks.train_classifier", | ||||||
|                     "schedule": crontab(minute="5", hour="*/1"), |                     "schedule": crontab(minute="5", hour="*/1"), | ||||||
|  |                     "options": {"expires": self.CLASSIFIER_EXPIRE_TIME}, | ||||||
|                 }, |                 }, | ||||||
|                 "Optimize the index": { |                 "Optimize the index": { | ||||||
|                     "task": "documents.tasks.index_optimize", |                     "task": "documents.tasks.index_optimize", | ||||||
|                     "schedule": crontab(minute=0, hour=0), |                     "schedule": crontab(minute=0, hour=0), | ||||||
|  |                     "options": {"expires": self.INDEX_EXPIRE_TIME}, | ||||||
|                 }, |                 }, | ||||||
|                 "Perform sanity check": { |                 "Perform sanity check": { | ||||||
|                     "task": "documents.tasks.sanity_check", |                     "task": "documents.tasks.sanity_check", | ||||||
|                     "schedule": crontab(minute=30, hour=0, day_of_week="sun"), |                     "schedule": crontab(minute=30, hour=0, day_of_week="sun"), | ||||||
|  |                     "options": {"expires": self.SANITY_EXPIRE_TIME}, | ||||||
|                 }, |                 }, | ||||||
|             }, |             }, | ||||||
|             schedule, |             schedule, | ||||||
| @@ -203,18 +212,22 @@ class TestCeleryScheduleParsing(TestCase): | |||||||
|                 "Check all e-mail accounts": { |                 "Check all e-mail accounts": { | ||||||
|                     "task": "paperless_mail.tasks.process_mail_accounts", |                     "task": "paperless_mail.tasks.process_mail_accounts", | ||||||
|                     "schedule": crontab(minute="*/50", day_of_week="mon"), |                     "schedule": crontab(minute="*/50", day_of_week="mon"), | ||||||
|  |                     "options": {"expires": self.MAIL_EXPIRE_TIME}, | ||||||
|                 }, |                 }, | ||||||
|                 "Train the classifier": { |                 "Train the classifier": { | ||||||
|                     "task": "documents.tasks.train_classifier", |                     "task": "documents.tasks.train_classifier", | ||||||
|                     "schedule": crontab(minute="5", hour="*/1"), |                     "schedule": crontab(minute="5", hour="*/1"), | ||||||
|  |                     "options": {"expires": self.CLASSIFIER_EXPIRE_TIME}, | ||||||
|                 }, |                 }, | ||||||
|                 "Optimize the index": { |                 "Optimize the index": { | ||||||
|                     "task": "documents.tasks.index_optimize", |                     "task": "documents.tasks.index_optimize", | ||||||
|                     "schedule": crontab(minute=0, hour=0), |                     "schedule": crontab(minute=0, hour=0), | ||||||
|  |                     "options": {"expires": self.INDEX_EXPIRE_TIME}, | ||||||
|                 }, |                 }, | ||||||
|                 "Perform sanity check": { |                 "Perform sanity check": { | ||||||
|                     "task": "documents.tasks.sanity_check", |                     "task": "documents.tasks.sanity_check", | ||||||
|                     "schedule": crontab(minute=30, hour=0, day_of_week="sun"), |                     "schedule": crontab(minute=30, hour=0, day_of_week="sun"), | ||||||
|  |                     "options": {"expires": self.SANITY_EXPIRE_TIME}, | ||||||
|                 }, |                 }, | ||||||
|             }, |             }, | ||||||
|             schedule, |             schedule, | ||||||
| @@ -238,14 +251,17 @@ class TestCeleryScheduleParsing(TestCase): | |||||||
|                 "Check all e-mail accounts": { |                 "Check all e-mail accounts": { | ||||||
|                     "task": "paperless_mail.tasks.process_mail_accounts", |                     "task": "paperless_mail.tasks.process_mail_accounts", | ||||||
|                     "schedule": crontab(minute="*/10"), |                     "schedule": crontab(minute="*/10"), | ||||||
|  |                     "options": {"expires": self.MAIL_EXPIRE_TIME}, | ||||||
|                 }, |                 }, | ||||||
|                 "Train the classifier": { |                 "Train the classifier": { | ||||||
|                     "task": "documents.tasks.train_classifier", |                     "task": "documents.tasks.train_classifier", | ||||||
|                     "schedule": crontab(minute="5", hour="*/1"), |                     "schedule": crontab(minute="5", hour="*/1"), | ||||||
|  |                     "options": {"expires": self.CLASSIFIER_EXPIRE_TIME}, | ||||||
|                 }, |                 }, | ||||||
|                 "Perform sanity check": { |                 "Perform sanity check": { | ||||||
|                     "task": "documents.tasks.sanity_check", |                     "task": "documents.tasks.sanity_check", | ||||||
|                     "schedule": crontab(minute=30, hour=0, day_of_week="sun"), |                     "schedule": crontab(minute=30, hour=0, day_of_week="sun"), | ||||||
|  |                     "options": {"expires": self.SANITY_EXPIRE_TIME}, | ||||||
|                 }, |                 }, | ||||||
|             }, |             }, | ||||||
|             schedule, |             schedule, | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Trenton H
					Trenton H