diff --git a/Pipfile b/Pipfile index d3f3bf4b3..b31749495 100644 --- a/Pipfile +++ b/Pipfile @@ -26,6 +26,7 @@ celery = {extras = ["redis"], version = "*"} channels = "~=4.1" channels-redis = "*" concurrent-log-handler = "*" +duration-parser = "*" filelock = "*" flower = "*" gotenberg-client = "*" diff --git a/Pipfile.lock b/Pipfile.lock index b1ffdfa31..df97a0bc0 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -576,6 +576,15 @@ "markers": "python_version >= '3.7'", "version": "==0.7.0" }, + "duration-parser": { + "hashes": [ + "sha256:2d5c465aeccd467f5c981fa78d69edd13459d6cc8ce3c751e12cbe40742163f3", + "sha256:aecbb05af545f688f3f6277ab7720e538a8ab834e22c443e2a912f6c7ab6ec5c" + ], + "index": "pypi", + "markers": "python_version >= '3.7'", + "version": "==1.0.1" + }, "exceptiongroup": { "hashes": [ "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b", diff --git a/src-ui/src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.html b/src-ui/src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.html index a3bea36e7..0dccfa9c7 100644 --- a/src-ui/src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.html +++ b/src-ui/src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.html @@ -119,6 +119,22 @@
+ @if (formGroup.get('type').value === WorkflowTriggerType.Scheduled) { +

Set scheduled trigger delay and which field to use.

+
+
+ +
+
+ +
+ @if (formGroup.get('schedule_delay_field').value === 'custom_field') { +
+ +
+ } +
+ }

Trigger for documents that match all filters specified below.

@@ -128,7 +144,7 @@ } - @if (formGroup.get('type').value === WorkflowTriggerType.DocumentAdded || formGroup.get('type').value === WorkflowTriggerType.DocumentUpdated) { + @if (formGroup.get('type').value === WorkflowTriggerType.DocumentAdded || formGroup.get('type').value === WorkflowTriggerType.DocumentUpdated || formGroup.get('type').value === WorkflowTriggerType.Scheduled) { @if (patternRequired) { @@ -138,7 +154,7 @@ } }
- @if (formGroup.get('type').value === WorkflowTriggerType.DocumentAdded || formGroup.get('type').value === WorkflowTriggerType.DocumentUpdated) { + @if (formGroup.get('type').value === WorkflowTriggerType.DocumentAdded || formGroup.get('type').value === WorkflowTriggerType.DocumentUpdated || formGroup.get('type').value === WorkflowTriggerType.Scheduled) {
diff --git a/src-ui/src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.ts b/src-ui/src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.ts index 588202b89..570756604 100644 --- a/src-ui/src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.ts +++ b/src-ui/src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.ts @@ -16,9 +16,10 @@ import { EditDialogComponent } from '../edit-dialog.component' import { MailRuleService } from 'src/app/services/rest/mail-rule.service' import { MailRule } from 'src/app/data/mail-rule' import { CustomFieldsService } from 'src/app/services/rest/custom-fields.service' -import { CustomField } from 'src/app/data/custom-field' +import { CustomField, CustomFieldDataType } from 'src/app/data/custom-field' import { DocumentSource, + ScheduleDelayField, WorkflowTrigger, WorkflowTriggerType, } from 'src/app/data/workflow-trigger' @@ -48,6 +49,25 @@ export const DOCUMENT_SOURCE_OPTIONS = [ }, ] +export const SCHEDULE_DELAY_FIELD_OPTIONS = [ + { + id: ScheduleDelayField.Added, + name: $localize`Added`, + }, + { + id: ScheduleDelayField.Created, + name: $localize`Created`, + }, + { + id: ScheduleDelayField.Modified, + name: $localize`Modified`, + }, + { + id: ScheduleDelayField.CustomField, + name: $localize`Custom Field`, + }, +] + export const WORKFLOW_TYPE_OPTIONS = [ { id: WorkflowTriggerType.Consumption, @@ -61,6 +81,10 @@ export const WORKFLOW_TYPE_OPTIONS = [ id: WorkflowTriggerType.DocumentUpdated, name: $localize`Document Updated`, }, + { + id: WorkflowTriggerType.Scheduled, + name: $localize`Scheduled`, + }, ] export const WORKFLOW_ACTION_OPTIONS = [ @@ -314,6 +338,11 @@ export class WorkflowEditDialogComponent filter_has_document_type: new FormControl( trigger.filter_has_document_type ), + schedule_delay: new FormControl(trigger.schedule_delay), + schedule_delay_field: new FormControl(trigger.schedule_delay_field), + schedule_delay_custom_field: new FormControl( + trigger.schedule_delay_custom_field + ), }), { emitEvent } ) @@ -388,6 +417,16 @@ export class WorkflowEditDialogComponent return WORKFLOW_TYPE_OPTIONS } + get scheduleDelayFieldOptions() { + return SCHEDULE_DELAY_FIELD_OPTIONS + } + + get dateCustomFields() { + return this.customFields?.filter( + (f) => f.data_type === CustomFieldDataType.Date + ) + } + getTriggerTypeOptionName(type: WorkflowTriggerType): string { return this.triggerTypeOptions.find((t) => t.id === type)?.name ?? '' } @@ -408,6 +447,9 @@ export class WorkflowEditDialogComponent matching_algorithm: MATCH_NONE, match: '', is_insensitive: true, + schedule_delay: null, + schedule_delay_field: ScheduleDelayField.Added, + schedule_delay_custom_field: null, } this.object.triggers.push(trigger) this.createTriggerField(trigger) diff --git a/src-ui/src/app/data/workflow-trigger.ts b/src-ui/src/app/data/workflow-trigger.ts index 3e3bf8cf8..78d5e418b 100644 --- a/src-ui/src/app/data/workflow-trigger.ts +++ b/src-ui/src/app/data/workflow-trigger.ts @@ -10,6 +10,14 @@ export enum WorkflowTriggerType { Consumption = 1, DocumentAdded = 2, DocumentUpdated = 3, + Scheduled = 4, +} + +export enum ScheduleDelayField { + Added = 'added', + Created = 'created', + Modified = 'modified', + CustomField = 'custom_field', } export interface WorkflowTrigger extends ObjectWithId { @@ -34,4 +42,10 @@ export interface WorkflowTrigger extends ObjectWithId { filter_has_correspondent?: number // Correspondent.id filter_has_document_type?: number // DocumentType.id + + schedule_delay?: string + + schedule_delay_field?: ScheduleDelayField + + schedule_delay_custom_field?: number // CustomField.id } diff --git a/src/documents/matching.py b/src/documents/matching.py index 36fa9a2c6..59c0ccfda 100644 --- a/src/documents/matching.py +++ b/src/documents/matching.py @@ -409,6 +409,7 @@ def document_matches_workflow( elif ( trigger_type == WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED or trigger_type == WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED + or trigger_type == WorkflowTrigger.WorkflowTriggerType.SCHEDULED ): trigger_matched, reason = existing_document_matches_workflow( document, diff --git a/src/documents/migrations/1056_workflowtrigger_schedule_delay_and_more.py b/src/documents/migrations/1056_workflowtrigger_schedule_delay_and_more.py new file mode 100644 index 000000000..9391fd2d9 --- /dev/null +++ b/src/documents/migrations/1056_workflowtrigger_schedule_delay_and_more.py @@ -0,0 +1,66 @@ +# Generated by Django 5.1.1 on 2024-10-23 20:54 + +import django.db.models.deletion +from django.db import migrations +from django.db import models + + +class Migration(migrations.Migration): + dependencies = [ + ("documents", "1055_alter_storagepath_path"), + ] + + operations = [ + migrations.AddField( + model_name="workflowtrigger", + name="schedule_delay", + field=models.CharField( + blank=True, + help_text="The delay before the scheduled trigger is activated.", + max_length=256, + null=True, + verbose_name="schedule delay", + ), + ), + migrations.AddField( + model_name="workflowtrigger", + name="schedule_delay_custom_field", + field=models.ForeignKey( + blank=True, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + to="documents.customfield", + verbose_name="schedule delay custom field", + ), + ), + migrations.AddField( + model_name="workflowtrigger", + name="schedule_delay_field", + field=models.CharField( + choices=[ + ("added", "Added"), + ("created", "Created"), + ("modified", "Modified"), + ("custom_field", "Custom Field"), + ], + default="added", + help_text="The field to use for the delay.", + max_length=20, + verbose_name="schedule delay field", + ), + ), + migrations.AlterField( + model_name="workflowtrigger", + name="type", + field=models.PositiveIntegerField( + choices=[ + (1, "Consumption Started"), + (2, "Document Added"), + (3, "Document Updated"), + (4, "Scheduled"), + ], + default=1, + verbose_name="Workflow Trigger Type", + ), + ), + ] diff --git a/src/documents/models.py b/src/documents/models.py index 4528d5127..006f48a56 100644 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -1016,12 +1016,19 @@ class WorkflowTrigger(models.Model): CONSUMPTION = 1, _("Consumption Started") DOCUMENT_ADDED = 2, _("Document Added") DOCUMENT_UPDATED = 3, _("Document Updated") + SCHEDULED = 4, _("Scheduled") class DocumentSourceChoices(models.IntegerChoices): CONSUME_FOLDER = DocumentSource.ConsumeFolder.value, _("Consume Folder") API_UPLOAD = DocumentSource.ApiUpload.value, _("Api Upload") MAIL_FETCH = DocumentSource.MailFetch.value, _("Mail Fetch") + class ScheduleDelayField(models.TextChoices): + ADDED = "added", _("Added") + CREATED = "created", _("Created") + MODIFIED = "modified", _("Modified") + CUSTOM_FIELD = "custom_field", _("Custom Field") + type = models.PositiveIntegerField( _("Workflow Trigger Type"), choices=WorkflowTriggerType.choices, @@ -1098,6 +1105,34 @@ class WorkflowTrigger(models.Model): verbose_name=_("has this correspondent"), ) + schedule_delay = models.CharField( + _("schedule delay"), + max_length=256, + null=True, + blank=True, + help_text=_( + "The delay before the scheduled trigger is activated.", + ), + ) + + schedule_delay_field = models.CharField( + _("schedule delay field"), + max_length=20, + choices=ScheduleDelayField.choices, + default=ScheduleDelayField.ADDED, + help_text=_( + "The field to use for the delay.", + ), + ) + + schedule_delay_custom_field = models.ForeignKey( + CustomField, + null=True, + blank=True, + on_delete=models.SET_NULL, + verbose_name=_("schedule delay custom field"), + ) + class Meta: verbose_name = _("workflow trigger") verbose_name_plural = _("workflow triggers") diff --git a/src/documents/serialisers.py b/src/documents/serialisers.py index aeb901f81..fdb1be8a5 100644 --- a/src/documents/serialisers.py +++ b/src/documents/serialisers.py @@ -21,6 +21,7 @@ from django.utils.crypto import get_random_string from django.utils.text import slugify from django.utils.translation import gettext as _ from drf_writable_nested.serializers import NestedUpdateMixin +from duration_parser import parse_timedelta from guardian.core import ObjectPermissionChecker from guardian.shortcuts import get_users_with_perms from guardian.utils import get_group_obj_perms_model @@ -1737,6 +1738,10 @@ class WorkflowTriggerSerializer(serializers.ModelSerializer): label="Trigger Type", ) + schedule_delay = serializers.CharField( + required=False, + ) + class Meta: model = WorkflowTrigger fields = [ @@ -1752,8 +1757,21 @@ class WorkflowTriggerSerializer(serializers.ModelSerializer): "filter_has_tags", "filter_has_correspondent", "filter_has_document_type", + "schedule_delay", + "schedule_delay_field", + "schedule_delay_custom_field", ] + def validate_schedule_delay(self, value): + if value is not None: + try: + parse_timedelta(value) + except Exception as e: + raise serializers.ValidationError( + f"Invalid schedule delay format: {e}", + ) + return value + def validate(self, attrs): # Empty strings treated as None to avoid unexpected behavior if ( @@ -1779,6 +1797,11 @@ class WorkflowTriggerSerializer(serializers.ModelSerializer): "File name, path or mail rule filter are required", ) + if attrs["type"] == WorkflowTrigger.WorkflowTriggerType.SCHEDULED and ( + "schedule_delay" not in attrs or attrs["schedule_delay"] is None + ): + raise serializers.ValidationError("Schedule delay is required") + return attrs diff --git a/src/documents/tasks.py b/src/documents/tasks.py index 8f5ee51bc..3c9eba7c8 100644 --- a/src/documents/tasks.py +++ b/src/documents/tasks.py @@ -14,6 +14,7 @@ from django.db import models from django.db import transaction from django.db.models.signals import post_save from django.utils import timezone +from duration_parser import parse_timedelta from filelock import FileLock from whoosh.writing import AsyncWriter @@ -31,10 +32,13 @@ from documents.double_sided import CollatePlugin from documents.file_handling import create_source_path_directory from documents.file_handling import generate_unique_filename from documents.models import Correspondent +from documents.models import CustomFieldInstance from documents.models import Document from documents.models import DocumentType from documents.models import StoragePath from documents.models import Tag +from documents.models import Workflow +from documents.models import WorkflowTrigger from documents.parsers import DocumentParser from documents.parsers import get_parser_class_for_mime_type from documents.plugins.base import ConsumeTaskPlugin @@ -44,6 +48,7 @@ from documents.plugins.helpers import ProgressStatusOptions from documents.sanity_checker import SanityCheckFailedException from documents.signals import document_updated from documents.signals.handlers import cleanup_document_deletion +from documents.signals.handlers import run_workflows if settings.AUDIT_LOG_ENABLED: from auditlog.models import LogEntry @@ -319,3 +324,65 @@ def empty_trash(doc_ids=None): cleanup_document_deletion, sender=Document, ) + + +@shared_task +def check_scheduled_workflows(): + scheduled_workflows: list[Workflow] = Workflow.objects.filter( + triggers__type=WorkflowTrigger.WorkflowTriggerType.SCHEDULED, + enabled=True, + ).prefetch_related("triggers") + if scheduled_workflows.count() > 0: + logger.debug(f"Checking {len(scheduled_workflows)} scheduled workflows") + for workflow in scheduled_workflows: + schedule_triggers = workflow.triggers.filter( + type=WorkflowTrigger.WorkflowTriggerType.SCHEDULED, + ) + trigger: WorkflowTrigger + for trigger in schedule_triggers: + documents = Document.objects.none() + delay_td = parse_timedelta(trigger.schedule_delay) + logger.debug( + f"Checking trigger {trigger} with delay {delay_td} against field: {trigger.schedule_delay_field}", + ) + if ( + trigger.schedule_delay_field + == WorkflowTrigger.ScheduleDelayField.ADDED + ): + documents = Document.objects.filter( + added__lt=timezone.now() - delay_td, + ) + elif ( + trigger.schedule_delay_field + == WorkflowTrigger.ScheduleDelayField.CREATED + ): + documents = Document.objects.filter( + created__lt=timezone.now() - delay_td, + ) + elif ( + trigger.schedule_delay_field + == WorkflowTrigger.ScheduleDelayField.MODIFIED + ): + documents = Document.objects.filter( + modified__lt=timezone.now() - delay_td, + ) + elif ( + trigger.schedule_delay_field + == WorkflowTrigger.ScheduleDelayField.CUSTOM_FIELD + ): + cf_instances = CustomFieldInstance.objects.filter( + field=trigger.schedule_delay_custom_field, + value_date__lt=timezone.now() - delay_td, + ) + documents = Document.objects.filter( + id__in=cf_instances.values_list("document", flat=True), + ) + if documents.count() > 0: + logger.debug( + f"Found {documents.count()} documents for trigger {trigger}", + ) + for document in documents: + run_workflows( + WorkflowTrigger.WorkflowTriggerType.SCHEDULED, + document, + ) diff --git a/src/documents/tests/test_workflows.py b/src/documents/tests/test_workflows.py index c5d975958..c02b44a1f 100644 --- a/src/documents/tests/test_workflows.py +++ b/src/documents/tests/test_workflows.py @@ -1370,7 +1370,7 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase): doc = Document.objects.create( title="test", ) - self.assertRaises(Exception, document_matches_workflow, doc, w, 4) + self.assertRaises(Exception, document_matches_workflow, doc, w, 99) def test_removal_action_document_updated_workflow(self): """ diff --git a/src/paperless/settings.py b/src/paperless/settings.py index d6489fa81..e8fd455a8 100644 --- a/src/paperless/settings.py +++ b/src/paperless/settings.py @@ -216,6 +216,17 @@ def _parse_beat_schedule() -> dict: "expires": 23.0 * 60.0 * 60.0, }, }, + { + "name": "Check and run scheduled workflows", + "env_key": "PAPERLESS_WORKFLOW_SCHEDULED_TASK_CRON", + # Default every 5 minutes + "env_default": "*/5 * * * *", + "task": "documents.tasks.check_scheduled_workflows", + "options": { + # 1 minute before default schedule sends again + "expires": 4.0 * 60.0, + }, + }, ] for task in tasks: # Either get the environment setting or use the default diff --git a/src/paperless/tests/test_settings.py b/src/paperless/tests/test_settings.py index 5c257a08c..77423b9f6 100644 --- a/src/paperless/tests/test_settings.py +++ b/src/paperless/tests/test_settings.py @@ -157,6 +157,7 @@ class TestCeleryScheduleParsing(TestCase): INDEX_EXPIRE_TIME = 23.0 * 60.0 * 60.0 SANITY_EXPIRE_TIME = ((7.0 * 24.0) - 1.0) * 60.0 * 60.0 EMPTY_TRASH_EXPIRE_TIME = 23.0 * 60.0 * 60.0 + RUN_SCHEDULED_WORKFLOWS_EXPIRE_TIME = 4.0 * 60.0 def test_schedule_configuration_default(self): """ @@ -196,6 +197,11 @@ class TestCeleryScheduleParsing(TestCase): "schedule": crontab(minute=0, hour="1"), "options": {"expires": self.EMPTY_TRASH_EXPIRE_TIME}, }, + "Check and run scheduled workflows": { + "task": "documents.tasks.check_scheduled_workflows", + "schedule": crontab(minute="*/5"), + "options": {"expires": self.RUN_SCHEDULED_WORKFLOWS_EXPIRE_TIME}, + }, }, schedule, ) @@ -243,6 +249,11 @@ class TestCeleryScheduleParsing(TestCase): "schedule": crontab(minute=0, hour="1"), "options": {"expires": self.EMPTY_TRASH_EXPIRE_TIME}, }, + "Check and run scheduled workflows": { + "task": "documents.tasks.check_scheduled_workflows", + "schedule": crontab(minute="*/5"), + "options": {"expires": self.RUN_SCHEDULED_WORKFLOWS_EXPIRE_TIME}, + }, }, schedule, ) @@ -282,6 +293,11 @@ class TestCeleryScheduleParsing(TestCase): "schedule": crontab(minute=0, hour="1"), "options": {"expires": self.EMPTY_TRASH_EXPIRE_TIME}, }, + "Check and run scheduled workflows": { + "task": "documents.tasks.check_scheduled_workflows", + "schedule": crontab(minute="*/5"), + "options": {"expires": self.RUN_SCHEDULED_WORKFLOWS_EXPIRE_TIME}, + }, }, schedule, ) @@ -303,6 +319,7 @@ class TestCeleryScheduleParsing(TestCase): "PAPERLESS_SANITY_TASK_CRON": "disable", "PAPERLESS_INDEX_TASK_CRON": "disable", "PAPERLESS_EMPTY_TRASH_TASK_CRON": "disable", + "PAPERLESS_WORKFLOW_SCHEDULED_TASK_CRON": "disable", }, ): schedule = _parse_beat_schedule()