Feature: scheduled workflow trigger (#8036)

This commit is contained in:
shamoon
2024-11-24 10:22:31 -08:00
committed by GitHub
parent d5572137de
commit 2b29233a1e
16 changed files with 907 additions and 78 deletions

View File

@@ -409,6 +409,7 @@ def document_matches_workflow(
elif (
trigger_type == WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED
or trigger_type == WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED
or trigger_type == WorkflowTrigger.WorkflowTriggerType.SCHEDULED
):
trigger_matched, reason = existing_document_matches_workflow(
document,

View File

@@ -0,0 +1,143 @@
# Generated by Django 5.1.1 on 2024-11-05 05:19
import django.core.validators
import django.db.models.deletion
import django.utils.timezone
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "1057_paperlesstask_owner"),
]
operations = [
migrations.AddField(
model_name="workflowtrigger",
name="schedule_date_custom_field",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
to="documents.customfield",
verbose_name="schedule date custom field",
),
),
migrations.AddField(
model_name="workflowtrigger",
name="schedule_date_field",
field=models.CharField(
choices=[
("added", "Added"),
("created", "Created"),
("modified", "Modified"),
("custom_field", "Custom Field"),
],
default="added",
help_text="The field to check for a schedule trigger.",
max_length=20,
verbose_name="schedule date field",
),
),
migrations.AddField(
model_name="workflowtrigger",
name="schedule_is_recurring",
field=models.BooleanField(
default=False,
help_text="If the schedule should be recurring.",
verbose_name="schedule is recurring",
),
),
migrations.AddField(
model_name="workflowtrigger",
name="schedule_offset_days",
field=models.PositiveIntegerField(
default=0,
help_text="The number of days to offset the schedule trigger by.",
verbose_name="schedule offset days",
),
),
migrations.AddField(
model_name="workflowtrigger",
name="schedule_recurring_interval_days",
field=models.PositiveIntegerField(
default=1,
help_text="The number of days between recurring schedule triggers.",
validators=[django.core.validators.MinValueValidator(1)],
verbose_name="schedule recurring delay in days",
),
),
migrations.AlterField(
model_name="workflowtrigger",
name="type",
field=models.PositiveIntegerField(
choices=[
(1, "Consumption Started"),
(2, "Document Added"),
(3, "Document Updated"),
(4, "Scheduled"),
],
default=1,
verbose_name="Workflow Trigger Type",
),
),
migrations.CreateModel(
name="WorkflowRun",
fields=[
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
(
"type",
models.PositiveIntegerField(
choices=[
(1, "Consumption Started"),
(2, "Document Added"),
(3, "Document Updated"),
(4, "Scheduled"),
],
null=True,
verbose_name="workflow trigger type",
),
),
(
"run_at",
models.DateTimeField(
db_index=True,
default=django.utils.timezone.now,
verbose_name="date run",
),
),
(
"document",
models.ForeignKey(
null=True,
on_delete=django.db.models.deletion.CASCADE,
related_name="workflow_runs",
to="documents.document",
verbose_name="document",
),
),
(
"workflow",
models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
related_name="runs",
to="documents.workflow",
verbose_name="workflow",
),
),
],
options={
"verbose_name": "workflow run",
"verbose_name_plural": "workflow runs",
},
),
]

View File

@@ -1016,12 +1016,19 @@ class WorkflowTrigger(models.Model):
CONSUMPTION = 1, _("Consumption Started")
DOCUMENT_ADDED = 2, _("Document Added")
DOCUMENT_UPDATED = 3, _("Document Updated")
SCHEDULED = 4, _("Scheduled")
class DocumentSourceChoices(models.IntegerChoices):
CONSUME_FOLDER = DocumentSource.ConsumeFolder.value, _("Consume Folder")
API_UPLOAD = DocumentSource.ApiUpload.value, _("Api Upload")
MAIL_FETCH = DocumentSource.MailFetch.value, _("Mail Fetch")
class ScheduleDateField(models.TextChoices):
ADDED = "added", _("Added")
CREATED = "created", _("Created")
MODIFIED = "modified", _("Modified")
CUSTOM_FIELD = "custom_field", _("Custom Field")
type = models.PositiveIntegerField(
_("Workflow Trigger Type"),
choices=WorkflowTriggerType.choices,
@@ -1098,6 +1105,49 @@ class WorkflowTrigger(models.Model):
verbose_name=_("has this correspondent"),
)
schedule_offset_days = models.PositiveIntegerField(
_("schedule offset days"),
default=0,
help_text=_(
"The number of days to offset the schedule trigger by.",
),
)
schedule_is_recurring = models.BooleanField(
_("schedule is recurring"),
default=False,
help_text=_(
"If the schedule should be recurring.",
),
)
schedule_recurring_interval_days = models.PositiveIntegerField(
_("schedule recurring delay in days"),
default=1,
validators=[MinValueValidator(1)],
help_text=_(
"The number of days between recurring schedule triggers.",
),
)
schedule_date_field = models.CharField(
_("schedule date field"),
max_length=20,
choices=ScheduleDateField.choices,
default=ScheduleDateField.ADDED,
help_text=_(
"The field to check for a schedule trigger.",
),
)
schedule_date_custom_field = models.ForeignKey(
CustomField,
null=True,
blank=True,
on_delete=models.SET_NULL,
verbose_name=_("schedule date custom field"),
)
class Meta:
verbose_name = _("workflow trigger")
verbose_name_plural = _("workflow triggers")
@@ -1348,3 +1398,39 @@ class Workflow(models.Model):
def __str__(self):
return f"Workflow: {self.name}"
class WorkflowRun(models.Model):
workflow = models.ForeignKey(
Workflow,
on_delete=models.CASCADE,
related_name="runs",
verbose_name=_("workflow"),
)
type = models.PositiveIntegerField(
_("workflow trigger type"),
choices=WorkflowTrigger.WorkflowTriggerType.choices,
null=True,
)
document = models.ForeignKey(
Document,
null=True,
on_delete=models.CASCADE,
related_name="workflow_runs",
verbose_name=_("document"),
)
run_at = models.DateTimeField(
_("date run"),
default=timezone.now,
db_index=True,
)
class Meta:
verbose_name = _("workflow run")
verbose_name_plural = _("workflow runs")
def __str__(self):
return f"WorkflowRun of {self.workflow} at {self.run_at} on {self.document}"

View File

@@ -1772,6 +1772,11 @@ class WorkflowTriggerSerializer(serializers.ModelSerializer):
"filter_has_tags",
"filter_has_correspondent",
"filter_has_document_type",
"schedule_offset_days",
"schedule_is_recurring",
"schedule_recurring_interval_days",
"schedule_date_field",
"schedule_date_custom_field",
]
def validate(self, attrs):

View File

@@ -37,6 +37,7 @@ from documents.models import PaperlessTask
from documents.models import Tag
from documents.models import Workflow
from documents.models import WorkflowAction
from documents.models import WorkflowRun
from documents.models import WorkflowTrigger
from documents.permissions import get_objects_for_user_owner_aware
from documents.permissions import set_permissions_for_object
@@ -916,6 +917,12 @@ def run_workflows(
document.save()
document.tags.set(doc_tag_ids)
WorkflowRun.objects.create(
workflow=workflow,
type=trigger_type,
document=document if not use_overrides else None,
)
if use_overrides:
return overrides, "\n".join(messages)

View File

@@ -31,10 +31,14 @@ from documents.double_sided import CollatePlugin
from documents.file_handling import create_source_path_directory
from documents.file_handling import generate_unique_filename
from documents.models import Correspondent
from documents.models import CustomFieldInstance
from documents.models import Document
from documents.models import DocumentType
from documents.models import StoragePath
from documents.models import Tag
from documents.models import Workflow
from documents.models import WorkflowRun
from documents.models import WorkflowTrigger
from documents.parsers import DocumentParser
from documents.parsers import get_parser_class_for_mime_type
from documents.plugins.base import ConsumeTaskPlugin
@@ -44,6 +48,7 @@ from documents.plugins.helpers import ProgressStatusOptions
from documents.sanity_checker import SanityCheckFailedException
from documents.signals import document_updated
from documents.signals.handlers import cleanup_document_deletion
from documents.signals.handlers import run_workflows
if settings.AUDIT_LOG_ENABLED:
from auditlog.models import LogEntry
@@ -337,3 +342,81 @@ def empty_trash(doc_ids=None):
cleanup_document_deletion,
sender=Document,
)
@shared_task
def check_scheduled_workflows():
scheduled_workflows: list[Workflow] = Workflow.objects.filter(
triggers__type=WorkflowTrigger.WorkflowTriggerType.SCHEDULED,
enabled=True,
).prefetch_related("triggers")
if scheduled_workflows.count() > 0:
logger.debug(f"Checking {len(scheduled_workflows)} scheduled workflows")
for workflow in scheduled_workflows:
schedule_triggers = workflow.triggers.filter(
type=WorkflowTrigger.WorkflowTriggerType.SCHEDULED,
)
trigger: WorkflowTrigger
for trigger in schedule_triggers:
documents = Document.objects.none()
offset_td = timedelta(days=trigger.schedule_offset_days)
logger.debug(
f"Checking trigger {trigger} with offset {offset_td} against field: {trigger.schedule_date_field}",
)
match trigger.schedule_date_field:
case WorkflowTrigger.ScheduleDateField.ADDED:
documents = Document.objects.filter(
added__lt=timezone.now() - offset_td,
)
case WorkflowTrigger.ScheduleDateField.CREATED:
documents = Document.objects.filter(
created__lt=timezone.now() - offset_td,
)
case WorkflowTrigger.ScheduleDateField.MODIFIED:
documents = Document.objects.filter(
modified__lt=timezone.now() - offset_td,
)
case WorkflowTrigger.ScheduleDateField.CUSTOM_FIELD:
cf_instances = CustomFieldInstance.objects.filter(
field=trigger.schedule_date_custom_field,
value_date__lt=timezone.now() - offset_td,
)
documents = Document.objects.filter(
id__in=cf_instances.values_list("document", flat=True),
)
if documents.count() > 0:
logger.debug(
f"Found {documents.count()} documents for trigger {trigger}",
)
for document in documents:
workflow_runs = WorkflowRun.objects.filter(
document=document,
type=WorkflowTrigger.WorkflowTriggerType.SCHEDULED,
workflow=workflow,
).order_by("-run_at")
if not trigger.schedule_is_recurring and workflow_runs.exists():
# schedule is non-recurring and the workflow has already been run
logger.debug(
f"Skipping document {document} for non-recurring workflow {workflow} as it has already been run",
)
continue
elif (
trigger.schedule_is_recurring
and workflow_runs.exists()
and (
workflow_runs.last().run_at
> timezone.now()
- timedelta(
days=trigger.schedule_recurring_interval_days,
)
)
):
# schedule is recurring but the last run was within the number of recurring interval days
logger.debug(
f"Skipping document {document} for recurring workflow {workflow} as the last run was within the recurring interval",
)
continue
run_workflows(
WorkflowTrigger.WorkflowTriggerType.SCHEDULED,
document,
)

View File

@@ -29,6 +29,7 @@ from documents.models import StoragePath
from documents.models import Tag
from documents.models import Workflow
from documents.models import WorkflowAction
from documents.models import WorkflowRun
from documents.models import WorkflowTrigger
from documents.signals import document_consumption_finished
from documents.tests.utils import DirectoriesMixin
@@ -1306,6 +1307,275 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase):
# group2 should have been added
self.assertIn(self.group2, group_perms)
def test_workflow_scheduled_trigger_created(self):
"""
GIVEN:
- Existing workflow with SCHEDULED trigger against the created field and action that assigns owner
- Existing doc that matches the trigger
WHEN:
- Scheduled workflows are checked
THEN:
- Workflow runs, document owner is updated
"""
trigger = WorkflowTrigger.objects.create(
type=WorkflowTrigger.WorkflowTriggerType.SCHEDULED,
schedule_offset_days=1,
schedule_date_field="created",
)
action = WorkflowAction.objects.create(
assign_title="Doc assign owner",
assign_owner=self.user2,
)
w = Workflow.objects.create(
name="Workflow 1",
order=0,
)
w.triggers.add(trigger)
w.actions.add(action)
w.save()
now = timezone.localtime(timezone.now())
created = now - timedelta(weeks=520)
doc = Document.objects.create(
title="sample test",
correspondent=self.c,
original_filename="sample.pdf",
created=created,
)
tasks.check_scheduled_workflows()
doc.refresh_from_db()
self.assertEqual(doc.owner, self.user2)
def test_workflow_scheduled_trigger_added(self):
"""
GIVEN:
- Existing workflow with SCHEDULED trigger against the added field and action that assigns owner
- Existing doc that matches the trigger
WHEN:
- Scheduled workflows are checked
THEN:
- Workflow runs, document owner is updated
"""
trigger = WorkflowTrigger.objects.create(
type=WorkflowTrigger.WorkflowTriggerType.SCHEDULED,
schedule_offset_days=1,
schedule_date_field=WorkflowTrigger.ScheduleDateField.ADDED,
)
action = WorkflowAction.objects.create(
assign_title="Doc assign owner",
assign_owner=self.user2,
)
w = Workflow.objects.create(
name="Workflow 1",
order=0,
)
w.triggers.add(trigger)
w.actions.add(action)
w.save()
added = timezone.now() - timedelta(days=365)
doc = Document.objects.create(
title="sample test",
correspondent=self.c,
original_filename="sample.pdf",
added=added,
)
tasks.check_scheduled_workflows()
doc.refresh_from_db()
self.assertEqual(doc.owner, self.user2)
@mock.patch("documents.models.Document.objects.filter", autospec=True)
def test_workflow_scheduled_trigger_modified(self, mock_filter):
"""
GIVEN:
- Existing workflow with SCHEDULED trigger against the modified field and action that assigns owner
- Existing doc that matches the trigger
WHEN:
- Scheduled workflows are checked
THEN:
- Workflow runs, document owner is updated
"""
# we have to mock because modified field is auto_now
mock_filter.return_value = Document.objects.all()
trigger = WorkflowTrigger.objects.create(
type=WorkflowTrigger.WorkflowTriggerType.SCHEDULED,
schedule_offset_days=1,
schedule_date_field=WorkflowTrigger.ScheduleDateField.MODIFIED,
)
action = WorkflowAction.objects.create(
assign_title="Doc assign owner",
assign_owner=self.user2,
)
w = Workflow.objects.create(
name="Workflow 1",
order=0,
)
w.triggers.add(trigger)
w.actions.add(action)
w.save()
doc = Document.objects.create(
title="sample test",
correspondent=self.c,
original_filename="sample.pdf",
)
tasks.check_scheduled_workflows()
doc.refresh_from_db()
self.assertEqual(doc.owner, self.user2)
def test_workflow_scheduled_trigger_custom_field(self):
"""
GIVEN:
- Existing workflow with SCHEDULED trigger against a custom field and action that assigns owner
- Existing doc that matches the trigger
WHEN:
- Scheduled workflows are checked
THEN:
- Workflow runs, document owner is updated
"""
trigger = WorkflowTrigger.objects.create(
type=WorkflowTrigger.WorkflowTriggerType.SCHEDULED,
schedule_offset_days=1,
schedule_date_field=WorkflowTrigger.ScheduleDateField.CUSTOM_FIELD,
schedule_date_custom_field=self.cf1,
)
action = WorkflowAction.objects.create(
assign_title="Doc assign owner",
assign_owner=self.user2,
)
w = Workflow.objects.create(
name="Workflow 1",
order=0,
)
w.triggers.add(trigger)
w.actions.add(action)
w.save()
doc = Document.objects.create(
title="sample test",
correspondent=self.c,
original_filename="sample.pdf",
)
CustomFieldInstance.objects.create(
document=doc,
field=self.cf1,
value_date=timezone.now() - timedelta(days=2),
)
tasks.check_scheduled_workflows()
doc.refresh_from_db()
self.assertEqual(doc.owner, self.user2)
def test_workflow_scheduled_already_run(self):
"""
GIVEN:
- Existing workflow with SCHEDULED trigger
- Existing doc that has already had the workflow run
WHEN:
- Scheduled workflows are checked
THEN:
- Workflow does not run again
"""
trigger = WorkflowTrigger.objects.create(
type=WorkflowTrigger.WorkflowTriggerType.SCHEDULED,
schedule_offset_days=1,
schedule_date_field=WorkflowTrigger.ScheduleDateField.CREATED,
)
action = WorkflowAction.objects.create(
assign_title="Doc assign owner",
assign_owner=self.user2,
)
w = Workflow.objects.create(
name="Workflow 1",
order=0,
)
w.triggers.add(trigger)
w.actions.add(action)
w.save()
doc = Document.objects.create(
title="sample test",
correspondent=self.c,
original_filename="sample.pdf",
created=timezone.now() - timedelta(days=2),
)
wr = WorkflowRun.objects.create(
workflow=w,
document=doc,
type=WorkflowTrigger.WorkflowTriggerType.SCHEDULED,
run_at=timezone.now(),
)
self.assertEqual(
str(wr),
f"WorkflowRun of {w} at {wr.run_at} on {doc}",
) # coverage
tasks.check_scheduled_workflows()
doc.refresh_from_db()
self.assertIsNone(doc.owner)
def test_workflow_scheduled_trigger_too_early(self):
"""
GIVEN:
- Existing workflow with SCHEDULED trigger and recurring interval of 7 days
- Workflow run date is 6 days ago
WHEN:
- Scheduled workflows are checked
THEN:
- Workflow does not run as the offset is not met
"""
trigger = WorkflowTrigger.objects.create(
type=WorkflowTrigger.WorkflowTriggerType.SCHEDULED,
schedule_offset_days=30,
schedule_date_field=WorkflowTrigger.ScheduleDateField.CREATED,
schedule_is_recurring=True,
schedule_recurring_interval_days=7,
)
action = WorkflowAction.objects.create(
assign_title="Doc assign owner",
assign_owner=self.user2,
)
w = Workflow.objects.create(
name="Workflow 1",
order=0,
)
w.triggers.add(trigger)
w.actions.add(action)
w.save()
doc = Document.objects.create(
title="sample test",
correspondent=self.c,
original_filename="sample.pdf",
created=timezone.now() - timedelta(days=40),
)
WorkflowRun.objects.create(
workflow=w,
document=doc,
type=WorkflowTrigger.WorkflowTriggerType.SCHEDULED,
run_at=timezone.now() - timedelta(days=6),
)
with self.assertLogs(level="DEBUG") as cm:
tasks.check_scheduled_workflows()
self.assertIn(
"last run was within the recurring interval",
" ".join(cm.output),
)
doc.refresh_from_db()
self.assertIsNone(doc.owner)
def test_workflow_enabled_disabled(self):
trigger = WorkflowTrigger.objects.create(
type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,
@@ -1354,7 +1624,7 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase):
def test_new_trigger_type_raises_exception(self):
trigger = WorkflowTrigger.objects.create(
type=4,
type=99,
)
action = WorkflowAction.objects.create(
assign_title="Doc assign owner",
@@ -1370,7 +1640,7 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase):
doc = Document.objects.create(
title="test",
)
self.assertRaises(Exception, document_matches_workflow, doc, w, 4)
self.assertRaises(Exception, document_matches_workflow, doc, w, 99)
def test_removal_action_document_updated_workflow(self):
"""