Feature: Advanced Workflow Trigger Filters (#11029)

2025-12-16 01:31:09 -06:00 · 2025-10-13 15:23:56 -07:00
parent d394053ddc
commit f6c004183e
16 changed files with 2267 additions and 171 deletions
--- a/src/documents/matching.py
+++ b/src/documents/matching.py
@@ -6,8 +6,11 @@ from fnmatch import fnmatch
 from fnmatch import translate as fnmatch_translate
 from typing import TYPE_CHECKING

+from rest_framework import serializers
+
 from documents.data_models import ConsumableDocument
 from documents.data_models import DocumentSource
+from documents.filters import CustomFieldQueryParser
 from documents.models import Correspondent
 from documents.models import Document
 from documents.models import DocumentType
@@ -342,67 +345,147 @@ def consumable_document_matches_workflow(
 def existing_document_matches_workflow(
    document: Document,
    trigger: WorkflowTrigger,
-) -> tuple[bool, str]:
+) -> tuple[bool, str | None]:
    """
    Returns True if the Document matches all filters from the workflow trigger,
    False otherwise. Includes a reason if doesn't match
    """

-    trigger_matched = True
-    reason = ""
-
+    # Check content matching algorithm
    if trigger.matching_algorithm > MatchingModel.MATCH_NONE and not matches(
        trigger,
        document,
    ):
-        reason = (
+        return (
+            False,
            f"Document content matching settings for algorithm '{trigger.matching_algorithm}' did not match",
        )
-        trigger_matched = False

-    # Document tags vs trigger has_tags
-    if (
-        trigger.filter_has_tags.all().count() > 0
-        and document.tags.filter(
-            id__in=trigger.filter_has_tags.all().values_list("id"),
-        ).count()
-        == 0
-    ):
-        reason = (
-            f"Document tags {document.tags.all()} do not include"
-            f" {trigger.filter_has_tags.all()}",
-        )
-        trigger_matched = False
+    # Check if any tag filters exist to determine if we need to load document tags
+    trigger_has_tags_qs = trigger.filter_has_tags.all()
+    trigger_has_all_tags_qs = trigger.filter_has_all_tags.all()
+    trigger_has_not_tags_qs = trigger.filter_has_not_tags.all()
+
+    has_tags_filter = trigger_has_tags_qs.exists()
+    has_all_tags_filter = trigger_has_all_tags_qs.exists()
+    has_not_tags_filter = trigger_has_not_tags_qs.exists()
+
+    # Load document tags once if any tag filters exist
+    document_tag_ids = None
+    if has_tags_filter or has_all_tags_filter or has_not_tags_filter:
+        document_tag_ids = set(document.tags.values_list("id", flat=True))
+
+    # Document tags vs trigger has_tags (any of)
+    if has_tags_filter:
+        trigger_has_tag_ids = set(trigger_has_tags_qs.values_list("id", flat=True))
+        if not (document_tag_ids & trigger_has_tag_ids):
+            # Load the actual tag objects only when building the mismatch message
+            return (
+                False,
+                f"Document tags {list(document.tags.all())} do not include {list(trigger_has_tags_qs)}",
+            )
+
+    # Document tags vs trigger has_all_tags (all of)
+    if has_all_tags_filter:
+        required_tag_ids = set(trigger_has_all_tags_qs.values_list("id", flat=True))
+        if not required_tag_ids.issubset(document_tag_ids):
+            return (
+                False,
+                f"Document tags {list(document.tags.all())} do not contain all of {list(trigger_has_all_tags_qs)}",
+            )
+
+    # Document tags vs trigger has_not_tags (none of)
+    if has_not_tags_filter:
+        excluded_tag_ids = set(trigger_has_not_tags_qs.values_list("id", flat=True))
+        if document_tag_ids & excluded_tag_ids:
+            return (
+                False,
+                f"Document tags {list(document.tags.all())} include excluded tags {list(trigger_has_not_tags_qs)}",
+            )

    # Document correspondent vs trigger has_correspondent
    if (
-        trigger.filter_has_correspondent is not None
-        and document.correspondent != trigger.filter_has_correspondent
+        trigger.filter_has_correspondent_id is not None
+        and document.correspondent_id != trigger.filter_has_correspondent_id
    ):
-        reason = (
+        return (
+            False,
            f"Document correspondent {document.correspondent} does not match {trigger.filter_has_correspondent}",
        )
-        trigger_matched = False
+
+    if (
+        document.correspondent_id
+        and trigger.filter_has_not_correspondents.filter(
+            id=document.correspondent_id,
+        ).exists()
+    ):
+        return (
+            False,
+            f"Document correspondent {document.correspondent} is excluded by {list(trigger.filter_has_not_correspondents.all())}",
+        )

    # Document document_type vs trigger has_document_type
    if (
-        trigger.filter_has_document_type is not None
-        and document.document_type != trigger.filter_has_document_type
+        trigger.filter_has_document_type_id is not None
+        and document.document_type_id != trigger.filter_has_document_type_id
    ):
-        reason = (
+        return (
+            False,
            f"Document doc type {document.document_type} does not match {trigger.filter_has_document_type}",
        )
-        trigger_matched = False
+
+    if (
+        document.document_type_id
+        and trigger.filter_has_not_document_types.filter(
+            id=document.document_type_id,
+        ).exists()
+    ):
+        return (
+            False,
+            f"Document doc type {document.document_type} is excluded by {list(trigger.filter_has_not_document_types.all())}",
+        )

    # Document storage_path vs trigger has_storage_path
    if (
-        trigger.filter_has_storage_path is not None
-        and document.storage_path != trigger.filter_has_storage_path
+        trigger.filter_has_storage_path_id is not None
+        and document.storage_path_id != trigger.filter_has_storage_path_id
    ):
-        reason = (
+        return (
+            False,
            f"Document storage path {document.storage_path} does not match {trigger.filter_has_storage_path}",
        )
-        trigger_matched = False
+
+    if (
+        document.storage_path_id
+        and trigger.filter_has_not_storage_paths.filter(
+            id=document.storage_path_id,
+        ).exists()
+    ):
+        return (
+            False,
+            f"Document storage path {document.storage_path} is excluded by {list(trigger.filter_has_not_storage_paths.all())}",
+        )
+
+    # Custom field query check
+    if trigger.filter_custom_field_query:
+        parser = CustomFieldQueryParser("filter_custom_field_query")
+        try:
+            custom_field_q, annotations = parser.parse(
+                trigger.filter_custom_field_query,
+            )
+        except serializers.ValidationError:
+            return (False, "Invalid custom field query configuration")
+
+        qs = (
+            Document.objects.filter(id=document.id)
+            .annotate(**annotations)
+            .filter(custom_field_q)
+        )
+        if not qs.exists():
+            return (
+                False,
+                "Document custom fields do not match the configured custom field query",
+            )

    # Document original_filename vs trigger filename
    if (
@@ -414,13 +497,12 @@ def existing_document_matches_workflow(
            trigger.filter_filename.lower(),
        )
    ):
-        reason = (
-            f"Document filename {document.original_filename} does not match"
-            f" {trigger.filter_filename.lower()}",
+        return (
+            False,
+            f"Document filename {document.original_filename} does not match {trigger.filter_filename.lower()}",
        )
-        trigger_matched = False

-    return (trigger_matched, reason)
+    return (True, None)


 def prefilter_documents_by_workflowtrigger(
@@ -433,31 +515,66 @@ def prefilter_documents_by_workflowtrigger(
    document_matches_workflow in run_workflows
    """

-    if trigger.filter_has_tags.all().count() > 0:
-        documents = documents.filter(
-            tags__in=trigger.filter_has_tags.all(),
-        ).distinct()
+    # Filter for documents that have AT LEAST ONE of the specified tags.
+    if trigger.filter_has_tags.exists():
+        documents = documents.filter(tags__in=trigger.filter_has_tags.all()).distinct()
+
+    # Filter for documents that have ALL of the specified tags.
+    if trigger.filter_has_all_tags.exists():
+        for tag in trigger.filter_has_all_tags.all():
+            documents = documents.filter(tags=tag)
+        # Multiple JOINs can create duplicate results.
+        documents = documents.distinct()
+
+    # Exclude documents that have ANY of the specified tags.
+    if trigger.filter_has_not_tags.exists():
+        documents = documents.exclude(tags__in=trigger.filter_has_not_tags.all())
+
+    # Correspondent, document type and storage path filtering: each has a required value and an exclusion list.

    if trigger.filter_has_correspondent is not None:
        documents = documents.filter(
            correspondent=trigger.filter_has_correspondent,
        )
+    if trigger.filter_has_not_correspondents.exists():
+        documents = documents.exclude(
+            correspondent__in=trigger.filter_has_not_correspondents.all(),
+        )

    if trigger.filter_has_document_type is not None:
        documents = documents.filter(
            document_type=trigger.filter_has_document_type,
        )
+    if trigger.filter_has_not_document_types.exists():
+        documents = documents.exclude(
+            document_type__in=trigger.filter_has_not_document_types.all(),
+        )

    if trigger.filter_has_storage_path is not None:
        documents = documents.filter(
            storage_path=trigger.filter_has_storage_path,
        )
+    if trigger.filter_has_not_storage_paths.exists():
+        documents = documents.exclude(
+            storage_path__in=trigger.filter_has_not_storage_paths.all(),
+        )

-    if trigger.filter_filename is not None and len(trigger.filter_filename) > 0:
-        # the true fnmatch will actually run later so we just want a loose filter here
+    # Custom field query and filename filtering. The filename regex is only a loose prefilter; the exact fnmatch check runs later.
+
+    if trigger.filter_custom_field_query:
+        parser = CustomFieldQueryParser("filter_custom_field_query")
+        try:
+            custom_field_q, annotations = parser.parse(
+                trigger.filter_custom_field_query,
+            )
+        except serializers.ValidationError:
+            return documents.none()
+
+        documents = documents.annotate(**annotations).filter(custom_field_q)
+
+    if trigger.filter_filename:
        regex = fnmatch_translate(trigger.filter_filename).lstrip("^").rstrip("$")
-        regex = f"(?i){regex}"
-        documents = documents.filter(original_filename__regex=regex)
+        documents = documents.filter(original_filename__iregex=regex)

    return documents

@@ -472,13 +589,34 @@ def document_matches_workflow(
    settings from the workflow trigger, False otherwise
    """

+    triggers_queryset = (
+        workflow.triggers.filter(
+            type=trigger_type,
+        )
+        .select_related(
+            "filter_mailrule",
+            "filter_has_document_type",
+            "filter_has_correspondent",
+            "filter_has_storage_path",
+            "schedule_date_custom_field",
+        )
+        .prefetch_related(
+            "filter_has_tags",
+            "filter_has_all_tags",
+            "filter_has_not_tags",
+            "filter_has_not_document_types",
+            "filter_has_not_correspondents",
+            "filter_has_not_storage_paths",
+        )
+    )
+
    trigger_matched = True
-    if workflow.triggers.filter(type=trigger_type).count() == 0:
+    if not triggers_queryset.exists():
        trigger_matched = False
        logger.info(f"Document did not match {workflow}")
        logger.debug(f"No matching triggers with type {trigger_type} found")
    else:
-        for trigger in workflow.triggers.filter(type=trigger_type):
+        for trigger in triggers_queryset:
            if trigger_type == WorkflowTrigger.WorkflowTriggerType.CONSUMPTION:
                trigger_matched, reason = consumable_document_matches_workflow(
                    document,
--- a/src/documents/migrations/1072_workflowtrigger_filter_custom_field_query_and_more.py
+++ b/src/documents/migrations/1072_workflowtrigger_filter_custom_field_query_and_more.py
@@ -0,0 +1,73 @@
+# Generated by Django 5.2.6 on 2025-10-07 18:52
+
+from django.db import migrations
+from django.db import models
+
+
+class Migration(migrations.Migration):
+    dependencies = [
+        ("documents", "1071_tag_tn_ancestors_count_tag_tn_ancestors_pks_and_more"),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name="workflowtrigger",
+            name="filter_custom_field_query",
+            field=models.TextField(
+                blank=True,
+                help_text="JSON-encoded custom field query expression.",
+                null=True,
+                verbose_name="filter custom field query",
+            ),
+        ),
+        migrations.AddField(
+            model_name="workflowtrigger",
+            name="filter_has_all_tags",
+            field=models.ManyToManyField(
+                blank=True,
+                related_name="workflowtriggers_has_all",
+                to="documents.tag",
+                verbose_name="has all of these tag(s)",
+            ),
+        ),
+        migrations.AddField(
+            model_name="workflowtrigger",
+            name="filter_has_not_correspondents",
+            field=models.ManyToManyField(
+                blank=True,
+                related_name="workflowtriggers_has_not_correspondent",
+                to="documents.correspondent",
+                verbose_name="does not have these correspondent(s)",
+            ),
+        ),
+        migrations.AddField(
+            model_name="workflowtrigger",
+            name="filter_has_not_document_types",
+            field=models.ManyToManyField(
+                blank=True,
+                related_name="workflowtriggers_has_not_document_type",
+                to="documents.documenttype",
+                verbose_name="does not have these document type(s)",
+            ),
+        ),
+        migrations.AddField(
+            model_name="workflowtrigger",
+            name="filter_has_not_storage_paths",
+            field=models.ManyToManyField(
+                blank=True,
+                related_name="workflowtriggers_has_not_storage_path",
+                to="documents.storagepath",
+                verbose_name="does not have these storage path(s)",
+            ),
+        ),
+        migrations.AddField(
+            model_name="workflowtrigger",
+            name="filter_has_not_tags",
+            field=models.ManyToManyField(
+                blank=True,
+                related_name="workflowtriggers_has_not",
+                to="documents.tag",
+                verbose_name="does not have these tag(s)",
+            ),
+        ),
+    ]
--- a/src/documents/models.py
+++ b/src/documents/models.py
@@ -1065,6 +1065,20 @@ class WorkflowTrigger(models.Model):
        verbose_name=_("has these tag(s)"),
    )

+    filter_has_all_tags = models.ManyToManyField(
+        Tag,
+        blank=True,
+        related_name="workflowtriggers_has_all",
+        verbose_name=_("has all of these tag(s)"),
+    )
+
+    filter_has_not_tags = models.ManyToManyField(
+        Tag,
+        blank=True,
+        related_name="workflowtriggers_has_not",
+        verbose_name=_("does not have these tag(s)"),
+    )
+
    filter_has_document_type = models.ForeignKey(
        DocumentType,
        null=True,
@@ -1073,6 +1087,13 @@ class WorkflowTrigger(models.Model):
        verbose_name=_("has this document type"),
    )

+    filter_has_not_document_types = models.ManyToManyField(
+        DocumentType,
+        blank=True,
+        related_name="workflowtriggers_has_not_document_type",
+        verbose_name=_("does not have these document type(s)"),
+    )
+
    filter_has_correspondent = models.ForeignKey(
        Correspondent,
        null=True,
@@ -1081,6 +1102,13 @@ class WorkflowTrigger(models.Model):
        verbose_name=_("has this correspondent"),
    )

+    filter_has_not_correspondents = models.ManyToManyField(
+        Correspondent,
+        blank=True,
+        related_name="workflowtriggers_has_not_correspondent",
+        verbose_name=_("does not have these correspondent(s)"),
+    )
+
    filter_has_storage_path = models.ForeignKey(
        StoragePath,
        null=True,
@@ -1089,6 +1117,20 @@ class WorkflowTrigger(models.Model):
        verbose_name=_("has this storage path"),
    )

+    filter_has_not_storage_paths = models.ManyToManyField(
+        StoragePath,
+        blank=True,
+        related_name="workflowtriggers_has_not_storage_path",
+        verbose_name=_("does not have these storage path(s)"),
+    )
+
+    filter_custom_field_query = models.TextField(
+        _("filter custom field query"),
+        null=True,
+        blank=True,
+        help_text=_("JSON-encoded custom field query expression."),
+    )
+
    schedule_offset_days = models.IntegerField(
        _("schedule offset days"),
        default=0,
--- a/src/documents/serialisers.py
+++ b/src/documents/serialisers.py
@@ -44,6 +44,7 @@ if settings.AUDIT_LOG_ENABLED:

 from documents import bulk_edit
 from documents.data_models import DocumentSource
+from documents.filters import CustomFieldQueryParser
 from documents.models import Correspondent
 from documents.models import CustomField
 from documents.models import CustomFieldInstance
@@ -2240,6 +2241,12 @@ class WorkflowTriggerSerializer(serializers.ModelSerializer):
            "match",
            "is_insensitive",
            "filter_has_tags",
+            "filter_has_all_tags",
+            "filter_has_not_tags",
+            "filter_custom_field_query",
+            "filter_has_not_correspondents",
+            "filter_has_not_document_types",
+            "filter_has_not_storage_paths",
            "filter_has_correspondent",
            "filter_has_document_type",
            "filter_has_storage_path",
@@ -2265,6 +2272,20 @@ class WorkflowTriggerSerializer(serializers.ModelSerializer):
        ):
            attrs["filter_path"] = None

+        if (
+            "filter_custom_field_query" in attrs
+            and attrs["filter_custom_field_query"] is not None
+            and len(attrs["filter_custom_field_query"]) == 0
+        ):
+            attrs["filter_custom_field_query"] = None
+
+        if (
+            "filter_custom_field_query" in attrs
+            and attrs["filter_custom_field_query"] is not None
+        ):
+            parser = CustomFieldQueryParser("filter_custom_field_query")
+            parser.parse(attrs["filter_custom_field_query"])
+
        trigger_type = attrs.get("type", getattr(self.instance, "type", None))
        if (
            trigger_type == WorkflowTrigger.WorkflowTriggerType.CONSUMPTION
@@ -2460,6 +2481,20 @@ class WorkflowSerializer(serializers.ModelSerializer):
        if triggers is not None and triggers is not serializers.empty:
            for trigger in triggers:
                filter_has_tags = trigger.pop("filter_has_tags", None)
+                filter_has_all_tags = trigger.pop("filter_has_all_tags", None)
+                filter_has_not_tags = trigger.pop("filter_has_not_tags", None)
+                filter_has_not_correspondents = trigger.pop(
+                    "filter_has_not_correspondents",
+                    None,
+                )
+                filter_has_not_document_types = trigger.pop(
+                    "filter_has_not_document_types",
+                    None,
+                )
+                filter_has_not_storage_paths = trigger.pop(
+                    "filter_has_not_storage_paths",
+                    None,
+                )
                # Convert sources to strings to handle django-multiselectfield v1.0 changes
                WorkflowTriggerSerializer.normalize_workflow_trigger_sources(trigger)
                trigger_instance, _ = WorkflowTrigger.objects.update_or_create(
@@ -2468,6 +2503,22 @@ class WorkflowSerializer(serializers.ModelSerializer):
                )
                if filter_has_tags is not None:
                    trigger_instance.filter_has_tags.set(filter_has_tags)
+                if filter_has_all_tags is not None:
+                    trigger_instance.filter_has_all_tags.set(filter_has_all_tags)
+                if filter_has_not_tags is not None:
+                    trigger_instance.filter_has_not_tags.set(filter_has_not_tags)
+                if filter_has_not_correspondents is not None:
+                    trigger_instance.filter_has_not_correspondents.set(
+                        filter_has_not_correspondents,
+                    )
+                if filter_has_not_document_types is not None:
+                    trigger_instance.filter_has_not_document_types.set(
+                        filter_has_not_document_types,
+                    )
+                if filter_has_not_storage_paths is not None:
+                    trigger_instance.filter_has_not_storage_paths.set(
+                        filter_has_not_storage_paths,
+                    )
                set_triggers.append(trigger_instance)

        if actions is not None and actions is not serializers.empty:
--- a/src/documents/tests/test_api_workflows.py
+++ b/src/documents/tests/test_api_workflows.py
@@ -184,6 +184,17 @@ class TestApiWorkflows(DirectoriesMixin, APITestCase):
                            "filter_filename": "*",
                            "filter_path": "*/samples/*",
                            "filter_has_tags": [self.t1.id],
+                            "filter_has_all_tags": [self.t2.id],
+                            "filter_has_not_tags": [self.t3.id],
+                            "filter_has_not_correspondents": [self.c2.id],
+                            "filter_has_not_document_types": [self.dt2.id],
+                            "filter_has_not_storage_paths": [self.sp2.id],
+                            "filter_custom_field_query": json.dumps(
+                                [
+                                    "AND",
+                                    [[self.cf1.id, "exact", "value"]],
+                                ],
+                            ),
                            "filter_has_document_type": self.dt.id,
                            "filter_has_correspondent": self.c.id,
                            "filter_has_storage_path": self.sp.id,
@@ -223,6 +234,36 @@ class TestApiWorkflows(DirectoriesMixin, APITestCase):
        )
        self.assertEqual(response.status_code, status.HTTP_201_CREATED)
        self.assertEqual(Workflow.objects.count(), 2)
+        workflow = Workflow.objects.get(name="Workflow 2")
+        trigger = workflow.triggers.first()
+        self.assertSetEqual(
+            set(trigger.filter_has_tags.values_list("id", flat=True)),
+            {self.t1.id},
+        )
+        self.assertSetEqual(
+            set(trigger.filter_has_all_tags.values_list("id", flat=True)),
+            {self.t2.id},
+        )
+        self.assertSetEqual(
+            set(trigger.filter_has_not_tags.values_list("id", flat=True)),
+            {self.t3.id},
+        )
+        self.assertSetEqual(
+            set(trigger.filter_has_not_correspondents.values_list("id", flat=True)),
+            {self.c2.id},
+        )
+        self.assertSetEqual(
+            set(trigger.filter_has_not_document_types.values_list("id", flat=True)),
+            {self.dt2.id},
+        )
+        self.assertSetEqual(
+            set(trigger.filter_has_not_storage_paths.values_list("id", flat=True)),
+            {self.sp2.id},
+        )
+        self.assertEqual(
+            trigger.filter_custom_field_query,
+            json.dumps(["AND", [[self.cf1.id, "exact", "value"]]]),
+        )

    def test_api_create_invalid_workflow_trigger(self):
        """
@@ -376,6 +417,14 @@ class TestApiWorkflows(DirectoriesMixin, APITestCase):
                        {
                            "type": WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,
                            "filter_has_tags": [self.t1.id],
+                            "filter_has_all_tags": [self.t2.id],
+                            "filter_has_not_tags": [self.t3.id],
+                            "filter_has_not_correspondents": [self.c2.id],
+                            "filter_has_not_document_types": [self.dt2.id],
+                            "filter_has_not_storage_paths": [self.sp2.id],
+                            "filter_custom_field_query": json.dumps(
+                                ["AND", [[self.cf1.id, "exact", "value"]]],
+                            ),
                            "filter_has_correspondent": self.c.id,
                            "filter_has_document_type": self.dt.id,
                        },
@@ -393,6 +442,30 @@ class TestApiWorkflows(DirectoriesMixin, APITestCase):
        workflow = Workflow.objects.get(id=response.data["id"])
        self.assertEqual(workflow.name, "Workflow Updated")
        self.assertEqual(workflow.triggers.first().filter_has_tags.first(), self.t1)
+        self.assertEqual(
+            workflow.triggers.first().filter_has_all_tags.first(),
+            self.t2,
+        )
+        self.assertEqual(
+            workflow.triggers.first().filter_has_not_tags.first(),
+            self.t3,
+        )
+        self.assertEqual(
+            workflow.triggers.first().filter_has_not_correspondents.first(),
+            self.c2,
+        )
+        self.assertEqual(
+            workflow.triggers.first().filter_has_not_document_types.first(),
+            self.dt2,
+        )
+        self.assertEqual(
+            workflow.triggers.first().filter_has_not_storage_paths.first(),
+            self.sp2,
+        )
+        self.assertEqual(
+            workflow.triggers.first().filter_custom_field_query,
+            json.dumps(["AND", [[self.cf1.id, "exact", "value"]]]),
+        )
        self.assertEqual(workflow.actions.first().assign_title, "Action New Title")

    def test_api_update_workflow_no_trigger_actions(self):
--- a/src/documents/tests/test_workflows.py
+++ b/src/documents/tests/test_workflows.py
@@ -1,4 +1,5 @@
 import datetime
+import json
 import shutil
 import socket
 from datetime import timedelta
@@ -31,6 +32,7 @@ from documents import tasks
 from documents.data_models import ConsumableDocument
 from documents.data_models import DocumentSource
 from documents.matching import document_matches_workflow
+from documents.matching import existing_document_matches_workflow
 from documents.matching import prefilter_documents_by_workflowtrigger
 from documents.models import Correspondent
 from documents.models import CustomField
@@ -46,6 +48,7 @@ from documents.models import WorkflowActionEmail
 from documents.models import WorkflowActionWebhook
 from documents.models import WorkflowRun
 from documents.models import WorkflowTrigger
+from documents.serialisers import WorkflowTriggerSerializer
 from documents.signals import document_consumption_finished
 from documents.tests.utils import DirectoriesMixin
 from documents.tests.utils import DummyProgressManager
@@ -1080,9 +1083,409 @@ class TestWorkflows(
            )
            expected_str = f"Document did not match {w}"
            self.assertIn(expected_str, cm.output[0])
-            expected_str = f"Document tags {doc.tags.all()} do not include {trigger.filter_has_tags.all()}"
+            expected_str = f"Document tags {list(doc.tags.all())} do not include {list(trigger.filter_has_tags.all())}"
            self.assertIn(expected_str, cm.output[1])

+    def test_document_added_no_match_all_tags(self):  # has-all-tags filter: doc missing one required tag must not match
+        trigger = WorkflowTrigger.objects.create(
+            type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,
+        )
+        trigger.filter_has_all_tags.set([self.t1, self.t2])  # trigger lists both t1 and t2
+        action = WorkflowAction.objects.create(
+            assign_title="Doc assign owner",
+            assign_owner=self.user2,
+        )
+        w = Workflow.objects.create(
+            name="Workflow 1",
+            order=0,
+        )
+        w.triggers.add(trigger)
+        w.actions.add(action)
+        w.save()
+
+        doc = Document.objects.create(
+            title="sample test",
+            correspondent=self.c,
+            original_filename="sample.pdf",
+        )
+        doc.tags.set([self.t1])  # only t1 attached, so t2 is missing
+        doc.save()
+
+        with self.assertLogs("paperless.matching", level="DEBUG") as cm:  # capture matching-logger output
+            document_consumption_finished.send(
+                sender=self.__class__,
+                document=doc,
+            )
+            expected_str = f"Document did not match {w}"
+            self.assertIn(expected_str, cm.output[0])  # first record names the workflow that did not match
+            expected_str = (
+                f"Document tags {list(doc.tags.all())} do not contain all of"
+                f" {list(trigger.filter_has_all_tags.all())}"
+            )
+            self.assertIn(expected_str, cm.output[1])  # second record carries the reason text
+
+    def test_document_added_excluded_tags(self):  # has-not-tags filter: doc carrying an excluded tag must not match
+        trigger = WorkflowTrigger.objects.create(
+            type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,
+        )
+        trigger.filter_has_not_tags.set([self.t3])  # t3 is the excluded tag
+        action = WorkflowAction.objects.create(
+            assign_title="Doc assign owner",
+            assign_owner=self.user2,
+        )
+        w = Workflow.objects.create(
+            name="Workflow 1",
+            order=0,
+        )
+        w.triggers.add(trigger)
+        w.actions.add(action)
+        w.save()
+
+        doc = Document.objects.create(
+            title="sample test",
+            correspondent=self.c,
+            original_filename="sample.pdf",
+        )
+        doc.tags.set([self.t3])  # document carries exactly the excluded tag
+        doc.save()
+
+        with self.assertLogs("paperless.matching", level="DEBUG") as cm:  # capture matching-logger output
+            document_consumption_finished.send(
+                sender=self.__class__,
+                document=doc,
+            )
+            expected_str = f"Document did not match {w}"
+            self.assertIn(expected_str, cm.output[0])  # first record names the workflow that did not match
+            expected_str = (
+                f"Document tags {list(doc.tags.all())} include excluded tags"
+                f" {list(trigger.filter_has_not_tags.all())}"
+            )
+            self.assertIn(expected_str, cm.output[1])  # second record carries the reason text
+
+    def test_document_added_excluded_correspondent(self):  # has-not-correspondents filter: excluded correspondent must not match
+        trigger = WorkflowTrigger.objects.create(
+            type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,
+        )
+        trigger.filter_has_not_correspondents.set([self.c])  # self.c is the excluded correspondent
+        action = WorkflowAction.objects.create(
+            assign_title="Doc assign owner",
+            assign_owner=self.user2,
+        )
+        w = Workflow.objects.create(
+            name="Workflow 1",
+            order=0,
+        )
+        w.triggers.add(trigger)
+        w.actions.add(action)
+        w.save()
+
+        doc = Document.objects.create(
+            title="sample test",
+            correspondent=self.c,  # same correspondent the trigger excludes
+            original_filename="sample.pdf",
+        )
+
+        with self.assertLogs("paperless.matching", level="DEBUG") as cm:  # capture matching-logger output
+            document_consumption_finished.send(
+                sender=self.__class__,
+                document=doc,
+            )
+            expected_str = f"Document did not match {w}"
+            self.assertIn(expected_str, cm.output[0])  # first record names the workflow that did not match
+            expected_str = (
+                f"Document correspondent {doc.correspondent} is excluded by"
+                f" {list(trigger.filter_has_not_correspondents.all())}"
+            )
+            self.assertIn(expected_str, cm.output[1])  # second record carries the reason text
+
+    def test_document_added_excluded_document_types(self):  # has-not-document-types filter: excluded type must not match
+        trigger = WorkflowTrigger.objects.create(
+            type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,
+        )
+        trigger.filter_has_not_document_types.set([self.dt])  # self.dt is the excluded document type
+        action = WorkflowAction.objects.create(
+            assign_title="Doc assign owner",
+            assign_owner=self.user2,
+        )
+        w = Workflow.objects.create(
+            name="Workflow 1",
+            order=0,
+        )
+        w.triggers.add(trigger)
+        w.actions.add(action)
+        w.save()
+
+        doc = Document.objects.create(
+            title="sample test",
+            document_type=self.dt,  # same document type the trigger excludes
+            original_filename="sample.pdf",
+        )
+
+        with self.assertLogs("paperless.matching", level="DEBUG") as cm:  # capture matching-logger output
+            document_consumption_finished.send(
+                sender=self.__class__,
+                document=doc,
+            )
+            expected_str = f"Document did not match {w}"
+            self.assertIn(expected_str, cm.output[0])  # first record names the workflow that did not match
+            expected_str = (
+                f"Document doc type {doc.document_type} is excluded by"
+                f" {list(trigger.filter_has_not_document_types.all())}"
+            )
+            self.assertIn(expected_str, cm.output[1])  # second record carries the reason text
+
+    def test_document_added_excluded_storage_paths(self):  # has-not-storage-paths filter: excluded path must not match
+        trigger = WorkflowTrigger.objects.create(
+            type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,
+        )
+        trigger.filter_has_not_storage_paths.set([self.sp])  # self.sp is the excluded storage path
+        action = WorkflowAction.objects.create(
+            assign_title="Doc assign owner",
+            assign_owner=self.user2,
+        )
+        w = Workflow.objects.create(
+            name="Workflow 1",
+            order=0,
+        )
+        w.triggers.add(trigger)
+        w.actions.add(action)
+        w.save()
+
+        doc = Document.objects.create(
+            title="sample test",
+            storage_path=self.sp,  # same storage path the trigger excludes
+            original_filename="sample.pdf",
+        )
+
+        with self.assertLogs("paperless.matching", level="DEBUG") as cm:  # capture matching-logger output
+            document_consumption_finished.send(
+                sender=self.__class__,
+                document=doc,
+            )
+            expected_str = f"Document did not match {w}"
+            self.assertIn(expected_str, cm.output[0])  # first record names the workflow that did not match
+            expected_str = (
+                f"Document storage path {doc.storage_path} is excluded by"
+                f" {list(trigger.filter_has_not_storage_paths.all())}"
+            )
+            self.assertIn(expected_str, cm.output[1])  # second record carries the reason text
+
+    def test_document_added_custom_field_query_no_match(self):  # custom field query: differing field value must not match
+        trigger = WorkflowTrigger.objects.create(
+            type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,
+            filter_custom_field_query=json.dumps(  # query is stored on the trigger as a JSON string
+                [
+                    "AND",
+                    [[self.cf1.id, "exact", "expected"]],  # cf1 / "exact" operator / value "expected"
+                ],
+            ),
+        )
+        action = WorkflowAction.objects.create(
+            assign_title="Doc assign owner",
+            assign_owner=self.user2,
+        )
+        workflow = Workflow.objects.create(name="Workflow 1", order=0)
+        workflow.triggers.add(trigger)
+        workflow.actions.add(action)
+        workflow.save()
+
+        doc = Document.objects.create(
+            title="sample test",
+            correspondent=self.c,
+            original_filename="sample.pdf",
+        )
+        CustomFieldInstance.objects.create(
+            document=doc,
+            field=self.cf1,
+            value_text="other",  # differs from the "expected" value in the query
+        )
+
+        with self.assertLogs("paperless.matching", level="DEBUG") as cm:  # capture matching-logger output
+            document_consumption_finished.send(
+                sender=self.__class__,
+                document=doc,
+            )
+            expected_str = f"Document did not match {workflow}"
+            self.assertIn(expected_str, cm.output[0])  # first record names the workflow that did not match
+            self.assertIn(
+                "Document custom fields do not match the configured custom field query",
+                cm.output[1],  # second record carries the reason text
+            )
+
+    def test_document_added_custom_field_query_match(self):  # custom field query: equal field value matches
+        trigger = WorkflowTrigger.objects.create(
+            type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,
+            filter_custom_field_query=json.dumps(  # query is stored on the trigger as a JSON string
+                [
+                    "AND",
+                    [[self.cf1.id, "exact", "expected"]],  # cf1 / "exact" operator / value "expected"
+                ],
+            ),
+        )
+        doc = Document.objects.create(
+            title="sample test",
+            correspondent=self.c,
+            original_filename="sample.pdf",
+        )
+        CustomFieldInstance.objects.create(
+            document=doc,
+            field=self.cf1,
+            value_text="expected",  # same value the query asks for
+        )
+
+        matched, reason = existing_document_matches_workflow(doc, trigger)  # called directly, no workflow or signal involved
+        self.assertTrue(matched)
+        self.assertIsNone(reason)  # a successful match reports no reason
+
+    def test_prefilter_documents_custom_field_query(self):  # prefilter keeps only docs satisfying the custom field query
+        trigger = WorkflowTrigger.objects.create(
+            type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,
+            filter_custom_field_query=json.dumps(  # query is stored on the trigger as a JSON string
+                [
+                    "AND",
+                    [[self.cf1.id, "exact", "match"]],  # cf1 / "exact" operator / value "match"
+                ],
+            ),
+        )
+        doc1 = Document.objects.create(
+            title="doc 1",
+            correspondent=self.c,
+            original_filename="doc1.pdf",
+            checksum="checksum1",  # each document in this test gets its own checksum value
+        )
+        CustomFieldInstance.objects.create(
+            document=doc1,
+            field=self.cf1,
+            value_text="match",  # same value the query asks for
+        )
+
+        doc2 = Document.objects.create(
+            title="doc 2",
+            correspondent=self.c,
+            original_filename="doc2.pdf",
+            checksum="checksum2",
+        )
+        CustomFieldInstance.objects.create(
+            document=doc2,
+            field=self.cf1,
+            value_text="different",  # differs from the queried value
+        )
+
+        filtered = prefilter_documents_by_workflowtrigger(
+            Document.objects.all(),  # start from every document in the test database
+            trigger,
+        )
+        self.assertIn(doc1, filtered)  # matching document is kept
+        self.assertNotIn(doc2, filtered)  # non-matching document is dropped
+
+    def test_consumption_trigger_requires_filter_configuration(self):  # CONSUMPTION trigger without any filter is invalid
+        serializer = WorkflowTriggerSerializer(
+            data={
+                "type": WorkflowTrigger.WorkflowTriggerType.CONSUMPTION,  # only the type is supplied, no filter fields
+            },
+        )
+
+        self.assertFalse(serializer.is_valid())
+        errors = serializer.errors.get("non_field_errors", [])  # the error is expected at object level, not on a field
+        self.assertIn(
+            "File name, path or mail rule filter are required",
+            [str(error) for error in errors],  # stringify error entries before comparing with the message
+        )
+
+    def test_workflow_trigger_serializer_clears_empty_custom_field_query(self):  # empty query string is validated to None
+        serializer = WorkflowTriggerSerializer(
+            data={
+                "type": WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,
+                "filter_custom_field_query": "",  # empty string submitted as the query
+            },
+        )
+
+        self.assertTrue(serializer.is_valid(), serializer.errors)  # errors are passed as the failure message
+        self.assertIsNone(serializer.validated_data.get("filter_custom_field_query"))  # None (or absent) after validation
+
+    def test_existing_document_invalid_custom_field_query_configuration(self):  # unparsable stored query yields a non-match
+        trigger = WorkflowTrigger.objects.create(
+            type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,
+            filter_custom_field_query="{ not json",  # deliberately malformed JSON written straight to the model
+        )
+
+        document = Document.objects.create(
+            title="doc invalid query",
+            original_filename="invalid.pdf",
+            checksum="checksum-invalid-query",
+        )
+
+        matched, reason = existing_document_matches_workflow(document, trigger)
+        self.assertFalse(matched)  # expected to report a non-match rather than raise
+        self.assertEqual(reason, "Invalid custom field query configuration")  # exact reason string is pinned
+
+    def test_prefilter_documents_returns_none_for_invalid_custom_field_query(self):  # bad query -> prefilter yields no documents
+        trigger = WorkflowTrigger.objects.create(
+            type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,
+            filter_custom_field_query="{ not json",  # deliberately malformed JSON written straight to the model
+        )
+
+        Document.objects.create(  # a document exists, so an empty result is caused by the filter
+            title="doc",
+            original_filename="doc.pdf",
+            checksum="checksum-prefilter-invalid",
+        )
+
+        filtered = prefilter_documents_by_workflowtrigger(
+            Document.objects.all(),
+            trigger,
+        )
+
+        self.assertEqual(list(filtered), [])  # result is iterable and empty
+
+    def test_prefilter_documents_applies_all_filters(self):  # prefilter with every include/exclude filter set at once
+        other_document_type = DocumentType.objects.create(name="Other Type")  # used below as the excluded document type
+        other_storage_path = StoragePath.objects.create(  # used below as the excluded storage path
+            name="Blocked path",
+            path="/blocked/",
+        )
+
+        trigger = WorkflowTrigger.objects.create(
+            type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,
+            filter_has_correspondent=self.c,  # single-valued "has" filters are set at creation
+            filter_has_document_type=self.dt,
+            filter_has_storage_path=self.sp,
+        )
+        trigger.filter_has_tags.set([self.t1])  # many-valued filters are set after creation
+        trigger.filter_has_all_tags.set([self.t1, self.t2])
+        trigger.filter_has_not_tags.set([self.t3])
+        trigger.filter_has_not_correspondents.set([self.c2])
+        trigger.filter_has_not_document_types.set([other_document_type])
+        trigger.filter_has_not_storage_paths.set([other_storage_path])
+
+        allowed_document = Document.objects.create(  # uses the required correspondent, type and path
+            title="allowed",
+            correspondent=self.c,
+            document_type=self.dt,
+            storage_path=self.sp,
+            original_filename="allow.pdf",
+            checksum="checksum-prefilter-allowed",
+        )
+        allowed_document.tags.set([self.t1, self.t2])  # has t1 and t2, lacks excluded t3
+
+        blocked_document = Document.objects.create(  # uses the excluded correspondent, type and path
+            title="blocked",
+            correspondent=self.c2,
+            document_type=other_document_type,
+            storage_path=other_storage_path,
+            original_filename="block.pdf",
+            checksum="checksum-prefilter-blocked",
+        )
+        blocked_document.tags.set([self.t1, self.t3])  # carries excluded t3 and lacks t2
+
+        filtered = prefilter_documents_by_workflowtrigger(
+            Document.objects.all(),  # start from every document in the test database
+            trigger,
+        )
+
+        self.assertIn(allowed_document, filtered)  # document satisfying all filters is kept
+        self.assertNotIn(blocked_document, filtered)  # document violating the filters is dropped
+
    def test_document_added_no_match_doctype(self):
        trigger = WorkflowTrigger.objects.create(
            type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,