Feature: Advanced Workflow Trigger Filters (#11029)

2025-11-25 23:59:09 -06:00 · 2025-10-13 15:23:56 -07:00
parent d394053ddc
commit f6c004183e
16 changed files with 2267 additions and 171 deletions
--- a/src/documents/matching.py
+++ b/src/documents/matching.py
@@ -6,8 +6,11 @@ from fnmatch import fnmatch
 from fnmatch import translate as fnmatch_translate
 from typing import TYPE_CHECKING

+from rest_framework import serializers
+
 from documents.data_models import ConsumableDocument
 from documents.data_models import DocumentSource
+from documents.filters import CustomFieldQueryParser
 from documents.models import Correspondent
 from documents.models import Document
 from documents.models import DocumentType
@@ -342,67 +345,147 @@ def consumable_document_matches_workflow(
 def existing_document_matches_workflow(
    document: Document,
    trigger: WorkflowTrigger,
-) -> tuple[bool, str]:
+) -> tuple[bool, str | None]:
    """
    Returns True if the Document matches all filters from the workflow trigger,
    False otherwise. Includes a reason if doesn't match
    """

-    trigger_matched = True
-    reason = ""
-
+    # Check content matching algorithm
    if trigger.matching_algorithm > MatchingModel.MATCH_NONE and not matches(
        trigger,
        document,
    ):
-        reason = (
+        return (
+            False,
            f"Document content matching settings for algorithm '{trigger.matching_algorithm}' did not match",
        )
-        trigger_matched = False

-    # Document tags vs trigger has_tags
-    if (
-        trigger.filter_has_tags.all().count() > 0
-        and document.tags.filter(
-            id__in=trigger.filter_has_tags.all().values_list("id"),
-        ).count()
-        == 0
-    ):
-        reason = (
-            f"Document tags {document.tags.all()} do not include"
-            f" {trigger.filter_has_tags.all()}",
-        )
-        trigger_matched = False
+    # Check if any tag filters exist to determine if we need to load document tags
+    trigger_has_tags_qs = trigger.filter_has_tags.all()
+    trigger_has_all_tags_qs = trigger.filter_has_all_tags.all()
+    trigger_has_not_tags_qs = trigger.filter_has_not_tags.all()
+
+    has_tags_filter = trigger_has_tags_qs.exists()
+    has_all_tags_filter = trigger_has_all_tags_qs.exists()
+    has_not_tags_filter = trigger_has_not_tags_qs.exists()
+
+    # Load document tags once if any tag filters exist
+    document_tag_ids = None
+    if has_tags_filter or has_all_tags_filter or has_not_tags_filter:
+        document_tag_ids = set(document.tags.values_list("id", flat=True))
+
+    # Document tags vs trigger has_tags (any of)
+    if has_tags_filter:
+        trigger_has_tag_ids = set(trigger_has_tags_qs.values_list("id", flat=True))
+        if not (document_tag_ids & trigger_has_tag_ids):
+            # For error message, load the actual tag objects
+            return (
+                False,
+                f"Document tags {list(document.tags.all())} do not include {list(trigger_has_tags_qs)}",
+            )
+
+    # Document tags vs trigger has_all_tags (all of)
+    if has_all_tags_filter:
+        required_tag_ids = set(trigger_has_all_tags_qs.values_list("id", flat=True))
+        if not required_tag_ids.issubset(document_tag_ids):
+            return (
+                False,
+                f"Document tags {list(document.tags.all())} do not contain all of {list(trigger_has_all_tags_qs)}",
+            )
+
+    # Document tags vs trigger has_not_tags (none of)
+    if has_not_tags_filter:
+        excluded_tag_ids = set(trigger_has_not_tags_qs.values_list("id", flat=True))
+        if document_tag_ids & excluded_tag_ids:
+            return (
+                False,
+                f"Document tags {list(document.tags.all())} include excluded tags {list(trigger_has_not_tags_qs)}",
+            )

    # Document correspondent vs trigger has_correspondent
    if (
-        trigger.filter_has_correspondent is not None
-        and document.correspondent != trigger.filter_has_correspondent
+        trigger.filter_has_correspondent_id is not None
+        and document.correspondent_id != trigger.filter_has_correspondent_id
    ):
-        reason = (
+        return (
+            False,
            f"Document correspondent {document.correspondent} does not match {trigger.filter_has_correspondent}",
        )
-        trigger_matched = False
+
+    if (
+        document.correspondent_id
+        and trigger.filter_has_not_correspondents.filter(
+            id=document.correspondent_id,
+        ).exists()
+    ):
+        return (
+            False,
+            f"Document correspondent {document.correspondent} is excluded by {list(trigger.filter_has_not_correspondents.all())}",
+        )

    # Document document_type vs trigger has_document_type
    if (
-        trigger.filter_has_document_type is not None
-        and document.document_type != trigger.filter_has_document_type
+        trigger.filter_has_document_type_id is not None
+        and document.document_type_id != trigger.filter_has_document_type_id
    ):
-        reason = (
+        return (
+            False,
            f"Document doc type {document.document_type} does not match {trigger.filter_has_document_type}",
        )
-        trigger_matched = False
+
+    if (
+        document.document_type_id
+        and trigger.filter_has_not_document_types.filter(
+            id=document.document_type_id,
+        ).exists()
+    ):
+        return (
+            False,
+            f"Document doc type {document.document_type} is excluded by {list(trigger.filter_has_not_document_types.all())}",
+        )

    # Document storage_path vs trigger has_storage_path
    if (
-        trigger.filter_has_storage_path is not None
-        and document.storage_path != trigger.filter_has_storage_path
+        trigger.filter_has_storage_path_id is not None
+        and document.storage_path_id != trigger.filter_has_storage_path_id
    ):
-        reason = (
+        return (
+            False,
            f"Document storage path {document.storage_path} does not match {trigger.filter_has_storage_path}",
        )
-        trigger_matched = False
+
+    if (
+        document.storage_path_id
+        and trigger.filter_has_not_storage_paths.filter(
+            id=document.storage_path_id,
+        ).exists()
+    ):
+        return (
+            False,
+            f"Document storage path {document.storage_path} is excluded by {list(trigger.filter_has_not_storage_paths.all())}",
+        )
+
+    # Custom field query check
+    if trigger.filter_custom_field_query:
+        parser = CustomFieldQueryParser("filter_custom_field_query")
+        try:
+            custom_field_q, annotations = parser.parse(
+                trigger.filter_custom_field_query,
+            )
+        except serializers.ValidationError:
+            return (False, "Invalid custom field query configuration")
+
+        qs = (
+            Document.objects.filter(id=document.id)
+            .annotate(**annotations)
+            .filter(custom_field_q)
+        )
+        if not qs.exists():
+            return (
+                False,
+                "Document custom fields do not match the configured custom field query",
+            )

    # Document original_filename vs trigger filename
    if (
@@ -414,13 +497,12 @@ def existing_document_matches_workflow(
            trigger.filter_filename.lower(),
        )
    ):
-        reason = (
-            f"Document filename {document.original_filename} does not match"
-            f" {trigger.filter_filename.lower()}",
+        return (
+            False,
+            f"Document filename {document.original_filename} does not match {trigger.filter_filename.lower()}",
        )
-        trigger_matched = False

-    return (trigger_matched, reason)
+    return (True, None)


 def prefilter_documents_by_workflowtrigger(
@@ -433,31 +515,66 @@ def prefilter_documents_by_workflowtrigger(
    document_matches_workflow in run_workflows
    """

-    if trigger.filter_has_tags.all().count() > 0:
-        documents = documents.filter(
-            tags__in=trigger.filter_has_tags.all(),
-        ).distinct()
+    # Filter for documents that have AT LEAST ONE of the specified tags.
+    if trigger.filter_has_tags.exists():
+        documents = documents.filter(tags__in=trigger.filter_has_tags.all()).distinct()
+
+    # Filter for documents that have ALL of the specified tags.
+    if trigger.filter_has_all_tags.exists():
+        for tag in trigger.filter_has_all_tags.all():
+            documents = documents.filter(tags=tag)
+        # Multiple JOINs can create duplicate results.
+        documents = documents.distinct()
+
+    # Exclude documents that have ANY of the specified tags.
+    if trigger.filter_has_not_tags.exists():
+        documents = documents.exclude(tags__in=trigger.filter_has_not_tags.all())
+
+    # Correspondent, DocumentType, etc. filtering

    if trigger.filter_has_correspondent is not None:
        documents = documents.filter(
            correspondent=trigger.filter_has_correspondent,
        )
+    if trigger.filter_has_not_correspondents.exists():
+        documents = documents.exclude(
+            correspondent__in=trigger.filter_has_not_correspondents.all(),
+        )

    if trigger.filter_has_document_type is not None:
        documents = documents.filter(
            document_type=trigger.filter_has_document_type,
        )
+    if trigger.filter_has_not_document_types.exists():
+        documents = documents.exclude(
+            document_type__in=trigger.filter_has_not_document_types.all(),
+        )

    if trigger.filter_has_storage_path is not None:
        documents = documents.filter(
            storage_path=trigger.filter_has_storage_path,
        )
+    if trigger.filter_has_not_storage_paths.exists():
+        documents = documents.exclude(
+            storage_path__in=trigger.filter_has_not_storage_paths.all(),
+        )

-    if trigger.filter_filename is not None and len(trigger.filter_filename) > 0:
-        # the true fnmatch will actually run later so we just want a loose filter here
+    # Custom Field & Filename Filtering
+
+    if trigger.filter_custom_field_query:
+        parser = CustomFieldQueryParser("filter_custom_field_query")
+        try:
+            custom_field_q, annotations = parser.parse(
+                trigger.filter_custom_field_query,
+            )
+        except serializers.ValidationError:
+            return documents.none()
+
+        documents = documents.annotate(**annotations).filter(custom_field_q)
+
+    if trigger.filter_filename:
        regex = fnmatch_translate(trigger.filter_filename).lstrip("^").rstrip("$")
-        regex = f"(?i){regex}"
-        documents = documents.filter(original_filename__regex=regex)
+        documents = documents.filter(original_filename__iregex=regex)

    return documents

@@ -472,13 +589,34 @@ def document_matches_workflow(
    settings from the workflow trigger, False otherwise
    """

+    triggers_queryset = (
+        workflow.triggers.filter(
+            type=trigger_type,
+        )
+        .select_related(
+            "filter_mailrule",
+            "filter_has_document_type",
+            "filter_has_correspondent",
+            "filter_has_storage_path",
+            "schedule_date_custom_field",
+        )
+        .prefetch_related(
+            "filter_has_tags",
+            "filter_has_all_tags",
+            "filter_has_not_tags",
+            "filter_has_not_document_types",
+            "filter_has_not_correspondents",
+            "filter_has_not_storage_paths",
+        )
+    )
+
    trigger_matched = True
-    if workflow.triggers.filter(type=trigger_type).count() == 0:
+    if not triggers_queryset.exists():
        trigger_matched = False
        logger.info(f"Document did not match {workflow}")
        logger.debug(f"No matching triggers with type {trigger_type} found")
    else:
-        for trigger in workflow.triggers.filter(type=trigger_type):
+        for trigger in triggers_queryset:
            if trigger_type == WorkflowTrigger.WorkflowTriggerType.CONSUMPTION:
                trigger_matched, reason = consumable_document_matches_workflow(
                    document,