Feature: Advanced Workflow Trigger Filters (#11029)

This commit is contained in:
shamoon
2025-10-13 15:23:56 -07:00
committed by GitHub
parent d394053ddc
commit f6c004183e
16 changed files with 2267 additions and 171 deletions

View File

@@ -6,8 +6,11 @@ from fnmatch import fnmatch
from fnmatch import translate as fnmatch_translate
from typing import TYPE_CHECKING
from rest_framework import serializers
from documents.data_models import ConsumableDocument
from documents.data_models import DocumentSource
from documents.filters import CustomFieldQueryParser
from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
@@ -342,67 +345,147 @@ def consumable_document_matches_workflow(
def existing_document_matches_workflow(
document: Document,
trigger: WorkflowTrigger,
) -> tuple[bool, str]:
) -> tuple[bool, str | None]:
"""
Returns True if the Document matches all filters from the workflow trigger,
False otherwise. Includes a reason if doesn't match
"""
trigger_matched = True
reason = ""
# Check content matching algorithm
if trigger.matching_algorithm > MatchingModel.MATCH_NONE and not matches(
trigger,
document,
):
reason = (
return (
False,
f"Document content matching settings for algorithm '{trigger.matching_algorithm}' did not match",
)
trigger_matched = False
# Document tags vs trigger has_tags
if (
trigger.filter_has_tags.all().count() > 0
and document.tags.filter(
id__in=trigger.filter_has_tags.all().values_list("id"),
).count()
== 0
):
reason = (
f"Document tags {document.tags.all()} do not include"
f" {trigger.filter_has_tags.all()}",
)
trigger_matched = False
# Check if any tag filters exist to determine if we need to load document tags
trigger_has_tags_qs = trigger.filter_has_tags.all()
trigger_has_all_tags_qs = trigger.filter_has_all_tags.all()
trigger_has_not_tags_qs = trigger.filter_has_not_tags.all()
has_tags_filter = trigger_has_tags_qs.exists()
has_all_tags_filter = trigger_has_all_tags_qs.exists()
has_not_tags_filter = trigger_has_not_tags_qs.exists()
# Load document tags once if any tag filters exist
document_tag_ids = None
if has_tags_filter or has_all_tags_filter or has_not_tags_filter:
document_tag_ids = set(document.tags.values_list("id", flat=True))
# Document tags vs trigger has_tags (any of)
if has_tags_filter:
trigger_has_tag_ids = set(trigger_has_tags_qs.values_list("id", flat=True))
if not (document_tag_ids & trigger_has_tag_ids):
# For error message, load the actual tag objects
return (
False,
f"Document tags {list(document.tags.all())} do not include {list(trigger_has_tags_qs)}",
)
# Document tags vs trigger has_all_tags (all of)
if has_all_tags_filter:
required_tag_ids = set(trigger_has_all_tags_qs.values_list("id", flat=True))
if not required_tag_ids.issubset(document_tag_ids):
return (
False,
f"Document tags {list(document.tags.all())} do not contain all of {list(trigger_has_all_tags_qs)}",
)
# Document tags vs trigger has_not_tags (none of)
if has_not_tags_filter:
excluded_tag_ids = set(trigger_has_not_tags_qs.values_list("id", flat=True))
if document_tag_ids & excluded_tag_ids:
return (
False,
f"Document tags {list(document.tags.all())} include excluded tags {list(trigger_has_not_tags_qs)}",
)
# Document correspondent vs trigger has_correspondent
if (
trigger.filter_has_correspondent is not None
and document.correspondent != trigger.filter_has_correspondent
trigger.filter_has_correspondent_id is not None
and document.correspondent_id != trigger.filter_has_correspondent_id
):
reason = (
return (
False,
f"Document correspondent {document.correspondent} does not match {trigger.filter_has_correspondent}",
)
trigger_matched = False
if (
document.correspondent_id
and trigger.filter_has_not_correspondents.filter(
id=document.correspondent_id,
).exists()
):
return (
False,
f"Document correspondent {document.correspondent} is excluded by {list(trigger.filter_has_not_correspondents.all())}",
)
# Document document_type vs trigger has_document_type
if (
trigger.filter_has_document_type is not None
and document.document_type != trigger.filter_has_document_type
trigger.filter_has_document_type_id is not None
and document.document_type_id != trigger.filter_has_document_type_id
):
reason = (
return (
False,
f"Document doc type {document.document_type} does not match {trigger.filter_has_document_type}",
)
trigger_matched = False
if (
document.document_type_id
and trigger.filter_has_not_document_types.filter(
id=document.document_type_id,
).exists()
):
return (
False,
f"Document doc type {document.document_type} is excluded by {list(trigger.filter_has_not_document_types.all())}",
)
# Document storage_path vs trigger has_storage_path
if (
trigger.filter_has_storage_path is not None
and document.storage_path != trigger.filter_has_storage_path
trigger.filter_has_storage_path_id is not None
and document.storage_path_id != trigger.filter_has_storage_path_id
):
reason = (
return (
False,
f"Document storage path {document.storage_path} does not match {trigger.filter_has_storage_path}",
)
trigger_matched = False
if (
document.storage_path_id
and trigger.filter_has_not_storage_paths.filter(
id=document.storage_path_id,
).exists()
):
return (
False,
f"Document storage path {document.storage_path} is excluded by {list(trigger.filter_has_not_storage_paths.all())}",
)
# Custom field query check
if trigger.filter_custom_field_query:
parser = CustomFieldQueryParser("filter_custom_field_query")
try:
custom_field_q, annotations = parser.parse(
trigger.filter_custom_field_query,
)
except serializers.ValidationError:
return (False, "Invalid custom field query configuration")
qs = (
Document.objects.filter(id=document.id)
.annotate(**annotations)
.filter(custom_field_q)
)
if not qs.exists():
return (
False,
"Document custom fields do not match the configured custom field query",
)
# Document original_filename vs trigger filename
if (
@@ -414,13 +497,12 @@ def existing_document_matches_workflow(
trigger.filter_filename.lower(),
)
):
reason = (
f"Document filename {document.original_filename} does not match"
f" {trigger.filter_filename.lower()}",
return (
False,
f"Document filename {document.original_filename} does not match {trigger.filter_filename.lower()}",
)
trigger_matched = False
return (trigger_matched, reason)
return (True, None)
def prefilter_documents_by_workflowtrigger(
@@ -433,31 +515,66 @@ def prefilter_documents_by_workflowtrigger(
document_matches_workflow in run_workflows
"""
if trigger.filter_has_tags.all().count() > 0:
documents = documents.filter(
tags__in=trigger.filter_has_tags.all(),
).distinct()
# Filter for documents that have AT LEAST ONE of the specified tags.
if trigger.filter_has_tags.exists():
documents = documents.filter(tags__in=trigger.filter_has_tags.all()).distinct()
# Filter for documents that have ALL of the specified tags.
if trigger.filter_has_all_tags.exists():
for tag in trigger.filter_has_all_tags.all():
documents = documents.filter(tags=tag)
# Multiple JOINs can create duplicate results.
documents = documents.distinct()
# Exclude documents that have ANY of the specified tags.
if trigger.filter_has_not_tags.exists():
documents = documents.exclude(tags__in=trigger.filter_has_not_tags.all())
# Correspondent, DocumentType, etc. filtering
if trigger.filter_has_correspondent is not None:
documents = documents.filter(
correspondent=trigger.filter_has_correspondent,
)
if trigger.filter_has_not_correspondents.exists():
documents = documents.exclude(
correspondent__in=trigger.filter_has_not_correspondents.all(),
)
if trigger.filter_has_document_type is not None:
documents = documents.filter(
document_type=trigger.filter_has_document_type,
)
if trigger.filter_has_not_document_types.exists():
documents = documents.exclude(
document_type__in=trigger.filter_has_not_document_types.all(),
)
if trigger.filter_has_storage_path is not None:
documents = documents.filter(
storage_path=trigger.filter_has_storage_path,
)
if trigger.filter_has_not_storage_paths.exists():
documents = documents.exclude(
storage_path__in=trigger.filter_has_not_storage_paths.all(),
)
if trigger.filter_filename is not None and len(trigger.filter_filename) > 0:
# the true fnmatch will actually run later so we just want a loose filter here
# Custom Field & Filename Filtering
if trigger.filter_custom_field_query:
parser = CustomFieldQueryParser("filter_custom_field_query")
try:
custom_field_q, annotations = parser.parse(
trigger.filter_custom_field_query,
)
except serializers.ValidationError:
return documents.none()
documents = documents.annotate(**annotations).filter(custom_field_q)
if trigger.filter_filename:
regex = fnmatch_translate(trigger.filter_filename).lstrip("^").rstrip("$")
regex = f"(?i){regex}"
documents = documents.filter(original_filename__regex=regex)
documents = documents.filter(original_filename__iregex=regex)
return documents
@@ -472,13 +589,34 @@ def document_matches_workflow(
settings from the workflow trigger, False otherwise
"""
triggers_queryset = (
workflow.triggers.filter(
type=trigger_type,
)
.select_related(
"filter_mailrule",
"filter_has_document_type",
"filter_has_correspondent",
"filter_has_storage_path",
"schedule_date_custom_field",
)
.prefetch_related(
"filter_has_tags",
"filter_has_all_tags",
"filter_has_not_tags",
"filter_has_not_document_types",
"filter_has_not_correspondents",
"filter_has_not_storage_paths",
)
)
trigger_matched = True
if workflow.triggers.filter(type=trigger_type).count() == 0:
if not triggers_queryset.exists():
trigger_matched = False
logger.info(f"Document did not match {workflow}")
logger.debug(f"No matching triggers with type {trigger_type} found")
else:
for trigger in workflow.triggers.filter(type=trigger_type):
for trigger in triggers_queryset:
if trigger_type == WorkflowTrigger.WorkflowTriggerType.CONSUMPTION:
trigger_matched, reason = consumable_document_matches_workflow(
document,