mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-05-27 13:18:18 -05:00
Enhancement: try filtering large querysets for scheduled workflow
This commit is contained in:
parent
eb07876657
commit
882b15378a
@ -18,6 +18,8 @@ from documents.models import WorkflowTrigger
|
||||
from documents.permissions import get_objects_for_user_owner_aware
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from django.db.models import QuerySet
|
||||
|
||||
from documents.classifier import DocumentClassifier
|
||||
|
||||
logger = logging.getLogger("paperless.matching")
|
||||
@ -389,6 +391,39 @@ def existing_document_matches_workflow(
|
||||
return (trigger_matched, reason)
|
||||
|
||||
|
||||
def _required_filename_fragments(pattern: str) -> list[str]:
    """
    Returns the literal pieces of a filename wildcard pattern that every
    matching filename has to contain, or an empty list when no safe substring
    check can be derived from the pattern.
    """
    if "[" in pattern:
        # Character classes ("[abc]") cannot be reduced to substring checks, so
        # do not narrow at all rather than risk dropping a genuine match.
        return []
    # "*" and "?" are the remaining wildcards; whatever sits between them is
    # literal text.
    return [piece for piece in pattern.replace("?", "*").split("*") if piece]


def filter_documents_by_workflowtrigger_criteria(
    documents: QuerySet[Document],
    trigger: WorkflowTrigger,
) -> QuerySet[Document]:
    """
    Narrows the documents queryset using the filters set on the workflow trigger.

    Each filter is only applied when it is set on the trigger:

    - filter_has_tags: keeps documents carrying at least one of the tags
    - filter_has_correspondent: keeps documents with that correspondent
    - filter_has_document_type: keeps documents with that document type
    - filter_filename: keeps documents whose original filename contains every
      literal piece of the wildcard pattern (case-insensitive)

    The filename check is deliberately loose: it can keep a document that the
    wildcard pattern would not fully match, but it does not discard one because
    of the wildcard characters themselves.
    NOTE(review): assumes callers that need exact wildcard matching still run
    the per-document trigger match on the result - confirm.

    Returns the (possibly unchanged) queryset.
    """
    required_tags = trigger.filter_has_tags.all()
    if required_tags.exists():
        # A document without tags cannot carry one of the required tags, so only
        # tagged documents are kept. distinct() collapses the duplicate rows
        # produced when a document carries several of the tags.
        documents = documents.filter(tags__in=required_tags).distinct()

    if trigger.filter_has_correspondent is not None:
        documents = documents.filter(
            correspondent=trigger.filter_has_correspondent,
        )

    if trigger.filter_has_document_type is not None:
        documents = documents.filter(
            document_type=trigger.filter_has_document_type,
        )

    if trigger.filter_filename:
        # The pattern is a wildcard expression such as "*.pdf"; handing it to
        # icontains verbatim would demand a literal "*" in the filename.
        for fragment in _required_filename_fragments(trigger.filter_filename):
            documents = documents.filter(original_filename__icontains=fragment)

    return documents
|
||||
|
||||
|
||||
def document_matches_workflow(
|
||||
document: ConsumableDocument | Document,
|
||||
workflow: Workflow,
|
||||
|
@ -32,6 +32,7 @@ from documents.data_models import DocumentMetadataOverrides
|
||||
from documents.double_sided import CollatePlugin
|
||||
from documents.file_handling import create_source_path_directory
|
||||
from documents.file_handling import generate_unique_filename
|
||||
from documents.matching import filter_documents_by_workflowtrigger_criteria
|
||||
from documents.models import Correspondent
|
||||
from documents.models import CustomFieldInstance
|
||||
from documents.models import Document
|
||||
@ -459,6 +460,13 @@ def check_scheduled_workflows():
|
||||
|
||||
documents = Document.objects.filter(id__in=matched_ids)
|
||||
|
||||
# Workflows initially matched against one document at a time, so speed things up
|
||||
# by filtering documents by the trigger criteria
|
||||
documents = filter_documents_by_workflowtrigger_criteria(
|
||||
documents,
|
||||
trigger,
|
||||
)
|
||||
|
||||
if documents.count() > 0:
|
||||
logger.debug(
|
||||
f"Found {documents.count()} documents for trigger {trigger}",
|
||||
|
Loading…
x
Reference in New Issue
Block a user