Fix: pass working file to workflows, pickle file bytes (#8741)

This commit is contained in:
shamoon 2025-01-14 23:03:40 -08:00 committed by GitHub
parent e1d6b4a9ac
commit d61b2bbfc6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 33 additions and 7 deletions

View File

@ -557,6 +557,9 @@ class ConsumerPlugin(
document=document, document=document,
logging_group=self.logging_group, logging_group=self.logging_group,
classifier=classifier, classifier=classifier,
original_file=self.unmodified_original
if self.unmodified_original
else self.working_copy,
) )
# After everything is in the database, copy the files into # After everything is in the database, copy the files into

View File

@ -1,6 +1,7 @@
import logging import logging
import os import os
import shutil import shutil
from pathlib import Path
import httpx import httpx
from celery import shared_task from celery import shared_task
@ -539,11 +540,19 @@ def add_to_index(sender, document, **kwargs):
index.add_or_update_document(document) index.add_or_update_document(document)
def run_workflows_added(
    sender,
    document: Document,
    logging_group=None,
    original_file=None,
    **kwargs,
):
    """Run the DOCUMENT_ADDED workflows for a newly added document.

    Thin wrapper that forwards its arguments to ``run_workflows`` with the
    trigger type fixed to ``DOCUMENT_ADDED`` and no metadata overrides.

    Args:
        sender: Accepted for the caller's benefit; not used here.
        document: The document that was just added.
        logging_group: Forwarded unchanged to ``run_workflows``.
        original_file: Optional path to the file being consumed, forwarded
            unchanged. NOTE(review): ``run_workflows`` appears to prefer this
            over ``document.source_path`` when attaching the document to
            emails/webhooks -- confirm against its full body.
        **kwargs: Any additional keyword arguments; ignored.
    """
    # Keyword arguments are used throughout so that adding parameters to
    # run_workflows cannot silently shift positional values.
    run_workflows(
        trigger_type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,
        document=document,
        logging_group=logging_group,
        overrides=None,
        original_file=original_file,
    )
@ -584,6 +593,7 @@ def run_workflows(
document: Document | ConsumableDocument, document: Document | ConsumableDocument,
logging_group=None, logging_group=None,
overrides: DocumentMetadataOverrides | None = None, overrides: DocumentMetadataOverrides | None = None,
original_file: Path | None = None,
) -> tuple[DocumentMetadataOverrides, str] | None: ) -> tuple[DocumentMetadataOverrides, str] | None:
"""Run workflows which match a Document (or ConsumableDocument) for a specific trigger type. """Run workflows which match a Document (or ConsumableDocument) for a specific trigger type.
@ -946,7 +956,11 @@ def run_workflows(
# Something could be renaming the file concurrently so it can't be attached # Something could be renaming the file concurrently so it can't be attached
with FileLock(settings.MEDIA_LOCK): with FileLock(settings.MEDIA_LOCK):
document.refresh_from_db() document.refresh_from_db()
email.attach_file(document.source_path) email.attach_file(
original_file
if original_file is not None
else document.source_path,
)
n_messages = email.send() n_messages = email.send()
logger.debug( logger.debug(
f"Sent {n_messages} notification email(s) to {action.email.to}", f"Sent {n_messages} notification email(s) to {action.email.to}",
@ -1023,9 +1037,18 @@ def run_workflows(
) )
files = None files = None
if action.webhook.include_document: if action.webhook.include_document:
with open(document.source_path, "rb") as f: with open(
original_file
if original_file is not None
else document.source_path,
"rb",
) as f:
files = { files = {
"file": (document.original_filename, f, document.mime_type), "file": (
document.original_filename,
f.read(),
document.mime_type,
),
} }
send_webhook.delay( send_webhook.delay(
url=action.webhook.url, url=action.webhook.url,