Merge pull request #4037 from andreheuer/dev

Enhancement: add task id to pre/post consume script as env
This commit is contained in:
Trenton H 2023-09-08 10:00:05 -07:00 committed by GitHub
commit 714995877a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 39 additions and 19 deletions

View File

@ -126,6 +126,7 @@ script can access the following relevant environment variables set:
| ----------------------- | ------------------------------------------------------------ |
| `DOCUMENT_SOURCE_PATH` | Original path of the consumed document |
| `DOCUMENT_WORKING_PATH` | Path to a copy of the original that consumption will work on |
| `TASK_ID` | UUID of the task used to process the new document (if any) |
!!! note
@ -168,21 +169,22 @@ Executed after the consumer has successfully processed a document and
has moved it into paperless. It receives the following environment
variables:
| Environment Variable | Description |
| ---------------------------- | --------------------------------------------- |
| `DOCUMENT_ID` | Database primary key of the document |
| `DOCUMENT_FILE_NAME` | Formatted filename, not including paths |
| `DOCUMENT_CREATED` | Date & time when document created |
| `DOCUMENT_MODIFIED` | Date & time when document was last modified |
| `DOCUMENT_ADDED` | Date & time when document was added |
| `DOCUMENT_SOURCE_PATH` | Path to the original document file |
| `DOCUMENT_ARCHIVE_PATH` | Path to the generate archive file (if any) |
| `DOCUMENT_THUMBNAIL_PATH` | Path to the generated thumbnail |
| `DOCUMENT_DOWNLOAD_URL` | URL for document download |
| `DOCUMENT_THUMBNAIL_URL` | URL for the document thumbnail |
| `DOCUMENT_CORRESPONDENT` | Assigned correspondent (if any) |
| `DOCUMENT_TAGS` | Comma separated list of tags applied (if any) |
| `DOCUMENT_ORIGINAL_FILENAME` | Filename of original document |
| Environment Variable | Description |
| ---------------------------- | ---------------------------------------------- |
| `DOCUMENT_ID` | Database primary key of the document |
| `DOCUMENT_FILE_NAME` | Formatted filename, not including paths |
| `DOCUMENT_CREATED` | Date & time when the document was created |
| `DOCUMENT_MODIFIED` | Date & time when document was last modified |
| `DOCUMENT_ADDED` | Date & time when document was added |
| `DOCUMENT_SOURCE_PATH` | Path to the original document file |
| `DOCUMENT_ARCHIVE_PATH` | Path to the generated archive file (if any) |
| `DOCUMENT_THUMBNAIL_PATH` | Path to the generated thumbnail |
| `DOCUMENT_DOWNLOAD_URL` | URL for document download |
| `DOCUMENT_THUMBNAIL_URL` | URL for the document thumbnail |
| `DOCUMENT_CORRESPONDENT` | Assigned correspondent (if any) |
| `DOCUMENT_TAGS` | Comma separated list of tags applied (if any) |
| `DOCUMENT_ORIGINAL_FILENAME` | Filename of original document |
| `TASK_ID` | Task UUID used to import the document (if any) |
The script can be in any language. A simple shell script example:

View File

@ -209,6 +209,7 @@ class Consumer(LoggingMixin):
script_env = os.environ.copy()
script_env["DOCUMENT_SOURCE_PATH"] = original_file_path
script_env["DOCUMENT_WORKING_PATH"] = working_file_path
script_env["TASK_ID"] = self.task_id or ""
try:
completed_proc = run(
@ -279,6 +280,7 @@ class Consumer(LoggingMixin):
",".join(document.tags.all().values_list("name", flat=True)),
)
script_env["DOCUMENT_ORIGINAL_FILENAME"] = str(document.original_filename)
script_env["TASK_ID"] = self.task_id or ""
try:
completed_proc = run(

View File

@ -7,6 +7,7 @@ from typing import Type
import tqdm
from asgiref.sync import async_to_sync
from celery import Task
from celery import shared_task
from channels.layers import get_channel_layer
from django.conf import settings
@ -91,8 +92,9 @@ def train_classifier():
logger.warning("Classifier error: " + str(e))
@shared_task
@shared_task(bind=True)
def consume_file(
self: Task,
input_doc: ConsumableDocument,
overrides: Optional[DocumentMetadataOverrides] = None,
):
@ -163,6 +165,7 @@ def consume_file(
override_created=overrides.created,
override_asn=overrides.asn,
override_owner_id=overrides.owner_id,
task_id=self.request.id,
)
if document:

View File

@ -4,6 +4,7 @@ import re
import shutil
import stat
import tempfile
import uuid
from unittest import mock
from unittest.mock import MagicMock
@ -862,6 +863,7 @@ class PreConsumeTestCase(TestCase):
c = Consumer()
c.original_path = "path-to-file"
c.path = "/tmp/somewhere/path-to-file"
c.task_id = str(uuid.uuid4())
c.run_pre_consume_script()
m.assert_called_once()
@ -877,6 +879,7 @@ class PreConsumeTestCase(TestCase):
subset = {
"DOCUMENT_SOURCE_PATH": c.original_path,
"DOCUMENT_WORKING_PATH": c.path,
"TASK_ID": c.task_id,
}
self.assertDictEqual(environment, {**environment, **subset})
@ -937,7 +940,10 @@ class PreConsumeTestCase(TestCase):
with override_settings(PRE_CONSUME_SCRIPT=script.name):
c = Consumer()
c.path = "path-to-file"
self.assertRaises(ConsumerError, c.run_pre_consume_script)
self.assertRaises(
ConsumerError,
c.run_pre_consume_script,
)
class PostConsumeTestCase(TestCase):
@ -968,7 +974,11 @@ class PostConsumeTestCase(TestCase):
doc = Document.objects.create(title="Test", mime_type="application/pdf")
c = Consumer()
c.filename = "somefile.pdf"
self.assertRaises(ConsumerError, c.run_post_consume_script, doc)
self.assertRaises(
ConsumerError,
c.run_post_consume_script,
doc,
)
@mock.patch("documents.consumer.run")
def test_post_consume_script_simple(self, m):
@ -995,7 +1005,9 @@ class PostConsumeTestCase(TestCase):
doc.tags.add(tag1)
doc.tags.add(tag2)
Consumer().run_post_consume_script(doc)
consumer = Consumer()
consumer.task_id = str(uuid.uuid4())
consumer.run_post_consume_script(doc)
m.assert_called_once()
@ -1017,6 +1029,7 @@ class PostConsumeTestCase(TestCase):
"DOCUMENT_THUMBNAIL_URL": f"/api/documents/{doc.pk}/thumb/",
"DOCUMENT_CORRESPONDENT": "my_bank",
"DOCUMENT_TAGS": "a,b",
"TASK_ID": consumer.task_id,
}
self.assertDictEqual(environment, {**environment, **subset})