Merge pull request #4037 from andreheuer/dev

Enhancement: add task id to pre/post consume script as env
This commit is contained in:
Trenton H 2023-09-08 10:00:05 -07:00 committed by GitHub
commit 714995877a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 39 additions and 19 deletions

View File

@ -126,6 +126,7 @@ script can access the following relevant environment variables set:
| ----------------------- | ------------------------------------------------------------ | | ----------------------- | ------------------------------------------------------------ |
| `DOCUMENT_SOURCE_PATH` | Original path of the consumed document | | `DOCUMENT_SOURCE_PATH` | Original path of the consumed document |
| `DOCUMENT_WORKING_PATH` | Path to a copy of the original that consumption will work on | | `DOCUMENT_WORKING_PATH` | Path to a copy of the original that consumption will work on |
| `TASK_ID` | UUID of the task used to process the new document (if any) |
!!! note !!! note
@ -168,21 +169,22 @@ Executed after the consumer has successfully processed a document and
has moved it into paperless. It receives the following environment has moved it into paperless. It receives the following environment
variables: variables:
| Environment Variable | Description | | Environment Variable | Description |
| ---------------------------- | --------------------------------------------- | | ---------------------------- | ---------------------------------------------- |
| `DOCUMENT_ID` | Database primary key of the document | | `DOCUMENT_ID` | Database primary key of the document |
| `DOCUMENT_FILE_NAME` | Formatted filename, not including paths | | `DOCUMENT_FILE_NAME` | Formatted filename, not including paths |
| `DOCUMENT_CREATED` | Date & time when document was created | | `DOCUMENT_CREATED` | Date & time when document was created |
| `DOCUMENT_MODIFIED` | Date & time when document was last modified | | `DOCUMENT_MODIFIED` | Date & time when document was last modified |
| `DOCUMENT_ADDED` | Date & time when document was added | | `DOCUMENT_ADDED` | Date & time when document was added |
| `DOCUMENT_SOURCE_PATH` | Path to the original document file | | `DOCUMENT_SOURCE_PATH` | Path to the original document file |
| `DOCUMENT_ARCHIVE_PATH` | Path to the generated archive file (if any) | | `DOCUMENT_ARCHIVE_PATH` | Path to the generated archive file (if any) |
| `DOCUMENT_THUMBNAIL_PATH` | Path to the generated thumbnail | | `DOCUMENT_THUMBNAIL_PATH` | Path to the generated thumbnail |
| `DOCUMENT_DOWNLOAD_URL` | URL for document download | | `DOCUMENT_DOWNLOAD_URL` | URL for document download |
| `DOCUMENT_THUMBNAIL_URL` | URL for the document thumbnail | | `DOCUMENT_THUMBNAIL_URL` | URL for the document thumbnail |
| `DOCUMENT_CORRESPONDENT` | Assigned correspondent (if any) | | `DOCUMENT_CORRESPONDENT` | Assigned correspondent (if any) |
| `DOCUMENT_TAGS` | Comma-separated list of tags applied (if any) | | `DOCUMENT_TAGS` | Comma-separated list of tags applied (if any) |
| `DOCUMENT_ORIGINAL_FILENAME` | Filename of original document | | `DOCUMENT_ORIGINAL_FILENAME` | Filename of original document |
| `TASK_ID` | Task UUID used to import the document (if any) |
The script can be in any language. A simple shell script example: The script can be in any language. A simple shell script example:

View File

@ -209,6 +209,7 @@ class Consumer(LoggingMixin):
script_env = os.environ.copy() script_env = os.environ.copy()
script_env["DOCUMENT_SOURCE_PATH"] = original_file_path script_env["DOCUMENT_SOURCE_PATH"] = original_file_path
script_env["DOCUMENT_WORKING_PATH"] = working_file_path script_env["DOCUMENT_WORKING_PATH"] = working_file_path
script_env["TASK_ID"] = self.task_id or ""
try: try:
completed_proc = run( completed_proc = run(
@ -279,6 +280,7 @@ class Consumer(LoggingMixin):
",".join(document.tags.all().values_list("name", flat=True)), ",".join(document.tags.all().values_list("name", flat=True)),
) )
script_env["DOCUMENT_ORIGINAL_FILENAME"] = str(document.original_filename) script_env["DOCUMENT_ORIGINAL_FILENAME"] = str(document.original_filename)
script_env["TASK_ID"] = self.task_id or ""
try: try:
completed_proc = run( completed_proc = run(

View File

@ -7,6 +7,7 @@ from typing import Type
import tqdm import tqdm
from asgiref.sync import async_to_sync from asgiref.sync import async_to_sync
from celery import Task
from celery import shared_task from celery import shared_task
from channels.layers import get_channel_layer from channels.layers import get_channel_layer
from django.conf import settings from django.conf import settings
@ -91,8 +92,9 @@ def train_classifier():
logger.warning("Classifier error: " + str(e)) logger.warning("Classifier error: " + str(e))
@shared_task @shared_task(bind=True)
def consume_file( def consume_file(
self: Task,
input_doc: ConsumableDocument, input_doc: ConsumableDocument,
overrides: Optional[DocumentMetadataOverrides] = None, overrides: Optional[DocumentMetadataOverrides] = None,
): ):
@ -163,6 +165,7 @@ def consume_file(
override_created=overrides.created, override_created=overrides.created,
override_asn=overrides.asn, override_asn=overrides.asn,
override_owner_id=overrides.owner_id, override_owner_id=overrides.owner_id,
task_id=self.request.id,
) )
if document: if document:

View File

@ -4,6 +4,7 @@ import re
import shutil import shutil
import stat import stat
import tempfile import tempfile
import uuid
from unittest import mock from unittest import mock
from unittest.mock import MagicMock from unittest.mock import MagicMock
@ -862,6 +863,7 @@ class PreConsumeTestCase(TestCase):
c = Consumer() c = Consumer()
c.original_path = "path-to-file" c.original_path = "path-to-file"
c.path = "/tmp/somewhere/path-to-file" c.path = "/tmp/somewhere/path-to-file"
c.task_id = str(uuid.uuid4())
c.run_pre_consume_script() c.run_pre_consume_script()
m.assert_called_once() m.assert_called_once()
@ -877,6 +879,7 @@ class PreConsumeTestCase(TestCase):
subset = { subset = {
"DOCUMENT_SOURCE_PATH": c.original_path, "DOCUMENT_SOURCE_PATH": c.original_path,
"DOCUMENT_WORKING_PATH": c.path, "DOCUMENT_WORKING_PATH": c.path,
"TASK_ID": c.task_id,
} }
self.assertDictEqual(environment, {**environment, **subset}) self.assertDictEqual(environment, {**environment, **subset})
@ -937,7 +940,10 @@ class PreConsumeTestCase(TestCase):
with override_settings(PRE_CONSUME_SCRIPT=script.name): with override_settings(PRE_CONSUME_SCRIPT=script.name):
c = Consumer() c = Consumer()
c.path = "path-to-file" c.path = "path-to-file"
self.assertRaises(ConsumerError, c.run_pre_consume_script) self.assertRaises(
ConsumerError,
c.run_pre_consume_script,
)
class PostConsumeTestCase(TestCase): class PostConsumeTestCase(TestCase):
@ -968,7 +974,11 @@ class PostConsumeTestCase(TestCase):
doc = Document.objects.create(title="Test", mime_type="application/pdf") doc = Document.objects.create(title="Test", mime_type="application/pdf")
c = Consumer() c = Consumer()
c.filename = "somefile.pdf" c.filename = "somefile.pdf"
self.assertRaises(ConsumerError, c.run_post_consume_script, doc) self.assertRaises(
ConsumerError,
c.run_post_consume_script,
doc,
)
@mock.patch("documents.consumer.run") @mock.patch("documents.consumer.run")
def test_post_consume_script_simple(self, m): def test_post_consume_script_simple(self, m):
@ -995,7 +1005,9 @@ class PostConsumeTestCase(TestCase):
doc.tags.add(tag1) doc.tags.add(tag1)
doc.tags.add(tag2) doc.tags.add(tag2)
Consumer().run_post_consume_script(doc) consumer = Consumer()
consumer.task_id = str(uuid.uuid4())
consumer.run_post_consume_script(doc)
m.assert_called_once() m.assert_called_once()
@ -1017,6 +1029,7 @@ class PostConsumeTestCase(TestCase):
"DOCUMENT_THUMBNAIL_URL": f"/api/documents/{doc.pk}/thumb/", "DOCUMENT_THUMBNAIL_URL": f"/api/documents/{doc.pk}/thumb/",
"DOCUMENT_CORRESPONDENT": "my_bank", "DOCUMENT_CORRESPONDENT": "my_bank",
"DOCUMENT_TAGS": "a,b", "DOCUMENT_TAGS": "a,b",
"TASK_ID": consumer.task_id,
} }
self.assertDictEqual(environment, {**environment, **subset}) self.assertDictEqual(environment, {**environment, **subset})