From 08ae3f8771a64bba05f2eac99784b54bde22e29c Mon Sep 17 00:00:00 2001 From: ziprandom Date: Tue, 21 Jun 2022 15:54:51 +0000 Subject: [PATCH 1/4] use env variables in pre-|post-consume scripts + instead of positional arguments because it's easier to use in the shell script and easier to read in the python code. --- docs/advanced_usage.rst | 26 ++++++++++---------- scripts/post-consumption-example.sh | 9 ------- src/documents/consumer.py | 37 +++++++++++++++++++---------- 3 files changed, 38 insertions(+), 34 deletions(-) diff --git a/docs/advanced_usage.rst b/docs/advanced_usage.rst index 6449c478b..bc194ce5a 100644 --- a/docs/advanced_usage.rst +++ b/docs/advanced_usage.rst @@ -121,10 +121,10 @@ Pre-consumption script ====================== Executed after the consumer sees a new document in the consumption folder, but -before any processing of the document is performed. This script receives exactly -one argument: +before any processing of the document is performed. This script can access the +following relevant environment variables: -* Document file name +* ``DOCUMENT_SOURCE_PATH`` A simple but common example for this would be creating a simple script like this: @@ -134,7 +134,7 @@ this: .. code:: bash #!/usr/bin/env bash - pdf2pdfocr.py -i ${1} + pdf2pdfocr.py -i ${DOCUMENT_SOURCE_PATH} ``/etc/paperless.conf`` @@ -157,16 +157,16 @@ Post-consumption script ======================= Executed after the consumer has successfully processed a document and has moved it -into paperless. 
It receives the following environment variables: -* Document id -* Generated file name -* Source path -* Thumbnail path -* Download URL -* Thumbnail URL -* Correspondent -* Tags +* ``DOCUMENT_ID`` +* ``DOCUMENT_FILE_NAME`` +* ``DOCUMENT_SOURCE_PATH`` +* ``DOCUMENT_THUMBNAIL_PATH`` +* ``DOCUMENT_DOWNLOAD_URL`` +* ``DOCUMENT_THUMBNAIL_URL`` +* ``DOCUMENT_CORRESPONDENT`` +* ``DOCUMENT_TAGS`` The script can be in any language, but for a simple shell script example, you can take a look at `post-consumption-example.sh`_ in this project. diff --git a/scripts/post-consumption-example.sh b/scripts/post-consumption-example.sh index 6edd3e158..b4c40090c 100755 --- a/scripts/post-consumption-example.sh +++ b/scripts/post-consumption-example.sh @@ -1,14 +1,5 @@ #!/usr/bin/env bash -DOCUMENT_ID=${1} -DOCUMENT_FILE_NAME=${2} -DOCUMENT_SOURCE_PATH=${3} -DOCUMENT_THUMBNAIL_PATH=${4} -DOCUMENT_DOWNLOAD_URL=${5} -DOCUMENT_THUMBNAIL_URL=${6} -DOCUMENT_CORRESPONDENT=${7} -DOCUMENT_TAGS=${8} - echo " A document with an id of ${DOCUMENT_ID} was just consumed. 
I know the diff --git a/src/documents/consumer.py b/src/documents/consumer.py index e5794ce4f..42666e107 100644 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -134,8 +134,11 @@ class Consumer(LoggingMixin): self.log("info", f"Executing pre-consume script {settings.PRE_CONSUME_SCRIPT}") + script_env = os.environ.copy() + script_env["DOCUMENT_SOURCE_PATH"] = os.path.normpath(self.path) + try: - Popen((settings.PRE_CONSUME_SCRIPT, self.path)).wait() + Popen(settings.PRE_CONSUME_SCRIPT, env=script_env).wait() except Exception as e: self._fail( MESSAGE_PRE_CONSUME_SCRIPT_ERROR, @@ -159,19 +162,29 @@ class Consumer(LoggingMixin): f"Executing post-consume script {settings.POST_CONSUME_SCRIPT}", ) + script_env = os.environ.copy() + + script_env["DOCUMENT_ID"] = str(document.pk) + script_env["DOCUMENT_FILE_NAME"] = document.get_public_filename() + script_env["DOCUMENT_SOURCE_PATH"] = os.path.normpath(document.source_path) + script_env["DOCUMENT_THUMBNAIL_PATH"] = os.path.normpath( + document.thumbnail_path + ) + script_env["DOCUMENT_DOWNLOAD_URL"] = reverse( + "document-download", kwargs={"pk": document.pk} + ) + script_env["DOCUMENT_THUMBNAIL_URL"] = reverse( + "document-thumb", kwargs={"pk": document.pk} + ) + script_env["DOCUMENT_CORRESPONDENT"] = str(document.correspondent) + script_env["DOCUMENT_TAGS"] = str( + ",".join(document.tags.all().values_list("name", flat=True)) + ) + try: Popen( - ( - settings.POST_CONSUME_SCRIPT, - str(document.pk), - document.get_public_filename(), - os.path.normpath(document.source_path), - os.path.normpath(document.thumbnail_path), - reverse("document-download", kwargs={"pk": document.pk}), - reverse("document-thumb", kwargs={"pk": document.pk}), - str(document.correspondent), - str(",".join(document.tags.all().values_list("name", flat=True))), - ), + settings.POST_CONSUME_SCRIPT, + env=script_env, ).wait() except Exception as e: self._fail( From 9424b763cafc94515dd2c04e34948a491216a901 Mon Sep 17 00:00:00 2001 From: 
ziprandom Date: Tue, 21 Jun 2022 16:02:50 +0000 Subject: [PATCH 2/4] POST_CONSUME_SCRIPT: add document dates to env --- docs/advanced_usage.rst | 3 +++ scripts/post-consumption-example.sh | 3 +++ src/documents/consumer.py | 3 +++ 3 files changed, 9 insertions(+) diff --git a/docs/advanced_usage.rst b/docs/advanced_usage.rst index bc194ce5a..d32d633b9 100644 --- a/docs/advanced_usage.rst +++ b/docs/advanced_usage.rst @@ -161,6 +161,9 @@ into paperless. It receives the following environment variables: * ``DOCUMENT_ID`` * ``DOCUMENT_FILE_NAME`` +* ``DOCUMENT_CREATED`` +* ``DOCUMENT_MODIFIED`` +* ``DOCUMENT_ADDED`` * ``DOCUMENT_SOURCE_PATH`` * ``DOCUMENT_THUMBNAIL_PATH`` * ``DOCUMENT_DOWNLOAD_URL`` diff --git a/scripts/post-consumption-example.sh b/scripts/post-consumption-example.sh index b4c40090c..81bfa3d48 100755 --- a/scripts/post-consumption-example.sh +++ b/scripts/post-consumption-example.sh @@ -7,6 +7,9 @@ following additional information about it: * Generated File Name: ${DOCUMENT_FILE_NAME} * Source Path: ${DOCUMENT_SOURCE_PATH} +* Created: ${DOCUMENT_CREATED} +* Added: ${DOCUMENT_ADDED} +* Modified: ${DOCUMENT_MODIFIED} * Thumbnail Path: ${DOCUMENT_THUMBNAIL_PATH} * Download URL: ${DOCUMENT_DOWNLOAD_URL} * Thumbnail URL: ${DOCUMENT_THUMBNAIL_URL} diff --git a/src/documents/consumer.py b/src/documents/consumer.py index 42666e107..474f9df8a 100644 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -165,6 +165,9 @@ class Consumer(LoggingMixin): script_env = os.environ.copy() script_env["DOCUMENT_ID"] = str(document.pk) + script_env["DOCUMENT_CREATED"] = str(document.created) + script_env["DOCUMENT_MODIFIED"] = str(document.modified) + script_env["DOCUMENT_ADDED"] = str(document.added) script_env["DOCUMENT_FILE_NAME"] = document.get_public_filename() script_env["DOCUMENT_SOURCE_PATH"] = os.path.normpath(document.source_path) script_env["DOCUMENT_THUMBNAIL_PATH"] = os.path.normpath( From f20254217f7e6bb88750de0eaee9e75faf040551 Mon Sep 17 
00:00:00 2001 From: ziprandom Date: Wed, 22 Jun 2022 13:58:50 +0000 Subject: [PATCH 3/4] POST_CONSUME_SCRIPT: add documents archive_path --- docs/advanced_usage.rst | 1 + scripts/post-consumption-example.sh | 1 + src/documents/consumer.py | 1 + 3 files changed, 3 insertions(+) diff --git a/docs/advanced_usage.rst b/docs/advanced_usage.rst index d32d633b9..ba5b3cbaa 100644 --- a/docs/advanced_usage.rst +++ b/docs/advanced_usage.rst @@ -165,6 +165,7 @@ into paperless. It receives the following environment variables: * ``DOCUMENT_MODIFIED`` * ``DOCUMENT_ADDED`` * ``DOCUMENT_SOURCE_PATH`` +* ``DOCUMENT_ARCHIVE_PATH`` * ``DOCUMENT_THUMBNAIL_PATH`` * ``DOCUMENT_DOWNLOAD_URL`` * ``DOCUMENT_THUMBNAIL_URL`` diff --git a/scripts/post-consumption-example.sh b/scripts/post-consumption-example.sh index 81bfa3d48..6e42b1bc3 100755 --- a/scripts/post-consumption-example.sh +++ b/scripts/post-consumption-example.sh @@ -6,6 +6,7 @@ A document with an id of ${DOCUMENT_ID} was just consumed. I know the following additional information about it: * Generated File Name: ${DOCUMENT_FILE_NAME} +* Archive Path: ${DOCUMENT_ARCHIVE_PATH} * Source Path: ${DOCUMENT_SOURCE_PATH} * Created: ${DOCUMENT_CREATED} * Added: ${DOCUMENT_ADDED} diff --git a/src/documents/consumer.py b/src/documents/consumer.py index 474f9df8a..5673d159a 100644 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -170,6 +170,7 @@ class Consumer(LoggingMixin): script_env["DOCUMENT_ADDED"] = str(document.added) script_env["DOCUMENT_FILE_NAME"] = document.get_public_filename() script_env["DOCUMENT_SOURCE_PATH"] = os.path.normpath(document.source_path) + script_env["DOCUMENT_ARCHIVE_PATH"] = os.path.normpath(str(document.archive_path)) script_env["DOCUMENT_THUMBNAIL_PATH"] = os.path.normpath( document.thumbnail_path ) From 678bcb171af44abca9df2d8e187b46ad24b56932 Mon Sep 17 00:00:00 2001 From: Trenton Holmes Date: Thu, 14 Jul 2022 08:45:51 -0700 Subject: [PATCH 4/4] Re-adds the script arguments --- 
src/documents/consumer.py | 38 ++++++++++++++++++++++++++++++-------- 1 file changed, 30 insertions(+), 8 deletions(-) diff --git a/src/documents/consumer.py b/src/documents/consumer.py index 5673d159a..fdf7b2f6e 100644 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -134,11 +134,19 @@ class Consumer(LoggingMixin): self.log("info", f"Executing pre-consume script {settings.PRE_CONSUME_SCRIPT}") + filepath_arg = os.path.normpath(self.path) + script_env = os.environ.copy() - script_env["DOCUMENT_SOURCE_PATH"] = os.path.normpath(self.path) + script_env["DOCUMENT_SOURCE_PATH"] = filepath_arg try: - Popen(settings.PRE_CONSUME_SCRIPT, env=script_env).wait() + Popen( + ( + settings.PRE_CONSUME_SCRIPT, + filepath_arg, + ), + env=script_env, + ).wait() except Exception as e: self._fail( MESSAGE_PRE_CONSUME_SCRIPT_ERROR, @@ -170,24 +178,38 @@ class Consumer(LoggingMixin): script_env["DOCUMENT_ADDED"] = str(document.added) script_env["DOCUMENT_FILE_NAME"] = document.get_public_filename() script_env["DOCUMENT_SOURCE_PATH"] = os.path.normpath(document.source_path) - script_env["DOCUMENT_ARCHIVE_PATH"] = os.path.normpath(str(document.archive_path)) + script_env["DOCUMENT_ARCHIVE_PATH"] = os.path.normpath( + str(document.archive_path), + ) script_env["DOCUMENT_THUMBNAIL_PATH"] = os.path.normpath( - document.thumbnail_path + document.thumbnail_path, ) script_env["DOCUMENT_DOWNLOAD_URL"] = reverse( - "document-download", kwargs={"pk": document.pk} + "document-download", + kwargs={"pk": document.pk}, ) script_env["DOCUMENT_THUMBNAIL_URL"] = reverse( - "document-thumb", kwargs={"pk": document.pk} + "document-thumb", + kwargs={"pk": document.pk}, ) script_env["DOCUMENT_CORRESPONDENT"] = str(document.correspondent) script_env["DOCUMENT_TAGS"] = str( - ",".join(document.tags.all().values_list("name", flat=True)) + ",".join(document.tags.all().values_list("name", flat=True)), ) try: Popen( - settings.POST_CONSUME_SCRIPT, + ( + settings.POST_CONSUME_SCRIPT, + 
str(document.pk), + document.get_public_filename(), + os.path.normpath(document.source_path), + os.path.normpath(document.thumbnail_path), + reverse("document-download", kwargs={"pk": document.pk}), + reverse("document-thumb", kwargs={"pk": document.pk}), + str(document.correspondent), + str(",".join(document.tags.all().values_list("name", flat=True))), + ), env=script_env, ).wait() except Exception as e: