From 9a47963fd5aa54c5bf463110603a19527a057ca3 Mon Sep 17 00:00:00 2001 From: Trenton Holmes <797416+stumpylog@users.noreply.github.com> Date: Wed, 9 Nov 2022 20:11:36 -0800 Subject: [PATCH 1/4] Captures the stdout and stderr of the pre/post scripts into the log --- src/documents/consumer.py | 65 +++++++++++++++++++++++----- src/documents/tests/test_consumer.py | 49 +++++++++++++++++---- 2 files changed, 95 insertions(+), 19 deletions(-) diff --git a/src/documents/consumer.py b/src/documents/consumer.py index f542a1d98..7c0cbd2d3 100644 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -2,7 +2,8 @@ import datetime import hashlib import os import uuid -from subprocess import Popen +from subprocess import CompletedProcess +from subprocess import run from typing import Optional from typing import Type @@ -148,14 +149,21 @@ class Consumer(LoggingMixin): script_env["DOCUMENT_SOURCE_PATH"] = filepath_arg try: - Popen( - ( + completed_proc = run( + args=[ settings.PRE_CONSUME_SCRIPT, filepath_arg, - ), + ], env=script_env, - ).wait() - except Exception as e: + capture_output=True, + ) + + self._log_script_outputs(completed_proc) + + # Raises exception on non-zero output + completed_proc.check_returncode() + + except Exception as e: # pragma: nocover self._fail( MESSAGE_PRE_CONSUME_SCRIPT_ERROR, f"Error while executing pre-consume script: {e}", @@ -208,8 +216,8 @@ class Consumer(LoggingMixin): script_env["DOCUMENT_ORIGINAL_FILENAME"] = str(document.original_filename) try: - Popen( - ( + completed_proc = run( + args=[ settings.POST_CONSUME_SCRIPT, str(document.pk), document.get_public_filename(), @@ -219,10 +227,17 @@ class Consumer(LoggingMixin): reverse("document-thumb", kwargs={"pk": document.pk}), str(document.correspondent), str(",".join(document.tags.all().values_list("name", flat=True))), - ), + ], env=script_env, - ).wait() - except Exception as e: + capture_output=True, + ) + + self._log_script_outputs(completed_proc) + + # Raises exception on non-zero output + completed_proc.check_returncode() + + except Exception as e: # pragma: nocover self._fail( MESSAGE_POST_CONSUME_SCRIPT_ERROR, f"Error while executing post-consume script: {e}", @@ -510,3 +525,31 @@ class Consumer(LoggingMixin): with open(source, "rb") as read_file: with open(target, "wb") as write_file: write_file.write(read_file.read()) + + def _log_script_outputs(self, completed_process: CompletedProcess): + """ + Decodes a process stdout and stderr streams and logs them to the main log + """ + # Log what the script exited as + self.log( + "info", + f"{completed_process.args[0]} exited {completed_process.returncode}", + ) + + # Decode the output (if any) + stdout_str = completed_process.stdout.decode("utf8", errors="ignore").split( + "\n", + ) + stderr_str = completed_process.stderr.decode("utf8", errors="ignore").split( + "\n", + ) + + if len(stdout_str): + self.log("info", "Script stdout:") + for line in stdout_str: + self.log("info", line) + + if len(stderr_str): + self.log("warning", "Script stderr:") + for line in stderr_str: + self.log("warning", line) diff --git a/src/documents/tests/test_consumer.py b/src/documents/tests/test_consumer.py index 3b94b889b..4a3908f8e 100644 --- a/src/documents/tests/test_consumer.py +++ b/src/documents/tests/test_consumer.py @@ -2,7 +2,9 @@ import datetime import os import re import shutil +import stat import tempfile +from subprocess import CalledProcessError from unittest import mock from unittest.mock import MagicMock @@ -801,7 +803,7 @@ class TestConsumerCreatedDate(DirectoriesMixin, TestCase): class PreConsumeTestCase(TestCase): - @mock.patch("documents.consumer.Popen") + @mock.patch("documents.consumer.run") @override_settings(PRE_CONSUME_SCRIPT=None) def test_no_pre_consume_script(self, m): c = Consumer() @@ -809,7 +811,7 @@ class PreConsumeTestCase(TestCase): c.run_pre_consume_script() m.assert_not_called() - @mock.patch("documents.consumer.Popen") + @mock.patch("documents.consumer.run") @mock.patch("documents.consumer.Consumer._send_progress") @override_settings(PRE_CONSUME_SCRIPT="does-not-exist") def test_pre_consume_script_not_found(self, m, m2): @@ -818,7 +820,7 @@ class PreConsumeTestCase(TestCase): c.path = "path-to-file" self.assertRaises(ConsumerError, c.run_pre_consume_script) - @mock.patch("documents.consumer.Popen") + @mock.patch("documents.consumer.run") def test_pre_consume_script(self, m): with tempfile.NamedTemporaryFile() as script: with override_settings(PRE_CONSUME_SCRIPT=script.name): @@ -830,14 +832,45 @@ class PreConsumeTestCase(TestCase): args, kwargs = m.call_args - command = args[0] + command = kwargs["args"] self.assertEqual(command[0], script.name) self.assertEqual(command[1], "path-to-file") + @mock.patch("documents.consumer.Consumer.log") + def test_script_with_output(self, mocked_log): + """ + GIVEN: + - A script which outputs to stdout and stderr + WHEN: + - The script is executed as a consume script + THEN: + - The script's outputs are logged + """ + with tempfile.NamedTemporaryFile(mode="w") as script: + # Write up a little script + with script.file as outfile: + outfile.write("#!/usr/bin/env bash\n") + outfile.write("echo This message goes to stdout\n") + outfile.write("echo This message goes to stderr >&2") + + # Make the file executable + st = os.stat(script.name) + os.chmod(script.name, st.st_mode | stat.S_IEXEC) + + with override_settings(PRE_CONSUME_SCRIPT=script.name): + c = Consumer() + c.path = "path-to-file" + c.run_pre_consume_script() + + mocked_log.assert_called() + + mocked_log.assert_any_call("info", "This message goes to stdout") + mocked_log.assert_any_call("warning", "This message goes to stderr") + class PostConsumeTestCase(TestCase): - @mock.patch("documents.consumer.Popen") + @mock.patch("documents.consumer.run") @override_settings(POST_CONSUME_SCRIPT=None) def test_no_post_consume_script(self, m): doc = Document.objects.create(title="Test", mime_type="application/pdf") @@ -858,7 +891,7 @@ class PostConsumeTestCase(TestCase): c.filename = "somefile.pdf" self.assertRaises(ConsumerError, c.run_post_consume_script, doc) - @mock.patch("documents.consumer.Popen") + @mock.patch("documents.consumer.run") def test_post_consume_script_simple(self, m): with tempfile.NamedTemporaryFile() as script: with override_settings(POST_CONSUME_SCRIPT=script.name): @@ -868,7 +901,7 @@ class PostConsumeTestCase(TestCase): m.assert_called_once() - @mock.patch("documents.consumer.Popen") + @mock.patch("documents.consumer.run") def test_post_consume_script_with_correspondent(self, m): with tempfile.NamedTemporaryFile() as script: with override_settings(POST_CONSUME_SCRIPT=script.name): @@ -889,7 +922,7 @@ class PostConsumeTestCase(TestCase): args, kwargs = m.call_args - command = args[0] + command = kwargs["args"] self.assertEqual(command[0], script.name) self.assertEqual(command[1], str(doc.pk)) From c4965580deebe97b40bfddaa9e0301d0ba108078 Mon Sep 17 00:00:00 2001 From: Trenton H <797416+stumpylog@users.noreply.github.com> Date: Thu, 10 Nov 2022 17:40:36 -0800 Subject: [PATCH 2/4] Fixes stderr appearing to have content when it doesn't --- docs/advanced_usage.rst | 7 +++++++ src/documents/consumer.py | 16 ++++++++++++---- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/docs/advanced_usage.rst b/docs/advanced_usage.rst index eda2ca259..0096da99c 100644 --- a/docs/advanced_usage.rst +++ b/docs/advanced_usage.rst @@ -149,6 +149,9 @@ which will in turn call `pdf2pdfocr.py`_ on your document, which will then overwrite the file with an OCR'd version of the file and exit. At which point, the consumption process will begin with the newly modified file. +The script's stdout and stderr will be logged line by line to the webserver log, along +with the exit code of the script. + .. _pdf2pdfocr.py: https://github.com/LeoFCardoso/pdf2pdfocr .. _advanced-post_consume_script: @@ -178,6 +181,10 @@ example, you can take a look at `post-consumption-example.sh`_ in this project. The post consumption script cannot cancel the consumption process. +The script's stdout and stderr will be logged line by line to the webserver log, along +with the exit code of the script. + + Docker ------ Assumed you have ``/home/foo/paperless-ngx/scripts/post-consumption-example.sh``. diff --git a/src/documents/consumer.py b/src/documents/consumer.py index 7c0cbd2d3..88d882350 100644 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -537,11 +537,19 @@ class Consumer(LoggingMixin): ) # Decode the output (if any) - stdout_str = completed_process.stdout.decode("utf8", errors="ignore").split( - "\n", + stdout_str = ( + completed_process.stdout.decode("utf8", errors="ignore") + .strip() + .split( + "\n", + ) ) - stderr_str = completed_process.stderr.decode("utf8", errors="ignore").split( - "\n", + stderr_str = ( + completed_process.stderr.decode("utf8", errors="ignore") + .strip() + .split( + "\n", + ) ) if len(stdout_str): From 057f6016cc92f6d21b04b9a16dc6f0b255c8b401 Mon Sep 17 00:00:00 2001 From: Trenton H <797416+stumpylog@users.noreply.github.com> Date: Fri, 11 Nov 2022 08:58:49 -0800 Subject: [PATCH 3/4] Adds further testing to cover scripts with non-zero exit codes --- src/documents/consumer.py | 4 +- src/documents/tests/test_consumer.py | 68 ++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+), 2 deletions(-) diff --git a/src/documents/consumer.py b/src/documents/consumer.py index 88d882350..45722d01a 100644 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -163,7 +163,7 @@ class Consumer(LoggingMixin): # Raises exception on non-zero output completed_proc.check_returncode() - except Exception as e: # pragma: nocover + except Exception as e: self._fail( MESSAGE_PRE_CONSUME_SCRIPT_ERROR, f"Error while executing pre-consume script: {e}", @@ -237,7 +237,7 @@ class Consumer(LoggingMixin): # Raises exception on non-zero output completed_proc.check_returncode() - except Exception as e: # pragma: nocover + except Exception as e: self._fail( MESSAGE_POST_CONSUME_SCRIPT_ERROR, f"Error while executing post-consume script: {e}", diff --git a/src/documents/tests/test_consumer.py b/src/documents/tests/test_consumer.py index 4a3908f8e..f6685ad02 100644 --- a/src/documents/tests/test_consumer.py +++ b/src/documents/tests/test_consumer.py @@ -803,6 +803,15 @@ class TestConsumerCreatedDate(DirectoriesMixin, TestCase): class PreConsumeTestCase(TestCase): + def setUp(self) -> None: + + # this prevents websocket message reports during testing. + patcher = mock.patch("documents.consumer.Consumer._send_progress") + self._send_progress = patcher.start() + self.addCleanup(patcher.stop) + + return super().setUp() + @mock.patch("documents.consumer.run") @override_settings(PRE_CONSUME_SCRIPT=None) def test_no_pre_consume_script(self, m): @@ -868,8 +877,41 @@ class PreConsumeTestCase(TestCase): mocked_log.assert_any_call("info", "This message goes to stdout") mocked_log.assert_any_call("warning", "This message goes to stderr") + def test_script_exit_non_zero(self): + """ + GIVEN: + - A script which exits with a non-zero exit code + WHEN: + - The script is executed as a pre-consume script + THEN: + - A ConsumerError is raised + """ + with tempfile.NamedTemporaryFile(mode="w") as script: + # Write up a little script + with script.file as outfile: + outfile.write("#!/usr/bin/env bash\n") + outfile.write("exit 100\n") + + # Make the file executable + st = os.stat(script.name) + os.chmod(script.name, st.st_mode | stat.S_IEXEC) + + with override_settings(PRE_CONSUME_SCRIPT=script.name): + c = Consumer() + c.path = "path-to-file" + self.assertRaises(ConsumerError, c.run_pre_consume_script) + class PostConsumeTestCase(TestCase): + def setUp(self) -> None: + + # this prevents websocket message reports during testing. + patcher = mock.patch("documents.consumer.Consumer._send_progress") + self._send_progress = patcher.start() + self.addCleanup(patcher.stop) + + return super().setUp() + @mock.patch("documents.consumer.run") @override_settings(POST_CONSUME_SCRIPT=None) def test_no_post_consume_script(self, m): @@ -930,3 +972,29 @@ class PostConsumeTestCase(TestCase): self.assertEqual(command[6], f"/api/documents/{doc.pk}/thumb/") self.assertEqual(command[7], "my_bank") self.assertCountEqual(command[8].split(","), ["a", "b"]) + + def test_script_exit_non_zero(self): + """ + GIVEN: + - A script which exits with a non-zero exit code + WHEN: + - The script is executed as a post-consume script + THEN: + - A ConsumerError is raised + """ + with tempfile.NamedTemporaryFile(mode="w") as script: + # Write up a little script + with script.file as outfile: + outfile.write("#!/usr/bin/env bash\n") + outfile.write("exit -500\n") + + # Make the file executable + st = os.stat(script.name) + os.chmod(script.name, st.st_mode | stat.S_IEXEC) + + with override_settings(POST_CONSUME_SCRIPT=script.name): + c = Consumer() + doc = Document.objects.create(title="Test", mime_type="application/pdf") + c.path = "path-to-file" + with self.assertRaises(ConsumerError): + c.run_post_consume_script(doc) From 3dfeee9332f4436467056178d7ddc3ff3a2aa8a8 Mon Sep 17 00:00:00 2001 From: Trenton H <797416+stumpylog@users.noreply.github.com> Date: Fri, 11 Nov 2022 10:09:56 -0800 Subject: [PATCH 4/4] Don't do decoding work if not needed --- src/documents/consumer.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/src/documents/consumer.py b/src/documents/consumer.py index 45722d01a..75181cabb 100644 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -537,27 +537,27 @@ class Consumer(LoggingMixin): ) # Decode the output (if any) - stdout_str = ( - completed_process.stdout.decode("utf8", errors="ignore") - .strip() - .split( - "\n", + if len(completed_process.stdout): + stdout_str = ( + completed_process.stdout.decode("utf8", errors="ignore") + .strip() + .split( + "\n", + ) ) - ) - stderr_str = ( - completed_process.stderr.decode("utf8", errors="ignore") - .strip() - .split( - "\n", - ) - ) - - if len(stdout_str): self.log("info", "Script stdout:") for line in stdout_str: self.log("info", line) - if len(stderr_str): + if len(completed_process.stderr): + stderr_str = ( + completed_process.stderr.decode("utf8", errors="ignore") + .strip() + .split( + "\n", + ) + ) + self.log("warning", "Script stderr:") for line in stderr_str: self.log("warning", line)