diff --git a/docs/advanced_usage.rst b/docs/advanced_usage.rst index eda2ca259..0096da99c 100644 --- a/docs/advanced_usage.rst +++ b/docs/advanced_usage.rst @@ -149,6 +149,9 @@ which will in turn call `pdf2pdfocr.py`_ on your document, which will then overwrite the file with an OCR'd version of the file and exit. At which point, the consumption process will begin with the newly modified file. +The script's stdout and stderr will be logged line by line to the webserver log, along +with the exit code of the script. + .. _pdf2pdfocr.py: https://github.com/LeoFCardoso/pdf2pdfocr .. _advanced-post_consume_script: @@ -178,6 +181,10 @@ example, you can take a look at `post-consumption-example.sh`_ in this project. The post consumption script cannot cancel the consumption process. +The script's stdout and stderr will be logged line by line to the webserver log, along +with the exit code of the script. + + Docker ------ Assumed you have ``/home/foo/paperless-ngx/scripts/post-consumption-example.sh``. diff --git a/src/documents/consumer.py b/src/documents/consumer.py index f542a1d98..75181cabb 100644 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -2,7 +2,8 @@ import datetime import hashlib import os import uuid -from subprocess import Popen +from subprocess import CompletedProcess +from subprocess import run from typing import Optional from typing import Type @@ -148,13 +149,20 @@ class Consumer(LoggingMixin): script_env["DOCUMENT_SOURCE_PATH"] = filepath_arg try: - Popen( - ( + completed_proc = run( + args=[ settings.PRE_CONSUME_SCRIPT, filepath_arg, - ), + ], env=script_env, - ).wait() + capture_output=True, + ) + + self._log_script_outputs(completed_proc) + + # Raises exception on non-zero output + completed_proc.check_returncode() + except Exception as e: self._fail( MESSAGE_PRE_CONSUME_SCRIPT_ERROR, @@ -208,8 +216,8 @@ class Consumer(LoggingMixin): script_env["DOCUMENT_ORIGINAL_FILENAME"] = str(document.original_filename) try: - Popen( - ( + completed_proc = run( + args=[ settings.POST_CONSUME_SCRIPT, str(document.pk), document.get_public_filename(), @@ -219,9 +227,16 @@ class Consumer(LoggingMixin): reverse("document-thumb", kwargs={"pk": document.pk}), str(document.correspondent), str(",".join(document.tags.all().values_list("name", flat=True))), - ), + ], env=script_env, - ).wait() + capture_output=True, + ) + + self._log_script_outputs(completed_proc) + + # Raises exception on non-zero output + completed_proc.check_returncode() + except Exception as e: self._fail( MESSAGE_POST_CONSUME_SCRIPT_ERROR, @@ -510,3 +525,39 @@ class Consumer(LoggingMixin): with open(source, "rb") as read_file: with open(target, "wb") as write_file: write_file.write(read_file.read()) + + def _log_script_outputs(self, completed_process: CompletedProcess): + """ + Decodes a process stdout and stderr streams and logs them to the main log + """ + # Log what the script exited as + self.log( + "info", + f"{completed_process.args[0]} exited {completed_process.returncode}", + ) + + # Decode the output (if any) + if len(completed_process.stdout): + stdout_str = ( + completed_process.stdout.decode("utf8", errors="ignore") + .strip() + .split( + "\n", + ) + ) + self.log("info", "Script stdout:") + for line in stdout_str: + self.log("info", line) + + if len(completed_process.stderr): + stderr_str = ( + completed_process.stderr.decode("utf8", errors="ignore") + .strip() + .split( + "\n", + ) + ) + + self.log("warning", "Script stderr:") + for line in stderr_str: + self.log("warning", line) diff --git a/src/documents/tests/test_consumer.py b/src/documents/tests/test_consumer.py index 3b94b889b..f6685ad02 100644 --- a/src/documents/tests/test_consumer.py +++ b/src/documents/tests/test_consumer.py @@ -2,7 +2,9 @@ import datetime import os import re import shutil +import stat import tempfile +from subprocess import CalledProcessError from unittest import mock from unittest.mock import MagicMock @@ -801,7 +803,16 @@ class TestConsumerCreatedDate(DirectoriesMixin, TestCase): class PreConsumeTestCase(TestCase): - @mock.patch("documents.consumer.Popen") + def setUp(self) -> None: + + # this prevents websocket message reports during testing. + patcher = mock.patch("documents.consumer.Consumer._send_progress") + self._send_progress = patcher.start() + self.addCleanup(patcher.stop) + + return super().setUp() + + @mock.patch("documents.consumer.run") @override_settings(PRE_CONSUME_SCRIPT=None) def test_no_pre_consume_script(self, m): c = Consumer() @@ -809,7 +820,7 @@ class PreConsumeTestCase(TestCase): c.run_pre_consume_script() m.assert_not_called() - @mock.patch("documents.consumer.Popen") + @mock.patch("documents.consumer.run") @mock.patch("documents.consumer.Consumer._send_progress") @override_settings(PRE_CONSUME_SCRIPT="does-not-exist") def test_pre_consume_script_not_found(self, m, m2): @@ -818,7 +829,7 @@ class PreConsumeTestCase(TestCase): c.path = "path-to-file" self.assertRaises(ConsumerError, c.run_pre_consume_script) - @mock.patch("documents.consumer.Popen") + @mock.patch("documents.consumer.run") def test_pre_consume_script(self, m): with tempfile.NamedTemporaryFile() as script: with override_settings(PRE_CONSUME_SCRIPT=script.name): @@ -830,14 +841,78 @@ class PreConsumeTestCase(TestCase): args, kwargs = m.call_args - command = args[0] + command = kwargs["args"] self.assertEqual(command[0], script.name) self.assertEqual(command[1], "path-to-file") + @mock.patch("documents.consumer.Consumer.log") + def test_script_with_output(self, mocked_log): + """ + GIVEN: + - A script which outputs to stdout and stderr + WHEN: + - The script is executed as a consume script + THEN: + - The script's outputs are logged + """ + with tempfile.NamedTemporaryFile(mode="w") as script: + # Write up a little script + with script.file as outfile: + outfile.write("#!/usr/bin/env bash\n") + outfile.write("echo This message goes to stdout\n") + outfile.write("echo This message goes to stderr >&2") + + # Make the file executable + st = os.stat(script.name) + os.chmod(script.name, st.st_mode | stat.S_IEXEC) + + with override_settings(PRE_CONSUME_SCRIPT=script.name): + c = Consumer() + c.path = "path-to-file" + c.run_pre_consume_script() + + mocked_log.assert_called() + + mocked_log.assert_any_call("info", "This message goes to stdout") + mocked_log.assert_any_call("warning", "This message goes to stderr") + + def test_script_exit_non_zero(self): + """ + GIVEN: + - A script which exits with a non-zero exit code + WHEN: + - The script is executed as a pre-consume script + THEN: + - A ConsumerError is raised + """ + with tempfile.NamedTemporaryFile(mode="w") as script: + # Write up a little script + with script.file as outfile: + outfile.write("#!/usr/bin/env bash\n") + outfile.write("exit 100\n") + + # Make the file executable + st = os.stat(script.name) + os.chmod(script.name, st.st_mode | stat.S_IEXEC) + + with override_settings(PRE_CONSUME_SCRIPT=script.name): + c = Consumer() + c.path = "path-to-file" + self.assertRaises(ConsumerError, c.run_pre_consume_script) + class PostConsumeTestCase(TestCase): - @mock.patch("documents.consumer.Popen") + def setUp(self) -> None: + + # this prevents websocket message reports during testing. + patcher = mock.patch("documents.consumer.Consumer._send_progress") + self._send_progress = patcher.start() + self.addCleanup(patcher.stop) + + return super().setUp() + + @mock.patch("documents.consumer.run") @override_settings(POST_CONSUME_SCRIPT=None) def test_no_post_consume_script(self, m): doc = Document.objects.create(title="Test", mime_type="application/pdf") @@ -858,7 +933,7 @@ class PostConsumeTestCase(TestCase): c.filename = "somefile.pdf" self.assertRaises(ConsumerError, c.run_post_consume_script, doc) - @mock.patch("documents.consumer.Popen") + @mock.patch("documents.consumer.run") def test_post_consume_script_simple(self, m): with tempfile.NamedTemporaryFile() as script: with override_settings(POST_CONSUME_SCRIPT=script.name): @@ -868,7 +943,7 @@ class PostConsumeTestCase(TestCase): m.assert_called_once() - @mock.patch("documents.consumer.Popen") + @mock.patch("documents.consumer.run") def test_post_consume_script_with_correspondent(self, m): with tempfile.NamedTemporaryFile() as script: with override_settings(POST_CONSUME_SCRIPT=script.name): @@ -889,7 +964,7 @@ class PostConsumeTestCase(TestCase): args, kwargs = m.call_args - command = args[0] + command = kwargs["args"] self.assertEqual(command[0], script.name) self.assertEqual(command[1], str(doc.pk)) @@ -897,3 +972,29 @@ class PostConsumeTestCase(TestCase): self.assertEqual(command[6], f"/api/documents/{doc.pk}/thumb/") self.assertEqual(command[7], "my_bank") self.assertCountEqual(command[8].split(","), ["a", "b"]) + + def test_script_exit_non_zero(self): + """ + GIVEN: + - A script which exits with a non-zero exit code + WHEN: + - The script is executed as a post-consume script + THEN: + - A ConsumerError is raised + """ + with tempfile.NamedTemporaryFile(mode="w") as script: + # Write up a little script + with script.file as outfile: + outfile.write("#!/usr/bin/env bash\n") + outfile.write("exit -500\n") + + # Make the file executable + st = os.stat(script.name) + os.chmod(script.name, st.st_mode | stat.S_IEXEC) + + with override_settings(POST_CONSUME_SCRIPT=script.name): + c = Consumer() + doc = Document.objects.create(title="Test", mime_type="application/pdf") + c.path = "path-to-file" + with self.assertRaises(ConsumerError): + c.run_post_consume_script(doc)