Merge pull request #1967 from paperless-ngx/feature-scripts-output

Feature: Capture stdout & stderr of the pre/post consume scripts
This commit is contained in:
shamoon 2022-11-17 14:30:23 -08:00 committed by GitHub
commit 9f5fd6c3ba
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 176 additions and 17 deletions

View File

@ -149,6 +149,9 @@ which will in turn call `pdf2pdfocr.py`_ on your document, which will then
overwrite the file with an OCR'd version of the file and exit. At which point,
the consumption process will begin with the newly modified file.
The script's stdout and stderr will be logged line by line to the webserver log, along
with the exit code of the script.
.. _pdf2pdfocr.py: https://github.com/LeoFCardoso/pdf2pdfocr
.. _advanced-post_consume_script:
@ -178,6 +181,10 @@ example, you can take a look at `post-consumption-example.sh`_ in this project.
The post consumption script cannot cancel the consumption process.
The script's stdout and stderr will be logged line by line to the webserver log, along
with the exit code of the script.
Docker
------
Assumed you have ``/home/foo/paperless-ngx/scripts/post-consumption-example.sh``.

View File

@ -2,7 +2,8 @@ import datetime
import hashlib
import os
import uuid
from subprocess import Popen
from subprocess import CompletedProcess
from subprocess import run
from typing import Optional
from typing import Type
@ -148,13 +149,20 @@ class Consumer(LoggingMixin):
script_env["DOCUMENT_SOURCE_PATH"] = filepath_arg
try:
Popen(
(
completed_proc = run(
args=[
settings.PRE_CONSUME_SCRIPT,
filepath_arg,
),
],
env=script_env,
).wait()
capture_output=True,
)
self._log_script_outputs(completed_proc)
# Raises exception on non-zero output
completed_proc.check_returncode()
except Exception as e:
self._fail(
MESSAGE_PRE_CONSUME_SCRIPT_ERROR,
@ -208,8 +216,8 @@ class Consumer(LoggingMixin):
script_env["DOCUMENT_ORIGINAL_FILENAME"] = str(document.original_filename)
try:
Popen(
(
completed_proc = run(
args=[
settings.POST_CONSUME_SCRIPT,
str(document.pk),
document.get_public_filename(),
@ -219,9 +227,16 @@ class Consumer(LoggingMixin):
reverse("document-thumb", kwargs={"pk": document.pk}),
str(document.correspondent),
str(",".join(document.tags.all().values_list("name", flat=True))),
),
],
env=script_env,
).wait()
capture_output=True,
)
self._log_script_outputs(completed_proc)
# Raises exception on non-zero output
completed_proc.check_returncode()
except Exception as e:
self._fail(
MESSAGE_POST_CONSUME_SCRIPT_ERROR,
@ -510,3 +525,39 @@ class Consumer(LoggingMixin):
with open(source, "rb") as read_file:
with open(target, "wb") as write_file:
write_file.write(read_file.read())
def _log_script_outputs(self, completed_process: CompletedProcess):
"""
Decodes a process stdout and stderr streams and logs them to the main log
"""
# Log what the script exited as
self.log(
"info",
f"{completed_process.args[0]} exited {completed_process.returncode}",
)
# Decode the output (if any)
if len(completed_process.stdout):
stdout_str = (
completed_process.stdout.decode("utf8", errors="ignore")
.strip()
.split(
"\n",
)
)
self.log("info", "Script stdout:")
for line in stdout_str:
self.log("info", line)
if len(completed_process.stderr):
stderr_str = (
completed_process.stderr.decode("utf8", errors="ignore")
.strip()
.split(
"\n",
)
)
self.log("warning", "Script stderr:")
for line in stderr_str:
self.log("warning", line)

View File

@ -2,7 +2,9 @@ import datetime
import os
import re
import shutil
import stat
import tempfile
from subprocess import CalledProcessError
from unittest import mock
from unittest.mock import MagicMock
@ -801,7 +803,16 @@ class TestConsumerCreatedDate(DirectoriesMixin, TestCase):
class PreConsumeTestCase(TestCase):
@mock.patch("documents.consumer.Popen")
def setUp(self) -> None:
# this prevents websocket message reports during testing.
patcher = mock.patch("documents.consumer.Consumer._send_progress")
self._send_progress = patcher.start()
self.addCleanup(patcher.stop)
return super().setUp()
@mock.patch("documents.consumer.run")
@override_settings(PRE_CONSUME_SCRIPT=None)
def test_no_pre_consume_script(self, m):
c = Consumer()
@ -809,7 +820,7 @@ class PreConsumeTestCase(TestCase):
c.run_pre_consume_script()
m.assert_not_called()
@mock.patch("documents.consumer.Popen")
@mock.patch("documents.consumer.run")
@mock.patch("documents.consumer.Consumer._send_progress")
@override_settings(PRE_CONSUME_SCRIPT="does-not-exist")
def test_pre_consume_script_not_found(self, m, m2):
@ -818,7 +829,7 @@ class PreConsumeTestCase(TestCase):
c.path = "path-to-file"
self.assertRaises(ConsumerError, c.run_pre_consume_script)
@mock.patch("documents.consumer.Popen")
@mock.patch("documents.consumer.run")
def test_pre_consume_script(self, m):
with tempfile.NamedTemporaryFile() as script:
with override_settings(PRE_CONSUME_SCRIPT=script.name):
@ -830,14 +841,78 @@ class PreConsumeTestCase(TestCase):
args, kwargs = m.call_args
command = args[0]
command = kwargs["args"]
self.assertEqual(command[0], script.name)
self.assertEqual(command[1], "path-to-file")
@mock.patch("documents.consumer.Consumer.log")
def test_script_with_output(self, mocked_log):
"""
GIVEN:
- A script which outputs to stdout and stderr
WHEN:
- The script is executed as a consume script
THEN:
- The script's outputs are logged
"""
with tempfile.NamedTemporaryFile(mode="w") as script:
# Write up a little script
with script.file as outfile:
outfile.write("#!/usr/bin/env bash\n")
outfile.write("echo This message goes to stdout\n")
outfile.write("echo This message goes to stderr >&2")
# Make the file executable
st = os.stat(script.name)
os.chmod(script.name, st.st_mode | stat.S_IEXEC)
with override_settings(PRE_CONSUME_SCRIPT=script.name):
c = Consumer()
c.path = "path-to-file"
c.run_pre_consume_script()
mocked_log.assert_called()
mocked_log.assert_any_call("info", "This message goes to stdout")
mocked_log.assert_any_call("warning", "This message goes to stderr")
def test_script_exit_non_zero(self):
"""
GIVEN:
- A script which exits with a non-zero exit code
WHEN:
- The script is executed as a pre-consume script
THEN:
- A ConsumerError is raised
"""
with tempfile.NamedTemporaryFile(mode="w") as script:
# Write up a little script
with script.file as outfile:
outfile.write("#!/usr/bin/env bash\n")
outfile.write("exit 100\n")
# Make the file executable
st = os.stat(script.name)
os.chmod(script.name, st.st_mode | stat.S_IEXEC)
with override_settings(PRE_CONSUME_SCRIPT=script.name):
c = Consumer()
c.path = "path-to-file"
self.assertRaises(ConsumerError, c.run_pre_consume_script)
class PostConsumeTestCase(TestCase):
@mock.patch("documents.consumer.Popen")
def setUp(self) -> None:
# this prevents websocket message reports during testing.
patcher = mock.patch("documents.consumer.Consumer._send_progress")
self._send_progress = patcher.start()
self.addCleanup(patcher.stop)
return super().setUp()
@mock.patch("documents.consumer.run")
@override_settings(POST_CONSUME_SCRIPT=None)
def test_no_post_consume_script(self, m):
doc = Document.objects.create(title="Test", mime_type="application/pdf")
@ -858,7 +933,7 @@ class PostConsumeTestCase(TestCase):
c.filename = "somefile.pdf"
self.assertRaises(ConsumerError, c.run_post_consume_script, doc)
@mock.patch("documents.consumer.Popen")
@mock.patch("documents.consumer.run")
def test_post_consume_script_simple(self, m):
with tempfile.NamedTemporaryFile() as script:
with override_settings(POST_CONSUME_SCRIPT=script.name):
@ -868,7 +943,7 @@ class PostConsumeTestCase(TestCase):
m.assert_called_once()
@mock.patch("documents.consumer.Popen")
@mock.patch("documents.consumer.run")
def test_post_consume_script_with_correspondent(self, m):
with tempfile.NamedTemporaryFile() as script:
with override_settings(POST_CONSUME_SCRIPT=script.name):
@ -889,7 +964,7 @@ class PostConsumeTestCase(TestCase):
args, kwargs = m.call_args
command = args[0]
command = kwargs["args"]
self.assertEqual(command[0], script.name)
self.assertEqual(command[1], str(doc.pk))
@ -897,3 +972,29 @@ class PostConsumeTestCase(TestCase):
self.assertEqual(command[6], f"/api/documents/{doc.pk}/thumb/")
self.assertEqual(command[7], "my_bank")
self.assertCountEqual(command[8].split(","), ["a", "b"])
def test_script_exit_non_zero(self):
"""
GIVEN:
- A script which exits with a non-zero exit code
WHEN:
- The script is executed as a post-consume script
THEN:
- A ConsumerError is raised
"""
with tempfile.NamedTemporaryFile(mode="w") as script:
# Write up a little script
with script.file as outfile:
outfile.write("#!/usr/bin/env bash\n")
outfile.write("exit -500\n")
# Make the file executable
st = os.stat(script.name)
os.chmod(script.name, st.st_mode | stat.S_IEXEC)
with override_settings(POST_CONSUME_SCRIPT=script.name):
c = Consumer()
doc = Document.objects.create(title="Test", mime_type="application/pdf")
c.path = "path-to-file"
with self.assertRaises(ConsumerError):
c.run_post_consume_script(doc)