mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Chore: Standardize subprocess running and logging (#6275)
This commit is contained in:
parent
0f8b2e69c9
commit
2c43b06910
@ -5,8 +5,6 @@ import tempfile
|
||||
import uuid
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
from subprocess import CompletedProcess
|
||||
from subprocess import run
|
||||
from typing import TYPE_CHECKING
|
||||
from typing import Optional
|
||||
|
||||
@ -51,6 +49,7 @@ from documents.signals import document_consumption_finished
|
||||
from documents.signals import document_consumption_started
|
||||
from documents.utils import copy_basic_file_stats
|
||||
from documents.utils import copy_file_with_basic_stats
|
||||
from documents.utils import run_subprocess
|
||||
|
||||
|
||||
class WorkflowTriggerPlugin(
|
||||
@ -397,20 +396,15 @@ class Consumer(LoggingMixin):
|
||||
script_env["TASK_ID"] = self.task_id or ""
|
||||
|
||||
try:
|
||||
completed_proc = run(
|
||||
args=[
|
||||
run_subprocess(
|
||||
[
|
||||
settings.PRE_CONSUME_SCRIPT,
|
||||
original_file_path,
|
||||
],
|
||||
env=script_env,
|
||||
capture_output=True,
|
||||
script_env,
|
||||
self.log,
|
||||
)
|
||||
|
||||
self._log_script_outputs(completed_proc)
|
||||
|
||||
# Raises exception on non-zero output
|
||||
completed_proc.check_returncode()
|
||||
|
||||
except Exception as e:
|
||||
self._fail(
|
||||
ConsumerStatusShortMessage.PRE_CONSUME_SCRIPT_ERROR,
|
||||
@ -468,8 +462,8 @@ class Consumer(LoggingMixin):
|
||||
script_env["TASK_ID"] = self.task_id or ""
|
||||
|
||||
try:
|
||||
completed_proc = run(
|
||||
args=[
|
||||
run_subprocess(
|
||||
[
|
||||
settings.POST_CONSUME_SCRIPT,
|
||||
str(document.pk),
|
||||
document.get_public_filename(),
|
||||
@ -480,15 +474,10 @@ class Consumer(LoggingMixin):
|
||||
str(document.correspondent),
|
||||
str(",".join(document.tags.all().values_list("name", flat=True))),
|
||||
],
|
||||
env=script_env,
|
||||
capture_output=True,
|
||||
script_env,
|
||||
self.log,
|
||||
)
|
||||
|
||||
self._log_script_outputs(completed_proc)
|
||||
|
||||
# Raises exception on non-zero output
|
||||
completed_proc.check_returncode()
|
||||
|
||||
except Exception as e:
|
||||
self._fail(
|
||||
ConsumerStatusShortMessage.POST_CONSUME_SCRIPT_ERROR,
|
||||
@ -929,41 +918,6 @@ class Consumer(LoggingMixin):
|
||||
except Exception: # pragma: no cover
|
||||
pass
|
||||
|
||||
def _log_script_outputs(self, completed_process: CompletedProcess):
|
||||
"""
|
||||
Decodes a process stdout and stderr streams and logs them to the main log
|
||||
"""
|
||||
# Log what the script exited as
|
||||
self.log.info(
|
||||
f"{completed_process.args[0]} exited {completed_process.returncode}",
|
||||
)
|
||||
|
||||
# Decode the output (if any)
|
||||
if len(completed_process.stdout):
|
||||
stdout_str = (
|
||||
completed_process.stdout.decode("utf8", errors="ignore")
|
||||
.strip()
|
||||
.split(
|
||||
"\n",
|
||||
)
|
||||
)
|
||||
self.log.info("Script stdout:")
|
||||
for line in stdout_str:
|
||||
self.log.info(line)
|
||||
|
||||
if len(completed_process.stderr):
|
||||
stderr_str = (
|
||||
completed_process.stderr.decode("utf8", errors="ignore")
|
||||
.strip()
|
||||
.split(
|
||||
"\n",
|
||||
)
|
||||
)
|
||||
|
||||
self.log.warning("Script stderr:")
|
||||
for line in stderr_str:
|
||||
self.log.warning(line)
|
||||
|
||||
|
||||
def parse_doc_title_w_placeholders(
|
||||
title: str,
|
||||
|
@ -1,5 +1,4 @@
|
||||
from pathlib import Path
|
||||
from subprocess import run
|
||||
|
||||
import img2pdf
|
||||
from django.conf import settings
|
||||
@ -7,6 +6,7 @@ from PIL import Image
|
||||
|
||||
from documents.utils import copy_basic_file_stats
|
||||
from documents.utils import maybe_override_pixel_limit
|
||||
from documents.utils import run_subprocess
|
||||
|
||||
|
||||
def convert_from_tiff_to_pdf(tiff_path: Path, target_directory: Path) -> Path:
|
||||
@ -27,7 +27,7 @@ def convert_from_tiff_to_pdf(tiff_path: Path, target_directory: Path) -> Path:
|
||||
# Note the save into the temp folder, so as not to trigger a new
|
||||
# consume
|
||||
scratch_image = target_directory / tiff_path.name
|
||||
run(
|
||||
run_subprocess(
|
||||
[
|
||||
settings.CONVERT_BINARY,
|
||||
"-alpha",
|
||||
|
@ -18,6 +18,7 @@ from django.utils import timezone
|
||||
from documents.loggers import LoggingMixin
|
||||
from documents.signals import document_consumer_declaration
|
||||
from documents.utils import copy_file_with_basic_stats
|
||||
from documents.utils import run_subprocess
|
||||
|
||||
# This regular expression will try to find dates in the document at
|
||||
# hand and will match the following formats:
|
||||
@ -164,8 +165,12 @@ def run_convert(
|
||||
|
||||
logger.debug("Execute: " + " ".join(args), extra={"group": logging_group})
|
||||
|
||||
if not subprocess.Popen(args, env=environment).wait() == 0:
|
||||
raise ParseError(f"Convert failed at {args}")
|
||||
try:
|
||||
run_subprocess(args, environment, logger)
|
||||
except subprocess.CalledProcessError as e:
|
||||
raise ParseError(f"Convert failed at {args}") from e
|
||||
except Exception as e: # pragma: no cover
|
||||
raise ParseError("Unknown error running convert") from e
|
||||
|
||||
|
||||
def get_default_thumbnail() -> Path:
|
||||
@ -188,9 +193,12 @@ def make_thumbnail_from_pdf_gs_fallback(in_path, temp_dir, logging_group=None) -
|
||||
# Ghostscript doesn't handle WebP outputs
|
||||
gs_out_path = os.path.join(temp_dir, "gs_out.png")
|
||||
cmd = [settings.GS_BINARY, "-q", "-sDEVICE=pngalpha", "-o", gs_out_path, in_path]
|
||||
|
||||
try:
|
||||
if not subprocess.Popen(cmd).wait() == 0:
|
||||
raise ParseError(f"Thumbnail (gs) failed at {cmd}")
|
||||
try:
|
||||
run_subprocess(cmd, logger=logger)
|
||||
except subprocess.CalledProcessError as e:
|
||||
raise ParseError(f"Thumbnail (gs) failed at {cmd}") from e
|
||||
# then run convert on the output from gs to make WebP
|
||||
run_convert(
|
||||
density=300,
|
||||
|
@ -934,7 +934,7 @@ class PreConsumeTestCase(TestCase):
|
||||
|
||||
return super().setUp()
|
||||
|
||||
@mock.patch("documents.consumer.run")
|
||||
@mock.patch("documents.consumer.run_subprocess")
|
||||
@override_settings(PRE_CONSUME_SCRIPT=None)
|
||||
def test_no_pre_consume_script(self, m):
|
||||
c = Consumer()
|
||||
@ -942,7 +942,7 @@ class PreConsumeTestCase(TestCase):
|
||||
c.run_pre_consume_script()
|
||||
m.assert_not_called()
|
||||
|
||||
@mock.patch("documents.consumer.run")
|
||||
@mock.patch("documents.consumer.run_subprocess")
|
||||
@mock.patch("documents.consumer.Consumer._send_progress")
|
||||
@override_settings(PRE_CONSUME_SCRIPT="does-not-exist")
|
||||
def test_pre_consume_script_not_found(self, m, m2):
|
||||
@ -951,7 +951,7 @@ class PreConsumeTestCase(TestCase):
|
||||
c.working_copy = "path-to-file"
|
||||
self.assertRaises(ConsumerError, c.run_pre_consume_script)
|
||||
|
||||
@mock.patch("documents.consumer.run")
|
||||
@mock.patch("documents.consumer.run_subprocess")
|
||||
def test_pre_consume_script(self, m):
|
||||
with tempfile.NamedTemporaryFile() as script:
|
||||
with override_settings(PRE_CONSUME_SCRIPT=script.name):
|
||||
@ -963,10 +963,10 @@ class PreConsumeTestCase(TestCase):
|
||||
|
||||
m.assert_called_once()
|
||||
|
||||
args, kwargs = m.call_args
|
||||
args, _ = m.call_args
|
||||
|
||||
command = kwargs["args"]
|
||||
environment = kwargs["env"]
|
||||
command = args[0]
|
||||
environment = args[1]
|
||||
|
||||
self.assertEqual(command[0], script.name)
|
||||
self.assertEqual(command[1], "path-to-file")
|
||||
@ -1050,7 +1050,7 @@ class PostConsumeTestCase(TestCase):
|
||||
|
||||
return super().setUp()
|
||||
|
||||
@mock.patch("documents.consumer.run")
|
||||
@mock.patch("documents.consumer.run_subprocess")
|
||||
@override_settings(POST_CONSUME_SCRIPT=None)
|
||||
def test_no_post_consume_script(self, m):
|
||||
doc = Document.objects.create(title="Test", mime_type="application/pdf")
|
||||
@ -1075,7 +1075,7 @@ class PostConsumeTestCase(TestCase):
|
||||
doc,
|
||||
)
|
||||
|
||||
@mock.patch("documents.consumer.run")
|
||||
@mock.patch("documents.consumer.run_subprocess")
|
||||
def test_post_consume_script_simple(self, m):
|
||||
with tempfile.NamedTemporaryFile() as script:
|
||||
with override_settings(POST_CONSUME_SCRIPT=script.name):
|
||||
@ -1085,7 +1085,7 @@ class PostConsumeTestCase(TestCase):
|
||||
|
||||
m.assert_called_once()
|
||||
|
||||
@mock.patch("documents.consumer.run")
|
||||
@mock.patch("documents.consumer.run_subprocess")
|
||||
def test_post_consume_script_with_correspondent(self, m):
|
||||
with tempfile.NamedTemporaryFile() as script:
|
||||
with override_settings(POST_CONSUME_SCRIPT=script.name):
|
||||
@ -1106,10 +1106,10 @@ class PostConsumeTestCase(TestCase):
|
||||
|
||||
m.assert_called_once()
|
||||
|
||||
_, kwargs = m.call_args
|
||||
args, _ = m.call_args
|
||||
|
||||
command = kwargs["args"]
|
||||
environment = kwargs["env"]
|
||||
command = args[0]
|
||||
environment = args[1]
|
||||
|
||||
self.assertEqual(command[0], script.name)
|
||||
self.assertEqual(command[1], str(doc.pk))
|
||||
|
@ -1,6 +1,9 @@
|
||||
import logging
|
||||
import shutil
|
||||
from os import utime
|
||||
from pathlib import Path
|
||||
from subprocess import CompletedProcess
|
||||
from subprocess import run
|
||||
from typing import Optional
|
||||
from typing import Union
|
||||
|
||||
@ -56,3 +59,54 @@ def maybe_override_pixel_limit() -> None:
|
||||
if pixel_count == 0:
|
||||
pixel_count = None
|
||||
Image.MAX_IMAGE_PIXELS = pixel_count
|
||||
|
||||
|
||||
def run_subprocess(
|
||||
arguments: list[str],
|
||||
env: Optional[dict[str, str]] = None,
|
||||
logger: Optional[logging.Logger] = None,
|
||||
*,
|
||||
check_exit_code: bool = True,
|
||||
log_stdout: bool = True,
|
||||
log_stderr: bool = True,
|
||||
) -> CompletedProcess:
|
||||
"""
|
||||
Runs a subprocess and logs its output, checking return code if requested
|
||||
"""
|
||||
|
||||
proc_name = arguments[0]
|
||||
|
||||
completed_proc = run(args=arguments, env=env, capture_output=True, check=False)
|
||||
|
||||
if logger:
|
||||
logger.info(f"{proc_name} exited {completed_proc.returncode}")
|
||||
|
||||
if log_stdout and logger and completed_proc.stdout:
|
||||
stdout_str = (
|
||||
completed_proc.stdout.decode("utf8", errors="ignore")
|
||||
.strip()
|
||||
.split(
|
||||
"\n",
|
||||
)
|
||||
)
|
||||
logger.info(f"{proc_name} stdout:")
|
||||
for line in stdout_str:
|
||||
logger.info(line)
|
||||
|
||||
if log_stderr and logger and completed_proc.stderr:
|
||||
stderr_str = (
|
||||
completed_proc.stderr.decode("utf8", errors="ignore")
|
||||
.strip()
|
||||
.split(
|
||||
"\n",
|
||||
)
|
||||
)
|
||||
logger.info(f"{proc_name} stderr:")
|
||||
for line in stderr_str:
|
||||
logger.warning(line)
|
||||
|
||||
# Last, if requested, after logging outputs
|
||||
if check_exit_code:
|
||||
completed_proc.check_returncode()
|
||||
|
||||
return completed_proc
|
||||
|
@ -1,6 +1,5 @@
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
@ -13,6 +12,7 @@ from documents.parsers import DocumentParser
|
||||
from documents.parsers import ParseError
|
||||
from documents.parsers import make_thumbnail_from_pdf
|
||||
from documents.utils import maybe_override_pixel_limit
|
||||
from documents.utils import run_subprocess
|
||||
from paperless.config import OcrConfig
|
||||
from paperless.models import ArchiveFileChoices
|
||||
from paperless.models import CleanChoices
|
||||
@ -103,7 +103,7 @@ class RasterisedDocumentParser(DocumentParser):
|
||||
|
||||
def remove_alpha(self, image_path: str) -> Path:
|
||||
no_alpha_image = Path(self.tempdir) / "image-no-alpha"
|
||||
subprocess.run(
|
||||
run_subprocess(
|
||||
[
|
||||
settings.CONVERT_BINARY,
|
||||
"-alpha",
|
||||
@ -111,6 +111,7 @@ class RasterisedDocumentParser(DocumentParser):
|
||||
image_path,
|
||||
no_alpha_image,
|
||||
],
|
||||
logger=self.log,
|
||||
)
|
||||
return no_alpha_image
|
||||
|
||||
@ -169,7 +170,7 @@ class RasterisedDocumentParser(DocumentParser):
|
||||
mode="w+",
|
||||
dir=self.tempdir,
|
||||
) as tmp:
|
||||
subprocess.run(
|
||||
run_subprocess(
|
||||
[
|
||||
"pdftotext",
|
||||
"-q",
|
||||
@ -179,6 +180,7 @@ class RasterisedDocumentParser(DocumentParser):
|
||||
pdf_file,
|
||||
tmp.name,
|
||||
],
|
||||
logger=self.log,
|
||||
)
|
||||
text = self.read_file_handle_unicode_errors(Path(tmp.name))
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user