mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-09 09:58:20 -05:00
Chore: Standardize subprocess running and logging (#6275)
This commit is contained in:
parent
0f8b2e69c9
commit
2c43b06910
@ -5,8 +5,6 @@ import tempfile
|
|||||||
import uuid
|
import uuid
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from subprocess import CompletedProcess
|
|
||||||
from subprocess import run
|
|
||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
@ -51,6 +49,7 @@ from documents.signals import document_consumption_finished
|
|||||||
from documents.signals import document_consumption_started
|
from documents.signals import document_consumption_started
|
||||||
from documents.utils import copy_basic_file_stats
|
from documents.utils import copy_basic_file_stats
|
||||||
from documents.utils import copy_file_with_basic_stats
|
from documents.utils import copy_file_with_basic_stats
|
||||||
|
from documents.utils import run_subprocess
|
||||||
|
|
||||||
|
|
||||||
class WorkflowTriggerPlugin(
|
class WorkflowTriggerPlugin(
|
||||||
@ -397,20 +396,15 @@ class Consumer(LoggingMixin):
|
|||||||
script_env["TASK_ID"] = self.task_id or ""
|
script_env["TASK_ID"] = self.task_id or ""
|
||||||
|
|
||||||
try:
|
try:
|
||||||
completed_proc = run(
|
run_subprocess(
|
||||||
args=[
|
[
|
||||||
settings.PRE_CONSUME_SCRIPT,
|
settings.PRE_CONSUME_SCRIPT,
|
||||||
original_file_path,
|
original_file_path,
|
||||||
],
|
],
|
||||||
env=script_env,
|
script_env,
|
||||||
capture_output=True,
|
self.log,
|
||||||
)
|
)
|
||||||
|
|
||||||
self._log_script_outputs(completed_proc)
|
|
||||||
|
|
||||||
# Raises exception on non-zero output
|
|
||||||
completed_proc.check_returncode()
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self._fail(
|
self._fail(
|
||||||
ConsumerStatusShortMessage.PRE_CONSUME_SCRIPT_ERROR,
|
ConsumerStatusShortMessage.PRE_CONSUME_SCRIPT_ERROR,
|
||||||
@ -468,8 +462,8 @@ class Consumer(LoggingMixin):
|
|||||||
script_env["TASK_ID"] = self.task_id or ""
|
script_env["TASK_ID"] = self.task_id or ""
|
||||||
|
|
||||||
try:
|
try:
|
||||||
completed_proc = run(
|
run_subprocess(
|
||||||
args=[
|
[
|
||||||
settings.POST_CONSUME_SCRIPT,
|
settings.POST_CONSUME_SCRIPT,
|
||||||
str(document.pk),
|
str(document.pk),
|
||||||
document.get_public_filename(),
|
document.get_public_filename(),
|
||||||
@ -480,15 +474,10 @@ class Consumer(LoggingMixin):
|
|||||||
str(document.correspondent),
|
str(document.correspondent),
|
||||||
str(",".join(document.tags.all().values_list("name", flat=True))),
|
str(",".join(document.tags.all().values_list("name", flat=True))),
|
||||||
],
|
],
|
||||||
env=script_env,
|
script_env,
|
||||||
capture_output=True,
|
self.log,
|
||||||
)
|
)
|
||||||
|
|
||||||
self._log_script_outputs(completed_proc)
|
|
||||||
|
|
||||||
# Raises exception on non-zero output
|
|
||||||
completed_proc.check_returncode()
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self._fail(
|
self._fail(
|
||||||
ConsumerStatusShortMessage.POST_CONSUME_SCRIPT_ERROR,
|
ConsumerStatusShortMessage.POST_CONSUME_SCRIPT_ERROR,
|
||||||
@ -929,41 +918,6 @@ class Consumer(LoggingMixin):
|
|||||||
except Exception: # pragma: no cover
|
except Exception: # pragma: no cover
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def _log_script_outputs(self, completed_process: CompletedProcess):
|
|
||||||
"""
|
|
||||||
Decodes a process stdout and stderr streams and logs them to the main log
|
|
||||||
"""
|
|
||||||
# Log what the script exited as
|
|
||||||
self.log.info(
|
|
||||||
f"{completed_process.args[0]} exited {completed_process.returncode}",
|
|
||||||
)
|
|
||||||
|
|
||||||
# Decode the output (if any)
|
|
||||||
if len(completed_process.stdout):
|
|
||||||
stdout_str = (
|
|
||||||
completed_process.stdout.decode("utf8", errors="ignore")
|
|
||||||
.strip()
|
|
||||||
.split(
|
|
||||||
"\n",
|
|
||||||
)
|
|
||||||
)
|
|
||||||
self.log.info("Script stdout:")
|
|
||||||
for line in stdout_str:
|
|
||||||
self.log.info(line)
|
|
||||||
|
|
||||||
if len(completed_process.stderr):
|
|
||||||
stderr_str = (
|
|
||||||
completed_process.stderr.decode("utf8", errors="ignore")
|
|
||||||
.strip()
|
|
||||||
.split(
|
|
||||||
"\n",
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
self.log.warning("Script stderr:")
|
|
||||||
for line in stderr_str:
|
|
||||||
self.log.warning(line)
|
|
||||||
|
|
||||||
|
|
||||||
def parse_doc_title_w_placeholders(
|
def parse_doc_title_w_placeholders(
|
||||||
title: str,
|
title: str,
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from subprocess import run
|
|
||||||
|
|
||||||
import img2pdf
|
import img2pdf
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
@ -7,6 +6,7 @@ from PIL import Image
|
|||||||
|
|
||||||
from documents.utils import copy_basic_file_stats
|
from documents.utils import copy_basic_file_stats
|
||||||
from documents.utils import maybe_override_pixel_limit
|
from documents.utils import maybe_override_pixel_limit
|
||||||
|
from documents.utils import run_subprocess
|
||||||
|
|
||||||
|
|
||||||
def convert_from_tiff_to_pdf(tiff_path: Path, target_directory: Path) -> Path:
|
def convert_from_tiff_to_pdf(tiff_path: Path, target_directory: Path) -> Path:
|
||||||
@ -27,7 +27,7 @@ def convert_from_tiff_to_pdf(tiff_path: Path, target_directory: Path) -> Path:
|
|||||||
# Note the save into the temp folder, so as not to trigger a new
|
# Note the save into the temp folder, so as not to trigger a new
|
||||||
# consume
|
# consume
|
||||||
scratch_image = target_directory / tiff_path.name
|
scratch_image = target_directory / tiff_path.name
|
||||||
run(
|
run_subprocess(
|
||||||
[
|
[
|
||||||
settings.CONVERT_BINARY,
|
settings.CONVERT_BINARY,
|
||||||
"-alpha",
|
"-alpha",
|
||||||
|
@ -18,6 +18,7 @@ from django.utils import timezone
|
|||||||
from documents.loggers import LoggingMixin
|
from documents.loggers import LoggingMixin
|
||||||
from documents.signals import document_consumer_declaration
|
from documents.signals import document_consumer_declaration
|
||||||
from documents.utils import copy_file_with_basic_stats
|
from documents.utils import copy_file_with_basic_stats
|
||||||
|
from documents.utils import run_subprocess
|
||||||
|
|
||||||
# This regular expression will try to find dates in the document at
|
# This regular expression will try to find dates in the document at
|
||||||
# hand and will match the following formats:
|
# hand and will match the following formats:
|
||||||
@ -164,8 +165,12 @@ def run_convert(
|
|||||||
|
|
||||||
logger.debug("Execute: " + " ".join(args), extra={"group": logging_group})
|
logger.debug("Execute: " + " ".join(args), extra={"group": logging_group})
|
||||||
|
|
||||||
if not subprocess.Popen(args, env=environment).wait() == 0:
|
try:
|
||||||
raise ParseError(f"Convert failed at {args}")
|
run_subprocess(args, environment, logger)
|
||||||
|
except subprocess.CalledProcessError as e:
|
||||||
|
raise ParseError(f"Convert failed at {args}") from e
|
||||||
|
except Exception as e: # pragma: no cover
|
||||||
|
raise ParseError("Unknown error running convert") from e
|
||||||
|
|
||||||
|
|
||||||
def get_default_thumbnail() -> Path:
|
def get_default_thumbnail() -> Path:
|
||||||
@ -188,9 +193,12 @@ def make_thumbnail_from_pdf_gs_fallback(in_path, temp_dir, logging_group=None) -
|
|||||||
# Ghostscript doesn't handle WebP outputs
|
# Ghostscript doesn't handle WebP outputs
|
||||||
gs_out_path = os.path.join(temp_dir, "gs_out.png")
|
gs_out_path = os.path.join(temp_dir, "gs_out.png")
|
||||||
cmd = [settings.GS_BINARY, "-q", "-sDEVICE=pngalpha", "-o", gs_out_path, in_path]
|
cmd = [settings.GS_BINARY, "-q", "-sDEVICE=pngalpha", "-o", gs_out_path, in_path]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if not subprocess.Popen(cmd).wait() == 0:
|
try:
|
||||||
raise ParseError(f"Thumbnail (gs) failed at {cmd}")
|
run_subprocess(cmd, logger=logger)
|
||||||
|
except subprocess.CalledProcessError as e:
|
||||||
|
raise ParseError(f"Thumbnail (gs) failed at {cmd}") from e
|
||||||
# then run convert on the output from gs to make WebP
|
# then run convert on the output from gs to make WebP
|
||||||
run_convert(
|
run_convert(
|
||||||
density=300,
|
density=300,
|
||||||
|
@ -934,7 +934,7 @@ class PreConsumeTestCase(TestCase):
|
|||||||
|
|
||||||
return super().setUp()
|
return super().setUp()
|
||||||
|
|
||||||
@mock.patch("documents.consumer.run")
|
@mock.patch("documents.consumer.run_subprocess")
|
||||||
@override_settings(PRE_CONSUME_SCRIPT=None)
|
@override_settings(PRE_CONSUME_SCRIPT=None)
|
||||||
def test_no_pre_consume_script(self, m):
|
def test_no_pre_consume_script(self, m):
|
||||||
c = Consumer()
|
c = Consumer()
|
||||||
@ -942,7 +942,7 @@ class PreConsumeTestCase(TestCase):
|
|||||||
c.run_pre_consume_script()
|
c.run_pre_consume_script()
|
||||||
m.assert_not_called()
|
m.assert_not_called()
|
||||||
|
|
||||||
@mock.patch("documents.consumer.run")
|
@mock.patch("documents.consumer.run_subprocess")
|
||||||
@mock.patch("documents.consumer.Consumer._send_progress")
|
@mock.patch("documents.consumer.Consumer._send_progress")
|
||||||
@override_settings(PRE_CONSUME_SCRIPT="does-not-exist")
|
@override_settings(PRE_CONSUME_SCRIPT="does-not-exist")
|
||||||
def test_pre_consume_script_not_found(self, m, m2):
|
def test_pre_consume_script_not_found(self, m, m2):
|
||||||
@ -951,7 +951,7 @@ class PreConsumeTestCase(TestCase):
|
|||||||
c.working_copy = "path-to-file"
|
c.working_copy = "path-to-file"
|
||||||
self.assertRaises(ConsumerError, c.run_pre_consume_script)
|
self.assertRaises(ConsumerError, c.run_pre_consume_script)
|
||||||
|
|
||||||
@mock.patch("documents.consumer.run")
|
@mock.patch("documents.consumer.run_subprocess")
|
||||||
def test_pre_consume_script(self, m):
|
def test_pre_consume_script(self, m):
|
||||||
with tempfile.NamedTemporaryFile() as script:
|
with tempfile.NamedTemporaryFile() as script:
|
||||||
with override_settings(PRE_CONSUME_SCRIPT=script.name):
|
with override_settings(PRE_CONSUME_SCRIPT=script.name):
|
||||||
@ -963,10 +963,10 @@ class PreConsumeTestCase(TestCase):
|
|||||||
|
|
||||||
m.assert_called_once()
|
m.assert_called_once()
|
||||||
|
|
||||||
args, kwargs = m.call_args
|
args, _ = m.call_args
|
||||||
|
|
||||||
command = kwargs["args"]
|
command = args[0]
|
||||||
environment = kwargs["env"]
|
environment = args[1]
|
||||||
|
|
||||||
self.assertEqual(command[0], script.name)
|
self.assertEqual(command[0], script.name)
|
||||||
self.assertEqual(command[1], "path-to-file")
|
self.assertEqual(command[1], "path-to-file")
|
||||||
@ -1050,7 +1050,7 @@ class PostConsumeTestCase(TestCase):
|
|||||||
|
|
||||||
return super().setUp()
|
return super().setUp()
|
||||||
|
|
||||||
@mock.patch("documents.consumer.run")
|
@mock.patch("documents.consumer.run_subprocess")
|
||||||
@override_settings(POST_CONSUME_SCRIPT=None)
|
@override_settings(POST_CONSUME_SCRIPT=None)
|
||||||
def test_no_post_consume_script(self, m):
|
def test_no_post_consume_script(self, m):
|
||||||
doc = Document.objects.create(title="Test", mime_type="application/pdf")
|
doc = Document.objects.create(title="Test", mime_type="application/pdf")
|
||||||
@ -1075,7 +1075,7 @@ class PostConsumeTestCase(TestCase):
|
|||||||
doc,
|
doc,
|
||||||
)
|
)
|
||||||
|
|
||||||
@mock.patch("documents.consumer.run")
|
@mock.patch("documents.consumer.run_subprocess")
|
||||||
def test_post_consume_script_simple(self, m):
|
def test_post_consume_script_simple(self, m):
|
||||||
with tempfile.NamedTemporaryFile() as script:
|
with tempfile.NamedTemporaryFile() as script:
|
||||||
with override_settings(POST_CONSUME_SCRIPT=script.name):
|
with override_settings(POST_CONSUME_SCRIPT=script.name):
|
||||||
@ -1085,7 +1085,7 @@ class PostConsumeTestCase(TestCase):
|
|||||||
|
|
||||||
m.assert_called_once()
|
m.assert_called_once()
|
||||||
|
|
||||||
@mock.patch("documents.consumer.run")
|
@mock.patch("documents.consumer.run_subprocess")
|
||||||
def test_post_consume_script_with_correspondent(self, m):
|
def test_post_consume_script_with_correspondent(self, m):
|
||||||
with tempfile.NamedTemporaryFile() as script:
|
with tempfile.NamedTemporaryFile() as script:
|
||||||
with override_settings(POST_CONSUME_SCRIPT=script.name):
|
with override_settings(POST_CONSUME_SCRIPT=script.name):
|
||||||
@ -1106,10 +1106,10 @@ class PostConsumeTestCase(TestCase):
|
|||||||
|
|
||||||
m.assert_called_once()
|
m.assert_called_once()
|
||||||
|
|
||||||
_, kwargs = m.call_args
|
args, _ = m.call_args
|
||||||
|
|
||||||
command = kwargs["args"]
|
command = args[0]
|
||||||
environment = kwargs["env"]
|
environment = args[1]
|
||||||
|
|
||||||
self.assertEqual(command[0], script.name)
|
self.assertEqual(command[0], script.name)
|
||||||
self.assertEqual(command[1], str(doc.pk))
|
self.assertEqual(command[1], str(doc.pk))
|
||||||
|
@ -1,6 +1,9 @@
|
|||||||
|
import logging
|
||||||
import shutil
|
import shutil
|
||||||
from os import utime
|
from os import utime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from subprocess import CompletedProcess
|
||||||
|
from subprocess import run
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
from typing import Union
|
from typing import Union
|
||||||
|
|
||||||
@ -56,3 +59,54 @@ def maybe_override_pixel_limit() -> None:
|
|||||||
if pixel_count == 0:
|
if pixel_count == 0:
|
||||||
pixel_count = None
|
pixel_count = None
|
||||||
Image.MAX_IMAGE_PIXELS = pixel_count
|
Image.MAX_IMAGE_PIXELS = pixel_count
|
||||||
|
|
||||||
|
|
||||||
|
def run_subprocess(
|
||||||
|
arguments: list[str],
|
||||||
|
env: Optional[dict[str, str]] = None,
|
||||||
|
logger: Optional[logging.Logger] = None,
|
||||||
|
*,
|
||||||
|
check_exit_code: bool = True,
|
||||||
|
log_stdout: bool = True,
|
||||||
|
log_stderr: bool = True,
|
||||||
|
) -> CompletedProcess:
|
||||||
|
"""
|
||||||
|
Runs a subprocess and logs its output, checking return code if requested
|
||||||
|
"""
|
||||||
|
|
||||||
|
proc_name = arguments[0]
|
||||||
|
|
||||||
|
completed_proc = run(args=arguments, env=env, capture_output=True, check=False)
|
||||||
|
|
||||||
|
if logger:
|
||||||
|
logger.info(f"{proc_name} exited {completed_proc.returncode}")
|
||||||
|
|
||||||
|
if log_stdout and logger and completed_proc.stdout:
|
||||||
|
stdout_str = (
|
||||||
|
completed_proc.stdout.decode("utf8", errors="ignore")
|
||||||
|
.strip()
|
||||||
|
.split(
|
||||||
|
"\n",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
logger.info(f"{proc_name} stdout:")
|
||||||
|
for line in stdout_str:
|
||||||
|
logger.info(line)
|
||||||
|
|
||||||
|
if log_stderr and logger and completed_proc.stderr:
|
||||||
|
stderr_str = (
|
||||||
|
completed_proc.stderr.decode("utf8", errors="ignore")
|
||||||
|
.strip()
|
||||||
|
.split(
|
||||||
|
"\n",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
logger.info(f"{proc_name} stderr:")
|
||||||
|
for line in stderr_str:
|
||||||
|
logger.warning(line)
|
||||||
|
|
||||||
|
# Last, if requested, after logging outputs
|
||||||
|
if check_exit_code:
|
||||||
|
completed_proc.check_returncode()
|
||||||
|
|
||||||
|
return completed_proc
|
||||||
|
@ -1,6 +1,5 @@
|
|||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import subprocess
|
|
||||||
import tempfile
|
import tempfile
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
@ -13,6 +12,7 @@ from documents.parsers import DocumentParser
|
|||||||
from documents.parsers import ParseError
|
from documents.parsers import ParseError
|
||||||
from documents.parsers import make_thumbnail_from_pdf
|
from documents.parsers import make_thumbnail_from_pdf
|
||||||
from documents.utils import maybe_override_pixel_limit
|
from documents.utils import maybe_override_pixel_limit
|
||||||
|
from documents.utils import run_subprocess
|
||||||
from paperless.config import OcrConfig
|
from paperless.config import OcrConfig
|
||||||
from paperless.models import ArchiveFileChoices
|
from paperless.models import ArchiveFileChoices
|
||||||
from paperless.models import CleanChoices
|
from paperless.models import CleanChoices
|
||||||
@ -103,7 +103,7 @@ class RasterisedDocumentParser(DocumentParser):
|
|||||||
|
|
||||||
def remove_alpha(self, image_path: str) -> Path:
|
def remove_alpha(self, image_path: str) -> Path:
|
||||||
no_alpha_image = Path(self.tempdir) / "image-no-alpha"
|
no_alpha_image = Path(self.tempdir) / "image-no-alpha"
|
||||||
subprocess.run(
|
run_subprocess(
|
||||||
[
|
[
|
||||||
settings.CONVERT_BINARY,
|
settings.CONVERT_BINARY,
|
||||||
"-alpha",
|
"-alpha",
|
||||||
@ -111,6 +111,7 @@ class RasterisedDocumentParser(DocumentParser):
|
|||||||
image_path,
|
image_path,
|
||||||
no_alpha_image,
|
no_alpha_image,
|
||||||
],
|
],
|
||||||
|
logger=self.log,
|
||||||
)
|
)
|
||||||
return no_alpha_image
|
return no_alpha_image
|
||||||
|
|
||||||
@ -169,7 +170,7 @@ class RasterisedDocumentParser(DocumentParser):
|
|||||||
mode="w+",
|
mode="w+",
|
||||||
dir=self.tempdir,
|
dir=self.tempdir,
|
||||||
) as tmp:
|
) as tmp:
|
||||||
subprocess.run(
|
run_subprocess(
|
||||||
[
|
[
|
||||||
"pdftotext",
|
"pdftotext",
|
||||||
"-q",
|
"-q",
|
||||||
@ -179,6 +180,7 @@ class RasterisedDocumentParser(DocumentParser):
|
|||||||
pdf_file,
|
pdf_file,
|
||||||
tmp.name,
|
tmp.name,
|
||||||
],
|
],
|
||||||
|
logger=self.log,
|
||||||
)
|
)
|
||||||
text = self.read_file_handle_unicode_errors(Path(tmp.name))
|
text = self.read_file_handle_unicode_errors(Path(tmp.name))
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user