From f0497e77449bcb18ede3452e26fc001dfd3feaf4 Mon Sep 17 00:00:00 2001 From: Trenton Holmes <797416+stumpylog@users.noreply.github.com> Date: Sun, 27 Nov 2022 08:28:22 -0800 Subject: [PATCH] Fixes how a language code like chi-sim is treated in the checks --- src/paperless_tesseract/checks.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/paperless_tesseract/checks.py b/src/paperless_tesseract/checks.py index 99780cad4..c63761f31 100644 --- a/src/paperless_tesseract/checks.py +++ b/src/paperless_tesseract/checks.py @@ -1,3 +1,4 @@ +import shutil import subprocess from django.conf import settings @@ -7,10 +8,16 @@ from django.core.checks import Warning def get_tesseract_langs(): - with subprocess.Popen(["tesseract", "--list-langs"], stdout=subprocess.PIPE) as p: - stdout, stderr = p.communicate() + proc = subprocess.run( + [shutil.which("tesseract"), "--list-langs"], + capture_output=True, + ) - return stdout.decode().strip().split("\n")[1:] + # Decode bytes to string, split on newlines, trim out the header + proc_lines = proc.stdout.decode("utf8", errors="ignore").strip().split("\n")[1:] + + # Replace _ with - to convert two part languages to the expected code + return [x.replace("_", "-") for x in proc_lines] @register()