From dd833643268e718c1c8e86708f7da8bc2513bcee Mon Sep 17 00:00:00 2001
From: Jonas Winkler
Date: Wed, 25 Nov 2020 10:52:38 +0100
Subject: [PATCH] default language check

---
Review notes (text between the three-dash line and the first "diff --git"
line is not part of the commit and is ignored when the patch is applied):

* get_tesseract_langs() runs `tesseract --list-langs`, decodes its stdout
  and returns every line after the first one. Only stdout is piped, so the
  `stderr` value returned by communicate() is always None and is unused.
* NOTE(review): the skipped first stdout line is presumably tesseract's
  "List of available languages" header - confirm for the supported
  tesseract versions.
* NOTE(review): subprocess.Popen raises FileNotFoundError when the
  `tesseract` executable cannot be found; nothing here catches it, so the
  check would raise instead of returning an Error - confirm this is
  intended.
* NOTE(review): settings.OCR_LANGUAGE is compared as one whole string. If
  combined values such as "deu+eng" are meant to be allowed, this check
  would report them as not installed - verify against the settings docs.
* `not settings.OCR_LANGUAGE in langs` is equivalent to
  `settings.OCR_LANGUAGE not in langs`; PEP 8 recommends the latter form.
* Both __init__.py files now use `from .checks import *`, so every name the
  respective checks module exports is imported into the package namespace.

 src/documents/__init__.py           |  3 ++-
 src/paperless_tesseract/__init__.py |  2 ++
 src/paperless_tesseract/checks.py   | 24 ++++++++++++++++++++++++
 3 files changed, 28 insertions(+), 1 deletion(-)
 create mode 100644 src/paperless_tesseract/checks.py

diff --git a/src/documents/__init__.py b/src/documents/__init__.py
index 864b5f5fe..5c9f358c3 100644
--- a/src/documents/__init__.py
+++ b/src/documents/__init__.py
@@ -1 +1,2 @@
-from .checks import changed_password_check
+# this is here so that django finds the checks.
+from .checks import *
diff --git a/src/paperless_tesseract/__init__.py b/src/paperless_tesseract/__init__.py
index e69de29bb..5c9f358c3 100644
--- a/src/paperless_tesseract/__init__.py
+++ b/src/paperless_tesseract/__init__.py
@@ -0,0 +1,2 @@
+# this is here so that django finds the checks.
+from .checks import *
diff --git a/src/paperless_tesseract/checks.py b/src/paperless_tesseract/checks.py
new file mode 100644
index 000000000..21f229e65
--- /dev/null
+++ b/src/paperless_tesseract/checks.py
@@ -0,0 +1,24 @@
+import subprocess
+
+from django.conf import settings
+from django.core.checks import Error, register
+
+
+def get_tesseract_langs():
+    with subprocess.Popen(['tesseract', '--list-langs'], stdout=subprocess.PIPE) as p:
+        stdout, stderr = p.communicate()
+
+    return stdout.decode().strip().split("\n")[1:]
+
+
+@register()
+def check_default_language_available(app_configs, **kwargs):
+    langs = get_tesseract_langs()
+
+    if not settings.OCR_LANGUAGE in langs:
+        return [Error(
+            f"The default ocr language {settings.OCR_LANGUAGE} is "
+            f"not installed. Paperless cannot OCR your documents "
+            f"without it. Please fix PAPERLESS_OCR_LANGUAGE.")]
+    else:
+        return []