From 3df8be0bc70ccf05bd8bfe57ed8d5c8854b80c2e Mon Sep 17 00:00:00 2001 From: Trenton H <797416+stumpylog@users.noreply.github.com> Date: Sun, 8 Sep 2024 12:17:32 -0700 Subject: [PATCH] Fix: Rework system check so it won't crash if tesseract is not found (#7640) --- src/paperless/checks.py | 2 +- src/paperless_tesseract/checks.py | 34 ++++++++++++++++++------------- 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/src/paperless/checks.py b/src/paperless/checks.py index 4ba322666..150fcb201 100644 --- a/src/paperless/checks.py +++ b/src/paperless/checks.py @@ -78,7 +78,7 @@ def binaries_check(app_configs, **kwargs): error = "Paperless can't find {}. Without it, consumption is impossible." hint = "Either it's not in your ${PATH} or it's not installed." - binaries = (settings.CONVERT_BINARY, "tesseract") + binaries = (settings.CONVERT_BINARY, "tesseract", "gs") check_messages = [] for binary in binaries: diff --git a/src/paperless_tesseract/checks.py b/src/paperless_tesseract/checks.py index 82d255005..0d7a1d90d 100644 --- a/src/paperless_tesseract/checks.py +++ b/src/paperless_tesseract/checks.py @@ -21,26 +21,32 @@ def get_tesseract_langs(): @register() def check_default_language_available(app_configs, **kwargs): - installed_langs = get_tesseract_langs() + errs = [] if not settings.OCR_LANGUAGE: - return [ + errs.append( Warning( "No OCR language has been specified with PAPERLESS_OCR_LANGUAGE. " "This means that tesseract will fallback to english.", ), - ] + ) + return errs - specified_langs = settings.OCR_LANGUAGE.split("+") + # binaries_check in paperless will check and report if this doesn't exist + # So skip trying to do anything here and let that handle missing binaries + if shutil.which("tesseract") is not None: + installed_langs = get_tesseract_langs() - for lang in specified_langs: - if lang not in installed_langs: - return [ - Error( - f"The selected ocr language {lang} is " - f"not installed. Paperless cannot OCR your documents " - f"without it. Please fix PAPERLESS_OCR_LANGUAGE.", - ), - ] + specified_langs = [x.strip() for x in settings.OCR_LANGUAGE.split("+")] - return [] + for lang in specified_langs: + if lang not in installed_langs: + errs.append( + Error( + f"The selected ocr language {lang} is " + f"not installed. Paperless cannot OCR your documents " + f"without it. Please fix PAPERLESS_OCR_LANGUAGE.", + ), + ) + + return errs