Fix: Rework system check so it won't crash if tesseract is not found (#7640)

This commit is contained in:
Trenton H 2024-09-08 12:17:32 -07:00 committed by GitHub
parent cc25cbc026
commit 3df8be0bc7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 21 additions and 15 deletions

View File

@@ -78,7 +78,7 @@ def binaries_check(app_configs, **kwargs):
error = "Paperless can't find {}. Without it, consumption is impossible."
hint = "Either it's not in your ${PATH} or it's not installed."
binaries = (settings.CONVERT_BINARY, "tesseract")
binaries = (settings.CONVERT_BINARY, "tesseract", "gs")
check_messages = []
for binary in binaries:

View File

@@ -21,26 +21,32 @@ def get_tesseract_langs():
@register()
def check_default_language_available(app_configs, **kwargs):
installed_langs = get_tesseract_langs()
errs = []
if not settings.OCR_LANGUAGE:
return [
errs.append(
Warning(
"No OCR language has been specified with PAPERLESS_OCR_LANGUAGE. "
"This means that tesseract will fallback to english.",
),
]
)
return errs
specified_langs = settings.OCR_LANGUAGE.split("+")
# binaries_check in paperless will check and report if this doesn't exist
# So skip trying to do anything here and let that handle missing binaries
if shutil.which("tesseract") is not None:
installed_langs = get_tesseract_langs()
for lang in specified_langs:
if lang not in installed_langs:
return [
Error(
f"The selected ocr language {lang} is "
f"not installed. Paperless cannot OCR your documents "
f"without it. Please fix PAPERLESS_OCR_LANGUAGE.",
),
]
specified_langs = [x.strip() for x in settings.OCR_LANGUAGE.split("+")]
return []
for lang in specified_langs:
if lang not in installed_langs:
errs.append(
Error(
f"The selected ocr language {lang} is "
f"not installed. Paperless cannot OCR your documents "
f"without it. Please fix PAPERLESS_OCR_LANGUAGE.",
),
)
return errs