mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-11 10:00:48 -05:00
41 lines
1.1 KiB
Python
41 lines
1.1 KiB
Python
import subprocess
|
|
|
|
from django.conf import settings
|
|
from django.core.checks import Error
|
|
from django.core.checks import register
|
|
from django.core.checks import Warning
|
|
|
|
|
|
def get_tesseract_langs():
    """Return the language codes reported by ``tesseract --list-langs``.

    Runs the ``tesseract`` executable found on ``PATH`` and parses its
    standard output. The first line of the output is treated as a header
    and discarded; every remaining non-blank line is returned with its
    surrounding whitespace removed.

    Returns:
        list[str]: One entry per language line printed by tesseract, in
        the order tesseract printed them.

    Raises:
        OSError: If the ``tesseract`` executable cannot be started (for
            example ``FileNotFoundError`` when it is not on ``PATH``).
    """
    # Only stdout is captured; stderr is left attached to the parent
    # process. The exit status is deliberately not checked.
    proc = subprocess.run(
        ["tesseract", "--list-langs"],
        stdout=subprocess.PIPE,
        check=False,
    )

    # splitlines() handles both "\n" and "\r\n" line endings, so no stray
    # "\r" ends up glued to a language code and breaks membership tests.
    lines = proc.stdout.decode().strip().splitlines()

    # lines[0] is the header line; everything after it is a language code.
    return [line.strip() for line in lines[1:] if line.strip()]
|
|
|
|
|
|
@register()
def check_default_language_available(app_configs, **kwargs):
    """Django system check: verify the configured OCR languages are installed.

    Reads ``settings.OCR_LANGUAGE``, a ``+``-separated list of tesseract
    language codes, and compares each code against the languages returned
    by ``get_tesseract_langs()``.

    Args:
        app_configs: Passed in by Django's check framework; not used here.
        **kwargs: Extra keyword arguments from the check framework; not
            used here.

    Returns:
        list: A single ``Warning`` when no OCR language is configured, one
        ``Error`` per configured language that tesseract does not report
        as installed, or an empty list when every language is available.
    """
    # Guard clause first: with nothing configured there is nothing to
    # compare, so avoid spawning tesseract at all.
    if not settings.OCR_LANGUAGE:
        return [
            Warning(
                "No OCR language has been specified with PAPERLESS_OCR_LANGUAGE. "
                "This means that tesseract will fallback to english.",
            ),
        ]

    installed_langs = set(get_tesseract_langs())

    # Tolerate incidental whitespace around the "+" separators.
    specified_langs = [lang.strip() for lang in settings.OCR_LANGUAGE.split("+")]

    # Report every missing language in one pass so the user can fix the
    # setting once instead of discovering the problems one at a time.
    return [
        Error(
            f"The selected ocr language {lang} is "
            "not installed. Paperless cannot OCR your documents "
            "without it. Please fix PAPERLESS_OCR_LANGUAGE.",
        )
        for lang in specified_langs
        if lang not in installed_langs
    ]
|