Account for plus signs ("+") in the OCR language setting

2026-02-11 23:59:31 -06:00 · 2022-09-28 14:24:34 -07:00
parent 1e891414a3
commit d1a17480ea
1 changed files with 3 additions and 0 deletions
--- a/src/paperless/settings.py
+++ b/src/paperless/settings.py
@@ -719,7 +719,10 @@ def _get_nltk_language_setting(ocr_lang: str) -> Optional[str]:
    Maps an ISO-639-1 language code supported by Tesseract into
    an optional NLTK language name.  This is the set of common supported
    languages for all the NLTK data used.
    Assumption: The primary language is first
    """
    ocr_lang = ocr_lang.split("+")[0]
    iso_code_to_nltk = {
        "dan": "danish",
        "nld": "dutch",