Account for plusses in the OCR language setting

2026-01-10 21:34:20 -06:00 · 2022-09-28 14:24:34 -07:00
parent 2d71415ede
commit e88d911984
1 changed files with 3 additions and 0 deletions
--- a/src/paperless/settings.py
+++ b/src/paperless/settings.py
@@ -719,7 +719,10 @@ def _get_nltk_language_setting(ocr_lang: str) -> Optional[str]:
    Maps a three-letter (ISO 639-2 style) language code supported by Tesseract into
    an optional NLTK language name.  This is the set of common supported
    languages for all the NLTK data used.
    Assumption: when several languages are joined with "+", the primary
    language is listed first.
    """
    ocr_lang = ocr_lang.split("+")[0]
    iso_code_to_nltk = {
        "dan": "danish",
        "nld": "dutch",