From d1a17480ea2a2855c28212d22948ffd57fdce9ce Mon Sep 17 00:00:00 2001 From: Trenton H Date: Wed, 28 Sep 2022 14:24:34 -0700 Subject: [PATCH] Account for plusses in the OCR language setting --- src/paperless/settings.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/paperless/settings.py b/src/paperless/settings.py index cd63105e2..a262bd501 100644 --- a/src/paperless/settings.py +++ b/src/paperless/settings.py @@ -719,7 +719,10 @@ def _get_nltk_language_setting(ocr_lang: str) -> Optional[str]: Maps an ISO-639-1 language code supported by Tesseract into an optional NLTK language name. This is the set of common supported languages for all the NLTK data used. + + Assumption: The primary language is first """ + ocr_lang = ocr_lang.split("+")[0] iso_code_to_nltk = { "dan": "danish", "nld": "dutch",