mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-17 10:13:56 -05:00
Account for plusses in the OCR language setting
This commit is contained in:
parent
1e891414a3
commit
d1a17480ea
@ -719,7 +719,10 @@ def _get_nltk_language_setting(ocr_lang: str) -> Optional[str]:
|
|||||||
Maps an ISO-639-2 (three-letter) language code supported by Tesseract into
|
Maps an ISO-639-2 (three-letter) language code supported by Tesseract into
|
||||||
an optional NLTK language name. This is the set of common supported
|
an optional NLTK language name. This is the set of common supported
|
||||||
languages for all the NLTK data used.
|
languages for all the NLTK data used.
|
||||||
|
|
||||||
|
Assumption: The primary language is listed first when several are joined with "+" (e.g. "deu+eng" is treated as "deu")
|
||||||
"""
|
"""
|
||||||
|
ocr_lang = ocr_lang.split("+")[0]
|
||||||
iso_code_to_nltk = {
|
iso_code_to_nltk = {
|
||||||
"dan": "danish",
|
"dan": "danish",
|
||||||
"nld": "dutch",
|
"nld": "dutch",
|
||||||
|
Loading…
x
Reference in New Issue
Block a user