Just in case, catch an occasional NLTK error and return the basic processed content instead

This commit is contained in:
Trenton H
2023-05-24 11:54:12 -07:00
parent 452c79f9a1
commit c1641f6fb8
2 changed files with 38 additions and 12 deletions

View File

@@ -921,6 +921,10 @@ def _get_nltk_language_setting(ocr_lang: str) -> Optional[str]:
languages for all the NLTK data used.
Assumption: The primary language is first
NLTK Languages:
- https://www.nltk.org/api/nltk.stem.snowball.html#nltk.stem.snowball.SnowballStemmer
"""
ocr_lang = ocr_lang.split("+")[0]
iso_code_to_nltk = {