Allows disabling NLTK; adds it as a consideration for low-power devices

This commit is contained in:
Trenton H
2022-09-23 07:32:17 -07:00
parent c44c914d3d
commit 1e891414a3
3 changed files with 9 additions and 1 deletions

View File

@@ -312,7 +312,7 @@ class DocumentClassifier:
content = re.sub(r"[^\w\s]", " ", content)
# If the NLTK language is supported, do further processing
if settings.NLTK_LANGUAGE is not None:
if settings.NLTK_LANGUAGE is not None and settings.NLTK_ENABLED:
import nltk

View File

@@ -709,6 +709,10 @@ ENABLE_UPDATE_CHECK = os.getenv("PAPERLESS_ENABLE_UPDATE_CHECK", "default")
if ENABLE_UPDATE_CHECK != "default":
ENABLE_UPDATE_CHECK = __get_boolean("PAPERLESS_ENABLE_UPDATE_CHECK")
###############################################################################
# Machine Learning #
###############################################################################
def _get_nltk_language_setting(ocr_lang: str) -> Optional[str]:
"""
@@ -735,4 +739,6 @@ def _get_nltk_language_setting(ocr_lang: str) -> Optional[str]:
return iso_code_to_nltk.get(ocr_lang, None)
NLTK_ENABLED: Final[bool] = __get_boolean("PAPERLESS_ENABLE_NLTK", "yes")
NLTK_LANGUAGE: Optional[str] = _get_nltk_language_setting(OCR_LANGUAGE)