mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Allows disabling NLTK, adds it as a consideration for low power devices
This commit is contained in:
parent
c44c914d3d
commit
1e891414a3
@ -774,6 +774,8 @@ configuring some options in paperless can help improve performance immensely:
|
|||||||
OCR results.
|
OCR results.
|
||||||
* If using docker, consider setting ``PAPERLESS_WEBSERVER_WORKERS`` to
|
* If using docker, consider setting ``PAPERLESS_WEBSERVER_WORKERS`` to
|
||||||
1. This will save some memory.
|
1. This will save some memory.
|
||||||
|
* Consider setting ``PAPERLESS_ENABLE_NLTK`` to false to disable the more
|
||||||
|
advanced language processing, which can take more memory and processing time.
|
||||||
|
|
||||||
For details, refer to :ref:`configuration`.
|
For details, refer to :ref:`configuration`.
|
||||||
|
|
||||||
|
@ -312,7 +312,7 @@ class DocumentClassifier:
|
|||||||
content = re.sub(r"[^\w\s]", " ", content)
|
content = re.sub(r"[^\w\s]", " ", content)
|
||||||
|
|
||||||
# If the NLTK language is supported, do further processing
|
# If NLTK is enabled and the NLTK language is supported, do further processing
|
||||||
if settings.NLTK_LANGUAGE is not None:
|
if settings.NLTK_LANGUAGE is not None and settings.NLTK_ENABLED:
|
||||||
|
|
||||||
import nltk
|
import nltk
|
||||||
|
|
||||||
|
@ -709,6 +709,10 @@ ENABLE_UPDATE_CHECK = os.getenv("PAPERLESS_ENABLE_UPDATE_CHECK", "default")
|
|||||||
if ENABLE_UPDATE_CHECK != "default":
|
if ENABLE_UPDATE_CHECK != "default":
|
||||||
ENABLE_UPDATE_CHECK = __get_boolean("PAPERLESS_ENABLE_UPDATE_CHECK")
|
ENABLE_UPDATE_CHECK = __get_boolean("PAPERLESS_ENABLE_UPDATE_CHECK")
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
|
# Machine Learning #
|
||||||
|
###############################################################################
|
||||||
|
|
||||||
|
|
||||||
def _get_nltk_language_setting(ocr_lang: str) -> Optional[str]:
|
def _get_nltk_language_setting(ocr_lang: str) -> Optional[str]:
|
||||||
"""
|
"""
|
||||||
@ -735,4 +739,6 @@ def _get_nltk_language_setting(ocr_lang: str) -> Optional[str]:
|
|||||||
return iso_code_to_nltk.get(ocr_lang, None)
|
return iso_code_to_nltk.get(ocr_lang, None)
|
||||||
|
|
||||||
|
|
||||||
|
NLTK_ENABLED: Final[bool] = __get_boolean("PAPERLESS_ENABLE_NLTK", "yes")
|
||||||
|
|
||||||
NLTK_LANGUAGE: Optional[str] = _get_nltk_language_setting(OCR_LANGUAGE)
|
NLTK_LANGUAGE: Optional[str] = _get_nltk_language_setting(OCR_LANGUAGE)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user