mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-10-30 03:56:23 -05:00 
			
		
		
		
	Allows disabling NLTK and adds it as a consideration for low-power devices
This commit is contained in:
		| @@ -774,6 +774,8 @@ configuring some options in paperless can help improve performance immensely: | ||||
|     OCR results. | ||||
| *   If using docker, consider setting ``PAPERLESS_WEBSERVER_WORKERS`` to | ||||
|     1. This will save some memory. | ||||
| *   Consider setting ``PAPERLESS_ENABLE_NLTK`` to false to disable the more | ||||
|     advanced language processing, which can use more memory and processing time. | ||||
|  | ||||
| For details, refer to :ref:`configuration`. | ||||
|  | ||||
|   | ||||
| @@ -312,7 +312,7 @@ class DocumentClassifier: | ||||
|         content = re.sub(r"[^\w\s]", " ", content) | ||||
|  | ||||
|         # If the NLTK language is supported, do further processing | ||||
|         if settings.NLTK_LANGUAGE is not None: | ||||
|         if settings.NLTK_LANGUAGE is not None and settings.NLTK_ENABLED: | ||||
|  | ||||
|             import nltk | ||||
|  | ||||
|   | ||||
| @@ -709,6 +709,10 @@ ENABLE_UPDATE_CHECK = os.getenv("PAPERLESS_ENABLE_UPDATE_CHECK", "default") | ||||
| if ENABLE_UPDATE_CHECK != "default": | ||||
|     ENABLE_UPDATE_CHECK = __get_boolean("PAPERLESS_ENABLE_UPDATE_CHECK") | ||||
|  | ||||
| ############################################################################### | ||||
| # Machine Learning                                                            # | ||||
| ############################################################################### | ||||
|  | ||||
|  | ||||
| def _get_nltk_language_setting(ocr_lang: str) -> Optional[str]: | ||||
|     """ | ||||
| @@ -735,4 +739,6 @@ def _get_nltk_language_setting(ocr_lang: str) -> Optional[str]: | ||||
|     return iso_code_to_nltk.get(ocr_lang, None) | ||||
|  | ||||
|  | ||||
| NLTK_ENABLED: Final[bool] = __get_boolean("PAPERLESS_ENABLE_NLTK", "yes") | ||||
|  | ||||
| NLTK_LANGUAGE: Optional[str] = _get_nltk_language_setting(OCR_LANGUAGE) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Trenton H
					Trenton H