mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-10-30 03:56:23 -05:00 
			
		
		
		
	Added a text cache to optimize performance of date detection
This commit is contained in:
		| @@ -35,6 +35,7 @@ class RasterisedDocumentParser(DocumentParser): | |||||||
|     DATE_ORDER = settings.DATE_ORDER |     DATE_ORDER = settings.DATE_ORDER | ||||||
|     DEFAULT_OCR_LANGUAGE = settings.OCR_LANGUAGE |     DEFAULT_OCR_LANGUAGE = settings.OCR_LANGUAGE | ||||||
|     OCR_ALWAYS = settings.OCR_ALWAYS |     OCR_ALWAYS = settings.OCR_ALWAYS | ||||||
|  |     TEXT_CACHE = None | ||||||
|  |  | ||||||
|     def get_thumbnail(self): |     def get_thumbnail(self): | ||||||
|         """ |         """ | ||||||
| @@ -62,15 +63,20 @@ class RasterisedDocumentParser(DocumentParser): | |||||||
|         return False |         return False | ||||||
|  |  | ||||||
|     def get_text(self): |     def get_text(self): | ||||||
|  |         if self.TEXT_CACHE is not None: | ||||||
|  |             return self.TEXT_CACHE | ||||||
|  |  | ||||||
|         if not self.OCR_ALWAYS and self._is_ocred(): |         if not self.OCR_ALWAYS and self._is_ocred(): | ||||||
|             self.log("info", "Skipping OCR, using Text from PDF") |             self.log("info", "Skipping OCR, using Text from PDF") | ||||||
|             return get_text_from_pdf(self.document_path) |             self.TEXT_CACHE = get_text_from_pdf(self.document_path) | ||||||
|  |             return self.TEXT_CACHE | ||||||
|  |  | ||||||
|         images = self._get_greyscale() |         images = self._get_greyscale() | ||||||
|  |  | ||||||
|         try: |         try: | ||||||
|  |  | ||||||
|             return self._get_ocr(images) |             self.TEXT_CACHE = self._get_ocr(images) | ||||||
|  |             return self.TEXT_CACHE | ||||||
|         except OCRError as e: |         except OCRError as e: | ||||||
|             raise ParseError(e) |             raise ParseError(e) | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Wolf-Bastian Pöttner
					Wolf-Bastian Pöttner