mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-10-30 03:56:23 -05:00 
			
		
		
		
	Add support for a heuristic that extracts the document date from its text
This commit is contained in:
		| @@ -118,12 +118,14 @@ class Consumer(object): | ||||
|  | ||||
|             parsed_document = parser_class(doc) | ||||
|             thumbnail = parsed_document.get_thumbnail() | ||||
|+            date = parsed_document.get_date() | ||||
|  | ||||
|             try: | ||||
|                 document = self._store( | ||||
|                     parsed_document.get_text(), | ||||
|                     doc, | ||||
|-                    thumbnail | ||||
|+                    thumbnail, | ||||
|+                    date | ||||
|                 ) | ||||
|             except ParseError as e: | ||||
|  | ||||
| @@ -174,7 +176,7 @@ class Consumer(object): | ||||
|         return sorted( | ||||
|             options, key=lambda _: _["weight"], reverse=True)[0]["parser"] | ||||
|  | ||||
|-    def _store(self, text, doc, thumbnail): | ||||
|+    def _store(self, text, doc, thumbnail, date): | ||||
|  | ||||
|         file_info = FileInfo.from_path(doc) | ||||
|  | ||||
| @@ -182,7 +184,7 @@ class Consumer(object): | ||||
|  | ||||
|         self.log("debug", "Saving record to database") | ||||
|  | ||||
|-        created = file_info.created or timezone.make_aware( | ||||
|+        created = file_info.created or date or timezone.make_aware( | ||||
|                     datetime.datetime.fromtimestamp(stats.st_mtime)) | ||||
|  | ||||
|         with open(doc, "rb") as f: | ||||
|   | ||||
| @@ -35,6 +35,12 @@ class DocumentParser(object): | ||||
|         """ | ||||
|         raise NotImplementedError() | ||||
|  | ||||
|+    def get_date(self): | ||||
|+        """ | ||||
|+        Returns the date of the document. | ||||
|+        """ | ||||
|+        raise NotImplementedError() | ||||
|+ | ||||
|     def log(self, level, message): | ||||
|         getattr(self.logger, level)(message, extra={ | ||||
|             "group": self.logging_group | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Wolf-Bastian Pöttner
					Wolf-Bastian Pöttner