Mirror of https://github.com/paperless-ngx/paperless-ngx.git (synced 2025-10-30 03:56:23 -05:00)
			
		
		
		
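	Refactor: extract method try_consume_file
The main purpose of this change is to make the following commits more readable. The body of the per-document loop in Consumer is moved into a new method, try_consume_file(self, doc), with each `continue` replaced by `return`; the loop now just calls self.try_consume_file(doc) for every document.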
This commit is contained in:
		| @@ -75,80 +75,82 @@ class Consumer: | |||||||
|         docs_old_to_new = sorted(docs, key=lambda doc: os.path.getmtime(doc)) |         docs_old_to_new = sorted(docs, key=lambda doc: os.path.getmtime(doc)) | ||||||
|  |  | ||||||
|         for doc in docs_old_to_new: |         for doc in docs_old_to_new: | ||||||
|  |             self.try_consume_file(doc) | ||||||
|  |  | ||||||
|             doc = os.path.join(self.consume, doc) |     def try_consume_file(self, doc): | ||||||
|  |         doc = os.path.join(self.consume, doc) | ||||||
|  |  | ||||||
|             if not os.path.isfile(doc): |         if not os.path.isfile(doc): | ||||||
|                 continue |             return | ||||||
|  |  | ||||||
|             if not re.match(FileInfo.REGEXES["title"], doc): |         if not re.match(FileInfo.REGEXES["title"], doc): | ||||||
|                 continue |             return | ||||||
|  |  | ||||||
|             if doc in self._ignore: |         if doc in self._ignore: | ||||||
|                 continue |             return | ||||||
|  |  | ||||||
|             if not self._is_ready(doc): |         if not self._is_ready(doc): | ||||||
|                 continue |             return | ||||||
|  |  | ||||||
|             if self._is_duplicate(doc): |         if self._is_duplicate(doc): | ||||||
|                 self.log( |             self.log( | ||||||
|                     "info", |                 "info", | ||||||
|                     "Skipping {} as it appears to be a duplicate".format(doc) |                 "Skipping {} as it appears to be a duplicate".format(doc) | ||||||
|                 ) |             ) | ||||||
|                 self._ignore.append(doc) |             self._ignore.append(doc) | ||||||
|                 continue |             return | ||||||
|  |  | ||||||
|             parser_class = self._get_parser_class(doc) |         parser_class = self._get_parser_class(doc) | ||||||
|             if not parser_class: |         if not parser_class: | ||||||
|                 self.log( |             self.log( | ||||||
|                     "error", "No parsers could be found for {}".format(doc)) |                 "error", "No parsers could be found for {}".format(doc)) | ||||||
|                 self._ignore.append(doc) |             self._ignore.append(doc) | ||||||
|                 continue |             return | ||||||
|  |  | ||||||
|             self.logging_group = uuid.uuid4() |         self.logging_group = uuid.uuid4() | ||||||
|  |  | ||||||
|             self.log("info", "Consuming {}".format(doc)) |         self.log("info", "Consuming {}".format(doc)) | ||||||
|  |  | ||||||
|             document_consumption_started.send( |         document_consumption_started.send( | ||||||
|                 sender=self.__class__, |             sender=self.__class__, | ||||||
|                 filename=doc, |             filename=doc, | ||||||
|                 logging_group=self.logging_group |             logging_group=self.logging_group | ||||||
|  |         ) | ||||||
|  |  | ||||||
|  |         parsed_document = parser_class(doc) | ||||||
|  |  | ||||||
|  |         try: | ||||||
|  |             thumbnail = parsed_document.get_thumbnail() | ||||||
|  |             date = parsed_document.get_date() | ||||||
|  |             document = self._store( | ||||||
|  |                 parsed_document.get_text(), | ||||||
|  |                 doc, | ||||||
|  |                 thumbnail, | ||||||
|  |                 date | ||||||
|  |             ) | ||||||
|  |         except ParseError as e: | ||||||
|  |  | ||||||
|  |             self._ignore.append(doc) | ||||||
|  |             self.log("error", "PARSE FAILURE for {}: {}".format(doc, e)) | ||||||
|  |             parsed_document.cleanup() | ||||||
|  |  | ||||||
|  |             return | ||||||
|  |  | ||||||
|  |         else: | ||||||
|  |  | ||||||
|  |             parsed_document.cleanup() | ||||||
|  |             self._cleanup_doc(doc) | ||||||
|  |  | ||||||
|  |             self.log( | ||||||
|  |                 "info", | ||||||
|  |                 "Document {} consumption finished".format(document) | ||||||
|             ) |             ) | ||||||
|  |  | ||||||
|             parsed_document = parser_class(doc) |             document_consumption_finished.send( | ||||||
|  |                 sender=self.__class__, | ||||||
|             try: |                 document=document, | ||||||
|                 thumbnail = parsed_document.get_thumbnail() |                 logging_group=self.logging_group | ||||||
|                 date = parsed_document.get_date() |             ) | ||||||
|                 document = self._store( |  | ||||||
|                     parsed_document.get_text(), |  | ||||||
|                     doc, |  | ||||||
|                     thumbnail, |  | ||||||
|                     date |  | ||||||
|                 ) |  | ||||||
|             except ParseError as e: |  | ||||||
|  |  | ||||||
|                 self._ignore.append(doc) |  | ||||||
|                 self.log("error", "PARSE FAILURE for {}: {}".format(doc, e)) |  | ||||||
|                 parsed_document.cleanup() |  | ||||||
|  |  | ||||||
|                 continue |  | ||||||
|  |  | ||||||
|             else: |  | ||||||
|  |  | ||||||
|                 parsed_document.cleanup() |  | ||||||
|                 self._cleanup_doc(doc) |  | ||||||
|  |  | ||||||
|                 self.log( |  | ||||||
|                     "info", |  | ||||||
|                     "Document {} consumption finished".format(document) |  | ||||||
|                 ) |  | ||||||
|  |  | ||||||
|                 document_consumption_finished.send( |  | ||||||
|                     sender=self.__class__, |  | ||||||
|                     document=document, |  | ||||||
|                     logging_group=self.logging_group |  | ||||||
|                 ) |  | ||||||
|  |  | ||||||
|     def _get_parser_class(self, doc): |     def _get_parser_class(self, doc): | ||||||
|         """ |         """ | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Erik Arvstedt
					Erik Arvstedt