Refactor: extract method try_consume_file

The main purpose of this change is to make the following commits more
readable.
This commit is contained in:
Erik Arvstedt 2018-05-11 14:01:15 +02:00
parent a56a3eb86d
commit f018e8e54f

View File

@ -75,80 +75,82 @@ class Consumer:
docs_old_to_new = sorted(docs, key=lambda doc: os.path.getmtime(doc)) docs_old_to_new = sorted(docs, key=lambda doc: os.path.getmtime(doc))
for doc in docs_old_to_new: for doc in docs_old_to_new:
self.try_consume_file(doc)
doc = os.path.join(self.consume, doc) def try_consume_file(self, doc):
doc = os.path.join(self.consume, doc)
if not os.path.isfile(doc): if not os.path.isfile(doc):
continue return
if not re.match(FileInfo.REGEXES["title"], doc): if not re.match(FileInfo.REGEXES["title"], doc):
continue return
if doc in self._ignore: if doc in self._ignore:
continue return
if not self._is_ready(doc): if not self._is_ready(doc):
continue return
if self._is_duplicate(doc): if self._is_duplicate(doc):
self.log( self.log(
"info", "info",
"Skipping {} as it appears to be a duplicate".format(doc) "Skipping {} as it appears to be a duplicate".format(doc)
) )
self._ignore.append(doc) self._ignore.append(doc)
continue return
parser_class = self._get_parser_class(doc) parser_class = self._get_parser_class(doc)
if not parser_class: if not parser_class:
self.log( self.log(
"error", "No parsers could be found for {}".format(doc)) "error", "No parsers could be found for {}".format(doc))
self._ignore.append(doc) self._ignore.append(doc)
continue return
self.logging_group = uuid.uuid4() self.logging_group = uuid.uuid4()
self.log("info", "Consuming {}".format(doc)) self.log("info", "Consuming {}".format(doc))
document_consumption_started.send( document_consumption_started.send(
sender=self.__class__, sender=self.__class__,
filename=doc, filename=doc,
logging_group=self.logging_group logging_group=self.logging_group
)
parsed_document = parser_class(doc)
try:
thumbnail = parsed_document.get_thumbnail()
date = parsed_document.get_date()
document = self._store(
parsed_document.get_text(),
doc,
thumbnail,
date
)
except ParseError as e:
self._ignore.append(doc)
self.log("error", "PARSE FAILURE for {}: {}".format(doc, e))
parsed_document.cleanup()
return
else:
parsed_document.cleanup()
self._cleanup_doc(doc)
self.log(
"info",
"Document {} consumption finished".format(document)
) )
parsed_document = parser_class(doc) document_consumption_finished.send(
sender=self.__class__,
try: document=document,
thumbnail = parsed_document.get_thumbnail() logging_group=self.logging_group
date = parsed_document.get_date() )
document = self._store(
parsed_document.get_text(),
doc,
thumbnail,
date
)
except ParseError as e:
self._ignore.append(doc)
self.log("error", "PARSE FAILURE for {}: {}".format(doc, e))
parsed_document.cleanup()
continue
else:
parsed_document.cleanup()
self._cleanup_doc(doc)
self.log(
"info",
"Document {} consumption finished".format(document)
)
document_consumption_finished.send(
sender=self.__class__,
document=document,
logging_group=self.logging_group
)
def _get_parser_class(self, doc): def _get_parser_class(self, doc):
""" """