mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Refactor: extract function try_consume_file
The main purpose of this change is to make the following commits more readable.
This commit is contained in:
parent
a56a3eb86d
commit
f018e8e54f
@@ -75,80 +75,82 @@ class Consumer:
|
|||||||
docs_old_to_new = sorted(docs, key=lambda doc: os.path.getmtime(doc))
|
docs_old_to_new = sorted(docs, key=lambda doc: os.path.getmtime(doc))
|
||||||
|
|
||||||
for doc in docs_old_to_new:
|
for doc in docs_old_to_new:
|
||||||
|
self.try_consume_file(doc)
|
||||||
|
|
||||||
doc = os.path.join(self.consume, doc)
|
def try_consume_file(self, doc):
|
||||||
|
doc = os.path.join(self.consume, doc)
|
||||||
|
|
||||||
if not os.path.isfile(doc):
|
if not os.path.isfile(doc):
|
||||||
continue
|
return
|
||||||
|
|
||||||
if not re.match(FileInfo.REGEXES["title"], doc):
|
if not re.match(FileInfo.REGEXES["title"], doc):
|
||||||
continue
|
return
|
||||||
|
|
||||||
if doc in self._ignore:
|
if doc in self._ignore:
|
||||||
continue
|
return
|
||||||
|
|
||||||
if not self._is_ready(doc):
|
if not self._is_ready(doc):
|
||||||
continue
|
return
|
||||||
|
|
||||||
if self._is_duplicate(doc):
|
if self._is_duplicate(doc):
|
||||||
self.log(
|
self.log(
|
||||||
"info",
|
"info",
|
||||||
"Skipping {} as it appears to be a duplicate".format(doc)
|
"Skipping {} as it appears to be a duplicate".format(doc)
|
||||||
)
|
)
|
||||||
self._ignore.append(doc)
|
self._ignore.append(doc)
|
||||||
continue
|
return
|
||||||
|
|
||||||
parser_class = self._get_parser_class(doc)
|
parser_class = self._get_parser_class(doc)
|
||||||
if not parser_class:
|
if not parser_class:
|
||||||
self.log(
|
self.log(
|
||||||
"error", "No parsers could be found for {}".format(doc))
|
"error", "No parsers could be found for {}".format(doc))
|
||||||
self._ignore.append(doc)
|
self._ignore.append(doc)
|
||||||
continue
|
return
|
||||||
|
|
||||||
self.logging_group = uuid.uuid4()
|
self.logging_group = uuid.uuid4()
|
||||||
|
|
||||||
self.log("info", "Consuming {}".format(doc))
|
self.log("info", "Consuming {}".format(doc))
|
||||||
|
|
||||||
document_consumption_started.send(
|
document_consumption_started.send(
|
||||||
sender=self.__class__,
|
sender=self.__class__,
|
||||||
filename=doc,
|
filename=doc,
|
||||||
logging_group=self.logging_group
|
logging_group=self.logging_group
|
||||||
|
)
|
||||||
|
|
||||||
|
parsed_document = parser_class(doc)
|
||||||
|
|
||||||
|
try:
|
||||||
|
thumbnail = parsed_document.get_thumbnail()
|
||||||
|
date = parsed_document.get_date()
|
||||||
|
document = self._store(
|
||||||
|
parsed_document.get_text(),
|
||||||
|
doc,
|
||||||
|
thumbnail,
|
||||||
|
date
|
||||||
|
)
|
||||||
|
except ParseError as e:
|
||||||
|
|
||||||
|
self._ignore.append(doc)
|
||||||
|
self.log("error", "PARSE FAILURE for {}: {}".format(doc, e))
|
||||||
|
parsed_document.cleanup()
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
else:
|
||||||
|
|
||||||
|
parsed_document.cleanup()
|
||||||
|
self._cleanup_doc(doc)
|
||||||
|
|
||||||
|
self.log(
|
||||||
|
"info",
|
||||||
|
"Document {} consumption finished".format(document)
|
||||||
)
|
)
|
||||||
|
|
||||||
parsed_document = parser_class(doc)
|
document_consumption_finished.send(
|
||||||
|
sender=self.__class__,
|
||||||
try:
|
document=document,
|
||||||
thumbnail = parsed_document.get_thumbnail()
|
logging_group=self.logging_group
|
||||||
date = parsed_document.get_date()
|
)
|
||||||
document = self._store(
|
|
||||||
parsed_document.get_text(),
|
|
||||||
doc,
|
|
||||||
thumbnail,
|
|
||||||
date
|
|
||||||
)
|
|
||||||
except ParseError as e:
|
|
||||||
|
|
||||||
self._ignore.append(doc)
|
|
||||||
self.log("error", "PARSE FAILURE for {}: {}".format(doc, e))
|
|
||||||
parsed_document.cleanup()
|
|
||||||
|
|
||||||
continue
|
|
||||||
|
|
||||||
else:
|
|
||||||
|
|
||||||
parsed_document.cleanup()
|
|
||||||
self._cleanup_doc(doc)
|
|
||||||
|
|
||||||
self.log(
|
|
||||||
"info",
|
|
||||||
"Document {} consumption finished".format(document)
|
|
||||||
)
|
|
||||||
|
|
||||||
document_consumption_finished.send(
|
|
||||||
sender=self.__class__,
|
|
||||||
document=document,
|
|
||||||
logging_group=self.logging_group
|
|
||||||
)
|
|
||||||
|
|
||||||
def _get_parser_class(self, doc):
|
def _get_parser_class(self, doc):
|
||||||
"""
|
"""
|
||||||
|
Loading…
x
Reference in New Issue
Block a user