Add support for a heuristic that extracts the document date from its text

This commit is contained in:
Wolf-Bastian Pöttner
2018-01-28 19:09:52 +01:00
parent 9faf0a102e
commit b140935843
5 changed files with 40 additions and 3 deletions

View File

@@ -35,6 +35,12 @@ class DocumentParser(object):
"""
raise NotImplementedError()
def get_date(self):
"""
Return the date of the document.

This implementation does not compute anything: it always raises
NotImplementedError.

NOTE(review): presumably concrete parsers are expected to override this
and derive the date from the document text (the commit description
mentions a text-based heuristic) -- confirm against the subclasses and
callers, which are not visible here. The expected return type is not
established by this code either.
"""
raise NotImplementedError()
def log(self, level, message):
getattr(self.logger, level)(message, extra={
"group": self.logging_group