mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-10-12 02:26:09 -05:00
Tweak the date guesser to not allow dates prior to 1900 (#414)
This commit is contained in:
@@ -203,6 +203,7 @@ class RasterisedDocumentParser(DocumentParser):
|
||||
return text
|
||||
|
||||
def get_date(self):
|
||||
|
||||
date = None
|
||||
datestring = None
|
||||
|
||||
@@ -217,20 +218,30 @@ class RasterisedDocumentParser(DocumentParser):
|
||||
|
||||
try:
|
||||
date = dateparser.parse(
|
||||
datestring,
|
||||
settings={'DATE_ORDER': self.DATE_ORDER,
|
||||
'PREFER_DAY_OF_MONTH': 'first',
|
||||
'RETURN_AS_TIMEZONE_AWARE': True})
|
||||
datestring,
|
||||
settings={
|
||||
"DATE_ORDER": self.DATE_ORDER,
|
||||
"PREFER_DAY_OF_MONTH": "first",
|
||||
"RETURN_AS_TIMEZONE_AWARE": True
|
||||
}
|
||||
)
|
||||
except TypeError:
|
||||
# Skip all matches that do not parse to a proper date
|
||||
continue
|
||||
|
||||
if date is not None:
|
||||
if date is not None and date.year > 1900:
|
||||
break
|
||||
else:
|
||||
date = None
|
||||
|
||||
if date is not None:
|
||||
self.log("info", "Detected document date " + date.isoformat() +
|
||||
" based on string " + datestring)
|
||||
self.log(
|
||||
"info",
|
||||
"Detected document date {} based on string {}".format(
|
||||
date.isoformat(),
|
||||
datestring
|
||||
)
|
||||
)
|
||||
else:
|
||||
self.log("info", "Unable to detect date for document")
|
||||
|
||||
|
Reference in New Issue
Block a user