mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Tweak the date guesser to not allow dates from the year 1900 or earlier (#414)
This commit is contained in:
parent
a511d34d69
commit
8010d72f18
@ -203,6 +203,7 @@ class RasterisedDocumentParser(DocumentParser):
|
||||
return text
|
||||
|
||||
def get_date(self):
|
||||
|
||||
date = None
|
||||
datestring = None
|
||||
|
||||
@ -217,20 +218,30 @@ class RasterisedDocumentParser(DocumentParser):
|
||||
|
||||
try:
|
||||
date = dateparser.parse(
|
||||
datestring,
|
||||
settings={'DATE_ORDER': self.DATE_ORDER,
|
||||
'PREFER_DAY_OF_MONTH': 'first',
|
||||
'RETURN_AS_TIMEZONE_AWARE': True})
|
||||
datestring,
|
||||
settings={
|
||||
"DATE_ORDER": self.DATE_ORDER,
|
||||
"PREFER_DAY_OF_MONTH": "first",
|
||||
"RETURN_AS_TIMEZONE_AWARE": True
|
||||
}
|
||||
)
|
||||
except TypeError:
|
||||
# Skip all matches that do not parse to a proper date
|
||||
continue
|
||||
|
||||
if date is not None:
|
||||
if date is not None and date.year > 1900:
|
||||
break
|
||||
else:
|
||||
date = None
|
||||
|
||||
if date is not None:
|
||||
self.log("info", "Detected document date " + date.isoformat() +
|
||||
" based on string " + datestring)
|
||||
self.log(
|
||||
"info",
|
||||
"Detected document date {} based on string {}".format(
|
||||
date.isoformat(),
|
||||
datestring
|
||||
)
|
||||
)
|
||||
else:
|
||||
self.log("info", "Unable to detect date for document")
|
||||
|
||||
|
@ -384,3 +384,16 @@ class TestDate(TestCase):
|
||||
document.get_date(),
|
||||
datetime.datetime(2017, 12, 31, 0, 0, tzinfo=tz.tzutc())
|
||||
)
|
||||
|
||||
@mock.patch(
    "paperless_tesseract.parsers.RasterisedDocumentParser.get_text",
    return_value="01-07-0590 00:00:00",
)
@mock.patch(
    "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
    new=SCRATCH,
)
def test_crazy_date(self, *args):
    """A date string with an implausibly early year yields no date.

    The patched ``get_text`` returns "01-07-0590 00:00:00"; the year 0590
    must be rejected, so ``get_date`` is expected to return ``None``.
    """
    parser = RasterisedDocumentParser("/dev/null")
    # Call the (mocked) text extraction first, mirroring normal usage
    # before a date is requested.
    parser.get_text()
    detected = parser.get_date()
    self.assertIsNone(detected)
|
||||
|
Loading…
x
Reference in New Issue
Block a user