Merge pull request #621 from languitar/fix-620

Handle dateparse exceptions for invalid date formats
This commit is contained in:
Pit 2020-05-19 19:04:28 +02:00 committed by GitHub
commit cc31fc14a3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 20 additions and 4 deletions

View File

@@ -108,7 +108,7 @@ class DocumentParser:
try:
date = __parser(date_string, self.FILENAME_DATE_ORDER)
except TypeError:
except (TypeError, ValueError):
# Skip all matches that do not parse to a proper date
continue
@@ -134,7 +134,7 @@ class DocumentParser:
try:
date = __parser(date_string, self.DATE_ORDER)
except TypeError:
except (TypeError, ValueError):
# Skip all matches that do not parse to a proper date
continue

View File

@@ -175,10 +175,26 @@ class TestDate(TestCase):
@mock.patch(
"paperless_tesseract.parsers.RasterisedDocumentParser.get_text",
return_value="01-07-0590 00:00:00"
return_value="20 408000l 2475"
)
@mock.patch(MOCK_SCRATCH, SCRATCH)
def test_crazy_date_past(self, *args):
def test_crazy_date_with_spaces(self, *args):
document = RasterisedDocumentParser("/dev/null")
document.get_text()
self.assertIsNone(document.get_date())
@mock.patch(
"paperless_tesseract.parsers.RasterisedDocumentParser.get_text",
return_value="No date in here"
)
@mock.patch(
"paperless_tesseract.parsers.RasterisedDocumentParser."
"FILENAME_DATE_ORDER",
new_callable=mock.PropertyMock,
return_value="YMD"
)
@mock.patch(MOCK_SCRATCH, SCRATCH)
def test_filename_date_parse_invalid(self, *args):
document = RasterisedDocumentParser("/tmp/20 408000l 2475 - test.pdf")
document.get_text()
self.assertIsNone(document.get_date())