diff --git a/src/documents/parsers.py b/src/documents/parsers.py index 142ebba68..c0a80a55d 100644 --- a/src/documents/parsers.py +++ b/src/documents/parsers.py @@ -108,7 +108,7 @@ class DocumentParser: try: date = __parser(date_string, self.FILENAME_DATE_ORDER) - except TypeError: + except (TypeError, ValueError): # Skip all matches that do not parse to a proper date continue @@ -134,7 +134,7 @@ class DocumentParser: try: date = __parser(date_string, self.DATE_ORDER) - except TypeError: + except (TypeError, ValueError): # Skip all matches that do not parse to a proper date continue diff --git a/src/paperless_tesseract/tests/test_date.py b/src/paperless_tesseract/tests/test_date.py index 4f931737b..9e9d48b90 100644 --- a/src/paperless_tesseract/tests/test_date.py +++ b/src/paperless_tesseract/tests/test_date.py @@ -172,3 +172,29 @@ class TestDate(TestCase): document = RasterisedDocumentParser("/dev/null") document.get_text() self.assertIsNone(document.get_date()) + + @mock.patch( + "paperless_tesseract.parsers.RasterisedDocumentParser.get_text", + return_value="20 408000l 2475" + ) + @mock.patch(MOCK_SCRATCH, SCRATCH) + def test_crazy_date_with_spaces(self, *args): + document = RasterisedDocumentParser("/dev/null") + document.get_text() + self.assertIsNone(document.get_date()) + + @mock.patch( + "paperless_tesseract.parsers.RasterisedDocumentParser.get_text", + return_value="No date in here" + ) + @mock.patch( + "paperless_tesseract.parsers.RasterisedDocumentParser." + "FILENAME_DATE_ORDER", + new_callable=mock.PropertyMock, + return_value="YMD" + ) + @mock.patch(MOCK_SCRATCH, SCRATCH) + def test_filename_date_parse_invalid(self, *args): + document = RasterisedDocumentParser("/tmp/20 408000l 2475 - test.pdf") + document.get_text() + self.assertIsNone(document.get_date())