From a3aab0cb48c6bef7385ee5e2af758cc6544011e6 Mon Sep 17 00:00:00 2001
From: Johannes Wienke <languitar@semipol.de>
Date: Sun, 8 Mar 2020 18:26:29 +0100
Subject: [PATCH 1/2] Remove duplicated date parsing test

The exact same test (test_crazy_date_past) existed twice in the file.
---
 src/paperless_tesseract/tests/test_date.py | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/src/paperless_tesseract/tests/test_date.py b/src/paperless_tesseract/tests/test_date.py
index ac2f9648f..4f931737b 100644
--- a/src/paperless_tesseract/tests/test_date.py
+++ b/src/paperless_tesseract/tests/test_date.py
@@ -172,13 +172,3 @@ class TestDate(TestCase):
         document = RasterisedDocumentParser("/dev/null")
         document.get_text()
         self.assertIsNone(document.get_date())
-
-    @mock.patch(
-        "paperless_tesseract.parsers.RasterisedDocumentParser.get_text",
-        return_value="01-07-0590 00:00:00"
-    )
-    @mock.patch(MOCK_SCRATCH, SCRATCH)
-    def test_crazy_date_past(self, *args):
-        document = RasterisedDocumentParser("/dev/null")
-        document.get_text()
-        self.assertIsNone(document.get_date())

From a311cd498c984d032e53319082871b51d42654e6 Mon Sep 17 00:00:00 2001
From: Johannes Wienke <languitar@semipol.de>
Date: Sun, 8 Mar 2020 18:35:28 +0100
Subject: [PATCH 2/2] Handle dateparser ValueErrors

When parsing dates from the document text or filenames, correctly handle
ValueErrors indicating broken dates by skipping the offending match. Newly
added tests ensure that this handling works properly.
---
 src/documents/parsers.py                   |  4 ++--
 src/paperless_tesseract/tests/test_date.py | 26 ++++++++++++++++++++++
 2 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/src/documents/parsers.py b/src/documents/parsers.py
index 142ebba68..c0a80a55d 100644
--- a/src/documents/parsers.py
+++ b/src/documents/parsers.py
@@ -108,7 +108,7 @@ class DocumentParser:
 
                 try:
                     date = __parser(date_string, self.FILENAME_DATE_ORDER)
-                except TypeError:
+                except (TypeError, ValueError):
                     # Skip all matches that do not parse to a proper date
                     continue
 
@@ -134,7 +134,7 @@ class DocumentParser:
 
             try:
                 date = __parser(date_string, self.DATE_ORDER)
-            except TypeError:
+            except (TypeError, ValueError):
                 # Skip all matches that do not parse to a proper date
                 continue
 
diff --git a/src/paperless_tesseract/tests/test_date.py b/src/paperless_tesseract/tests/test_date.py
index 4f931737b..9e9d48b90 100644
--- a/src/paperless_tesseract/tests/test_date.py
+++ b/src/paperless_tesseract/tests/test_date.py
@@ -172,3 +172,29 @@ class TestDate(TestCase):
         document = RasterisedDocumentParser("/dev/null")
         document.get_text()
         self.assertIsNone(document.get_date())
+
+    @mock.patch(
+        "paperless_tesseract.parsers.RasterisedDocumentParser.get_text",
+        return_value="20 408000l 2475"
+    )
+    @mock.patch(MOCK_SCRATCH, SCRATCH)
+    def test_crazy_date_with_spaces(self, *args):
+        document = RasterisedDocumentParser("/dev/null")
+        document.get_text()
+        self.assertIsNone(document.get_date())
+
+    @mock.patch(
+        "paperless_tesseract.parsers.RasterisedDocumentParser.get_text",
+        return_value="No date in here"
+    )
+    @mock.patch(
+        "paperless_tesseract.parsers.RasterisedDocumentParser."
+        "FILENAME_DATE_ORDER",
+        new_callable=mock.PropertyMock,
+        return_value="YMD"
+    )
+    @mock.patch(MOCK_SCRATCH, SCRATCH)
+    def test_filename_date_parse_invalid(self, *args):
+        document = RasterisedDocumentParser("/tmp/20 408000l 2475 - test.pdf")
+        document.get_text()
+        self.assertIsNone(document.get_date())