From ba0f4718e516fb220ccccad6a7ae5a189792cb29 Mon Sep 17 00:00:00 2001 From: shamoon <4887959+shamoon@users.noreply.github.com> Date: Thu, 25 Jan 2024 13:29:22 -0800 Subject: [PATCH] Fix: Modify one of date regexes (#5540) --- src/documents/parsers.py | 2 +- src/documents/tests/test_date_parsing.py | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/documents/parsers.py b/src/documents/parsers.py index db4b42792..12e5d6b33 100644 --- a/src/documents/parsers.py +++ b/src/documents/parsers.py @@ -38,7 +38,7 @@ from documents.utils import copy_file_with_basic_stats DATE_REGEX = re.compile( r"(\b|(?!=([_-])))([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})(\b|(?=([_-])))|" r"(\b|(?!=([_-])))([0-9]{4}|[0-9]{2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{1,2})(\b|(?=([_-])))|" - r"(\b|(?!=([_-])))([0-9]{1,2}[\. ]+[a-zA-Z]{3,9} ([0-9]{4}|[0-9]{2}))(\b|(?=([_-])))|" + r"(\b|(?!=([_-])))([0-9]{1,2}[\. ]+[a-zA-Z]{3,9} [0-9]{4}|[a-zA-Z]{3,9} [0-9]{1,2}, [0-9]{4})(\b|(?=([_-])))|" r"(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))(\b|(?=([_-])))|" r"(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{4})(\b|(?=([_-])))|" r"(\b|(?!=([_-])))([0-9]{1,2}[^ ]{2}[\. ]+[^ ]{3,9}[ \.\/-][0-9]{4})(\b|(?=([_-])))|" diff --git a/src/documents/tests/test_date_parsing.py b/src/documents/tests/test_date_parsing.py index 54b4d7b53..d4ea71be5 100644 --- a/src/documents/tests/test_date_parsing.py +++ b/src/documents/tests/test_date_parsing.py @@ -201,6 +201,13 @@ class TestDate(TestCase): datetime.datetime(2022, 3, 25, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)), ) + def test_date_format_26(self): + text = "CHASE 0 September 25, 2019 JPMorgan Chase Bank, NA. P0 Box 182051" + self.assertEqual( + parse_date("", text), + datetime.datetime(2019, 9, 25, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)), + ) + def test_crazy_date_past(self, *args): self.assertIsNone(parse_date("", "01-07-0590 00:00:00"))