diff --git a/src/documents/parsers.py b/src/documents/parsers.py
index cb70f4fc6..ca24026fb 100644
--- a/src/documents/parsers.py
+++ b/src/documents/parsers.py
@@ -23,6 +23,8 @@ from documents.signals import document_consumer_declaration
 # - XX. MONTH ZZZZ with XX being 1 or 2 and ZZZZ being 2 or 4 digits
 # - MONTH ZZZZ, with ZZZZ being 4 digits
 # - MONTH XX, ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits
+# - XX MON ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits, MON being
+#   3 uppercase letters, e.g. 22-FEB-2022
 
 # TODO: isnt there a date parsing library for this?
 
@@ -31,7 +33,8 @@ DATE_REGEX = re.compile(
     r"(\b|(?!=([_-])))([0-9]{4}|[0-9]{2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{1,2})(\b|(?=([_-])))|"  # noqa: E501
     r"(\b|(?!=([_-])))([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))(\b|(?=([_-])))|"  # noqa: E501
     r"(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))(\b|(?=([_-])))|"
-    r"(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{4})(\b|(?=([_-])))",
+    r"(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{4})(\b|(?=([_-])))|"
+    r"(\b|(?!=([_-])))([0-9]{1,2}[ \.\/-][A-Z]{3}[ \.\/-][0-9]{4})(\b|(?=([_-])))",
 )
 
 