update new regex pattern for second boundary

This commit is contained in:
Fantasticle 2022-03-31 09:36:10 +02:00
parent d8261b3359
commit 0baacbef98

View File

@ -23,7 +23,7 @@ from documents.signals import document_consumer_declaration
# - XX. MONTH ZZZZ with XX being 1 or 2 and ZZZZ being 2 or 4 digits
# - MONTH ZZZZ, with ZZZZ being 4 digits
# - MONTH XX, ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits
# - XX MON ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits, MONTH is 3 letters, e.g. 22-FEB-2022
# - XX MON ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits. MONTH is 3 letters
# TODO: isnt there a date parsing library for this?
@ -33,7 +33,7 @@ DATE_REGEX = re.compile(
r"(\b|(?!=([_-])))([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))(\b|(?=([_-])))|" # noqa: E501
r"(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))(\b|(?=([_-])))|"
r"(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{4})(\b|(?=([_-])))|"
r"(\b|(?!=([_-])))([0-9]{1,2}[ \.\/-][A-Z]{3}[ \.\/-][0-9]{4})(\b|(?=([_-])))|"
r"(\b|(?!=([_-])))(\b[0-9]{1,2}[ \.\/-][A-Z]{3}[ \.\/-][0-9]{4})(\b|(?=([_-])))", # noqa: E501
)