Adds additional testing for both date parsing and consumed document created date

This commit is contained in:
Trenton Holmes 2022-04-12 19:52:56 -07:00
parent ce32089cc4
commit 8a6aaf4e2d
9 changed files with 345 additions and 42 deletions

View File

@ -3,6 +3,8 @@ import hashlib
import os import os
import uuid import uuid
from subprocess import Popen from subprocess import Popen
from typing import Optional
from typing import Type
import magic import magic
from asgiref.sync import async_to_sync from asgiref.sync import async_to_sync
@ -23,6 +25,7 @@ from .models import Document
from .models import DocumentType from .models import DocumentType
from .models import FileInfo from .models import FileInfo
from .models import Tag from .models import Tag
from .parsers import DocumentParser
from .parsers import get_parser_class_for_mime_type from .parsers import get_parser_class_for_mime_type
from .parsers import parse_date from .parsers import parse_date
from .parsers import ParseError from .parsers import ParseError
@ -186,7 +189,7 @@ class Consumer(LoggingMixin):
override_document_type_id=None, override_document_type_id=None,
override_tag_ids=None, override_tag_ids=None,
task_id=None, task_id=None,
): ) -> Document:
""" """
Return the document object if it was successfully created. Return the document object if it was successfully created.
""" """
@ -220,7 +223,10 @@ class Consumer(LoggingMixin):
self.log("debug", f"Detected mime type: {mime_type}") self.log("debug", f"Detected mime type: {mime_type}")
parser_class = get_parser_class_for_mime_type(mime_type) # Based on the mime type, get the parser for that type
parser_class: Optional[Type[DocumentParser]] = get_parser_class_for_mime_type(
mime_type,
)
if not parser_class: if not parser_class:
self._fail(MESSAGE_UNSUPPORTED_TYPE, f"Unsupported mime type {mime_type}") self._fail(MESSAGE_UNSUPPORTED_TYPE, f"Unsupported mime type {mime_type}")
@ -241,7 +247,10 @@ class Consumer(LoggingMixin):
# This doesn't parse the document yet, but gives us a parser. # This doesn't parse the document yet, but gives us a parser.
document_parser = parser_class(self.logging_group, progress_callback) document_parser: DocumentParser = parser_class(
self.logging_group,
progress_callback,
)
self.log("debug", f"Parser: {type(document_parser).__name__}") self.log("debug", f"Parser: {type(document_parser).__name__}")
@ -270,7 +279,7 @@ class Consumer(LoggingMixin):
text = document_parser.get_text() text = document_parser.get_text()
date = document_parser.get_date() date = document_parser.get_date()
if not date: if date is None:
self._send_progress(90, 100, "WORKING", MESSAGE_PARSE_DATE) self._send_progress(90, 100, "WORKING", MESSAGE_PARSE_DATE)
date = parse_date(self.filename, text) date = parse_date(self.filename, text)
archive_path = document_parser.get_archive_path() archive_path = document_parser.get_archive_path()
@ -342,7 +351,7 @@ class Consumer(LoggingMixin):
).hexdigest() ).hexdigest()
# Don't save with the lock active. Saving will cause the file # Don't save with the lock active. Saving will cause the file
# renaming logic to aquire the lock as well. # renaming logic to acquire the lock as well.
document.save() document.save()
# Delete the file only if it was successfully consumed # Delete the file only if it was successfully consumed
@ -362,7 +371,8 @@ class Consumer(LoggingMixin):
except Exception as e: except Exception as e:
self._fail( self._fail(
str(e), str(e),
f"The following error occured while consuming " f"{self.filename}: {e}", f"The following error occurred while consuming "
f"{self.filename}: {e}",
exc_info=True, exc_info=True,
) )
finally: finally:
@ -376,21 +386,26 @@ class Consumer(LoggingMixin):
return document return document
def _store(self, text, date, mime_type): def _store(self, text, date, mime_type) -> Document:
# If someone gave us the original filename, use it instead of doc. # If someone gave us the original filename, use it instead of doc.
file_info = FileInfo.from_filename(self.filename) file_info = FileInfo.from_filename(self.filename)
stats = os.stat(self.path)
self.log("debug", "Saving record to database") self.log("debug", "Saving record to database")
created = ( if file_info.created is not None:
file_info.created create_date = file_info.created
or date self.log("debug", f"Creation date from FileInfo: {create_date}")
or timezone.make_aware(datetime.datetime.fromtimestamp(stats.st_mtime)) elif date is not None:
) create_date = date
self.log("debug", f"Creation date from parse_date: {create_date}")
else:
stats = os.stat(self.path)
create_date = timezone.make_aware(
datetime.datetime.fromtimestamp(stats.st_mtime),
)
self.log("debug", "Creation date from st_mtime: {create_date}")
storage_type = Document.STORAGE_TYPE_UNENCRYPTED storage_type = Document.STORAGE_TYPE_UNENCRYPTED
@ -400,8 +415,8 @@ class Consumer(LoggingMixin):
content=text, content=text,
mime_type=mime_type, mime_type=mime_type,
checksum=hashlib.md5(f.read()).hexdigest(), checksum=hashlib.md5(f.read()).hexdigest(),
created=created, created=create_date,
modified=created, modified=create_date,
storage_type=storage_type, storage_type=storage_type,
) )

View File

@ -380,6 +380,10 @@ class SavedViewFilterRule(models.Model):
# TODO: why is this in the models file? # TODO: why is this in the models file?
# TODO: how about, what is this and where is it documented?
# It appears to be parsing JSON from an environment variable to get a title and date from
# the filename, if possible, as a higher priority than either document filename or
# content parsing
class FileInfo: class FileInfo:
REGEXES = OrderedDict( REGEXES = OrderedDict(
@ -387,8 +391,7 @@ class FileInfo:
( (
"created-title", "created-title",
re.compile( re.compile(
r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - " r"^(?P<created>\d{8}(\d{6})?Z) - " r"(?P<title>.*)$",
r"(?P<title>.*)$",
flags=re.IGNORECASE, flags=re.IGNORECASE,
), ),
), ),
@ -428,7 +431,7 @@ class FileInfo:
properties[name] = getattr(cls, "_get_{}".format(name))(properties[name]) properties[name] = getattr(cls, "_get_{}".format(name))(properties[name])
@classmethod @classmethod
def from_filename(cls, filename): def from_filename(cls, filename) -> "FileInfo":
# Mutate filename in-place before parsing its components # Mutate filename in-place before parsing its components
# by applying at most one of the configured transformations. # by applying at most one of the configured transformations.
for (pattern, repl) in settings.FILENAME_PARSE_TRANSFORMS: for (pattern, repl) in settings.FILENAME_PARSE_TRANSFORMS:

View File

@ -1,3 +1,4 @@
import datetime
import logging import logging
import mimetypes import mimetypes
import os import os
@ -5,6 +6,8 @@ import re
import shutil import shutil
import subprocess import subprocess
import tempfile import tempfile
from typing import Optional
from typing import Set
import magic import magic
from django.conf import settings from django.conf import settings
@ -40,11 +43,11 @@ DATE_REGEX = re.compile(
logger = logging.getLogger("paperless.parsing") logger = logging.getLogger("paperless.parsing")
def is_mime_type_supported(mime_type): def is_mime_type_supported(mime_type) -> bool:
return get_parser_class_for_mime_type(mime_type) is not None return get_parser_class_for_mime_type(mime_type) is not None
def get_default_file_extension(mime_type): def get_default_file_extension(mime_type) -> str:
for response in document_consumer_declaration.send(None): for response in document_consumer_declaration.send(None):
parser_declaration = response[1] parser_declaration = response[1]
supported_mime_types = parser_declaration["mime_types"] supported_mime_types = parser_declaration["mime_types"]
@ -59,14 +62,14 @@ def get_default_file_extension(mime_type):
return "" return ""
def is_file_ext_supported(ext): def is_file_ext_supported(ext) -> bool:
if ext: if ext:
return ext.lower() in get_supported_file_extensions() return ext.lower() in get_supported_file_extensions()
else: else:
return False return False
def get_supported_file_extensions(): def get_supported_file_extensions() -> Set[str]:
extensions = set() extensions = set()
for response in document_consumer_declaration.send(None): for response in document_consumer_declaration.send(None):
parser_declaration = response[1] parser_declaration = response[1]
@ -121,7 +124,7 @@ def run_convert(
auto_orient=False, auto_orient=False,
extra=None, extra=None,
logging_group=None, logging_group=None,
): ) -> None:
environment = os.environ.copy() environment = os.environ.copy()
if settings.CONVERT_MEMORY_LIMIT: if settings.CONVERT_MEMORY_LIMIT:
@ -146,11 +149,11 @@ def run_convert(
raise ParseError("Convert failed at {}".format(args)) raise ParseError("Convert failed at {}".format(args))
def get_default_thumbnail(): def get_default_thumbnail() -> str:
return os.path.join(os.path.dirname(__file__), "resources", "document.png") return os.path.join(os.path.dirname(__file__), "resources", "document.png")
def make_thumbnail_from_pdf_gs_fallback(in_path, temp_dir, logging_group=None): def make_thumbnail_from_pdf_gs_fallback(in_path, temp_dir, logging_group=None) -> str:
out_path = os.path.join(temp_dir, "convert_gs.png") out_path = os.path.join(temp_dir, "convert_gs.png")
# if convert fails, fall back to extracting # if convert fails, fall back to extracting
@ -184,7 +187,7 @@ def make_thumbnail_from_pdf_gs_fallback(in_path, temp_dir, logging_group=None):
return get_default_thumbnail() return get_default_thumbnail()
def make_thumbnail_from_pdf(in_path, temp_dir, logging_group=None): def make_thumbnail_from_pdf(in_path, temp_dir, logging_group=None) -> str:
""" """
The thumbnail of a PDF is just a 500px wide image of the first page. The thumbnail of a PDF is just a 500px wide image of the first page.
""" """
@ -209,12 +212,12 @@ def make_thumbnail_from_pdf(in_path, temp_dir, logging_group=None):
return out_path return out_path
def parse_date(filename, text): def parse_date(filename, text) -> Optional[datetime.datetime]:
""" """
Returns the date of the document. Returns the date of the document.
""" """
def __parser(ds, date_order): def __parser(ds: str, date_order: str) -> datetime.datetime:
""" """
Call dateparser.parse with a particular date ordering Call dateparser.parse with a particular date ordering
""" """
@ -230,9 +233,9 @@ def parse_date(filename, text):
}, },
) )
def __filter(date): def __filter(date: datetime.datetime) -> Optional[datetime.datetime]:
if ( if (
date date is not None
and date.year > 1900 and date.year > 1900
and date <= timezone.now() and date <= timezone.now()
and date.date() not in settings.IGNORE_DATES and date.date() not in settings.IGNORE_DATES
@ -244,8 +247,10 @@ def parse_date(filename, text):
# if filename date parsing is enabled, search there first: # if filename date parsing is enabled, search there first:
if settings.FILENAME_DATE_ORDER: if settings.FILENAME_DATE_ORDER:
logger.info("Attempting parsing from filename")
for m in re.finditer(DATE_REGEX, filename): for m in re.finditer(DATE_REGEX, filename):
date_string = m.group(0) date_string = m.group(0)
logger.info(f"Found potential date: {date_string}")
try: try:
date = __parser(date_string, settings.FILENAME_DATE_ORDER) date = __parser(date_string, settings.FILENAME_DATE_ORDER)
@ -255,11 +260,16 @@ def parse_date(filename, text):
date = __filter(date) date = __filter(date)
if date is not None: if date is not None:
logger.info(f"Found date: {date}")
return date return date
else:
logger.info("Filtered date out")
logger.info("Attempting parsing from content")
# Iterate through all regex matches in text and try to parse the date # Iterate through all regex matches in text and try to parse the date
for m in re.finditer(DATE_REGEX, text): for m in re.finditer(DATE_REGEX, text):
date_string = m.group(0) date_string = m.group(0)
logger.info(f"Found potential date: {date_string}")
try: try:
date = __parser(date_string, settings.DATE_ORDER) date = __parser(date_string, settings.DATE_ORDER)
@ -269,7 +279,10 @@ def parse_date(filename, text):
date = __filter(date) date = __filter(date)
if date is not None: if date is not None:
break logger.info(f"Found date: {date}")
return date
else:
logger.info("Filtered date out")
return date return date
@ -294,7 +307,7 @@ class DocumentParser(LoggingMixin):
self.archive_path = None self.archive_path = None
self.text = None self.text = None
self.date = None self.date: Optional[datetime.datetime] = None
self.progress_callback = progress_callback self.progress_callback = progress_callback
def progress(self, current_progress, max_progress): def progress(self, current_progress, max_progress):
@ -342,7 +355,7 @@ class DocumentParser(LoggingMixin):
def get_text(self): def get_text(self):
return self.text return self.text
def get_date(self): def get_date(self) -> Optional[datetime.datetime]:
return self.date return self.date
def cleanup(self): def cleanup(self):

Binary file not shown.

Binary file not shown.

View File

@ -1,3 +1,4 @@
import datetime
import os import os
import re import re
import shutil import shutil
@ -5,6 +6,8 @@ import tempfile
from unittest import mock from unittest import mock
from unittest.mock import MagicMock from unittest.mock import MagicMock
from dateutil import tz
try: try:
import zoneinfo import zoneinfo
except ImportError: except ImportError:
@ -502,7 +505,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
self.assertRaisesMessage( self.assertRaisesMessage(
ConsumerError, ConsumerError,
"sample.pdf: The following error occured while consuming sample.pdf: NO.", "sample.pdf: The following error occurred while consuming sample.pdf: NO.",
self.consumer.try_consume_file, self.consumer.try_consume_file,
filename, filename,
) )
@ -654,6 +657,127 @@ class TestConsumer(DirectoriesMixin, TestCase):
sanity_check() sanity_check()
@mock.patch("documents.consumer.magic.from_file", fake_magic_from_file)
class TestConsumerCreatedDate(DirectoriesMixin, TestCase):
    """
    Checks where the created date of a consumed document comes from:
    the document content or the name of the consumed file.
    """

    def setUp(self):
        super().setUp()

        # this prevents websocket message reports during testing.
        patcher = mock.patch("documents.consumer.Consumer._send_progress")
        self._send_progress = patcher.start()
        self.addCleanup(patcher.stop)

        self.consumer = Consumer()

    def _consume_sample(self, sample_name, consumed_name):
        """
        Copy one of the original sample documents into the scratch directory
        under consumed_name, consume it and return the resulting document.
        """
        src = os.path.join(
            os.path.dirname(__file__),
            "samples",
            "documents",
            "originals",
            sample_name,
        )
        dst = os.path.join(self.dirs.scratch_dir, consumed_name)
        shutil.copy(src, dst)

        return self.consumer.try_consume_file(dst)

    def test_consume_date_from_content(self):
        """
        GIVEN:
            - File content with date in DMY (default) format
        THEN:
            - Should parse the date from the file content
        """
        document = self._consume_sample("0000005.pdf", "sample.pdf")

        self.assertEqual(
            document.created,
            datetime.datetime(1996, 2, 20, tzinfo=tz.gettz(settings.TIME_ZONE)),
        )

    @override_settings(FILENAME_DATE_ORDER="YMD")
    def test_consume_date_from_filename(self):
        """
        GIVEN:
            - File content with date in DMY (default) format
            - Filename with date in YMD format
        THEN:
            - Should parse the date from the filename
        """
        document = self._consume_sample("0000005.pdf", "Scan - 2022-02-01.pdf")

        self.assertEqual(
            document.created,
            datetime.datetime(2022, 2, 1, tzinfo=tz.gettz(settings.TIME_ZONE)),
        )

    # Pin the setting so the test does not depend on the environment's default
    @override_settings(FILENAME_DATE_ORDER=None)
    def test_consume_date_filename_date_use_content(self):
        """
        GIVEN:
            - File content with date in DMY (default) format
            - Filename date parsing disabled
            - Filename with date in YMD format
        THEN:
            - Should parse the date from the content
        """
        document = self._consume_sample("0000005.pdf", "Scan - 2022-02-01.pdf")

        self.assertEqual(
            document.created,
            datetime.datetime(1996, 2, 20, tzinfo=tz.gettz(settings.TIME_ZONE)),
        )

    @override_settings(
        IGNORE_DATES=(datetime.date(2010, 12, 13), datetime.date(2011, 11, 12)),
    )
    def test_consume_date_use_content_with_ignore(self):
        """
        GIVEN:
            - File content with dates in DMY (default) format
            - File content includes ignored dates
        THEN:
            - Should parse the date from the content, skipping the ignored dates
        """
        document = self._consume_sample("0000006.pdf", "0000006.pdf")

        self.assertEqual(
            document.created,
            datetime.datetime(1997, 2, 20, tzinfo=tz.gettz(settings.TIME_ZONE)),
        )
class PreConsumeTestCase(TestCase): class PreConsumeTestCase(TestCase):
@mock.patch("documents.consumer.Popen") @mock.patch("documents.consumer.Popen")
@override_settings(PRE_CONSUME_SCRIPT=None) @override_settings(PRE_CONSUME_SCRIPT=None)

View File

@ -8,6 +8,7 @@ from django.conf import settings
from django.test import override_settings from django.test import override_settings
from django.test import TestCase from django.test import TestCase
from documents.parsers import parse_date from documents.parsers import parse_date
from paperless.settings import DATE_ORDER
class TestDate(TestCase): class TestDate(TestCase):
@ -160,19 +161,112 @@ class TestDate(TestCase):
def test_crazy_date_with_spaces(self, *args): def test_crazy_date_with_spaces(self, *args):
self.assertIsNone(parse_date("", "20 408000l 2475")) self.assertIsNone(parse_date("", "20 408000l 2475"))
@override_settings(FILENAME_DATE_ORDER="YMD")
def test_filename_date_parse_valid_ymd(self, *args):
"""
GIVEN:
- Date parsing from the filename is enabled
- Filename date format is with Year Month Day (YMD)
- Filename contains date matching the format
THEN:
- Should parse the date from the filename
"""
self.assertEqual(
parse_date("/tmp/Scan-2022-04-01.pdf", "No date in here"),
datetime.datetime(2022, 4, 1, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)),
)
@override_settings(FILENAME_DATE_ORDER="DMY")
def test_filename_date_parse_valid_dmy(self, *args):
"""
GIVEN:
- Date parsing from the filename is enabled
- Filename date format is with Day Month Year (DMY)
- Filename contains date matching the format
THEN:
- Should parse the date from the filename
"""
self.assertEqual(
parse_date("/tmp/Scan-10.01.2021.pdf", "No date in here"),
datetime.datetime(2021, 1, 10, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)),
)
@override_settings(FILENAME_DATE_ORDER="YMD") @override_settings(FILENAME_DATE_ORDER="YMD")
def test_filename_date_parse_invalid(self, *args): def test_filename_date_parse_invalid(self, *args):
"""
GIVEN:
- Date parsing from the filename is enabled
- Filename includes no date
- File content includes no date
THEN:
- No date is parsed
"""
self.assertIsNone( self.assertIsNone(
parse_date("/tmp/20 408000l 2475 - test.pdf", "No date in here"), parse_date("/tmp/20 408000l 2475 - test.pdf", "No date in here"),
) )
@override_settings(
    FILENAME_DATE_ORDER="YMD",
    IGNORE_DATES=(datetime.date(2022, 4, 1),),
)
def test_filename_date_ignored_use_content(self, *args):
    """
    GIVEN:
        - Date parsing from the filename is enabled
        - Filename date format is Year Month Day (YMD)
        - Date order is Day Month Year (DMY, the default)
        - Filename contains date matching the format
        - Filename date is an ignored date
        - File content includes a date
    THEN:
        - Should parse the date from the content, not the filename
    """
    self.assertEqual(
        parse_date("/tmp/Scan-2022-04-01.pdf", "The matching date is 24.03.2022"),
        datetime.datetime(2022, 3, 24, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)),
    )
@override_settings( @override_settings(
IGNORE_DATES=(datetime.date(2019, 11, 3), datetime.date(2020, 1, 17)), IGNORE_DATES=(datetime.date(2019, 11, 3), datetime.date(2020, 1, 17)),
) )
def test_ignored_dates(self, *args): def test_ignored_dates_default_order(self, *args):
"""
GIVEN:
- Ignore dates have been set
- File content includes ignored dates
- File content includes 1 non-ignored date
THEN:
- Should parse the non-ignored date from content
"""
text = "lorem ipsum 110319, 20200117 and lorem 13.02.2018 lorem " "ipsum" text = "lorem ipsum 110319, 20200117 and lorem 13.02.2018 lorem " "ipsum"
date = parse_date("", text)
self.assertEqual( self.assertEqual(
date, parse_date("", text),
datetime.datetime(2018, 2, 13, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)),
)
@override_settings(
IGNORE_DATES=(datetime.date(2019, 11, 3), datetime.date(2020, 1, 17)),
DATE_ORDER="YMD",
)
def test_ignored_dates_order_ymd(self, *args):
"""
GIVEN:
- Ignore dates have been set
- Date order is Year Month Day (YMD)
- File content includes ignored dates
- File content includes 1 non-ignored date
THEN:
- Should parse the non-ignored date from content
"""
text = "lorem ipsum 190311, 20200117 and lorem 13.02.2018 lorem " "ipsum"
self.assertEqual(
parse_date("", text),
datetime.datetime(2018, 2, 13, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)), datetime.datetime(2018, 2, 13, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)),
) )

View File

@ -1,9 +1,11 @@
import datetime
import json import json
import math import math
import multiprocessing import multiprocessing
import os import os
import re import re
from typing import Final from typing import Final
from typing import Set
from urllib.parse import urlparse from urllib.parse import urlparse
from concurrent_log_handler.queue import setup_logging_queues from concurrent_log_handler.queue import setup_logging_queues
@ -604,15 +606,22 @@ if PAPERLESS_TIKA_ENABLED:
INSTALLED_APPS.append("paperless_tika.apps.PaperlessTikaConfig") INSTALLED_APPS.append("paperless_tika.apps.PaperlessTikaConfig")
# List dates that should be ignored when trying to parse date from document text # List dates that should be ignored when trying to parse date from document text
IGNORE_DATES = set() IGNORE_DATES: Set[datetime.date] = set()
if os.getenv("PAPERLESS_IGNORE_DATES", ""):
def _parse_ignore_dates(env_ignore: str) -> Set[datetime.date]:
    """
    Parse a comma separated string of dates into a set of dates to ignore.

    Each comma separated entry is handed to dateparser. Entries which it
    cannot turn into a date are skipped, so an empty string gives an empty set.

    :param env_ignore: the raw setting value, e.g. "1985-05-01,1991-12-05"
    :return: the calendar dates (without any time component) which were parsed
    """
    # Imported locally, so dateparser is only loaded when parsing is needed
    import dateparser

    ignored_dates = set()
    for s in env_ignore.split(","):
        d = dateparser.parse(s)
        if d:
            # Only the calendar day is kept; callers compare against .date()
            ignored_dates.add(d.date())
    return ignored_dates
if os.getenv("PAPERLESS_IGNORE_DATES") is not None:
IGNORE_DATES = _parse_ignore_dates(os.getenv("PAPERLESS_IGNORE_DATES"))
ENABLE_UPDATE_CHECK = os.getenv("PAPERLESS_ENABLE_UPDATE_CHECK", "default") ENABLE_UPDATE_CHECK = os.getenv("PAPERLESS_ENABLE_UPDATE_CHECK", "default")
if ENABLE_UPDATE_CHECK != "default": if ENABLE_UPDATE_CHECK != "default":

View File

@ -0,0 +1,45 @@
import datetime
from unittest import TestCase
from paperless.settings import _parse_ignore_dates
class TestIgnoreDateParsing(TestCase):
    """
    Exercises the parsing of the PAPERLESS_IGNORE_DATES setting value
    """

    def test_no_ignore_dates_set(self):
        """
        GIVEN:
            - An empty ignore dates value
        THEN:
            - The parsed result is an empty set
        """
        parsed = _parse_ignore_dates("")
        self.assertSetEqual(parsed, set())

    def test_single_ignore_dates_set(self):
        """
        GIVEN:
            - Ignore dates values holding one or two dates
        THEN:
            - Every listed date is in the parsed result
        """
        scenarios = (
            ("1985-05-01", {datetime.date(1985, 5, 1)}),
            (
                "1985-05-01,1991-12-05",
                {datetime.date(1985, 5, 1), datetime.date(1991, 12, 5)},
            ),
            ("2010-12-13", {datetime.date(2010, 12, 13)}),
        )

        for raw_value, wanted in scenarios:
            self.assertSetEqual(_parse_ignore_dates(raw_value), wanted)