Adds additional testing for both date parsing and consumed document created date

2025-11-23 23:49:08 -06:00 · 2022-04-12 19:52:56 -07:00
parent ce32089cc4
commit 8a6aaf4e2d
9 changed files with 345 additions and 42 deletions
--- a/src/documents/models.py
+++ b/src/documents/models.py
@@ -380,6 +380,10 @@ class SavedViewFilterRule(models.Model):


 # TODO: why is this in the models file?
+# TODO: how about, what is this and where is it documented?
+# It appears to parse JSON from an environment variable to get a title and date from
+# the filename, if possible, as a higher priority than either document filename or
+# content parsing
 class FileInfo:

    REGEXES = OrderedDict(
@@ -387,8 +391,7 @@ class FileInfo:
            (
                "created-title",
                re.compile(
-                    r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
-                    r"(?P<title>.*)$",
+                    r"^(?P<created>\d{8}(\d{6})?Z) - " r"(?P<title>.*)$",
                    flags=re.IGNORECASE,
                ),
            ),
@@ -428,7 +431,7 @@ class FileInfo:
            properties[name] = getattr(cls, "_get_{}".format(name))(properties[name])

    @classmethod
-    def from_filename(cls, filename):
+    def from_filename(cls, filename) -> "FileInfo":
        # Mutate filename in-place before parsing its components
        # by applying at most one of the configured transformations.
        for (pattern, repl) in settings.FILENAME_PARSE_TRANSFORMS: