From 0b34894db9706dfaeea683fc6e0b1f6890eb2efc Mon Sep 17 00:00:00 2001
From: Tikitu de Jager <tikitu@minddistrict.com>
Date: Mon, 7 Mar 2016 20:42:25 +0200
Subject: [PATCH 1/8] Add `FileInfo` class with `pass` implementations

---
 src/documents/models.py | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)
diff --git a/src/documents/models.py b/src/documents/models.py
index 0d79dba0a..bfc0224bd 100644
--- a/src/documents/models.py
+++ b/src/documents/models.py
@@ -12,6 +12,35 @@ from django.utils import timezone
 from .managers import LogManager
 
 
+class FileInfo(object):
+    def __init__(self, title, suffix,
+                 correspondent=None, tags=None,
+                 file_mtime=None, path=None):
+        self._title = title
+        self._suffix = suffix
+        self._correspondent = correspondent
+        self._tags = tags
+        self._file_mtime = file_mtime
+        self._path = path
+
+    @classmethod
+    def from_path(cls, path):
+        pass
+
+    @classmethod
+    def from_document(cls, document):
+        pass
+
+    def filename(self):
+        pass
+
+    def kwargs_for_document_create(self):
+        pass
+
+    def add_tags(self, tags):
+        self._tags = set(tags).union(self._tags)
+
+
 class SluggedModel(models.Model):
 
     name = models.CharField(max_length=128, unique=True)

From 1f75af01373543d370218b6055f9afb525937c3b Mon Sep 17 00:00:00 2001
From: Tikitu de Jager <tikitu@minddistrict.com>
Date: Mon, 7 Mar 2016 21:05:04 +0200
Subject: [PATCH 2/8] Extract filename parsing into testable class

---
 src/documents/consumer.py | 69 +++------------------------------------
 src/documents/models.py   | 65 +++++++++++++++++++++++++++++++++++-
 2 files changed, 68 insertions(+), 66 deletions(-)

diff --git a/src/documents/consumer.py b/src/documents/consumer.py
index fbdbbc276..74aced5c0 100644
--- a/src/documents/consumer.py
+++ b/src/documents/consumer.py
@@ -24,7 +24,7 @@ from pyocr.tesseract import TesseractError
 
 from paperless.db import GnuPG
 
-from .models import Correspondent, Tag, Document, Log
+from .models import Correspondent, Tag, Document, Log, FileInfo
 from .languages import ISO639
 
 
@@ -54,19 +54,6 @@ class Consumer(object):
 
     DEFAULT_OCR_LANGUAGE = settings.OCR_LANGUAGE
 
-    REGEX_TITLE = re.compile(
-        r"^.*/(.*)\.(pdf|jpe?g|png|gif|tiff)$",
-        flags=re.IGNORECASE
-    )
-    REGEX_CORRESPONDENT_TITLE = re.compile(
-        r"^.*/(.+) - (.*)\.(pdf|jpe?g|png|gif|tiff)$",
-        flags=re.IGNORECASE
-    )
-    REGEX_CORRESPONDENT_TITLE_TAGS = re.compile(
-        r"^.*/(.*) - (.*) - ([a-z0-9\-,]*)\.(pdf|jpe?g|png|gif|tiff)$",
-        flags=re.IGNORECASE
-    )
-
     def __init__(self):
 
         self.logger = logging.getLogger(__name__)
@@ -105,7 +92,7 @@ class Consumer(object):
             if not os.path.isfile(doc):
                 continue
 
-            if not re.match(self.REGEX_TITLE, doc):
+            if not re.match(FileInfo.REGEX_TITLE, doc):
                 continue
 
             if doc in self._ignore:
@@ -270,56 +257,8 @@ class Consumer(object):
         return re.sub(r"\s+", " ", r)
 
     def _guess_attributes_from_name(self, parseable):
-        """
-        We use a crude naming convention to make handling the correspondent,
-        title, and tags easier:
-          "<correspondent> - <title> - <tags>.<suffix>"
-          "<correspondent> - <title>.<suffix>"
-          "<title>.<suffix>"
-        """
-
-        def get_correspondent(correspondent_name):
-            return Correspondent.objects.get_or_create(
-                name=correspondent_name,
-                defaults={"slug": slugify(correspondent_name)}
-            )[0]
-
-        def get_tags(tags):
-            r = []
-            for t in tags.split(","):
-                r.append(
-                    Tag.objects.get_or_create(slug=t, defaults={"name": t})[0])
-            return tuple(r)
-
-        def get_suffix(suffix):
-            suffix = suffix.lower()
-            if suffix == "jpeg":
-                return "jpg"
-            return suffix
-
-        # First attempt: "<correspondent> - <title> - <tags>.<suffix>"
-        m = re.match(self.REGEX_CORRESPONDENT_TITLE_TAGS, parseable)
-        if m:
-            return (
-                get_correspondent(m.group(1)),
-                m.group(2),
-                get_tags(m.group(3)),
-                get_suffix(m.group(4))
-            )
-
-        # Second attempt: "<correspondent> - <title>.<suffix>"
-        m = re.match(self.REGEX_CORRESPONDENT_TITLE, parseable)
-        if m:
-            return (
-                get_correspondent(m.group(1)),
-                m.group(2),
-                (),
-                get_suffix(m.group(3))
-            )
-
-        # That didn't work, so we assume correspondent and tags are None
-        m = re.match(self.REGEX_TITLE, parseable)
-        return None, m.group(1), (), get_suffix(m.group(2))
+        file_info = FileInfo.from_path(parseable)
+        return file_info.sender, file_info.title, file_info.tags, file_info.suffix
 
     def _store(self, text, doc, thumbnail):
 
diff --git a/src/documents/models.py b/src/documents/models.py
index bfc0224bd..c8342bf4a 100644
--- a/src/documents/models.py
+++ b/src/documents/models.py
@@ -23,9 +23,72 @@ class FileInfo(object):
         self._file_mtime = file_mtime
         self._path = path
 
+    REGEX_TITLE = re.compile(
+        r"^.*/(.*)\.(pdf|jpe?g|png|gif|tiff)$",
+        flags=re.IGNORECASE
+    )
+    REGEX_CORRESPONDENT_TITLE = re.compile(
+        r"^.*/(.+) - (.*)\.(pdf|jpe?g|png|gif|tiff)$",
+        flags=re.IGNORECASE
+    )
+    REGEX_CORRESPONDENT_TITLE_TAGS = re.compile(
+        r"^.*/(.*) - (.*) - ([a-z0-9\-,]*)\.(pdf|jpe?g|png|gif|tiff)$",
+        flags=re.IGNORECASE
+    )
+
     @classmethod
     def from_path(cls, path):
-        pass
+        """
+        We use a crude naming convention to make handling the correspondent,
+        title, and tags easier:
+          "<correspondent> - <title> - <tags>.<suffix>"
+          "<correspondent> - <title>.<suffix>"
+          "<title>.<suffix>"
+        """
+
+        def get_correspondent(correspondent_name):
+            return Correspondent.objects.get_or_create(
+                name=correspondent_name,
+                defaults={"slug": slugify(correspondent_name)}
+            )[0]
+
+        def get_tags(tags):
+            r = []
+            for t in tags.split(","):
+                r.append(
+                    Tag.objects.get_or_create(slug=t, defaults={"name": t})[0])
+            return tuple(r)
+
+        def get_suffix(suffix):
+            suffix = suffix.lower()
+            if suffix == "jpeg":
+                return "jpg"
+            return suffix
+
+        # First attempt: "<correspondent> - <title> - <tags>.<suffix>"
+        m = re.match(cls.REGEX_CORRESPONDENT_TITLE_TAGS, path)
+        if m:
+            return cls(
+                title=m.group(2),
+                correspondent=get_correspondent(m.group(1)),
+                tags=get_tags(m.group(3)),
+                suffix=get_suffix(m.group(4))
+            )
+
+        # Second attempt: "<correspondent> - <title>.<suffix>"
+        m = re.match(cls.REGEX_CORRESPONDENT_TITLE, path)
+        if m:
+            return cls(
+                title=m.group(2),
+                correspondent=get_correspondent(m.group(1)),
+                tags=(),
+                suffix=get_suffix(m.group(3))
+            )
+
+        # That didn't work, so we assume correspondent and tags are None
+        m = re.match(cls.REGEX_TITLE, path)
+        return FileInfo(
+            title=m.group(1), tags=(), suffix=get_suffix(m.group(2)))
 
     @classmethod
     def from_document(cls, document):

From 95217e8e21201f06beccf3f368a34396ba35660d Mon Sep 17 00:00:00 2001
From: Tikitu de Jager <tikitu@minddistrict.com>
Date: Mon, 7 Mar 2016 21:08:07 +0200
Subject: [PATCH 3/8] Use FileInfo directly instead of via indirection

---
 src/documents/consumer.py | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/src/documents/consumer.py b/src/documents/consumer.py
index 74aced5c0..704548013 100644
--- a/src/documents/consumer.py
+++ b/src/documents/consumer.py
@@ -256,24 +256,20 @@ class Consumer(object):
         # Strip out excess white space to allow matching to go smoother
         return re.sub(r"\s+", " ", r)
 
-    def _guess_attributes_from_name(self, parseable):
-        file_info = FileInfo.from_path(parseable)
-        return file_info.sender, file_info.title, file_info.tags, file_info.suffix
-
     def _store(self, text, doc, thumbnail):
 
-        sender, title, tags, file_type = self._guess_attributes_from_name(doc)
-        relevant_tags = set(list(Tag.match_all(text)) + list(tags))
+        file_info = FileInfo.from_path(doc)
+        relevant_tags = set(list(Tag.match_all(text)) + list(file_info.tags))
 
         stats = os.stat(doc)
 
         self.log("debug", "Saving record to database")
 
         document = Document.objects.create(
-            correspondent=sender,
-            title=title,
+            correspondent=file_info.correspondent,
+            title=file_info.title,
             content=text,
-            file_type=file_type,
+            file_type=file_info.suffix,
             created=timezone.make_aware(
                 datetime.datetime.fromtimestamp(stats.st_mtime)),
             modified=timezone.make_aware(

From 4065d14fabd9122512f09726ba272cd54540db18 Mon Sep 17 00:00:00 2001
From: Tikitu de Jager <tikitu@minddistrict.com>
Date: Mon, 7 Mar 2016 21:11:34 +0200
Subject: [PATCH 4/8] Remove stuff I intended to use but never did

---
 src/documents/models.py | 18 +-----------------
 1 file changed, 1 insertion(+), 17 deletions(-)

diff --git a/src/documents/models.py b/src/documents/models.py
index c8342bf4a..e60a699d2 100644
--- a/src/documents/models.py
+++ b/src/documents/models.py
@@ -14,14 +14,11 @@ from .managers import LogManager
 
 class FileInfo(object):
     def __init__(self, title, suffix,
-                 correspondent=None, tags=None,
-                 file_mtime=None, path=None):
+                 correspondent=None, tags=None):
         self._title = title
         self._suffix = suffix
         self._correspondent = correspondent
         self._tags = tags
-        self._file_mtime = file_mtime
-        self._path = path
 
     REGEX_TITLE = re.compile(
         r"^.*/(.*)\.(pdf|jpe?g|png|gif|tiff)$",
@@ -90,19 +87,6 @@ class FileInfo(object):
         return FileInfo(
             title=m.group(1), tags=(), suffix=get_suffix(m.group(2)))
 
-    @classmethod
-    def from_document(cls, document):
-        pass
-
-    def filename(self):
-        pass
-
-    def kwargs_for_document_create(self):
-        pass
-
-    def add_tags(self, tags):
-        self._tags = set(tags).union(self._tags)
-
 
 class SluggedModel(models.Model):
 

From ad07eec3e1a0ead8127870967378edfb9e569b37 Mon Sep 17 00:00:00 2001
From: Tikitu de Jager <tikitu@minddistrict.com>
Date: Mon, 7 Mar 2016 21:37:18 +0200
Subject: [PATCH 5/8] Make tests pass

---
 src/documents/models.py              | 15 +++++++++++++++
 src/documents/tests/test_consumer.py | 15 +++++++--------
 2 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/src/documents/models.py b/src/documents/models.py
index e60a699d2..94dc60102 100644
--- a/src/documents/models.py
+++ b/src/documents/models.py
@@ -87,6 +87,21 @@ class FileInfo(object):
         return FileInfo(
             title=m.group(1), tags=(), suffix=get_suffix(m.group(2)))
 
+    @property
+    def title(self):
+        return self._title
+
+    @property
+    def correspondent(self):
+        return self._correspondent
+
+    @property
+    def tags(self):
+        return self._tags
+
+    @property
+    def suffix(self):
+        return self._suffix
 
 class SluggedModel(models.Model):
 
diff --git a/src/documents/tests/test_consumer.py b/src/documents/tests/test_consumer.py
index 04f92f98c..0e4c9d368 100644
--- a/src/documents/tests/test_consumer.py
+++ b/src/documents/tests/test_consumer.py
@@ -1,12 +1,11 @@
 from django.test import TestCase
 
-from ..consumer import Consumer
+from ..models import FileInfo
 
 
 class TestAttachment(TestCase):
 
     TAGS = ("tag1", "tag2", "tag3")
-    CONSUMER = Consumer()
     SUFFIXES = (
         "pdf", "png", "jpg", "jpeg", "gif",
         "PDF", "PNG", "JPG", "JPEG", "GIF",
@@ -16,14 +15,14 @@ class TestAttachment(TestCase):
     def _test_guess_attributes_from_name(self, path, sender, title, tags):
         for suffix in self.SUFFIXES:
             f = path.format(suffix)
-            results = self.CONSUMER._guess_attributes_from_name(f)
-            self.assertEqual(results[0].name, sender, f)
-            self.assertEqual(results[1], title, f)
-            self.assertEqual(tuple([t.slug for t in results[2]]), tags, f)
+            file_info = FileInfo.from_path(f)
+            self.assertEqual(file_info.correspondent.name, sender, f)
+            self.assertEqual(file_info.title, title, f)
+            self.assertEqual(tuple([t.slug for t in file_info.tags]), tags, f)
             if suffix.lower() == "jpeg":
-                self.assertEqual(results[3], "jpg", f)
+                self.assertEqual(file_info.suffix, "jpg", f)
             else:
-                self.assertEqual(results[3], suffix.lower(), f)
+                self.assertEqual(file_info.suffix, suffix.lower(), f)
 
     def test_guess_attributes_from_name0(self):
         self._test_guess_attributes_from_name(

From 8afdcabca8993bf2f26db504a27db74f3fe1c932 Mon Sep 17 00:00:00 2001
From: Tikitu de Jager <tikitu@minddistrict.com>
Date: Mon, 7 Mar 2016 21:42:52 +0200
Subject: [PATCH 6/8] Template-based tests of combinations of valid elements

---
 src/documents/tests/test_consumer.py | 60 ++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)

diff --git a/src/documents/tests/test_consumer.py b/src/documents/tests/test_consumer.py
index 0e4c9d368..37d765ac7 100644
--- a/src/documents/tests/test_consumer.py
+++ b/src/documents/tests/test_consumer.py
@@ -91,3 +91,63 @@ class TestAttachment(TestCase):
             "Τιτλε",
             self.TAGS
         )
+
+
+class Permutations(TestCase):
+    valid_correspondents = ['timmy', 'Dr. McWheelie',
+                            'Dash Gor-don', 'ο Θερμαστής']
+    valid_titles = ['title', 'Title w Spaces', 'Title a-dash', 'Τίτλος', '']
+    valid_tags = ['tag', 'tig,tag', '-', '0,1,2', '']
+    valid_suffixes = ['pdf', 'png', 'jpg', 'jpeg', 'gif']
+
+    def _test_guessed_attributes(
+            self, filename, title, suffix, correspondent=None, tags=None):
+        file_info = FileInfo.from_path(filename)
+
+        # Required
+        self.assertEqual(file_info.title, title, filename)
+        if suffix == 'jpeg':
+            suffix = 'jpg'
+        self.assertEqual(file_info.suffix, suffix, filename)
+        # Optional
+        if correspondent is None:
+            self.assertEqual(file_info.correspondent,
+                             correspondent, filename)
+        else:
+            self.assertEqual(file_info.correspondent.name,
+                             correspondent, filename)
+        if tags is None:
+            self.assertEqual(file_info.tags, (), filename)
+        else:
+            self.assertEqual([t.slug for t in file_info.tags],
+                             tags.split(','),
+                             filename)
+
+    def test_just_title(self):
+        template = '/path/to/{title}.{suffix}'
+        for title in self.valid_titles:
+            for suffix in self.valid_suffixes:
+                spec = dict(title=title, suffix=suffix)
+                filename = template.format(**spec)
+                self._test_guessed_attributes(filename, **spec)
+
+    def test_title_and_correspondent(self):
+        template = '/path/to/{correspondent} - {title}.{suffix}'
+        for correspondent in self.valid_correspondents:
+            for title in self.valid_titles:
+                for suffix in self.valid_suffixes:
+                    spec = dict(correspondent=correspondent, title=title,
+                                suffix=suffix)
+                    filename = template.format(**spec)
+                    self._test_guessed_attributes(filename, **spec)
+
+    def test_title_and_correspondent_and_tags(self):
+        template = '/path/to/{correspondent} - {title} - {tags}.{suffix}'
+        for correspondent in self.valid_correspondents:
+            for title in self.valid_titles:
+                for tags in self.valid_tags:
+                    for suffix in self.valid_suffixes:
+                        spec = dict(correspondent=correspondent, title=title,
+                                    tags=tags, suffix=suffix)
+                        filename = template.format(**spec)
+                        self._test_guessed_attributes(filename, **spec)

From a22f088e28335b4bb33e9226fbb460b4b24086e7 Mon Sep 17 00:00:00 2001
From: Tikitu de Jager <tikitu@minddistrict.com>
Date: Mon, 7 Mar 2016 21:48:47 +0200
Subject: [PATCH 7/8] Add some failing edge case tests

---
 src/documents/tests/test_consumer.py | 32 ++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/src/documents/tests/test_consumer.py b/src/documents/tests/test_consumer.py
index 37d765ac7..634e8c4f0 100644
--- a/src/documents/tests/test_consumer.py
+++ b/src/documents/tests/test_consumer.py
@@ -92,6 +92,38 @@ class TestAttachment(TestCase):
             self.TAGS
         )
 
+    def test_guess_attributes_from_name_when_correspondent_empty(self):
+        self._test_guess_attributes_from_name(
+            '/path/to/ - weird empty correspondent but should not break.{}',
+            None,
+            ' - weird empty correspondent but should not break',
+            ()
+        )
+
+    def test_guess_attributes_from_name_when_title_starts_with_dash(self):
+        self._test_guess_attributes_from_name(
+            '/path/to/- weird but should not break.{}',
+            None,
+            '- weird but should not break',
+            ()
+        )
+
+    def test_guess_attributes_from_name_when_title_ends_with_dash(self):
+        self._test_guess_attributes_from_name(
+            '/path/to/weird but should not break -.{}',
+            None,
+            'weird but should not break -',
+            ()
+        )
+
+    def test_guess_attributes_from_name_when_title_is_empty(self):
+        self._test_guess_attributes_from_name(
+            '/path/to/weird correspondent but should not break - .{}',
+            'weird correspondent but should not break',
+            '',
+            ()
+        )
+
 
 class Permutations(TestCase):
     valid_correspondents = ['timmy', 'Dr. McWheelie',

From 0aa0513004f17e81462ff7fdfd450cced83c5cae Mon Sep 17 00:00:00 2001
From: Daniel Quinn <code@danielquinn.org>
Date: Thu, 24 Mar 2016 19:18:33 +0000
Subject: [PATCH 8/8] Modifications for support for dates

---
 src/documents/consumer.py                     |   7 +-
 .../management/commands/document_exporter.py  |  21 +-
 src/documents/models.py                       | 227 +++++++++++-------
 src/documents/tests/test_consumer.py          | 205 ++++++++++++----
 4 files changed, 314 insertions(+), 146 deletions(-)

diff --git a/src/documents/consumer.py b/src/documents/consumer.py
index 704548013..45239696b 100644
--- a/src/documents/consumer.py
+++ b/src/documents/consumer.py
@@ -19,12 +19,11 @@ from PIL import Image
 
 from django.conf import settings
 from django.utils import timezone
-from django.template.defaultfilters import slugify
 from pyocr.tesseract import TesseractError
 
 from paperless.db import GnuPG
 
-from .models import Correspondent, Tag, Document, Log, FileInfo
+from .models import Tag, Document, Log, FileInfo
 from .languages import ISO639
 
 
@@ -92,7 +91,7 @@ class Consumer(object):
             if not os.path.isfile(doc):
                 continue
 
-            if not re.match(FileInfo.REGEX_TITLE, doc):
+            if not re.match(FileInfo.REGEXES["title"], doc):
                 continue
 
             if doc in self._ignore:
@@ -269,7 +268,7 @@ class Consumer(object):
             correspondent=file_info.correspondent,
             title=file_info.title,
             content=text,
-            file_type=file_info.suffix,
+            file_type=file_info.extension,
             created=timezone.make_aware(
                 datetime.datetime.fromtimestamp(stats.st_mtime)),
             modified=timezone.make_aware(
diff --git a/src/documents/management/commands/document_exporter.py b/src/documents/management/commands/document_exporter.py
index 913f7ae79..1c6ac6e44 100644
--- a/src/documents/management/commands/document_exporter.py
+++ b/src/documents/management/commands/document_exporter.py
@@ -96,11 +96,16 @@ class Command(Renderable, BaseCommand):
 
     @staticmethod
     def _get_legacy_file_name(doc):
-        if doc.correspondent and doc.title:
-            tags = ",".join([t.slug for t in doc.tags.all()])
-            if tags:
-                return "{} - {} - {}.{}".format(
-                    doc.correspondent, doc.title, tags, doc.file_type)
-            return "{} - {}.{}".format(
-                doc.correspondent, doc.title, doc.file_type)
-        return os.path.basename(doc.source_path)
+        """Return the export file name for doc, omitting absent parts."""
+        if not doc.correspondent and not doc.title:
+            return os.path.basename(doc.source_path)
+
+        # Leave out a missing correspondent rather than writing "None".
+        parts = [doc.created.strftime("%Y%m%d%H%M%SZ")]
+        if doc.correspondent:
+            parts.append(str(doc.correspondent))
+        parts.append(str(doc.title))
+        tags = ",".join([t.slug for t in doc.tags.all()])
+        if tags:
+            parts.append(tags)
+        return "{}.{}".format(" - ".join(parts), doc.file_type)
diff --git a/src/documents/models.py b/src/documents/models.py
index 94dc60102..8880935e3 100644
--- a/src/documents/models.py
+++ b/src/documents/models.py
@@ -1,8 +1,11 @@
+import dateutil.parser
 import logging
 import os
 import re
 import uuid
 
+from collections import OrderedDict
+
 from django.conf import settings
 from django.core.urlresolvers import reverse
 from django.db import models
@@ -12,97 +15,6 @@ from django.utils import timezone
 from .managers import LogManager
 
 
-class FileInfo(object):
-    def __init__(self, title, suffix,
-                 correspondent=None, tags=None):
-        self._title = title
-        self._suffix = suffix
-        self._correspondent = correspondent
-        self._tags = tags
-
-    REGEX_TITLE = re.compile(
-        r"^.*/(.*)\.(pdf|jpe?g|png|gif|tiff)$",
-        flags=re.IGNORECASE
-    )
-    REGEX_CORRESPONDENT_TITLE = re.compile(
-        r"^.*/(.+) - (.*)\.(pdf|jpe?g|png|gif|tiff)$",
-        flags=re.IGNORECASE
-    )
-    REGEX_CORRESPONDENT_TITLE_TAGS = re.compile(
-        r"^.*/(.*) - (.*) - ([a-z0-9\-,]*)\.(pdf|jpe?g|png|gif|tiff)$",
-        flags=re.IGNORECASE
-    )
-
-    @classmethod
-    def from_path(cls, path):
-        """
-        We use a crude naming convention to make handling the correspondent,
-        title, and tags easier:
-          "<correspondent> - <title> - <tags>.<suffix>"
-          "<correspondent> - <title>.<suffix>"
-          "<title>.<suffix>"
-        """
-
-        def get_correspondent(correspondent_name):
-            return Correspondent.objects.get_or_create(
-                name=correspondent_name,
-                defaults={"slug": slugify(correspondent_name)}
-            )[0]
-
-        def get_tags(tags):
-            r = []
-            for t in tags.split(","):
-                r.append(
-                    Tag.objects.get_or_create(slug=t, defaults={"name": t})[0])
-            return tuple(r)
-
-        def get_suffix(suffix):
-            suffix = suffix.lower()
-            if suffix == "jpeg":
-                return "jpg"
-            return suffix
-
-        # First attempt: "<correspondent> - <title> - <tags>.<suffix>"
-        m = re.match(cls.REGEX_CORRESPONDENT_TITLE_TAGS, path)
-        if m:
-            return cls(
-                title=m.group(2),
-                correspondent=get_correspondent(m.group(1)),
-                tags=get_tags(m.group(3)),
-                suffix=get_suffix(m.group(4))
-            )
-
-        # Second attempt: "<correspondent> - <title>.<suffix>"
-        m = re.match(cls.REGEX_CORRESPONDENT_TITLE, path)
-        if m:
-            return cls(
-                title=m.group(2),
-                correspondent=get_correspondent(m.group(1)),
-                tags=(),
-                suffix=get_suffix(m.group(3))
-            )
-
-        # That didn't work, so we assume correspondent and tags are None
-        m = re.match(cls.REGEX_TITLE, path)
-        return FileInfo(
-            title=m.group(1), tags=(), suffix=get_suffix(m.group(2)))
-
-    @property
-    def title(self):
-        return self._title
-
-    @property
-    def correspondent(self):
-        return self._correspondent
-
-    @property
-    def tags(self):
-        return self._tags
-
-    @property
-    def suffix(self):
-        return self._suffix
-
 class SluggedModel(models.Model):
 
     name = models.CharField(max_length=128, unique=True)
@@ -341,3 +253,136 @@ class Log(models.Model):
             self.group = uuid.uuid4()
 
         models.Model.save(self, *args, **kwargs)
+
+
+class FileInfo(object):
+
+    # This epic regex *almost* worked for our needs, so I'm keeping it here for
+    # posterity, in the hopes that we might find a way to make it work one day.
+    ALMOST_REGEX = re.compile(
+        r"^((?P<date>\d\d\d\d\d\d\d\d\d\d\d\d\d\dZ){separator})?"
+        r"((?P<correspondent>{non_separated_word}+){separator})??"
+        r"(?P<title>{non_separated_word}+)"
+        r"({separator}(?P<tags>[a-z,0-9-]+))?"
+        r"\.(?P<extension>[a-zA-Z.-]+)$".format(
+            separator=r"\s+-\s+",
+            non_separated_word=r"([\w,. ]|([^\s]-))"
+        )
+    )
+
+    REGEXES = OrderedDict([
+        ("created-correspondent-title-tags", re.compile(
+            r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
+            r"(?P<correspondent>.*) - "
+            r"(?P<title>.*) - "
+            r"(?P<tags>[a-z0-9\-,]*)"
+            r"\.(?P<extension>pdf|jpe?g|png|gif|tiff)$",
+            flags=re.IGNORECASE
+        )),
+        ("created-title-tags", re.compile(
+            r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
+            r"(?P<title>.*) - "
+            r"(?P<tags>[a-z0-9\-,]*)"
+            r"\.(?P<extension>pdf|jpe?g|png|gif|tiff)$",
+            flags=re.IGNORECASE
+        )),
+        ("created-correspondent-title", re.compile(
+            r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
+            r"(?P<correspondent>.*) - "
+            r"(?P<title>.*)"
+            r"\.(?P<extension>pdf|jpe?g|png|gif|tiff)$",
+            flags=re.IGNORECASE
+        )),
+        ("created-title", re.compile(
+            r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
+            r"(?P<title>.*)"
+            r"\.(?P<extension>pdf|jpe?g|png|gif|tiff)$",
+            flags=re.IGNORECASE
+        )),
+        ("correspondent-title-tags", re.compile(
+            r"(?P<correspondent>.*) - "
+            r"(?P<title>.*) - "
+            r"(?P<tags>[a-z0-9\-,]*)"
+            r"\.(?P<extension>pdf|jpe?g|png|gif|tiff)$",
+            flags=re.IGNORECASE
+        )),
+        ("correspondent-title", re.compile(
+            r"(?P<correspondent>.*) - "
+            r"(?P<title>.*)?"
+            r"\.(?P<extension>pdf|jpe?g|png|gif|tiff)$",
+            flags=re.IGNORECASE
+        )),
+        ("title", re.compile(
+            r"(?P<title>.*)"
+            r"\.(?P<extension>pdf|jpe?g|png|gif|tiff)$",
+            flags=re.IGNORECASE
+        ))
+    ])
+
+    def __init__(self, created=None, correspondent=None, title=None, tags=(),
+                 extension=None):
+
+        self.created = created
+        self.title = title
+        self.extension = extension
+        self.correspondent = correspondent
+        self.tags = tags
+
+    @classmethod
+    def _get_created(cls, created):
+        return dateutil.parser.parse("{:0<14}Z".format(created[:-1]))
+
+    @classmethod
+    def _get_correspondent(cls, name):
+        if not name:
+            return None
+        return Correspondent.objects.get_or_create(name=name, defaults={
+            "slug": slugify(name)
+        })[0]
+
+    @classmethod
+    def _get_title(cls, title):
+        return title
+
+    @classmethod
+    def _get_tags(cls, tags):
+        r = []
+        for t in tags.split(","):
+            r.append(
+                Tag.objects.get_or_create(slug=t, defaults={"name": t})[0])
+        return tuple(r)
+
+    @classmethod
+    def _get_extension(cls, extension):
+        r = extension.lower()
+        if r == "jpeg":
+            return "jpg"
+        return r
+
+    @classmethod
+    def _mangle_property(cls, properties, name):
+        if name in properties:
+            properties[name] = getattr(cls, "_get_{}".format(name))(
+                properties[name]
+            )
+
+    @classmethod
+    def from_path(cls, path):
+        """
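+        We use a crude naming convention to make handling the created date,
+        correspondent, title, and tags easier.  The longest form is
+          "<created> - <correspondent> - <title> - <tags>.<extension>"
+        and shorter forms drop parts of it; the patterns in REGEXES are tried
+        in order against the file's base name and the first match wins.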
+        """
+
+        for regex in cls.REGEXES.values():
+            m = regex.match(os.path.basename(path))
+            if m:
+                properties = m.groupdict()
+                cls._mangle_property(properties, "created")
+                cls._mangle_property(properties, "correspondent")
+                cls._mangle_property(properties, "title")
+                cls._mangle_property(properties, "tags")
+                cls._mangle_property(properties, "extension")
+                return cls(**properties)
diff --git a/src/documents/tests/test_consumer.py b/src/documents/tests/test_consumer.py
index 634e8c4f0..48407044d 100644
--- a/src/documents/tests/test_consumer.py
+++ b/src/documents/tests/test_consumer.py
@@ -1,28 +1,36 @@
 from django.test import TestCase
 
-from ..models import FileInfo
+from ..models import Document, FileInfo
 
 
 class TestAttachment(TestCase):
 
     TAGS = ("tag1", "tag2", "tag3")
-    SUFFIXES = (
+    EXTENSIONS = (
         "pdf", "png", "jpg", "jpeg", "gif",
         "PDF", "PNG", "JPG", "JPEG", "GIF",
         "PdF", "PnG", "JpG", "JPeG", "GiF",
     )
 
     def _test_guess_attributes_from_name(self, path, sender, title, tags):
-        for suffix in self.SUFFIXES:
-            f = path.format(suffix)
+
+        for extension in self.EXTENSIONS:
+
+            f = path.format(extension)
             file_info = FileInfo.from_path(f)
-            self.assertEqual(file_info.correspondent.name, sender, f)
-            self.assertEqual(file_info.title, title, f)
-            self.assertEqual(tuple([t.slug for t in file_info.tags]), tags, f)
-            if suffix.lower() == "jpeg":
-                self.assertEqual(file_info.suffix, "jpg", f)
+
+            if sender:
+                self.assertEqual(file_info.correspondent.name, sender, f)
             else:
-                self.assertEqual(file_info.suffix, suffix.lower(), f)
+                self.assertIsNone(file_info.correspondent, f)
+
+            self.assertEqual(file_info.title, title, f)
+
+            self.assertEqual(tuple([t.slug for t in file_info.tags]), tags, f)
+            if extension.lower() == "jpeg":
+                self.assertEqual(file_info.extension, "jpg", f)
+            else:
+                self.assertEqual(file_info.extension, extension.lower(), f)
 
     def test_guess_attributes_from_name0(self):
         self._test_guess_attributes_from_name(
@@ -96,7 +104,7 @@ class TestAttachment(TestCase):
         self._test_guess_attributes_from_name(
             '/path/to/ - weird empty correspondent but should not break.{}',
             None,
-            ' - weird empty correspondent but should not break',
+            'weird empty correspondent but should not break',
             ()
         )
 
@@ -126,60 +134,171 @@ class TestAttachment(TestCase):
 
 
 class Permutations(TestCase):
-    valid_correspondents = ['timmy', 'Dr. McWheelie',
-                            'Dash Gor-don', 'ο Θερμαστής']
-    valid_titles = ['title', 'Title w Spaces', 'Title a-dash', 'Τίτλος', '']
-    valid_tags = ['tag', 'tig,tag', '-', '0,1,2', '']
-    valid_suffixes = ['pdf', 'png', 'jpg', 'jpeg', 'gif']
 
-    def _test_guessed_attributes(
-            self, filename, title, suffix, correspondent=None, tags=None):
-        file_info = FileInfo.from_path(filename)
+    valid_dates = (
+        "20150102030405Z",
+        "20150102Z",
+    )
+    valid_correspondents = [
+        "timmy",
+        "Dr. McWheelie",
+        "Dash Gor-don",
+        "ο Θερμαστής",
+        ""
+    ]
+    valid_titles = ["title", "Title w Spaces", "Title a-dash", "Τίτλος", ""]
+    valid_tags = ["tag", "tig,tag", "tag1,tag2,tag-3"]
+    valid_extensions = ["pdf", "png", "jpg", "jpeg", "gif"]
 
-        # Required
-        self.assertEqual(file_info.title, title, filename)
-        if suffix == 'jpeg':
-            suffix = 'jpg'
-        self.assertEqual(file_info.suffix, suffix, filename)
-        # Optional
-        if correspondent is None:
-            self.assertEqual(file_info.correspondent,
-                             correspondent, filename)
+    def _test_guessed_attributes(self, filename, created=None,
+                                 correspondent=None, title=None,
+                                 extension=None, tags=None):
+
+        # filename doubles as the failure message in every assertion below.
+        info = FileInfo.from_path(filename)
+
+        # Created: when a date is expected, only year/month/day are checked.
+        if created is None:
+            self.assertIsNone(info.created, filename)
         else:
-            self.assertEqual(file_info.correspondent.name,
-                             correspondent, filename)
+            self.assertEqual(info.created.year, int(created[:4]), filename)
+            self.assertEqual(info.created.month, int(created[4:6]), filename)
+            self.assertEqual(info.created.day, int(created[6:8]), filename)
+
+        # Correspondent: "" and None both mean no correspondent is expected.
+        if correspondent:
+            self.assertEqual(info.correspondent.name, correspondent, filename)
+        else:
+            self.assertEqual(info.correspondent, None, filename)
+
+        # Title
+        self.assertEqual(info.title, title, filename)
+
+        # Tags: compared by slug, in order; None means an empty tuple.
         if tags is None:
-            self.assertEqual(file_info.tags, (), filename)
+            self.assertEqual(info.tags, (), filename)
         else:
-            self.assertEqual([t.slug for t in file_info.tags],
-                             tags.split(','),
-                             filename)
+            self.assertEqual(
+                [t.slug for t in info.tags], tags.split(','),
+                filename
+            )
+
+        # Extension: "jpeg" is expected to come back normalised to "jpg".
+        if extension == 'jpeg':
+            extension = 'jpg'
+        self.assertEqual(info.extension, extension, filename)
 
     def test_just_title(self):
-        template = '/path/to/{title}.{suffix}'
+        template = '/path/to/{title}.{extension}'
         for title in self.valid_titles:
-            for suffix in self.valid_suffixes:
-                spec = dict(title=title, suffix=suffix)
+            for extension in self.valid_extensions:
+                spec = dict(title=title, extension=extension)
                 filename = template.format(**spec)
                 self._test_guessed_attributes(filename, **spec)
 
     def test_title_and_correspondent(self):
-        template = '/path/to/{correspondent} - {title}.{suffix}'
+        template = '/path/to/{correspondent} - {title}.{extension}'
         for correspondent in self.valid_correspondents:
             for title in self.valid_titles:
-                for suffix in self.valid_suffixes:
+                for extension in self.valid_extensions:
                     spec = dict(correspondent=correspondent, title=title,
-                                suffix=suffix)
+                                extension=extension)
                     filename = template.format(**spec)
                     self._test_guessed_attributes(filename, **spec)
 
     def test_title_and_correspondent_and_tags(self):
-        template = '/path/to/{correspondent} - {title} - {tags}.{suffix}'
+        template = '/path/to/{correspondent} - {title} - {tags}.{extension}'
         for correspondent in self.valid_correspondents:
             for title in self.valid_titles:
                 for tags in self.valid_tags:
-                    for suffix in self.valid_suffixes:
+                    for extension in self.valid_extensions:
                         spec = dict(correspondent=correspondent, title=title,
-                                    tags=tags, suffix=suffix)
+                                    tags=tags, extension=extension)
                         filename = template.format(**spec)
                         self._test_guessed_attributes(filename, **spec)
+
+    def test_created_and_correspondent_and_title_and_tags(self):
+
+        template = ("/path/to/{created} - "
+                    "{correspondent} - "
+                    "{title} - "
+                    "{tags}"
+                    ".{extension}")
+
+        for created in self.valid_dates:
+            for correspondent in self.valid_correspondents:
+                for title in self.valid_titles:
+                    for tags in self.valid_tags:
+                        for extension in self.valid_extensions:
+                            spec = {
+                                "created": created,
+                                "correspondent": correspondent,
+                                "title": title,
+                                "tags": tags,
+                                "extension": extension
+                            }
+                            self._test_guessed_attributes(
+                                template.format(**spec), **spec)
+
+    def test_created_and_correspondent_and_title(self):
+
+        template = ("/path/to/{created} - "
+                    "{correspondent} - "
+                    "{title}"
+                    ".{extension}")
+
+        for created in self.valid_dates:
+            for correspondent in self.valid_correspondents:
+                for title in self.valid_titles:
+
+                    # Skip all-lowercase titles (this includes ""): they look
+                    # like a tag, and we can't accommodate such cases.
+                    if title.lower() == title:
+                        continue
+
+                    for extension in self.valid_extensions:
+                        spec = {
+                            "created": created,
+                            "correspondent": correspondent,
+                            "title": title,
+                            "extension": extension
+                        }
+                        self._test_guessed_attributes(
+                            template.format(**spec), **spec)
+
+    def test_created_and_title(self):
+
+        template = ("/path/to/{created} - "
+                    "{title}"
+                    ".{extension}")
+
+        for created in self.valid_dates:
+            for title in self.valid_titles:
+                for extension in self.valid_extensions:
+                    spec = {
+                        "created": created,
+                        "title": title,
+                        "extension": extension
+                    }
+                    self._test_guessed_attributes(
+                        template.format(**spec), **spec)
+
+    def test_created_and_title_and_tags(self):
+
+        template = ("/path/to/{created} - "
+                    "{title} - "
+                    "{tags}"
+                    ".{extension}")
+
+        for created in self.valid_dates:
+            for title in self.valid_titles:
+                for tags in self.valid_tags:
+                    for extension in self.valid_extensions:
+                        spec = {
+                            "created": created,
+                            "title": title,
+                            "tags": tags,
+                            "extension": extension
+                        }
+                        self._test_guessed_attributes(
+                            template.format(**spec), **spec)