Version bump

Merge branch 'master' into dev
minor changes
2025-08-05 18:58:34 -05:00 · 2018-12-11 14:32:30 +01:00 · 2018-12-11 12:38:15 +01:00 · 2018-12-11 12:26:44 +01:00 · 2018-12-11 12:06:15 +01:00 · 2018-12-06 23:38:50 +00:00
94 changed files with 58830 additions and 1239 deletions
--- a/.editorconfig
+++ b/.editorconfig
@@ -0,0 +1,25 @@
+# EditorConfig: http://EditorConfig.org
+
+root = true
+
+[*]
+indent_style = tab
+indent_size = 2
+insert_final_newline = true
+trim_trailing_whitespace = true
+end_of_line = lf
+charset = utf-8
+max_line_length = 79
+
+[{*.html,*.css,*.js}]
+max_line_length = off
+
+[*.py]
+indent_size = 4
+indent_style = space
+
+# Tests don't get a line width restriction.  It's still a good idea to follow
+# the 79 character rule, but in the interests of clarity, tests often need to
+# violate it.
+[**/test_*.py]
+max_line_length = off
--- a/.gitignore
+++ b/.gitignore
@@ -73,7 +73,6 @@ db.sqlite3
 # Other stuff that doesn't belong
 .virtualenv
 virtualenv
-.vagrant
 docker-compose.yml
 docker-compose.env

@@ -81,3 +80,8 @@ docker-compose.env
 scripts/import-for-development
 scripts/nuke

+# Static files collected by the collectstatic command
+static/
+
+# Classification Models
+models/
--- a/.travis.yml
+++ b/.travis.yml
@@ -2,7 +2,7 @@ language: python

 before_install:
 - sudo apt-get update -qq
- sudo apt-get install -qq libpoppler-cpp-dev unpaper tesseract-ocr tesseract-ocr-eng
+- sudo apt-get install -qq libpoppler-cpp-dev unpaper tesseract-ocr tesseract-ocr-eng tesseract-ocr-cat

 sudo: false

--- a/CODE_OF_CONDUCT.md
+++ b/CODE_OF_CONDUCT.md
@@ -40,7 +40,7 @@ Project maintainers who do not follow or enforce the Code of Conduct in good fai

 ## Attribution

-This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version]
+This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4 to remove puritanical language.  The original is available at [http://contributor-covenant.org/version/1/4][version]

 [homepage]: http://contributor-covenant.org
 [version]: http://contributor-covenant.org/version/1/4/
--- a/13
+++ b/13
@@ -1,4 +1,4 @@
-FROM alpine:3.7
+FROM alpine:3.8

 LABEL maintainer="The Paperless Project https://github.com/danielquinn/paperless" \
      contributors="Guy Addadi <addadi@gmail.com>, Pit Kleyersburg <pitkley@googlemail.com>, \
@@ -12,12 +12,11 @@ COPY scripts/docker-entrypoint.sh /sbin/docker-entrypoint.sh
 ENV PAPERLESS_EXPORT_DIR=/export \
    PAPERLESS_CONSUMPTION_DIR=/consume

-# Install dependencies
-RUN apk --no-cache --update add \
-        python3 gnupg libmagic bash shadow curl \
-        sudo poppler tesseract-ocr imagemagick ghostscript unpaper && \
-    apk --no-cache add --virtual .build-dependencies \
-        python3-dev poppler-dev gcc g++ musl-dev zlib-dev jpeg-dev && \
+
+RUN apk update --no-cache && apk add python3 gnupg libmagic libpq bash shadow curl \
+        sudo poppler tesseract-ocr imagemagick ghostscript unpaper optipng && \
+    apk add --virtual .build-dependencies \
+        python3-dev poppler-dev postgresql-dev gcc g++ musl-dev zlib-dev jpeg-dev && \
 # Install python dependencies
    python3 -m ensurepip && \
    rm -r /usr/lib/python*/ensurepip && \
--- a/8
+++ b/8
@@ -4,20 +4,20 @@ verify_ssl = true
 name = "pypi"

 [packages]
-django = "<2.0,>=1.11"
+django = "<2.1,>=2.0"
 pillow = "*"
 coveralls = "*"
 dateparser = "*"
+django-cors-headers = "*"
 django-crispy-forms = "*"
 django-extensions = "*"
 django-filter = "*"
-django-flat-responsive = "*"
 djangorestframework = "*"
 factory-boy = "*"
-"flake8" = "*"
 filemagic = "*"
 fuzzywuzzy = {extras = ["speedup"], version = "==0.15.0"}
 gunicorn = "*"
+inotify-simple = "*"
 langdetect = "*"
 pdftotext = "*"
 pyocr = "*"
@@ -25,6 +25,8 @@ python-dateutil = "*"
 python-dotenv = "*"
 python-gnupg = "*"
 pytz = "*"
+sphinx = "*"
+tox = "*"
 pycodestyle = "*"
 pytest = "*"
 pytest-cov = "*"
--- a/Pipfile.lock
+++ b/Pipfile.lock
@@ -1,7 +1,7 @@
 {
    "_meta": {
        "hash": {
-            "sha256": "928fbb4c8952128aef7a2ed2707ce510d31d49df96cfc5f08959698edff6e67f"
+            "sha256": "3782f7e6b5461c39c8fd0d0048a4622418f247439113bd3cdc91712fd47036f6"
        },
        "pipfile-spec": 6,
        "requires": {},
@@ -14,26 +14,54 @@
        ]
    },
    "default": {
+        "alabaster": {
+            "hashes": [
+                "sha256:446438bdcca0e05bd45ea2de1668c1d9b032e1a9154c2c259092d77031ddd359",
+                "sha256:a661d72d58e6ea8a57f7a86e37d86716863ee5e92788398526d58b26a4e4dc02"
+            ],
+            "version": "==0.7.12"
+        },
        "apipkg": {
            "hashes": [
-                "sha256:2e38399dbe842891fe85392601aab8f40a8f4cc5a9053c326de35a1cc0297ac6",
-                "sha256:65d2aa68b28e7d31233bb2ba8eb31cda40e4671f8ac2d6b241e358c9652a74b9"
+                "sha256:37228cda29411948b422fae072f57e31d3396d2ee1c9783775980ee9c9990af6",
+                "sha256:58587dd4dc3daefad0487f6d9ae32b4542b185e1c36db6993290e7c41ca2b47c"
            ],
-            "version": "==1.4"
+            "version": "==1.5"
+        },
+        "atomicwrites": {
+            "hashes": [
+                "sha256:0312ad34fcad8fac3704d441f7b317e50af620823353ec657a53e981f92920c0",
+                "sha256:ec9ae8adaae229e4f8446952d204a3e4b5fdd2d099f9be3aaf556120135fb3ee"
+            ],
+            "version": "==1.2.1"
        },
        "attrs": {
            "hashes": [
-                "sha256:1c7960ccfd6a005cd9f7ba884e6316b5e430a3f1a6c37c5f87d8b43f83b54ec9",
-                "sha256:a17a9573a6f475c99b551c0e0a812707ddda1ec9653bed04c13841404ed6f450"
+                "sha256:10cbf6e27dbce8c30807caf056c8eb50917e0eaafe86347671b57254006c3e69",
+                "sha256:ca4be454458f9dec299268d472aaa5a11f67a4ff70093396e1ceae9c76cf4bbb"
            ],
-            "version": "==17.4.0"
+            "version": "==18.2.0"
+        },
+        "babel": {
+            "hashes": [
+                "sha256:6778d85147d5d85345c14a26aada5e478ab04e39b078b0745ee6870c2b5cf669",
+                "sha256:8cba50f48c529ca3fa18cf81fa9403be176d374ac4d60738b839122dfaaa3d23"
+            ],
+            "version": "==2.6.0"
+        },
+        "backcall": {
+            "hashes": [
+                "sha256:38ecd85be2c1e78f77fd91700c76e14667dc21e2713b63876c0eb901196e01e4",
+                "sha256:bbbf4b1e5cd2bdb08f915895b51081c041bac22394fdfcfdfbe9f14b77c08bf2"
+            ],
+            "version": "==0.1.0"
        },
        "certifi": {
            "hashes": [
-                "sha256:14131608ad2fd56836d33a71ee60fa1c82bc9d2c8d98b7bdbc631fe1b3cd1296",
-                "sha256:edbc3f203427eef571f79a7692bb160a2b0f7ccaa31953e99bd17e307cf63f7d"
+                "sha256:339dc09518b07e2fa7eda5450740925974815557727d6bd35d319c1524a04a4c",
+                "sha256:6d58c986d22b038c8c0df30d639f23a3e6d172a05c3583e766f4c0b785c0986a"
            ],
-            "version": "==2018.1.18"
+            "version": "==2018.10.15"
        },
        "chardet": {
            "hashes": [
@@ -46,11 +74,12 @@
            "hashes": [
                "sha256:03481e81d558d30d230bc12999e3edffe392d244349a90f4ef9b88425fac74ba",
                "sha256:0b136648de27201056c1869a6c0d4e23f464750fd9a9ba9750b8336a244429ed",
-                "sha256:104ab3934abaf5be871a583541e8829d6c19ce7bde2923b2751e0d3ca44db60a",
-                "sha256:15b111b6a0f46ee1a485414a52a7ad1d703bdf984e9ed3c288a4414d3871dcbd",
+                "sha256:0bf8cbbd71adfff0ef1f3a1531e6402d13b7b01ac50a79c97ca15f030dba6306",
+                "sha256:10a46017fef60e16694a30627319f38a2b9b52e90182dddb6e37dcdab0f4bf95",
                "sha256:198626739a79b09fa0a2f06e083ffd12eb55449b5f8bfdbeed1df4910b2ca640",
-                "sha256:1c383d2ef13ade2acc636556fd544dba6e14fa30755f26812f54300e401f98f2",
+                "sha256:23d341cdd4a0371820eb2b0bd6b88f5003a7438bbedb33688cd33b8eae59affd",
                "sha256:28b2191e7283f4f3568962e373b47ef7f0392993bb6660d079c62bd50fe9d162",
+                "sha256:2a5b73210bad5279ddb558d9a2bfedc7f4bf6ad7f3c988641d83c40293deaec1",
                "sha256:2eb564bbf7816a9d68dd3369a510be3327f1c618d2357fa6b1216994c2e3d508",
                "sha256:337ded681dd2ef9ca04ef5d93cfc87e52e09db2594c296b4a0a3662cb1b41249",
                "sha256:3a2184c6d797a125dca8367878d3b9a178b6fdd05fdc2d35d758c3006a1cd694",
@@ -70,26 +99,22 @@
                "sha256:7e1fe19bd6dce69d9fd159d8e4a80a8f52101380d5d3a4d374b6d3eae0e5de9c",
                "sha256:8c3cb8c35ec4d9506979b4cf90ee9918bc2e49f84189d9bf5c36c0c1119c6558",
                "sha256:9d6dd10d49e01571bf6e147d3b505141ffc093a06756c60b053a859cb2128b1f",
-                "sha256:9e112fcbe0148a6fa4f0a02e8d58e94470fc6cb82a5481618fea901699bf34c4",
-                "sha256:ac4fef68da01116a5c117eba4dd46f2e06847a497de5ed1d64bb99a5fda1ef91",
-                "sha256:b8815995e050764c8610dbc82641807d196927c3dbed207f0a079833ffcf588d",
                "sha256:be6cfcd8053d13f5f5eeb284aa8a814220c3da1b0078fa859011c7fffd86dab9",
                "sha256:c1bb572fab8208c400adaf06a8133ac0712179a334c09224fb11393e920abcdd",
                "sha256:de4418dadaa1c01d497e539210cb6baa015965526ff5afc078c57ca69160108d",
                "sha256:e05cb4d9aad6233d67e0541caa7e511fa4047ed7750ec2510d466e806e0255d6",
-                "sha256:e4d96c07229f58cb686120f168276e434660e4358cc9cf3b0464210b04913e77",
-                "sha256:f3f501f345f24383c0000395b26b726e46758b71393267aeae0bd36f8b3ade80",
-                "sha256:f8a923a85cb099422ad5a2e345fe877bbc89a8a8b23235824a93488150e45f6e"
+                "sha256:f05a636b4564104120111800021a92e43397bc12a5c72fed7036be8556e0029e",
+                "sha256:f3f501f345f24383c0000395b26b726e46758b71393267aeae0bd36f8b3ade80"
            ],
            "version": "==4.5.1"
        },
        "coveralls": {
            "hashes": [
-                "sha256:32569a43c9dbc13fa8199247580a4ab182ef439f51f65bb7f8316d377a1340e8",
-                "sha256:664794748d2e5673e347ec476159a9d87f43e0d2d44950e98ed0e27b98da8346"
+                "sha256:ab638e88d38916a6cedbf80a9cd8992d5fa55c77ab755e262e00b36792b7cd6d",
+                "sha256:b2388747e2529fa4c669fb1e3e2756e4e07b6ee56c7d9fce05f35ccccc913aa0"
            ],
            "index": "pypi",
-            "version": "==1.3.0"
+            "version": "==1.5.1"
        },
        "dateparser": {
            "hashes": [
@@ -99,13 +124,28 @@
            "index": "pypi",
            "version": "==0.7.0"
        },
+        "decorator": {
+            "hashes": [
+                "sha256:2c51dff8ef3c447388fe5e4453d24a2bf128d3a4c32af3fabef1f01c6851ab82",
+                "sha256:c39efa13fbdeb4506c476c9b3babf6a718da943dab7811c206005a4a956c080c"
+            ],
+            "version": "==4.3.0"
+        },
        "django": {
            "hashes": [
-                "sha256:056fe5b9e1f8f7fed9bb392919d64f6b33b3a71cfb0f170a90ee277a6ed32bc2",
-                "sha256:4d398c7b02761e234bbde490aea13ea94cb539ceeb72805b72303f348682f2eb"
+                "sha256:25df265e1fdb74f7e7305a1de620a84681bcc9c05e84a3ed97e4a1a63024f18d",
+                "sha256:d6d94554abc82ca37e447c3d28958f5ac39bd7d4adaa285543ae97fb1129fd69"
            ],
            "index": "pypi",
-            "version": "==1.11.12"
+            "version": "==2.0.9"
+        },
+        "django-cors-headers": {
+            "hashes": [
+                "sha256:5545009c9b233ea7e70da7dbab7cb1c12afa01279895086f98ec243d7eab46fa",
+                "sha256:c4c2ee97139d18541a1be7d96fe337d1694623816d83f53cb7c00da9b94acae1"
+            ],
+            "index": "pypi",
+            "version": "==2.4.0"
        },
        "django-crispy-forms": {
            "hashes": [
@@ -117,34 +157,27 @@
        },
        "django-extensions": {
            "hashes": [
-                "sha256:37a543af370ee3b0721ff50442d33c357dd083e6ea06c5b94a199283b6f9e361",
-                "sha256:bc9f2946c117bb2f49e5e0633eba783787790ae810ea112fe7fd82fa64de2ff1"
+                "sha256:30cb6a8c7d6f75a55edf0c0c4491bd98f8264ae1616ce105f9cecac4387edd07",
+                "sha256:4ad86a7a5e84f1c77db030761ae87a600647250c652030a2b71a16e87f3a3d62"
            ],
            "index": "pypi",
-            "version": "==2.0.6"
+            "version": "==2.1.3"
        },
        "django-filter": {
            "hashes": [
-                "sha256:ea204242ea83790e1512c9d0d8255002a652a6f4986e93cee664f28955ba0c22",
-                "sha256:ec0ef1ba23ef95b1620f5d481334413700fb33f45cd76d56a63f4b0b1d76976a"
+                "sha256:6f4e4bc1a11151178520567b50320e5c32f8edb552139d93ea3e30613b886f56",
+                "sha256:86c3925020c27d072cdae7b828aaa5d165c2032a629abbe3c3a1be1edae61c58"
            ],
            "index": "pypi",
-            "version": "==1.1.0"
-        },
-        "django-flat-responsive": {
-            "hashes": [
-                "sha256:451caa2700c541b52fb7ce2d34d3d8dee9e980cf29f5463bc8a8c6256a1a6474"
-            ],
-            "index": "pypi",
-            "version": "==2.0"
+            "version": "==2.0.0"
        },
        "djangorestframework": {
            "hashes": [
-                "sha256:b6714c3e4b0f8d524f193c91ecf5f5450092c2145439ac2769711f7eba89a9d9",
-                "sha256:c375e4f95a3a64fccac412e36fb42ba36881e52313ec021ef410b40f67cddca4"
+                "sha256:607865b0bb1598b153793892101d881466bd5a991de12bd6229abb18b1c86136",
+                "sha256:63f76cbe1e7d12b94c357d7e54401103b2e52aef0f7c1650d6c820ad708776e5"
            ],
            "index": "pypi",
-            "version": "==3.8.2"
+            "version": "==3.9.0"
        },
        "docopt": {
            "hashes": [
@@ -152,6 +185,14 @@
            ],
            "version": "==0.6.2"
        },
+        "docutils": {
+            "hashes": [
+                "sha256:02aec4bd92ab067f6ff27a38a38a41173bf01bed8f89157768c1573f53e474a6",
+                "sha256:51e64ef2ebfb29cae1faa133b3710143496eca21c530f3f71424d77687764274",
+                "sha256:7a4bd47eaf6596e1295ecb11361139febe29b084a87bf005bf899f9a42edc3c6"
+            ],
+            "version": "==0.14"
+        },
        "execnet": {
            "hashes": [
                "sha256:a7a84d5fa07a089186a329528f127c9d73b9de57f1a1131b82bb5320ee651f6a",
@@ -161,18 +202,25 @@
        },
        "factory-boy": {
            "hashes": [
-                "sha256:bd5a096d0f102d79b6c78cef1c8c0b650f2e1a3ecba351c735c6d2df8dabd29c",
-                "sha256:be2abc8092294e4097935a29b4e37f5b9ed3e4205e2e32df215c0315b625995e"
+                "sha256:6f25cc4761ac109efd503f096e2ad99421b1159f01a29dbb917359dcd68e08ca",
+                "sha256:d552cb872b310ae78bd7429bf318e42e1e903b1a109e899a523293dfa762ea4f"
            ],
            "index": "pypi",
-            "version": "==2.10.0"
+            "version": "==2.11.1"
        },
        "faker": {
            "hashes": [
-                "sha256:226d8fa67a8cf8b4007aab721f67639f130e9cfdc53a7095a2290ebb07a65c71",
-                "sha256:48fed4b4a191e2b42ad20c14115f1c6d36d338b80192075d7573f0f42d7fb321"
+                "sha256:2621643b80a10b91999925cfd20f64d2b36f20bf22136bbdc749bb57d6ffe124",
+                "sha256:5ed822d31bd2d6edf10944d176d30dc9c886afdd381eefb7ba8b7aad86171646"
            ],
-            "version": "==0.8.13"
+            "version": "==0.9.2"
+        },
+        "filelock": {
+            "hashes": [
+                "sha256:b8d5ca5ca1c815e1574aee746650ea7301de63d87935b3463d26368b76e31633",
+                "sha256:d610c1bb404daf85976d7a82eb2ada120f04671007266b708606565dd03b5be6"
+            ],
+            "version": "==3.0.10"
        },
        "filemagic": {
            "hashes": [
@@ -181,15 +229,10 @@
            "index": "pypi",
            "version": "==1.6"
        },
-        "flake8": {
-            "hashes": [
-                "sha256:7253265f7abd8b313e3892944044a365e3f4ac3fcdcfb4298f55ee9ddf188ba0",
-                "sha256:c7841163e2b576d435799169b78703ad6ac1bbb0f199994fc05f700b2a90ea37"
-            ],
-            "index": "pypi",
-            "version": "==3.5.0"
-        },
        "fuzzywuzzy": {
+            "extras": [
+                "speedup"
+            ],
            "hashes": [
                "sha256:3759bc6859daa0eecef8c82b45404bdac20c23f23136cf4c18b46b426bbc418f",
                "sha256:5b36957ccf836e700f4468324fa80ba208990385392e217be077d5cd738ae602"
@@ -199,18 +242,61 @@
        },
        "gunicorn": {
            "hashes": [
-                "sha256:75af03c99389535f218cc596c7de74df4763803f7b63eb09d77e92b3956b36c6",
-                "sha256:eee1169f0ca667be05db3351a0960765620dad53f53434262ff8901b68a1b622"
+                "sha256:aa8e0b40b4157b36a5df5e599f45c9c76d6af43845ba3b3b0efe2c70473c2471",
+                "sha256:fa2662097c66f920f53f70621c6c58ca4a3c4d3434205e608e121b5b3b71f4f3"
            ],
            "index": "pypi",
-            "version": "==19.7.1"
+            "version": "==19.9.0"
        },
        "idna": {
            "hashes": [
-                "sha256:2c6a5de3089009e3da7c5dde64a141dbc8551d5b7f6cf4ed7c2568d0cc520a8f",
-                "sha256:8c7309c718f94b3a625cb648ace320157ad16ff131ae0af362c9f21b80ef6ec4"
+                "sha256:156a6814fb5ac1fc6850fb002e0852d56c0c8d2531923a51032d1b70760e186e",
+                "sha256:684a38a6f903c1d71d6d5fac066b58d7768af4de2b832e426ec79c30daa94a16"
            ],
-            "version": "==2.6"
+            "version": "==2.7"
+        },
+        "imagesize": {
+            "hashes": [
+                "sha256:3f349de3eb99145973fefb7dbe38554414e5c30abd0c8e4b970a7c9d09f3a1d8",
+                "sha256:f3832918bc3c66617f92e35f5d70729187676313caa60c187eb0f28b8fe5e3b5"
+            ],
+            "version": "==1.1.0"
+        },
+        "inotify-simple": {
+            "hashes": [
+                "sha256:fc2c10dd73278a1027d0663f2db51240af5946390f363a154361406ebdddd8dd"
+            ],
+            "index": "pypi",
+            "version": "==1.1.8"
+        },
+        "ipython": {
+            "hashes": [
+                "sha256:a5781d6934a3341a1f9acb4ea5acdc7ea0a0855e689dbe755d070ca51e995435",
+                "sha256:b10a7ddd03657c761fc503495bc36471c8158e3fc948573fb9fe82a7029d8efd"
+            ],
+            "index": "pypi",
+            "version": "==7.1.1"
+        },
+        "ipython-genutils": {
+            "hashes": [
+                "sha256:72dd37233799e619666c9f639a9da83c34013a73e8bbc79a7a6348d93c61fab8",
+                "sha256:eb2e116e75ecef9d4d228fdc66af54269afa26ab4463042e33785b887c628ba8"
+            ],
+            "version": "==0.2.0"
+        },
+        "jedi": {
+            "hashes": [
+                "sha256:0191c447165f798e6a730285f2eee783fff81b0d3df261945ecb80983b5c3ca7",
+                "sha256:b7493f73a2febe0dc33d51c99b474547f7f6c0b2c8fb2b21f453eef204c12148"
+            ],
+            "version": "==0.13.1"
+        },
+        "jinja2": {
+            "hashes": [
+                "sha256:74c935a1b8bb9a3947c50a54766a969d4846290e1e788ea44c1392163723c3bd",
+                "sha256:f84be1bb0040caca4cea721fcbbbbd61f9be9464ca236387158b0feea01914a4"
+            ],
+            "version": "==2.10"
        },
        "langdetect": {
            "hashes": [
@@ -219,140 +305,173 @@
            "index": "pypi",
            "version": "==1.0.7"
        },
-        "mccabe": {
+        "markupsafe": {
            "hashes": [
-                "sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42",
-                "sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f"
+                "sha256:a6be69091dac236ea9c6bc7d012beab42010fa914c459791d627dad4910eb665"
            ],
-            "version": "==0.6.1"
+            "version": "==1.0"
        },
        "more-itertools": {
            "hashes": [
-                "sha256:0dd8f72eeab0d2c3bd489025bb2f6a1b8342f9b198f6fc37b52d15cfa4531fea",
-                "sha256:11a625025954c20145b37ff6309cd54e39ca94f72f6bb9576d1195db6fa2442e",
-                "sha256:c9ce7eccdcb901a2c75d326ea134e0886abfbea5f93e91cc95de9507c0816c44"
+                "sha256:c187a73da93e7a8acc0001572aebc7e3c69daf7bf6881a2cea10650bd4420092",
+                "sha256:c476b5d3a34e12d40130bc2f935028b5f636df8f372dc2c1c01dc19681b2039e",
+                "sha256:fcbfeaea0be121980e15bc97b3817b5202ca73d0eae185b4550cbfce2a3ebb3d"
            ],
-            "version": "==4.1.0"
+            "version": "==4.3.0"
+        },
+        "packaging": {
+            "hashes": [
+                "sha256:0886227f54515e592aaa2e5a553332c73962917f2831f1b0f9b9f4380a4b9807",
+                "sha256:f95a1e147590f204328170981833854229bb2912ac3d5f89e2a8ccd2834800c9"
+            ],
+            "version": "==18.0"
+        },
+        "parso": {
+            "hashes": [
+                "sha256:35704a43a3c113cce4de228ddb39aab374b8004f4f2407d070b6a2ca784ce8a2",
+                "sha256:895c63e93b94ac1e1690f5fdd40b65f07c8171e3e53cbd7793b5b96c0e0a7f24"
+            ],
+            "version": "==0.3.1"
        },
        "pdftotext": {
            "hashes": [
-                "sha256:0b82a9fd255a3f2bf5c861cf9e3174d3c4223e1e441bb060c611dcb4e65c6cb8"
+                "sha256:e3ad11efe0aa22cbfc46aa1296b2ea5a52ad208b778288311f2801adef178ccb"
            ],
            "index": "pypi",
-            "version": "==2.0.2"
+            "version": "==2.1.1"
+        },
+        "pexpect": {
+            "hashes": [
+                "sha256:2a8e88259839571d1251d278476f3eec5db26deb73a70be5ed5dc5435e418aba",
+                "sha256:3fbd41d4caf27fa4a377bfd16fef87271099463e6fa73e92a52f92dfee5d425b"
+            ],
+            "markers": "sys_platform != 'win32'",
+            "version": "==4.6.0"
+        },
+        "pickleshare": {
+            "hashes": [
+                "sha256:87683d47965c1da65cdacaf31c8441d12b8044cdec9aca500cd78fc2c683afca",
+                "sha256:9649af414d74d4df115d5d718f82acb59c9d418196b7b4290ed47a12ce62df56"
+            ],
+            "version": "==0.7.5"
        },
        "pillow": {
            "hashes": [
-                "sha256:00633bc2ec40313f4daf351855e506d296ec3c553f21b66720d0f1225ca84c6f",
-                "sha256:03514478db61b034fc5d38b9bf060f994e5916776e93f02e59732a8270069c61",
-                "sha256:040144ba422216aecf7577484865ade90e1a475f867301c48bf9fbd7579efd76",
-                "sha256:16246261ff22368e5e32ad74d5ef40403ab6895171a7fc6d34f6c17cfc0f1943",
-                "sha256:1cb38df69362af35c14d4a50123b63c7ff18ec9a6d4d5da629a6f19d05e16ba8",
-                "sha256:2400e122f7b21d9801798207e424cbe1f716cee7314cd0c8963fdb6fc564b5fb",
-                "sha256:2ee6364b270b56a49e8b8a51488e847ab130adc1220c171bed6818c0d4742455",
-                "sha256:3b4560c3891b05022c464b09121bd507c477505a4e19d703e1027a3a7c68d896",
-                "sha256:41374a6afb3f44794410dab54a0d7175e6209a5a02d407119c81083f1a4c1841",
-                "sha256:438a3faf5f702c8d0f80b9f9f9b8382cfa048ca6a0d64ef71b86b563b0ee0359",
-                "sha256:472a124c640bde4d5468f6991c9fa7e30b723d84ac4195a77c6ab6aea30f2b9c",
-                "sha256:4d32c8e3623a61d6e29ccd024066cd1ba556555abfb4cd714155020e00107e3f",
-                "sha256:4d8077fd649ac40a5c4165f2c22fa2a4ad18c668e271ecb2f9d849d1017a9313",
-                "sha256:62ec7ae98357fcd46002c110bb7cad15fce532776f0cbe7ca1d44c49b837d49d",
-                "sha256:6c7cab6a05351cf61e469937c49dbf3cdf5ffb3eeac71f8d22dc9be3507598d8",
-                "sha256:6eca36905444c4b91fe61f1b9933a47a30480738a1dd26501ff67d94fc2bc112",
-                "sha256:74e2ebfd19c16c28ad43b8a28ff73b904ed382ea4875188838541751986e8c9a",
-                "sha256:7673e7473a13107059377c96c563aa36f73184c29d2926882e0a0210b779a1e7",
-                "sha256:81762cf5fca9a82b53b7b2d0e6b420e0f3b06167b97678c81d00470daa622d58",
-                "sha256:8554bbeb4218d9cfb1917c69e6f2d2ad0be9b18a775d2162547edf992e1f5f1f",
-                "sha256:9b66e968da9c4393f5795285528bc862c7b97b91251f31a08004a3c626d18114",
-                "sha256:a00edb2dec0035e98ac3ec768086f0b06dfabb4ad308592ede364ef573692f55",
-                "sha256:b48401752496757e95304a46213c3155bc911ac884bed2e9b275ce1c1df3e293",
-                "sha256:b6cf18f9e653a8077522bb3aa753a776b117e3e0cc872c25811cfdf1459491c2",
-                "sha256:bb8adab1877e9213385cbb1adc297ed8337e01872c42a30cfaa66ff8c422779c",
-                "sha256:c8a4b39ba380b57a31a4b5449a9d257b1302d8bc4799767e645dcee25725efe1",
-                "sha256:cee9bc75bff455d317b6947081df0824a8f118de2786dc3d74a3503fd631f4ef",
-                "sha256:d0dc1313dff48af64517cbbd85e046d6b477fbe5e9d69712801f024dcb08c62b",
-                "sha256:d5bf527ed83617edd1855a5c923eeeaf68bcb9ac0ceb28e3f19b575b3a424984",
-                "sha256:df5863a21f91de5ecdf7d32a32f406dd9867ebb35d41033b8bd9607a21887599",
-                "sha256:e39142332541ed2884c257495504858b22c078a5d781059b07aba4c3a80d7551",
-                "sha256:e52e8f675ba0b2b417fa98579e7286a41a8e23871f17f4793772f5aa884fea79",
-                "sha256:e6dd55d5d94b9e36929325dd0c9ab85bfde84a5fc35947c334c32af1af668944",
-                "sha256:e87cc1acbebf263f308a8494272c2d42016aa33c32bf14d209c81e1f65e11868",
-                "sha256:ea0091cd4100519cedfeea2c659f52291f535ac6725e2368bcf59e874f270efa",
-                "sha256:eeb247f4f4d962942b3b555530b0c63b77473c7bfe475e51c6b75b7344b49ce3",
-                "sha256:f0d4433adce6075efd24fc0285135248b0b50f5a58129c7e552030e04fe45c7f",
-                "sha256:f1f3bd92f8e12dc22884935a73c9f94c4d9bd0d34410c456540713d6b7832b8c",
-                "sha256:f42a87cbf50e905f49f053c0b1fb86c911c730624022bf44c8857244fc4cdaca",
-                "sha256:f5f302db65e2e0ae96e26670818157640d3ca83a3054c290eff3631598dcf819",
-                "sha256:f7634d534662bbb08976db801ba27a112aee23e597eeaf09267b4575341e45bf",
-                "sha256:fdd374c02e8bb2d6468a85be50ea66e1c4ef9e809974c30d8576728473a6ed03",
-                "sha256:fe6931db24716a0845bd8c8915bd096b77c2a7043e6fc59ae9ca364fe816f08b"
+                "sha256:00203f406818c3f45d47bb8fe7e67d3feddb8dcbbd45a289a1de7dd789226360",
+                "sha256:0616f800f348664e694dddb0b0c88d26761dd5e9f34e1ed7b7a7d2da14b40cb7",
+                "sha256:1f7908aab90c92ad85af9d2fec5fc79456a89b3adcc26314d2cde0e238bd789e",
+                "sha256:2ea3517cd5779843de8a759c2349a3cd8d3893e03ab47053b66d5ec6f8bc4f93",
+                "sha256:48a9f0538c91fc136b3a576bee0e7cd174773dc9920b310c21dcb5519722e82c",
+                "sha256:5280ebc42641a1283b7b1f2c20e5b936692198b9dd9995527c18b794850be1a8",
+                "sha256:5e34e4b5764af65551647f5cc67cf5198c1d05621781d5173b342e5e55bf023b",
+                "sha256:63b120421ab85cad909792583f83b6ca3584610c2fe70751e23f606a3c2e87f0",
+                "sha256:696b5e0109fe368d0057f484e2e91717b49a03f1e310f857f133a4acec9f91dd",
+                "sha256:870ed021a42b1b02b5fe4a739ea735f671a84128c0a666c705db2cb9abd528eb",
+                "sha256:916da1c19e4012d06a372127d7140dae894806fad67ef44330e5600d77833581",
+                "sha256:9303a289fa0811e1c6abd9ddebfc770556d7c3311cb2b32eff72164ddc49bc64",
+                "sha256:9577888ecc0ad7d06c3746afaba339c94d62b59da16f7a5d1cff9e491f23dace",
+                "sha256:987e1c94a33c93d9b209315bfda9faa54b8edfce6438a1e93ae866ba20de5956",
+                "sha256:99a3bbdbb844f4fb5d6dd59fac836a40749781c1fa63c563bc216c27aef63f60",
+                "sha256:99db8dc3097ceafbcff9cb2bff384b974795edeb11d167d391a02c7bfeeb6e16",
+                "sha256:a5a96cf49eb580756a44ecf12949e52f211e20bffbf5a95760ac14b1e499cd37",
+                "sha256:aa6ca3eb56704cdc0d876fc6047ffd5ee960caad52452fbee0f99908a141a0ae",
+                "sha256:aade5e66795c94e4a2b2624affeea8979648d1b0ae3fcee17e74e2c647fc4a8a",
+                "sha256:b78905860336c1d292409e3df6ad39cc1f1c7f0964e66844bbc2ebfca434d073",
+                "sha256:b92f521cdc4e4a3041cc343625b699f20b0b5f976793fb45681aac1efda565f8",
+                "sha256:bfde84bbd6ae5f782206d454b67b7ee8f7f818c29b99fd02bf022fd33bab14cb",
+                "sha256:c2b62d3df80e694c0e4a0ed47754c9480521e25642251b3ab1dff050a4e60409",
+                "sha256:c5e2be6c263b64f6f7656e23e18a4a9980cffc671442795682e8c4e4f815dd9f",
+                "sha256:c99aa3c63104e0818ec566f8ff3942fb7c7a8f35f9912cb63fd8e12318b214b2",
+                "sha256:dae06620d3978da346375ebf88b9e2dd7d151335ba668c995aea9ed07af7add4",
+                "sha256:db5499d0710823fa4fb88206050d46544e8f0e0136a9a5f5570b026584c8fd74",
+                "sha256:f36baafd82119c4a114b9518202f2a983819101dcc14b26e43fc12cbefdce00e",
+                "sha256:f52b79c8796d81391ab295b04e520bda6feed54d54931708872e8f9ae9db0ea1",
+                "sha256:ff8cff01582fa1a7e533cb97f628531c4014af4b5f38e33cdcfe5eec29b6d888"
            ],
            "index": "pypi",
-            "version": "==5.1.0"
+            "version": "==5.3.0"
        },
        "pluggy": {
            "hashes": [
-                "sha256:714306e9b9a7b24ee4c1e3ff6463d7f652cdd30f4693121b31572e2fe1fdaea3",
-                "sha256:7f8ae7f5bdf75671a718d2daf0a64b7885f74510bcd98b1a0bb420eb9a9d0cff",
-                "sha256:d345c8fe681115900d6da8d048ba67c25df42973bda370783cd58826442dcd7c",
-                "sha256:e160a7fcf25762bb60efc7e171d4497ff1d8d2d75a3d0df7a21b76821ecbf5c5"
+                "sha256:447ba94990e8014ee25ec853339faf7b0fc8050cdc3289d4d71f7f410fb90095",
+                "sha256:bde19360a8ec4dfd8a20dcb811780a30998101f078fc7ded6162f0076f50508f"
+            ],
+            "version": "==0.8.0"
+        },
+        "prompt-toolkit": {
+            "hashes": [
+                "sha256:c1d6aff5252ab2ef391c2fe498ed8c088066f66bc64a8d5c095bbf795d9fec34",
+                "sha256:d4c47f79b635a0e70b84fdb97ebd9a274203706b1ee5ed44c10da62755cf3ec9",
+                "sha256:fd17048d8335c1e6d5ee403c3569953ba3eb8555d710bfc548faf0712666ea39"
+            ],
+            "version": "==2.0.7"
+        },
+        "ptyprocess": {
+            "hashes": [
+                "sha256:923f299cc5ad920c68f2bc0bc98b75b9f838b93b599941a6b63ddbc2476394c0",
+                "sha256:d7cc528d76e76342423ca640335bd3633420dc1366f258cb31d05e865ef5ca1f"
            ],
            "version": "==0.6.0"
        },
        "py": {
            "hashes": [
-                "sha256:29c9fab495d7528e80ba1e343b958684f4ace687327e6f789a94bf3d1915f881",
-                "sha256:983f77f3331356039fdd792e9220b7b8ee1aa6bd2b25f567a963ff1de5a64f6a"
+                "sha256:bf92637198836372b520efcba9e020c330123be8ce527e535d185ed4b6f45694",
+                "sha256:e76826342cefe3c3d5f7e8ee4316b80d1dd8a300781612ddbc765c17ba25a6c6"
            ],
-            "version": "==1.5.3"
+            "version": "==1.7.0"
        },
        "pycodestyle": {
            "hashes": [
-                "sha256:1ec08a51c901dfe44921576ed6e4c1f5b7ecbad403f871397feedb5eb8e4fa14",
-                "sha256:5ff2fbcbab997895ba9ead77e1b38b3ebc2e5c3b8a6194ef918666e4c790a00e",
-                "sha256:682256a5b318149ca0d2a9185d365d8864a768a28db66a84a2ea946bcc426766",
-                "sha256:6c4245ade1edfad79c3446fadfc96b0de2759662dc29d07d80a6f27ad1ca6ba9"
+                "sha256:cbc619d09254895b0d12c2c691e237b2e91e9b2ecf5e84c26b35400f93dcfb83",
+                "sha256:cbfca99bd594a10f674d0cd97a3d802a1fdef635d4361e1a2658de47ed261e3a"
            ],
            "index": "pypi",
-            "version": "==2.3.1"
+            "version": "==2.4.0"
        },
-        "pyflakes": {
+        "pygments": {
            "hashes": [
-                "sha256:08bd6a50edf8cffa9fa09a463063c425ecaaf10d1eb0335a7e8b1401aef89e6f",
-                "sha256:8d616a382f243dbf19b54743f280b80198be0bca3a5396f1d2e1fca6223e8805"
+                "sha256:78f3f434bcc5d6ee09020f92ba487f95ba50f1e3ef83ae96b9d5ffa1bab25c5d",
+                "sha256:dbae1046def0efb574852fab9e90209b23f556367b5a320c0bcb871c77c3e8cc"
            ],
-            "version": "==1.6.0"
+            "version": "==2.2.0"
        },
        "pyocr": {
            "hashes": [
-                "sha256:9ee8b5f38dd966ca531115fc5fe4715f7fa8961a9f14cd5109c2d938c17a2043"
+                "sha256:b6ba6263fd92da56627dff6d263d991a2246aacd117d1788f11b93f419ca395f"
            ],
            "index": "pypi",
-            "version": "==0.5.1"
+            "version": "==0.5.3"
+        },
+        "pyparsing": {
+            "hashes": [
+                "sha256:40856e74d4987de5d01761a22d1621ae1c7f8774585acae358aa5c5936c6c90b",
+                "sha256:f353aab21fd474459d97b709e527b5571314ee5f067441dc9f88e33eecd96592"
+            ],
+            "version": "==2.3.0"
        },
        "pytest": {
            "hashes": [
-                "sha256:6266f87ab64692112e5477eba395cfedda53b1933ccd29478e671e73b420c19c",
-                "sha256:fae491d1874f199537fd5872b5e1f0e74a009b979df9d53d1553fd03da1703e1"
+                "sha256:a9e5e8d7ab9d5b0747f37740276eb362e6a76275d76cebbb52c6049d93b475db",
+                "sha256:bf47e8ed20d03764f963f0070ff1c8fda6e2671fc5dd562a4d3b7148ad60f5ca"
            ],
            "index": "pypi",
-            "version": "==3.5.0"
+            "version": "==3.9.3"
        },
        "pytest-cov": {
            "hashes": [
-                "sha256:03aa752cf11db41d281ea1d807d954c4eda35cfa1b21d6971966cc041bbf6e2d",
-                "sha256:890fe5565400902b0c78b5357004aab1c814115894f4f21370e2433256a3eeec"
+                "sha256:513c425e931a0344944f84ea47f3956be0e416d95acbd897a44970c8d926d5d7",
+                "sha256:e360f048b7dae3f2f2a9a4d067b2dd6b6a015d384d1577c994a43f3f7cbad762"
            ],
            "index": "pypi",
-            "version": "==2.5.1"
+            "version": "==2.6.0"
        },
        "pytest-django": {
            "hashes": [
-                "sha256:534505e0261cc566279032d9d887f844235342806fd63a6925689670fa1b29d7",
-                "sha256:7501942093db2250a32a4e36826edfc542347bb9b26c78ed0649cdcfd49e5789"
+                "sha256:49e9ffc856bc6a1bec1c26c5c7b7213dff7cc8bc6b64d624c4d143d04aff0bcf",
+                "sha256:b379282feaf89069cb790775ab6bbbd2bd2038a68c7ef9b84a41898e0b551081"
            ],
            "index": "pypi",
-            "version": "==3.2.1"
+            "version": "==3.4.3"
        },
        "pytest-env": {
            "hashes": [
@@ -377,76 +496,71 @@
        },
        "pytest-xdist": {
            "hashes": [
-                "sha256:be2662264b035920ba740ed6efb1c816a83c8a22253df7766d129f6a7bfdbd35",
-                "sha256:e8f5744acc270b3e7d915bdb4d5f471670f049b6fbd163d4cbd52203b075d30f"
+                "sha256:3bc9dcb6ff47e607d3c710727cd9996fd7ac1466d405c3b40bb495da99b6b669",
+                "sha256:8e188d13ce6614c7a678179a76f46231199ffdfe6163de031c17e62ffa256917"
            ],
            "index": "pypi",
-            "version": "==1.22.2"
+            "version": "==1.24.0"
        },
        "python-dateutil": {
            "hashes": [
-                "sha256:3220490fb9741e2342e1cf29a503394fdac874bc39568288717ee67047ff29df",
-                "sha256:9d8074be4c993fbe4947878ce593052f71dac82932a677d49194d8ce9778002e"
+                "sha256:063df5763652e21de43de7d9e00ccf239f953a832941e37be541614732cdfc93",
+                "sha256:88f9287c0174266bb0d8cedd395cfba9c58e87e5ad86b2ce58859bc11be3cf02"
            ],
            "index": "pypi",
-            "version": "==2.7.2"
+            "version": "==2.7.5"
        },
        "python-dotenv": {
            "hashes": [
-                "sha256:4965ed170bf51c347a89820e8050655e9c25db3837db6602e906b6d850fad85c",
-                "sha256:509736185257111613009974e666568a1b031b028b61b500ef1ab4ee780089d5"
+                "sha256:122290a38ece9fe4f162dc7c95cae3357b983505830a154d3c98ef7f6c6cea77",
+                "sha256:4a205787bc829233de2a823aa328e44fd9996fedb954989a21f1fc67c13d7a77"
            ],
            "index": "pypi",
-            "version": "==0.8.2"
+            "version": "==0.9.1"
        },
        "python-gnupg": {
            "hashes": [
-                "sha256:38f18712b7cfdd0d769bc88a21e90138154b9be2cbffb1e7d28bc37ee73a1c47",
-                "sha256:5a54a6dd25bf78d3758dd7a1864f4efd122f9ca9402101d90e3ec4483ceafb73"
+                "sha256:2d158dfc6b54927752b945ebe57e6a0c45da27747fa3b9ae66eccc0d2147ac0d",
+                "sha256:faa69bab58ed0936f0ccf96c99b92369b7a1819305d37dfe5c927d21a437a09d"
            ],
            "index": "pypi",
-            "version": "==0.4.2"
+            "version": "==0.4.3"
        },
        "python-levenshtein": {
            "hashes": [
                "sha256:033a11de5e3d19ea25c9302d11224e1a1898fe5abd23c61c7c360c25195e3eb1"
            ],
+            "markers": "extra == 'speedup'",
            "version": "==0.12.0"
        },
        "pytz": {
            "hashes": [
-                "sha256:65ae0c8101309c45772196b21b74c46b2e5d11b6275c45d251b150d5da334555",
-                "sha256:c06425302f2cf668f1bba7a0a03f3c1d34d4ebeef2c72003da308b3947c7f749"
+                "sha256:31cb35c89bd7d333cd32c5f278fca91b523b0834369e757f4c5641ea252236ca",
+                "sha256:8e0f8568c118d3077b46be7d654cc8167fa916092e28320cde048e54bfc9f1e6"
            ],
            "index": "pypi",
-            "version": "==2018.4"
+            "version": "==2018.7"
        },
        "regex": {
            "hashes": [
-                "sha256:1b428a296531ea1642a7da48562746309c5c06471a97bd0c02dd6a82e9cecee8",
-                "sha256:27d72bb42dffb32516c28d218bb054ce128afd3e18464f30837166346758af67",
-                "sha256:32cf4743debee9ea12d3626ee21eae83052763740e04086304e7a74778bf58c9",
-                "sha256:32f6408dbca35040bc65f9f4ae1444d5546411fde989cb71443a182dd643305e",
-                "sha256:333687d9a44738c486735955993f83bd22061a416c48f5a5f9e765e90cf1b0c9",
-                "sha256:35eeccf17af3b017a54d754e160af597036435c58eceae60f1dd1364ae1250c7",
-                "sha256:361a1fd703a35580a4714ec28d85e29780081a4c399a99bbfb2aee695d72aedb",
-                "sha256:494bed6396a20d3aa6376bdf2d3fbb1005b8f4339558d8ac7b53256755f80303",
-                "sha256:5b9c0ddd5b4afa08c9074170a2ea9b34ea296e32aeea522faaaaeeeb2fe0af2e",
-                "sha256:a50532f61b23d4ab9d216a6214f359dd05c911c1a1ad20986b6738a782926c1a",
-                "sha256:a9243d7b359b72c681a2c32eaa7ace8d346b7e8ce09d172a683acf6853161d9c",
-                "sha256:b44624a38d07d3c954c84ad302c29f7930f4bf01443beef5589e9157b14e2a29",
-                "sha256:be42a601aaaeb7a317f818490a39d153952a97c40c6e9beeb2a1103616405348",
-                "sha256:eee4d94b1a626490fc8170ffd788883f8c641b576e11ba9b4a29c9f6623371e0",
-                "sha256:f69d1201a4750f763971ea8364ed95ee888fc128968b39d38883a72a4d005895"
+                "sha256:0ef96690c3d2294155b7d44187ca4a151e45c931cb768e106ba464a9fa64c5da",
+                "sha256:251683e01a3bcacd9188acf0d4caf7b29a3b963c843159311825613ae144cddb",
+                "sha256:3fe15a75fe00f04d1ec16713d55cf1e206077c450267a10b33318756fb8b3f99",
+                "sha256:53a962f9dc28cdf403978a142cb1e054479759ad64d312a999f9f042c25b5c9a",
+                "sha256:8bd1da6a93d32336a5e5432886dd8543004f0591c39b83dbfa60705cccdf414d",
+                "sha256:b5423061918f602e9342b54d746ac31c598d328ecaf4ef0618763e960c926fd4",
+                "sha256:d80ebc65b1f7d0403117f59309c16eac24be6a0bc730b593a79f703462858d94",
+                "sha256:fd8419979639b7de7fb964a13bce3ac47e6fe33043b83de0398c3067986e5659",
+                "sha256:ff2f15b2b0b4b58ba8a1de651780a0d3fd54f96ad6b77dceb77695220e5d7b7a"
            ],
-            "version": "==2018.2.21"
+            "version": "==2018.11.2"
        },
        "requests": {
            "hashes": [
-                "sha256:6a1b267aa90cac58ac3a765d067950e7dbbf75b1da07e895d1f594193a40a38b",
-                "sha256:9c443e7324ba5b85070c4a818ade28bfabedf16ea10206da1132edaa6dda237e"
+                "sha256:99dcfdaaeb17caf6e526f32b6a7b780461512ab3f1d992187801694cba42770c",
+                "sha256:a84b8c9ab6239b578f22d1c21d51b696dcfe004032bb80ea832398d6909d7279"
            ],
-            "version": "==2.18.4"
+            "version": "==2.20.0"
        },
        "six": {
            "hashes": [
@@ -455,6 +569,28 @@
            ],
            "version": "==1.11.0"
        },
+        "snowballstemmer": {
+            "hashes": [
+                "sha256:919f26a68b2c17a7634da993d91339e288964f93c274f1343e3bbbe2096e1128",
+                "sha256:9f3bcd3c401c3e862ec0ebe6d2c069ebc012ce142cce209c098ccb5b09136e89"
+            ],
+            "version": "==1.2.1"
+        },
+        "sphinx": {
+            "hashes": [
+                "sha256:652eb8c566f18823a022bb4b6dbc868d366df332a11a0226b5bc3a798a479f17",
+                "sha256:d222626d8356de702431e813a05c68a35967e3d66c6cd1c2c89539bb179a7464"
+            ],
+            "index": "pypi",
+            "version": "==1.8.1"
+        },
+        "sphinxcontrib-websupport": {
+            "hashes": [
+                "sha256:68ca7ff70785cbe1e7bccc71a48b5b6d965d79ca50629606c7861a21b206d9dd",
+                "sha256:9de47f375baf1ea07cdb3436ff39d7a9c76042c10a769c52353ec46e4e8fc3b9"
+            ],
+            "version": "==1.1.0"
+        },
        "termcolor": {
            "hashes": [
                "sha256:1d6d69ce66211143803fbc56652b41d73b4a400a2891d7bf7a1cdf4c02de613b"
@@ -468,113 +604,20 @@
            ],
            "version": "==1.2"
        },
-        "tzlocal": {
+        "toml": {
            "hashes": [
-                "sha256:4ebeb848845ac898da6519b9b31879cf13b6626f7184c496037b818e238f2c4e"
+                "sha256:229f81c57791a41d65e399fc06bf0848bab550a9dfd5ed66df18ce5f05e73d5c",
+                "sha256:235682dd292d5899d361a811df37e04a8828a5b1da3115886b73cf81ebc9100e"
            ],
-            "version": "==1.5.1"
+            "version": "==0.10.0"
        },
-        "urllib3": {
+        "tox": {
            "hashes": [
-                "sha256:06330f386d6e4b195fbfc736b297f58c5a892e4440e54d294d7004e3a9bbea1b",
-                "sha256:cc44da8e1145637334317feebd728bd869a35285b93cbb4cca2577da7e62db4f"
-            ],
-            "version": "==1.22"
-        }
-    },
-    "develop": {
-        "backcall": {
-            "hashes": [
-                "sha256:38ecd85be2c1e78f77fd91700c76e14667dc21e2713b63876c0eb901196e01e4",
-                "sha256:bbbf4b1e5cd2bdb08f915895b51081c041bac22394fdfcfdfbe9f14b77c08bf2"
-            ],
-            "version": "==0.1.0"
-        },
-        "decorator": {
-            "hashes": [
-                "sha256:2c51dff8ef3c447388fe5e4453d24a2bf128d3a4c32af3fabef1f01c6851ab82",
-                "sha256:c39efa13fbdeb4506c476c9b3babf6a718da943dab7811c206005a4a956c080c"
-            ],
-            "version": "==4.3.0"
-        },
-        "ipython": {
-            "hashes": [
-                "sha256:85882f97d75122ff8cdfe129215a408085a26039527110c8d4a2b8a5e45b7639",
-                "sha256:a6ac981381b3f5f604b37a293369963485200e3639fb0404fa76092383c10c41"
+                "sha256:513e32fdf2f9e2d583c2f248f47ba9886428c949f068ac54a0469cac55df5862",
+                "sha256:75fa30e8329b41b664585f5fb837e23ce1d7e6fa1f7811f2be571c990f9d911b"
            ],
            "index": "pypi",
-            "version": "==6.3.1"
-        },
-        "ipython-genutils": {
-            "hashes": [
-                "sha256:72dd37233799e619666c9f639a9da83c34013a73e8bbc79a7a6348d93c61fab8",
-                "sha256:eb2e116e75ecef9d4d228fdc66af54269afa26ab4463042e33785b887c628ba8"
-            ],
-            "version": "==0.2.0"
-        },
-        "jedi": {
-            "hashes": [
-                "sha256:1972f694c6bc66a2fac8718299e2ab73011d653a6d8059790c3476d2353b99ad",
-                "sha256:5861f6dc0c16e024cbb0044999f9cf8013b292c05f287df06d3d991a87a4eb89"
-            ],
-            "version": "==0.12.0"
-        },
-        "parso": {
-            "hashes": [
-                "sha256:62bd6bf7f04ab5c817704ff513ef175328676471bdef3629d4bdd46626f75551",
-                "sha256:a75a304d7090d2c67bd298091c14ef9d3d560e3c53de1c239617889f61d1d307"
-            ],
-            "version": "==0.2.0"
-        },
-        "pexpect": {
-            "hashes": [
-                "sha256:9783f4644a3ef8528a6f20374eeb434431a650c797ca6d8df0d81e30fffdfa24",
-                "sha256:9f8eb3277716a01faafaba553d629d3d60a1a624c7cf45daa600d2148c30020c"
-            ],
-            "markers": "sys_platform != 'win32'",
-            "version": "==4.5.0"
-        },
-        "pickleshare": {
-            "hashes": [
-                "sha256:84a9257227dfdd6fe1b4be1319096c20eb85ff1e82c7932f36efccfe1b09737b",
-                "sha256:c9a2541f25aeabc070f12f452e1f2a8eae2abd51e1cd19e8430402bdf4c1d8b5"
-            ],
-            "version": "==0.7.4"
-        },
-        "prompt-toolkit": {
-            "hashes": [
-                "sha256:1df952620eccb399c53ebb359cc7d9a8d3a9538cb34c5a1344bdbeb29fbcc381",
-                "sha256:3f473ae040ddaa52b52f97f6b4a493cfa9f5920c255a12dc56a7d34397a398a4",
-                "sha256:858588f1983ca497f1cf4ffde01d978a3ea02b01c8a26a8bbc5cd2e66d816917"
-            ],
-            "version": "==1.0.15"
-        },
-        "ptyprocess": {
-            "hashes": [
-                "sha256:e64193f0047ad603b71f202332ab5527c5e52aa7c8b609704fc28c0dc20c4365",
-                "sha256:e8c43b5eee76b2083a9badde89fd1bbce6c8942d1045146e100b7b5e014f4f1a"
-            ],
-            "version": "==0.5.2"
-        },
-        "pygments": {
-            "hashes": [
-                "sha256:78f3f434bcc5d6ee09020f92ba487f95ba50f1e3ef83ae96b9d5ffa1bab25c5d",
-                "sha256:dbae1046def0efb574852fab9e90209b23f556367b5a320c0bcb871c77c3e8cc"
-            ],
-            "version": "==2.2.0"
-        },
-        "simplegeneric": {
-            "hashes": [
-                "sha256:dc972e06094b9af5b855b3df4a646395e43d1c9d0d39ed345b7393560d0b9173"
-            ],
-            "version": "==0.8.1"
-        },
-        "six": {
-            "hashes": [
-                "sha256:70e8a77beed4562e7f14fe23a786b54f6296e34344c23bc42f07b15018ff98e9",
-                "sha256:832dc0e10feb1aa2c68dcc57dbb658f1c7e65b9b61af69048abc87a2db00a0eb"
-            ],
-            "version": "==1.11.0"
+            "version": "==3.5.3"
        },
        "traitlets": {
            "hashes": [
@@ -583,6 +626,26 @@
            ],
            "version": "==4.3.2"
        },
+        "tzlocal": {
+            "hashes": [
+                "sha256:4ebeb848845ac898da6519b9b31879cf13b6626f7184c496037b818e238f2c4e"
+            ],
+            "version": "==1.5.1"
+        },
+        "urllib3": {
+            "hashes": [
+                "sha256:61bf29cada3fc2fbefad4fdf059ea4bd1b4a86d2b6d15e1c7c0b582b9752fe39",
+                "sha256:de9529817c93f27c8ccbfead6985011db27bd0ddfcdb2d86f3f663385c6a9c22"
+            ],
+            "version": "==1.24.1"
+        },
+        "virtualenv": {
+            "hashes": [
+                "sha256:686176c23a538ecc56d27ed9d5217abd34644823d6391cbeb232f42bf722baad",
+                "sha256:f899fafcd92e1150f40c8215328be38ff24b519cd95357fa6e78e006c7638208"
+            ],
+            "version": "==16.1.0"
+        },
        "wcwidth": {
            "hashes": [
                "sha256:3df37372226d6e63e1b1e1eda15c594bca98a22d33a23832a90998faa96bc65e",
@@ -590,5 +653,6 @@
            ],
            "version": "==0.1.7"
        }
-    }
+    },
+    "develop": {}
 }
--- a/20
+++ b/20
@@ -1,20 +0,0 @@
-# -*- mode: ruby -*-
-# vi: set ft=ruby :
-
-VAGRANT_API_VERSION = "2"
-Vagrant.configure(VAGRANT_API_VERSION) do |config|
-  config.vm.box = "ubuntu/trusty64"
-
-  # Provision using shell
-  config.vm.host_name = "dev.paperless"
-  config.vm.synced_folder ".", "/opt/paperless"
-  config.vm.provision "shell", path: "scripts/vagrant-provision"
-
-  # Networking details
-  config.vm.network "private_network", ip: "172.28.128.4"
-
-  config.vm.provider "virtualbox" do |vb|
-    # Customize the amount of memory on the VM:
-    vb.memory = "1024"
-  end
-end
--- a/docker-compose.env.example
+++ b/docker-compose.env.example
@@ -1,38 +1,22 @@
 # Environment variables to set for Paperless
-# Commented out variables will be replaced by a default within Paperless.
+# Commented out variables will be replaced with a default within Paperless.
+#
+# In addition to what you see here, you can also define any values you find in
+# paperless.conf.example here.  Values like:
+#
+# * PAPERLESS_PASSPHRASE
+# * PAPERLESS_CONSUMPTION_DIR
+# * PAPERLESS_CONSUME_MAIL_HOST
+#
+# ...are all explained in that file but can be defined here, since the Docker
+# installation doesn't make use of paperless.conf.

-# Passphrase Paperless uses to encrypt and decrypt your documents, if you want
-# encryption at all.
-# PAPERLESS_PASSPHRASE=CHANGE_ME

-# The amount of threads to use for text recognition
-# PAPERLESS_OCR_THREADS=4
-
-# Additional languages to install for text recognition
+# Additional languages to install for text recognition.  Note that this is
+# different from PAPERLESS_OCR_LANGUAGE (default=eng), which defines the
+# default language used when guessing the language from the OCR output.
 # PAPERLESS_OCR_LANGUAGES=deu ita

 # You can change the default user and group id to a custom one
 # USERMAP_UID=1000
 # USERMAP_GID=1000
-
-###############################################################################
-####                         Mail Consumption                              ####
-###############################################################################
-
-# These values are required if you want paperless to check a particular email
-# box every 10 minutes and attempt to consume documents from there.  If you
-# don't define a HOST, mail checking will just be disabled.
-# Don't use quotes after = or it will crash your docker
-# PAPERLESS_CONSUME_MAIL_HOST=
-# PAPERLESS_CONSUME_MAIL_PORT=
-# PAPERLESS_CONSUME_MAIL_USER=
-# PAPERLESS_CONSUME_MAIL_PASS=
-
-# Override the default IMAP inbox here. If it's not set, Paperless defaults to
-# INBOX.
-# PAPERLESS_CONSUME_MAIL_INBOX=INBOX
-
-# Any email sent to the target account that does not contain this text will be
-# ignored.  Mail checking won't work without this.
-# PAPERLESS_EMAIL_SECRET=
-
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -1,6 +1,134 @@
 Changelog
 #########

+2.6.0
+=====
+
+* Allow an infinite number of logs to be deleted.  Thanks to `Ulli`_ for noting
+  the problem in `#433`_.
+* Fix the ``RecentCorrespondentsFilter`` correspondents filter that was added
+  in 2.4 to play nice with the defaults.  Thanks to `tsia`_ and `Sblop`_ who
+  pointed this out. `#423`_.
+* Updated dependencies to include (among other things) a security patch to
+  requests.
+* Fix text in sample data for tests so that the language guesser stops thinking
+  that everything is in Catalan because we had *Lorem ipsum* in there.
+* Tweaked the gunicorn sample command to use filesystem paths instead of Python
+  paths. `#441`_
+* Added pretty colour boxes next to the hex values in the Tags section, thanks
+  to a pull request from `Joshua Taillon`_ `#442`_.
+* Added a ``.editorconfig`` file to better specify coding style.
+* `Joshua Taillon`_ also added some logic to tie Paperless' date guessing logic
+  into how it parses file names on import. `#440`_
+
+
+2.5.0
+=====
+
+* **New dependency**: Paperless now optimises thumbnail generation with
+  `optipng`_, so you'll need to install that somewhere in your PATH or declare
+  its location in ``PAPERLESS_OPTIPNG_BINARY``.  The Docker image has already
+  been updated on the Docker Hub, so you just need to pull the latest one from
+  there if you're a Docker user.
+
+* "Login free" instances of Paperless were breaking whenever you tried to edit
+  objects in the admin: adding/deleting tags or correspondents, or even fixing
+  spelling.  This was due to the "user hack" we were applying to sessions that
+  weren't using a login, as that hack user didn't have a valid id.  The fix was
+  to attribute the first user id in the system to this hack user.  `#394`_
+
+* A problem in how we handle slug values on Tags and Correspondents required a
+  few changes to how we handle this field `#393`_:
+
+  1. Slugs are no longer editable.  They're derived from the name of the tag or
+     correspondent at save time, so if you wanna change the slug, you have to
+     change the name, and even then you're restricted to the rules of the
+     ``slugify()`` function.  The slug value is still visible in the admin
+     though.
+  2. I've added a migration to go over all existing tags & correspondents and
+     rewrite the ``.slug`` values to ones conforming to the ``slugify()``
+     rules.
+  3. The consumption process now uses the same rules as ``.save()`` in
+     determining a slug and using that to check for an existing
+     tag/correspondent.
+
+* An annoying bug in the date capture code was causing some bogus dates to be
+  attached to documents, which in turn busted the UI.  Thanks to `Andrew Peng`_
+  for reporting this. `#414`_.
+
+* A bug in the Dockerfile meant that Tesseract language files weren't being
+  installed correctly.  `euri10`_ was quick to provide a fix: `#406`_, `#413`_.
+
+* Document consumption is now wrapped in a transaction as per an old ticket
+  `#262`_.
+
+* The ``get_date()`` functionality of the parsers has been consolidated onto
+  the ``DocumentParser`` class since much of that code was redundant anyway.
+
+
+2.4.0
+=====
+
+* A new set of actions are now available thanks to `jonaswinkler`_'s very first
+  pull request!  You can now do nifty things like tag documents in bulk, or set
+  correspondents in bulk.  `#405`_
+* The import/export system is now a little smarter.  By default, documents are
+  tagged as ``unencrypted``, since exports are by their nature unencrypted.
+  It's now in the import step that we decide the storage type.  This allows you
+  to export from an encrypted system and import into an unencrypted one, or
+  vice-versa.
+* The migration history has been slightly modified to accommodate PostgreSQL
+  users.  Additionally, you can now tell paperless to use PostgreSQL simply by
+  declaring ``PAPERLESS_DBUSER`` in your environment.  This will attempt to
+  connect to your Postgres database without a password unless you also set
+  ``PAPERLESS_DBPASS``.
+* A bug was found in the REST API filter system that was the result of an
+  update of django-filter some time ago.  This has now been patched in `#412`_.
+  Thanks to `thepill`_ for spotting it!
+
+
+2.3.0
+=====
+
+* Support for consuming plain text & markdown documents was added by
+  `Joshua Taillon`_!  This was a long-requested feature, and its addition is
+  likely to be greatly appreciated by the community: `#395`_  Thanks also to
+  `David Martin`_ for his assistance on the issue.
+* `dubit0`_ found & fixed a bug that prevented management commands from running
+  before we had an operational database: `#396`_
+* Joshua also added a simple update to the thumbnail generation process to
+  improve performance: `#399`_
+* As his last bit of effort on this release, Joshua also added some code to
+  allow you to view the documents inline rather than download them as an
+  attachment. `#400`_
+* Finally, `ahyear`_ found a slip in the Docker documentation and patched it.
+  `#401`_
+
+
+2.2.1
+=====
+
+* `Kyle Lucy`_ reported a bug quickly after the release of 2.2.0 where we broke
+  the ``DISABLE_LOGIN`` feature: `#392`_.
+
+
+2.2.0
+=====
+
+* Thanks to `dadosch`_, `Wolfgang Mader`_, and `Tim Brooks`_ this is the first
+  version of Paperless that supports Django 2.0!  As a result of their hard
+  work, you can now run Paperless on Python 3.7 as well: `#386`_ &
+  `#390`_.
+* `Stéphane Brunner`_ added a few lines of code that made the tagging
+  interface a lot easier on those of us with lots of different tags: `#391`_.
+* `Kilian Koeltzsch`_ noticed a bug in how we capture & automatically create
+  tags, so that's fixed now too: `#384`_.
+* `erikarvstedt`_ tweaked the behaviour of the test suite to be better behaved
+  for packaging environments: `#383`_.
+* `Lukasz Soluch`_ added CORS support to make building a new Javascript-based
+  front-end cleaner & easier: `#387`_.
+
+
 2.1.0
 =====

@@ -451,6 +579,22 @@ bulk of the work on this big change.
 .. _mcronce: https://github.com/mcronce
 .. _Enno Lohmeier: https://github.com/elohmeier
 .. _Mark McFate: https://github.com/SummittDweller
+.. _dadosch: https://github.com/dadosch
+.. _Wolfgang Mader: https://github.com/wmader
+.. _Tim Brooks: https://github.com/brookst
+.. _Stéphane Brunner: https://github.com/sbrunner
+.. _Kilian Koeltzsch: https://github.com/kiliankoe
+.. _Lukasz Soluch: https://github.com/LukaszSolo
+.. _Joshua Taillon: https://github.com/jat255
+.. _dubit0: https://github.com/dubit0
+.. _ahyear: https://github.com/ahyear
+.. _jonaswinkler: https://github.com/jonaswinkler
+.. _thepill: https://github.com/thepill
+.. _Andrew Peng: https://github.com/pengc99
+.. _euri10: https://github.com/euri10
+.. _Ulli: https://github.com/Ulli2k
+.. _tsia: https://github.com/tsia
+.. _Sblop:  https://github.com/Sblop

 .. _#20: https://github.com/danielquinn/paperless/issues/20
 .. _#44: https://github.com/danielquinn/paperless/issues/44
@@ -516,6 +660,7 @@ bulk of the work on this big change.
 .. _#322: https://github.com/danielquinn/paperless/pull/322
 .. _#328: https://github.com/danielquinn/paperless/pull/328
 .. _#253: https://github.com/danielquinn/paperless/issues/253
+.. _#262: https://github.com/danielquinn/paperless/issues/262
 .. _#323: https://github.com/danielquinn/paperless/issues/323
 .. _#344: https://github.com/danielquinn/paperless/pull/344
 .. _#351: https://github.com/danielquinn/paperless/pull/351
@@ -525,6 +670,31 @@ bulk of the work on this big change.
 .. _#374: https://github.com/danielquinn/paperless/pull/374
 .. _#375: https://github.com/danielquinn/paperless/pull/375
 .. _#376: https://github.com/danielquinn/paperless/pull/376
+.. _#383: https://github.com/danielquinn/paperless/pull/383
+.. _#384: https://github.com/danielquinn/paperless/issues/384
+.. _#386: https://github.com/danielquinn/paperless/issues/386
+.. _#387: https://github.com/danielquinn/paperless/pull/387
+.. _#391: https://github.com/danielquinn/paperless/pull/391
+.. _#390: https://github.com/danielquinn/paperless/pull/390
+.. _#392: https://github.com/danielquinn/paperless/issues/392
+.. _#393: https://github.com/danielquinn/paperless/issues/393
+.. _#395: https://github.com/danielquinn/paperless/pull/395
+.. _#394: https://github.com/danielquinn/paperless/issues/394
+.. _#396: https://github.com/danielquinn/paperless/pull/396
+.. _#399: https://github.com/danielquinn/paperless/pull/399
+.. _#400: https://github.com/danielquinn/paperless/pull/400
+.. _#401: https://github.com/danielquinn/paperless/pull/401
+.. _#405: https://github.com/danielquinn/paperless/pull/405
+.. _#406: https://github.com/danielquinn/paperless/issues/406
+.. _#412: https://github.com/danielquinn/paperless/issues/412
+.. _#413: https://github.com/danielquinn/paperless/pull/413
+.. _#414: https://github.com/danielquinn/paperless/issues/414
+.. _#423: https://github.com/danielquinn/paperless/issues/423
+.. _#433: https://github.com/danielquinn/paperless/issues/433
+.. _#440: https://github.com/danielquinn/paperless/pull/440
+.. _#441: https://github.com/danielquinn/paperless/pull/441
+.. _#442: https://github.com/danielquinn/paperless/pull/442

 .. _pipenv: https://docs.pipenv.org/
 .. _a new home on Docker Hub: https://hub.docker.com/r/danielquinn/paperless/
+.. _optipng: http://optipng.sourceforge.net/
--- a/docs/changelog_jonaswinkler.rst
+++ b/docs/changelog_jonaswinkler.rst
@@ -0,0 +1,15 @@
+Changelog (jonaswinkler)
+########################
+
+1.0.0
+=====
+
+* First release based on paperless 2.6.0
+* Added: Automatic document classification using neural networks (replaces
+  regex-based tagging)
+* Added: Document types
+* Added: Archive serial number allows easy referencing of physical document
+  copies
+* Added: Inbox tags (added automatically to newly consumed documents)
+* Added: Document viewer on document edit page
+* Database backend is now configurable
--- a/docs/consumption.rst
+++ b/docs/consumption.rst
@@ -76,6 +76,31 @@ Pre-consumption script

 * Document file name

+A simple but common example for this would be creating a simple script like
+this:
+
+``/usr/local/bin/ocr-pdf``
+
+.. code:: bash
+
+    #!/usr/bin/env bash
+    pdf2pdfocr.py -i "${1}"
+
+``/etc/paperless.conf``
+
+.. code:: bash
+
+    ...
+    PAPERLESS_PRE_CONSUME_SCRIPT="/usr/local/bin/ocr-pdf"
+    ...
+
+This will pass the path to the document about to be consumed to ``/usr/local/bin/ocr-pdf``,
+which will in turn call `pdf2pdfocr.py`_ on your document, which will then
+overwrite the file with an OCR'd version of the file and exit, at which point
+the consumption process will begin with the newly modified file.
+
+.. _pdf2pdfocr.py: https://github.com/LeoFCardoso/pdf2pdfocr
+

 .. _consumption-director-hook-variables-post:

--- a/docs/contributing.rst
+++ b/docs/contributing.rst
@@ -0,0 +1,141 @@
+.. _contributing:
+
+Contributing to Paperless
+#########################
+
+Maybe you've been using Paperless for a while and want to add a feature or two,
+or maybe you've come across a bug that you have some ideas how to solve.  The
+beauty of Free software is that you can see what's wrong and help to get it
+fixed for everyone!
+
+
+How to Get Your Changes Rolled Into Paperless
+=============================================
+
+If you've found a bug, but don't know how to fix it, you can always post an
+issue on `GitHub`_ in the hopes that someone will have the time to fix it for
+you.  If however you're the one with the time, pull requests are always
+welcome, you just have to make sure that your code conforms to a few standards:
+
+Pep8
+----
+
+It's the standard for all Python development, so it's `very well documented`_.
+The short version is:
+
+* Lines should wrap at 79 characters
+* Use ``snake_case`` for variables, ``CamelCase`` for classes, and ``ALL_CAPS``
+  for constants.
+* Space out your operators: ``stuff + 7`` instead of ``stuff+7``
+* Two empty lines between classes and functions, but 1 empty line between
+  class methods.
+
+There's more to it than that, but if you follow those, you'll probably be
+alright.  When you submit your pull request, there's a pep8 checker that'll
+look at your code to see if anything is off.  If it finds anything, it'll
+complain at you until you fix it.
+
+
+Additional Style Guides
+-----------------------
+
+Where pep8 is ambiguous, I've tried to be a little more specific.  These rules
+aren't hard-and-fast, but if you can conform to them, I'll appreciate it and
+spend less time trying to conform your PR before merging:
+
+
+Function calls
+..............
+
+If you're calling a function and that necessitates more than one line of code,
+please format it like this:
+
+.. code:: python
+
+    my_function(
+        argument1,
+        kwarg1="x",
+        kwarg2="y",
+        another_really_long_kwarg="some big value",
+        a_kwarg_calling_another_long_function=another_function(
+            another_arg,
+            another_kwarg="kwarg!"
+        )
+    )
+
+This is all in the interest of code uniformity rather than anything else.  If
+we stick to a style, everything is understandable in the same way.
+
+
+Quoting Strings
+...............
+
+pep8 is a little too open-minded on this for my liking.  Python strings should
+be quoted with double quotes (``"``) except in cases where the resulting string
+would require too much escaping of a double quote, in which case, a single
+quoted, or triple-quoted string will do:
+
+.. code:: python
+
+    my_string = "This is my string"
+    problematic_string = 'This is a "string" with "quotes" in it'
+
+In HTML templates, please use double-quotes for tag attributes, and single
+quotes for arguments passed to Django template tags:
+
+.. code:: html
+
+    <div class="stuff">
+        <a href="{% url 'some-url-name' pk='w00t' %}">link this</a>
+    </div>
+
+This is to keep linters happy when they look at an HTML file and see an
+attribute closing the ``"`` before it should have been.
+
+--
+
+That's all there is in terms of guidelines, so I hope it's not too daunting.
+
+
+Indentation & Spacing
+.....................
+
+When it comes to indentation:
+
+* For Python, the rule is: follow pep8 and use 4 spaces.
+* For Javascript, CSS, and HTML, please use 1 tab.
+
+Additionally, Django templates making use of block elements like ``{% if %}``,
+``{% for %}``, and ``{% block %}`` etc. should be indented:
+
+Good:
+
+.. code:: html
+
+    {% block stuff %}
+    	<h1>This is the stuff</h1>
+    {% endblock %}
+
+Bad:
+
+.. code:: html
+
+    {% block stuff %}
+    <h1>This is the stuff</h1>
+    {% endblock %}
+
+
+The Code of Conduct
+===================
+
+Paperless has a `code of conduct`_.  It's a lot like the other ones you see out
+there, with a few small changes, but basically it boils down to:
+
+> Don't be an ass, or you might get banned.
+
+I'm proud to say that the CoC has never had to be enforced because everyone has
+been awesome, friendly, and professional.
+
+.. _GitHub: https://github.com/danielquinn/paperless/issues
+.. _very well documented: https://www.python.org/dev/peps/pep-0008/
+.. _code of conduct: https://github.com/danielquinn/paperless/blob/master/CODE_OF_CONDUCT.md
--- a/docs/guesswork.rst
+++ b/docs/guesswork.rst
@@ -43,6 +43,16 @@ These however wouldn't work:
 * ``Some Company Name, Invoice 2016-01-01, money, invoices.pdf``
 * ``Another Company- Letter of Reference.jpg``

+Do I have to be so strict about naming?
+---------------------------------------
+Rather than using the strict document naming rules, one can also set the option
+``PAPERLESS_FILENAME_DATE_ORDER`` in ``paperless.conf`` to any date order
+that is accepted by dateparser_. Doing so will cause ``paperless`` to default
+to any date format that is found in the title, instead of a date pulled from
+the document's text, without requiring the strict formatting of the document
+filename as described above.
+
+.. _dateparser: https://github.com/scrapinghub/dateparser/blob/v0.7.0/docs/usage.rst#settings

 .. _guesswork-content:

@@ -82,11 +92,11 @@ text and matching algorithm.  From the help info there:
    uses a regex to match the PDF.  If you don't know what a regex is, you
    probably don't want this option.

-When using the "any" or "all" matching algorithms, you can search for terms that
-consist of multiple words by enclosing them in double quotes. For example, defining
-a match text of ``"Bank of America" BofA`` using the "any" algorithm, will match
-documents that contain either "Bank of America" or "BofA", but will not match
-documents containing "Bank of South America".
+When using the "any" or "all" matching algorithms, you can search for terms
+that consist of multiple words by enclosing them in double quotes. For example,
+defining a match text of ``"Bank of America" BofA`` using the "any" algorithm,
+will match documents that contain either "Bank of America" or "BofA", but will
+not match documents containing "Bank of South America".

 Then just save your tag/correspondent and run another document through the
 consumer.  Once complete, you should see the newly-created document,
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -43,5 +43,7 @@ Contents
   customising
   extending
   troubleshooting
+   contributing
   scanners
   changelog
+   changelog_jonaswinkler
--- a/docs/migrating.rst
+++ b/docs/migrating.rst
@@ -82,6 +82,7 @@ rolled in as part of the update:

    $ cd /path/to/project
    $ git pull
+    $ pip install -r requirements.txt
    $ cd src
    $ ./manage.py migrate

@@ -101,6 +102,7 @@ is similar:
    $ cd /path/to/project
    $ git pull
    $ docker build -t paperless .
+    $ docker-compose run --rm consumer migrate
    $ docker-compose up -d

 If ``git pull`` doesn't report any changes, there is no need to continue with
--- a/docs/requirements.rst
+++ b/docs/requirements.rst
@@ -33,7 +33,7 @@ In addition to the above, there are a number of Python requirements, all of
 which are listed in a file called ``requirements.txt`` in the project root
 directory.

-If you're not working on a virtual environment (like Vagrant or Docker), you
+If you're not working on a virtual environment (like Docker), you
 should probably be using a virtualenv, but that's your call.  The reasons why
 you might choose a virtualenv or not aren't really within the scope of this
 document.  Needless to say if you don't know what a virtualenv is, you should
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
--- a/docs/setup.rst
+++ b/docs/setup.rst
@@ -39,33 +39,34 @@ or just download the tarball and go that route:
 Installation & Configuration
 ----------------------------

-You can go multiple routes with setting up and running Paperless. The `Vagrant
-route`_ is quick & easy, but means you're running a VM which comes with memory
-consumption etc. We also `support Docker`_, which you can use natively under
-Linux and in a VM with `Docker Machine`_ (this guide was written for native
-Docker usage under Linux, you might have to adapt it for Docker Machine.)
-Not to forget the virtualenv, this is similar to `bare metal`_ with the
-exception that you have to activate the virtualenv first.
-Last but not least, the standard `bare metal`_ approach is a little more
-complicated, but worth it because it makes it easier should you want to
-contribute some code back.
+You can go multiple routes with setting up and running Paperless:

-.. _Vagrant route: setup-installation-vagrant_
-.. _support Docker: setup-installation-docker_
-.. _bare metal: setup-installation-standard_
+ * The `bare metal route`_
+ * The `docker route`_
+
+
+The `docker route`_ is quick & easy.
+
+The `bare metal route`_ is a bit more complicated to set up but makes it easier
+should you want to contribute some code back.
+
+.. _docker route: setup-installation-docker_
+.. _bare metal route: setup-installation-bare-metal_
 .. _Docker Machine: https://docs.docker.com/machine/


-.. _setup-installation-standard:
+.. _setup-installation-bare-metal:

 Standard (Bare Metal)
-.....................
+++++++++++++++++++++

 1. Install the requirements as per the :ref:`requirements <requirements>` page.
 2. Within the extract of master.zip go to the ``src`` directory.
 3. Copy ``../paperless.conf.example`` to ``/etc/paperless.conf`` and open it in
-   your favourite editor.  Because this file contains passwords it should only
-   be readable by user root and paperless!  Set the values for:
+   your favourite editor.  As this file contains passwords, it should only be
+   readable by user root and paperless!
+
+   Set the values for:

    * ``PAPERLESS_CONSUMPTION_DIR``: this is where your documents will be
      dumped to be consumed by Paperless.
@@ -82,9 +83,10 @@ Standard (Bare Metal)
 6. Start the webserver with ``./manage.py runserver <IP>:<PORT>``.
   If no specifc IP or port are given, the default is ``127.0.0.1:8000``
   also known as http://localhost:8000/.
-   You should now be able to visit your (empty) at `Paperless webserver`_ or
-   whatever you chose before.  You can login with the user/pass you created in
-   #5.
+   You should now be able to visit your (empty) installation at
+   `Paperless webserver`_ or whatever you chose before.  You can login with the
+   user/pass you created in #5.
+
 7. In a separate window, change to the ``src`` directory in this repo again,
   but this time, you should start the consumer script with
   ``./manage.py document_consumer``.
@@ -93,13 +95,18 @@ Standard (Bare Metal)
 10. Visit the document list on your webserver, and it should be there, indexed
    and downloadable.

-.. _Paperless webserver: http://127.0.0.1:8000
+.. caution::

+    This installation is not secure. Once everything is working, head over to
+    `Making things more permanent`_.
+
+.. _Paperless webserver: http://127.0.0.1:8000
+.. _Making things more permanent: setup-permanent_

 .. _setup-installation-docker:

 Docker Method
-.............
+++++++++++++

 1. Install `Docker`_.

@@ -256,164 +263,44 @@ Docker Method
   newer ``docker-compose.yml.example`` file


-.. _setup-installation-vagrant:
-
-Vagrant Method
-..............
-
-1. Install `Vagrant`_.  How you do that is really between you and your OS.
-2. Run ``vagrant up``.  An instance will start up for you.  When it's ready and
-   provisioned...
-3. Run ``vagrant ssh`` and once inside your new vagrant box, edit
-   ``/etc/paperless.conf`` and set the values for:
-
-    * ``PAPERLESS_CONSUMPTION_DIR``: This is where your documents will be
-      dumped to be consumed by Paperless.
-    * ``PAPERLESS_PASSPHRASE``: This is the passphrase Paperless uses to
-      encrypt/decrypt the original document.  It's only required if you want
-      your original files to be encrypted, otherwise, just leave it unset.
-    * ``PAPERLESS_EMAIL_SECRET``: this is the "magic word" used when consuming
-      documents from mail or via the API.  If you don't use either, leaving it
-      blank is just fine.
-
-4. Exit the vagrant box and re-enter it with ``vagrant ssh`` again.  This
-   updates the environment to make use of the changes you made to the config
-   file.
-5. Initialise the database with ``/opt/paperless/src/manage.py migrate``.
-6. Still inside your vagrant box, create a user for your Paperless instance
-   with ``/opt/paperless/src/manage.py createsuperuser``. Follow the prompts to
-   create your user.
-7. Start the webserver with
-   ``/opt/paperless/src/manage.py runserver 0.0.0.0:8000``. You should now be
-   able to visit your (empty) `Paperless webserver`_ at ``172.28.128.4:8000``.
-   You can login with the user/pass you created in #6.
-8. In a separate window, run ``vagrant ssh`` again, but this time once inside
-   your vagrant instance, you should start the consumer script with
-   ``/opt/paperless/src/manage.py document_consumer``.
-9. Scan something.  Put it in the ``CONSUMPTION_DIR``.
-10. Wait a few minutes
-11. Visit the document list on your webserver, and it should be there, indexed
-    and downloadable.
-
-.. _Vagrant: https://vagrantup.com/
-.. _Paperless server: http://172.28.128.4:8000
-
-
 .. _setup-permanent:

 Making Things a Little more Permanent
 -------------------------------------

-Once you've tested things and are happy with the work flow, you can automate
-the process of starting the webserver and consumer automatically.
+Once you've tested things and are happy with the work flow, you should secure
+the installation and automate the process of starting the webserver and
+consumer.


-.. _setup-permanent-standard-systemd:
-
-Standard (Bare Metal, Systemd)
-..............................
-
-If you're running on a bare metal system that's using Systemd, you can use the
-service unit files in the ``scripts`` directory to set this up.  You'll need to
-create a user called ``paperless`` (without login (if not already done so #5))
-and setup Paperless to be in a place that this new user can read and write to.
-Be sure to edit the service  scripts to point to the proper location of your
-paperless install, referencing the appropriate Python binary. For example:
-``ExecStart=/path/to/python3 /path/to/paperless/src/manage.py document_consumer``.
-If you don't want to make a new user, you can change the ``Group`` and ``User``
-variables accordingly.
-
-Then, as ``root`` (or using ``sudo``) you can just copy the ``.service`` files
-to the Systemd directory and tell it to enable the two services::
-
-    # cp /path/to/paperless/scripts/paperless-consumer.service /etc/systemd/system/
-    # cp /path/to/paperless/scripts/paperless-webserver.service /etc/systemd/system/
-    # systemctl enable paperless-consumer
-    # systemctl enable paperless-webserver
-    # systemctl start paperless-consumer
-    # systemctl start paperless-webserver
-
-
-.. _setup-permanent-standard-ubuntu14:
-
-Ubuntu 14.04 (Bare Metal, Upstart)
-..................................
-
-Ubuntu 14.04 and earlier use the `Upstart`_ init system to start services
-during the boot process. To configure Upstart to run Paperless automatically
-after restarting your system:
-
-1. Change to the directory where Upstart's configuration files are kept:
-   ``cd /etc/init``
-2. Create a new file: ``sudo nano paperless-server.conf``
-3. In the newly-created file enter::
-
-    start on (local-filesystems and net-device-up IFACE=eth0)
-    stop on shutdown
-
-    respawn
-    respawn limit 10 5
-
-    script
-      exec /srv/paperless/src/manage.py runserver --noreload 0.0.0.0:80
-    end script
-
-   Note that you'll need to replace ``/srv/paperless/src/manage.py`` with the
-   path to the ``manage.py`` script in your installation directory.
-
-  If you are using a network interface other than ``eth0``, you will have to
-  change ``IFACE=eth0``. For example, if you are connected via WiFi, you will
-  likely need to replace ``eth0`` above with ``wlan0``. To see all interfaces,
-  run ``ifconfig -a``.
-
-  Save the file.
-
-4. Create a new file: ``sudo nano paperless-consumer.conf``
-
-5. In the newly-created file enter::
-
-    start on (local-filesystems and net-device-up IFACE=eth0)
-    stop on shutdown
-
-    respawn
-    respawn limit 10 5
-
-    script
-      exec /srv/paperless/src/manage.py document_consumer
-    end script
-
-  Replace ``/srv/paperless/src/manage.py`` with the same values as in step 3
-  above and replace ``eth0`` with the appropriate value, if necessary. Save the
-  file.
-
-These two configuration files together will start both the Paperless webserver
-and document consumer processes when the file system and network interface
-specified is available after boot. Furthermore, if either process ever exits
-unexpectedly, Upstart will try to restart it a maximum of 10 times within a 5
-second period.
-
-.. _Upstart: http://upstart.ubuntu.com/
-
-
-.. _setup-permanent-vagrant:
-
+.. _setup-permanent-webserver:

 Using a Real Webserver
-......................
++++++++++++++++++++++

 The default is to use Django's development server, as that's easy and does the
-job well enough on a home network.  However, if you want to do things right,
-it's probably a good idea to use a webserver capable of handling more than one
-thread. You will also have to let the webserver serve the static files (CSS,
-JavaScript) from the directory configured in ``PAPERLESS_STATICDIR``. For that,
-you need to run ``./manage.py collectstatic`` in the ``src`` directory.  The
-default static files directory is ``../static``.
+job well enough on a home network. However, using it for anything more than
+that is strongly discouraged.
+
+If you want to do things right you should use a real webserver capable of
+handling more than one thread. You will also have to let the webserver serve
+the static files (CSS, JavaScript) from the directory configured in
+``PAPERLESS_STATICDIR``.  The default static files directory is ``../static``.
+
+For that you need to activate your virtual environment and collect the static
+files with the command:
+
+.. code:: bash
+
+    $ cd <paperless directory>/src
+    $ ./manage.py collectstatic
+

 Apache
 ~~~~~~

 This is a configuration supplied by `steckerhalter`_ on GitHub.  It uses Apache
-and mod_wsgi, with a Paperless installation in /home/paperless/:
+and mod_wsgi, with a Paperless installation in ``/home/paperless/``:

 .. code:: apache

@@ -444,170 +331,143 @@ Nginx + Gunicorn

 If you're using Nginx, the most common setup is to combine it with a
 Python-based server like Gunicorn so that Nginx is acting as a proxy.  Below is
-a copy of a simple Nginx configuration fragment making use of SSL and IPv6 to
-refer to a gunicorn instance listening on a local Unix socket:
+a copy of a simple Nginx configuration fragment making use of a gunicorn
+instance listening on localhost port 8000.

 .. code:: nginx

-    upstream transfer_server {
-      server unix:/run/example.com/gunicorn.sock fail_timeout=0;
-    }
-
-    # Redirect requests on port 80 to 443
    server {
-      listen 80;
-      listen [::]:80;
-      server_name example.com;
-      rewrite ^ https://$server_name$request_uri? permanent;
+        listen 80;
+
+        index index.html index.htm index.php;
+        access_log /var/log/nginx/paperless_access.log;
+        error_log /var/log/nginx/paperless_error.log;
+
+        location /static {
+
+            autoindex on;
+            alias <path-to-paperless-static-directory>;
+
+        }
+
+        location / {
+
+            proxy_set_header Host $http_host;
+            proxy_set_header X-Real-IP $remote_addr;
+            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+            proxy_set_header X-Forwarded-Proto $scheme;
+
+            proxy_pass http://127.0.0.1:8000;
+        }
    }

-    server {

-      listen 443 ssl;
-      listen [::]:443;
-      client_max_body_size 4G;
-      server_name example.com;
-      keepalive_timeout 5;
-      root /var/www/example.com;
+The gunicorn server can be started with the command:

-      ssl on;
+.. code-block:: shell

-      ssl_certificate         /etc/letsencrypt/live/example.com/fullchain.pem;
-      ssl_certificate_key     /etc/letsencrypt/live/example.com/privkey.pem;
-      ssl_trusted_certificate /etc/letsencrypt/live/example.com/fullchain.pem;
-      ssl_session_timeout 1d;
-      ssl_session_cache shared:SSL:50m;
+    $ <path-to-paperless-virtual-environment>/bin/gunicorn --pythonpath=<path-to-paperless>/src paperless.wsgi -w 2

-      # Diffie-Hellman parameter for DHE ciphersuites, recommended 2048 bits
-      # Generate with:
-      #   openssl dhparam -out /etc/nginx/dhparam.pem 2048
-      ssl_dhparam /etc/nginx/dhparam.pem;

-      # What Mozilla calls "Intermediate configuration"
-      # Copied from https://mozilla.github.io/server-side-tls/ssl-config-generator/
-      ssl_protocols TLSv1 TLSv1.1 TLSv1.2;
-      ssl_ciphers 'ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-AES128-SHA256:ECDHE-RSA-AES128-SHA256:ECDHE-ECDSA-AES128-SHA:ECDHE-RSA-AES256-SHA384:ECDHE-RSA-AES128-SHA:ECDHE-ECDSA-AES256-SHA384:ECDHE-ECDSA-AES256-SHA:ECDHE-RSA-AES256-SHA:DHE-RSA-AES128-SHA256:DHE-RSA-AES128-SHA:DHE-RSA-AES256-SHA256:DHE-RSA-AES256-SHA:ECDHE-ECDSA-DES-CBC3-SHA:ECDHE-RSA-DES-CBC3-SHA:EDH-RSA-DES-CBC3-SHA:AES128-GCM-SHA256:AES256-GCM-SHA384:AES128-SHA256:AES256-SHA256:AES128-SHA:AES256-SHA:DES-CBC3-SHA:!DSS';
-      ssl_prefer_server_ciphers on;
+.. _setup-permanent-standard-systemd:

-      add_header Strict-Transport-Security max-age=15768000;
+Standard (Bare Metal + Systemd)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

-      ssl_stapling on;
-      ssl_stapling_verify on;
+If you're running on a bare metal system that's using Systemd, you can use the
+service unit files in the ``scripts`` directory to set this up.

-      access_log /var/log/nginx/example.com.log main;
-      error_log /var/log/nginx/example.com.err info;
+1. You'll need to create a group and user called ``paperless`` (without login).
+2. Set up Paperless in a place that this new user can read and write to.
+3. Ensure ``/etc/paperless.conf`` is readable by the ``paperless`` user.
+4. Copy the service files from the ``scripts`` directory to
+   ``/etc/systemd/system``.

-      location / {
-        try_files $uri @proxy_to_app;
-      }
+.. code-block:: bash

-      location @proxy_to_app {
-        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
-        proxy_set_header X-Forwarded-Proto https;
-        proxy_set_header Host $host;
-        proxy_redirect off;
-        proxy_pass http://transfer_server;
-      }
+    $ cp /path/to/paperless/scripts/paperless-consumer.service /etc/systemd/system/
+    $ cp /path/to/paperless/scripts/paperless-webserver.service /etc/systemd/system/

-    }
+5. Edit the service file to point the ``ExecStart`` line to the proper location
+   of your paperless install, referencing the appropriate Python binary. For
+   example:
+   ``ExecStart=/path/to/python3 /path/to/paperless/src/manage.py document_consumer``.
+6. Start and enable (so they start on boot) the services.

-Once you've got Nginx configured, you'll want to have a configuration file for
-your gunicorn instance.  This should do the trick:
+.. code-block:: bash

-.. code:: python
+    $ systemctl enable paperless-consumer
+    $ systemctl enable paperless-webserver
+    $ systemctl start paperless-consumer
+    $ systemctl start paperless-webserver

-    import os

-    bind = 'unix:/run/example.com/gunicorn.sock'
-    backlog = 2048
-    workers = 6
-    worker_class = 'sync'
-    worker_connections = 1000
-    timeout = 30
-    keepalive = 2
-    debug = False
-    spew = False
-    daemon = False
-    pidfile = None
-    umask = 0
-    user = None
-    group = None
-    tmp_upload_dir = None
-    errorlog = '/var/log/example.com/gunicorn.err'
-    loglevel = 'warning'
-    accesslog = '/var/log/example.com/gunicorn.log'
-    proc_name = None
+.. _setup-permanent-standard-upstart:

-    def post_fork(server, worker):
-        server.log.info("Worker spawned (pid: %s)", worker.pid)
+Standard (Bare Metal + Upstart)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

-    def pre_fork(server, worker):
-        pass
+Ubuntu 14.04 and earlier use the `Upstart`_ init system to start services
+during the boot process. To configure Upstart to run Paperless automatically
+after restarting your system:

-    def pre_exec(server):
-        server.log.info("Forked child, re-executing.")
+1. Change to the directory where Upstart's configuration files are kept:
+   ``cd /etc/init``
+2. Create a new file: ``sudo nano paperless-server.conf``
+3. In the newly-created file enter::

-    def when_ready(server):
-        server.log.info("Server is ready. Spawning workers")
+    start on (local-filesystems and net-device-up IFACE=eth0)
+    stop on shutdown

-    def worker_int(worker):
-        worker.log.info("worker received INT or QUIT signal")
+    respawn
+    respawn limit 10 5

-        ## get traceback info
-        import threading, sys, traceback
-        id2name = dict([(th.ident, th.name) for th in threading.enumerate()])
-        code = []
-        for threadId, stack in sys._current_frames().items():
-            code.append("\n# Thread: %s(%d)" % (id2name.get(threadId,""),
-                threadId))
-            for filename, lineno, name, line in traceback.extract_stack(stack):
-                code.append('File: "%s", line %d, in %s' % (filename,
-                    lineno, name))
-                if line:
-                    code.append("  %s" % (line.strip()))
-        worker.log.debug("\n".join(code))
+    script
+      exec <path to paperless virtual environment>/bin/gunicorn --pythonpath=<path to paperless>/src paperless.wsgi -w 2
+    end script

-    def worker_abort(worker):
-        worker.log.info("worker received SIGABRT signal")
+   Note that you'll need to replace the ``<path to ...>`` placeholders with
+   the paths to your virtual environment and your installation directory.

-Vagrant
-.......
+  If you are using a network interface other than ``eth0``, you will have to
+  change ``IFACE=eth0``. For example, if you are connected via WiFi, you will
+  likely need to replace ``eth0`` above with ``wlan0``. To see all interfaces,
+  run ``ifconfig -a``.
+
+  Save the file.
+
+4. Create a new file: ``sudo nano paperless-consumer.conf``
+
+5. In the newly-created file enter::
+
+    start on (local-filesystems and net-device-up IFACE=eth0)
+    stop on shutdown
+
+    respawn
+    respawn limit 10 5
+
+    script
+      exec <path to paperless virtual environment>/bin/python <path to paperless>/src/manage.py document_consumer
+    end script
+
+  Replace the path placeholders and ``eth0`` with the appropriate values and save the file.
+
+These two configuration files together will start both the Paperless webserver
+and document consumer processes when the file system and network interface
+specified is available after boot. Furthermore, if either process ever exits
+unexpectedly, Upstart will try to restart it a maximum of 10 times within a 5
+second period.
+
+.. _Upstart: http://upstart.ubuntu.com/

-You may use the Ubuntu explanation above. Replace
-``(local-filesystems and net-device-up IFACE=eth0)`` with ``vagrant-mounted``.

 .. _setup-permanent-docker:

 Docker
-......
+~~~~~~

 If you're using Docker, you can set a restart-policy_ in the
 ``docker-compose.yml`` to have the containers automatically start with the
 Docker daemon.

 .. _restart-policy: https://docs.docker.com/engine/reference/commandline/run/#restart-policies-restart
-
-
-.. _setup-subdirectory:
-
-Hosting Paperless in a Subdirectory
-----------------------------------
-
-Paperless was designed to run off the root of the hosting domain,
-(ie: ``https://example.com/``) but with a few changes, you can configure
-it to run in a subdirectory on your server
-(ie: ``https://example.com/paperless/``).
-
-Thanks to the efforts of `maphy-psd`_ on `Github`_, running Paperless in a
-subdirectory is now as easy as setting a config variable.  Simply set
-``PAPERLESS_FORCE_SCRIPT_NAME`` in your environment or
-``/etc/paperless.conf`` to the path you want Paperless hosted at, configure
-Nginx/Apache for your needs and you're done.  So, if you want Paperless to live
-at ``https://example.com/arbitrary/path/to/paperless`` then you just set
-``PAPERLESS_FORCE_SCRIPT_NAME`` to ``/arbitrary/path/to/paperless``.  Note the
-leading ``/`` there.
-
-As to how to configure Nginx or Apache for this, that's on you :-)
-
-.. _maphy-psd: https://github.com/maphy-psd
-.. _Github: https://github.com/danielquinn/paperless/pull/255
--- a/docs/troubleshooting.rst
+++ b/docs/troubleshooting.rst
@@ -14,9 +14,8 @@ FORGIVING_OCR is enabled``, then you might need to install the
 `Tesseract language files <http://packages.ubuntu.com/search?keywords=tesseract-ocr>`_
 marching your document's languages.

-As an example, if you are running Paperless from the Vagrant setup provided
-(or from any Ubuntu or Debian box), and your documents are written in Spanish
-you may need to run::
+As an example, if you are running Paperless from any Ubuntu or Debian
+box, and your documents are written in Spanish you may need to run::

    apt-get install -y tesseract-ocr-spa

--- a/models/.keep
+++ b/models/.keep
--- a/overrides/README.md
+++ b/overrides/README.md
@@ -0,0 +1,11 @@
+# Customizing Paperless
+
+*See customization
+[documentation](https://paperless.readthedocs.io/en/latest/customising.html) 
+for more detail!*
+
+The example `.css` and `.js` snippets in this folder can be placed into
+one of two files in your ``PAPERLESS_MEDIADIR`` folder: `overrides.js` or 
+`overrides.css`. Please feel free to submit pull requests to the main 
+repository with other examples of customizations that you think others may
+find useful.
--- a/paperless.conf.example
+++ b/paperless.conf.example
@@ -3,6 +3,16 @@
 # As this file contains passwords it should only be readable by the user
 # running paperless.

+###############################################################################
+####                        Database Settings                              ####
+###############################################################################
+
+# By default, sqlite is used as the database backend. This can be changed here.
+#PAPERLESS_DBENGINE="django.db.backends.postgresql_psycopg2"
+#PAPERLESS_DBNAME="paperless"
+#PAPERLESS_DBUSER="paperless"
+#PAPERLESS_DBPASS="paperless"
+

 ###############################################################################
 ####                         Paths & Folders                               ####
@@ -38,6 +48,13 @@ PAPERLESS_CONSUMPTION_DIR=""
 #PAPERLESS_STATIC_URL="/static/"


+# You can specify where the document classification model file should be
+# stored. Make sure that this file is writeable by the user executing the
+# management command "document_create_classifier" and that the path exists.
+# The default location is /models/model.pickle within the install folder.
+#PAPERLESS_MODEL_FILE=/path/to/model/file
+
+
 # These values are required if you want paperless to check a particular email
 # box every 10 minutes and attempt to consume documents from there.  If you
 # don't define a HOST, mail checking will just be disabled.
@@ -59,6 +76,11 @@ PAPERLESS_EMAIL_SECRET=""
 ####                              Security                                 ####
 ###############################################################################

+# Controls whether django's debug mode is enabled. Disable this on production
+# systems. Debug mode is enabled by default.
+PAPERLESS_DEBUG="false"
+
+
 # Paperless can be instructed to attempt to encrypt your PDF files with GPG
 # using the PAPERLESS_PASSPHRASE specified below.  If however you're not
 # concerned about encrypting these files (for example if you have disk
@@ -89,6 +111,12 @@ PAPERLESS_EMAIL_SECRET=""
 # as is "example.com,www.example.com", but NOT " example.com" or "example.com,"
 #PAPERLESS_ALLOWED_HOSTS="example.com,www.example.com"

+# If you decide to use the Paperless API in an ajax call, you need to add your
+# servers to the list of allowed hosts that can do CORS calls. By default
+# Paperless allows calls from localhost:8080, but if you'd like to change that,
+# you can set this value to a comma-separated list.
+#PAPERLESS_CORS_ALLOWED_HOSTS="localhost:8080,example.com,localhost:8000"
+
 # To host paperless under a subpath url like example.com/paperless you set
 # this value to /paperless. No trailing slash!
 #
@@ -111,6 +139,18 @@ PAPERLESS_EMAIL_SECRET=""
 # http://paperless.readthedocs.org/en/latest/consumption.html#hooking-into-the-consumption-process
 #PAPERLESS_POST_CONSUME_SCRIPT="/path/to/an/arbitrary/script.sh"

+# By default, when clicking on a document within the web interface, the
+# browser will prompt the user to save the document to disk. By setting this to
+# "true", the document will instead be opened in the browser, if possible.
+#PAPERLESS_INLINE_DOC="false"
+
+# By default, paperless will check the document text for document date information.
+# Uncomment the line below to enable checking the document filename for date
+# information. The date order can be set to any option as specified in
+# https://dateparser.readthedocs.io/en/latest/#settings. The filename will be
+# checked first, and if nothing is found, the document text will be checked
+# as normal.
+#PAPERLESS_FILENAME_DATE_ORDER="YMD"

 #
 # The following values use sensible defaults for modern systems, but if you're
@@ -173,6 +213,12 @@ PAPERLESS_EMAIL_SECRET=""
 #PAPERLESS_CONSUMER_LOOP_TIME=10


+# By default Paperless stops consuming a document if no language can be
+# detected. Set to true to consume documents even if the language detection
+# fails.
+#PAPERLESS_FORGIVING_OCR="false"
+
+
 ###############################################################################
 ####                            Interface                                  ####
 ###############################################################################
@@ -193,3 +239,28 @@ PAPERLESS_EMAIL_SECRET=""
 # positive integer, but if you don't define one in paperless.conf, a default of
 # 100 will be used.
 #PAPERLESS_LIST_PER_PAGE=100
+
+
+# The number of years for which a correspondent will be included in the recent
+# correspondents filter.
+#PAPERLESS_RECENT_CORRESPONDENT_YEARS=1
+
+###############################################################################
+####                     Third-Party Binaries                              ####
+###############################################################################
+
+# There are a few external software packages that Paperless expects to find on
+# your system when it starts up.  Unless you've done something creative with
+# their installation, you probably won't need to edit any of these.  However,
+# if you've installed these programs somewhere where simply typing the name of
+# the program doesn't automatically execute it (ie. the program isn't in your
+# $PATH), then you'll need to specify the literal path for that program here.
+
+# Convert (part of the ImageMagick suite)
+#PAPERLESS_CONVERT_BINARY=/usr/bin/convert
+
+# Unpaper
+#PAPERLESS_UNPAPER_BINARY=/usr/bin/unpaper
+
+# Optipng (for optimising thumbnail sizes)
+#PAPERLESS_OPTIPNG_BINARY=/usr/bin/optipng
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,52 +1,83 @@
-apipkg==1.4
-attrs==18.1.0
-certifi==2018.4.16
+-i https://pypi.python.org/simple
+alabaster==0.7.12
+apipkg==1.5
+atomicwrites==1.2.1
+attrs==18.2.0
+babel==2.6.0
+backcall==0.1.0
+certifi==2018.10.15
 chardet==3.0.4
 coverage==4.5.1
-coveralls==1.3.0
+coveralls==1.5.1
 dateparser==0.7.0
+decorator==4.3.0
+django-cors-headers==2.4.0
 django-crispy-forms==1.7.2
-django-extensions==2.0.7
-django-filter==1.1.0
-django-flat-responsive==2.0
-django==1.11.13
-djangorestframework==3.8.2
+django-extensions==2.1.3
+django-filter==2.0.0
+django==2.0.9
+djangorestframework==3.9.0
 docopt==0.6.2
+docutils==0.14
 execnet==1.5.0
 factory-boy==2.11.1
-faker==0.8.15
+faker==0.9.2
+filelock==3.0.10
 filemagic==1.6
-flake8==3.5.0
-fuzzywuzzy==0.15.0
-gunicorn==19.8.1
-idna==2.6
-inotify_simple==1.1.7; sys_platform == 'linux'
+fuzzywuzzy[speedup]==0.15.0
+gunicorn==19.9.0
+idna==2.7
+imagesize==1.1.0
+inotify-simple==1.1.8
+ipython-genutils==0.2.0
+ipython==7.1.1
+jedi==0.13.1
+jinja2==2.10
 langdetect==1.0.7
-mccabe==0.6.1
-more-itertools==4.1.0
-pdftotext==2.0.2
-pillow==5.1.0
-pluggy==0.6.0
-py==1.5.3
-pycodestyle==2.3.1
-pyflakes==1.6.0
-pyocr==0.5.1
-pytest-cov==2.5.1
-pytest-django==3.2.1
+markupsafe==1.0
+more-itertools==4.3.0
+numpy==1.15.1
+packaging==18.0
+parso==0.3.1
+pdftotext==2.1.1
+pexpect==4.6.0 
+pickleshare==0.7.5
+pillow==5.3.0
+pluggy==0.8.0
+psycopg2==2.7.6.1
+prompt-toolkit==2.0.7
+ptyprocess==0.6.0
+py==1.7.0
+pycodestyle==2.4.0
+pygments==2.2.0
+pyocr==0.5.3
+pyparsing==2.3.0
+pytest-cov==2.6.0
+pytest-django==3.4.3
 pytest-env==0.6.2
 pytest-forked==0.2
 pytest-sugar==0.9.1
-pytest-xdist==1.22.2
-pytest==3.5.1
-python-dateutil==2.7.3
-python-dotenv==0.8.2
-python-gnupg==0.4.2
-python-levenshtein==0.12.0
-pytz==2018.4
-regex==2018.2.21
-requests==2.18.4
+pytest-xdist==1.24.0
+pytest==3.9.3
+python-dateutil==2.7.5
+python-dotenv==0.9.1
+python-gnupg==0.4.3
+python-levenshtein==0.12.0 ; extra == 'speedup'
+pytz==2018.7
+regex==2018.11.2
+requests==2.20.0
 six==1.11.0
+scikit-learn==0.19.2
+scipy==1.1.0
+snowballstemmer==1.2.1
+sphinx==1.8.1
+sphinxcontrib-websupport==1.1.0
 termcolor==1.1.0
 text-unidecode==1.2
+toml==0.10.0
+tox==3.5.3
+traitlets==4.3.2
 tzlocal==1.5.1
-urllib3==1.22
+urllib3==1.24.1
+virtualenv==16.1.0
+wcwidth==0.1.7
--- a/scripts/paperless-webserver.service
+++ b/scripts/paperless-webserver.service
@@ -4,7 +4,7 @@ Description=Paperless webserver
 [Service]
 User=paperless
 Group=paperless
-ExecStart=/home/paperless/project/virtualenv/bin/python /home/paperless/project/src/manage.py runserver --noreload 0.0.0.0:8000
+ExecStart=/home/paperless/project/virtualenv/bin/gunicorn --pythonpath=/home/paperless/project/src paperless.wsgi -w 2

 [Install]
 WantedBy=multi-user.target
--- a/scripts/vagrant-provision
+++ b/scripts/vagrant-provision
@@ -1,31 +0,0 @@
-#!/bin/bash
-
-# Install packages
-apt-get update
-apt-get build-dep -y python-imaging
-apt-get install -y libjpeg8 libjpeg62-dev libfreetype6 libfreetype6-dev
-apt-get install -y build-essential python3-dev python3-pip sqlite3 libsqlite3-dev git
-apt-get install -y tesseract-ocr tesseract-ocr-eng imagemagick unpaper
-
-# Python dependencies
-pip3 install -r /opt/paperless/requirements.txt
-
-# Create the environment file
-cat /opt/paperless/paperless.conf.example | sed -e 's#CONSUMPTION_DIR=""#CONSUMPTION_DIR="/home/vagrant/consumption"#' > /etc/paperless.conf
-chmod 0640 /etc/paperless.conf
-chown root:vagrant /etc/paperless.conf
-
-# Create the consumption directory
-mkdir /home/vagrant/consumption
-chown vagrant:vagrant /home/vagrant/consumption
-
-echo "
-
-
-Now follow the remaining steps in the Vagrant section of the setup
-documentation to complete the process:
-
-http://paperless.readthedocs.org/en/latest/setup.html#setup-installation-vagrant
-
-
-"
--- a/src/documents/actions.py
+++ b/src/documents/actions.py
@@ -0,0 +1,204 @@
+from django.contrib import messages
+from django.contrib.admin import helpers
+from django.contrib.admin.utils import model_ngettext
+from django.core.exceptions import PermissionDenied
+from django.template.response import TemplateResponse
+
+from documents.classifier import DocumentClassifier
+from documents.models import Correspondent, DocumentType, Tag
+
+
+def select_action(
+        modeladmin, request, queryset, title, action, modelclass,
+        success_message="", document_action=None, queryset_action=None):
+    """
+    Generic two-step admin action that applies a user-chosen object
+    (a ``modelclass`` instance) to the selected documents.
+
+    Without a truthy ``post`` field in the request, an intermediate page
+    listing all ``modelclass`` objects is rendered.  With it, the object
+    identified by the ``obj_id`` POST field is looked up and
+    ``document_action(document, obj)`` is called for every document, followed
+    by a single ``queryset_action(queryset, obj)`` call.  A change is logged
+    for every document.
+
+    ``success_message`` is %-formatted with ``selected_object``, ``count``
+    and ``items``.
+
+    Returns a ``TemplateResponse`` for the selection page, or ``None`` to
+    make the admin show the change list again.  Raises ``PermissionDenied``
+    if the user lacks change permission.
+    """
+
+    opts = modeladmin.model._meta
+    app_label = opts.app_label
+
+    if not modeladmin.has_change_permission(request):
+        raise PermissionDenied
+
+    if request.POST.get("post"):
+        # obj_id is client-supplied: it may be missing, malformed, or refer
+        # to an object that was deleted in the meantime.  Report that to the
+        # user instead of letting the lookup fail with a server error.
+        try:
+            selected_object = modelclass.objects.get(
+                id=request.POST.get("obj_id"))
+        except (modelclass.DoesNotExist, ValueError):
+            modeladmin.message_user(
+                request,
+                "The selected {} does not exist.".format(
+                    model_ngettext(modelclass, 1)),
+                messages.ERROR
+            )
+            return None
+
+        n = queryset.count()
+        if n:
+            for document in queryset:
+                if document_action:
+                    document_action(document, selected_object)
+                document_display = str(document)
+                modeladmin.log_change(request, document, document_display)
+            if queryset_action:
+                queryset_action(queryset, selected_object)
+
+            modeladmin.message_user(request, success_message % {
+                "selected_object": selected_object.name,
+                "count": n,
+                "items": model_ngettext(modeladmin.opts, n)
+            }, messages.SUCCESS)
+
+        # Return None to display the change list page again.
+        return None
+
+    context = dict(
+        modeladmin.admin_site.each_context(request),
+        title=title,
+        queryset=queryset,
+        opts=opts,
+        action_checkbox_name=helpers.ACTION_CHECKBOX_NAME,
+        media=modeladmin.media,
+        action=action,
+        objects=modelclass.objects.all(),
+        itemname=model_ngettext(modelclass, 1)
+    )
+
+    request.current_app = modeladmin.admin_site.name
+
+    return TemplateResponse(
+        request,
+        "admin/{}/{}/select_object.html".format(app_label, opts.model_name),
+        context
+    )
+
+
+def simple_action(
+        modeladmin, request, queryset, success_message="",
+        document_action=None, queryset_action=None):
+    """
+    Generic one-step admin action that needs no extra user input.
+
+    Calls ``document_action(document)`` for every selected document, logs a
+    change for each of them, then calls ``queryset_action(queryset)`` once.
+    ``success_message`` is %-formatted with ``count`` and ``items`` and shown
+    to the user.  Nothing happens when the selection is empty.
+
+    Always returns ``None``; raises ``PermissionDenied`` if the user lacks
+    change permission.
+    """
+
+    if not modeladmin.has_change_permission(request):
+        raise PermissionDenied
+
+    document_total = queryset.count()
+    if not document_total:
+        # Return None to display the change list page again.
+        return None
+
+    for selected in queryset:
+        if document_action:
+            document_action(selected)
+        modeladmin.log_change(request, selected, str(selected))
+
+    if queryset_action:
+        queryset_action(queryset)
+
+    summary = success_message % {
+        "count": document_total,
+        "items": model_ngettext(modeladmin.opts, document_total)
+    }
+    modeladmin.message_user(request, summary, messages.SUCCESS)
+
+    # Return None to display the change list page again.
+    return None
+
+
+def add_tag_to_selected(modeladmin, request, queryset):
+    """
+    Admin action: let the user pick a Tag, then add it to every selected
+    document (``doc.tags.add``).  Delegates to ``select_action``.
+    """
+    return select_action(
+        modeladmin=modeladmin,
+        request=request,
+        queryset=queryset,
+        title="Add tag to multiple documents",
+        action="add_tag_to_selected",
+        modelclass=Tag,
+        success_message="Successfully added tag %(selected_object)s to "
+                        "%(count)d %(items)s.",
+        document_action=lambda doc, tag: doc.tags.add(tag)
+    )
+
+
+def remove_tag_from_selected(modeladmin, request, queryset):
+    """
+    Admin action: let the user pick a Tag, then remove it from every
+    selected document (``doc.tags.remove``).  Delegates to ``select_action``.
+    """
+    return select_action(
+        modeladmin=modeladmin,
+        request=request,
+        queryset=queryset,
+        title="Remove tag from multiple documents",
+        action="remove_tag_from_selected",
+        modelclass=Tag,
+        success_message="Successfully removed tag %(selected_object)s from "
+                        "%(count)d %(items)s.",
+        document_action=lambda doc, tag: doc.tags.remove(tag)
+    )
+
+
+def set_correspondent_on_selected(modeladmin, request, queryset):
+    """
+    Admin action: let the user pick a Correspondent, then assign it to all
+    selected documents with a single ``queryset.update()`` call.  Delegates
+    to ``select_action``.
+    """
+
+    return select_action(
+        modeladmin=modeladmin,
+        request=request,
+        queryset=queryset,
+        title="Set correspondent on multiple documents",
+        action="set_correspondent_on_selected",
+        modelclass=Correspondent,
+        success_message="Successfully set correspondent %(selected_object)s "
+                        "on %(count)d %(items)s.",
+        queryset_action=lambda qs, corr: qs.update(correspondent=corr)
+    )
+
+
+def remove_correspondent_from_selected(modeladmin, request, queryset):
+    """
+    Admin action: clear the correspondent of all selected documents with a
+    single ``queryset.update(correspondent=None)``.  Delegates to
+    ``simple_action``.
+    """
+    return simple_action(
+        modeladmin=modeladmin,
+        request=request,
+        queryset=queryset,
+        success_message="Successfully removed correspondent from %(count)d "
+                        "%(items)s.",
+        queryset_action=lambda qs: qs.update(correspondent=None)
+    )
+
+
+def set_document_type_on_selected(modeladmin, request, queryset):
+    """
+    Admin action: let the user pick a DocumentType, then assign it to all
+    selected documents with a single ``queryset.update()`` call.  Delegates
+    to ``select_action``.
+    """
+    return select_action(
+        modeladmin=modeladmin,
+        request=request,
+        queryset=queryset,
+        title="Set document type on multiple documents",
+        action="set_document_type_on_selected",
+        modelclass=DocumentType,
+        success_message="Successfully set document type %(selected_object)s "
+                        "on %(count)d %(items)s.",
+        queryset_action=lambda qs, document_type: qs.update(
+            document_type=document_type)
+    )
+
+
+def remove_document_type_from_selected(modeladmin, request, queryset):
+    """
+    Admin action: clear the document type of all selected documents with a
+    single ``queryset.update(document_type=None)``.  Delegates to
+    ``simple_action``.
+    """
+    return simple_action(
+        modeladmin=modeladmin,
+        request=request,
+        queryset=queryset,
+        success_message="Successfully removed document type from %(count)d "
+                        "%(items)s.",
+        queryset_action=lambda qs: qs.update(document_type=None)
+    )
+
+
+def run_document_classifier_on_selected(modeladmin, request, queryset):
+    """
+    Admin action: run the trained document classifier on every selected
+    document, letting it set correspondent, tags and document type.
+
+    If the classifier model file cannot be found, an error message is shown
+    and no document is touched.  Always returns ``None`` so the admin
+    displays the change list again.
+    """
+    clf = DocumentClassifier()
+
+    # Only loading the model is guarded: a FileNotFoundError raised later,
+    # while documents are being classified, has a different cause and must
+    # not be reported as a missing model file.
+    try:
+        clf.reload()
+    except FileNotFoundError:
+        modeladmin.message_user(
+            request,
+            "Classifier model file not found.",
+            messages.ERROR
+        )
+        return None
+
+    return simple_action(
+        modeladmin=modeladmin,
+        request=request,
+        queryset=queryset,
+        success_message="Successfully applied document classifier to "
+                        "%(count)d %(items)s.",
+        document_action=lambda doc: clf.classify_document(
+            doc,
+            classify_correspondent=True,
+            classify_tags=True,
+            classify_document_type=True)
+    )
+
+
+# Human-readable labels for the actions above; Django's admin shows an
+# action's ``short_description`` in the change list's action drop-down.
+add_tag_to_selected.short_description = "Add tag to selected documents"
+remove_tag_from_selected.short_description = \
+    "Remove tag from selected documents"
+set_correspondent_on_selected.short_description = \
+    "Set correspondent on selected documents"
+remove_correspondent_from_selected.short_description = \
+    "Remove correspondent from selected documents"
+set_document_type_on_selected.short_description = \
+    "Set document type on selected documents"
+remove_document_type_from_selected.short_description = \
+    "Remove document type from selected documents"
+run_document_classifier_on_selected.short_description = \
+    "Run document classifier on selected"
--- a/src/documents/admin.py
+++ b/src/documents/admin.py
@@ -1,37 +1,28 @@
-from datetime import datetime
+from datetime import datetime, timedelta

 from django.conf import settings
-from django.contrib import admin
-from django.contrib.auth.models import User, Group
-from django.core.urlresolvers import reverse
+from django.contrib import admin, messages
+from django.contrib.admin.templatetags.admin_urls import add_preserved_filters
+from django.contrib.auth.models import Group, User
+from django.db import models
+from django.http import HttpResponseRedirect
 from django.templatetags.static import static
+from django.urls import reverse
+from django.utils.html import format_html, format_html_join
+from django.utils.http import urlquote
+from django.utils.safestring import mark_safe

-from .models import Correspondent, Tag, Document, Log
+from documents.actions import (
+    add_tag_to_selected,
+    remove_correspondent_from_selected,
+    remove_tag_from_selected,
+    set_correspondent_on_selected,
+    set_document_type_on_selected,
+    remove_document_type_from_selected,
+    run_document_classifier_on_selected
+)

-
-class MonthListFilter(admin.SimpleListFilter):
-
-    title = "Month"
-
-    # Parameter for the filter that will be used in the URL query.
-    parameter_name = "month"
-
-    def lookups(self, request, model_admin):
-        r = []
-        for document in Document.objects.all():
-            r.append((
-                document.created.strftime("%Y-%m"),
-                document.created.strftime("%B %Y")
-            ))
-        return sorted(set(r), key=lambda x: x[0], reverse=True)
-
-    def queryset(self, request, queryset):
-
-        if not self.value():
-            return None
-
-        year, month = self.value().split("-")
-        return queryset.filter(created__year=year, created__month=month)
+from .models import Correspondent, Document, DocumentType, Log, Tag


 class FinancialYearFilter(admin.SimpleListFilter):
@@ -73,12 +64,12 @@ class FinancialYearFilter(admin.SimpleListFilter):

            # To keep it simple we use the same string for both
            # query parameter and the display.
-            return (query, query)
+            return query, query

        else:
            query = "{0}-{0}".format(date.year)
            display = "{}".format(date.year)
-            return (query, display)
+            return query, display

    def lookups(self, request, model_admin):
        if not settings.FY_START or not settings.FY_END:
@@ -99,29 +90,100 @@ class FinancialYearFilter(admin.SimpleListFilter):
                               created__lte=self._fy_end(end))


+class RecentCorrespondentFilter(admin.RelatedFieldListFilter):
+    """
+    Correspondent list filter for the document change list.
+
+    When PAPERLESS_RECENT_CORRESPONDENT_YEARS is a positive number ``n``,
+    only correspondents with at least one document created within the last
+    ``365 * n`` days are offered and the filter title changes to
+    "Correspondent (Recent)".  Otherwise all correspondents are offered.
+    """
+
+    def field_choices(self, field, request, model_admin):
+        # Returns the (id, name) pairs shown as filter choices.
+        candidates = Correspondent.objects.all()
+        window_years = settings.PAPERLESS_RECENT_CORRESPONDENT_YEARS
+
+        if window_years and window_years > 0:
+            self.title = "Correspondent (Recent)"
+            cutoff = datetime.now() - timedelta(days=365 * window_years)
+            # distinct() because the join on documents yields one row per
+            # matching document.
+            candidates = candidates.filter(
+                documents__created__gte=cutoff
+            ).distinct()
+
+        return [(entry.id, entry.name) for entry in candidates]
+
+
 class CommonAdmin(admin.ModelAdmin):
    list_per_page = settings.PAPERLESS_LIST_PER_PAGE


 class CorrespondentAdmin(CommonAdmin):

-    list_display = ("name", "match", "matching_algorithm", "document_count")
-    list_filter = ("matching_algorithm",)
-    list_editable = ("match", "matching_algorithm")
+    list_display = (
+        "name",
+        "automatic_classification",
+        "document_count",
+        "last_correspondence"
+    )
+    list_editable = ("automatic_classification",)
+
+    readonly_fields = ("slug",)
+
+    def get_queryset(self, request):
+        qs = super(CorrespondentAdmin, self).get_queryset(request)
+        qs = qs.annotate(
+            document_count=models.Count("documents"),
+            last_correspondence=models.Max("documents__created")
+        )
+        return qs

    def document_count(self, obj):
-        return obj.documents.count()
+        return obj.document_count
+    document_count.admin_order_field = "document_count"
+
+    def last_correspondence(self, obj):
+        return obj.last_correspondence
+    last_correspondence.admin_order_field = "last_correspondence"


 class TagAdmin(CommonAdmin):

-    list_display = ("name", "colour", "match", "matching_algorithm",
-                    "document_count")
-    list_filter = ("colour", "matching_algorithm")
-    list_editable = ("colour", "match", "matching_algorithm")
+    list_display = (
+        "name",
+        "colour",
+        "automatic_classification",
+        "document_count")
+    list_filter = ("colour",)
+    list_editable = ("colour", "automatic_classification")
+
+    readonly_fields = ("slug",)
+
+    class Media:
+        js = ("js/colours.js",)
+
+    def get_queryset(self, request):
+        qs = super(TagAdmin, self).get_queryset(request)
+        qs = qs.annotate(document_count=models.Count("documents"))
+        return qs

    def document_count(self, obj):
-        return obj.documents.count()
+        return obj.document_count
+    document_count.admin_order_field = "document_count"
+
+
+class DocumentTypeAdmin(CommonAdmin):
+    """
+    Admin for document types: lists name, the automatic classification flag
+    (editable in the list) and the number of documents of each type.
+    """
+
+    list_display = ("name", "automatic_classification", "document_count")
+    list_editable = ("automatic_classification",)
+
+    readonly_fields = ("slug",)
+
+    def get_queryset(self, request):
+        # Annotate the count in the list query itself so document_count()
+        # needs no per-row query and the column can be sorted.
+        base = super(DocumentTypeAdmin, self).get_queryset(request)
+        return base.annotate(document_count=models.Count("documents"))
+
+    def document_count(self, obj):
+        # Value comes from the annotation added in get_queryset().
+        return obj.document_count
+    document_count.admin_order_field = "document_count"


 class DocumentAdmin(CommonAdmin):
@@ -132,14 +194,36 @@ class DocumentAdmin(CommonAdmin):
        }

    search_fields = ("correspondent__name", "title", "content", "tags__name")
-    readonly_fields = ("added",)
+    readonly_fields = ("added", "file_type", "storage_type",)
    list_display = ("title", "created", "added", "thumbnail", "correspondent",
-                    "tags_")
-    list_filter = ("tags", "correspondent", FinancialYearFilter,
-                   MonthListFilter)
+                    "tags_", "archive_serial_number", "document_type")
+    list_filter = (
+        "document_type",
+        "tags",
+        ("correspondent", RecentCorrespondentFilter),
+        FinancialYearFilter
+    )
+
+    filter_horizontal = ("tags",)

    ordering = ["-created", "correspondent"]

+    actions = [
+        add_tag_to_selected,
+        remove_tag_from_selected,
+        set_correspondent_on_selected,
+        remove_correspondent_from_selected,
+        set_document_type_on_selected,
+        remove_document_type_from_selected,
+        run_document_classifier_on_selected
+    ]
+
+    date_hierarchy = "created"
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        # Ids of the documents shown by the most recent change list GET
+        # request; used by change_view() to find the "next" document.
+        # NOTE(review): this lives on the admin instance, which is presumably
+        # shared by all users and requests of the process — confirm that
+        # cross-user mixing of the queue is acceptable.
+        self.document_queue = []
+
    def has_add_permission(self, request):
        return False

@@ -147,6 +231,82 @@ class DocumentAdmin(CommonAdmin):
        return obj.created.date().strftime("%Y-%m-%d")
    created_.short_description = "Created"

+    def changelist_view(self, request, extra_context=None):
+        """
+        Render the change list and, on GET requests, remember the ids of the
+        listed documents in ``self.document_queue`` so that change_view() can
+        offer a "next document" link.
+        """
+
+        response = super().changelist_view(
+            request,
+            extra_context=extra_context
+        )
+
+        if request.method == "GET":
+            # Reuse the ChangeList the parent view already built instead of
+            # constructing a second one: with invalid lookup parameters the
+            # parent answers with a redirect / error page, and building the
+            # ChangeList again here would raise IncorrectLookupParameters
+            # and turn that into a server error.  Such responses carry no
+            # "cl" entry, in which case the previous queue is kept.
+            context_data = getattr(response, "context_data", None) or {}
+            cl = context_data.get("cl")
+            if cl is not None:
+                self.document_queue = [doc.id for doc in cl.queryset]
+
+        return response
+
+    def change_view(self, request, object_id=None, form_url='',
+                    extra_context=None):
+        """
+        Render the document change form with extra template context:
+        ``download_url`` and ``file_type`` of the document and, if the
+        document is part of the queue recorded by changelist_view() and is
+        not its last entry, ``next_object`` (the id of the following one).
+        """
+
+        extra_context = extra_context or {}
+
+        # get_object() returns None for unknown or malformed ids instead of
+        # raising, so the parent view can produce its regular "object does
+        # not exist" response rather than a server error.
+        doc = self.get_object(request, object_id)
+        if doc is not None:
+            extra_context["download_url"] = doc.download_url
+            extra_context["file_type"] = doc.file_type
+
+            if doc.id in self.document_queue:
+                # There is a queue of documents
+                current_index = self.document_queue.index(doc.id)
+                if current_index < len(self.document_queue) - 1:
+                    # ... and there are still documents in the queue
+                    extra_context["next_object"] = self.document_queue[
+                        current_index + 1
+                    ]
+
+        return super(DocumentAdmin, self).change_view(
+            request,
+            object_id,
+            form_url,
+            extra_context=extra_context,
+        )
+
+    def response_change(self, request, obj):
+        """
+        Handle the "save and edit next" button: when the POST data contains
+        ``_saveandeditnext`` together with a ``_next_object`` id, show a
+        success message and redirect to that document's change form, keeping
+        the change list filters.  Every other case is handled by the default
+        implementation.
+        """
+
+        # This is mostly copied from ModelAdmin.response_change()
+        opts = self.model._meta
+        preserved_filters = self.get_preserved_filters(request)
+
+        msg_dict = {
+            "name": opts.verbose_name,
+            "obj": format_html(
+                '<a href="{}">{}</a>',
+                urlquote(request.path),
+                obj
+            ),
+        }
+
+        # Both fields are client-supplied; without a usable next id there is
+        # nowhere to redirect to, so fall back to the default behaviour
+        # instead of failing on the missing key.
+        next_object_id = request.POST.get("_next_object")
+        if "_saveandeditnext" in request.POST and next_object_id:
+            msg = format_html(
+                'The {name} "{obj}" was changed successfully. '
+                'Editing next object.',
+                **msg_dict
+            )
+            self.message_user(request, msg, messages.SUCCESS)
+            redirect_url = reverse(
+                "admin:{}_{}_change".format(opts.app_label, opts.model_name),
+                args=(next_object_id,),
+                current_app=self.admin_site.name
+            )
+            redirect_url = add_preserved_filters(
+                {
+                    "preserved_filters": preserved_filters,
+                    "opts": opts
+                },
+                redirect_url
+            )
+            return HttpResponseRedirect(redirect_url)
+
+        return super().response_change(request, obj)
+
+    @mark_safe
    def thumbnail(self, obj):
        return self._html_tag(
            "a",
@@ -159,8 +319,8 @@ class DocumentAdmin(CommonAdmin):
            ),
            href=obj.download_url
        )
-    thumbnail.allow_tags = True

+    @mark_safe
    def tags_(self, obj):
        r = ""
        for tag in obj.tags.all():
@@ -178,9 +338,10 @@ class DocumentAdmin(CommonAdmin):
                }
            )
        return r
-    tags_.allow_tags = True

+    @mark_safe
    def document(self, obj):
+        # TODO: is this method even used anymore?
        return self._html_tag(
            "a",
            self._html_tag(
@@ -193,20 +354,16 @@ class DocumentAdmin(CommonAdmin):
            ),
            href=obj.download_url
        )
-    document.allow_tags = True

    @staticmethod
    def _html_tag(kind, inside=None, **kwargs):
-
-        attributes = []
-        for lft, rgt in kwargs.items():
-            attributes.append('{}="{}"'.format(lft, rgt))
+        attributes = format_html_join(' ', '{}="{}"', kwargs.items())

        if inside is not None:
-            return "<{kind} {attributes}>{inside}</{kind}>".format(
-                kind=kind, attributes=" ".join(attributes), inside=inside)
+            return format_html("<{kind} {attributes}>{inside}</{kind}>",
+                               kind=kind, attributes=attributes, inside=inside)

-        return "<{} {}/>".format(kind, " ".join(attributes))
+        return format_html("<{} {}/>", kind, attributes)


 class LogAdmin(CommonAdmin):
@@ -217,6 +374,7 @@ class LogAdmin(CommonAdmin):

 admin.site.register(Correspondent, CorrespondentAdmin)
 admin.site.register(Tag, TagAdmin)
+admin.site.register(DocumentType, DocumentTypeAdmin)
 admin.site.register(Document, DocumentAdmin)
 admin.site.register(Log, LogAdmin)

--- a/src/documents/apps.py
+++ b/src/documents/apps.py
@@ -11,8 +11,8 @@ class DocumentsConfig(AppConfig):
        from .signals import document_consumption_started
        from .signals import document_consumption_finished
        from .signals.handlers import (
-            set_correspondent,
-            set_tags,
+            classify_document,
+            add_inbox_tags,
            run_pre_consume_script,
            run_post_consume_script,
            cleanup_document_deletion,
@@ -21,8 +21,8 @@ class DocumentsConfig(AppConfig):

        document_consumption_started.connect(run_pre_consume_script)

-        document_consumption_finished.connect(set_tags)
-        document_consumption_finished.connect(set_correspondent)
+        document_consumption_finished.connect(classify_document)
+        document_consumption_finished.connect(add_inbox_tags)
        document_consumption_finished.connect(set_log_entry)
        document_consumption_finished.connect(run_post_consume_script)

--- a/src/documents/checks.py
+++ b/src/documents/checks.py
@@ -2,7 +2,7 @@ import textwrap

 from django.conf import settings
 from django.core.checks import Error, register
-from django.db.utils import OperationalError
+from django.db.utils import OperationalError, ProgrammingError


@register()
@@ -14,7 +14,7 @@ def changed_password_check(app_configs, **kwargs):
    try:
        encrypted_doc = Document.objects.filter(
            storage_type=Document.STORAGE_TYPE_GPG).first()
-    except OperationalError:
+    except (OperationalError, ProgrammingError):
        return []  # No documents table yet

    if encrypted_doc:
--- a/src/documents/classifier.py
+++ b/src/documents/classifier.py
@@ -0,0 +1,240 @@
+import logging
+import os
+import pickle
+
+from sklearn.feature_extraction.text import CountVectorizer
+from sklearn.neural_network import MLPClassifier
+from sklearn.preprocessing import MultiLabelBinarizer, LabelBinarizer
+
+from documents.models import Correspondent, DocumentType, Tag, Document
+from paperless import settings
+
+
+def preprocess_content(content):
+    """
+    Normalise document text for the classifier: lower-case it, strip
+    surrounding whitespace, turn line breaks into spaces and collapse runs
+    of spaces into a single space.  Other whitespace (e.g. tabs) inside the
+    text is left untouched.
+    """
+    normalised = content.lower().strip()
+    for line_break in ("\n", "\r"):
+        normalised = normalised.replace(line_break, " ")
+    # One replace pass only halves a run of spaces, so repeat until no
+    # double space is left.
+    while "  " in normalised:
+        normalised = normalised.replace("  ", " ")
+    return normalised
+
+
+class DocumentClassifier(object):
+
+    def __init__(self):
+        # Modification time of the model file that was loaded last; 0 means
+        # nothing has been loaded yet (see reload()).
+        self.classifier_version = 0
+
+        # Turns preprocessed document text into feature vectors.
+        self.data_vectorizer = None
+
+        # Translate between database ids and the label encoding used by the
+        # classifiers below.
+        self.tags_binarizer = None
+        self.correspondent_binarizer = None
+        self.document_type_binarizer = None
+
+        # The trained models; None until train() or reload() has run (train()
+        # may also leave one as None when it has nothing to train on).
+        self.tags_classifier = None
+        self.correspondent_classifier = None
+        self.document_type_classifier = None
+
+    def reload(self):
+        """
+        Load the models from ``settings.MODEL_FILE`` if that file is newer
+        than the models currently held.
+
+        Raises ``FileNotFoundError`` (from ``os.path.getmtime``) if the model
+        file does not exist.
+        """
+        # Read the modification time once.  Stamping the models with a second
+        # reading taken after loading would mark a file that was replaced
+        # mid-load as already loaded, and the newer models would never be
+        # picked up.
+        model_mtime = os.path.getmtime(settings.MODEL_FILE)
+        if model_mtime > self.classifier_version:
+            logging.getLogger(__name__).info("Reloading classifier models")
+            # NOTE: unpickling can execute arbitrary code; MODEL_FILE must
+            # only ever be written by trusted code (save_classifier()).
+            with open(settings.MODEL_FILE, "rb") as f:
+                # Same order as written by save_classifier().  Load into
+                # locals first so that a truncated or corrupt file cannot
+                # leave the classifier half-updated.
+                data_vectorizer = pickle.load(f)
+                tags_binarizer = pickle.load(f)
+                correspondent_binarizer = pickle.load(f)
+                document_type_binarizer = pickle.load(f)
+
+                tags_classifier = pickle.load(f)
+                correspondent_classifier = pickle.load(f)
+                document_type_classifier = pickle.load(f)
+
+            self.data_vectorizer = data_vectorizer
+            self.tags_binarizer = tags_binarizer
+            self.correspondent_binarizer = correspondent_binarizer
+            self.document_type_binarizer = document_type_binarizer
+
+            self.tags_classifier = tags_classifier
+            self.correspondent_classifier = correspondent_classifier
+            self.document_type_classifier = document_type_classifier
+
+            self.classifier_version = model_mtime
+
+    def save_classifier(self):
+        """
+        Write the vectorizer, the three binarizers and the three classifiers
+        to ``settings.MODEL_FILE`` as consecutive pickles.
+        """
+        # Order matters: reload() reads the objects back in this sequence.
+        components = (
+            self.data_vectorizer,
+            self.tags_binarizer,
+            self.correspondent_binarizer,
+            self.document_type_binarizer,
+            self.tags_classifier,
+            self.correspondent_classifier,
+            self.document_type_classifier,
+        )
+        with open(settings.MODEL_FILE, "wb") as model_file:
+            for component in components:
+                pickle.dump(component, model_file)
+
+    def train(self):
+        """
+        Train the vectorizer, the label binarizers and the three classifiers
+        from the documents in the database.
+
+        Documents carrying an inbox tag are left out.  Only tags,
+        correspondents and document types with ``automatic_classification``
+        enabled are used as labels.  The results are kept on the instance;
+        nothing is written to disk here (see save_classifier()).
+        """
+        data = list()
+        labels_tags = list()
+        labels_correspondent = list()
+        labels_document_type = list()
+
+        # Step 1: Extract and preprocess training data from the database.
+        logging.getLogger(__name__).info("Gathering data from database...")
+        for doc in Document.objects.exclude(tags__is_inbox_tag=True):
+            data.append(preprocess_content(doc.content))
+
+            # -1 is the label for "no automatically classified document
+            # type"; the _classify_* methods map it back to None.
+            y = -1
+            if doc.document_type:
+                if doc.document_type.automatic_classification:
+                    y = doc.document_type.id
+            labels_document_type.append(y)
+
+            # Same -1 convention for the correspondent.
+            y = -1
+            if doc.correspondent:
+                if doc.correspondent.automatic_classification:
+                    y = doc.correspondent.id
+            labels_correspondent.append(y)
+
+            # A document can carry several tags, hence a list of ids per
+            # document (possibly empty).
+            tags = [tag.id for tag in doc.tags.filter(
+                automatic_classification=True
+            )]
+            labels_tags.append(tags)
+
+        labels_tags_unique = set([tag for tags in labels_tags for tag in tags])
+        # Note: the correspondent / document type counts below include the
+        # -1 "none" label when it occurs.
+        logging.getLogger(__name__).info(
+            "{} documents, {} tag(s), {} correspondent(s), "
+            "{} document type(s).".format(
+                len(data),
+                len(labels_tags_unique),
+                len(set(labels_correspondent)),
+                len(set(labels_document_type))
+            )
+        )
+
+        # Step 2: vectorize data
+        logging.getLogger(__name__).info("Vectorizing data...")
+        # Character n-grams of length 3 to 5; per scikit-learn, min_df=0.1
+        # drops n-grams that occur in fewer than 10% of the documents.
+        self.data_vectorizer = CountVectorizer(
+            analyzer="char",
+            ngram_range=(3, 5),
+            min_df=0.1
+        )
+        data_vectorized = self.data_vectorizer.fit_transform(data)
+
+        self.tags_binarizer = MultiLabelBinarizer()
+        labels_tags_vectorized = self.tags_binarizer.fit_transform(labels_tags)
+
+        self.correspondent_binarizer = LabelBinarizer()
+        labels_correspondent_vectorized = \
+            self.correspondent_binarizer.fit_transform(labels_correspondent)
+
+        self.document_type_binarizer = LabelBinarizer()
+        labels_document_type_vectorized = \
+            self.document_type_binarizer.fit_transform(labels_document_type)
+
+        # Step 3: train the classifiers
+        if len(self.tags_binarizer.classes_) > 0:
+            logging.getLogger(__name__).info("Training tags classifier...")
+            self.tags_classifier = MLPClassifier(verbose=True)
+            self.tags_classifier.fit(data_vectorized, labels_tags_vectorized)
+        else:
+            self.tags_classifier = None
+            logging.getLogger(__name__).info(
+                "There are no tags. Not training tags classifier."
+            )
+
+        # NOTE(review): the -1 "none" label is itself a class, so classes_
+        # looks non-empty whenever at least one document was gathered —
+        # confirm whether the else branches below are reachable.
+        if len(self.correspondent_binarizer.classes_) > 0:
+            logging.getLogger(__name__).info(
+                "Training correspondent classifier..."
+            )
+            self.correspondent_classifier = MLPClassifier(verbose=True)
+            self.correspondent_classifier.fit(
+                data_vectorized,
+                labels_correspondent_vectorized
+            )
+        else:
+            self.correspondent_classifier = None
+            logging.getLogger(__name__).info(
+                "There are no correspondents. Not training correspondent "
+                "classifier."
+            )
+
+        if len(self.document_type_binarizer.classes_) > 0:
+            logging.getLogger(__name__).info(
+                "Training document type classifier..."
+            )
+            self.document_type_classifier = MLPClassifier(verbose=True)
+            self.document_type_classifier.fit(
+                data_vectorized,
+                labels_document_type_vectorized
+            )
+        else:
+            self.document_type_classifier = None
+            logging.getLogger(__name__).info(
+                "There are no document types. Not training document type "
+                "classifier."
+            )
+
+    def classify_document(
+            self, document, classify_correspondent=False,
+            classify_document_type=False, classify_tags=False,
+            replace_tags=False):
+        """
+        Apply the loaded models to ``document``.
+
+        Each ``classify_*`` flag enables one prediction; a prediction is
+        skipped when its classifier is not available.  With ``replace_tags``
+        the document's existing tags are cleared before the predicted ones
+        are added.  The correspondent and document type fields are saved at
+        the end in any case.
+        """
+        cleaned_text = preprocess_content(document.content)
+        features = self.data_vectorizer.transform([cleaned_text])
+
+        if self.correspondent_classifier and classify_correspondent:
+            self._classify_correspondent(features, document)
+
+        if self.document_type_classifier and classify_document_type:
+            self._classify_document_type(features, document)
+
+        if self.tags_classifier and classify_tags:
+            self._classify_tags(features, document, replace_tags)
+
+        document.save(update_fields=("correspondent", "document_type"))
+
+    def _classify_correspondent(self, X, document):
+        """
+        Predict the correspondent for feature vector ``X`` and set it on
+        ``document`` (without saving).  A prediction of -1 clears the
+        correspondent; a predicted id that no longer exists leaves the
+        document unchanged and logs a warning.
+        """
+        logger = logging.getLogger(__name__)
+        prediction = self.correspondent_classifier.predict(X)
+        predicted_id = self.correspondent_binarizer.inverse_transform(
+            prediction)[0]
+
+        if predicted_id == -1:
+            logger.info("Detected correspondent: -")
+            document.correspondent = None
+            return
+
+        try:
+            match = Correspondent.objects.get(id=predicted_id)
+        except Correspondent.DoesNotExist:
+            logger.warning(
+                "Detected correspondent with id {} does not exist "
+                "anymore! Did you delete it?".format(predicted_id)
+            )
+            return
+
+        logger.info("Detected correspondent: {}".format(match.name))
+        document.correspondent = match
+
+    def _classify_document_type(self, X, document):
+        """
+        Predict the document type for feature vector ``X`` and set it on
+        ``document`` (without saving).  A prediction of -1 clears the
+        document type; a predicted id that no longer exists leaves the
+        document unchanged and logs a warning.
+        """
+        logger = logging.getLogger(__name__)
+        prediction = self.document_type_classifier.predict(X)
+        predicted_id = self.document_type_binarizer.inverse_transform(
+            prediction)[0]
+
+        if predicted_id == -1:
+            logger.info("Detected document type: -")
+            document.document_type = None
+            return
+
+        try:
+            match = DocumentType.objects.get(id=predicted_id)
+        except DocumentType.DoesNotExist:
+            logger.warning(
+                "Detected document type with id {} does not exist "
+                "anymore! Did you delete it?".format(predicted_id)
+            )
+            return
+
+        logger.info("Detected document type: {}".format(match.name))
+        document.document_type = match
+
+    def _classify_tags(self, X, document, replace_tags):
+        """
+        Predict tags for feature vector ``X`` and add them to ``document``.
+
+        With ``replace_tags`` the document's current tags are cleared first.
+        Predicted tag ids that no longer exist are skipped with a warning.
+        """
+        y = self.tags_classifier.predict(X)
+        # One feature vector in, so take the first (only) set of tag ids.
+        tags_ids = self.tags_binarizer.inverse_transform(y)[0]
+        if replace_tags:
+            document.tags.clear()
+        for tag_id in tags_ids:
+            try:
+                tag = Tag.objects.get(id=tag_id)
+                logging.getLogger(__name__).info(
+                    "Detected tag: {}".format(tag.name)
+                )
+                document.tags.add(tag)
+            except Tag.DoesNotExist:
+                logging.getLogger(__name__).warning(
+                    "Detected tag with id {} does not exist anymore! Did "
+                    "you delete it?".format(tag_id)
+                )
--- a/src/documents/consumer.py
+++ b/src/documents/consumer.py
@@ -1,3 +1,4 @@
+from django.db import transaction
 import datetime
 import hashlib
 import logging
@@ -111,8 +112,11 @@ class Consumer:
                if not self.try_consume_file(file):
                    self._ignore.append((file, mtime))

+    @transaction.atomic
    def try_consume_file(self, file):
-        "Return True if file was consumed"
+        """
+        Return True if file was consumed
+        """

        if not re.match(FileInfo.REGEXES["title"], file):
            return False
@@ -145,7 +149,7 @@ class Consumer:
        parsed_document = parser_class(doc)

        try:
-            thumbnail = parsed_document.get_thumbnail()
+            thumbnail = parsed_document.get_optimised_thumbnail()
            date = parsed_document.get_date()
            document = self._store(
                parsed_document.get_text(),
@@ -221,7 +225,7 @@ class Consumer:
                storage_type=self.storage_type
            )

-        relevant_tags = set(list(Tag.match_all(text)) + list(file_info.tags))
+        relevant_tags = set(file_info.tags)
        if relevant_tags:
            tag_names = ", ".join([t.slug for t in relevant_tags])
            self.log("debug", "Tagging with {}".format(tag_names))
--- a/src/documents/filters.py
+++ b/src/documents/filters.py
@@ -1,11 +1,17 @@
-from django_filters.rest_framework import CharFilter, FilterSet
+from django_filters.rest_framework import BooleanFilter, FilterSet

-from .models import Correspondent, Document, Tag
+from .models import Correspondent, Document, Tag, DocumentType
+
+
+# Lookup expressions offered for the character fields listed in
+# DocumentFilterSet.Meta.fields further down in this module.
+CHAR_KWARGS = (
+    "startswith", "endswith", "contains",
+    "istartswith", "iendswith", "icontains"
+)


 class CorrespondentFilterSet(FilterSet):

-    class Meta(object):
+    class Meta:
        model = Correspondent
        fields = {
            "name": [
@@ -18,7 +24,7 @@ class CorrespondentFilterSet(FilterSet):

 class TagFilterSet(FilterSet):

-    class Meta(object):
+    class Meta:
        model = Tag
        fields = {
            "name": [
@@ -29,30 +35,42 @@ class TagFilterSet(FilterSet):
        }


+class DocumentTypeFilterSet(FilterSet):
+    """
+    Filter set for DocumentType, filtering on its name and slug.
+    """
+
+    class Meta:
+        model = DocumentType
+        fields = {
+            # "name" accepts every lookup listed in CHAR_KWARGS.
+            "name": list(CHAR_KWARGS),
+            "slug": ["istartswith", "iendswith", "icontains"],
+        }
+
+
 class DocumentFilterSet(FilterSet):

-    CHAR_KWARGS = {
-        "lookup_expr": (
-            "startswith",
-            "endswith",
-            "contains",
-            "istartswith",
-            "iendswith",
-            "icontains"
-        )
-    }
+    # NOTE(review): the attribute name and the label point in opposite
+    # directions.  With exclude=True the "isnull" lookup is presumably applied
+    # through QuerySet.exclude(), so a true value would keep only documents
+    # that have tags (matching the "Is tagged" label) -- confirm against the
+    # django-filter documentation before relying on it.
+    tags_empty = BooleanFilter(
+        label="Is tagged",
+        field_name="tags",
+        lookup_expr="isnull",
+        exclude=True
+    )

-    correspondent__name = CharFilter(name="correspondent__name", **CHAR_KWARGS)
-    correspondent__slug = CharFilter(name="correspondent__slug", **CHAR_KWARGS)
-    tags__name = CharFilter(name="tags__name", **CHAR_KWARGS)
-    tags__slug = CharFilter(name="tags__slug", **CHAR_KWARGS)
-
-    class Meta(object):
+    class Meta:
        model = Document
        fields = {
-            "title": [
-                "startswith", "endswith", "contains",
-                "istartswith", "iendswith", "icontains"
-            ],
-            "content": ["contains", "icontains"],
+
+            # Each key is a field path; the value lists its allowed lookups.
+            "title": CHAR_KWARGS,
+            "content": ("contains", "icontains"),
+
+            "correspondent__name": CHAR_KWARGS,
+            "correspondent__slug": CHAR_KWARGS,
+
+            "tags__name": CHAR_KWARGS,
+            "tags__slug": CHAR_KWARGS,
+
+            "document_type__name": CHAR_KWARGS,
+            "document_type__slug": CHAR_KWARGS,
+
        }
--- a/src/documents/management/commands/document_consumer.py
+++ b/src/documents/management/commands/document_consumer.py
@@ -1,7 +1,5 @@
-import datetime
 import logging
 import os
-import sys
 import time

 from django.conf import settings
@@ -13,7 +11,7 @@ from ...mail import MailFetcher, MailFetcherError
 try:
    from inotify_simple import INotify, flags
 except ImportError:
-    pass
+    INotify = flags = None


 class Command(BaseCommand):
@@ -62,7 +60,8 @@ class Command(BaseCommand):
        parser.add_argument(
            "--no-inotify",
            action="store_true",
-            help="Don't use inotify, even if it's available."
+            help="Don't use inotify, even if it's available.",
+            default=False
        )

    def handle(self, *args, **options):
@@ -71,8 +70,7 @@ class Command(BaseCommand):
        directory = options["directory"]
        loop_time = options["loop_time"]
        mail_delta = options["mail_delta"] * 60
-        use_inotify = (not options["no_inotify"]
-                       and "inotify_simple" in sys.modules)
+        use_inotify = INotify is not None and options["no_inotify"] is False

        try:
            self.file_consumer = Consumer(consume=directory)
--- a/src/documents/management/commands/document_correspondents.py
+++ b/src/documents/management/commands/document_correspondents.py
@@ -1,82 +0,0 @@
-import sys
-
-from django.core.management.base import BaseCommand
-
-from documents.models import Correspondent, Document
-
-from ...mixins import Renderable
-
-
-class Command(Renderable, BaseCommand):
-
-    help = """
-        Using the current set of correspondent rules, apply said rules to all
-        documents in the database, effectively allowing you to back-tag all
-        previously indexed documents with correspondent created (or modified)
-        after their initial import.
-    """.replace("    ", "")
-
-    TOO_MANY_CONTINUE = (
-        "Detected {} potential correspondents for {}, so we've opted for {}")
-    TOO_MANY_SKIP = (
-        "Detected {} potential correspondents for {}, so we're skipping it")
-    CHANGE_MESSAGE = (
-        'Document {}: "{}" was given the correspondent id {}: "{}"')
-
-    def __init__(self, *args, **kwargs):
-        self.verbosity = 0
-        BaseCommand.__init__(self, *args, **kwargs)
-
-    def add_arguments(self, parser):
-        parser.add_argument(
-            "--use-first",
-            default=False,
-            action="store_true",
-            help="By default this command won't try to assign a correspondent "
-                 "if more than one matches the document.  Use this flag if "
-                 "you'd rather it just pick the first one it finds."
-        )
-
-    def handle(self, *args, **options):
-
-        self.verbosity = options["verbosity"]
-
-        for document in Document.objects.filter(correspondent__isnull=True):
-
-            potential_correspondents = list(
-                Correspondent.match_all(document.content))
-
-            if not potential_correspondents:
-                continue
-
-            potential_count = len(potential_correspondents)
-            correspondent = potential_correspondents[0]
-
-            if potential_count > 1:
-                if not options["use_first"]:
-                    print(
-                        self.TOO_MANY_SKIP.format(potential_count, document),
-                        file=sys.stderr
-                    )
-                    continue
-                print(
-                    self.TOO_MANY_CONTINUE.format(
-                        potential_count,
-                        document,
-                        correspondent
-                    ),
-                    file=sys.stderr
-                )
-
-            document.correspondent = correspondent
-            document.save(update_fields=("correspondent",))
-
-            print(
-                self.CHANGE_MESSAGE.format(
-                    document.pk,
-                    document.title,
-                    correspondent.pk,
-                    correspondent.name
-                ),
-                file=sys.stderr
-            )
--- a/src/documents/management/commands/document_create_classifier.py
+++ b/src/documents/management/commands/document_create_classifier.py
@@ -0,0 +1,25 @@
+import logging
+
+from django.core.management.base import BaseCommand
+from documents.classifier import DocumentClassifier
+from paperless import settings
+from ...mixins import Renderable
+
+
+class Command(Renderable, BaseCommand):
+    """
+    Management command that trains the document classifier and saves it.
+    """
+
+    help = """
+        Trains the classifier on your data and saves the resulting models to a
+        file. The document consumer will then automatically use this new model.
+    """.replace("    ", "")
+
+    def __init__(self, *args, **kwargs):
+        # Calls BaseCommand.__init__ directly rather than going via super().
+        BaseCommand.__init__(self, *args, **kwargs)
+
+    def handle(self, *args, **options):
+        """
+        Train a new classifier, log the target path and save the models.
+        """
+        logger = logging.getLogger(__name__)
+        classifier = DocumentClassifier()
+        classifier.train()
+
+        message = "Saving models to {}...".format(settings.MODEL_FILE)
+        logger.info(message)
+        classifier.save_classifier()
--- a/src/documents/management/commands/document_exporter.py
+++ b/src/documents/management/commands/document_exporter.py
@@ -6,7 +6,7 @@ import shutil
 from django.core.management.base import BaseCommand, CommandError
 from django.core import serializers

-from documents.models import Document, Correspondent, Tag
+from documents.models import Document, Correspondent, Tag, DocumentType
 from paperless.db import GnuPG

 from ...mixins import Renderable
@@ -55,7 +55,12 @@ class Command(Renderable, BaseCommand):
        documents = Document.objects.all()
        document_map = {d.pk: d for d in documents}
        manifest = json.loads(serializers.serialize("json", documents))
-        for document_dict in manifest:
+
+        for index, document_dict in enumerate(manifest):
+
+            # Force output to unencrypted as that will be the current state.
+            # The importer will make the decision to encrypt or not.
+            manifest[index]["fields"]["storage_type"] = Document.STORAGE_TYPE_UNENCRYPTED  # NOQA: E501

            document = document_map[document_dict["pk"]]

@@ -91,6 +96,9 @@ class Command(Renderable, BaseCommand):
        manifest += json.loads(serializers.serialize(
            "json", Tag.objects.all()))

+        manifest += json.loads(serializers.serialize(
+            "json", DocumentType.objects.all()))
+
        with open(os.path.join(self.target, "manifest.json"), "w") as f:
            json.dump(manifest, f, indent=2)

--- a/src/documents/management/commands/document_importer.py
+++ b/src/documents/management/commands/document_importer.py
@@ -94,7 +94,7 @@ class Command(Renderable, BaseCommand):
            document_path = os.path.join(self.source, doc_file)
            thumbnail_path = os.path.join(self.source, thumb_file)

-            if document.storage_type == Document.STORAGE_TYPE_GPG:
+            if settings.PASSPHRASE:

                with open(document_path, "rb") as unencrypted:
                    with open(document.source_path, "wb") as encrypted:
@@ -112,3 +112,15 @@ class Command(Renderable, BaseCommand):

                shutil.copy(document_path, document.source_path)
                shutil.copy(thumbnail_path, document.thumbnail_path)
+
+        # Reset the storage type to whatever we've used while importing
+
+        storage_type = Document.STORAGE_TYPE_UNENCRYPTED
+        if settings.PASSPHRASE:
+            storage_type = Document.STORAGE_TYPE_GPG
+
+        Document.objects.filter(
+            pk__in=[r["pk"] for r in self.manifest]
+        ).update(
+            storage_type=storage_type
+        )
--- a/src/documents/management/commands/document_retagger.py
+++ b/src/documents/management/commands/document_retagger.py
@@ -1,5 +1,8 @@
+import logging
+
 from django.core.management.base import BaseCommand

+from documents.classifier import DocumentClassifier
 from documents.models import Document, Tag

 from ...mixins import Renderable
@@ -8,25 +11,66 @@ from ...mixins import Renderable
 class Command(Renderable, BaseCommand):

    help = """
-        Using the current set of tagging rules, apply said rules to all
-        documents in the database, effectively allowing you to back-tag all
-        previously indexed documents with tags created (or modified) after
-        their initial import.
+        Using the current classification model, assigns correspondents, tags
+        and document types to all documents, effectively allowing you to
+        back-tag all previously indexed documents with metadata created (or
+        modified) after their initial import.
    """.replace("    ", "")

    def __init__(self, *args, **kwargs):
        self.verbosity = 0
        BaseCommand.__init__(self, *args, **kwargs)

+    def add_arguments(self, parser):
+        """
+        Register the switches that select what gets classified and on which
+        documents.  All of them are plain flags that default to False.
+        """
+        parser.add_argument(
+            "-c", "--correspondent",
+            action="store_true",
+            help="Let the classifier assign a correspondent to each "
+                 "processed document."
+        )
+        parser.add_argument(
+            "-T", "--tags",
+            action="store_true",
+            help="Let the classifier assign tags to each processed document."
+        )
+        parser.add_argument(
+            "-t", "--type",
+            action="store_true",
+            help="Let the classifier assign a document type to each "
+                 "processed document."
+        )
+        parser.add_argument(
+            "-i", "--inbox-only",
+            action="store_true",
+            help="Only process documents that carry an inbox tag instead of "
+                 "all documents."
+        )
+        parser.add_argument(
+            "-r", "--replace-tags",
+            action="store_true",
+            help="Clear a document's existing tags before adding the "
+                 "detected ones instead of only adding to them."
+        )
+
    def handle(self, *args, **options):
+        """
+        Load the saved classifier and run it over the selected documents.
+
+        Returns early, after logging a critical message, when the classifier
+        model file cannot be found.
+        """

        self.verbosity = options["verbosity"]

-        for document in Document.objects.all():
+        if options["inbox_only"]:
+            queryset = Document.objects.filter(tags__is_inbox_tag=True)
+        else:
+            queryset = Document.objects.all()
+        # distinct(): presumably guards against a document being returned
+        # once per matching inbox tag by the join above.
+        documents = queryset.distinct()

-            tags = Tag.objects.exclude(
-                pk__in=document.tags.values_list("pk", flat=True))
+        logging.getLogger(__name__).info("Loading classifier")
+        clf = DocumentClassifier()
+        try:
+            clf.reload()
+        except FileNotFoundError:
+            # critical() is the supported spelling; fatal() only aliases it.
+            logging.getLogger(__name__).critical(
+                "Cannot classify documents, classifier model file was not "
+                "found."
+            )
+            return

-            for tag in Tag.match_all(document.content, tags):
-                print('Tagging {} with "{}"'.format(document, tag))
-                document.tags.add(tag)
+        for document in documents:
+            logging.getLogger(__name__).info(
+                "Processing document {}".format(document.title)
+            )
+            clf.classify_document(
+                document,
+                classify_document_type=options["type"],
+                classify_tags=options["tags"],
+                classify_correspondent=options["correspondent"],
+                replace_tags=options["replace_tags"]
+            )
--- a/src/documents/migrations/0003_sender.py
+++ b/src/documents/migrations/0003_sender.py
@@ -32,7 +32,6 @@ def realign_senders(apps, schema_editor):


 class Migration(migrations.Migration):
-
    dependencies = [
        ('documents', '0002_auto_20151226_1316'),
    ]
--- a/src/documents/migrations/0011_auto_20160303_1929.py
+++ b/src/documents/migrations/0011_auto_20160303_1929.py
@@ -6,7 +6,7 @@ from django.db import migrations


 class Migration(migrations.Migration):
-
+    atomic = False
    dependencies = [
        ('documents', '0010_log'),
    ]
--- a/src/documents/migrations/0012_auto_20160305_0040.py
+++ b/src/documents/migrations/0012_auto_20160305_0040.py
@@ -112,7 +112,6 @@ def move_documents_and_create_thumbnails(apps, schema_editor):


 class Migration(migrations.Migration):
-
    dependencies = [
        ('documents', '0011_auto_20160303_1929'),
    ]
--- a/src/documents/migrations/0014_document_checksum.py
+++ b/src/documents/migrations/0014_document_checksum.py
@@ -128,7 +128,6 @@ def do_nothing(apps, schema_editor):


 class Migration(migrations.Migration):
-
    dependencies = [
        ('documents', '0013_auto_20160325_2111'),
    ]
@@ -159,9 +158,4 @@ class Migration(migrations.Migration):
            name='modified',
            field=models.DateTimeField(auto_now=True, db_index=True),
        ),
-        migrations.AlterField(
-            model_name='document',
-            name='checksum',
-            field=models.CharField(editable=False, help_text='The checksum of the original document (before it was encrypted).  We use this to prevent duplicate document imports.', max_length=32, unique=True),
-        ),
    ]
--- a/src/documents/migrations/0015_add_insensitive_to_match.py
+++ b/src/documents/migrations/0015_add_insensitive_to_match.py
@@ -12,6 +12,11 @@ class Migration(migrations.Migration):
    ]

    operations = [
+        migrations.AlterField(
+            model_name='document',
+            name='checksum',
+            field=models.CharField(editable=False, help_text='The checksum of the original document (before it was encrypted).  We use this to prevent duplicate document imports.', max_length=32, unique=True),
+        ),
        migrations.AddField(
            model_name='correspondent',
            name='is_insensitive',
--- a/src/documents/migrations/0019_add_consumer_user.py
+++ b/src/documents/migrations/0019_add_consumer_user.py
@@ -15,7 +15,6 @@ def reverse_func(apps, schema_editor):


 class Migration(migrations.Migration):
-
    dependencies = [
        ('documents', '0018_auto_20170715_1712'),
    ]
--- a/src/documents/migrations/0020_document_added.py
+++ b/src/documents/migrations/0020_document_added.py
@@ -11,8 +11,8 @@ def set_added_time_to_created_time(apps, schema_editor):
        doc.added = doc.created
        doc.save()

-class Migration(migrations.Migration):

+class Migration(migrations.Migration):
    dependencies = [
        ('documents', '0019_add_consumer_user'),
    ]
--- a/src/documents/migrations/0022_auto_20181007_1420.py
+++ b/src/documents/migrations/0022_auto_20181007_1420.py
@@ -0,0 +1,52 @@
+# Generated by Django 2.0.8 on 2018-10-07 14:20
+
+from django.db import migrations, models
+from django.utils.text import slugify
+
+
+def re_slug_all_the_things(apps, schema_editor):
+    """
+    Run every stored Tag and Correspondent slug through ``slugify`` so the
+    values are real slugs before the field is marked as non-editable.
+    """
+    historical_models = [
+        apps.get_model("documents", model_name)
+        for model_name in ("Tag", "Correspondent")
+    ]
+
+    for model in historical_models:
+        for row in model.objects.all():
+            # A pk-filtered update() writes just the slug column and does not
+            # go through the model's save().
+            model.objects.filter(pk=row.pk).update(slug=slugify(row.slug))
+
+
+class Migration(migrations.Migration):
+    """
+    Orders tags by name, makes the Tag and Correspondent slug fields
+    non-editable, updates the choices of Document.file_type and finally
+    re-slugifies the stored slug values.
+    """
+
+    dependencies = [
+        ('documents', '0021_document_storage_type'),
+    ]
+
+    operations = [
+        migrations.AlterModelOptions(
+            name='tag',
+            options={'ordering': ('name',)},
+        ),
+        migrations.AlterField(
+            model_name='correspondent',
+            name='slug',
+            field=models.SlugField(blank=True, editable=False),
+        ),
+        migrations.AlterField(
+            model_name='document',
+            name='file_type',
+            field=models.CharField(choices=[('pdf', 'PDF'), ('png', 'PNG'), ('jpg', 'JPG'), ('gif', 'GIF'), ('tiff', 'TIFF'), ('txt', 'TXT'), ('csv', 'CSV'), ('md', 'MD')], editable=False, max_length=4),
+        ),
+        migrations.AlterField(
+            model_name='tag',
+            name='slug',
+            field=models.SlugField(blank=True, editable=False),
+        ),
+        # Data step; the reverse direction is a no-op.
+        migrations.RunPython(re_slug_all_the_things, migrations.RunPython.noop)
+    ]
--- a/src/documents/migrations/1001_workflow_improvements.py
+++ b/src/documents/migrations/1001_workflow_improvements.py
@@ -0,0 +1,23 @@
+# Generated by Django 2.0.7 on 2018-07-12 09:52
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    """
+    Adds Document.archive_serial_number and Tag.is_inbox_tag.
+    """
+
+    dependencies = [
+        ('documents', '0022_auto_20181007_1420'),
+    ]
+
+    operations = [
+        # Optional, unique and indexed integer.
+        migrations.AddField(
+            model_name='document',
+            name='archive_serial_number',
+            field=models.IntegerField(blank=True, db_index=True, help_text='The position of this document in your physical document archive.', null=True, unique=True),
+        ),
+        # Defaults to False, so existing tags are not inbox tags.
+        migrations.AddField(
+            model_name='tag',
+            name='is_inbox_tag',
+            field=models.BooleanField(default=False, help_text='Marks this tag as an inbox tag: All newly consumed documents will be tagged with inbox tags.'),
+        ),
+    ]
--- a/src/documents/migrations/1002_auto_20180823_1155.py
+++ b/src/documents/migrations/1002_auto_20180823_1155.py
@@ -0,0 +1,33 @@
+# Generated by Django 2.0.7 on 2018-08-23 11:55
+
+from django.db import migrations, models
+import django.db.models.deletion
+
+
+class Migration(migrations.Migration):
+    """
+    Creates the DocumentType model and adds the optional
+    Document.document_type foreign key pointing at it.
+    """
+
+    dependencies = [
+        ('documents', '1001_workflow_improvements'),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name='DocumentType',
+            fields=[
+                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+                ('name', models.CharField(max_length=128, unique=True)),
+                ('slug', models.SlugField(blank=True)),
+                ('match', models.CharField(blank=True, max_length=256)),
+                ('matching_algorithm', models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression'), (5, 'Fuzzy Match')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF.  Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided.  A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF.  (If you don\'t know what a regex is, you probably don\'t want this option.)  Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.')),
+                ('is_insensitive', models.BooleanField(default=True)),
+            ],
+            options={
+                'abstract': False,
+            },
+        ),
+        # SET_NULL: deleting a document type leaves its documents in place
+        # with document_type set to NULL.
+        migrations.AddField(
+            model_name='document',
+            name='document_type',
+            field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='documents', to='documents.DocumentType'),
+        ),
+    ]
--- a/src/documents/migrations/1003_auto_20180904_1425.py
+++ b/src/documents/migrations/1003_auto_20180904_1425.py
@@ -0,0 +1,77 @@
+# Generated by Django 2.0.8 on 2018-09-04 14:25
+
+from django.db import migrations, models
+
+
+def transfer_automatic_classification(apps, schema_editor):
+    """
+    Switch ``automatic_classification`` on for every tag, correspondent and
+    document type that has a non-empty ``match`` value, and off for the rest.
+    """
+    for name in ("Tag", "Correspondent", "DocumentType"):
+        model = apps.get_model("documents", name)
+        for instance in model.objects.all():
+            has_match = instance.match is not None and len(instance.match) > 0
+            instance.automatic_classification = has_match
+            instance.save()
+
+
+def reverse_automatic_classification(apps, schema_editor):
+    """
+    Reverse step for ``transfer_automatic_classification``; does nothing.
+    """
+
+
+class Migration(migrations.Migration):
+    """
+    Adds ``automatic_classification`` to Correspondent, DocumentType and Tag,
+    fills it from the existing ``match`` values, then removes the
+    ``is_insensitive``, ``match`` and ``matching_algorithm`` fields from all
+    three models.
+    """
+
+    dependencies = [
+        ('documents', '1002_auto_20180823_1155'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='correspondent',
+            name='automatic_classification',
+            field=models.BooleanField(default=False, help_text='Automatically assign to newly added documents based on current usage in your document collection.'),
+        ),
+        migrations.AddField(
+            model_name='documenttype',
+            name='automatic_classification',
+            field=models.BooleanField(default=False, help_text='Automatically assign to newly added documents based on current usage in your document collection.'),
+        ),
+        migrations.AddField(
+            model_name='tag',
+            name='automatic_classification',
+            field=models.BooleanField(default=False, help_text='Automatically assign to newly added documents based on current usage in your document collection.'),
+        ),
+        # The data step reads ``match``, so it has to run before the
+        # RemoveField operations below drop that column.
+        migrations.RunPython(transfer_automatic_classification, reverse_automatic_classification),
+        migrations.RemoveField(
+            model_name='correspondent',
+            name='is_insensitive',
+        ),
+        migrations.RemoveField(
+            model_name='correspondent',
+            name='match',
+        ),
+        migrations.RemoveField(
+            model_name='correspondent',
+            name='matching_algorithm',
+        ),
+        migrations.RemoveField(
+            model_name='documenttype',
+            name='is_insensitive',
+        ),
+        migrations.RemoveField(
+            model_name='documenttype',
+            name='match',
+        ),
+        migrations.RemoveField(
+            model_name='documenttype',
+            name='matching_algorithm',
+        ),
+        migrations.RemoveField(
+            model_name='tag',
+            name='is_insensitive',
+        ),
+        migrations.RemoveField(
+            model_name='tag',
+            name='match',
+        ),
+        migrations.RemoveField(
+            model_name='tag',
+            name='matching_algorithm',
+        ),
+    ]
--- a/src/documents/migrations/1004_documenttype_slug.py
+++ b/src/documents/migrations/1004_documenttype_slug.py
@@ -0,0 +1,36 @@
+# Generated by Django 2.0.8 on 2018-10-07 14:20
+
+from django.db import migrations, models
+from django.utils.text import slugify
+
+
+def re_slug_all_the_things(apps, schema_editor):
+    """
+    Run every stored DocumentType slug through ``slugify`` so the values are
+    real slugs before the field is marked as non-editable.
+    """
+    document_type_model = apps.get_model("documents", "DocumentType")
+
+    for row in document_type_model.objects.all():
+        # A pk-filtered update() writes just the slug column and does not go
+        # through the model's save().
+        document_type_model.objects.filter(pk=row.pk).update(
+            slug=slugify(row.slug)
+        )
+
+
+class Migration(migrations.Migration):
+    """
+    Makes DocumentType.slug non-editable and re-slugifies the stored values.
+    """
+
+    dependencies = [
+        ('documents', '1003_auto_20180904_1425'),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name='documenttype',
+            name='slug',
+            field=models.SlugField(blank=True, editable=False),
+        ),
+        # Data step; the reverse direction is a no-op.
+        migrations.RunPython(re_slug_all_the_things, migrations.RunPython.noop)
+    ]
--- a/src/documents/mixins.py
+++ b/src/documents/mixins.py
--- a/src/documents/models.py
+++ b/src/documents/models.py
@@ -1,151 +1,48 @@
 # coding=utf-8

-import dateutil.parser
 import logging
 import os
 import re
 import uuid
-
 from collections import OrderedDict
-from fuzzywuzzy import fuzz

+import dateutil.parser
 from django.conf import settings
-from django.core.urlresolvers import reverse
 from django.db import models
 from django.template.defaultfilters import slugify
 from django.utils import timezone
+from django.utils.text import slugify
+from fuzzywuzzy import fuzz

 from .managers import LogManager

+try:
+    from django.core.urlresolvers import reverse
+except ImportError:
+    from django.urls import reverse
+

 class MatchingModel(models.Model):

-    MATCH_ANY = 1
-    MATCH_ALL = 2
-    MATCH_LITERAL = 3
-    MATCH_REGEX = 4
-    MATCH_FUZZY = 5
-    MATCHING_ALGORITHMS = (
-        (MATCH_ANY, "Any"),
-        (MATCH_ALL, "All"),
-        (MATCH_LITERAL, "Literal"),
-        (MATCH_REGEX, "Regular Expression"),
-        (MATCH_FUZZY, "Fuzzy Match"),
-    )
-
    name = models.CharField(max_length=128, unique=True)
-    slug = models.SlugField(blank=True)
+    slug = models.SlugField(blank=True, editable=False)

-    match = models.CharField(max_length=256, blank=True)
-    matching_algorithm = models.PositiveIntegerField(
-        choices=MATCHING_ALGORITHMS,
-        default=MATCH_ANY,
-        help_text=(
-            "Which algorithm you want to use when matching text to the OCR'd "
-            "PDF.  Here, \"any\" looks for any occurrence of any word "
-            "provided in the PDF, while \"all\" requires that every word "
-            "provided appear in the PDF, albeit not in the order provided.  A "
-            "\"literal\" match means that the text you enter must appear in "
-            "the PDF exactly as you've entered it, and \"regular expression\" "
-            "uses a regex to match the PDF.  (If you don't know what a regex "
-            "is, you probably don't want this option.)  Finally, a \"fuzzy "
-            "match\" looks for words or phrases that are mostly—but not "
-            "exactly—the same, which can be useful for matching against "
-            "documents containg imperfections that foil accurate OCR."
-        )
+    automatic_classification = models.BooleanField(
+        default=False,
+        help_text="Automatically assign to newly added documents based on "
+                  "current usage in your document collection."
    )

-    is_insensitive = models.BooleanField(default=True)
-
    class Meta:
        abstract = True
+        ordering = ("name",)

    def __str__(self):
        return self.name

-    @property
-    def conditions(self):
-        return "{}: \"{}\" ({})".format(
-            self.name, self.match, self.get_matching_algorithm_display())
-
-    @classmethod
-    def match_all(cls, text, tags=None):
-
-        if tags is None:
-            tags = cls.objects.all()
-
-        text = text.lower()
-        for tag in tags:
-            if tag.matches(text):
-                yield tag
-
-    def matches(self, text):
-
-        search_kwargs = {}
-
-        # Check that match is not empty
-        if self.match.strip() == "":
-            return False
-
-        if self.is_insensitive:
-            search_kwargs = {"flags": re.IGNORECASE}
-
-        if self.matching_algorithm == self.MATCH_ALL:
-            for word in self._split_match():
-                search_result = re.search(
-                    r"\b{}\b".format(word), text, **search_kwargs)
-                if not search_result:
-                    return False
-            return True
-
-        if self.matching_algorithm == self.MATCH_ANY:
-            for word in self._split_match():
-                if re.search(r"\b{}\b".format(word), text, **search_kwargs):
-                    return True
-            return False
-
-        if self.matching_algorithm == self.MATCH_LITERAL:
-            return bool(re.search(
-                r"\b{}\b".format(self.match), text, **search_kwargs))
-
-        if self.matching_algorithm == self.MATCH_REGEX:
-            return bool(re.search(
-                re.compile(self.match, **search_kwargs), text))
-
-        if self.matching_algorithm == self.MATCH_FUZZY:
-            match = re.sub(r'[^\w\s]', '', self.match)
-            text = re.sub(r'[^\w\s]', '', text)
-            if self.is_insensitive:
-                match = match.lower()
-                text = text.lower()
-
-            return True if fuzz.partial_ratio(match, text) >= 90 else False
-
-        raise NotImplementedError("Unsupported matching algorithm")
-
-    def _split_match(self):
-        """
-        Splits the match to individual keywords, getting rid of unnecessary
-        spaces and grouping quoted words together.
-
-        Example:
-          '  some random  words "with   quotes  " and   spaces'
-            ==>
-          ["some", "random", "words", "with\s+quotes", "and", "spaces"]
-        """
-        findterms = re.compile(r'"([^"]+)"|(\S+)').findall
-        normspace = re.compile(r"\s+").sub
-        return [
-            normspace(" ", (t[0] or t[1]).strip()).replace(" ", r"\s+")
-            for t in findterms(self.match)
-        ]
-
    def save(self, *args, **kwargs):

-        self.match = self.match.lower()
-
-        if not self.slug:
-            self.slug = slugify(self.name)
+        self.slug = slugify(self.name)

        models.Model.save(self, *args, **kwargs)

@@ -180,6 +77,17 @@ class Tag(MatchingModel):

    colour = models.PositiveIntegerField(choices=COLOURS, default=1)

+    is_inbox_tag = models.BooleanField(
+        default=False,
+        help_text="Marks this tag as an inbox tag: All newly consumed "
+                  "documents will be tagged with inbox tags."
+    )
+
+
+class DocumentType(MatchingModel):
+
+    pass
+

 class Document(models.Model):

@@ -188,7 +96,11 @@ class Document(models.Model):
    TYPE_JPG = "jpg"
    TYPE_GIF = "gif"
    TYPE_TIF = "tiff"
-    TYPES = (TYPE_PDF, TYPE_PNG, TYPE_JPG, TYPE_GIF, TYPE_TIF,)
+    TYPE_TXT = "txt"
+    TYPE_CSV = "csv"
+    TYPE_MD = "md"
+    TYPES = (TYPE_PDF, TYPE_PNG, TYPE_JPG, TYPE_GIF, TYPE_TIF,
+             TYPE_TXT, TYPE_CSV, TYPE_MD)

    STORAGE_TYPE_UNENCRYPTED = "unencrypted"
    STORAGE_TYPE_GPG = "gpg"
@@ -207,6 +119,14 @@ class Document(models.Model):

    title = models.CharField(max_length=128, blank=True, db_index=True)

+    document_type = models.ForeignKey(
+        DocumentType,
+        blank=True,
+        null=True,
+        related_name="documents",
+        on_delete=models.SET_NULL
+    )
+
    content = models.TextField(
        db_index=True,
        blank=True,
@@ -247,6 +167,15 @@ class Document(models.Model):
    added = models.DateTimeField(
        default=timezone.now, editable=False, db_index=True)

+    archive_serial_number = models.IntegerField(
+        blank=True,
+        null=True,
+        unique=True,
+        db_index=True,
+        help_text="The position of this document in your physical document "
+                  "archive."
+    )
+
    class Meta:
        ordering = ("correspondent", "title")

@@ -361,51 +290,52 @@ class FileInfo:
        )
    )

+    formats = "pdf|jpe?g|png|gif|tiff?|te?xt|md|csv"
    REGEXES = OrderedDict([
        ("created-correspondent-title-tags", re.compile(
            r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
            r"(?P<correspondent>.*) - "
            r"(?P<title>.*) - "
            r"(?P<tags>[a-z0-9\-,]*)"
-            r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$",
+            r"\.(?P<extension>{})$".format(formats),
            flags=re.IGNORECASE
        )),
        ("created-title-tags", re.compile(
            r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
            r"(?P<title>.*) - "
            r"(?P<tags>[a-z0-9\-,]*)"
-            r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$",
+            r"\.(?P<extension>{})$".format(formats),
            flags=re.IGNORECASE
        )),
        ("created-correspondent-title", re.compile(
            r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
            r"(?P<correspondent>.*) - "
            r"(?P<title>.*)"
-            r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$",
+            r"\.(?P<extension>{})$".format(formats),
            flags=re.IGNORECASE
        )),
        ("created-title", re.compile(
            r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
            r"(?P<title>.*)"
-            r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$",
+            r"\.(?P<extension>{})$".format(formats),
            flags=re.IGNORECASE
        )),
        ("correspondent-title-tags", re.compile(
            r"(?P<correspondent>.*) - "
            r"(?P<title>.*) - "
            r"(?P<tags>[a-z0-9\-,]*)"
-            r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$",
+            r"\.(?P<extension>{})$".format(formats),
            flags=re.IGNORECASE
        )),
        ("correspondent-title", re.compile(
            r"(?P<correspondent>.*) - "
            r"(?P<title>.*)?"
-            r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$",
+            r"\.(?P<extension>{})$".format(formats),
            flags=re.IGNORECASE
        )),
        ("title", re.compile(
            r"(?P<title>.*)"
-            r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$",
+            r"\.(?P<extension>{})$".format(formats),
            flags=re.IGNORECASE
        ))
    ])
@@ -442,8 +372,10 @@ class FileInfo:
    def _get_tags(cls, tags):
        r = []
        for t in tags.split(","):
-            r.append(
-                Tag.objects.get_or_create(slug=t, defaults={"name": t})[0])
+            r.append(Tag.objects.get_or_create(
+                slug=slugify(t),
+                defaults={"name": t}
+            )[0])
        return tuple(r)

    @classmethod
--- a/src/documents/parsers.py
+++ b/src/documents/parsers.py
@@ -1,8 +1,32 @@
 import logging
+import os
+import re
 import shutil
+import subprocess
 import tempfile

+import dateparser
 from django.conf import settings
+from django.utils import timezone
+
+# Finds candidate dates in a document's text or file name.  Each candidate
+# must be delimited by a word boundary or by "_"/"-".  Supported formats:
+# - XX.YY.ZZZZ where XX and YY are 1 or 2 digits and ZZZZ is 2 or 4 digits
+# - XX/YY/ZZZZ where XX and YY are 1 or 2 digits and ZZZZ is 2 or 4 digits
+# - XX-YY-ZZZZ where XX and YY are 1 or 2 digits and ZZZZ is 2 or 4 digits
+# - ZZZZ.XX.YY where XX and YY are 1 or 2 digits and ZZZZ is 2 or 4 digits
+# - ZZZZ/XX/YY where XX and YY are 1 or 2 digits and ZZZZ is 2 or 4 digits
+# - ZZZZ-XX-YY where XX and YY are 1 or 2 digits and ZZZZ is 2 or 4 digits
+# - XX. MONTH ZZZZ where XX is 1 or 2 digits and ZZZZ is 2 or 4 digits
+# - MONTH ZZZZ, where ZZZZ is 4 digits
+# - MONTH XX, ZZZZ where XX is 1 or 2 digits and ZZZZ is 4 digits
+DATE_REGEX = re.compile(
+    r'(\b|(?<=([_-])))([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})(\b|(?=([_-])))|' +  # NOQA: E501
+    r'(\b|(?<=([_-])))([0-9]{4}|[0-9]{2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{1,2})(\b|(?=([_-])))|' +  # NOQA: E501
+    r'(\b|(?<=([_-])))([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))(\b|(?=([_-])))|' +  # NOQA: E501
+    r'(\b|(?<=([_-])))([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))(\b|(?=([_-])))|' +
+    r'(\b|(?<=([_-])))([^\W\d_]{3,9} [0-9]{4})(\b|(?=([_-])))'
+)


 class ParseError(Exception):
@@ -16,6 +40,9 @@ class DocumentParser:
    """

    SCRATCH = settings.SCRATCH_DIR
+    DATE_ORDER = settings.DATE_ORDER
+    FILENAME_DATE_ORDER = settings.FILENAME_DATE_ORDER
+    OPTIPNG = settings.OPTIPNG_BINARY

    def __init__(self, path):
        self.document_path = path
@@ -29,6 +56,19 @@ class DocumentParser:
        """
        raise NotImplementedError()

+    def optimise_thumbnail(self, in_path):
+        """Optimise the thumbnail at in_path with optipng; return its path."""
+        optimised = os.path.join(self.tempdir, "optipng.png")
+
+        command = (self.OPTIPNG, "-o5", in_path, "-out", optimised)
+        if subprocess.Popen(command).wait() != 0:
+            raise ParseError("Optipng failed at {}".format(command))
+
+        return optimised
+
+    def get_optimised_thumbnail(self):
+        return self.optimise_thumbnail(self.get_thumbnail())
+
    def get_text(self):
        """
        Returns the text from the document and only the text.
@@ -39,7 +79,82 @@ class DocumentParser:
        """
        Returns the date of the document.
        """
-        raise NotImplementedError()
+
+        def __parser(ds, date_order):
+            """
+            Call dateparser.parse with a particular date ordering
+            """
+            return dateparser.parse(
+                ds,
+                settings={
+                    "DATE_ORDER": date_order,
+                    "PREFER_DAY_OF_MONTH": "first",
+                    "RETURN_AS_TIMEZONE_AWARE":
+                    True
+                }
+            )
+
+        date = None
+        date_string = None
+
+        next_year = timezone.now().year + 5  # Arbitrary 5 year future limit
+        title = os.path.basename(self.document_path)
+
+        # if filename date parsing is enabled, search there first:
+        if self.FILENAME_DATE_ORDER:
+            self.log("info", "Checking document title for date")
+            for m in re.finditer(DATE_REGEX, title):
+                date_string = m.group(0)
+
+                try:
+                    date = __parser(date_string, self.FILENAME_DATE_ORDER)
+                except TypeError:
+                    # Skip all matches that do not parse to a proper date
+                    continue
+
+                if date is not None and next_year > date.year > 1900:
+                    self.log(
+                        "info",
+                        "Detected document date {} based on string {} "
+                        "from document title"
+                        "".format(date.isoformat(), date_string)
+                    )
+                    return date
+
+        try:
+            # getting text after checking filename will save time if only
+            # looking at the filename instead of the whole text
+            text = self.get_text()
+        except ParseError:
+            return None
+
+        # Iterate through all regex matches in text and try to parse the date
+        for m in re.finditer(DATE_REGEX, text):
+            date_string = m.group(0)
+
+            try:
+                date = __parser(date_string, self.DATE_ORDER)
+            except TypeError:
+                # Skip all matches that do not parse to a proper date
+                continue
+
+            if date is not None and next_year > date.year > 1900:
+                break
+            else:
+                date = None
+
+        if date is not None:
+            self.log(
+                "info",
+                "Detected document date {} based on string {}".format(
+                    date.isoformat(),
+                    date_string
+                )
+            )
+        else:
+            self.log("info", "Unable to detect date for document")
+
+        return date

    def log(self, level, message):
        getattr(self.logger, level)(message, extra={
--- a/src/documents/serialisers.py
+++ b/src/documents/serialisers.py
@@ -1,21 +1,28 @@
 from rest_framework import serializers

-from .models import Correspondent, Tag, Document, Log
+from .models import Correspondent, Tag, Document, Log, DocumentType


 class CorrespondentSerializer(serializers.HyperlinkedModelSerializer):

-    class Meta(object):
+    class Meta:
        model = Correspondent
-        fields = ("id", "slug", "name")
+        fields = ("id", "slug", "name", "automatic_classification")
+
+
+class DocumentTypeSerializer(serializers.HyperlinkedModelSerializer):
+
+    class Meta:
+        model = DocumentType
+        fields = ("id", "slug", "name", "automatic_classification")


 class TagSerializer(serializers.HyperlinkedModelSerializer):

-    class Meta(object):
+    class Meta:
        model = Tag
        fields = (
-            "id", "slug", "name", "colour", "match", "matching_algorithm")
+            "id", "slug", "name", "colour", "automatic_classification")


 class CorrespondentField(serializers.HyperlinkedRelatedField):
@@ -28,17 +35,25 @@ class TagsField(serializers.HyperlinkedRelatedField):
        return Tag.objects.all()


+class DocumentTypeField(serializers.HyperlinkedRelatedField):
+    def get_queryset(self):
+        return DocumentType.objects.all()
+
+
 class DocumentSerializer(serializers.ModelSerializer):

    correspondent = CorrespondentField(
        view_name="drf:correspondent-detail", allow_null=True)
    tags = TagsField(view_name="drf:tag-detail", many=True)
+    document_type = DocumentTypeField(
+        view_name="drf:documenttype-detail", allow_null=True)

-    class Meta(object):
+    class Meta:
        model = Document
        fields = (
            "id",
            "correspondent",
+            "document_type",
            "title",
            "content",
            "file_type",
@@ -57,7 +72,7 @@ class LogSerializer(serializers.ModelSerializer):
    time = serializers.DateTimeField()
    messages = serializers.CharField()

-    class Meta(object):
+    class Meta:
        model = Log
        fields = (
            "time",
--- a/src/documents/signals/handlers.py
+++ b/src/documents/signals/handlers.py
@@ -8,57 +8,36 @@ from django.contrib.auth.models import User
 from django.contrib.contenttypes.models import ContentType
 from django.utils import timezone

-from ..models import Correspondent, Document, Tag
+from documents.classifier import DocumentClassifier
+from ..models import Document, Tag


 def logger(message, group):
    logging.getLogger(__name__).debug(message, extra={"group": group})


-def set_correspondent(sender, document=None, logging_group=None, **kwargs):
+classifier = DocumentClassifier()

-    # No sense in assigning a correspondent when one is already set.
-    if document.correspondent:
-        return

-    # No matching correspondents, so no need to continue
-    potential_correspondents = list(Correspondent.match_all(document.content))
-    if not potential_correspondents:
-        return
-
-    potential_count = len(potential_correspondents)
-    selected = potential_correspondents[0]
-    if potential_count > 1:
-        message = "Detected {} potential correspondents, so we've opted for {}"
-        logger(
-            message.format(potential_count, selected),
-            logging_group
+def classify_document(sender, document=None, logging_group=None, **kwargs):
+    global classifier
+    try:
+        classifier.reload()
+        classifier.classify_document(
+            document,
+            classify_correspondent=True,
+            classify_tags=True,
+            classify_document_type=True
+        )
+    except FileNotFoundError:
+        logging.getLogger(__name__).fatal(
+            "Cannot classify document, classifier model file was not found."
        )

-    logger(
-        'Assigning correspondent "{}" to "{}" '.format(selected, document),
-        logging_group
-    )

-    document.correspondent = selected
-    document.save(update_fields=("correspondent",))
-
-
-def set_tags(sender, document=None, logging_group=None, **kwargs):
-
-    current_tags = set(document.tags.all())
-    relevant_tags = set(Tag.match_all(document.content)) - current_tags
-
-    if not relevant_tags:
-        return
-
-    message = 'Tagging "{}" with "{}"'
-    logger(
-        message.format(document, ", ".join([t.slug for t in relevant_tags])),
-        logging_group
-    )
-
-    document.tags.add(*relevant_tags)
+def add_inbox_tags(sender, document=None, logging_group=None, **kwargs):
+    inbox_tags = Tag.objects.filter(is_inbox_tag=True)
+    document.tags.add(*inbox_tags)


 def run_pre_consume_script(sender, filename, **kwargs):
--- a/src/documents/static/documents/js/pdf.js
+++ b/src/documents/static/documents/js/pdf.js
--- a/src/documents/static/documents/js/pdf.js.map
+++ b/src/documents/static/documents/js/pdf.js.map
--- a/src/documents/static/documents/js/pdf.worker.js
+++ b/src/documents/static/documents/js/pdf.worker.js
--- a/src/documents/static/documents/js/pdf.worker.js.map
+++ b/src/documents/static/documents/js/pdf.worker.js.map
--- a/src/documents/static/js/colours.js
+++ b/src/documents/static/js/colours.js
@@ -0,0 +1,59 @@
+// The following jQuery snippet will add a small square next to the selection
+// drop-down on the `Add tag` page that will update to show the selected tag
+// colour as the drop-down value is changed.
+
+django.jQuery(document).ready(function(){
+
+  // Returns the text of the currently selected <option>, which is used as the
+  // CSS colour.  Looking the option up via :selected (instead of indexing the
+  // options by `value - 1`) keeps working for blank or re-ordered choices.
+  function selectedColour(select) {
+    return django.jQuery(select).find('option:selected').text();
+  }
+
+  if (django.jQuery("#id_colour").length) {
+
+    // A single colour drop-down with a fixed id.
+    django.jQuery('#id_colour').after('<div class="colour_square"></div>');
+
+    django.jQuery('.colour_square').css({
+      'float': 'left',
+      'width': '20px',
+      'height': '20px',
+      'margin': '5px',
+      'border': '1px solid rgba(0, 0, 0, .2)',
+      'background': selectedColour('#id_colour')
+    });
+
+    django.jQuery('#id_colour').change(function () {
+      django.jQuery('.colour_square').css({'background': selectedColour(this)});
+    });
+
+  } else if (django.jQuery("select[id*='colour']").length) {
+
+    // Several colour drop-downs on one page: the shared geometry is injected
+    // once as a stylesheet (not once per drop-down), and every square gets
+    // its own id so that it can be recoloured independently of the others.
+    django.jQuery("<style type='text/css'>\
+                        .colour_square{ \
+                            float: left; \
+                            width: 20px; \
+                            height: 20px; \
+                            margin: 5px; \
+                            border: 1px solid rgba(0,0,0,.2); \
+                        } </style>").appendTo("head");
+
+    django.jQuery('select[id*="-colour"]').each(function (index, element) {
+      const id = "colour_square_" + index;
+
+      django.jQuery(element).after('<div class="colour_square" id="' + id + '"></div>');
+      django.jQuery('#' + id).css({'background': selectedColour(element)});
+
+      django.jQuery(element).change(function () {
+        django.jQuery('#' + id).css({'background': selectedColour(element)});
+      });
+    });
+
+  }
+
+});
--- a/src/documents/static/paperless.css
+++ b/src/documents/static/paperless.css
@@ -20,4 +20,17 @@ td a.tag {
 #result_list td textarea {
  width: 90%;
  height: 5em;
+}
+
+#change_form_twocolumn_parent {
+  display: flex;
+}
+#change_form_form_parent {
+  flex:50%;
+  margin-right: 10px;
+}
+#change_form_viewer_parent {
+  flex:50%;
+  margin-left: 10px;
+  text-align: center;
 }
--- a/src/documents/templates/admin/documents/document/change_form.html
+++ b/src/documents/templates/admin/documents/document/change_form.html
@@ -1,5 +1,42 @@
 {% extends 'admin/change_form.html' %}

+{% block content %}
+
+{{ block.super }}
+
+{% if file_type in "pdf jpg png" %}
+
+	<div id="change_form_twocolumn_parent">
+		<div id="change_form_form_parent"></div>
+		<div id="change_form_viewer_parent">
+			{% if file_type == "pdf" %}
+				{% include "admin/documents/document/viewers/viewer_pdf.html" %}
+			{% endif %}
+			{% if file_type in "jpg png" %}
+				{% include "admin/documents/document/viewers/viewer_image.html" %}
+			{% endif %}
+		</div>
+	</div>
+
+	<script>
+		django.jQuery("#change_form_form_parent").append(django.jQuery("#document_form"));
+		django.jQuery("#content-main").append(django.jQuery("#change_form_twocolumn_parent"));
+	</script>
+
+{% endif %}
+
+{% if next_object %}
+	<script type="text/javascript">//<![CDATA[
+		(function($){
+			$('<input type="submit" value="Save and edit next" name="_saveandeditnext" />')
+			.prependTo('div.submit-row');
+			$('<input type="hidden" value="{{next_object}}" name="_next_object" />')
+			.prependTo('div.submit-row');
+		})(django.jQuery);
+	//]]></script>
+{% endif %}
+
+{% endblock content %}

 {% block footer %}

@@ -10,4 +47,4 @@
 		django.jQuery(".field-created input").first().attr("type", "date")
 	</script>

-{% endblock footer %}
+{% endblock footer %}
--- a/src/documents/templates/admin/documents/document/change_list_results.html
+++ b/src/documents/templates/admin/documents/document/change_list_results.html
@@ -24,7 +24,8 @@
    border: 1px solid #cccccc;
    border-radius: 2%;
    overflow: hidden;
-    height: 300px;
+    height: 350px;
+    position: relative;
  }
  .result .header {
    padding: 5px;
@@ -60,6 +61,11 @@
  .result a.tag {
    color: #ffffff;
  }
+  .result .documentType {
+    padding: 5px;
+    background-color: #eeeeee;
+    text-align: center;
+  }
  .result .date {
    padding: 5px;
  }
@@ -79,6 +85,15 @@
  .result .image img {
    width: 100%;
  }
+  .result .footer {
+    position: absolute;
+    bottom: 0;
+    right: 0;
+    border-left: 1px solid #cccccc;
+    border-top: 1px solid #cccccc;
+    padding: 4px 10px 4px 10px;
+    background: white;
+  }

  .grid {
    margin-right: 260px;
@@ -152,7 +167,9 @@
    {# 4: Image #}
    {# 5: Correspondent #}
    {# 6: Tags #}
-    {# 7: Document edit url #}
+    {# 7: Archive serial number #}
+    {# 8: Document type #}
+    {# 9: Document edit url #}
    <div class="box">
      <div class="result">
        <div class="header">
@@ -166,7 +183,7 @@
            selection would not be possible with mouse click + drag. Instead,
            the underlying link would be dragged.
          {% endcomment %}
-          <div class="headerLink" onclick="location.href='{{ result.7 }}';"></div>
+          <div class="headerLink" onclick="location.href='{{ result.9 }}';"></div>
          <div class="checkbox">{{ result.0 }}</div>
          <div class="info">
            {{ result.5 }}
@@ -174,10 +191,14 @@
          {{ result.1 }}
          <div style="clear: both;"></div>
        </div>
+        {% if '>-<' not in result.8 %}<div class="documentType">{{ result.8 }}</div>{% endif %}
        <div class="tags">{{ result.6 }}</div>
        <div class="date">{{ result.2 }}</div>
        <div style="clear: both;"></div>
        <div class="image">{{ result.4 }}</div>
+        {# Only show the archive serial number if it is set on the document. #}
+        {# checking for >-< (i.e., will a dash be displayed) doesn't feel like a very good solution to me. #}
+        {% if '>-<' not in result.7 %}<div class="footer">#{{ result.7 }}</div>{% endif %}
      </div>
    </div>
  {% endfor %}
--- a/src/documents/templates/admin/documents/document/select_object.html
+++ b/src/documents/templates/admin/documents/document/select_object.html
@@ -0,0 +1,50 @@
+{% extends "admin/base_site.html" %}
+
+
+{% load i18n l10n admin_urls static %}
+{% load staticfiles %}
+
+
+{% block extrahead %}
+	{{ block.super }}
+	{{ media }}
+	<script type="text/javascript" src="{% static 'admin/js/cancel.js' %}"></script>
+{% endblock %}
+
+
+{% block bodyclass %}{{ block.super }} app-{{ opts.app_label }} model-{{ opts.model_name }} delete-confirmation delete-selected-confirmation{% endblock %}
+
+
+{% block breadcrumbs %}
+	<div class="breadcrumbs">
+		<a href="{% url 'admin:index' %}">{% trans 'Home' %}</a>
+		&rsaquo; <a href="{% url 'admin:app_list' app_label=opts.app_label %}">{{ opts.app_config.verbose_name }}</a>
+		&rsaquo; <a href="{% url opts|admin_urlname:'changelist' %}">{{ opts.verbose_name_plural|capfirst }}</a>
+		&rsaquo; {{ title }}
+	</div>
+{% endblock %}
+
+{% block content %}
+	<p>Please select the {{itemname}}.</p>
+	<form method="post">{% csrf_token %}
+		<div>
+			{% for obj in queryset %}
+			<input type="hidden" name="{{ action_checkbox_name }}" value="{{ obj.pk|unlocalize }}"/>
+			{% endfor %}
+			<p>
+				<select name="obj_id">
+					{% for obj in objects %}
+					<option value="{{ obj.id }}">{{ obj.name }}</option>
+					{% endfor %}
+				</select>
+			</p>
+
+			<input type="hidden" name="action" value="{{ action }}"/>
+			<input type="hidden" name="post" value="yes" />
+			<p>
+				<input type="submit" value="{% trans 'Confirm' %}" />
+				<a href="#" class="button cancel-link">{% trans "Go back" %}</a>
+			</p>
+		</div>
+	</form>
+{% endblock %}
--- a/src/documents/templates/admin/documents/document/viewers/viewer_image.html
+++ b/src/documents/templates/admin/documents/document/viewers/viewer_image.html
@@ -0,0 +1 @@
+<img src="{{download_url}}" style="max-width: 100%">
--- a/src/documents/templates/admin/documents/document/viewers/viewer_pdf.html
+++ b/src/documents/templates/admin/documents/document/viewers/viewer_pdf.html
@@ -0,0 +1,130 @@
+{% load static %}
+
+<div>
+    <input id="prev" value="Previous" class="default" type="button">
+    <input id="next" value="Next" class="default" type="button">
+    &nbsp; &nbsp;
+    <span>Page: <span id="page_num"></span> / <span id="page_count"></span></span>
+    &nbsp; &nbsp;
+    <input id="zoomin" value="+" class="default" type="button">
+    <input id="zoomout" value="-" class="default" type="button">
+</div>
+
+<div style="width: 100%; overflow: auto;">
+    <canvas id="the-canvas"></canvas>
+</div>
+<script type="text/javascript" src="{% static 'documents/js/pdf.js' %}"></script>
+<script type="text/javascript" src="{% static 'documents/js/pdf.worker.js' %}"></script>
+
+{# Load and display PDF document#}
+<script>
+var pdfjsLib = window['pdfjs-dist/build/pdf'];
+
+var pdfDoc = null,
+    pageNum = 1,
+    pageRendering = false,
+    pageNumPending = null,
+    scale = 1.0,
+    canvas = document.getElementById('the-canvas'),
+    ctx = canvas.getContext('2d');
+
+/**
+ * Get page info from document, resize canvas accordingly, and render page.
+ * @param num Page number.
+ */
+function renderPage(num) {
+    pageRendering = true;
+    // Using promise to fetch the page
+    pdfDoc.getPage(num).then(function(page) {
+        var viewport = page.getViewport(scale);
+        canvas.height = viewport.height;
+        canvas.width = viewport.width;
+        // Render PDF page into canvas context
+        var renderContext = {
+            canvasContext: ctx,
+            viewport: viewport
+        };
+        var renderTask = page.render(renderContext);
+        // Wait for rendering to finish
+        renderTask.promise.then(function () {
+            pageRendering = false;
+            if (pageNumPending !== null) {
+                // New page rendering is pending
+                renderPage(pageNumPending);
+                pageNumPending = null;
+            }
+        });
+    });
+    // Update page counters
+    document.getElementById('page_num').textContent = num;
+}
+
+/**
+ * If another page rendering in progress, waits until the rendering is
+ * finished. Otherwise, executes rendering immediately.
+ */
+function queueRenderPage(num) {
+    if (pageRendering) {
+        pageNumPending = num;
+    } else {
+        renderPage(num);
+    }
+}
+
+/**
+ * Displays previous page.
+ */
+function onPrevPage() {
+    if (pageNum <= 1) {
+        return;
+    }
+    pageNum--;
+    queueRenderPage(pageNum);
+}
+
+document.getElementById('prev').addEventListener('click', onPrevPage);
+
+/**
+ * Displays next page.
+ */
+function onNextPage() {
+    if (pageNum >= pdfDoc.numPages) {
+        return;
+    }
+    pageNum++;
+    queueRenderPage(pageNum);
+}
+
+document.getElementById('next').addEventListener('click', onNextPage);
+
+/**
+ * Zooms in by a factor of 1.2 and re-renders the current page.
+ */
+function onZoomIn() {
+    scale *= 1.2;
+    queueRenderPage(pageNum);
+}
+
+document.getElementById('zoomin').addEventListener('click', onZoomIn);
+
+/**
+ * Zooms out by a factor of 1.2 and re-renders the current page.
+ */
+function onZoomOut() {
+    scale /= 1.2;
+    queueRenderPage(pageNum);
+}
+
+document.getElementById('zoomout').addEventListener('click', onZoomOut);
+
+/**
+ * Asynchronously downloads the PDF (URL escaped for use in a JS string).
+ */
+pdfjsLib.getDocument("{{ download_url|escapejs }}").then(function (pdfDoc_) {
+    pdfDoc = pdfDoc_;
+    document.getElementById('page_count').textContent = pdfDoc.numPages;
+    // Initial/first page rendering
+    renderPage(pageNum);
+});
+</script>
+
--- a/src/documents/tests/test_consumer.py
+++ b/src/documents/tests/test_consumer.py
@@ -3,7 +3,7 @@ from unittest import mock
 from tempfile import TemporaryDirectory

 from ..consumer import Consumer
-from ..models import FileInfo
+from ..models import FileInfo, Tag


 class TestConsumer(TestCase):
@@ -190,6 +190,20 @@ class TestAttributes(TestCase):
            ()
        )

+    def test_case_insensitive_tag_creation(self):
+        """
+        Tags should be detected and created as lower case.
+        :return:
+        """
+
+        path = "Title - Correspondent - tAg1,TAG2.pdf"
+        self.assertEqual(len(FileInfo.from_path(path).tags), 2)
+
+        path = "Title - Correspondent - tag1,tag2.pdf"
+        self.assertEqual(len(FileInfo.from_path(path).tags), 2)
+
+        self.assertEqual(Tag.objects.all().count(), 2)
+

 class TestFieldPermutations(TestCase):

--- a/src/documents/tests/test_matchables.py
+++ b/src/documents/tests/test_matchables.py
@@ -166,7 +166,7 @@ class TestMatching(TestCase):
    def test_match_regex(self):

        self._test_matching(
-            "alpha\w+gamma",
+            r"alpha\w+gamma",
            "MATCH_REGEX",
            (
                "I have alpha_and_gamma in me",
--- a/src/documents/views.py
+++ b/src/documents/views.py
@@ -1,6 +1,8 @@
 from django.http import HttpResponse, HttpResponseBadRequest
 from django.views.generic import DetailView, FormView, TemplateView
 from django_filters.rest_framework import DjangoFilterBackend
+from django.conf import settings
+
 from paperless.db import GnuPG
 from paperless.mixins import SessionOrBasicAuthMixin
 from paperless.views import StandardPagination
@@ -18,14 +20,21 @@ from rest_framework.viewsets import (
    ReadOnlyModelViewSet
 )

-from .filters import CorrespondentFilterSet, DocumentFilterSet, TagFilterSet
+from .filters import (
+    CorrespondentFilterSet,
+    DocumentFilterSet,
+    TagFilterSet,
+    DocumentTypeFilterSet
+)
+
 from .forms import UploadForm
-from .models import Correspondent, Document, Log, Tag
+from .models import Correspondent, Document, Log, Tag, DocumentType
 from .serialisers import (
    CorrespondentSerializer,
    DocumentSerializer,
    LogSerializer,
-    TagSerializer
+    TagSerializer,
+    DocumentTypeSerializer
 )


@@ -48,6 +57,9 @@ class FetchView(SessionOrBasicAuthMixin, DetailView):
            Document.TYPE_JPG: "image/jpeg",
            Document.TYPE_GIF: "image/gif",
            Document.TYPE_TIF: "image/tiff",
+            Document.TYPE_CSV: "text/csv",
+            Document.TYPE_MD:  "text/markdown",
+            Document.TYPE_TXT: "text/plain"
        }

        if self.kwargs["kind"] == "thumb":
@@ -60,8 +72,11 @@ class FetchView(SessionOrBasicAuthMixin, DetailView):
            self._get_raw_data(self.object.source_file),
            content_type=content_types[self.object.file_type]
        )
-        response["Content-Disposition"] = 'attachment; filename="{}"'.format(
-            self.object.file_name)
+
+        DISPOSITION = 'inline' if settings.INLINE_DOC else 'attachment'
+
+        response["Content-Disposition"] = '{}; filename="{}"'.format(
+            DISPOSITION, self.object.file_name)

        return response

@@ -108,6 +123,17 @@ class TagViewSet(ModelViewSet):
    ordering_fields = ("name", "slug")


+class DocumentTypeViewSet(ModelViewSet):
+    model = DocumentType
+    queryset = DocumentType.objects.all()
+    serializer_class = DocumentTypeSerializer
+    pagination_class = StandardPagination
+    permission_classes = (IsAuthenticated,)
+    filter_backends = (DjangoFilterBackend, OrderingFilter)
+    filter_class = DocumentTypeFilterSet
+    ordering_fields = ("name", "slug")
+
+
 class DocumentViewSet(RetrieveModelMixin,
                      UpdateModelMixin,
                      DestroyModelMixin,
--- a/src/manage.py
+++ b/src/manage.py
--- a/src/paperless/checks.py
+++ b/src/paperless/checks.py
@@ -76,7 +76,12 @@ def binaries_check(app_configs, **kwargs):
    error = "Paperless can't find {}. Without it, consumption is impossible."
    hint = "Either it's not in your ${PATH} or it's not installed."

-    binaries = (settings.CONVERT_BINARY, settings.UNPAPER_BINARY, "tesseract")
+    binaries = (
+        settings.CONVERT_BINARY,
+        settings.OPTIPNG_BINARY,
+        settings.UNPAPER_BINARY,
+        "tesseract"
+    )

    check_messages = []
    for binary in binaries:
--- a/src/paperless/models.py
+++ b/src/paperless/models.py
@@ -1,15 +1,20 @@
+from django.contrib.auth.models import User as DjangoUser
+
+
 class User:
    """
-      This is a dummy django User used with our middleware to disable
-      login authentication if that is configured in paperless.conf
+    This is a dummy django User used with our middleware to disable
+    login authentication if that is configured in paperless.conf
    """
+
    is_superuser = True
    is_active = True
    is_staff = True
    is_authenticated = True

-    # Must be -1 to avoid colliding with real user ID's (which start at 1)
-    id = -1
+    @property
+    def id(self):
+        return DjangoUser.objects.order_by("pk").first().pk

    @property
    def pk(self):
@@ -17,9 +22,9 @@ class User:


 """
-  NOTE: These are here as a hack instead of being in the User definition
-  above due to the way pycodestyle handles lamdbdas.
-  See https://github.com/PyCQA/pycodestyle/issues/379 for more.
+NOTE: These are here as a hack instead of being in the User definition
+NOTE: above due to the way pycodestyle handles lambdas.
+NOTE: See https://github.com/PyCQA/pycodestyle/issues/379 for more.
 """

 User.has_module_perms = lambda *_: True
--- a/src/paperless/settings.py
+++ b/src/paperless/settings.py
@@ -22,6 +22,14 @@ elif os.path.exists("/usr/local/etc/paperless.conf"):
    load_dotenv("/usr/local/etc/paperless.conf")


+def __get_boolean(key, default="NO"):
+    """
+    Return True if the env var `key` (falling back to `default`) is one of
+    "yes", "y", "1", "t" or "true", ignoring case; otherwise return False.
+    """
+    return os.getenv(key, default).lower() in ("yes", "y", "1", "t", "true")
+
+
 # Build paths inside the project like this: os.path.join(BASE_DIR, ...)
 BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

@@ -39,7 +47,7 @@ SECRET_KEY = os.getenv(


 # SECURITY WARNING: don't run with debug turned on in production!
-DEBUG = True
+DEBUG = __get_boolean("PAPERLESS_DEBUG", "YES")

 LOGIN_URL = "admin:login"

@@ -50,7 +58,7 @@ if _allowed_hosts:
    ALLOWED_HOSTS = _allowed_hosts.split(",")

 FORCE_SCRIPT_NAME = os.getenv("PAPERLESS_FORCE_SCRIPT_NAME")
-    
+
 # Application definition

 INSTALLED_APPS = [
@@ -61,13 +69,14 @@ INSTALLED_APPS = [
    "django.contrib.messages",
    "django.contrib.staticfiles",

+    "corsheaders",
    "django_extensions",

    "documents.apps.DocumentsConfig",
    "reminders.apps.RemindersConfig",
    "paperless_tesseract.apps.PaperlessTesseractConfig",
+    "paperless_text.apps.PaperlessTextConfig",

-    "flat_responsive",  # TODO: Remove as of Django 2.x
    "django.contrib.admin",

    "rest_framework",
@@ -79,24 +88,24 @@ INSTALLED_APPS = [
 if os.getenv("PAPERLESS_INSTALLED_APPS"):
    INSTALLED_APPS += os.getenv("PAPERLESS_INSTALLED_APPS").split(",")

-
-
-MIDDLEWARE_CLASSES = [
+MIDDLEWARE = [
    'django.middleware.security.SecurityMiddleware',
    'django.contrib.sessions.middleware.SessionMiddleware',
+    'corsheaders.middleware.CorsMiddleware',
    'django.middleware.common.CommonMiddleware',
    'django.middleware.csrf.CsrfViewMiddleware',
    'django.contrib.auth.middleware.AuthenticationMiddleware',
-    'django.contrib.auth.middleware.SessionAuthenticationMiddleware',
    'django.contrib.messages.middleware.MessageMiddleware',
    'django.middleware.clickjacking.XFrameOptionsMiddleware',
 ]

+# CORS origins: PAPERLESS_CORS_ALLOWED_HOSTS, or localhost:8080 by default
+CORS_ORIGIN_WHITELIST = tuple(os.getenv("PAPERLESS_CORS_ALLOWED_HOSTS", "localhost:8080").split(","))
+
 # If auth is disabled, we just use our "bypass" authentication middleware
 if bool(os.getenv("PAPERLESS_DISABLE_LOGIN", "false").lower() in ("yes", "y", "1", "t", "true")):
-    _index = MIDDLEWARE_CLASSES.index("django.contrib.auth.middleware.AuthenticationMiddleware")
-    MIDDLEWARE_CLASSES[_index] = "paperless.middleware.Middleware"
-    MIDDLEWARE_CLASSES.remove("django.contrib.auth.middleware.SessionAuthenticationMiddleware")
+    _index = MIDDLEWARE.index("django.contrib.auth.middleware.AuthenticationMiddleware")
+    MIDDLEWARE[_index] = "paperless.middleware.Middleware"

 ROOT_URLCONF = 'paperless.urls'

@@ -135,13 +144,18 @@ DATABASES = {
    }
 }

-if os.getenv("PAPERLESS_DBUSER") and os.getenv("PAPERLESS_DBPASS"):
+if os.getenv("PAPERLESS_DBENGINE"):
    DATABASES["default"] = {
-        "ENGINE": "django.db.backends.postgresql_psycopg2",
+        "ENGINE": os.getenv("PAPERLESS_DBENGINE"),
        "NAME": os.getenv("PAPERLESS_DBNAME", "paperless"),
        "USER": os.getenv("PAPERLESS_DBUSER"),
-        "PASSWORD": os.getenv("PAPERLESS_DBPASS")
    }
+    if os.getenv("PAPERLESS_DBPASS"):
+        DATABASES["default"]["PASSWORD"] = os.getenv("PAPERLESS_DBPASS")
+    if os.getenv("PAPERLESS_DBHOST"):
+        DATABASES["default"]["HOST"] = os.getenv("PAPERLESS_DBHOST")
+    if os.getenv("PAPERLESS_DBPORT"):
+        DATABASES["default"]["PORT"] = os.getenv("PAPERLESS_DBPORT")


 # Password validation
@@ -189,6 +203,24 @@ STATIC_URL = os.getenv("PAPERLESS_STATIC_URL", "/static/")
 MEDIA_URL = os.getenv("PAPERLESS_MEDIA_URL", "/media/")


+# Other
+
+# Disable Django's artificial limit on the number of form fields to submit at
+# once.  This is a protection against overloading the server, but since this is
+# a self-hosted sort of gig, the benefits of being able to mass-delete a tonne
+# of log entries outweigh the benefits of such a safeguard.
+
+DATA_UPLOAD_MAX_NUMBER_FIELDS = None
+
+
+# Document classification models location
+MODEL_FILE = os.getenv(
+    "PAPERLESS_MODEL_FILE", os.path.join(
+        BASE_DIR, "..", "models", "model.pickle"
+    )
+)
+
+
 # Paperless-specific stuff
 # You shouldn't have to edit any of these values.  Rather, you can set these
 # values in /etc/paperless.conf instead.
@@ -221,12 +253,12 @@ OCR_LANGUAGE = os.getenv("PAPERLESS_OCR_LANGUAGE", "eng")
 OCR_THREADS = os.getenv("PAPERLESS_OCR_THREADS")

 # OCR all documents?
-OCR_ALWAYS = bool(os.getenv("PAPERLESS_OCR_ALWAYS", "NO").lower() in ("yes", "y", "1", "t", "true"))  # NOQA
+OCR_ALWAYS = __get_boolean("PAPERLESS_OCR_ALWAYS")

 # If this is true, any failed attempts to OCR a PDF will result in the PDF
 # being indexed anyway, with whatever we could get.  If it's False, the file
 # will simply be left in the CONSUMPTION_DIR.
-FORGIVING_OCR = bool(os.getenv("PAPERLESS_FORGIVING_OCR", "YES").lower() in ("yes", "y", "1", "t", "true"))  # NOQA
+FORGIVING_OCR = __get_boolean("PAPERLESS_FORGIVING_OCR")

 # GNUPG needs a home directory for some reason
 GNUPG_HOME = os.getenv("HOME", "/tmp")
@@ -237,6 +269,9 @@ CONVERT_TMPDIR = os.getenv("PAPERLESS_CONVERT_TMPDIR")
 CONVERT_MEMORY_LIMIT = os.getenv("PAPERLESS_CONVERT_MEMORY_LIMIT")
 CONVERT_DENSITY = os.getenv("PAPERLESS_CONVERT_DENSITY")

+# OptiPNG
+OPTIPNG_BINARY = os.getenv("PAPERLESS_OPTIPNG_BINARY", "optipng")
+
 # Unpaper
 UNPAPER_BINARY = os.getenv("PAPERLESS_UNPAPER_BINARY", "unpaper")

@@ -270,6 +305,9 @@ PASSPHRASE = os.getenv("PAPERLESS_PASSPHRASE")
 PRE_CONSUME_SCRIPT = os.getenv("PAPERLESS_PRE_CONSUME_SCRIPT")
 POST_CONSUME_SCRIPT = os.getenv("PAPERLESS_POST_CONSUME_SCRIPT")

+# Whether to display a selected document inline, or download it as attachment:
+INLINE_DOC = __get_boolean("PAPERLESS_INLINE_DOC")
+
 # The number of items on each page in the web UI.  This value must be a
 # positive integer, but if you don't define one in paperless.conf, a default of
 # 100 will be used.
@@ -280,3 +318,10 @@ FY_END = os.getenv("PAPERLESS_FINANCIAL_YEAR_END")

 # Specify the default date order (for autodetected dates)
 DATE_ORDER = os.getenv("PAPERLESS_DATE_ORDER", "DMY")
+FILENAME_DATE_ORDER = os.getenv("PAPERLESS_FILENAME_DATE_ORDER")
+
+# Specify for how many years a correspondent is considered recent. Recent
+# correspondents will be shown in a separate "Recent correspondents" filter as
+# well. Set to 0 to disable this filter.
+PAPERLESS_RECENT_CORRESPONDENT_YEARS = int(os.getenv(
+    "PAPERLESS_RECENT_CORRESPONDENT_YEARS", 0))
--- a/src/paperless/urls.py
+++ b/src/paperless/urls.py
@@ -12,12 +12,14 @@ from documents.views import (
    FetchView,
    LogViewSet,
    PushView,
-    TagViewSet
+    TagViewSet,
+    DocumentTypeViewSet
 )
 from reminders.views import ReminderViewSet

 router = DefaultRouter()
 router.register(r"correspondents", CorrespondentViewSet)
+router.register(r"document_types", DocumentTypeViewSet)
 router.register(r"documents", DocumentViewSet)
 router.register(r"logs", LogViewSet)
 router.register(r"reminders", ReminderViewSet)
@@ -28,9 +30,11 @@ urlpatterns = [
    # API
    url(
        r"^api/auth/",
-        include('rest_framework.urls', namespace="rest_framework")
+        include(
+            ('rest_framework.urls', 'rest_framework'),
+            namespace="rest_framework")
    ),
-    url(r"^api/", include(router.urls, namespace="drf")),
+    url(r"^api/", include((router.urls, 'drf'), namespace="drf")),

    # File downloads
    url(
--- a/src/paperless/version.py
+++ b/src/paperless/version.py
@@ -1 +1 @@
-__version__ = (2, 1, 0)
+__version__ = (1, 0, 0)
--- a/src/paperless_tesseract/parsers.py
+++ b/src/paperless_tesseract/parsers.py
@@ -4,7 +4,6 @@ import re
 import subprocess
 from multiprocessing.pool import Pool

-import dateparser
 import langdetect
 import pyocr
 from django.conf import settings
@@ -33,7 +32,6 @@ class RasterisedDocumentParser(DocumentParser):
    DENSITY = settings.CONVERT_DENSITY if settings.CONVERT_DENSITY else 300
    THREADS = int(settings.OCR_THREADS) if settings.OCR_THREADS else None
    UNPAPER = settings.UNPAPER_BINARY
-    DATE_ORDER = settings.DATE_ORDER
    DEFAULT_OCR_LANGUAGE = settings.OCR_LANGUAGE
    OCR_ALWAYS = settings.OCR_ALWAYS

@@ -46,14 +44,18 @@ class RasterisedDocumentParser(DocumentParser):
        The thumbnail of a PDF is just a 500px wide image of the first page.
        """

+        out_path = os.path.join(self.tempdir, "convert.png")
+
+        # Run convert to get a decent thumbnail
        run_convert(
            self.CONVERT,
            "-scale", "500x5000",
            "-alpha", "remove",
-            self.document_path, os.path.join(self.tempdir, "convert-%04d.png")
+            "{}[0]".format(self.document_path),
+            out_path
        )

-        return os.path.join(self.tempdir, "convert-0000.png")
+        return out_path

    def _is_ocred(self):

@@ -151,7 +153,10 @@ class RasterisedDocumentParser(DocumentParser):
                )
                raw_text = self._assemble_ocr_sections(imgs, middle, raw_text)
                return raw_text
-            raise OCRError("Language detection failed")
+            error_msg = ("Language detection failed. Set "
+                         "PAPERLESS_FORGIVING_OCR in config file to continue "
+                         "anyway.")
+            raise OCRError(error_msg)

        if ISO639[guessed_language] == self.DEFAULT_OCR_LANGUAGE:
            raw_text = self._assemble_ocr_sections(imgs, middle, raw_text)
@@ -171,8 +176,8 @@ class RasterisedDocumentParser(DocumentParser):
                raw_text = self._assemble_ocr_sections(imgs, middle, raw_text)
                return raw_text
            raise OCRError(
-                "The guessed language is not available in this instance of "
-                "Tesseract."
+                "The guessed language ({}) is not available in this instance "
+                "of Tesseract.".format(guessed_language)
            )

    def _ocr(self, imgs, lang):
@@ -201,54 +206,6 @@ class RasterisedDocumentParser(DocumentParser):
        text += self._ocr(imgs[middle + 1:], self.DEFAULT_OCR_LANGUAGE)
        return text

-    def get_date(self):
-        date = None
-        datestring = None
-
-        try:
-            text = self.get_text()
-        except ParseError as e:
-            return None
-
-        # This regular expression will try to find dates in the document at
-        # hand and will match the following formats:
-        # - XX.YY.ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
-        # - XX/YY/ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
-        # - XX-YY-ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
-        # - XX. MONTH ZZZZ with XX being 1 or 2 and ZZZZ being 2 or 4 digits
-        # - MONTH ZZZZ, with ZZZZ being 4 digits
-        # - MONTH XX, ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits
-        pattern = re.compile(
-            r'\b([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})\b|' +
-            r'\b([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))\b|' +
-            r'\b([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))\b|' +
-            r'\b([^\W\d_]{3,9} [0-9]{4})\b')
-
-        # Iterate through all regex matches and try to parse the date
-        for m in re.finditer(pattern, text):
-            datestring = m.group(0)
-
-            try:
-                date = dateparser.parse(
-                           datestring,
-                           settings={'DATE_ORDER': self.DATE_ORDER,
-                                     'PREFER_DAY_OF_MONTH': 'first',
-                                     'RETURN_AS_TIMEZONE_AWARE': True})
-            except TypeError:
-                # Skip all matches that do not parse to a proper date
-                continue
-
-            if date is not None:
-                break
-
-        if date is not None:
-            self.log("info", "Detected document date " + date.isoformat() +
-                             " based on string " + datestring)
-        else:
-            self.log("info", "Unable to detect date for document")
-
-        return date
-

 def run_convert(*args):

@@ -264,7 +221,8 @@ def run_convert(*args):

 def run_unpaper(args):
    unpaper, pnm = args
-    command_args = unpaper, pnm, pnm.replace(".pnm", ".unpaper.pnm")
+    command_args = (unpaper, "--overwrite", pnm,
+                    pnm.replace(".pnm", ".unpaper.pnm"))
    if not subprocess.Popen(command_args).wait() == 0:
        raise ParseError("Unpaper failed at {}".format(command_args))

@@ -272,8 +230,9 @@ def run_unpaper(args):
 def strip_excess_whitespace(text):
    collapsed_spaces = re.sub(r"([^\S\r\n]+)", " ", text)
    no_leading_whitespace = re.sub(
-        "([\n\r]+)([^\S\n\r]+)", '\\1', collapsed_spaces)
-    no_trailing_whitespace = re.sub("([^\S\n\r]+)$", '', no_leading_whitespace)
+        r"([\n\r]+)([^\S\n\r]+)", '\\1', collapsed_spaces)
+    no_trailing_whitespace = re.sub(
+        r"([^\S\n\r]+)$", '', no_leading_whitespace)
    return no_trailing_whitespace


--- a/src/paperless_tesseract/signals.py
+++ b/src/paperless_tesseract/signals.py
@@ -5,7 +5,7 @@ from .parsers import RasterisedDocumentParser

 class ConsumerDeclaration:

-    MATCHING_FILES = re.compile("^.*\.(pdf|jpe?g|gif|png|tiff?|pnm|bmp)$")
+    MATCHING_FILES = re.compile(r"^.*\.(pdf|jpe?g|gif|png|tiff?|pnm|bmp)$")

    @classmethod
    def handle(cls, sender, **kwargs):
--- a/src/paperless_tesseract/tests/samples/2013-12-11_tests_date_in_filename_2.pdf
+++ b/src/paperless_tesseract/tests/samples/2013-12-11_tests_date_in_filename_2.pdf
--- a/src/paperless_tesseract/tests/samples/2013-12-11_tests_date_in_filename_2.png
+++ b/src/paperless_tesseract/tests/samples/2013-12-11_tests_date_in_filename_2.png
--- a/src/paperless_tesseract/tests/samples/tests_date_3.pdf
+++ b/src/paperless_tesseract/tests/samples/tests_date_3.pdf
--- a/src/paperless_tesseract/tests/samples/tests_date_3.png
+++ b/src/paperless_tesseract/tests/samples/tests_date_3.png
--- a/src/paperless_tesseract/tests/samples/tests_date_4.pdf
+++ b/src/paperless_tesseract/tests/samples/tests_date_4.pdf
--- a/src/paperless_tesseract/tests/samples/tests_date_4.png
+++ b/src/paperless_tesseract/tests/samples/tests_date_4.png
--- a/src/paperless_tesseract/tests/samples/tests_date_in_filename_2018-03-20_1.pdf
+++ b/src/paperless_tesseract/tests/samples/tests_date_in_filename_2018-03-20_1.pdf
--- a/src/paperless_tesseract/tests/samples/tests_date_in_filename_2018-03-20_1.png
+++ b/src/paperless_tesseract/tests/samples/tests_date_in_filename_2018-03-20_1.png
--- a/src/paperless_tesseract/tests/test_date.py
+++ b/src/paperless_tesseract/tests/test_date.py
@@ -5,9 +5,10 @@ from unittest import mock
 from uuid import uuid4

 from dateutil import tz
-from django.test import TestCase
+from django.conf import settings
+from django.test import TestCase, override_settings

 from ..parsers import RasterisedDocumentParser


 class TestDate(TestCase):
@@ -33,7 +34,7 @@ class TestDate(TestCase):

    @mock.patch(
        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
-        SAMPLE_FILES
+        SCRATCH
    )
    def test_date_format_2(self):
        input_file = os.path.join(self.SAMPLE_FILES, "")
@@ -43,7 +44,7 @@ class TestDate(TestCase):

    @mock.patch(
        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
-        SAMPLE_FILES
+        SCRATCH
    )
    def test_date_format_3(self):
        input_file = os.path.join(self.SAMPLE_FILES, "")
@@ -53,34 +54,44 @@ class TestDate(TestCase):

    @mock.patch(
        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
-        SAMPLE_FILES
+        SCRATCH
    )
    def test_date_format_4(self):
        input_file = os.path.join(self.SAMPLE_FILES, "")
        document = RasterisedDocumentParser(input_file)
        document._text = "lorem ipsum 13.02.2018 lorem ipsum"
+        date = document.get_date()
        self.assertEqual(
-            document.get_date(),
-            datetime.datetime(2018, 2, 13, 0, 0, tzinfo=tz.tzutc())
+            date,
+            datetime.datetime(
+                2018, 2, 13, 0, 0,
+                tzinfo=tz.gettz(settings.TIME_ZONE)
+            )
        )

    @mock.patch(
        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
-        SAMPLE_FILES
+        SCRATCH
    )
    def test_date_format_5(self):
        input_file = os.path.join(self.SAMPLE_FILES, "")
        document = RasterisedDocumentParser(input_file)
        document._text = (
-            "lorem ipsum 130218, 2018, 20180213 and 13.02.2018 lorem ipsum")
+            "lorem ipsum 130218, 2018, 20180213 and lorem 13.02.2018 lorem "
+            "ipsum"
+        )
+        date = document.get_date()
        self.assertEqual(
-            document.get_date(),
-            datetime.datetime(2018, 2, 13, 0, 0, tzinfo=tz.tzutc())
+            date,
+            datetime.datetime(
+                2018, 2, 13, 0, 0,
+                tzinfo=tz.gettz(settings.TIME_ZONE)
+            )
        )

    @mock.patch(
        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
-        SAMPLE_FILES
+        SCRATCH
    )
    def test_date_format_6(self):
        input_file = os.path.join(self.SAMPLE_FILES, "")
@@ -100,7 +111,7 @@ class TestDate(TestCase):

    @mock.patch(
        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
-        SAMPLE_FILES
+        SCRATCH
    )
    def test_date_format_7(self):
        input_file = os.path.join(self.SAMPLE_FILES, "")
@@ -110,59 +121,80 @@ class TestDate(TestCase):
            "März 2019\n"
            "lorem ipsum"
        )
+        date = document.get_date()
        self.assertEqual(
-            document.get_date(),
-            datetime.datetime(2019, 3, 1, 0, 0, tzinfo=tz.tzutc())
+            date,
+            datetime.datetime(
+                2019, 3, 1, 0, 0,
+                tzinfo=tz.gettz(settings.TIME_ZONE)
+            )
        )

    @mock.patch(
        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
-        SAMPLE_FILES
+        SCRATCH
    )
    def test_date_format_8(self):
        input_file = os.path.join(self.SAMPLE_FILES, "")
        document = RasterisedDocumentParser(input_file)
-        document._text = ("lorem ipsum\n"
-                          "Wohnort\n"
-                          "3100\n"
-                          "IBAN\n"
-                          "AT87 4534\n"
-                          "1234\n"
-                          "1234 5678\n"
-                          "BIC\n"
-                          "lorem ipsum\n"
-                          "März 2020")
-        self.assertEqual(document.get_date(),
-                         datetime.datetime(2020, 3, 1, 0, 0,
-                                           tzinfo=tz.tzutc()))
+        document._text = (
+            "lorem ipsum\n"
+            "Wohnort\n"
+            "3100\n"
+            "IBAN\n"
+            "AT87 4534\n"
+            "1234\n"
+            "1234 5678\n"
+            "BIC\n"
+            "lorem ipsum\n"
+            "März 2020"
+        )
+        self.assertEqual(
+            document.get_date(),
+            datetime.datetime(
+                2020, 3, 1, 0, 0,
+                tzinfo=tz.gettz(settings.TIME_ZONE)
+            )
+        )

    @mock.patch(
        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
-        SAMPLE_FILES
+        SCRATCH
    )
    def test_date_format_9(self):
        input_file = os.path.join(self.SAMPLE_FILES, "")
        document = RasterisedDocumentParser(input_file)
-        document._text = ("lorem ipsum\n"
-                          "27. Nullmonth 2020\n"
-                          "März 2020\n"
-                          "lorem ipsum")
-        self.assertEqual(document.get_date(),
-                         datetime.datetime(2020, 3, 1, 0, 0,
-                                           tzinfo=tz.tzutc()))
+        document._text = (
+            "lorem ipsum\n"
+            "27. Nullmonth 2020\n"
+            "März 2020\n"
+            "lorem ipsum"
+        )
+        self.assertEqual(
+            document.get_date(),
+            datetime.datetime(
+                2020, 3, 1, 0, 0,
+                tzinfo=tz.gettz(settings.TIME_ZONE)
+            )
+        )

    @mock.patch(
        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
-        SAMPLE_FILES
+        SCRATCH
    )
    def test_get_text_1_pdf(self):
        input_file = os.path.join(self.SAMPLE_FILES, "tests_date_1.pdf")
        document = RasterisedDocumentParser(input_file)
+        document.DATE_ORDER = 'DMY'
        document.get_text()
+        date = document.get_date()
        self.assertEqual(document._is_ocred(), True)
        self.assertEqual(
-            document.get_date(),
-            datetime.datetime(2018, 4, 1, 0, 0, tzinfo=tz.tzutc())
+            date,
+            datetime.datetime(
+                2018, 4, 1, 0, 0,
+                tzinfo=tz.gettz(settings.TIME_ZONE)
+            )
        )

    @mock.patch(
@@ -172,11 +204,15 @@ class TestDate(TestCase):
    def test_get_text_1_png(self):
        input_file = os.path.join(self.SAMPLE_FILES, "tests_date_1.png")
        document = RasterisedDocumentParser(input_file)
+        document.DATE_ORDER = 'DMY'
        document.get_text()
        self.assertEqual(document._is_ocred(), False)
        self.assertEqual(
            document.get_date(),
-            datetime.datetime(2018, 4, 1, 0, 0, tzinfo=tz.tzutc())
+            datetime.datetime(
+                2018, 4, 1, 0, 0,
+                tzinfo=tz.gettz(settings.TIME_ZONE)
+            )
        )

    @mock.patch(
@@ -186,11 +222,15 @@ class TestDate(TestCase):
    def test_get_text_2_pdf(self):
        input_file = os.path.join(self.SAMPLE_FILES, "tests_date_2.pdf")
        document = RasterisedDocumentParser(input_file)
+        document.DATE_ORDER = 'DMY'
        document.get_text()
        self.assertEqual(document._is_ocred(), True)
        self.assertEqual(
            document.get_date(),
-            datetime.datetime(2013, 2, 1, 0, 0, tzinfo=tz.tzutc())
+            datetime.datetime(
+                2013, 2, 1, 0, 0,
+                tzinfo=tz.gettz(settings.TIME_ZONE)
+            )
        )

    @mock.patch(
@@ -200,67 +240,91 @@ class TestDate(TestCase):
    def test_get_text_2_png(self):
        input_file = os.path.join(self.SAMPLE_FILES, "tests_date_2.png")
        document = RasterisedDocumentParser(input_file)
+        document.DATE_ORDER = 'DMY'
        document.get_text()
        self.assertEqual(document._is_ocred(), False)
        self.assertEqual(
            document.get_date(),
-            datetime.datetime(2013, 2, 1, 0, 0, tzinfo=tz.tzutc())
+            datetime.datetime(
+                2013, 2, 1, 0, 0,
+                tzinfo=tz.gettz(settings.TIME_ZONE)
+            )
        )

    @mock.patch(
        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
        SCRATCH
    )
+    @override_settings(OCR_LANGUAGE="deu")
    def test_get_text_3_pdf(self):
        input_file = os.path.join(self.SAMPLE_FILES, "tests_date_3.pdf")
        document = RasterisedDocumentParser(input_file)
+        document.DATE_ORDER = 'DMY'
        document.get_text()
        self.assertEqual(document._is_ocred(), True)
        self.assertEqual(
            document.get_date(),
-            datetime.datetime(2018, 10, 5, 0, 0, tzinfo=tz.tzutc())
+            datetime.datetime(
+                2018, 10, 5, 0, 0,
+                tzinfo=tz.gettz(settings.TIME_ZONE)
+            )
        )

    @mock.patch(
        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
        SCRATCH
    )
+    @override_settings(OCR_LANGUAGE="deu")
    def test_get_text_3_png(self):
        input_file = os.path.join(self.SAMPLE_FILES, "tests_date_3.png")
        document = RasterisedDocumentParser(input_file)
+        document.DATE_ORDER = 'DMY'
        document.get_text()
        self.assertEqual(document._is_ocred(), False)
        self.assertEqual(
            document.get_date(),
-            datetime.datetime(2018, 10, 5, 0, 0, tzinfo=tz.tzutc())
+            datetime.datetime(
+                2018, 10, 5, 0, 0,
+                tzinfo=tz.gettz(settings.TIME_ZONE)
+            )
        )

    @mock.patch(
        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
        SCRATCH
    )
+    @override_settings(OCR_LANGUAGE="eng")
    def test_get_text_4_pdf(self):
        input_file = os.path.join(self.SAMPLE_FILES, "tests_date_4.pdf")
        document = RasterisedDocumentParser(input_file)
+        document.DATE_ORDER = 'DMY'
        document.get_text()
        self.assertEqual(document._is_ocred(), True)
        self.assertEqual(
            document.get_date(),
-            datetime.datetime(2018, 10, 5, 0, 0, tzinfo=tz.tzutc())
+            datetime.datetime(
+                2018, 10, 5, 0, 0,
+                tzinfo=tz.gettz(settings.TIME_ZONE)
+            )
        )

    @mock.patch(
        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
        SCRATCH
    )
+    @override_settings(OCR_LANGUAGE="eng")
    def test_get_text_4_png(self):
        input_file = os.path.join(self.SAMPLE_FILES, "tests_date_4.png")
        document = RasterisedDocumentParser(input_file)
+        document.DATE_ORDER = 'DMY'
        document.get_text()
        self.assertEqual(document._is_ocred(), False)
        self.assertEqual(
            document.get_date(),
-            datetime.datetime(2018, 10, 5, 0, 0, tzinfo=tz.tzutc())
+            datetime.datetime(
+                2018, 10, 5, 0, 0,
+                tzinfo=tz.gettz(settings.TIME_ZONE)
+            )
        )

    @mock.patch(
@@ -270,11 +334,15 @@ class TestDate(TestCase):
    def test_get_text_5_pdf(self):
        input_file = os.path.join(self.SAMPLE_FILES, "tests_date_5.pdf")
        document = RasterisedDocumentParser(input_file)
+        document.DATE_ORDER = 'DMY'
        document.get_text()
        self.assertEqual(document._is_ocred(), True)
        self.assertEqual(
            document.get_date(),
-            datetime.datetime(2018, 12, 17, 0, 0, tzinfo=tz.tzutc())
+            datetime.datetime(
+                2018, 12, 17, 0, 0,
+                tzinfo=tz.gettz(settings.TIME_ZONE)
+            )
        )

    @mock.patch(
@@ -284,11 +352,15 @@ class TestDate(TestCase):
    def test_get_text_5_png(self):
        input_file = os.path.join(self.SAMPLE_FILES, "tests_date_5.png")
        document = RasterisedDocumentParser(input_file)
+        document.DATE_ORDER = 'DMY'
        document.get_text()
        self.assertEqual(document._is_ocred(), False)
        self.assertEqual(
            document.get_date(),
-            datetime.datetime(2018, 12, 17, 0, 0, tzinfo=tz.tzutc())
+            datetime.datetime(
+                2018, 12, 17, 0, 0,
+                tzinfo=tz.gettz(settings.TIME_ZONE)
+            )
        )

    @mock.patch(
@@ -303,7 +375,10 @@ class TestDate(TestCase):
        self.assertEqual(document._is_ocred(), True)
        self.assertEqual(
            document.get_date(),
-            datetime.datetime(2018, 12, 17, 0, 0, tzinfo=tz.tzutc())
+            datetime.datetime(
+                2018, 12, 17, 0, 0,
+                tzinfo=tz.gettz(settings.TIME_ZONE)
+            )
        )

    @mock.patch(
@@ -318,7 +393,10 @@ class TestDate(TestCase):
        self.assertEqual(document._is_ocred(), False)
        self.assertEqual(
            document.get_date(),
-            datetime.datetime(2018, 12, 17, 0, 0, tzinfo=tz.tzutc())
+            datetime.datetime(
+                2018, 12, 17, 0, 0,
+                tzinfo=tz.gettz(settings.TIME_ZONE)
+            )
        )

    @mock.patch(
@@ -328,6 +406,7 @@ class TestDate(TestCase):
    def test_get_text_6_pdf_eu(self):
        input_file = os.path.join(self.SAMPLE_FILES, "tests_date_6.pdf")
        document = RasterisedDocumentParser(input_file)
+        document.DATE_ORDER = 'DMY'
        document.get_text()
        self.assertEqual(document._is_ocred(), True)
        self.assertEqual(document.get_date(), None)
@@ -339,6 +418,7 @@ class TestDate(TestCase):
    def test_get_text_6_png_eu(self):
        input_file = os.path.join(self.SAMPLE_FILES, "tests_date_6.png")
        document = RasterisedDocumentParser(input_file)
+        document.DATE_ORDER = 'DMY'
        document.get_text()
        self.assertEqual(document._is_ocred(), False)
        self.assertEqual(document.get_date(), None)
@@ -350,37 +430,173 @@ class TestDate(TestCase):
    def test_get_text_7_pdf(self):
        input_file = os.path.join(self.SAMPLE_FILES, "tests_date_7.pdf")
        document = RasterisedDocumentParser(input_file)
+        document.DATE_ORDER = 'DMY'
        document.get_text()
        self.assertEqual(document._is_ocred(), True)
        self.assertEqual(
            document.get_date(),
-            datetime.datetime(2018, 4, 1, 0, 0, tzinfo=tz.tzutc())
+            datetime.datetime(
+                2018, 4, 1, 0, 0,
+                tzinfo=tz.gettz(settings.TIME_ZONE)
+            )
        )

    @mock.patch(
        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
-        SAMPLE_FILES
+        SCRATCH
    )
    def test_get_text_8_pdf(self):
        input_file = os.path.join(self.SAMPLE_FILES, "tests_date_8.pdf")
        document = RasterisedDocumentParser(input_file)
+        document.DATE_ORDER = 'DMY'
        document.get_text()
        self.assertEqual(document._is_ocred(), True)
        self.assertEqual(
            document.get_date(),
-            datetime.datetime(2017, 12, 31, 0, 0, tzinfo=tz.tzutc())
+            datetime.datetime(
+                2017, 12, 31, 0, 0,
+                tzinfo=tz.gettz(settings.TIME_ZONE)
+            )
        )

    @mock.patch(
        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
-        SAMPLE_FILES
+        SCRATCH
    )
    def test_get_text_9_pdf(self):
        input_file = os.path.join(self.SAMPLE_FILES, "tests_date_9.pdf")
        document = RasterisedDocumentParser(input_file)
+        document.DATE_ORDER = 'DMY'
        document.get_text()
        self.assertEqual(document._is_ocred(), True)
        self.assertEqual(
            document.get_date(),
-            datetime.datetime(2017, 12, 31, 0, 0, tzinfo=tz.tzutc())
+            datetime.datetime(
+                2017, 12, 31, 0, 0,
+                tzinfo=tz.gettz(settings.TIME_ZONE)
+            )
        )
+
+    @mock.patch(
+        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
+        SCRATCH
+    )
+    def test_filename_date_1_pdf(self):
+        """
+        With FILENAME_DATE_ORDER 'YMD', get_date() must return 2018-03-20
+        for the sample PDF whose file name contains that date.
+        """
+        sample = "tests_date_in_filename_2018-03-20_1.pdf"
+        parser = RasterisedDocumentParser(
+            os.path.join(self.SAMPLE_FILES, sample)
+        )
+        parser.FILENAME_DATE_ORDER = 'YMD'
+        parser.get_text()
+        found = parser.get_date()
+        self.assertEqual(parser._is_ocred(), True)
+        expected = datetime.datetime(
+            2018, 3, 20, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)
+        )
+        self.assertEqual(found, expected)
+
+    @mock.patch(
+        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
+        SCRATCH
+    )
+    def test_filename_date_1_png(self):
+        """
+        With FILENAME_DATE_ORDER 'YMD', get_date() must return 2018-03-20
+        for the sample PNG whose file name contains that date.
+        """
+        sample = "tests_date_in_filename_2018-03-20_1.png"
+        parser = RasterisedDocumentParser(
+            os.path.join(self.SAMPLE_FILES, sample)
+        )
+        parser.FILENAME_DATE_ORDER = 'YMD'
+        found = parser.get_date()
+        self.assertEqual(parser._is_ocred(), False)
+        expected = datetime.datetime(
+            2018, 3, 20, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)
+        )
+        self.assertEqual(found, expected)
+
+    @mock.patch(
+        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
+        SCRATCH
+    )
+    def test_filename_date_2_pdf(self):
+        """
+        With FILENAME_DATE_ORDER 'YMD', get_date() must return 2013-12-11
+        for the sample PDF whose file name starts with that date.
+        """
+        sample = "2013-12-11_tests_date_in_filename_2.pdf"
+        parser = RasterisedDocumentParser(
+            os.path.join(self.SAMPLE_FILES, sample)
+        )
+        parser.FILENAME_DATE_ORDER = 'YMD'
+        found = parser.get_date()
+        self.assertEqual(parser._is_ocred(), True)
+        expected = datetime.datetime(
+            2013, 12, 11, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)
+        )
+        self.assertEqual(found, expected)
+
+    @mock.patch(
+        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
+        SCRATCH
+    )
+    def test_filename_date_2_png(self):
+        """
+        With FILENAME_DATE_ORDER 'YMD', get_date() must return 2013-12-11
+        for the sample PNG whose file name starts with that date.
+        """
+        sample = "2013-12-11_tests_date_in_filename_2.png"
+        parser = RasterisedDocumentParser(
+            os.path.join(self.SAMPLE_FILES, sample)
+        )
+        parser.FILENAME_DATE_ORDER = 'YMD'
+        found = parser.get_date()
+        self.assertEqual(parser._is_ocred(), False)
+        expected = datetime.datetime(
+            2013, 12, 11, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)
+        )
+        self.assertEqual(found, expected)
+
+    @mock.patch(
+        "paperless_tesseract.parsers.RasterisedDocumentParser.get_text",
+        return_value="01-07-0590 00:00:00")
+    @mock.patch(
+        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
+        SCRATCH)
+    def test_crazy_date_past(self, *args):
+        # NOTE(review): a later method in this class reuses this name with
+        # the same body, so this definition is overridden and never runs.
+        parser = RasterisedDocumentParser("/dev/null")
+        parser.get_text()
+        self.assertIsNone(parser.get_date())
+
+    @mock.patch(
+        "paperless_tesseract.parsers.RasterisedDocumentParser.get_text",
+        return_value="01-07-2350 00:00:00")
+    @mock.patch(
+        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
+        SCRATCH)
+    def test_crazy_date_future(self, *args):
+        # get_text() is patched to return a year-2350 date string; such a
+        # date must be rejected, i.e. get_date() returns None.
+        parser = RasterisedDocumentParser("/dev/null")
+        parser.get_text()
+        self.assertIsNone(parser.get_date())
+
+    @mock.patch(
+        "paperless_tesseract.parsers.RasterisedDocumentParser.get_text",
+        return_value="01-07-0590 00:00:00")
+    @mock.patch(
+        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
+        SCRATCH)
+    def test_crazy_date_past(self, *args):
+        # NOTE(review): repeats the name and body of an earlier method in
+        # this class; being the later definition, this is the one that runs.
+        parser = RasterisedDocumentParser("/dev/null")
+        parser.get_text()
+        self.assertIsNone(parser.get_date())
--- a/src/paperless_text/init.py
+++ b/src/paperless_text/init.py
--- a/src/paperless_text/apps.py
+++ b/src/paperless_text/apps.py
@@ -0,0 +1,16 @@
+from django.apps import AppConfig
+
+
+class PaperlessTextConfig(AppConfig):
+    """App config that connects the text parser's consumer declaration."""
+
+    name = "paperless_text"
+
+    def ready(self):
+        # Both imports stay inside ready(), as in the original layout.
+        from documents.signals import document_consumer_declaration
+        from .signals import ConsumerDeclaration
+
+        declare = ConsumerDeclaration.handle
+        document_consumer_declaration.connect(declare)
+        super().ready()
--- a/src/paperless_text/parsers.py
+++ b/src/paperless_text/parsers.py
@@ -0,0 +1,105 @@
+import os
+import subprocess
+
+from django.conf import settings
+
+from documents.parsers import DocumentParser, ParseError
+
+
+class TextDocumentParser(DocumentParser):
+    """
+    This parser directly parses a text document (.txt, .md, or .csv)
+    """
+
+    CONVERT = settings.CONVERT_BINARY
+    THREADS = int(settings.OCR_THREADS) if settings.OCR_THREADS else None
+    UNPAPER = settings.UNPAPER_BINARY
+    DEFAULT_OCR_LANGUAGE = settings.OCR_LANGUAGE
+    OCR_ALWAYS = settings.OCR_ALWAYS
+
+    def __init__(self, path):
+        super().__init__(path)
+        self._text = None  # cache filled by the first get_text() call
+
+    def get_thumbnail(self):
+        """
+        The thumbnail of a text file is just a 500px wide image of the text
+        rendered onto a letter-sized page.  Returns the path of that image.
+        """
+        # The below is heavily cribbed from https://askubuntu.com/a/590951
+        import shlex  # local import; only the caption below needs it
+
+        bg_color = "white"  # bg color
+        text_color = "black"  # text color
+        psize = [500, 647]  # icon size
+        n_lines = 50  # number of lines to show
+        out_path = os.path.join(self.tempdir, "convert.png")
+
+        temp_bg = os.path.join(self.tempdir, "bg.png")
+        temp_txlayer = os.path.join(self.tempdir, "tx.png")
+        picsize = "x".join([str(n) for n in psize])
+        txsize = "x".join([str(n - 8) for n in psize])
+
+        def create_bg():
+            work_size = ",".join([str(n - 1) for n in psize])
+            r = str(round(psize[0] / 10))
+            rounded = ",".join([r, r])
+            run_command(
+                self.CONVERT,
+                "-size ", picsize,
+                ' xc:none -draw ',
+                '"fill ', bg_color, ' roundrectangle 0,0,', work_size, ",", rounded, '" ',  # NOQA: E501
+                temp_bg
+            )
+
+        def read_text():
+            # Stop after n_lines instead of loading the whole document.
+            with open(self.document_path, 'r') as src:
+                shown = [row.strip() for _, row in zip(range(n_lines), src)]
+            return "\n".join(shown)
+
+        def create_txlayer():
+            # run_command() hands one joined string to the shell, so quote
+            # the document text: in "..." $(...) and backticks still expand.
+            caption = shlex.quote("caption: " + read_text() + " ")
+            run_command(
+                self.CONVERT,
+                "-background none",
+                "-fill", text_color,
+                "-pointsize", "12",
+                "-border 4 -bordercolor none",
+                "-size ", txsize,
+                caption,
+                temp_txlayer
+            )
+
+        create_txlayer()
+        create_bg()
+        run_command(
+            self.CONVERT,
+            temp_bg,
+            temp_txlayer,
+            "-background None -layers merge ",
+            out_path
+        )
+
+        return out_path
+
+    def get_text(self):
+        """Return the document's text, reading the file only once."""
+        if self._text is None:
+            with open(self.document_path, 'r') as f:
+                self._text = f.read()
+        return self._text
+
+
+def run_command(*args):
+    """Run the space-joined args in a shell; ParseError if exit != 0."""
+    environment = dict(os.environ)
+    if settings.CONVERT_MEMORY_LIMIT:
+        environment["MAGICK_MEMORY_LIMIT"] = settings.CONVERT_MEMORY_LIMIT
+    if settings.CONVERT_TMPDIR:
+        environment["MAGICK_TMPDIR"] = settings.CONVERT_TMPDIR
+    # shell=True: callers pass pre-formatted command fragments, not argv.
+    if subprocess.call(' '.join(args), env=environment, shell=True) != 0:
+        raise ParseError("Convert failed at {}".format(args))
--- a/src/paperless_text/signals.py
+++ b/src/paperless_text/signals.py
@@ -0,0 +1,23 @@
+import re
+
+from .parsers import TextDocumentParser
+
+
+class ConsumerDeclaration:
+    """Offers TextDocumentParser for .txt, .text, .md and .csv names."""
+
+    MATCHING_FILES = re.compile(r"^.*\.(te?xt|md|csv)$")
+
+    @classmethod
+    def handle(cls, sender, **kwargs):
+        """Signal receiver: replies with the test callable, not a result."""
+        return cls.test
+
+    @classmethod
+    def test(cls, doc):
+        """Return the parser/weight dict if doc's name matches, else None."""
+        matched = cls.MATCHING_FILES.match(doc.lower())
+        if matched is None:
+            return None
+
+        return {"parser": TextDocumentParser, "weight": 10}
--- a/src/reminders/migrations/0002_auto_20181007_1420.py
+++ b/src/reminders/migrations/0002_auto_20181007_1420.py
@@ -0,0 +1,19 @@
+# Generated by Django 2.0.8 on 2018-10-07 14:20
+
+from django.db import migrations, models
+import django.db.models.deletion
+
+
+class Migration(migrations.Migration):
+    # Alters reminder.document to a ForeignKey with on_delete=PROTECT.
+    dependencies = [('reminders', '0001_initial')]
+
+    operations = [
+        migrations.AlterField(
+            model_name='reminder',
+            name='document',
+            field=models.ForeignKey(
+                on_delete=django.db.models.deletion.PROTECT,
+                to='documents.Document'),
+        ),
+    ]
--- a/src/reminders/models.py
+++ b/src/reminders/models.py
@@ -3,6 +3,7 @@ from django.db import models

 class Reminder(models.Model):

-    document = models.ForeignKey("documents.Document")
+    document = models.ForeignKey(
+        "documents.Document", on_delete=models.PROTECT)
    date = models.DateTimeField()
    note = models.TextField(blank=True)
--- a/src/tox.ini
+++ b/src/tox.ini
@@ -5,7 +5,7 @@

 [tox]
 skipsdist = True
-envlist = py34, py35, py36, pycodestyle, doc
+envlist = py34, py35, py36, py37, pycodestyle, doc

 [testenv]
 commands = pytest
				`@@ -0,0 +1 @@`
				`<img src="{{download_url}}" style="max-width: 100%">`