Merge branch 'dev'

Revert last commit.
Added another library that's required to get this running on Raspberry Pi.
2025-08-05 18:58:34 -05:00 · 2020-12-10 02:24:58 +01:00 · 2020-12-10 02:24:36 +01:00 · 2020-12-10 02:14:26 +01:00 · 2020-12-10 01:12:30 +01:00 · 2020-12-10 00:59:03 +01:00
140 changed files with 4477 additions and 1846 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -76,16 +76,11 @@ scripts/nuke
 /static/

 # Stored PDFs
-/media/documents/originals/*
-/media/documents/thumbnails/*
-
-/data/classification_model.pickle
-/data/db.sqlite3
-/data/index
-
+/media/
+/data/
 /paperless.conf
-/consume
-/export
+/consume/
+/export/
 /src-ui/.vscode

 # this is where the compiled frontend is moved to.
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,5 +1,8 @@
 language: python

+dist: focal
+os: linux
+
 jobs:
  include:
    - name: "Paperless on Python 3.6"
@@ -33,7 +36,7 @@ jobs:

 before_install:
  - sudo apt-get update -qq
-  - sudo apt-get install -qq libpoppler-cpp-dev unpaper tesseract-ocr imagemagick ghostscript
+  - sudo apt-get install -qq libpoppler-cpp-dev unpaper tesseract-ocr imagemagick ghostscript optipng

 install:
  - pip install --upgrade pipenv
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -24,3 +24,7 @@ feature-X branches is for experimental stuff that will eventually be merged into
 I'm trying to get most of paperless tested, so please do the same for your code! I know its a hassle, but it makes sure that your code works now and will allow us to detect regressions easily.

 To test your code, execute `pytest` in the src/ directory. Executing that in the project root is no good. This also generates a html coverage report, which you can use to see if you missed anything important during testing.
+
+## More info:
+
+More information is available in the documentation: https://paperless-ng.readthedocs.io/en/latest/extending.html
--- a/Pipfile
+++ b/Pipfile
@@ -19,6 +19,7 @@ django-extensions = "*"
 django-filter = "~=2.4.0"
 django-q = "~=1.3.4"
 djangorestframework = "~=3.12.2"
+filelock = "*"
 fuzzywuzzy = "*"
 gunicorn = "*"
 imap-tools = "*"
@@ -26,7 +27,7 @@ langdetect = "*"
 pdftotext = "*"
 pathvalidate = "*"
 pillow = "*"
-pyocr = "~=0.7.2"
+pikepdf = "*"
 python-gnupg = "*"
 python-dotenv = "*"
 python-dateutil = "*"
@@ -38,7 +39,9 @@ scikit-learn="~=0.23.2"
 whitenoise = "~=5.2.0"
 watchdog = "*"
 whoosh="~=2.7.4"
-inotify-simple = "*"
+inotifyrecursive = "~=0.3.4"
+ocrmypdf = "*"
+tqdm = "*"

 [dev-packages]
 coveralls = "*"
--- a/Pipfile.lock
+++ b/Pipfile.lock
@@ -1,7 +1,7 @@
 {
    "_meta": {
        "hash": {
-            "sha256": "d6432a18280c092c108e998f00bcd377c0c55ef18f26cb0b8eb64f9618b9f383"
+            "sha256": "3d576f289958226a7583e4c471c7f8c11bff6933bf093185f623cfb381a92412"
        },
        "pipfile-spec": 6,
        "requires": {
@@ -39,10 +39,100 @@
        },
        "blessed": {
            "hashes": [
-                "sha256:7d4914079a6e8e14fbe080dcaf14dee596a088057cdc598561080e3266123b48",
-                "sha256:81125aa5b84cb9dfc09ff451886f64b4b923b75c5eaf51fde9d1c48a135eb797"
+                "sha256:0a74a8d3f0366db600d061273df77d44f0db07daade7bb7a4d49c8bc22ed9f74",
+                "sha256:580429e7e0c6f6a42ea81b0ae5a4993b6205c6ccbb635d034b4277af8175753e"
            ],
-            "version": "==1.17.11"
+            "version": "==1.17.12"
+        },
+        "cffi": {
+            "hashes": [
+                "sha256:00a1ba5e2e95684448de9b89888ccd02c98d512064b4cb987d48f4b40aa0421e",
+                "sha256:00e28066507bfc3fe865a31f325c8391a1ac2916219340f87dfad602c3e48e5d",
+                "sha256:045d792900a75e8b1e1b0ab6787dd733a8190ffcf80e8c8ceb2fb10a29ff238a",
+                "sha256:0638c3ae1a0edfb77c6765d487fee624d2b1ee1bdfeffc1f0b58c64d149e7eec",
+                "sha256:105abaf8a6075dc96c1fe5ae7aae073f4696f2905fde6aeada4c9d2926752362",
+                "sha256:155136b51fd733fa94e1c2ea5211dcd4c8879869008fc811648f16541bf99668",
+                "sha256:1a465cbe98a7fd391d47dce4b8f7e5b921e6cd805ef421d04f5f66ba8f06086c",
+                "sha256:1d2c4994f515e5b485fd6d3a73d05526aa0fcf248eb135996b088d25dfa1865b",
+                "sha256:23f318bf74b170c6e9adb390e8bd282457f6de46c19d03b52f3fd042b5e19654",
+                "sha256:2c24d61263f511551f740d1a065eb0212db1dbbbbd241db758f5244281590c06",
+                "sha256:51a8b381b16ddd370178a65360ebe15fbc1c71cf6f584613a7ea08bfad946698",
+                "sha256:594234691ac0e9b770aee9fcdb8fa02c22e43e5c619456efd0d6c2bf276f3eb2",
+                "sha256:5cf4be6c304ad0b6602f5c4e90e2f59b47653ac1ed9c662ed379fe48a8f26b0c",
+                "sha256:64081b3f8f6f3c3de6191ec89d7dc6c86a8a43911f7ecb422c60e90c70be41c7",
+                "sha256:6bc25fc545a6b3d57b5f8618e59fc13d3a3a68431e8ca5fd4c13241cd70d0009",
+                "sha256:798caa2a2384b1cbe8a2a139d80734c9db54f9cc155c99d7cc92441a23871c03",
+                "sha256:7c6b1dece89874d9541fc974917b631406233ea0440d0bdfbb8e03bf39a49b3b",
+                "sha256:840793c68105fe031f34d6a086eaea153a0cd5c491cde82a74b420edd0a2b909",
+                "sha256:8d6603078baf4e11edc4168a514c5ce5b3ba6e3e9c374298cb88437957960a53",
+                "sha256:9cc46bc107224ff5b6d04369e7c595acb700c3613ad7bcf2e2012f62ece80c35",
+                "sha256:9f7a31251289b2ab6d4012f6e83e58bc3b96bd151f5b5262467f4bb6b34a7c26",
+                "sha256:9ffb888f19d54a4d4dfd4b3f29bc2c16aa4972f1c2ab9c4ab09b8ab8685b9c2b",
+                "sha256:a5ed8c05548b54b998b9498753fb9cadbfd92ee88e884641377d8a8b291bcc01",
+                "sha256:a7711edca4dcef1a75257b50a2fbfe92a65187c47dab5a0f1b9b332c5919a3fb",
+                "sha256:af5c59122a011049aad5dd87424b8e65a80e4a6477419c0c1015f73fb5ea0293",
+                "sha256:b18e0a9ef57d2b41f5c68beefa32317d286c3d6ac0484efd10d6e07491bb95dd",
+                "sha256:b4e248d1087abf9f4c10f3c398896c87ce82a9856494a7155823eb45a892395d",
+                "sha256:ba4e9e0ae13fc41c6b23299545e5ef73055213e466bd107953e4a013a5ddd7e3",
+                "sha256:be8661bcee1bc2fc4b033a6ab65bd1f87ce5008492601695d0b9a4e820c3bde5",
+                "sha256:c6332685306b6417a91b1ff9fae889b3ba65c2292d64bd9245c093b1b284809d",
+                "sha256:d5ff0621c88ce83a28a10d2ce719b2ee85635e85c515f12bac99a95306da4b2e",
+                "sha256:d9efd8b7a3ef378dd61a1e77367f1924375befc2eba06168b6ebfa903a5e59ca",
+                "sha256:df5169c4396adc04f9b0a05f13c074df878b6052430e03f50e68adf3a57aa28d",
+                "sha256:ebb253464a5d0482b191274f1c8bf00e33f7e0b9c66405fbffc61ed2c839c775",
+                "sha256:ec80dc47f54e6e9a78181ce05feb71a0353854cc26999db963695f950b5fb375",
+                "sha256:f032b34669220030f905152045dfa27741ce1a6db3324a5bc0b96b6c7420c87b",
+                "sha256:f60567825f791c6f8a592f3c6e3bd93dd2934e3f9dac189308426bd76b00ef3b",
+                "sha256:f803eaa94c2fcda012c047e62bc7a51b0bdabda1cad7a92a522694ea2d76e49f"
+            ],
+            "version": "==1.14.4"
+        },
+        "chardet": {
+            "hashes": [
+                "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae",
+                "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691"
+            ],
+            "markers": "python_version >= '3.1'",
+            "version": "==3.0.4"
+        },
+        "coloredlogs": {
+            "hashes": [
+                "sha256:346f58aad6afd48444c2468618623638dadab76e4e70d5e10822676f2d32226a",
+                "sha256:a1fab193d2053aa6c0a97608c4342d031f1f93a3d1218432c59322441d31a505",
+                "sha256:b0c2124367d4f72bd739f48e1f61491b4baf145d6bda33b606b4a53cb3f96a97"
+            ],
+            "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
+            "version": "==14.0"
+        },
+        "cryptography": {
+            "hashes": [
+                "sha256:07ca431b788249af92764e3be9a488aa1d39a0bc3be313d826bbec690417e538",
+                "sha256:13b88a0bd044b4eae1ef40e265d006e34dbcde0c2f1e15eb9896501b2d8f6c6f",
+                "sha256:257dab4f368fae15f378ea9a4d2799bf3696668062de0e9fa0ebb7a738a6917d",
+                "sha256:32434673d8505b42c0de4de86da8c1620651abd24afe91ae0335597683ed1b77",
+                "sha256:3cd75a683b15576cfc822c7c5742b3276e50b21a06672dc3a800a2d5da4ecd1b",
+                "sha256:4e7268a0ca14536fecfdf2b00297d4e407da904718658c1ff1961c713f90fd33",
+                "sha256:545a8550782dda68f8cdc75a6e3bf252017aa8f75f19f5a9ca940772fc0cb56e",
+                "sha256:55d0b896631412b6f0c7de56e12eb3e261ac347fbaa5d5e705291a9016e5f8cb",
+                "sha256:5849d59358547bf789ee7e0d7a9036b2d29e9a4ddf1ce5e06bb45634f995c53e",
+                "sha256:59f7d4cfea9ef12eb9b14b83d79b432162a0a24a91ddc15c2c9bf76a68d96f2b",
+                "sha256:6dc59630ecce8c1f558277ceb212c751d6730bd12c80ea96b4ac65637c4f55e7",
+                "sha256:7117319b44ed1842c617d0a452383a5a052ec6aa726dfbaffa8b94c910444297",
+                "sha256:75e8e6684cf0034f6bf2a97095cb95f81537b12b36a8fedf06e73050bb171c2d",
+                "sha256:7b8d9d8d3a9bd240f453342981f765346c87ade811519f98664519696f8e6ab7",
+                "sha256:a035a10686532b0587d58a606004aa20ad895c60c4d029afa245802347fab57b",
+                "sha256:a4e27ed0b2504195f855b52052eadcc9795c59909c9d84314c5408687f933fc7",
+                "sha256:a733671100cd26d816eed39507e585c156e4498293a907029969234e5e634bc4",
+                "sha256:a75f306a16d9f9afebfbedc41c8c2351d8e61e818ba6b4c40815e2b5740bb6b8",
+                "sha256:bd717aa029217b8ef94a7d21632a3bb5a4e7218a4513d2521c2a2fd63011e98b",
+                "sha256:d25cecbac20713a7c3bc544372d42d8eafa89799f492a43b79e1dfd650484851",
+                "sha256:d26a2557d8f9122f9bf445fc7034242f4375bd4e95ecda007667540270965b13",
+                "sha256:d3545829ab42a66b84a9aaabf216a4dce7f16dbc76eb69be5c302ed6b8f4a29b",
+                "sha256:d3d5e10be0cf2a12214ddee45c6bd203dab435e3d83b4560c03066eda600bfe3",
+                "sha256:efe15aca4f64f3a7ea0c09c87826490e50ed166ce67368a68f315ea0807a20df"
+            ],
+            "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
+            "version": "==3.2.1"
        },
        "dateparser": {
            "hashes": [
@@ -54,11 +144,11 @@
        },
        "django": {
            "hashes": [
-                "sha256:14a4b7cd77297fba516fc0d92444cc2e2e388aa9de32d7a68d4a83d58f5a4927",
-                "sha256:14b87775ffedab2ef6299b73343d1b4b41e5d4e2aa58c6581f114dbec01e3f8f"
+                "sha256:5c866205f15e7a7123f1eec6ab939d22d5bde1416635cab259684af66d8e48a2",
+                "sha256:edb10b5c45e7e9c0fb1dc00b76ec7449aca258a39ffd613dbd078c51d19c9f03"
            ],
            "index": "pypi",
-            "version": "==3.1.3"
+            "version": "==3.1.4"
        },
        "django-cors-headers": {
            "hashes": [
@@ -70,11 +160,11 @@
        },
        "django-extensions": {
            "hashes": [
-                "sha256:6809c89ca952f0e08d4e0766bc0101dfaf508d7649aced1180c091d737046ea7",
-                "sha256:dc663652ac9460fd06580a973576820430c6d428720e874ae46b041fa63e0efa"
+                "sha256:7cd002495ff0a0e5eb6cdd6be759600905b4e4079232ea27618fc46bdd853651",
+                "sha256:c7f88625a53f631745d4f2bef9ec4dcb999ed59476393bdbbe99db8596778846"
            ],
            "index": "pypi",
-            "version": "==3.0.9"
+            "version": "==3.1.0"
        },
        "django-filter": {
            "hashes": [
@@ -107,6 +197,14 @@
            "index": "pypi",
            "version": "==3.12.2"
        },
+        "filelock": {
+            "hashes": [
+                "sha256:18d82244ee114f543149c66a6e0c14e9c4f8a1044b5cdaadd0f82159d6a6ff59",
+                "sha256:929b7d63ec5b7d6b71b0fa5ac14e030b3f70b75747cef1b10da9b879fef15836"
+            ],
+            "index": "pypi",
+            "version": "==3.0.12"
+        },
        "fuzzywuzzy": {
            "hashes": [
                "sha256:45016e92264780e58972dca1b3d939ac864b78437422beecebb3095f8efd00e8",
@@ -123,22 +221,53 @@
            "index": "pypi",
            "version": "==20.0.4"
        },
+        "humanfriendly": {
+            "hashes": [
+                "sha256:175ffa628aa76da2c17369a5da5856084562cc66dfe7f82ae93ca3ef175277a6",
+                "sha256:3c9ab8d28e88e6cc998e41963357736dafd555ee5bb666b50e42f6ce28dd3e3d"
+            ],
+            "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
+            "version": "==9.0"
+        },
        "imap-tools": {
            "hashes": [
-                "sha256:96e9a4ff6483462635737730a1df28e739faa71967b12a84f4363fb386542246",
-                "sha256:a3ee1827dc4ff185b259b33d0238b091a87d489f63ee59959fcc81716456c602"
+                "sha256:72bf46dc135b039a5d5b59f4e079242ac15eac02a30038e8cb2dec7b153cab65",
+                "sha256:75dc1c72dd76d9e577df26a1e0ec3a809b5eebce77678851458dcd2eae127ac9"
            ],
            "index": "pypi",
-            "version": "==0.32.0"
+            "version": "==0.33.0"
+        },
+        "img2pdf": {
+            "hashes": [
+                "sha256:57905015579b1026acf1605aa95859cd79b051fa1c35485573d165526fc9dbb5",
+                "sha256:eaee690ab8403dd1a9cb4db10afee41dd3e6c7ed63bdace02a0121f9feadb0c9"
+            ],
+            "version": "==0.4.0"
+        },
+        "importlib-metadata": {
+            "hashes": [
+                "sha256:6112e21359ef8f344e7178aa5b72dc6e62b38b0d008e6d3cb212c5b84df72013",
+                "sha256:b0c2d3b226157ae4517d9625decf63591461c66b3a808c2666d538946519d170"
+            ],
+            "markers": "python_version < '3.8'",
+            "version": "==3.1.1"
        },
        "inotify-simple": {
            "hashes": [
                "sha256:8440ffe49c4ae81a8df57c1ae1eb4b6bfa7acb830099bfb3e305b383005cc128",
                "sha256:854f9ac752cc1fcff6ca34e9d3d875c9a94c9b7d6eb377f63be2d481a566c6ee"
            ],
-            "index": "pypi",
+            "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
            "version": "==1.3.5"
        },
+        "inotifyrecursive": {
+            "hashes": [
+                "sha256:7e5f4a2e1dc2bef0efa3b5f6b339c41fb4599055a2b54909d020e9e932cc8d2f",
+                "sha256:a2c450b317693e4538416f90eb1d7858506dafe6b8b885037bd2dd9ae2dafa1e"
+            ],
+            "index": "pypi",
+            "version": "==0.3.5"
+        },
        "joblib": {
            "hashes": [
                "sha256:698c311779f347cf6b7e6b8a39bb682277b8ee4aba8cf9507bc0cf4cd4737b72",
@@ -156,6 +285,51 @@
            "index": "pypi",
            "version": "==1.0.8"
        },
+        "lxml": {
+            "hashes": [
+                "sha256:0448576c148c129594d890265b1a83b9cd76fd1f0a6a04620753d9a6bcfd0a4d",
+                "sha256:127f76864468d6630e1b453d3ffbbd04b024c674f55cf0a30dc2595137892d37",
+                "sha256:1471cee35eba321827d7d53d104e7b8c593ea3ad376aa2df89533ce8e1b24a01",
+                "sha256:2363c35637d2d9d6f26f60a208819e7eafc4305ce39dc1d5005eccc4593331c2",
+                "sha256:2e5cc908fe43fe1aa299e58046ad66981131a66aea3129aac7770c37f590a644",
+                "sha256:2e6fd1b8acd005bd71e6c94f30c055594bbd0aa02ef51a22bbfa961ab63b2d75",
+                "sha256:366cb750140f221523fa062d641393092813b81e15d0e25d9f7c6025f910ee80",
+                "sha256:42ebca24ba2a21065fb546f3e6bd0c58c3fe9ac298f3a320147029a4850f51a2",
+                "sha256:4e751e77006da34643ab782e4a5cc21ea7b755551db202bc4d3a423b307db780",
+                "sha256:4fb85c447e288df535b17ebdebf0ec1cf3a3f1a8eba7e79169f4f37af43c6b98",
+                "sha256:50c348995b47b5a4e330362cf39fc503b4a43b14a91c34c83b955e1805c8e308",
+                "sha256:535332fe9d00c3cd455bd3dd7d4bacab86e2d564bdf7606079160fa6251caacf",
+                "sha256:535f067002b0fd1a4e5296a8f1bf88193080ff992a195e66964ef2a6cfec5388",
+                "sha256:5be4a2e212bb6aa045e37f7d48e3e1e4b6fd259882ed5a00786f82e8c37ce77d",
+                "sha256:60a20bfc3bd234d54d49c388950195d23a5583d4108e1a1d47c9eef8d8c042b3",
+                "sha256:648914abafe67f11be7d93c1a546068f8eff3c5fa938e1f94509e4a5d682b2d8",
+                "sha256:681d75e1a38a69f1e64ab82fe4b1ed3fd758717bed735fb9aeaa124143f051af",
+                "sha256:68a5d77e440df94011214b7db907ec8f19e439507a70c958f750c18d88f995d2",
+                "sha256:69a63f83e88138ab7642d8f61418cf3180a4d8cd13995df87725cb8b893e950e",
+                "sha256:6e4183800f16f3679076dfa8abf2db3083919d7e30764a069fb66b2b9eff9939",
+                "sha256:6fd8d5903c2e53f49e99359b063df27fdf7acb89a52b6a12494208bf61345a03",
+                "sha256:791394449e98243839fa822a637177dd42a95f4883ad3dec2a0ce6ac99fb0a9d",
+                "sha256:7a7669ff50f41225ca5d6ee0a1ec8413f3a0d8aa2b109f86d540887b7ec0d72a",
+                "sha256:7e9eac1e526386df7c70ef253b792a0a12dd86d833b1d329e038c7a235dfceb5",
+                "sha256:7ee8af0b9f7de635c61cdd5b8534b76c52cd03536f29f51151b377f76e214a1a",
+                "sha256:8246f30ca34dc712ab07e51dc34fea883c00b7ccb0e614651e49da2c49a30711",
+                "sha256:8c88b599e226994ad4db29d93bc149aa1aff3dc3a4355dd5757569ba78632bdf",
+                "sha256:91d6dace31b07ab47eeadd3f4384ded2f77b94b30446410cb2c3e660e047f7a7",
+                "sha256:923963e989ffbceaa210ac37afc9b906acebe945d2723e9679b643513837b089",
+                "sha256:94d55bd03d8671686e3f012577d9caa5421a07286dd351dfef64791cf7c6c505",
+                "sha256:97db258793d193c7b62d4e2586c6ed98d51086e93f9a3af2b2034af01450a74b",
+                "sha256:a9d6bc8642e2c67db33f1247a77c53476f3a166e09067c0474facb045756087f",
+                "sha256:cd11c7e8d21af997ee8079037fff88f16fda188a9776eb4b81c7e4c9c0a7d7fc",
+                "sha256:d8d3d4713f0c28bdc6c806a278d998546e8efc3498949e3ace6e117462ac0a5e",
+                "sha256:e0bfe9bb028974a481410432dbe1b182e8191d5d40382e5b8ff39cdd2e5c5931",
+                "sha256:e1dbb88a937126ab14d219a000728224702e0ec0fc7ceb7131c53606b7a76772",
+                "sha256:f4822c0660c3754f1a41a655e37cb4dbbc9be3d35b125a37fab6f82d47674ebc",
+                "sha256:f83d281bb2a6217cd806f4cf0ddded436790e66f393e124dfe9731f6b3fb9afe",
+                "sha256:fc37870d6716b137e80d19241d0e2cff7a7643b925dfa49b4c8ebd1295eb506e"
+            ],
+            "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
+            "version": "==4.6.2"
+        },
        "numpy": {
            "hashes": [
                "sha256:08308c38e44cc926bdfce99498b21eec1f848d24c302519e64203a8da99a97db",
@@ -197,6 +371,14 @@
            "markers": "python_version >= '3.6'",
            "version": "==1.19.4"
        },
+        "ocrmypdf": {
+            "hashes": [
+                "sha256:91e7394172cedb3be801a229dbd3d308fb5ae80cbc3a77879fa7954beea407b1",
+                "sha256:e550b8e884150accab7ea41f4a576b5844594cb5cbd6ed514fbf1206720343ad"
+            ],
+            "index": "pypi",
+            "version": "==11.3.4"
+        },
        "pathtools": {
            "hashes": [
                "sha256:7c35c5421a39bb82e58018febd90e3b6e5db34c5443aaaf742b3f33d4655f1c0",
@@ -212,6 +394,14 @@
            "index": "pypi",
            "version": "==2.3.0"
        },
+        "pdfminer.six": {
+            "hashes": [
+                "sha256:b9aac0ebeafb21c08bf65f2039f4b2c5f78a3449d0a41df711d72445649e952a",
+                "sha256:d78877ba8d8bf957f3bb636c4f73f4f6f30f56c461993877ac22c39c20837509"
+            ],
+            "markers": "python_version >= '3.4'",
+            "version": "==20201018"
+        },
        "pdftotext": {
            "hashes": [
                "sha256:98aeb8b07a4127e1a30223bd933ef080bbd29aa88f801717ca6c5618380b8aa6"
@@ -219,6 +409,33 @@
            "index": "pypi",
            "version": "==2.1.5"
        },
+        "pikepdf": {
+            "hashes": [
+                "sha256:0829bd5dacd73bb4a37e7575bae523f49603479755563c92ddb55c206700cab1",
+                "sha256:0d2b631077cd6af6e4d1b396208020705842610a6f13fab489d5f9c47916baa2",
+                "sha256:21c98af08fae4ac9fbcad02b613b6768a4ca300fda4cba867f4a4b6f73c2d04b",
+                "sha256:2240372fed30124ddc35b0c15a613f2b687a426ea2f150091e0a0c58cca7a495",
+                "sha256:2a97f5f1403e058d217d7f6861cf51fca200c5687bce0d052f5f2fa89b5bfa22",
+                "sha256:3faaefca0ae80d19891acec8b0dd5e6235f59f2206d82375eb80d090285e9557",
+                "sha256:48ef45b64882901c0d69af3b85d16a19bd0f3e95b43e614fefb53521d8caf36c",
+                "sha256:5212fe41f2323fc7356ba67caa39737fe13080562cff37bcbb74a8094076c8d0",
+                "sha256:56859c32170663c57bd0658189ce44e180533eebe813853446cd6413810be9eb",
+                "sha256:5f8fd1cb3478c5534222018aca24fbbd2bc74460c899bda988ec76722c13caa9",
+                "sha256:74300a32c41b3d578772f6933f23a88b19f74484185e71e5225ce2f7ea5aea78",
+                "sha256:8cbc946bdd217148f4a9c029fcea62f4ae0f67d5346de4c865f4718cd0ddc37f",
+                "sha256:9ceefd30076f732530cf84a1be2ecb2fa9931af932706ded760a6d37c73b96ad",
+                "sha256:ad69c170fda41b07a4c6b668a3128e7a759f50d9aebcfcde0ccff1358abe0423",
+                "sha256:b715fe182189fb6870fab5b0383bb2fb278c88c46eade346b0f4c1ed8818c09d",
+                "sha256:bb01ecf95083ffcb9ad542dc5342ccc1059e46f1395fd966629d36d9cc766b4a",
+                "sha256:bd6328547219cf48cefb4e0a1bc54442910594de1c5a5feae847d9ff3c629031",
+                "sha256:edb128379bb1dea76b5bdbdacf5657a6e4754bacc2049640762725590d8ed905",
+                "sha256:f8e687900557fcd4c51b4e72b9e337fdae9e2c81049d1d80b624bb2e88b5769d",
+                "sha256:fe0ca120e3347c851c34a91041d574f3c588d832023906d8ae18d66d042e8a52",
+                "sha256:fe8e0152672f24d8bfdecc725f97e9013f2de1b41849150959526ca3562bd3ef"
+            ],
+            "index": "pypi",
+            "version": "==2.2.0"
+        },
        "pillow": {
            "hashes": [
                "sha256:006de60d7580d81f4a1a7e9f0173dc90a932e3905cc4d47ea909bc946302311a",
@@ -254,6 +471,14 @@
            "index": "pypi",
            "version": "==8.0.1"
        },
+        "pluggy": {
+            "hashes": [
+                "sha256:15b2acde666561e1298d71b523007ed7364de07029219b604cf808bfa1c765b0",
+                "sha256:966c145cd83c96502c3c3868f50408687b38434af77734af1e9ca461a4081d2d"
+            ],
+            "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
+            "version": "==0.13.1"
+        },
        "psycopg2-binary": {
            "hashes": [
                "sha256:0deac2af1a587ae12836aa07970f5cb91964f05a7c6cdb69d8425ff4c15d4e2c",
@@ -297,13 +522,13 @@
            "index": "pypi",
            "version": "==2.8.6"
        },
-        "pyocr": {
+        "pycparser": {
            "hashes": [
-                "sha256:fa15adc7e1cf0d345a2990495fe125a947c6e09a60ddba0256a1c14b2e603179",
-                "sha256:fd602af17b6e21985669aadc058a95f343ff921e962ed4aa6520ded32e4d1301"
+                "sha256:2d475327684562c3a96cc71adf7dc8c4f0565175cf86b6d7a404ff4c771f15f0",
+                "sha256:7582ad22678f0fcd81102833f60ef8d0e57288b6b5fb00323d101be910e35705"
            ],
-            "index": "pypi",
-            "version": "==0.7.2"
+            "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
+            "version": "==2.20"
        },
        "python-dateutil": {
            "hashes": [
@@ -411,6 +636,53 @@
            ],
            "version": "==2020.11.13"
        },
+        "reportlab": {
+            "hashes": [
+                "sha256:0008b5baa39d7e3a8132c4b47ecae88d6858ad386518e754e5e7b8025ee4722b",
+                "sha256:0ad5a540c336941272fe161ef3a9830da3d4b3a65a195531cebd3cad5db58b2a",
+                "sha256:0c965a5691686d746f558ee1c52aa9c63a01a0e13cba61ffc661573948e32f61",
+                "sha256:0fd568fa5615ae99f76289c52ff230207852ee942d4934f6c893c93d2a79544e",
+                "sha256:1117d905a3404c696869c7aabec9454b43ed6acbbc73f9256c6fcea23e7ae93e",
+                "sha256:1ea7c388e91ad9d823655ad6a13751ff67e8a0e7cf4065cf051b4c931cdd9450",
+                "sha256:26c0ee8f62652cc7fcdc47a1cb3b34775a4d625738025c1a7edb8718bda5a315",
+                "sha256:368c5b3fc3d5a541cb9dcacefa563fdb445365f517e3cbf64b4326631d1cf13c",
+                "sha256:451d42fdcdd7d84587d6d9c8f5d9a7d0e997305efb606705063ca1fe8bcca551",
+                "sha256:47394acba4da8e56ef8e55d8eb483b868521696ba49ab0f0fcf8a1a4a5ac6e49",
+                "sha256:51b16e297f7b937fc530dd151e4b38f1d305b01c9aa10657bc32a5d2901b8ad7",
+                "sha256:51c0cdcf606ded0a7b4b50050400f25125ea797fbfc3c817135993b38f8b764e",
+                "sha256:55c672c579618843e0fd00140fb71f1ffebc4f1c542ac385c4f4999f2f5398d9",
+                "sha256:5c34a96ecfbf595caf16178a06abcd26a5f8720e01fe1285d4c97333382cfaeb",
+                "sha256:61aa89a00754b18c4f2956b8bff831f1fd3affef6476dc63462d92211941605e",
+                "sha256:62234d29c97279917903e4587faf240a5dea4617be250db55386ff268eb5a7c5",
+                "sha256:670f2a8dcc23bf798c39b95c64bf76ee387549b962f76783670821978a226663",
+                "sha256:69387f171f6c7b55109caa6d061b17a18f2f9e724a0212c07cd692aeb369dd19",
+                "sha256:6c5c8871b659f7c2975382d7b61f3c182701fa9eb62cf649c3c73ba8fc5e2595",
+                "sha256:80139ceb3a568f5be908094f1701fd05391b71425e8b69aaed0d30db647ca2aa",
+                "sha256:80661a76d0019b5e2c315ccd3bc7093d754067d6142b36a3a0ec4f416073d23b",
+                "sha256:85a2236f324ae336da7f4b183fa99bed261bcc00ac1255ee91a504e68b086d00",
+                "sha256:89a3acd98bd4478d6bbc5cb32e0665ea546c98bff8b58d5e1014659daa6ef75a",
+                "sha256:8a39119fcab146bde41fd1c6d148f9ee1e2cca10c6f9c2b7eb4dd710a3a2c6ac",
+                "sha256:9c31c2526401da6cc92018f68483f2aac0a731cb98435445ea4b72d46b438c84",
+                "sha256:9e8ae1c3b8a1697147c5c97f00d66ab1c54d88c4615b0cdd9b1a667d7baf3eb7",
+                "sha256:a479c38ab2b997ce05d3bef906783ac20cf4cb224a154e80c9018c5e4d943a35",
+                "sha256:a79aab8d069543d5085d58260f18705a08acd92a4501a41261913fddc2137d46",
+                "sha256:b0a8314383de853599ca531dfe55eaa49bb8d6b0bb663b2f8479b7a0f3385ea2",
+                "sha256:b3d9926e64bd8008007b2d9819d7b30179b069ce95431d5060f71afc36885389",
+                "sha256:c2a9a77ce4f25ffb52d705be82a9f41b47f6b0da23870ebc3587709e7242da30",
+                "sha256:c578dd0799f70fb577474cd383f035c6e1057e4fe837278113f9cfa6eee4b076",
+                "sha256:c5abd9d0023ad20030524ab0d5fa39d77aed025519b1fa426304ab2dd0328b89",
+                "sha256:ced96125525ba21311e9512adf391170b9e149f89e27e45b06ff07b70f97a0b2",
+                "sha256:d692fb88d6ef5e75242b00009b54953a0425eaa8bd3a36db9db8b396785e1f57",
+                "sha256:d70c2104286459658e61388af9eee838b612986bd8a36e1d21ba36152983ac15",
+                "sha256:de47c65c10ac6f0d2addb28f1b1657b1c707aca014d09d01b3b728cf19e8f791",
+                "sha256:e6e7592527791841db0820a72c6afae52655a05b0b6d4df184fd2bafe82ee1ee",
+                "sha256:e8a7e95ee6ea5566291b59ede5b9fadce809dca43ebfbfe11e3ff3d6492c6f0e",
+                "sha256:f041759138b3a95508c4281b3db3bf9bb28636d84c554272a58a5ca7c9f9bbf4",
+                "sha256:f39c7fc1fa2e4a1d9747a3effd70731a9d0e9eb5738247fa089c059eff19d43e",
+                "sha256:f65ac89ee0ba569f5279360eae08783f7f2e95c9810a9846c957fbd5950f4896"
+            ],
+            "version": "==3.5.56"
+        },
        "scikit-learn": {
            "hashes": [
                "sha256:090bbf144fd5823c1f2efa3e1a9bf180295b24294ca8f478e75b40ed54f8036e",
@@ -474,6 +746,13 @@
            "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
            "version": "==1.15.0"
        },
+        "sortedcontainers": {
+            "hashes": [
+                "sha256:37257a32add0a3ee490bb170b599e93095eed89a55da91fa9f48753ea12fd73f",
+                "sha256:59cc937650cf60d677c16775597c89a960658a09cf7c1a668f86e1e4464b10a1"
+            ],
+            "version": "==2.3.0"
+        },
        "sqlparse": {
            "hashes": [
                "sha256:017cde379adbd6a1f15a61873f43e8274179378e95ef3fede90b5aa64d304ed0",
@@ -490,6 +769,14 @@
            "markers": "python_version >= '3.5'",
            "version": "==2.1.0"
        },
+        "tqdm": {
+            "hashes": [
+                "sha256:38b658a3e4ecf9b4f6f8ff75ca16221ae3378b2e175d846b6b33ea3a20852cf5",
+                "sha256:d4f413aecb61c9779888c64ddf0c62910ad56dcbe857d8922bb505d4dbff0df1"
+            ],
+            "index": "pypi",
+            "version": "==4.54.1"
+        },
        "tzlocal": {
            "hashes": [
                "sha256:643c97c5294aedc737780a49d9df30889321cbe1204eac2c2ec6134035a92e44",
@@ -499,11 +786,11 @@
        },
        "watchdog": {
            "hashes": [
-                "sha256:034c85530b647486e8c8477410fe79476511282658f2ce496f97106d9e5acfb8",
-                "sha256:4214e1379d128b0588021880ccaf40317ee156d4603ac388b9adcf29165e0c04"
+                "sha256:3caefdcc8f06a57fdc5ef2d22aa7c0bfda4f55e71a0bee74cbf3176d97536ef3",
+                "sha256:e38bffc89b15bafe2a131f0e1c74924cf07dcec020c2e0a26cccd208831fcd43"
            ],
            "index": "pypi",
-            "version": "==0.10.3"
+            "version": "==0.10.4"
        },
        "wcwidth": {
            "hashes": [
@@ -528,6 +815,14 @@
            ],
            "index": "pypi",
            "version": "==2.7.4"
+        },
+        "zipp": {
+            "hashes": [
+                "sha256:102c24ef8f171fd729d46599845e95c7ab894a4cf45f5de11a44cc7444fb1108",
+                "sha256:ed5eee1974372595f9e416cc7bbeeb12335201d8081ca8a0743c954d4446e5cb"
+            ],
+            "markers": "python_version >= '3.6'",
+            "version": "==3.4.0"
        }
    },
    "develop": {
@@ -571,16 +866,17 @@
        },
        "certifi": {
            "hashes": [
-                "sha256:1f422849db327d534e3d0c5f02a263458c3955ec0aae4ff09b95f195c59f4edd",
-                "sha256:f05def092c44fbf25834a51509ef6e631dc19765ab8a57b4e7ab85531f0a9cf4"
+                "sha256:1a4995114262bffbc2413b159f2a1a480c969de6e6eb13ee966d470af86af59c",
+                "sha256:719a74fb9e33b9bd44cc7f3a8d94bc35e4049deebe19ba7d8e108280cfd59830"
            ],
-            "version": "==2020.11.8"
+            "version": "==2020.12.5"
        },
        "chardet": {
            "hashes": [
                "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae",
                "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691"
            ],
+            "markers": "python_version >= '3.1'",
            "version": "==3.0.4"
        },
        "coverage": {
@@ -673,17 +969,18 @@
        },
        "faker": {
            "hashes": [
-                "sha256:5398268e1d751ffdb3ed36b8a790ed98659200599b368eec38a02eed15bce997",
-                "sha256:d4183b8f57316de3be27cd6c3b40e9f9343d27c95c96179f027316c58c2c239e"
+                "sha256:1fcb415562ee6e2395b041e85fa6901d4708d30b84d54015226fa754ed0822c3",
+                "sha256:e8beccb398ee9b8cc1a91d9295121d66512b6753b4846eb1e7370545d46b3311"
            ],
-            "markers": "python_version >= '3.5'",
-            "version": "==4.17.1"
+            "markers": "python_version >= '3.6'",
+            "version": "==5.0.1"
        },
        "filelock": {
            "hashes": [
                "sha256:18d82244ee114f543149c66a6e0c14e9c4f8a1044b5cdaadd0f82159d6a6ff59",
                "sha256:929b7d63ec5b7d6b71b0fa5ac14e030b3f70b75747cef1b10da9b879fef15836"
            ],
+            "index": "pypi",
            "version": "==3.0.12"
        },
        "idna": {
@@ -705,11 +1002,11 @@
        },
        "importlib-metadata": {
            "hashes": [
-                "sha256:030f3b1bdb823ecbe4a9659e14cc861ce5af403fe99863bae173ec5fe00ab132",
-                "sha256:caeee3603f5dcf567864d1be9b839b0bcfdf1383e3e7be33ce2dead8144ff19c"
+                "sha256:6112e21359ef8f344e7178aa5b72dc6e62b38b0d008e6d3cb212c5b84df72013",
+                "sha256:b0c2d3b226157ae4517d9625decf63591461c66b3a808c2666d538946519d170"
            ],
            "markers": "python_version < '3.8'",
-            "version": "==2.1.0"
+            "version": "==3.1.1"
        },
        "importlib-resources": {
            "hashes": [
@@ -780,11 +1077,11 @@
        },
        "packaging": {
            "hashes": [
-                "sha256:4357f74f47b9c12db93624a82154e9b120fa8293699949152b22065d556079f8",
-                "sha256:998416ba6962ae7fbd6596850b80e17859a5753ba17c32284f67bfff33784181"
+                "sha256:05af3bb85d320377db281cf254ab050e1a7ebcbf5410685a9a407e18a1f81236",
+                "sha256:eb41423378682dadb7166144a4926e443093863024de508ca5c9737d6bc08376"
            ],
            "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
-            "version": "==20.4"
+            "version": "==20.7"
        },
        "pluggy": {
            "hashes": [
@@ -812,11 +1109,11 @@
        },
        "pygments": {
            "hashes": [
-                "sha256:381985fcc551eb9d37c52088a32914e00517e57f4a21609f48141ba08e193fa0",
-                "sha256:88a0bbcd659fcb9573703957c6b9cff9fab7295e6e76db54c9d00ae42df32773"
+                "sha256:ccf3acacf3782cbed4a989426012f1c535c9a90d3a7fc3f16d231b9372d2b716",
+                "sha256:f275b6c0909e5dafd2d6269a656aa90fa58ebf4a74f8fcf9053195d226b24a08"
            ],
            "markers": "python_version >= '3.5'",
-            "version": "==2.7.2"
+            "version": "==2.7.3"
        },
        "pyparsing": {
            "hashes": [
@@ -1025,18 +1322,18 @@
        },
        "virtualenv": {
            "hashes": [
-                "sha256:07cff122e9d343140366055f31be4dcd61fd598c69d11cd33a9d9c8df4546dd7",
-                "sha256:e0aac7525e880a429764cefd3aaaff54afb5d9f25c82627563603f5d7de5a6e5"
+                "sha256:54b05fc737ea9c9ee9f8340f579e5da5b09fb64fd010ab5757eb90268616907c",
+                "sha256:b7a8ec323ee02fb2312f098b6b4c9de99559b462775bc8fe3627a73706603c1b"
            ],
            "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
-            "version": "==20.2.1"
+            "version": "==20.2.2"
        },
        "zipp": {
            "hashes": [
                "sha256:102c24ef8f171fd729d46599845e95c7ab894a4cf45f5de11a44cc7444fb1108",
                "sha256:ed5eee1974372595f9e416cc7bbeeb12335201d8081ca8a0743c954d4446e5cb"
            ],
-            "markers": "python_version < '3.8'",
+            "markers": "python_version >= '3.6'",
            "version": "==3.4.0"
        }
    }
--- a/README.md
+++ b/README.md
@@ -25,36 +25,43 @@ Here's what you get:

 ![Dashboard](https://github.com/jonaswinkler/paperless-ng/raw/master/docs/_static/screenshots/dashboard.png)

-# Why Paperless-ng?
+# Features

-I wanted to make big changes to the project that will impact the way it is used by its users greatly. Among the users who currently use paperless in production there are probably many that don't want these changes right away. I also wanted to have more control over what goes into the code and what does not. Therefore, paperless-ng was created. NG stands for both Angular (the framework used for the Frontend) and next-gen. Publishing this project under a different name also avoids confusion between paperless and paperless-ng.
-
-The gist of the changes is the following:
-
-* New front end. This will eventually be mobile friendly as well.
-* New full text search.
-* New email processing.
+* Performs OCR on your documents, adds selectable text to image only documents and adds tags, correspondents and document types to your documents.
+* Single page application front end. Should be pretty snappy. Will be mobile friendly in the future.
+	* Includes a dashboard that shows basic statistics and has document upload.
+	* Filtering by tags, correspondents, types, and more.
+	* Customizable views can be saved and displayed on the dashboard.
+	* Full text search with auto completion, scored results and query highlighting allows you to quickly find what you need.
+* Email processing: Paperless adds documents from your email accounts.
+	* Configure multiple accounts and filters for each account.
+	* When adding documents from mails, paperless can move these mails to a new folder, mark them as read, flag them or delete them.
 * Machine learning powered document matching.
-* A task processor that processes documents in parallel and also tells you when something goes wrong.
-* Code cleanup in many, MANY areas. Some of the code was just overly complicated.
+	* Paperless learns from your documents and will be able to automatically assign tags, correspondents and types to documents once you've stored a few documents in paperless.
+* We have a mobile app that offers a 'Share with paperless' option over at https://github.com/qcasey/paperless_share. You can use that in combination with any of the mobile scanning apps out there. It's still a little rough around the edges, but it works!
+* A task processor that processes documents in parallel and also tells you when something goes wrong. On modern multi core systems, consumption is blazing fast.
+* Code cleanup in many, MANY areas. Some of the code from OG paperless was just overly complicated.
 * More tests, more stability.

 If you want to see some screenshots of paperless-ng in action, [some are available in the documentation](https://paperless-ng.readthedocs.io/en/latest/screenshots.html).

-For a complete list of changes, check out the [changelog](https://paperless-ng.readthedocs.io/en/latest/changelog.html)
+For a complete list of changes from paperless, check out the [changelog](https://paperless-ng.readthedocs.io/en/latest/changelog.html)

 # Roadmap for 1.0

 - Make the front end nice (except mobile).
 - Test coverage at 90%.
- Store archived documents with an embedded OCR text layer, while keeping originals available. Making good progress in the `feature-ocrmypdf` branch.
 - Fix whatever bugs I and you find.

 ## Roadmap for versions beyond 1.0

+These are things that I want to add to paperless eventually. They are sorted by priority.
+
+- **Bulk editing**. Add/remove metadata from multiple documents at once.
 - **More search.** The search backend is incredibly versatile and customizable. Searching is the most important feature of this project and thus, I want to implement things like:
  - Group and limit search results by correspondent, show “more from this” links in the results.
  - Ability to search for “Similar documents” in the search results
+- **Nested tags**. Organize tags in a hierarchical structure. This will combine the benefits of folders and tags in one coherent system.
 - **An interactive consumer** that shows its progress for documents it processes on the web page.
 	- With live updates ans websockets. This already works on a dev branch, but requires a lot of new dependencies, which I'm not particular happy about.
 	- Notifications when a document was added with buttons to open the new document right away.
@@ -86,7 +93,7 @@ Please open an issue and start a discussion about it!

 ## Feel like helping out?

-There's still lots of things to be done, just have a look at that issue log. If you feel like conctributing to the project, please do! Bug fixes and improvements to the front end (I just can't seem to get some of these CSS things right) are always welcome.
+There's still lots of things to be done, just have a look at that issue log. If you feel like contributing to the project, please do! Bug fixes and improvements to the front end (I just can't seem to get some of these CSS things right) are always welcome. The documentation has some basic information on how to get started.

 If you want to implement something big: Please start a discussion about that in the issues! Maybe I've already had something similar in mind and we can make it happen together. However, keep in mind that the general roadmap is to make the existing features stable and get them tested. See the roadmap above.

@@ -94,7 +101,7 @@ If you want to implement something big: Please start a discussion about that in

 Paperless has been around a while now, and people are starting to build stuff on top of it.  If you're one of those people, we can add your project to this list:

-* [Paperless App](https://github.com/bauerj/paperless_app): An Android/iOS app for Paperless.
+* [Paperless App](https://github.com/bauerj/paperless_app): An Android/iOS app for Paperless. We're working on making this compatible.
 * [Paperless Desktop](https://github.com/thomasbrueggemann/paperless-desktop): A desktop UI for your Paperless installation.  Runs on Mac, Linux, and Windows.
 * [ansible-role-paperless](https://github.com/ovv/ansible-role-paperless): An easy way to get Paperless running via Ansible.
 * [paperless-cli](https://github.com/stgarf/paperless-cli): A golang command line binary to interact with a Paperless instance.
--- a/docker/docker-compose.env
+++ b/docker/docker-compose.env
@@ -32,8 +32,3 @@
 # The default language to use for OCR. Set this to the language most of your
 # documents are written in.
 #PAPERLESS_OCR_LANGUAGE=eng
-
-# By default Paperless does not OCR a document if the text can be retrieved from
-# the document directly. Set to true to always OCR documents. (i.e., if you
-# know that some of your documents have faulty/bad OCR data)
-#PAPERLESS_OCR_ALWAYS=true
--- a/docker/docker-entrypoint.sh
+++ b/docker/docker-entrypoint.sh
@@ -23,8 +23,14 @@ wait_for_postgres() {
 	echo "Waiting for PostgreSQL to start..."

 	host="${PAPERLESS_DBHOST}"
+	port="${PAPERLESS_DBPORT}"

-	while !</dev/tcp/$host/5432 ;
+	if [[ -z $port ]] ;
+	then
+		port="5432"
+	fi
+
+	while !</dev/tcp/$host/$port ;
 	do

 		if [ $attempt_num -eq $max_attempts ]
@@ -113,13 +119,13 @@ install_languages() {
    done
 }

-initialize
-
 # Install additional languages if specified
 if [[ ! -z "$PAPERLESS_OCR_LANGUAGES"  ]]; then
 		install_languages "$PAPERLESS_OCR_LANGUAGES"
 fi

+initialize
+
 if [[ "$1" != "/"* ]]; then
 	exec sudo -HEu paperless python3 manage.py "$@"
 else
--- a/docker/hub/docker-compose.postgres.yml
+++ b/docker/hub/docker-compose.postgres.yml
@@ -15,7 +15,7 @@ services:
      POSTGRES_PASSWORD: paperless

  webserver:
-    image: jonaswinkler/paperless-ng:0.9.4
+    image: jonaswinkler/paperless-ng:0.9.6
    restart: always
    depends_on:
      - db
--- a/docker/hub/docker-compose.sqlite.yml
+++ b/docker/hub/docker-compose.sqlite.yml
@@ -5,7 +5,7 @@ services:
    restart: always

  webserver:
-    image: jonaswinkler/paperless-ng:0.9.4
+    image: jonaswinkler/paperless-ng:0.9.6
    restart: always
    depends_on:
      - broker
--- a/docker/local/Dockerfile
+++ b/docker/local/Dockerfile
@@ -11,12 +11,18 @@ RUN apt-get update \
 		curl \
 		ghostscript \
 		gnupg \
+		icc-profiles-free \
 		imagemagick \
 		libatlas-base-dev \
+		liblept5 \
 		libmagic-dev \
 		libpoppler-cpp-dev \
 		libpq-dev \
+		libqpdf-dev \
+		libxml2 \
 		optipng \
+		pngquant \
+		qpdf \
 		sudo \
 		tesseract-ocr \
 		tesseract-ocr-eng \
@@ -26,9 +32,10 @@ RUN apt-get update \
 		tesseract-ocr-spa \
 		tzdata \
 		unpaper \
+		zlib1g \
 	&& pip3 install --upgrade supervisor setuptools \
 	&& pip install --no-cache-dir -r requirements.txt \
-	&& apt-get -y purge build-essential \
+	&& apt-get -y purge build-essential libqpdf-dev \
 	&& apt-get -y autoremove --purge \
 	&& rm -rf /var/lib/apt/lists/* \
 	&& mkdir /var/log/supervisord /var/run/supervisord
--- a/docs/administration.rst
+++ b/docs/administration.rst
@@ -119,8 +119,11 @@ Updating paperless without docker

 After grabbing the new release and unpacking the contents, do the following:

-1.  Update python requirements. Paperless uses
-    `Pipenv`_ for managing dependencies:
+1.  Update dependencies. New paperless versions may require additional
+    dependencies. The dependencies required are listed in the section about
+    :ref:`bare metal installations <setup-bare_metal>`.
+
+2.  Update python requirements. If you use Pipenv, this is done with the following steps.

    .. code:: shell-session

@@ -132,14 +135,14 @@ After grabbing the new release and unpacking the contents, do the following:
    This creates a new virtual environment (or uses your existing environment)
    and installs all dependencies into it.

-2.  Collect static files.
+3.  Collect static files.

    .. code:: shell-session

        $ cd src
        $ pipenv run python3 manage.py collectstatic --clear
    
-3.  Migrate the database.
+4.  Migrate the database.

    .. code:: shell-session

@@ -153,14 +156,14 @@ Management utilities
 Paperless comes with some management commands that perform various maintenance
 tasks on your paperless instance. You can invoke these commands either by

-.. code:: bash
+.. code:: shell-session

    $ cd /path/to/paperless
    $ docker-compose run --rm webserver <command> <arguments>

 or

-.. code:: bash
+.. code:: shell-session

    $ cd /path/to/paperless/src
    $ pipenv run python manage.py <command> <arguments>
@@ -333,6 +336,42 @@ command:

 The command takes no arguments and processes all your mail accounts and rules.

+.. _utilities-archiver:
+
+Creating archived documents
+===========================
+
+Paperless stores archived PDF/A documents alongside your original documents.
+These archived documents will also contain selectable text for image-only
+originals.
+These documents are derived from the originals, which are always stored
+unmodified. If coming from an earlier version of paperless, your documents
+won't have archived versions.
+
+This command creates PDF/A documents for your documents.
+
+.. code::
+
+    document_archiver --overwrite --document <id>
+
+This command will only attempt to create archived documents when no archived
+document exists yet, unless ``--overwrite`` is specified. If ``--document <id>``
+is specified, the archiver will only process that document.
+
+.. note::
+
+    This command essentially performs OCR on all your documents again,
+    according to your settings. If you run this with ``PAPERLESS_OCR_MODE=redo``,
+    it will potentially run for a very long time. You can cancel the command
+    at any time, since this command will skip already archived versions the next time
+    it is run.
+
+.. note::
+
+    Some documents will cause errors and cannot be converted into PDF/A documents,
+    such as encrypted PDF documents. The archiver will skip over these documents
+    each time it sees them.
+
 .. _utilities-encyption:

 Managing encryption
--- a/docs/advanced_usage.rst
+++ b/docs/advanced_usage.rst
@@ -298,6 +298,7 @@ avoids filename clashes.
 Paperless provides the following placeholders withing filenames:

 * ``{correspondent}``: The name of the correspondent, or "none".
+* ``{document_type}``: The name of the document type, or "none".
 * ``{title}``: The title of the document.
 * ``{created}``: The full date and time the document was created.
 * ``{created_year}``: Year created only.
@@ -307,7 +308,6 @@ Paperless provides the following placeholders withing filenames:
 * ``{added_year}``: Year added only.
 * ``{added_month}``: Month added only (number 1-12).
 * ``{added_day}``: Day added only (number 1-31).
-* ``{tags}``: I don't know how this works. Look at the source.

 Paperless will convert all values for the placeholders into values which are safe
 for use in filenames.
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -13,23 +13,55 @@ available filters and ordering fields.

 The API provides 5 main endpoints:

+*   ``/api/documents/``: Full CRUD support, except POSTing new documents. See below.
 *   ``/api/correspondents/``: Full CRUD support.
 *   ``/api/document_types/``: Full CRUD support.
-*   ``/api/documents/``: Full CRUD support, except POSTing new documents. See below.
 *   ``/api/logs/``: Read-Only.
 *   ``/api/tags/``: Full CRUD support.

-All of these endpoints except for the logging endpoint 
+All of these endpoints except for the logging endpoint
 allow you to fetch, edit and delete individual objects
 by appending their primary key to the path, for example ``/api/documents/454/``.

+The objects served by the document endpoint contain the following fields:
+
+*   ``id``: ID of the document. Read-only.
+*   ``title``: Title of the document.
+*   ``content``: Plain text content of the document.
+*   ``tags``: List of IDs of tags assigned to this document, or empty list.
+*   ``document_type``: Document type of this document, or null.
+*   ``correspondent``: Correspondent of this document, or null.
+*   ``created``: The date at which this document was created.
+*   ``modified``: The date at which this document was last edited in paperless. Read-only.
+*   ``added``: The date at which this document was added to paperless. Read-only.
+*   ``archive_serial_number``: The identifier of this document in a physical document archive.
+*   ``original_file_name``: Verbose filename of the original document. Read-only.
+*   ``archived_file_name``: Verbose filename of the archived document. Read-only. Null if no archived document is available.
+
+
+Downloading documents
+#####################
+
 In addition to that, the document endpoint offers these additional actions on
 individual documents:

-*   ``/api/documents/<pk>/download/``: Download the original document.
-*   ``/api/documents/<pk>/thumb/``: Download the PNG thumbnail of a document.
-*   ``/api/documents/<pk>/preview/``: Display the original document inline,
+*   ``/api/documents/<pk>/download/``: Download the document.
+*   ``/api/documents/<pk>/preview/``: Display the document inline,
    without downloading it.
+*   ``/api/documents/<pk>/thumb/``: Download the PNG thumbnail of a document.
+
+Paperless generates archived PDF/A documents from consumed files and stores both
+the original files as well as the archived files. By default, the endpoints
+for previews and downloads serve the archived file, if it is available.
+Otherwise, the original file is served.
+Some documents cannot be archived.
+
+The endpoints correctly serve the response header fields ``Content-Disposition``
+and ``Content-Type`` to indicate the filename for download and the type of content of
+the document.
+
+In order to download or preview the original document when an archived document is available,
+supply the query parameter ``original=true``.

 .. hint::

@@ -38,6 +70,80 @@ individual documents:
    are in place. However, if you use these old URLs to access documents, you
    should update your app or script to use the new URLs.

+
+Getting document metadata
+#########################
+
+The API also has an endpoint to retrieve read-only metadata about specific documents. This
+information is not served along with the document objects, since it requires reading
+files and would therefore slow down document lists considerably.
+
+Access the metadata of a document with an ID ``id`` at ``/api/documents/<id>/metadata/``.
+
+The endpoint reports the following data:
+
+*   ``original_checksum``: MD5 checksum of the original document.
+*   ``original_size``: Size of the original document, in bytes.
+*   ``original_mime_type``: Mime type of the original document.
+*   ``media_filename``: Current filename of the document, under which it is stored inside the media directory.
+*   ``has_archive_version``: True, if this document is archived, false otherwise.
+*   ``original_metadata``: A list of metadata associated with the original document. See below.
+*   ``archive_checksum``: MD5 checksum of the archived document, or null.
+*   ``archive_size``: Size of the archived document in bytes, or null.
+*   ``archive_metadata``: Metadata associated with the archived document, or null. See below.
+
+File metadata is reported as a list of objects in the following form:
+
+.. code:: json
+
+    [
+        {
+            "namespace": "http://ns.adobe.com/pdf/1.3/",
+            "prefix": "pdf",
+            "key": "Producer",
+            "value": "SparklePDF, Fancy edition"
+        }
+    ]
+
+``namespace`` and ``prefix`` can be null. The actual metadata reported depends on the file type and the metadata
+available in that specific document. Paperless only reports PDF metadata at this point.
+
+Authorization
+#############
+
+The REST api provides three different forms of authentication.
+
+1.  Basic authentication
+
+    Authorize by providing an HTTP header in the form
+
+    .. code::
+
+        Authorization: Basic <credentials>
+
+    where ``credentials`` is a base64-encoded string of ``<username>:<password>``
+
+2.  Session authentication
+
+    When you're logged into paperless in your browser, you're automatically
+    logged into the API as well and don't need to provide any authorization
+    headers.
+
+3.  Token authentication
+
+    Paperless also offers an endpoint to acquire authentication tokens.
+
+    POST a username and password as a form or json string to ``/api/token/``
+    and paperless will respond with a token, if the login data is correct.
+    This token can be used to authenticate other requests with the
+    following HTTP header:
+
+    .. code::
+
+        Authorization: Token <token>
+
+    Tokens can be managed and revoked in the paperless admin.
+
 Searching for documents
 #######################

@@ -65,8 +171,9 @@ Result list object returned by the endpoint:
        "count": 1,
        "page": 1,
        "page_count": 1,
+        "corrected_query": "",
        "results": [
-            
+
        ]
    }

@@ -75,6 +182,8 @@ Result list object returned by the endpoint:
    the page you requested, if you requested a page that is behind
    the last page. In that case, the last page is returned.
 *   ``page_count``: The total number of pages.
+*   ``corrected_query``: Corrected version of the query string. Can be null.
+    If not null, can be used verbatim to start a new query.
 *   ``results``: A list of result objects on the current page.

 Result object:
@@ -84,12 +193,12 @@ Result object:
    {
        "id": 1,
        "highlights": [
-            
+
        ],
        "score": 6.34234,
        "rank": 23,
        "document": {
-            
+
        }
    }

@@ -121,7 +230,7 @@ Each fragment contains a list of strings, and some of them are marked as a highl
            {"text": " fragment with a highlight."}
        ]
    ]
-    
+


 When ``term`` is present within a string, the word within ``text`` should be highlighted.
@@ -166,8 +275,17 @@ The API provides a special endpoint for file uploads:

 POST a multipart form to this endpoint, where the form field ``document`` contains
 the document that you want to upload to paperless. The filename is sanitized and
-then used to store the document in the consumption folder, where the consumer will
-detect the document and process it as any other document.
+then used to store the document in a temporary directory, and the consumer will
+be instructed to consume the document from there.

-The endpoint will immediately return "OK." if the document was stored in the
-consumption directory.
+The endpoint supports the following optional form fields:
+
+*   ``title``: Specify a title that the consumer should use for the document.
+*   ``correspondent``: Specify the ID of a correspondent that the consumer should use for the document.
+*   ``document_type``: Similar to correspondent.
+*   ``tags``: Similar to correspondent. Specify this multiple times to have multiple tags added
+    to the document.
+
+The endpoint will immediately return "OK" if the document consumption process
+was started successfully. No additional status information about the consumption
+process itself is available, since that happens in a different process.
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -5,6 +5,97 @@
 Changelog
 *********

+paperless-ng 0.9.6
+##################
+
+This release focusses primarily on many small issues with the UI.
+
+* Front end
+
+  * Paperless now has proper window titles.
+  * Fixed an issue with the small cards when more than 7 tags were used.
+  * Navigation of the "Show all" links adjusted. They navigate to the saved view now, if available in the sidebar.
+  * Some indication on the document lists that a filter is active was added.
+  * There's a new filter to filter for documents that do *not* have a certain tag.
+  * The file upload box now shows upload progress.
+  * The document edit page was reorganized.
+  * The document edit page shows various information about a document.
+  * An issue with the height of the preview was fixed.
+  * Table issues with too long document titles fixed.
+
+* API
+
+  * The API now serves file names with documents.
+  * The API now serves various metadata about documents.
+  * API documentation updated.
+
+* Other
+
+  * Fixed an issue with the docker image when a non-standard PostgreSQL port was used.
+  * The docker image was trying to check for installed languages before actually installing them.
+  * ``FILENAME_FORMAT`` placeholder for document types.
+  * The filename formatter is now less restrictive with file names and tries to
+    conserve the original correspondents, types and titles as much as possible.
+  * The filename formatter does not include the document ID in filenames anymore. It will
+    rather append ``_01``, ``_02``, etc when it detects duplicate filenames.
+
+.. note::
+
+  The changes to the filename format will apply to newly added documents and changed documents.
+  If you want all files to reflect these changes, execute the ``document_renamer`` management
+  command.
+
+
+paperless-ng 0.9.5
+##################
+
+This release concludes the big changes I wanted to get rolled into paperless. The next releases before 1.0 will
+focus on fixing issues, primarily.
+
+* OCR
+
+  * Paperless now uses `OCRmyPDF <https://github.com/jbarlow83/OCRmyPDF>`_ to perform OCR on documents.
+    It still uses tesseract under the hood, but the PDF parser of Paperless has changed considerably and
+    will behave differently for some documents.
+  * OCRmyPDF creates archived PDF/A documents with embedded text that can be selected in the front end.
+  * Paperless stores archived versions of documents alongside the originals. The originals can be
+    accessed on the document edit page. If available, a dropdown menu will appear next to the download button.
+  * Many of the configuration options regarding OCR have changed. See :ref:`configuration-ocr` for details.
+  * Paperless no longer guesses the language of your documents. It always uses the language that you
+    specified with ``PAPERLESS_OCR_LANGUAGE``. Be sure to set this to the language the majority of your
+    documents are in. Multiple languages can be specified, but that requires more CPU time.
+  * The management command :ref:`document_archiver <utilities-archiver>` can be used to create archived versions for already
+    existing documents.
+
+* Tags from consumption folder.
+
+  * Thanks to `jayme-github`_, paperless now consumes files from sub folders in the consumption folder and is able to assign tags
+    based on the sub folders a document was found in. This can be configured with ``PAPERLESS_CONSUMER_RECURSIVE`` and
+    ``PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS``.
+
+* API
+
+  * The API now offers token authentication.
+  * The endpoint for uploading documents now supports specifying custom titles, correspondents, tags and types.
+    This can be used by clients to override the default behavior of paperless. See :ref:`api-file_uploads`.
+  * The document endpoint of the API now serves documents in this form:
+
+    * correspondents, document types and tags are referenced by their ID in the fields ``correspondent``, ``document_type`` and ``tags``. The ``*_id`` versions are gone. These fields are read/write.
+    * paperless does not serve nested tags, correspondents or types anymore.
+
+* Front end
+
+  * Paperless does some basic caching of correspondents, tags and types and will only request them from the server when necessary or when entirely reloading the page.
+  * Document list fetching is about 10%-30% faster now, especially when lots of tags/correspondents are present.
+  * Some minor improvements to the front end, such as document count in the document list, better highlighting of the current page, and improvements to the filter behavior.
+
+* Fixes:
+
+  * A bug with the generation of filenames for files with unsupported types caused the exporter and
+    document saving to crash.
+  * Mail handling no longer exits entirely when encountering errors. It will skip the account/rule/message on which the error occurred.
+  * Assigning correspondents from mail sender names failed for very long names. Paperless no longer assigns correspondents in these cases.
+
 paperless-ng 0.9.4
 ##################

@@ -750,6 +841,7 @@ bulk of the work on this big change.

 * Initial release

+.. _jayme-github: https://github.com/jayme-github
 .. _Brian Conn: https://github.com/TheConnMan
 .. _Christopher Luu: https://github.com/nuudles
 .. _Florian Jung: https://github.com/the01
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -1,48 +1,21 @@
-# -*- coding: utf-8 -*-
-#
-# Paperless documentation build configuration file, created by
-# sphinx-quickstart on Mon Oct 26 18:36:52 2015.
-#
-# This file is execfile()d with the current directory set to its
-# containing dir.
-#
-# Note that not all possible configuration values are present in this
-# autogenerated file.
-#
-# All configuration values have a default; values that are commented out
-# serve to show the default.
+import sphinx_rtd_theme
+

 __version__ = None
 exec(open("../src/paperless/version.py").read())


-# Believe it or not, this is the officially sanctioned way to add custom CSS.
-def setup(app):
-    app.add_stylesheet("custom.css")
-
-# If extensions (or modules to document with autodoc) are in another directory,
-# add these directories to sys.path here. If the directory is relative to the
-# documentation root, use os.path.abspath to make it absolute, like shown here.
-#sys.path.insert(0, os.path.abspath('.'))
-
-# -- General configuration ------------------------------------------------
-
-# If your documentation needs a minimal Sphinx version, state it here.
-#needs_sphinx = '1.0'
-
-# Add any Sphinx extension module names here, as strings. They can be
-# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
-# ones.
 extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.intersphinx',
    'sphinx.ext.todo',
    'sphinx.ext.imgmath',
    'sphinx.ext.viewcode',
+    'sphinx_rtd_theme',
 ]

 # Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
+# templates_path = ['_templates']

 # The suffix of source filenames.
 source_suffix = '.rst'
@@ -115,7 +88,7 @@ pygments_style = 'sphinx'

 # The theme to use for HTML and HTML Help pages.  See the documentation for
 # a list of builtin themes.
-html_theme = 'default'
+html_theme = 'sphinx_rtd_theme'

 # Theme options are theme-specific and customize the look and feel of a theme
 # further.  For a list of options available for each theme, see the
@@ -195,20 +168,6 @@ html_static_path = ['_static']
 # Output file base name for HTML help builder.
 htmlhelp_basename = 'paperless'

-
-#
-# Attempt to use the ReadTheDocs theme.  If it's not installed, fallback to
-# the default.
-#
-
-try:
-    import sphinx_rtd_theme
-    html_theme = "sphinx_rtd_theme"
-    html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
-except ImportError as e:
-    print("error " + str(e))
-    pass
-
 # -- Options for LaTeX output ---------------------------------------------

 latex_elements = {
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@@ -152,6 +152,122 @@ PAPERLESS_AUTO_LOGIN_USERNAME=<username>

    Defaults to none, which disables this feature.

+.. _configuration-ocr:
+
+OCR settings
+############
+
+Paperless uses `OCRmyPDF <https://ocrmypdf.readthedocs.io/en/latest/>`_ for
+performing OCR on documents and images. Paperless uses sensible defaults for
+most settings, but all of them can be configured to your needs.
+
+
+PAPERLESS_OCR_LANGUAGE=<lang>
+    Customize the language that paperless will attempt to use when
+    parsing documents.
+
+    It should be a 3-letter language code consistent with ISO
+    639: https://www.loc.gov/standards/iso639-2/php/code_list.php
+
+    Set this to the language most of your documents are written in.
+
+    This can be a combination of multiple languages such as ``deu+eng``,
+    in which case tesseract will use whatever language matches best.
+    Keep in mind that tesseract uses much more cpu time with multiple
+    languages enabled.
+
+    Defaults to "eng".
+
+PAPERLESS_OCR_MODE=<mode>
+    Tell paperless when and how to perform ocr on your documents. Four modes
+    are available:
+
+    *   ``skip``: Paperless skips all pages that already contain text and will
+        perform OCR only on pages where no text is present. This is the safest option.
+    *   ``skip_noarchive``: In addition to skip, paperless won't create an
+        archived version of your documents when it finds any text in them.
+        This is useful if you don't want to have two almost-identical versions
+        of your digital documents in the media folder. This is the fastest option.
+    *   ``redo``: Paperless will OCR all pages of your documents and attempt to
+        replace any existing text layers with new text. This will be useful for
+        documents from scanners that already performed OCR with insufficient
+        results. It will also perform OCR on purely digital documents.
+
+        This option may fail on some documents that have features that cannot
+        be removed, such as forms. In this case, the text from the document is
+        used instead.
+    *   ``force``: Paperless rasterizes your documents, converting any text
+        into images and puts the OCRed text on top. This works for all documents,
+        however, the resulting document may be significantly larger and text
+        won't appear as sharp when zoomed in.
+    
+    The default is ``skip``, which only performs OCR when necessary and always
+    creates archived documents.
+
+PAPERLESS_OCR_OUTPUT_TYPE=<type>
+    Specify the type of PDF documents that paperless should produce.
+    
+    *   ``pdf``: Modify the PDF document as little as possible.
+    *   ``pdfa``: Convert PDF documents into PDF/A-2b documents, which is a
+        subset of the entire PDF specification and meant for storing
+        documents long term.
+    *   ``pdfa-1``, ``pdfa-2``, ``pdfa-3`` to specify the exact version of
+        PDF/A you wish to use.
+    
+    If not specified, ``pdfa`` is used. Remember that paperless also keeps
+    the original input file as well as the archived version.
+
+
+PAPERLESS_OCR_PAGES=<num>
+    Tells paperless to use only the specified number of pages for OCR. Documents
+    with fewer than the specified number of pages get OCR'ed completely.
+
+    Specifying 1 here will only use the first page.
+
+    When combined with ``PAPERLESS_OCR_MODE=redo`` or ``PAPERLESS_OCR_MODE=force``,
+    paperless will not modify any text it finds on excluded pages and copy it
+    verbatim.
+
+    Defaults to 0, which disables this feature and always uses all pages.
+
+
+PAPERLESS_OCR_IMAGE_DPI=<num>
+    Paperless will OCR any images you put into the system and convert them
+    into PDF documents. This is useful if your scanner produces images.
+    In order to do so, paperless needs to know the DPI of the image.
+    Most images from scanners will have this information embedded and
+    paperless will detect and use that information. In case this fails, it
+    uses this value as a fallback.
+
+    Set this to the DPI your scanner produces images at.
+
+    Default is none, which causes paperless to fail if no DPI information is
+    present in an image.
+
+
+PAPERLESS_OCR_USER_ARG=<json>
+    OCRmyPDF offers many more options. Use this parameter to specify any
+    additional arguments you wish to pass to OCRmyPDF. Since Paperless uses
+    the API of OCRmyPDF, you have to specify these in a format that can be
+    passed to the API. See `the API reference of OCRmyPDF <https://ocrmypdf.readthedocs.io/en/latest/api.html#reference>`_
+    for valid parameters. All command line options are supported, but they
+    use underscores instead of dashes.
+
+    .. caution::
+
+        Paperless has been tested to work with the OCR options provided
+        above. There are many options that are incompatible with each other,
+        so specifying invalid options may prevent paperless from consuming
+        any documents.
+
+    Specify arguments as a JSON dictionary. Keep note of lower case booleans
+    and double quoted parameter names and strings. Examples:
+
+    .. code:: json
+
+        {"deskew": true, "optimize": 3, "unpaper_args": "--pre-rotate 90"}    
+    
+    
 Software tweaks
 ###############

@@ -160,6 +276,7 @@ PAPERLESS_TASK_WORKERS=<num>
    maintain the automatic matching algorithm, check emails, consume documents,
    etc. This variable specifies how many things it will do in parallel.

+
 PAPERLESS_THREADS_PER_WORKER=<num>
    Furthermore, paperless uses multiple threads when consuming documents to
    speed up OCR. This variable specifies how many pages paperless will process
@@ -184,7 +301,6 @@ PAPERLESS_THREADS_PER_WORKER=<num>
    PAPERLESS_THREADS_PER_WORKER automatically.


-
 PAPERLESS_TIME_ZONE=<timezone>
    Set the time zone here.
    See https://docs.djangoproject.com/en/3.1/ref/settings/#std:setting-TIME_ZONE
@@ -193,37 +309,6 @@ PAPERLESS_TIME_ZONE=<timezone>
    Defaults to UTC.


-
-PAPERLESS_OCR_PAGES=<num>
-    Tells paperless to use only the specified amount of pages for OCR. Documents
-    with less than the specified amount of pages get OCR'ed completely.
-
-    Specifying 1 here will only use the first page.
-
-    Defaults to 0, which disables this feature and always uses all pages.
-
-
-
-PAPERLESS_OCR_LANGUAGE=<lang>
-    Customize the default language that tesseract will attempt to use when
-    parsing documents. The default language is used whenever
-
-    * No language could be detected on a document
-    * No tesseract data files are available for the detected language
-
-    It should be a 3-letter language code consistent with ISO
-    639: https://www.loc.gov/standards/iso639-2/php/code_list.php
-
-    Set this to the language most of your documents are written in.
-
-    Defaults to "eng".
-
-PAPERLESS_OCR_ALWAYS=<bool>
-    By default Paperless does not OCR a document if the text can be retrieved from
-    the document directly. Set to true to always OCR documents.
-
-    Defaults to false.
-
 PAPERLESS_CONSUMER_POLLING=<num>
    If paperless won't find documents added to your consume folder, it might
    not be able to automatically detect filesystem changes. In that case,
@@ -232,12 +317,32 @@ PAPERLESS_CONSUMER_POLLING=<num>

    Defaults to 0, which disables polling and uses filesystem notifications.

+
 PAPERLESS_CONSUMER_DELETE_DUPLICATES=<bool>
    When the consumer detects a duplicate document, it will not touch the
    original document. This default behavior can be changed here.

    Defaults to false.

+
+PAPERLESS_CONSUMER_RECURSIVE=<bool>
+    Enable recursive watching of the consumption directory. Paperless will
+    then pick up files from subdirectories within your consumption
+    directory as well.
+
+    Defaults to false.
+
+
+PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS=<bool>
+    Set the names of subdirectories as tags for consumed files.
+    E.g. <CONSUMPTION_DIR>/foo/bar/file.pdf will add the tags "foo" and "bar" to
+    the consumed file. Paperless will create any tags that don't exist yet.
+
+    PAPERLESS_CONSUMER_RECURSIVE must be enabled for this to work.
+
+    Defaults to false.
+
+
 PAPERLESS_CONVERT_MEMORY_LIMIT=<num>
    On smaller systems, or even in the case of Very Large Documents, the consumer
    may explode, complaining about how it's "unable to extend pixel cache".  In
@@ -261,18 +366,6 @@ PAPERLESS_CONVERT_TMPDIR=<path>

    Default is none, which disables the temporary directory.

-PAPERLESS_CONVERT_DENSITY=<num>
-    This setting has a high impact on the physical size of tmp page files,
-    the speed of document conversion, and can affect the accuracy of OCR
-    results. Individual results can vary and this setting should be tested
-    thoroughly against the documents you are importing to see if it has any
-    impacts either negative or positive.
-    Testing on limited document sets has shown a setting of 200 can cut the
-    size of tmp files by 1/3, and speed up conversion by up to 4x
-    with little impact to OCR accuracy.
-
-    Default is 300.
-
 PAPERLESS_OPTIMIZE_THUMBNAILS=<bool>
    Use optipng to optimize thumbnails. This usually reduces the size of
    thumbnails by about 20%, but uses considerable compute time during
@@ -319,8 +412,5 @@ PAPERLESS_CONVERT_BINARY=<path>
 PAPERLESS_GS_BINARY=<path>
    Defaults to "/usr/bin/gs".

-PAPERLESS_UNPAPER_BINARY=<path>
-    Defaults to "/usr/bin/unpaper".
-
 PAPERLESS_OPTIPNG_BINARY=<path>
    Defaults to "/usr/bin/optipng".
--- a/docs/extending.rst
+++ b/docs/extending.rst
@@ -118,114 +118,80 @@ This will test and assemble everything and also build and tag a docker image.
 Extending Paperless
 ===================

-.. warning::
+Paperless does not have any fancy plugin systems and probably never will. However,
+some parts of the application have been designed to allow easy integration of additional
+features without any modification to the base code.

-    This section is not updated to paperless-ng yet.
+Making custom parsers
+---------------------

-For the most part, Paperless is monolithic, so extending it is often best
-managed by way of modifying the code directly and issuing a pull request on
-`GitHub`_.  However, over time the project has been evolving to be a little
-more "pluggable" so that users can write their own stuff that talks to it.
+Paperless uses parsers to add documents to paperless. A parser is responsible for:

-.. _GitHub: https://github.com/the-paperless-project/paperless
+*   Retrieving the content from the original
+*   Creating a thumbnail
+*   Optional: Retrieving a created date from the original
+*   Optional: Creating an archived document from the original

+Custom parsers can be added to paperless to support more file types. In order to do that,
+you need to write the parser itself and announce its existence to paperless.

-.. _extending-parsers:
-
-Parsers
-------
-
-You can leverage Paperless' consumption model to have it consume files *other*
-than ones handled by default like ``.pdf``, ``.jpg``, and ``.tiff``.  To do so,
-you simply follow Django's convention of creating a new app, with a few key
-requirements.
-
-
-.. _extending-parsers-parserspy:
-
-parsers.py
-..........
-
-In this file, you create a class that extends
-``documents.parsers.DocumentParser`` and go about implementing the three
-required methods:
-
-* ``get_thumbnail()``: Returns the path to a file we can use as a thumbnail for
-  this document.
-* ``get_text()``: Returns the text from the document and only the text.
-* ``get_date()``: If possible, this returns the date of the document, otherwise
-  it should return ``None``.
-
-
-.. _extending-parsers-signalspy:
-
-signals.py
-..........
-
-At consumption time, Paperless emits a ``document_consumer_declaration``
-signal which your module has to react to in order to let the consumer know
-whether or not it's capable of handling a particular file.  Think of it like
-this:
-
-1. Consumer finds a file in the consumption directory.
-2. It asks all the available parsers: *"Hey, can you handle this file?"*
-3. Each parser responds with either ``None`` meaning they can't handle the
-   file, or a dictionary in the following format:
+The parser itself must extend ``documents.parsers.DocumentParser`` and must implement the
+methods ``parse`` and ``get_thumbnail``. You can provide your own implementation to
+``get_date`` if you don't want to rely on paperless' default date guessing mechanisms.

 .. code:: python

-    {
-        "parser": <the class name>,
-        "weight": <an integer>
-    }
+    class MyCustomParser(DocumentParser):

-The consumer compares the ``weight`` values from all respondents and uses the
-class with the highest value to consume the document.  The default parser,
-``RasterisedDocumentParser`` has a weight of ``0``.
+        def parse(self, document_path, mime_type):
+            # This method does not return anything. Rather, you should assign
+            # whatever you got from the document to the following fields:

+            # The content of the document.
+            self.text = "content"
+            
+            # Optional: path to a PDF document that you created from the original.
+            self.archive_path = os.path.join(self.tempdir, "archived.pdf")

-.. _extending-parsers-appspy:
+            # Optional: "created" date of the document.
+            self.date = get_created_from_metadata(document_path)

-apps.py
-.......
+        def get_thumbnail(self, document_path, mime_type):
+            # This should return the path to a thumbnail you created for this
+            # document.
+            return os.path.join(self.tempdir, "thumb.png")

-This is a standard Django file, but you'll need to add some code to it to
-connect your parser to the ``document_consumer_declaration`` signal.
+If you encounter any issues during parsing, raise a ``documents.parsers.ParseError``.

+The ``self.tempdir`` directory is a temporary directory that is guaranteed to be empty
+and removed after consumption finished. You can use that directory to store any
+intermediate files and also use it to store the thumbnail / archived document.

-.. _extending-parsers-finally:
-
-Finally
-.......
-
-The last step is to update ``settings.py`` to include your new module.
-Eventually, this will be dynamic, but at the moment, you have to edit the
-``INSTALLED_APPS`` section manually.  Simply add the path to your AppConfig to
-the list like this:
+After that, you need to announce your parser to paperless. You need to connect a
+handler to the ``document_consumer_declaration`` signal. Have a look in the file
+``src/paperless_tesseract/apps.py`` on how that's done. The handler is a method
+that returns information about your parser:

 .. code:: python

-    INSTALLED_APPS = [
-        ...
-        "my_module.apps.MyModuleConfig",
-        ...
-    ]
+    def myparser_consumer_declaration(sender, **kwargs):
+        return {
+            "parser": MyCustomParser,
+            "weight": 0,
+            "mime_types": {
+                "application/pdf": ".pdf",
+                "image/jpeg": ".jpg",
+            }
+        }

-Order doesn't matter, but generally it's a good idea to place your module lower
-in the list so that you don't end up accidentally overriding project defaults
-somewhere.
+*   ``parser`` is a reference to a class that extends ``DocumentParser``.

+*   ``weight`` is used whenever two or more parsers are able to parse a file: The parser with
+    the higher weight wins. This can be used to override the parsers provided by
+    paperless.

-.. _extending-parsers-example:
-
-An Example
-..........
-
-The core Paperless functionality is based on this design, so if you want to see
-what a parser module should look like, have a look at `parsers.py`_,
-`signals.py`_, and `apps.py`_ in the `paperless_tesseract`_ module.
-
-.. _parsers.py: https://github.com/the-paperless-project/paperless/blob/master/src/paperless_tesseract/parsers.py
-.. _signals.py: https://github.com/the-paperless-project/paperless/blob/master/src/paperless_tesseract/signals.py
-.. _apps.py: https://github.com/the-paperless-project/paperless/blob/master/src/paperless_tesseract/apps.py
-.. _paperless_tesseract: https://github.com/the-paperless-project/paperless/blob/master/src/paperless_tesseract/
+*   ``mime_types`` is a dictionary. The keys are the mime types your parser supports and the value
+    is the default file extension that paperless should use when storing files and serving them for
+    download. We could guess that from the file extensions, but some mime types have many extensions
+    associated with them and the python methods responsible for guessing the extension do not always
+    return the same value.
--- a/docs/faq.rst
+++ b/docs/faq.rst
@@ -3,6 +3,18 @@
 Frequently asked questions
 **************************

+**Q:** *What's the general plan for Paperless-ng?*
+
+**A:** Paperless-ng is already almost feature-complete. This project will remain
+as simple as it is right now. It will see improvements to features that are already there.
+If you need advanced features such as document versions,
+workflows or multi-user with customizable access to individual files, this is
+not the tool for you.
+
+Features that *are* planned are some more quality of life extensions for the searching
+(e.g., search for similar documents, group results by correspondents with "more from this"
+links, etc), bulk editing and hierarchical tags.
+
 **Q:** *I'm using docker. Where are my documents?*

 **A:** Your documents are stored inside the docker volume ``paperless_media``.
@@ -21,11 +33,23 @@ is
    files around manually. This folder is meant to be entirely managed by docker
    and paperless.

+**Q:** *Let's say you don't support this project anymore in a year. Can I easily move to other systems?*
+
+**A:** Your documents are stored as plain files inside the media folder. You can always drag those files
+out of that folder to use them elsewhere. Here are a couple notes about that.
+
+*   Paperless never modifies your original documents. It keeps checksums of all documents and uses a
+    scheduled sanity checker to check that they remain the same.
+*   By default, paperless uses the internal ID of each document as its filename. This might not be very
+    convenient for export. However, you can adjust the way files are stored in paperless by
+    :ref:`configuring the filename format <advanced-file_name_handling>`.
+*   :ref:`The exporter <utilities-exporter>` is another easy way to get your files out of paperless with reasonable file names.
+
 **Q:** *What file types does paperless-ng support?*

 **A:** Currently, the following files are supported:

-*   PDF documents, PNG images and JPEG images are processed with OCR.
+*   PDF documents, PNG images, JPEG images, TIFF images and GIF images are processed with OCR and converted into PDF documents.
 *   Plain text documents are supported as well and are added verbatim
    to paperless.

@@ -49,7 +73,28 @@ in your browser and paperless has to do much less work to serve the data.

 **Q:** *How do I install paperless-ng on Raspberry Pi?*

-**A:** There is not docker image for ARM available. If you know how to build
+**A:** There is no docker image for ARM available. If you know how to build
 that automatically, I'm all ears. For now, you have to grab the latest release
 archive from the project page and build the image yourself. The release comes
 with the front end already compiled, so you don't have to do this on the Pi.
+
+**Q:** *How do I run this on my toaster?*
+
+**A:** I honestly don't know! As for all other devices that might be able
+to run paperless, you're a bit on your own. If you can't run the docker image,
+the documentation has instructions for bare metal installs. I'm running
+paperless on an i3 processor from 2015 or so. This is also what I use to test
+new releases with. Apart from that, I also have a Raspberry Pi, which I
+occasionally build the image on and see if it works.
+
+**Q:** *How do I proxy this with NGINX?*
+
+.. code::
+
+    location / {
+        proxy_pass http://localhost:8000/;
+    }
+
+And that's about it. Paperless serves everything, including static files by itself
+when running the docker image. If you want to do anything fancy, you have to
+install paperless bare metal.
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -42,6 +42,9 @@ resources in the documentation:
    learn about how paperless automates all tagging using machine learning.
 *   Paperless now comes with a :ref:`proper email consumer <usage-email>`
    that's fully tested and production ready.
+*   Paperless creates searchable PDF/A documents from whatever you put into
+    the consumption directory. This means that you can select text in
+    image-only documents coming from your scanner.
 *   See :ref:`this note <utilities-encyption>` about GnuPG encryption in
    paperless-ng.
 *   Paperless is now integrated with a
--- a/docs/setup.rst
+++ b/docs/setup.rst
@@ -220,16 +220,25 @@ writing. Windows is not and will never be supported.
    *   ``python3-dev``

    *   ``imagemagick`` >= 6 for PDF conversion
-    *   ``unpaper`` for cleaning documents before OCR
-    *   ``ghostscript``
    *   ``optipng`` for optimising thumbnails
-    *   ``tesseract-ocr`` >= 4.0.0 for OCR
-    *   ``tesseract-ocr`` language packs (``tesseract-ocr-eng``, ``tesseract-ocr-deu``, etc)
    *   ``gnupg`` for handling encrypted documents
    *   ``libpoppler-cpp-dev`` for PDF to text conversion
    *   ``libmagic-dev`` for mime type detection
    *   ``libpq-dev`` for PostgreSQL

+    These dependencies are required for OCRmyPDF, which is used for text recognition.
+
+    *   ``unpaper``
+    *   ``ghostscript``
+    *   ``icc-profiles-free``
+    *   ``qpdf``
+    *   ``liblept5``
+    *   ``libxml2``
+    *   ``pngquant``
+    *   ``zlib1g``
+    *   ``tesseract-ocr`` >= 4.0.0 for OCR
+    *   ``tesseract-ocr`` language packs (``tesseract-ocr-eng``, ``tesseract-ocr-deu``, etc)
+
    You will also need ``build-essential``, ``python3-setuptools`` and ``python3-wheel``
    for installing some of the python dependencies. You can remove that
    again after installation.
@@ -404,7 +413,14 @@ Migration to paperless-ng is then performed in a few simple steps:
    ``docker-compose.env`` to your needs.
    See `docker route`_ for details on which edits are advised.

-6.  In order to find your existing documents with the new search feature, you need
+6.  Since ``docker-compose`` would just use the old paperless image, we need to
+    manually build a new image:
+
+    .. code:: shell-session
+
+        $ docker-compose build
+
+7.  In order to find your existing documents with the new search feature, you need
    to invoke a one-time operation that will create the search index:

    .. code:: shell-session
@@ -414,7 +430,7 @@ Migration to paperless-ng is then performed in a few simple steps:
    This will migrate your database and create the search index. After that,
    paperless will take care of maintaining the index by itself.

-7.  Start paperless-ng.
+8.  Start paperless-ng.

    .. code:: bash

@@ -422,11 +438,11 @@ Migration to paperless-ng is then performed in a few simple steps:

    This will run paperless in the background and automatically start it on system boot.

-8.  Paperless installed a permanent redirect to ``admin/`` in your browser. This
+9.  Paperless installed a permanent redirect to ``admin/`` in your browser. This
    redirect is still in place and prevents access to the new UI. Clear
    browsing cache in order to fix this.

-9.  Optionally, follow the instructions below to migrate your existing data to PostgreSQL.
+10.  Optionally, follow the instructions below to migrate your existing data to PostgreSQL.


 .. _setup-sqlite_to_psql:
@@ -545,12 +561,10 @@ configuring some options in paperless can help improve performance immensely:
    sluggish response times during consumption, so you might want to lower these
    settings (example: 2 workers and 1 thread to always have some computing power
    left for other tasks).
-*   Keep ``PAPERLESS_OCR_ALWAYS`` at its default value 'false' and consider OCR'ing
+*   Keep ``PAPERLESS_OCR_MODE`` at its default value ``skip`` and consider OCR'ing
    your documents before feeding them into paperless. Some scanners are able to
-    do this!
-*   Lower ``PAPERLESS_CONVERT_DENSITY`` from its default value 300 to 200. This
-    will still result in rather accurate OCR, but will decrease consumption time
-    by quite a bit.
+    do this! You might want to even specify ``skip_noarchive`` to skip archive
+    file generation for already ocr'ed documents entirely.
 *   Set ``PAPERLESS_OPTIMIZE_THUMBNAILS`` to 'false' if you want faster consumption
    times. Thumbnails will be about 20% larger.

--- a/docs/troubleshooting.rst
+++ b/docs/troubleshooting.rst
@@ -29,75 +29,23 @@ Check for the following issues:
 Consumer fails to pickup any new files
 ######################################

-If you notice, that the consumer will only pickup files in the consumption
+If you notice that the consumer will only pick up files in the consumption
 directory at startup, but won't find any other files added later, check out
 the configuration file and enable filesystem polling with the setting
 ``PAPERLESS_CONSUMER_POLLING``.

+Operation not permitted
+#######################

-Consumer warns ``OCR for XX failed``
-####################################
+You might see errors such as:

-If you find the OCR accuracy to be too low, and/or the document consumer warns
-that ``OCR for XX failed, but we're going to stick with what we've got since
-FORGIVING_OCR is enabled``, then you might need to install the
-`Tesseract language files <http://packages.ubuntu.com/search?keywords=tesseract-ocr>`_
-marching your document's languages.
+.. code::

-As an example, if you are running Paperless from any Ubuntu or Debian
-box, and your documents are written in Spanish you may need to run::
+    chown: changing ownership of '../export': Operation not permitted

-    apt-get install -y tesseract-ocr-spa
+The container tries to set file ownership on the listed directories. This is
+required so that the user running paperless inside docker has write permissions
+to these folders. The error occurs when these directories point to locations
+that do not permit changing ownership, such as NFS shares.

-
-
-Consumer dies with ``convert: unable to extent pixel cache``
-############################################################
-
-During the consumption process, Paperless invokes ImageMagick's ``convert``
-program to translate the source document into something that the OCR engine can
-understand and this can burn a Very Large amount of memory if the original
-document is rather long.  Similarly, if your system doesn't have a lot of
-memory to begin with (ie. a Raspberry Pi), then this can happen for even
-medium-sized documents.
-
-The solution is to tell ImageMagick *not* to Use All The RAM, as is its
-default, and instead tell it to used a fixed amount.  ``convert`` will then
-break up the job into hundreds of individual files and use them to slowly
-compile the finished image.  Simply set ``PAPERLESS_CONVERT_MEMORY_LIMIT`` in
-``/etc/paperless.conf`` to something like ``32000000`` and you'll limit
-``convert`` to 32MB.  Fiddle with this value as you like.
-
-**HOWEVER**: Simply setting this value may not be enough on system where
-``/tmp`` is mounted as tmpfs, as this is where ``convert`` will write its
-temporary files.  In these cases (most Systemd machines), you need to tell
-ImageMagick to use a different space for its scratch work.  You do this by
-setting ``PAPERLESS_CONVERT_TMPDIR`` in ``/etc/paperless.conf`` to somewhere
-that's actually on a physical disk (and writable by the user running
-Paperless), like ``/var/tmp/paperless`` or ``/home/my_user/tmp`` in a pinch.
-
-
-DecompressionBombWarning and/or no text in the OCR output
-#########################################################
-
-Some users have had issues using Paperless to consume PDFs that were created
-by merging Very Large Scanned Images into one PDF.  If this happens to you,
-it's likely because the PDF you've created contains some very large pages
-(millions of pixels) and the process of converting the PDF to a OCR-friendly
-image is exploding.
-
-Typically, this happens because the scanned images are created with a high
-DPI and then rolled into the PDF with an assumed DPI of 72 (the default).
-The best solution then is to specify the DPI used in the scan in the
-conversion-to-PDF step.  So for example, if you scanned the original image
-with a DPI of 300, then merging the images into the single PDF with
-``convert`` should look like this:
-
-.. code:: bash
-
-    $ convert -density 300 *.jpg finished.pdf
-
-For more information on this and situations like it, you should take a look
-at `Issue #118`_ as that's where this tip originated.
-
-.. _Issue #118: https://github.com/the-paperless-project/paperless/issues/118
+Ensure that ``chown`` is possible on these directories.
--- a/docs/usage_overview.rst
+++ b/docs/usage_overview.rst
@@ -57,9 +57,33 @@ Adding documents to paperless
 #############################

 Once you've got Paperless setup, you need to start feeding documents into it.
-Currently, there are three options: the consumption directory, IMAP (email), and
+Currently, there are four options: the consumption directory, the dashboard, IMAP (email), and
 HTTP POST.

+When adding documents to paperless, it will perform the following operations on
+your documents:
+
+1.  OCR the document, if it has no text. Digital documents usually have text,
+    and this step will be skipped for those documents.
+2.  Paperless will create an archiveable PDF/A document from your document.
+    If this document is coming from your scanner, it will have embedded selectable text.
+3.  Paperless performs automatic matching of tags, correspondents and types on the
+    document before storing it in the database.
+
+.. hint::
+
+    This process can be configured to fit your needs. If you don't want paperless
+    to create archived versions for digital documents, you can disable that by
+    setting ``PAPERLESS_OCR_MODE=skip_noarchive``. Please read the
+    :ref:`relevant section in the documentation <configuration-ocr>`.
+
+.. note::
+
+    No matter which options you choose, Paperless will always store the original
+    document that it found in the consumption directory or in the mail and
+    will never overwrite that document. Archived versions are stored alongside the
+    original versions.
+

 The consumption directory
 =========================
@@ -82,6 +106,12 @@ files from the scanner.  Typically, you're looking at an FTP server like

 .. TODO: hyperref to configuration of the location of this magic folder.

+Dashboard upload
+================
+
+The dashboard has a file drop field to upload documents to paperless. Simply drag a file
+onto this field or select a file with the file dialog. Multiple files are supported.
+
 .. _usage-email:

 IMAP (Email)
@@ -158,6 +188,63 @@ You can also submit a document using the REST API, see :ref:`api-file_uploads` f

 .. _basic-searching:

+
+Best practices
+##############
+
+Paperless offers a couple tools that help you organize your document collection. However,
+it is up to you to use them in a way that helps you organize documents and find specific
+documents when you need them. This section offers a couple ideas for managing your collection.
+
+Document types allow you to classify documents according to what they are. You can define
+types such as "Receipt", "Invoice", or "Contract". If you used to collect all your receipts
+in a single binder, you can recreate that system in paperless by defining a document type,
+assigning documents to that type and then filtering by that type to see only your receipts.
+
+Not all documents need document types. Sometimes it's hard to determine what the type of a
+document is or it is hard to justify creating a document type that you only need once or twice.
+This is okay. As long as the types you define help you organize your collection in the way
+you want, paperless is doing its job.
+
+Tags can be used in many different ways. Think of tags as more versatile folders or binders.
+If you have a binder for documents related to university / your car or health care, you can
+create these binders in paperless by creating tags and assigning them to relevant documents.
+Just as with document types, you can filter the document list by tags and only see documents of
+a certain topic.
+
+With physical documents, you'll often need to decide which folder the document belongs to.
+The advantage of tags over folders and binders is that a single document can have multiple
+tags. A physical document cannot magically appear in two different folders, but with tags,
+this is entirely possible.
+
+.. hint::
+
+  This can be used in many different ways. One example: Imagine you're working on a particular
+  task, such as signing up for university. Usually you'll need to collect a bunch of different
+  documents that are already sorted into various folders. With the tag system of paperless,
+  you can create a new group of documents that are relevant to this task without destroying
+  the already existing organization. When you're done with the task, you could delete the
+  tag again, which would be equivalent to sorting the documents back into the folders they belong in.
+  Or keep the tag, up to you.
+
+All of the logic above applies to correspondents as well. Attach them to documents if you
+feel that they help you organize your collection.
+
+When you've started organizing your documents, create a couple saved views for document collections
+you regularly access. This is equal to having labeled physical binders on your desk, except
+that these saved views are dynamic and simply update themselves as you add documents to the system.
+
+Here are a couple examples of tags and types that you could use in your collection.
+
+* An ``inbox`` tag for newly added documents that you haven't manually edited yet.
+* A tag ``car`` for everything car related (repairs, registration, insurance, etc)
+* A tag ``todo`` for documents that you still need to do something with, such as reply, or
+  perform some task online.
+* A tag ``bank account x`` for all bank statements related to that account.
+* A tag ``mail`` for anything that you added to paperless via its mail processing capabilities.
+* A tag ``missing_metadata`` when you still need to add some metadata to a document, but can't
+  or don't want to do this right now.
+
 Searching
 #########

@@ -176,20 +263,20 @@ further.

 Matching documents with logical expressions:

-.. code:: none
+.. code::

  shopname AND (product1 OR product2)

 Matching specific tags, correspondents or types:

-.. code:: none
+.. code::

  type:invoice tag:unpaid
  correspondent:university certificate

 Matching dates:

-.. code:: none
+.. code::
  
  created:[2005 to 2009]
  added:yesterday
@@ -197,7 +284,7 @@ Matching dates:

 Matching inexact words:

-.. code:: none
+.. code::

  produ*name

--- a/paperless.conf.example
+++ b/paperless.conf.example
@@ -31,19 +31,24 @@
 #PAPERLESS_STATIC_URL=/static/
 #PAPERLESS_AUTO_LOGIN_USERNAME=

+# OCR settings
+
+#PAPERLESS_OCR_LANGUAGE=eng
+#PAPERLESS_OCR_MODE=skip
+#PAPERLESS_OCR_OUTPUT_TYPE=pdfa
+#PAPERLESS_OCR_PAGES=1
+#PAPERLESS_OCR_IMAGE_DPI=300
+#PAPERLESS_OCR_USER_ARG={}
+#PAPERLESS_CONVERT_MEMORY_LIMIT=0
+#PAPERLESS_CONVERT_TMPDIR=/var/tmp/paperless
+
 # Software tweaks

 #PAPERLESS_TASK_WORKERS=1
 #PAPERLESS_THREADS_PER_WORKER=1
 #PAPERLESS_TIME_ZONE=UTC
-#PAPERLESS_OCR_PAGES=1
-#PAPERLESS_OCR_LANGUAGE=eng
-#PAPERLESS_OCR_ALWAYS=false
 #PAPERLESS_CONSUMER_POLLING=10
 #PAPERLESS_CONSUMER_DELETE_DUPLICATES=false
-#PAPERLESS_CONVERT_MEMORY_LIMIT=0
-#PAPERLESS_CONVERT_TMPDIR=/var/tmp/paperless
-#PAPERLESS_CONVERT_DENSITY=300
 #PAPERLESS_OPTIMIZE_THUMBNAILS=true
 #PAPERLESS_POST_CONSUME_SCRIPT=/path/to/an/arbitrary/script.sh
 #PAPERLESS_FILENAME_DATE_ORDER=YMD
@@ -53,5 +58,4 @@

 #PAPERLESS_CONVERT_BINARY=/usr/bin/convert
 #PAPERLESS_GS_BINARY=/usr/bin/gs
-#PAPERLESS_UNPAPER_BINARY=/usr/bin/unpaper
 #PAPERLESS_OPTIPNG_BINARY=/usr/bin/optipng
--- a/src-ui/src/app/app.module.ts
+++ b/src-ui/src/app/app.module.ts
@@ -45,6 +45,9 @@ import { SavedViewWidgetComponent } from './components/dashboard/widgets/saved-v
 import { StatisticsWidgetComponent } from './components/dashboard/widgets/statistics-widget/statistics-widget.component';
 import { UploadFileWidgetComponent } from './components/dashboard/widgets/upload-file-widget/upload-file-widget.component';
 import { WidgetFrameComponent } from './components/dashboard/widgets/widget-frame/widget-frame.component';
+import { WelcomeWidgetComponent } from './components/dashboard/widgets/welcome-widget/welcome-widget.component';
+import { YesNoPipe } from './pipes/yes-no.pipe';
+import { FileSizePipe } from './pipes/file-size.pipe';

@NgModule({
  declarations: [
@@ -82,7 +85,10 @@ import { WidgetFrameComponent } from './components/dashboard/widgets/widget-fram
    SavedViewWidgetComponent,
    StatisticsWidgetComponent,
    UploadFileWidgetComponent,
-    WidgetFrameComponent
+    WidgetFrameComponent,
+    WelcomeWidgetComponent,
+    YesNoPipe,
+    FileSizePipe
  ],
  imports: [
    BrowserModule,
--- a/src-ui/src/app/components/app-frame/app-frame.component.scss
+++ b/src-ui/src/app/components/app-frame/app-frame.component.scss
@@ -50,6 +50,7 @@

 .sidebar .nav-link.active {
  color: $primary;
+  font-weight: bold;
 }

 .sidebar .nav-link:hover .sidebaricon,
--- a/src-ui/src/app/components/app-frame/app-frame.component.ts
+++ b/src-ui/src/app/components/app-frame/app-frame.component.ts
@@ -90,7 +90,9 @@ export class AppFrameComponent implements OnInit, OnDestroy {
  }

  ngOnDestroy() {
-    this.openDocumentsSubscription.unsubscribe()
+    if (this.openDocumentsSubscription) {
+      this.openDocumentsSubscription.unsubscribe()
+    }
  }

 }
--- a/src-ui/src/app/components/common/input/date-time/date-time.component.html
+++ b/src-ui/src/app/components/common/input/date-time/date-time.component.html
@@ -3,11 +3,10 @@
      <label for="created_date">{{titleDate}}</label>
      <input type="date" class="form-control" id="created_date" [(ngModel)]="dateValue" (change)="dateOrTimeChanged()">
  </div>
-  <div class="form-group col">
+  <div class="form-group col" *ngIf="titleTime">
      <label for="created_time">{{titleTime}}</label>
      <input type="time" class="form-control" id="created_time" [(ngModel)]="timeValue" (change)="dateOrTimeChanged()">
  </div>
-
 </div>


--- a/src-ui/src/app/components/common/input/date-time/date-time.component.ts
+++ b/src-ui/src/app/components/common/input/date-time/date-time.component.ts
@@ -40,7 +40,7 @@ export class DateTimeComponent implements OnInit,ControlValueAccessor  {
  titleDate: string = "Date"

  @Input()
-  titleTime: string = "Time"
+  titleTime: string

  @Input()
  disabled: boolean = false
--- a/src-ui/src/app/components/common/input/tags/tags.component.html
+++ b/src-ui/src/app/components/common/input/tags/tags.component.html
@@ -8,7 +8,7 @@

    <div class="input-group-append" ngbDropdown placement="top-right">
      <button class="btn btn-outline-secondary" type="button" ngbDropdownToggle></button>
-      <div ngbDropdownMenu class="scrollable-menu">
+      <div ngbDropdownMenu class="scrollable-menu shadow">
        <button type="button" *ngFor="let tag of tags" ngbDropdownItem (click)="addTag(tag.id)">
          <app-tag [tag]="tag"></app-tag>
        </button>
--- a/src-ui/src/app/components/common/input/text/text.component.ts
+++ b/src-ui/src/app/components/common/input/text/text.component.ts
@@ -1,6 +1,5 @@
-import { Component, forwardRef, Input, OnInit } from '@angular/core';
-import { ControlValueAccessor, NG_VALUE_ACCESSOR } from '@angular/forms';
-import { v4 as uuidv4 } from 'uuid';
+import { Component, forwardRef } from '@angular/core';
+import { NG_VALUE_ACCESSOR } from '@angular/forms';
 import { AbstractInputComponent } from '../abstract-input';

@Component({
--- a/src-ui/src/app/components/dashboard/dashboard.component.html
+++ b/src-ui/src/app/components/dashboard/dashboard.component.html
@@ -3,23 +3,19 @@
 </app-page-header>

 <div class='row'>
-  <div class="col-lg">
-    <app-widget-frame title="Saved views" *ngIf="savedViews.length == 0">
-      <p class="card-text">This space is reserved to display your saved views. Go to your documents and save a view
-        to have it displayed
-        here!</p>
-    </app-widget-frame>
+  <div class="col-lg-8">
+    <app-welcome-widget *ngIf="savedViews.length == 0"></app-welcome-widget>

    <ng-container *ngFor="let v of savedViews">
      <app-saved-view-widget [savedView]="v"></app-saved-view-widget>
    </ng-container>

  </div>
-  <div class="col-lg">
+  <div class="col-lg-4">

    <app-statistics-widget></app-statistics-widget>

    <app-upload-file-widget></app-upload-file-widget>

  </div>
-</div>
+</div>
--- a/src-ui/src/app/components/dashboard/dashboard.component.ts
+++ b/src-ui/src/app/components/dashboard/dashboard.component.ts
@@ -1,5 +1,7 @@
 import { Component, OnInit } from '@angular/core';
+import { Title } from '@angular/platform-browser';
 import { SavedViewConfigService } from 'src/app/services/saved-view-config.service';
+import { environment } from 'src/environments/environment';


@Component({
@@ -10,13 +12,15 @@ import { SavedViewConfigService } from 'src/app/services/saved-view-config.servi
 export class DashboardComponent implements OnInit {

  constructor(
-    public savedViewConfigService: SavedViewConfigService) { }
+    public savedViewConfigService: SavedViewConfigService,
+    private titleService: Title) { }


  savedViews = []

  ngOnInit(): void {
    this.savedViews = this.savedViewConfigService.getDashboardConfigs()
+    this.titleService.setTitle(`Dashboard - ${environment.appTitle}`)
  }

 }
--- a/src-ui/src/app/components/dashboard/widgets/saved-view-widget/saved-view-widget.component.html
+++ b/src-ui/src/app/components/dashboard/widgets/saved-view-widget/saved-view-widget.component.html
@@ -13,7 +13,7 @@
    <tbody>
      <tr *ngFor="let doc of documents" routerLink="/documents/{{doc.id}}">
        <td>{{doc.created | date}}</td>
-        <td>{{doc.title}}<app-tag [tag]="t" *ngFor="let t of doc.tags" class="ml-1"></app-tag>
+        <td>{{doc.title}}<app-tag [tag]="t" *ngFor="let t of doc.tags$ | async" class="ml-1"></app-tag>
      </tr>
    </tbody>
  </table>
--- a/src-ui/src/app/components/dashboard/widgets/saved-view-widget/saved-view-widget.component.ts
+++ b/src-ui/src/app/components/dashboard/widgets/saved-view-widget/saved-view-widget.component.ts
@@ -29,8 +29,14 @@ export class SavedViewWidgetComponent implements OnInit {
  }

  showAll() {
-    this.list.load(this.savedView)
-    this.router.navigate(["documents"])
+    // Views shown in the sidebar are opened through their 'view/<id>' route;
+    // any other view is loaded into the document list before navigating to it.
+    if (this.savedView.showInSideBar) {
+      this.router.navigate(['view', this.savedView.id])
+    } else {
+      this.list.load(this.savedView)
+      this.router.navigate(["documents"])
+    }
  }

 }
--- a/src-ui/src/app/components/dashboard/widgets/upload-file-widget/upload-file-widget.component.html
+++ b/src-ui/src/app/components/dashboard/widgets/upload-file-widget/upload-file-widget.component.html
@@ -1,15 +1,18 @@
 <app-widget-frame title="Upload new documents">

-  <form content>
-    <ngx-file-drop 
-      dropZoneLabel="Drop documents here or" (onFileDrop)="dropped($event)"
-      (onFileOver)="fileOver($event)" (onFileLeave)="fileLeave($event)"
-      dropZoneClassName="bg-light card"
-      multiple="true"
-      contentClassName="justify-content-center d-flex align-items-center p-5"
-      [showBrowseBtn]=true
-      browseBtnClassName="btn btn-sm btn-outline-primary ml-2">
+  <div content>
+    <form>
+      <ngx-file-drop dropZoneLabel="Drop documents here or" (onFileDrop)="dropped($event)"
+        (onFileOver)="fileOver($event)" (onFileLeave)="fileLeave($event)" dropZoneClassName="bg-light card"
+        multiple="true" contentClassName="justify-content-center d-flex align-items-center p-5" [showBrowseBtn]=true
+        browseBtnClassName="btn btn-sm btn-outline-primary ml-2">

-    </ngx-file-drop>
-  </form>
+      </ngx-file-drop>
+    </form>
+    <div *ngIf="uploadVisible" class="mt-3">
+      <p>Uploading {{uploadStatus.length}} file(s)</p>
+      <ngb-progressbar [value]="loadedSum" [max]="totalSum" [striped]="true" [animated]="uploadStatus.length > 0">
+      </ngb-progressbar>
+    </div>
+  </div>
 </app-widget-frame>
--- a/src-ui/src/app/components/dashboard/widgets/upload-file-widget/upload-file-widget.component.ts
+++ b/src-ui/src/app/components/dashboard/widgets/upload-file-widget/upload-file-widget.component.ts
@@ -1,8 +1,15 @@
+import { HttpEventType } from '@angular/common/http';
 import { Component, OnInit } from '@angular/core';
 import { FileSystemFileEntry, NgxFileDropEntry } from 'ngx-file-drop';
 import { DocumentService } from 'src/app/services/rest/document.service';
 import { Toast, ToastService } from 'src/app/services/toast.service';

+
+interface UploadStatus {
+  loaded: number
+  total: number 
+}
+
@Component({
  selector: 'app-upload-file-widget',
  templateUrl: './upload-file-widget.component.html',
@@ -16,26 +23,73 @@ export class UploadFileWidgetComponent implements OnInit {
  }

  public fileOver(event){
-    console.log(event);
  }
- 
+
  public fileLeave(event){
-    console.log(event);
  }
- 
+
+  // Progress of the uploads that are currently in flight; an entry is removed
+  // again as soon as its request completes or fails.
+  uploadStatus: UploadStatus[] = []
+  // Number of uploads that have finished so far, successfully or not.
+  completedFiles = 0
+
+  // Becomes true when a file is dropped; the template renders the progress
+  // section only while this is set.
+  uploadVisible = false
+
+  // Amount already sent across all in-flight uploads. The seed becomes 1 once
+  // any upload has finished, so with the seed of 1 in totalSum the bar reads
+  // 1/1 (full) when no upload is in flight anymore.
+  get loadedSum() {
+    return this.uploadStatus.reduce((sum, status) => sum + status.loaded, this.completedFiles > 0 ? 1 : 0)
+  }
+
+  // Amount to send across all in-flight uploads. The seed of 1 keeps the
+  // progress bar maximum above zero while the list is empty.
+  get totalSum() {
+    return this.uploadStatus.reduce((sum, status) => sum + status.total, 1)
+  }
+
  public dropped(files: NgxFileDropEntry[]) {
    for (const droppedFile of files) {
      if (droppedFile.fileEntry.isFile) {
-        const fileEntry = droppedFile.fileEntry as FileSystemFileEntry;
-        console.log(fileEntry)
+        const uploadStatusObject: UploadStatus = {loaded: 0, total: 1}
+        this.uploadStatus.push(uploadStatusObject)
+        this.uploadVisible = true
+
+        const fileEntry = droppedFile.fileEntry as FileSystemFileEntry;
        fileEntry.file((file: File) => {
-          console.log(file)
          const formData = new FormData()
          formData.append('document', file, file.name)
-          this.documentService.uploadDocument(formData).subscribe(result => {
-            this.toastService.showToast(Toast.make("Information", "The document has been uploaded and will be processed by the consumer shortly."))
+
+          this.documentService.uploadDocument(formData).subscribe(event => {
+            if (event.type === HttpEventType.UploadProgress) {
+              uploadStatusObject.loaded = event.loaded
+              // `total` is optional on progress events; keep the previous
+              // value rather than storing undefined, which would turn the
+              // summed totals (and the progress bar maximum) into NaN.
+              if (event.total !== undefined) {
+                uploadStatusObject.total = event.total
+              }
+            } else if (event.type === HttpEventType.Response) {
+              this.uploadStatus.splice(this.uploadStatus.indexOf(uploadStatusObject), 1)
+              this.completedFiles += 1
+              this.toastService.showToast(Toast.make("Information", "The document has been uploaded and will be processed by the consumer shortly."))
+            }
          }, error => {
-            this.toastService.showToast(Toast.makeError("An error has occured while uploading the document. Sorry!"))
+            this.uploadStatus.splice(this.uploadStatus.indexOf(uploadStatusObject), 1)
+            this.completedFiles += 1
+            switch (error.status) {
+              case 400: {
+                this.toastService.showToast(Toast.makeError(`There was an error while uploading the document: ${error.error.document}`))
+                break;
+              }
+              default: {
+                this.toastService.showToast(Toast.makeError("An error has occurred while uploading the document. Sorry!"))
+                break;
+              }
+            }
          })
        });
      }
--- a/src-ui/src/app/components/dashboard/widgets/welcome-widget/welcome-widget.component.html
+++ b/src-ui/src/app/components/dashboard/widgets/welcome-widget/welcome-widget.component.html
@@ -0,0 +1,16 @@
+<app-widget-frame title="First steps">
+
+  <ng-container content>
+    <img src="assets/save-filter.png" class="float-right">
+    <p>Paperless is running! :)</p>
+    <p>You can start uploading documents by dropping them in the file upload box to the right or by dropping them in the configured consumption folder and they'll start showing up in the documents list.
+      After you've added some metadata to your documents, use the filtering mechanisms of paperless to create custom views (such as 'Recently added', 'Tagged TODO') and have them displayed on the dashboard instead of this message.</p>
+    <p>Paperless offers some more features that try to make your life easier, such as:</p>
+    <ul>
+      <li>Once you've got a couple documents in paperless and added metadata to them, paperless can assign that metadata to new documents automatically.</li>
+      <li>You can configure paperless to read your mails and add documents from attached files.</li>
+    </ul>
+    <p>Consult the documentation on how to use these features. The section on basic usage also has some information on how to use paperless in general.</p>
+  </ng-container>
+
+</app-widget-frame>
--- a/src-ui/src/app/components/dashboard/widgets/welcome-widget/welcome-widget.component.scss
+++ b/src-ui/src/app/components/dashboard/widgets/welcome-widget/welcome-widget.component.scss
--- a/src-ui/src/app/components/dashboard/widgets/welcome-widget/welcome-widget.component.spec.ts
+++ b/src-ui/src/app/components/dashboard/widgets/welcome-widget/welcome-widget.component.spec.ts
@@ -0,0 +1,25 @@
+import { ComponentFixture, TestBed } from '@angular/core/testing';
+
+import { WelcomeWidgetComponent } from './welcome-widget.component';
+
+describe('WelcomeWidgetComponent', () => {
+  let component: WelcomeWidgetComponent;
+  let fixture: ComponentFixture<WelcomeWidgetComponent>;
+
+  beforeEach(async () => {
+    await TestBed.configureTestingModule({
+      declarations: [ WelcomeWidgetComponent ]
+    })
+    .compileComponents();
+  });
+
+  beforeEach(() => {
+    fixture = TestBed.createComponent(WelcomeWidgetComponent);
+    component = fixture.componentInstance;
+    fixture.detectChanges();
+  });
+
+  it('should create', () => {
+    expect(component).toBeTruthy();
+  });
+});
--- a/src-ui/src/app/components/dashboard/widgets/welcome-widget/welcome-widget.component.ts
+++ b/src-ui/src/app/components/dashboard/widgets/welcome-widget/welcome-widget.component.ts
@@ -0,0 +1,15 @@
+import { Component, OnInit } from '@angular/core';
+
+@Component({
+  selector: 'app-welcome-widget',
+  templateUrl: './welcome-widget.component.html',
+  styleUrls: ['./welcome-widget.component.scss']
+})
+export class WelcomeWidgetComponent implements OnInit {
+
+  constructor() { }
+
+  ngOnInit(): void {
+  }
+
+}
--- a/src-ui/src/app/components/dashboard/widgets/widget-frame/widget-frame.component.html
+++ b/src-ui/src/app/components/dashboard/widgets/widget-frame/widget-frame.component.html
@@ -1,4 +1,4 @@
-<div class="card mb-3 shadow">
+<div class="card mb-3 shadow-sm">
  <div class="card-header">
    <div class="d-flex justify-content-between align-items-center">
      <h5 class="card-title mb-0">{{title}}</h5>
--- a/src-ui/src/app/components/document-detail/document-detail.component.html
+++ b/src-ui/src/app/components/document-detail/document-detail.component.html
@@ -5,12 +5,26 @@
        </svg>
        <span class="d-none d-lg-inline"> Delete</span>
    </button>
-    <a [href]="downloadUrl" class="btn btn-sm btn-outline-primary mr-2">
-        <svg class="buttonicon" fill="currentColor">
-            <use xlink:href="assets/bootstrap-icons.svg#download" />
-        </svg>
-        <span class="d-none d-lg-inline"> Download</span>
-    </a>
+
+    <div class="btn-group mr-2">
+
+        <a [href]="downloadUrl" class="btn btn-sm btn-outline-primary">
+            <svg class="buttonicon" fill="currentColor">
+                <use xlink:href="assets/bootstrap-icons.svg#download" />
+            </svg>
+            <span class="d-none d-lg-inline"> Download</span>
+        </a>
+
+        <div class="btn-group" ngbDropdown role="group" *ngIf="metadata?.has_archive_version">
+            <button class="btn btn-sm btn-outline-primary dropdown-toggle-split" ngbDropdownToggle></button>
+            <div class="dropdown-menu shadow" ngbDropdownMenu>
+                <a ngbDropdownItem [href]="downloadOriginalUrl">Download original</a>
+            </div>
+        </div>
+
+    </div>
+
+
    <button type="button" class="btn btn-sm btn-outline-primary" (click)="close()">
        <svg class="buttonicon" fill="currentColor">
            <use xlink:href="assets/bootstrap-icons.svg#x" />
@@ -22,40 +36,146 @@

 <div class="row">
    <div class="col-xl">
+
        <form [formGroup]='documentForm' (ngSubmit)="save()">

-            <app-input-text title="Title" formControlName="title"></app-input-text>
+            <ul ngbNav #nav="ngbNav" class="nav-tabs">
+                <li [ngbNavItem]="1">
+                    <a ngbNavLink>Details</a>
+                    <ng-template ngbNavContent>

-            <div class="form-group">
-                <label for="archive_serial_number">Archive Serial Number</label>
-                <input type="number" class="form-control" id="archive_serial_number"
-                    formControlName='archive_serial_number'>
-            </div>
+                        <app-input-text title="Title" formControlName="title"></app-input-text>
+                        <div class="form-group">
+                            <label for="archive_serial_number">Archive Serial Number</label>
+                            <input type="number" class="form-control" id="archive_serial_number"
+                                formControlName='archive_serial_number'>
+                        </div>
+                        <app-input-date-time titleDate="Date created" formControlName="created"></app-input-date-time>
+                        <app-input-select [items]="correspondents" title="Correspondent" formControlName="correspondent"
+                            allowNull="true" (createNew)="createCorrespondent()"></app-input-select>
+                        <app-input-select [items]="documentTypes" title="Document type" formControlName="document_type"
+                            allowNull="true" (createNew)="createDocumentType()"></app-input-select>
+                        <app-input-tags formControlName="tags" title="Tags"></app-input-tags>

-            <app-input-date-time title="Date created" titleTime="Time created" formControlName="created"></app-input-date-time>
+                    </ng-template>
+                </li>

-            <div class="form-group">
-                <label for="content">Content</label>
-                <textarea class="form-control" id="content" rows="5" formControlName='content'></textarea>
-            </div>
+                <li [ngbNavItem]="2">
+                    <a ngbNavLink>Content</a>
+                    <ng-template ngbNavContent>
+                        <div class="form-group">
+                            <textarea class="form-control" id="content" rows="20" formControlName='content'></textarea>
+                        </div>
+                    </ng-template>
+                </li>

-            <app-input-select [items]="correspondents" title="Correspondent" formControlName="correspondent_id" allowNull="true" (createNew)="createCorrespondent()"></app-input-select>
+                <li [ngbNavItem]="3">
+                    <a ngbNavLink>Metadata</a>
+                    <ng-template ngbNavContent>

-            <app-input-select [items]="documentTypes" title="Document type" formControlName="document_type_id" allowNull="true" (createNew)="createDocumentType()"></app-input-select>
+                        <table class="table table-borderless">
+                            <tbody>
+                                <tr>
+                                    <td>Date modified</td>
+                                    <td>{{document.modified | date:'medium'}}</td>
+                                </tr>
+                                <tr>
+                                    <td>Date added</td>
+                                    <td>{{document.added | date:'medium'}}</td>
+                                </tr>
+                                <tr>
+                                    <td>Media filename</td>
+                                    <td>{{metadata?.media_filename}}</td>
+                                </tr>
+                                <tr>
+                                    <td>Original MD5 Checksum</td>
+                                    <td>{{metadata?.original_checksum}}</td>
+                                </tr>
+                                <tr>
+                                    <td>Original file size</td>
+                                    <td>{{metadata?.original_size | fileSize}}</td>
+                                </tr>
+                                <tr>
+                                    <td>Original mime type</td>
+                                    <td>{{metadata?.original_mime_type}}</td>
+                                </tr>
+                                <tr *ngIf="metadata?.has_archive_version">
+                                    <td>Archive MD5 Checksum</td>
+                                    <td>{{metadata?.archive_checksum}}</td>
+                                </tr>
+                                <tr *ngIf="metadata?.has_archive_version">
+                                    <td>Archive file size</td>
+                                    <td>{{metadata?.archive_size | fileSize}}</td>
+                                </tr>
+                            </tbody>
+                        </table>

-            <app-input-tags formControlName="tags_id" title="Tags"></app-input-tags>
+                        <h6 *ngIf="metadata?.original_metadata.length > 0">
+                            <button type="button" class="btn btn-outline-secondary btn-sm mr-2"
+                                (click)="expandOriginalMetadata = !expandOriginalMetadata" [attr.aria-expanded]="expandOriginalMetadata" aria-controls="originalMetadataCollapse">
+                                <svg class="buttonicon" fill="currentColor" *ngIf="!expandOriginalMetadata">
+                                    <use xlink:href="assets/bootstrap-icons.svg#caret-down" />
+                                </svg>
+                                <svg class="buttonicon" fill="currentColor" *ngIf="expandOriginalMetadata">
+                                    <use xlink:href="assets/bootstrap-icons.svg#caret-up" />
+                                </svg>
+                            </button>
+                            Original document metadata
+                        </h6>
+                        <!-- One-way binding: a negated expression cannot be assigned to, so only the button above changes the expanded state. -->
+                        <div id="originalMetadataCollapse" [ngbCollapse]="!expandOriginalMetadata">
+                            <table class="table table-borderless">
+                                <tbody>
+                                    <tr *ngFor="let m of metadata?.original_metadata">
+                                        <td>{{m.prefix}}:{{m.key}}</td>
+                                        <td>{{m.value}}</td>
+                                    </tr>
+                                </tbody>
+                            </table>
+                        </div>
+
+                        <h6 *ngIf="metadata?.has_archive_version && metadata?.archive_metadata.length > 0">
+                            <button type="button" class="btn btn-outline-secondary btn-sm mr-2"
+                                (click)="expandArchivedMetadata = !expandArchivedMetadata" [attr.aria-expanded]="expandArchivedMetadata" aria-controls="archivedMetadataCollapse">
+                                <svg class="buttonicon" fill="currentColor" *ngIf="!expandArchivedMetadata">
+                                    <use xlink:href="assets/bootstrap-icons.svg#caret-down" />
+                                </svg>
+                                <svg class="buttonicon" fill="currentColor" *ngIf="expandArchivedMetadata">
+                                    <use xlink:href="assets/bootstrap-icons.svg#caret-up" />
+                                </svg>
+                            </button>
+                            Archived document metadata
+                        </h6>
+                        <!-- One-way binding: a negated expression cannot be assigned to, so only the button above changes the expanded state. -->
+                        <div id="archivedMetadataCollapse" [ngbCollapse]="!expandArchivedMetadata">
+                            <table class="table table-borderless">
+                                <tbody>
+                                    <tr *ngFor="let m of metadata?.archive_metadata">
+                                        <td>{{m.prefix}}:{{m.key}}</td>
+                                        <td>{{m.value}}</td>
+                                    </tr>
+                                </tbody>
+                            </table>
+                        </div>
+
+                    </ng-template>
+                </li>
+            </ul>
+
+            <div [ngbNavOutlet]="nav" class="mt-2"></div>

            <button type="button" class="btn btn-outline-secondary" (click)="discard()">Discard</button>&nbsp;
-            <button type="button" class="btn btn-outline-primary" (click)="saveEditNext()" *ngIf="hasNext()">Save & edit next</button>&nbsp;
+            <button type="button" class="btn btn-outline-primary" (click)="saveEditNext()" *ngIf="hasNext()">Save & edit
+                next</button>&nbsp;
            <button type="submit" class="btn btn-primary">Save</button>&nbsp;
        </form>
    </div>

-    <div class="col-xl">
+    <div class="col-xl d-none d-xl-block document-preview">
        <object [data]="previewUrl | safe" type="application/pdf" width="100%" height="100%">
            <p>Your browser does not support PDFs.
-                <a href="previewUrl">Download the PDF</a>.</p>
+                <!-- Bind the href: a plain attribute would link to the literal text "previewUrl". -->
+                <a [href]="previewUrl">Download the PDF</a>.</p>
        </object>

    </div>
-</div>
+</div>
--- a/src-ui/src/app/components/document-detail/document-detail.component.scss
+++ b/src-ui/src/app/components/document-detail/document-detail.component.scss
@@ -0,0 +1,5 @@
+.document-preview {
+  height: calc(100vh - 180px);
+  top: 70px;
+  position: sticky;
+}
--- a/src-ui/src/app/components/document-detail/document-detail.component.ts
+++ b/src-ui/src/app/components/document-detail/document-detail.component.ts
@@ -1,15 +1,18 @@
 import { Component, OnInit } from '@angular/core';
 import { FormControl, FormGroup } from '@angular/forms';
+import { Title } from '@angular/platform-browser';
 import { ActivatedRoute, Router } from '@angular/router';
 import { NgbModal } from '@ng-bootstrap/ng-bootstrap';
 import { PaperlessCorrespondent } from 'src/app/data/paperless-correspondent';
 import { PaperlessDocument } from 'src/app/data/paperless-document';
+import { PaperlessDocumentMetadata } from 'src/app/data/paperless-document-metadata';
 import { PaperlessDocumentType } from 'src/app/data/paperless-document-type';
 import { DocumentListViewService } from 'src/app/services/document-list-view.service';
 import { OpenDocumentsService } from 'src/app/services/open-documents.service';
 import { CorrespondentService } from 'src/app/services/rest/correspondent.service';
 import { DocumentTypeService } from 'src/app/services/rest/document-type.service';
 import { DocumentService } from 'src/app/services/rest/document.service';
+import { environment } from 'src/environments/environment';
 import { DeleteDialogComponent } from '../common/delete-dialog/delete-dialog.component';
 import { CorrespondentEditDialogComponent } from '../manage/correspondent-list/correspondent-edit-dialog/correspondent-edit-dialog.component';
 import { DocumentTypeEditDialogComponent } from '../manage/document-type-list/document-type-edit-dialog/document-type-edit-dialog.component';
@@ -21,11 +24,16 @@ import { DocumentTypeEditDialogComponent } from '../manage/document-type-list/do
 })
 export class DocumentDetailComponent implements OnInit {

+  public expandOriginalMetadata = false;
+  public expandArchivedMetadata = false;
+
  documentId: number
  document: PaperlessDocument
+  metadata: PaperlessDocumentMetadata
  title: string
  previewUrl: string
  downloadUrl: string
+  downloadOriginalUrl: string

  correspondents: PaperlessCorrespondent[]
  documentTypes: PaperlessDocumentType[]
@@ -34,10 +42,10 @@ export class DocumentDetailComponent implements OnInit {
    title: new FormControl(''),
    content: new FormControl(''),
    created: new FormControl(),
-    correspondent_id: new FormControl(),
-    document_type_id: new FormControl(),
+    correspondent: new FormControl(),
+    document_type: new FormControl(),
    archive_serial_number: new FormControl(),
-    tags_id: new FormControl([])
+    tags: new FormControl([])
  })

  constructor(
@@ -48,7 +56,8 @@ export class DocumentDetailComponent implements OnInit {
    private router: Router,
    private modalService: NgbModal,
    private openDocumentService: OpenDocumentsService,
-    private documentListViewService: DocumentListViewService) { }
+    private documentListViewService: DocumentListViewService,
+    private titleService: Title) { }

  ngOnInit(): void {
    this.documentForm.valueChanges.subscribe(wow => {
@@ -62,6 +71,7 @@ export class DocumentDetailComponent implements OnInit {
      this.documentId = +paramMap.get('id')
      this.previewUrl = this.documentsService.getPreviewUrl(this.documentId)
      this.downloadUrl = this.documentsService.getDownloadUrl(this.documentId)
+      this.downloadOriginalUrl = this.documentsService.getDownloadUrl(this.documentId, true)
      if (this.openDocumentService.getOpenDocument(this.documentId)) {
        this.updateComponent(this.openDocumentService.getOpenDocument(this.documentId))
      } else {
@@ -76,6 +86,10 @@ export class DocumentDetailComponent implements OnInit {

  updateComponent(doc: PaperlessDocument) {
    this.document = doc
+    this.titleService.setTitle(`${doc.title} - ${environment.appTitle}`)
+    this.documentsService.getMetadata(doc.id).subscribe(result => {
+      this.metadata = result
+    })
    this.title = doc.title
    this.documentForm.patchValue(doc)
  }
@@ -86,7 +100,7 @@ export class DocumentDetailComponent implements OnInit {
    modal.componentInstance.success.subscribe(newDocumentType => {
      this.documentTypeService.listAll().subscribe(documentTypes => {
        this.documentTypes = documentTypes.results
-        this.documentForm.get('document_type_id').setValue(newDocumentType.id)
+        this.documentForm.get('document_type').setValue(newDocumentType.id)
      })
    })
  }
@@ -97,7 +111,7 @@ export class DocumentDetailComponent implements OnInit {
    modal.componentInstance.success.subscribe(newCorrespondent => {
      this.correspondentService.listAll().subscribe(correspondents => {
        this.correspondents = correspondents.results
-        this.documentForm.get('correspondent_id').setValue(newCorrespondent.id)
+        this.documentForm.get('correspondent').setValue(newCorrespondent.id)
      })
    })
  }
--- a/src-ui/src/app/components/document-list/document-card-large/document-card-large.component.html
+++ b/src-ui/src/app/components/document-list/document-card-large/document-card-large.component.html
@@ -9,11 +9,11 @@
        <div class="d-flex justify-content-between align-items-center">
          <h5 class="card-title">    
            <ng-container *ngIf="document.correspondent">
-              <a *ngIf="clickCorrespondent.observers.length ; else nolink" [routerLink]="" title="Filter by correspondent" (click)="clickCorrespondent.emit(document.correspondent)" class="font-weight-bold">{{document.correspondent.name}}</a>
-              <ng-template #nolink>{{document.correspondent.name}}</ng-template>:
+              <a *ngIf="clickCorrespondent.observers.length ; else nolink" [routerLink]="" title="Filter by correspondent" (click)="clickCorrespondent.emit(document.correspondent)" class="font-weight-bold">{{(document.correspondent$ | async)?.name}}</a>
+              <ng-template #nolink>{{(document.correspondent$ | async)?.name}}</ng-template>:
            </ng-container>
            {{document.title}}
-            <app-tag [tag]="t" linkTitle="Filter by tag" *ngFor="let t of document.tags" class="ml-1" (click)="clickTag.emit(t)" [clickable]="clickTag.observers.length"></app-tag>
+            <app-tag [tag]="t" linkTitle="Filter by tag" *ngFor="let t of document.tags$ | async" class="ml-1" (click)="clickTag.emit(t.id)" [clickable]="clickTag.observers.length"></app-tag>
          </h5>
          <h5 class="card-title" *ngIf="document.archive_serial_number">#{{document.archive_serial_number}}</h5>
        </div>
--- a/src-ui/src/app/components/document-list/document-card-large/document-card-large.component.ts
+++ b/src-ui/src/app/components/document-list/document-card-large/document-card-large.component.ts
@@ -20,10 +20,10 @@ export class DocumentCardLargeComponent implements OnInit {
  details: any

  @Output()
-  clickTag = new EventEmitter<PaperlessTag>()
+  clickTag = new EventEmitter<number>()

  @Output()
-  clickCorrespondent = new EventEmitter<PaperlessDocument>()
+  clickCorrespondent = new EventEmitter<number>()

  ngOnInit(): void {
  }
--- a/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.html
+++ b/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.html
@@ -1,15 +1,21 @@
 <div class="col p-2 h-100" style="width: 16rem;">
  <div class="card h-100 shadow-sm">
-    <div class=" border-bottom doc-img pr-1" [ngStyle]="{'background-image': 'url(' + getThumbUrl() + ')'}">
-      <div class="row" *ngFor="let t of document.tags">
-        <app-tag style="font-size: large;" [tag]="t" class="col text-right" (click)="clickTag.emit(t)" [clickable]="true" linkTitle="Filter by tag"></app-tag>
+    <div class="border-bottom">
+      <img class="card-img doc-img" [src]="getThumbUrl()">
+      <div style="top: 0; right: 0; font-size: large" class="text-right position-absolute mr-1">
+        <div *ngFor="let t of getTagsLimited$() | async">
+          <app-tag [tag]="t" (click)="clickTag.emit(t.id)" [clickable]="true" linkTitle="Filter by tag"></app-tag>
+        </div>
+        <div *ngIf="moreTags">
+          <span class="badge badge-secondary">+ {{moreTags}}</span>
+        </div>
      </div>
    </div>
    
    <div class="card-body p-2">
      <p class="card-text">
        <ng-container *ngIf="document.correspondent">
-          <a [routerLink]="" title="Filter by correspondent" (click)="clickCorrespondent.emit(document.correspondent)" class="font-weight-bold">{{document.correspondent.name}}</a>:
+          <a [routerLink]="" title="Filter by correspondent" (click)="clickCorrespondent.emit(document.correspondent)" class="font-weight-bold">{{(document.correspondent$ | async)?.name}}</a>:
        </ng-container>
        {{document.title}}
      </p>
--- a/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.scss
+++ b/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.scss
@@ -1,5 +1,5 @@
 .doc-img {
-  background-size: cover;
-  background-position: top;
+  object-fit: cover;
+  object-position: top;
  height: 200px;
 }
--- a/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.ts
+++ b/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.ts
@@ -1,4 +1,5 @@
 import { Component, EventEmitter, Input, OnInit, Output } from '@angular/core';
+import { map } from 'rxjs/operators';
 import { PaperlessDocument } from 'src/app/data/paperless-document';
 import { PaperlessTag } from 'src/app/data/paperless-tag';
 import { DocumentService } from 'src/app/services/rest/document.service';
@@ -16,10 +17,12 @@ export class DocumentCardSmallComponent implements OnInit {
  document: PaperlessDocument

  @Output()
-  clickTag = new EventEmitter<PaperlessTag>()
+  clickTag = new EventEmitter<number>()

  @Output()
-  clickCorrespondent = new EventEmitter<PaperlessDocument>()
+  clickCorrespondent = new EventEmitter<number>()
+
+  moreTags: number = null

  ngOnInit(): void {
  }
@@ -35,4 +38,18 @@ export class DocumentCardSmallComponent implements OnInit {
  getPreviewUrl() {
    return this.documentService.getPreviewUrl(this.document.id)
  }
+
+  /** Emits up to 7 tags as-is; with more, only the first 6, and `moreTags` holds the hidden count. */
+  getTagsLimited$() {
+    return this.document.tags$.pipe(map(tags => {
+      if (tags.length > 7) {
+        this.moreTags = tags.length - 6
+        return tags.slice(0, 6)
+      }
+      // Reset so a later emission with fewer tags does not leave a stale "+N" badge.
+      this.moreTags = null
+      return tags
+    }))
+  }
+
 }
--- a/src-ui/src/app/components/document-list/document-list.component.html
+++ b/src-ui/src/app/components/document-list/document-list.component.html
@@ -24,7 +24,7 @@
  <div class="btn-group btn-group-toggle ml-2" ngbRadioGroup [(ngModel)]="list.sortDirection">
    <div ngbDropdown class="btn-group">
      <button class="btn btn-outline-primary btn-sm" id="dropdownBasic1" ngbDropdownToggle>Sort by</button>
-      <div ngbDropdownMenu aria-labelledby="dropdownBasic1">
+      <div ngbDropdownMenu aria-labelledby="dropdownBasic1" class="shadow">
        <button *ngFor="let f of getSortFields()" ngbDropdownItem (click)="list.sortField = f.field"
          [class.active]="list.sortField == f.field">{{f.name}}</button>
      </div>
@@ -44,7 +44,7 @@
  </div>
  <div class="btn-group ml-2">

-    <button type="button" class="btn btn-sm btn-outline-primary" (click)="showFilter=!showFilter">
+    <button type="button" class="btn btn-sm" [ngClass]="isFiltered ? 'btn-primary' : 'btn-outline-primary'" (click)="showFilter=!showFilter">
      <svg class="toolbaricon" fill="currentColor">
        <use xlink:href="assets/bootstrap-icons.svg#funnel" />
      </svg>
@@ -53,7 +53,7 @@

    <div class="btn-group" ngbDropdown role="group">
      <button class="btn btn-sm btn-outline-primary dropdown-toggle-split" ngbDropdownToggle></button>
-      <div class="dropdown-menu" ngbDropdownMenu>
+      <div class="dropdown-menu shadow" ngbDropdownMenu>
        <ng-container *ngIf="!list.savedViewId" >
          <button ngbDropdownItem *ngFor="let config of savedViewConfigService.getConfigs()" (click)="loadViewConfig(config)">{{config.title}}</button>
          <div class="dropdown-divider" *ngIf="savedViewConfigService.getConfigs().length > 0"></div>
@@ -70,11 +70,12 @@
 <div class="card w-100 mb-3" [hidden]="!showFilter">
  <div class="card-body">
    <h5 class="card-title">Filter</h5>
-    <app-filter-editor [(filterRules)]="filterRules" (apply)="applyFilterRules()"></app-filter-editor>
+    <app-filter-editor [(filterRules)]="filterRules" (apply)="applyFilterRules()" (clear)="clearFilterRules()"></app-filter-editor>
  </div>
 </div>

-<div class="row m-0 justify-content-end">
+<div class="d-flex justify-content-between align-items-center">
+  <p>{{list.collectionSize || 0}} document(s) <span *ngIf="isFiltered">(filtered)</span></p>
  <ngb-pagination [pageSize]="list.currentPageSize" [collectionSize]="list.collectionSize" [(page)]="list.currentPage" [maxSize]="5"
  [rotate]="true" (pageChange)="list.reload()" aria-label="Default pagination"></ngb-pagination>
 </div>
@@ -84,7 +85,7 @@
  </app-document-card-large>
 </div>

-<table class="table table-sm border shadow" *ngIf="displayMode == 'details'">
+<table class="table table-sm border shadow-sm" *ngIf="displayMode == 'details'">
  <thead>
    <th class="d-none d-lg-table-cell">ASN</th>
    <th class="d-none d-md-table-cell">Correspondent</th>
@@ -100,16 +101,16 @@
      </td>
      <td class="d-none d-md-table-cell">
        <ng-container *ngIf="d.correspondent">
-          <a [routerLink]="" (click)="filterByCorrespondent(d.correspondent)" title="Filter by correspondent">{{d.correspondent.name}}</a>
+          <a [routerLink]="" (click)="filterByCorrespondent(d.correspondent)" title="Filter by correspondent">{{(d.correspondent$ | async)?.name}}</a>
        </ng-container>
      </td>
      <td>
-        <a routerLink="/documents/{{d.id}}" title="Edit document">{{d.title}}</a>
-        <app-tag [tag]="t" *ngFor="let t of d.tags" class="ml-1" clickable="true" linkTitle="Filter by tag" (click)="filterByTag(t)"></app-tag>
+        <a routerLink="/documents/{{d.id}}" title="Edit document" style="overflow-wrap: anywhere;">{{d.title}}</a>
+        <app-tag [tag]="t" *ngFor="let t of d.tags$ | async" class="ml-1" clickable="true" linkTitle="Filter by tag" (click)="filterByTag(t.id)"></app-tag>
      </td>
      <td class="d-none d-xl-table-cell">
        <ng-container *ngIf="d.document_type">
-          <a [routerLink]="" (click)="filterByDocumentType(d.document_type)" title="Filter by document type">{{d.document_type.name}}</a>
+          <a [routerLink]="" (click)="filterByDocumentType(d.document_type)" title="Filter by document type">{{(d.document_type$ | async)?.name}}</a>
        </ng-container>
      </td>
      <td>
@@ -126,5 +127,3 @@
 <div class=" m-n2 row" *ngIf="displayMode == 'smallCards'">
  <app-document-card-small [document]="d" *ngFor="let d of list.documents" (clickTag)="filterByTag($event)" (clickCorrespondent)="filterByCorrespondent($event)"></app-document-card-small>    
 </div>
-
-<p *ngIf="list.documents.length == 0" class="mx-auto">No results</p>
--- a/src-ui/src/app/components/document-list/document-list.component.ts
+++ b/src-ui/src/app/components/document-list/document-list.component.ts
@@ -1,16 +1,15 @@
 import { Component, OnInit } from '@angular/core';
+import { Title } from '@angular/platform-browser';
 import { ActivatedRoute } from '@angular/router';
 import { NgbModal } from '@ng-bootstrap/ng-bootstrap';
 import { cloneFilterRules, FilterRule } from 'src/app/data/filter-rule';
 import { FILTER_CORRESPONDENT, FILTER_DOCUMENT_TYPE, FILTER_HAS_TAG, FILTER_RULE_TYPES } from 'src/app/data/filter-rule-type';
-import { PaperlessCorrespondent } from 'src/app/data/paperless-correspondent';
-import { PaperlessDocumentType } from 'src/app/data/paperless-document-type';
-import { PaperlessTag } from 'src/app/data/paperless-tag';
 import { SavedViewConfig } from 'src/app/data/saved-view-config';
 import { DocumentListViewService } from 'src/app/services/document-list-view.service';
 import { DOCUMENT_SORT_FIELDS } from 'src/app/services/rest/document.service';
 import { SavedViewConfigService } from 'src/app/services/saved-view-config.service';
 import { Toast, ToastService } from 'src/app/services/toast.service';
+import { environment } from 'src/environments/environment';
 import { SaveViewConfigDialogComponent } from './save-view-config-dialog/save-view-config-dialog.component';

@Component({
@@ -25,13 +24,18 @@ export class DocumentListComponent implements OnInit {
    public savedViewConfigService: SavedViewConfigService,
    public route: ActivatedRoute,
    private toastService: ToastService,
-    public modalService: NgbModal) { }
+    public modalService: NgbModal,
+    private titleService: Title) { }

  displayMode = 'smallCards' // largeCards, smallCards, details

  filterRules: FilterRule[] = []
  showFilter = false

+  get isFiltered() {
+    return this.list.filterRules?.length > 0
+  }
+
  getTitle() {
    return this.list.savedViewTitle || "Documents"
  }
@@ -51,13 +55,16 @@ export class DocumentListComponent implements OnInit {
    this.route.paramMap.subscribe(params => {
      if (params.has('id')) {
        this.list.savedView = this.savedViewConfigService.getConfig(params.get('id'))
+        this.filterRules = this.list.filterRules
+        this.showFilter = false
+        this.titleService.setTitle(`${this.list.savedView.title} - ${environment.appTitle}`)
      } else {
        this.list.savedView = null
+        this.filterRules = this.list.filterRules
+        this.showFilter = this.filterRules.length > 0
+        this.titleService.setTitle(`Documents - ${environment.appTitle}`)
      }
-      this.filterRules = this.list.filterRules
-      //this.showFilter = this.filterRules.length > 0
-      // prevents temporarily visible results from previous views
-      this.list.documents = []
+      this.list.clear()
      this.list.reload()
    })
  }
@@ -66,6 +73,11 @@ export class DocumentListComponent implements OnInit {
    this.list.filterRules = this.filterRules
  }

+  clearFilterRules() {
+    this.list.filterRules = this.filterRules
+    this.showFilter = false
+  }
+
  loadViewConfig(config: SavedViewConfig) {
    this.filterRules = cloneFilterRules(config.filterRules)
    this.list.load(config)
@@ -91,32 +103,42 @@ export class DocumentListComponent implements OnInit {
    })
  }

-  filterByTag(t: PaperlessTag) {
-    if (this.filterRules.find(rule => rule.type.id == FILTER_HAS_TAG && rule.value == t.id)) {
+  filterByTag(tag_id: number) {
+    let filterRules = this.list.filterRules
+    if (filterRules.find(rule => rule.type.id == FILTER_HAS_TAG && rule.value == tag_id)) {
      return
    }

-    this.filterRules.push({type: FILTER_RULE_TYPES.find(t => t.id == FILTER_HAS_TAG), value: t.id})
+    filterRules.push({type: FILTER_RULE_TYPES.find(t => t.id == FILTER_HAS_TAG), value: tag_id})
+    this.filterRules = filterRules
    this.applyFilterRules()
  }

-  filterByCorrespondent(c: PaperlessCorrespondent) {
-    let existing_rule = this.filterRules.find(rule => rule.type.id == FILTER_CORRESPONDENT)
-    if (existing_rule) {
-      existing_rule.value = c.id
+  filterByCorrespondent(correspondent_id: number) {
+    let filterRules = this.list.filterRules
+    let existing_rule = filterRules.find(rule => rule.type.id == FILTER_CORRESPONDENT)
+    if (existing_rule && existing_rule.value == correspondent_id) {
+      return
+    } else if (existing_rule) {
+      existing_rule.value = correspondent_id
    } else {
-      this.filterRules.push({type: FILTER_RULE_TYPES.find(t => t.id == FILTER_CORRESPONDENT), value: c.id})
+      filterRules.push({type: FILTER_RULE_TYPES.find(t => t.id == FILTER_CORRESPONDENT), value: correspondent_id})
    }
+    this.filterRules = filterRules
    this.applyFilterRules()
  }

-  filterByDocumentType(dt: PaperlessDocumentType) {
-    let existing_rule = this.filterRules.find(rule => rule.type.id == FILTER_DOCUMENT_TYPE)
-    if (existing_rule) {
-      existing_rule.value = dt.id
+  filterByDocumentType(document_type_id: number) {
+    let filterRules = this.list.filterRules
+    let existing_rule = filterRules.find(rule => rule.type.id == FILTER_DOCUMENT_TYPE)
+    if (existing_rule && existing_rule.value == document_type_id) {
+      return
+    } else if (existing_rule) {
+      existing_rule.value = document_type_id
    } else {
-      this.filterRules.push({type: FILTER_RULE_TYPES.find(t => t.id == FILTER_DOCUMENT_TYPE), value: dt.id})
+      filterRules.push({type: FILTER_RULE_TYPES.find(t => t.id == FILTER_DOCUMENT_TYPE), value: document_type_id})
    }
+    this.filterRules = filterRules
    this.applyFilterRules()
  }

--- a/src-ui/src/app/components/filter-editor/filter-editor.component.ts
+++ b/src-ui/src/app/components/filter-editor/filter-editor.component.ts
@@ -18,6 +18,9 @@ export class FilterEditorComponent implements OnInit {

  constructor(private documentTypeService: DocumentTypeService, private tagService: TagService, private correspondentService: CorrespondentService) { }

+  @Output()
+  clear = new EventEmitter()
+
  @Input()
  filterRules: FilterRule[] = []

@@ -31,7 +34,7 @@ export class FilterEditorComponent implements OnInit {
  documentTypes: PaperlessDocumentType[] = []

  newRuleClicked() {
-    this.filterRules.push({type: this.selectedRuleType, value: null})
+    this.filterRules.push({type: this.selectedRuleType, value: this.selectedRuleType.default})
    this.selectedRuleType = this.getRuleTypes().length > 0 ? this.getRuleTypes()[0] : null
  }

@@ -48,7 +51,7 @@ export class FilterEditorComponent implements OnInit {

  clearClicked() {
    this.filterRules.splice(0,this.filterRules.length)
-    this.apply.next()
+    this.clear.next()
  }

  ngOnInit(): void {
--- a/src-ui/src/app/components/manage/correspondent-list/correspondent-list.component.ts
+++ b/src-ui/src/app/components/manage/correspondent-list/correspondent-list.component.ts
@@ -1,7 +1,9 @@
-import { Component } from '@angular/core';
+import { Component, OnInit } from '@angular/core';
+import { Title } from '@angular/platform-browser';
 import { NgbModal } from '@ng-bootstrap/ng-bootstrap';
 import { PaperlessCorrespondent } from 'src/app/data/paperless-correspondent';
 import { CorrespondentService } from 'src/app/services/rest/correspondent.service';
+import { environment } from 'src/environments/environment';
 import { GenericListComponent } from '../generic-list/generic-list.component';
 import { CorrespondentEditDialogComponent } from './correspondent-edit-dialog/correspondent-edit-dialog.component';

@@ -10,14 +12,19 @@ import { CorrespondentEditDialogComponent } from './correspondent-edit-dialog/co
  templateUrl: './correspondent-list.component.html',
  styleUrls: ['./correspondent-list.component.scss']
 })
-export class CorrespondentListComponent extends GenericListComponent<PaperlessCorrespondent> {
+export class CorrespondentListComponent extends GenericListComponent<PaperlessCorrespondent> implements OnInit {

-  constructor(correspondentsService: CorrespondentService,
-    modalService: NgbModal) { 
-      super(correspondentsService,modalService,CorrespondentEditDialogComponent)
-    }
+  constructor(correspondentsService: CorrespondentService, modalService: NgbModal, private titleService: Title) { 
+    super(correspondentsService,modalService,CorrespondentEditDialogComponent)
+  }
+
+  getObjectName(object: PaperlessCorrespondent) {
+    return `correspondent '${object.name}'`
+  }
+
+  ngOnInit(): void {
+    super.ngOnInit()
+    this.titleService.setTitle(`Correspondents - ${environment.appTitle}`)
+  }

-    getObjectName(object: PaperlessCorrespondent) {
-      return `correspondent '${object.name}'`
-    }
 }
--- a/src-ui/src/app/components/manage/document-type-list/document-type-list.component.ts
+++ b/src-ui/src/app/components/manage/document-type-list/document-type-list.component.ts
@@ -1,7 +1,9 @@
 import { Component, OnInit } from '@angular/core';
+import { Title } from '@angular/platform-browser';
 import { NgbModal } from '@ng-bootstrap/ng-bootstrap';
 import { PaperlessDocumentType } from 'src/app/data/paperless-document-type';
 import { DocumentTypeService } from 'src/app/services/rest/document-type.service';
+import { environment } from 'src/environments/environment';
 import { GenericListComponent } from '../generic-list/generic-list.component';
 import { DocumentTypeEditDialogComponent } from './document-type-edit-dialog/document-type-edit-dialog.component';

@@ -10,13 +12,18 @@ import { DocumentTypeEditDialogComponent } from './document-type-edit-dialog/doc
  templateUrl: './document-type-list.component.html',
  styleUrls: ['./document-type-list.component.scss']
 })
-export class DocumentTypeListComponent extends GenericListComponent<PaperlessDocumentType> {
+export class DocumentTypeListComponent extends GenericListComponent<PaperlessDocumentType> implements OnInit {

-  constructor(service: DocumentTypeService, modalService: NgbModal) {
+  constructor(service: DocumentTypeService, modalService: NgbModal, private titleService: Title) {
    super(service, modalService, DocumentTypeEditDialogComponent)
-   }
+  }

-   getObjectName(object: PaperlessDocumentType) {
+  getObjectName(object: PaperlessDocumentType) {
    return `document type '${object.name}'`
  }
+
+  ngOnInit(): void {
+    super.ngOnInit()
+    this.titleService.setTitle(`Document types - ${environment.appTitle}`)
+  }
 }
--- a/src-ui/src/app/components/manage/logs/logs.component.ts
+++ b/src-ui/src/app/components/manage/logs/logs.component.ts
@@ -1,7 +1,8 @@
 import { Component, OnInit } from '@angular/core';
-import { kMaxLength } from 'buffer';
+import { Title } from '@angular/platform-browser';
 import { LOG_LEVELS, LOG_LEVEL_INFO, PaperlessLog } from 'src/app/data/paperless-log';
 import { LogService } from 'src/app/services/rest/log.service';
+import { environment } from 'src/environments/environment';

@Component({
  selector: 'app-logs',
@@ -10,13 +11,14 @@ import { LogService } from 'src/app/services/rest/log.service';
 })
 export class LogsComponent implements OnInit {

-  constructor(private logService: LogService) { }
+  constructor(private logService: LogService, private titleService: Title) { }

  logs: PaperlessLog[] = []
  level: number = LOG_LEVEL_INFO

  ngOnInit(): void {
    this.reload()
+    this.titleService.setTitle(`Logs - ${environment.appTitle}`)
  }

  reload() {
--- a/src-ui/src/app/components/manage/settings/settings.component.html
+++ b/src-ui/src/app/components/manage/settings/settings.component.html
@@ -46,8 +46,8 @@
          <tbody>
            <tr *ngFor="let config of savedViewConfigService.getConfigs()">
              <td>{{ config.title }}</td>
-              <td>{{ config.showInDashboard }}</td>
-              <td>{{ config.showInSideBar }}</td>
+              <td>{{ config.showInDashboard | yesno }}</td>
+              <td>{{ config.showInSideBar | yesno }}</td>
              <td><button type="button" class="btn btn-sm btn-outline-danger" (click)="deleteViewConfig(config)">Delete</button></td>
            </tr>
          </tbody>
--- a/src-ui/src/app/components/manage/settings/settings.component.ts
+++ b/src-ui/src/app/components/manage/settings/settings.component.ts
@@ -1,9 +1,11 @@
 import { Component, OnInit } from '@angular/core';
 import { FormControl, FormGroup } from '@angular/forms';
+import { Title } from '@angular/platform-browser';
 import { SavedViewConfig } from 'src/app/data/saved-view-config';
 import { GENERAL_SETTINGS } from 'src/app/data/storage-keys';
 import { DocumentListViewService } from 'src/app/services/document-list-view.service';
 import { SavedViewConfigService } from 'src/app/services/saved-view-config.service';
+import { environment } from 'src/environments/environment';

@Component({
  selector: 'app-settings',
@@ -18,10 +20,12 @@ export class SettingsComponent implements OnInit {

  constructor(
    private savedViewConfigService: SavedViewConfigService,
-    private documentListViewService: DocumentListViewService
+    private documentListViewService: DocumentListViewService,
+    private titleService: Title
  ) { }

  ngOnInit(): void {
+    this.titleService.setTitle(`Settings - ${environment.appTitle}`)
  }

  deleteViewConfig(config: SavedViewConfig) {
--- a/src-ui/src/app/components/manage/tag-list/tag-list.component.ts
+++ b/src-ui/src/app/components/manage/tag-list/tag-list.component.ts
@@ -1,8 +1,9 @@
-import { Component } from '@angular/core';
+import { Component, OnInit } from '@angular/core';
+import { Title } from '@angular/platform-browser';
 import { NgbModal } from '@ng-bootstrap/ng-bootstrap';
 import { TAG_COLOURS, PaperlessTag } from 'src/app/data/paperless-tag';
 import { TagService } from 'src/app/services/rest/tag.service';
-import { CorrespondentEditDialogComponent } from '../correspondent-list/correspondent-edit-dialog/correspondent-edit-dialog.component';
+import { environment } from 'src/environments/environment';
 import { GenericListComponent } from '../generic-list/generic-list.component';
 import { TagEditDialogComponent } from './tag-edit-dialog/tag-edit-dialog.component';

@@ -11,11 +12,17 @@ import { TagEditDialogComponent } from './tag-edit-dialog/tag-edit-dialog.compon
  templateUrl: './tag-list.component.html',
  styleUrls: ['./tag-list.component.scss']
 })
-export class TagListComponent extends GenericListComponent<PaperlessTag> {
+export class TagListComponent extends GenericListComponent<PaperlessTag> implements OnInit {

-  constructor(tagService: TagService, modalService: NgbModal) {
+  constructor(tagService: TagService, modalService: NgbModal, private titleService: Title) {
    super(tagService, modalService, TagEditDialogComponent)
-   }
+  }
+
+
+  ngOnInit(): void {
+    super.ngOnInit()
+    this.titleService.setTitle(`Tags - ${environment.appTitle}`)
+  }

  getColor(id) {
    return TAG_COLOURS.find(c => c.id == id)
--- a/src-ui/src/app/components/search/search.component.ts
+++ b/src-ui/src/app/components/search/search.component.ts
@@ -1,7 +1,9 @@
 import { Component, OnInit } from '@angular/core';
+import { Title } from '@angular/platform-browser';
 import { ActivatedRoute, Router } from '@angular/router';
 import { SearchHit } from 'src/app/data/search-result';
 import { SearchService } from 'src/app/services/rest/search.service';
+import { environment } from 'src/environments/environment';

@Component({
  selector: 'app-search',
@@ -26,7 +28,7 @@ export class SearchComponent implements OnInit {

  errorMessage: string

-  constructor(private searchService: SearchService, private route: ActivatedRoute, private router: Router) { }
+  constructor(private searchService: SearchService, private route: ActivatedRoute, private router: Router, private titleService: Title) { }

  ngOnInit(): void {
    this.route.queryParamMap.subscribe(paramMap => {
@@ -34,6 +36,7 @@ export class SearchComponent implements OnInit {
      this.searching = true
      this.currentPage = 1
      this.loadPage()
+      this.titleService.setTitle(`Search: ${this.query} - ${environment.appTitle}`)
    })

  }
--- a/src-ui/src/app/data/filter-rule-type.ts
+++ b/src-ui/src/app/data/filter-rule-type.ts
@@ -16,19 +16,22 @@ export const FILTER_ADDED_AFTER = 14
 export const FILTER_MODIFIED_BEFORE = 15
 export const FILTER_MODIFIED_AFTER = 16

+export const FILTER_DOES_NOT_HAVE_TAG = 17
+
 export const FILTER_RULE_TYPES: FilterRuleType[] = [

-  {id: FILTER_TITLE, name: "Title contains", filtervar: "title__icontains", datatype: "string", multi: false},
-  {id: FILTER_CONTENT, name: "Content contains", filtervar: "content__icontains", datatype: "string", multi: false},
+  {id: FILTER_TITLE, name: "Title contains", filtervar: "title__icontains", datatype: "string", multi: false, default: ""},
+  {id: FILTER_CONTENT, name: "Content contains", filtervar: "content__icontains", datatype: "string", multi: false, default: ""},
  
  {id: FILTER_ASN, name: "ASN is", filtervar: "archive_serial_number", datatype: "number", multi: false},
  
  {id: FILTER_CORRESPONDENT, name: "Correspondent is", filtervar: "correspondent__id", datatype: "correspondent", multi: false},
  {id: FILTER_DOCUMENT_TYPE, name: "Document type is", filtervar: "document_type__id", datatype: "document_type", multi: false},

-  {id: FILTER_IS_IN_INBOX, name: "Is in Inbox", filtervar: "is_in_inbox", datatype: "boolean", multi: false},  
+  {id: FILTER_IS_IN_INBOX, name: "Is in Inbox", filtervar: "is_in_inbox", datatype: "boolean", multi: false, default: true},  
  {id: FILTER_HAS_TAG, name: "Has tag", filtervar: "tags__id__all", datatype: "tag", multi: true},  
-  {id: FILTER_HAS_ANY_TAG, name: "Has any tag", filtervar: "is_tagged", datatype: "boolean", multi: false},
+  {id: FILTER_DOES_NOT_HAVE_TAG, name: "Does not have tag", filtervar: "tags__id__none", datatype: "tag", multi: true},  
+  {id: FILTER_HAS_ANY_TAG, name: "Has any tag", filtervar: "is_tagged", datatype: "boolean", multi: false, default: true},

  {id: FILTER_CREATED_BEFORE, name: "Created before", filtervar: "created__date__lt", datatype: "date", multi: false},
  {id: FILTER_CREATED_AFTER, name: "Created after", filtervar: "created__date__gt", datatype: "date", multi: false},
@@ -50,4 +53,5 @@ export interface FilterRuleType {
  filtervar: string
  datatype: string //number, string, boolean, date
  multi: boolean
+  default?: any
 }
--- a/src-ui/src/app/data/paperless-document-metadata.ts
+++ b/src-ui/src/app/data/paperless-document-metadata.ts
@@ -0,0 +1,13 @@
+export interface PaperlessDocumentMetadata {
+    
+  original_checksum?: string
+
+  archived_checksum?: string
+
+  original_mime_type?: string
+
+  media_filename?: string
+
+  has_archive_version?: boolean
+
+}
--- a/src-ui/src/app/data/paperless-document.ts
+++ b/src-ui/src/app/data/paperless-document.ts
@@ -2,16 +2,17 @@ import { PaperlessCorrespondent } from './paperless-correspondent'
 import { ObjectWithId } from './object-with-id'
 import { PaperlessTag } from './paperless-tag'
 import { PaperlessDocumentType } from './paperless-document-type'
+import { Observable } from 'rxjs'

 export interface PaperlessDocument extends ObjectWithId {

-    correspondent?: PaperlessCorrespondent
+    correspondent$?: Observable<PaperlessCorrespondent>

-    correspondent_id?: number
+    correspondent?: number

-    document_type?: PaperlessDocumentType
+    document_type$?: Observable<PaperlessDocumentType>

-    document_type_id?: number
+    document_type?: number

    title?: string

@@ -19,9 +20,9 @@ export interface PaperlessDocument extends ObjectWithId {

    file_type?: string

-    tags?: PaperlessTag[]
+    tags$?: Observable<PaperlessTag[]>

-    tags_id?: number[]
+    tags?: number[]

    checksum?: string

--- a/src-ui/src/app/pipes/file-size.pipe.spec.ts
+++ b/src-ui/src/app/pipes/file-size.pipe.spec.ts
@@ -0,0 +1,8 @@
+import { FileSizePipe } from './file-size.pipe';
+
+describe('FileSizePipe', () => {
+  it('create an instance', () => {
+    const pipe = new FileSizePipe();
+    expect(pipe).toBeTruthy();
+  });
+});
--- a/src-ui/src/app/pipes/file-size.pipe.ts
+++ b/src-ui/src/app/pipes/file-size.pipe.ts
@@ -0,0 +1,77 @@
+/**
+ * https://gist.github.com/JonCatmull/ecdf9441aaa37336d9ae2c7f9cb7289a
+ * 
+ * @license
+ * Copyright (c) 2019 Jonathan Catmull.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+import { Pipe, PipeTransform } from '@angular/core';
+
+type unit = 'bytes' | 'KB' | 'MB' | 'GB' | 'TB' | 'PB';
+type unitPrecisionMap = {
+  [u in unit]: number;
+};
+
+const defaultPrecisionMap: unitPrecisionMap = {
+  bytes: 0,
+  KB: 0,
+  MB: 1,
+  GB: 1,
+  TB: 2,
+  PB: 2
+};
+
+/*
+ * Convert bytes into largest possible unit.
+ * Takes a precision argument that can be a number or a map for each unit.
+ * Usage:
+ *   bytes | fileSize:precision
+ * @example
+ * // returns 1 KB
+ * {{ 1500 | fileSize }}
+ * @example
+ * // returns 2.0 GB
+ * {{ 2100000000 | fileSize }}
+ * @example
+ * // returns 1.46 KB
+ * {{ 1500 | fileSize:2 }}
+ */
+@Pipe({ name: 'fileSize' })
+export class FileSizePipe implements PipeTransform {
+  private readonly units: unit[] = ['bytes', 'KB', 'MB', 'GB', 'TB', 'PB'];
+
+  transform(bytes: number = 0, precision: number | unitPrecisionMap = defaultPrecisionMap): string {
+    if (isNaN(parseFloat(String(bytes))) || !isFinite(bytes)) return '?';
+
+    let unitIndex = 0;
+
+    while (bytes >= 1024) {
+      bytes /= 1024;
+      unitIndex++;
+    }
+
+    const unit = this.units[unitIndex];
+
+    if (typeof precision === 'number') {
+      return `${bytes.toFixed(+precision)} ${unit}`;
+    }
+    return `${bytes.toFixed(precision[unit])} ${unit}`;
+  }
+}
--- a/src-ui/src/app/pipes/yes-no.pipe.spec.ts
+++ b/src-ui/src/app/pipes/yes-no.pipe.spec.ts
@@ -0,0 +1,8 @@
+import { YesNoPipe } from './yes-no.pipe';
+
+describe('YesNoPipe', () => {
+  it('create an instance', () => {
+    const pipe = new YesNoPipe();
+    expect(pipe).toBeTruthy();
+  });
+});
--- a/src-ui/src/app/pipes/yes-no.pipe.ts
+++ b/src-ui/src/app/pipes/yes-no.pipe.ts
@@ -0,0 +1,12 @@
+import { Pipe, PipeTransform } from '@angular/core';
+
+@Pipe({
+  name: 'yesno'
+})
+export class YesNoPipe implements PipeTransform {
+
+  transform(value: boolean): unknown {
+    return value ? "Yes" : "No"
+  }
+
+}
--- a/src-ui/src/app/services/document-list-view.service.ts
+++ b/src-ui/src/app/services/document-list-view.service.ts
@@ -82,6 +82,12 @@ export class DocumentListViewService {
    this.reload()
  }

+  clear() {
+    this.collectionSize = null
+    this.documents = []
+    this.currentPage = 1
+  }
+
  reload(onFinish?) {
    this.isReloading = true
    this.documentService.list(
--- a/src-ui/src/app/services/rest/abstract-paperless-service.ts
+++ b/src-ui/src/app/services/rest/abstract-paperless-service.ts
@@ -1,5 +1,6 @@
 import { HttpClient, HttpParams } from '@angular/common/http'
-import { Observable } from 'rxjs'
+import { Observable, of, Subject } from 'rxjs'
+import { map, publishReplay, refCount } from 'rxjs/operators'
 import { ObjectWithId } from 'src/app/data/object-with-id'
 import { Results } from 'src/app/data/results'
 import { environment } from 'src/environments/environment'
@@ -51,8 +52,28 @@ export abstract class AbstractPaperlessService<T extends ObjectWithId> {
    return this.http.get<Results<T>>(this.getResourceUrl(), {params: httpParams})
  }

+  private _listAll: Observable<Results<T>>
+
  listAll(ordering?: string, extraParams?): Observable<Results<T>> {
-    return this.list(1, 100000, ordering, extraParams)
+    if (!this._listAll) {
+      this._listAll = this.list(1, 100000, ordering, extraParams).pipe(
+        publishReplay(1),
+        refCount()
+      )
+    }
+    return this._listAll
+  }
+
+  getCached(id: number): Observable<T> {
+    return this.listAll().pipe(
+      map(list => list.results.find(o => o.id == id))
+    )
+  }
+
+  getCachedMany(ids: number[]): Observable<T[]> {
+    return this.listAll().pipe(
+      map(list => ids.map(id => list.results.find(o => o.id == id)))
+    )
  }

  get(id: number): Observable<T> {
@@ -60,14 +81,17 @@ export abstract class AbstractPaperlessService<T extends ObjectWithId> {
  }

  create(o: T): Observable<T> {
+    this._listAll = null
    return this.http.post<T>(this.getResourceUrl(), o)
  }

  delete(o: T): Observable<any> {
+    this._listAll = null
    return this.http.delete(this.getResourceUrl(o.id))
  }

  update(o: T): Observable<T> {
+    this._listAll = null
    return this.http.put<T>(this.getResourceUrl(o.id), o)
  }
 }
--- a/src-ui/src/app/services/rest/document.service.ts
+++ b/src-ui/src/app/services/rest/document.service.ts
@@ -1,10 +1,15 @@
 import { Injectable } from '@angular/core';
 import { PaperlessDocument } from 'src/app/data/paperless-document';
+import { PaperlessDocumentMetadata } from 'src/app/data/paperless-document-metadata';
 import { AbstractPaperlessService } from './abstract-paperless-service';
 import { HttpClient } from '@angular/common/http';
 import { Observable } from 'rxjs';
 import { Results } from 'src/app/data/results';
 import { FilterRule } from 'src/app/data/filter-rule';
+import { map } from 'rxjs/operators';
+import { CorrespondentService } from './correspondent.service';
+import { DocumentTypeService } from './document-type.service';
+import { TagService } from './tag.service';


 export const DOCUMENT_SORT_FIELDS = [
@@ -26,7 +31,7 @@ export const SORT_DIRECTION_DESCENDING = "des"
 })
 export class DocumentService extends AbstractPaperlessService<PaperlessDocument> {

-  constructor(http: HttpClient) {
+  constructor(http: HttpClient, private correspondentService: CorrespondentService, private documentTypeService: DocumentTypeService, private tagService: TagService) {
    super(http, 'documents')
  }

@@ -46,24 +51,54 @@ export class DocumentService extends AbstractPaperlessService<PaperlessDocument>
    }
  }

-  list(page?: number, pageSize?: number, sortField?: string, sortDirection?: string, filterRules?: FilterRule[]): Observable<Results<PaperlessDocument>> {
-    return super.list(page, pageSize, sortField, sortDirection, this.filterRulesToQueryParams(filterRules))
+  addObservablesToDocument(doc: PaperlessDocument) {
+    if (doc.correspondent) {
+      doc.correspondent$ = this.correspondentService.getCached(doc.correspondent)
+    }
+    if (doc.document_type) {
+      doc.document_type$ = this.documentTypeService.getCached(doc.document_type)
+    }
+    if (doc.tags) {
+      doc.tags$ = this.tagService.getCachedMany(doc.tags)
+    }
+    return doc
  }

-  getPreviewUrl(id: number): string {
-    return this.getResourceUrl(id, 'preview')
+  list(page?: number, pageSize?: number, sortField?: string, sortDirection?: string, filterRules?: FilterRule[]): Observable<Results<PaperlessDocument>> {
+    return super.list(page, pageSize, sortField, sortDirection, this.filterRulesToQueryParams(filterRules)).pipe(
+      map(results => {
+        results.results.forEach(doc => this.addObservablesToDocument(doc))
+        return results
+      })
+    )
+  }
+
+  getPreviewUrl(id: number, original: boolean = false): string {
+    let url = this.getResourceUrl(id, 'preview')
+    if (original) {
+      url += "?original=true"
+    }
+    return url
  }

  getThumbUrl(id: number): string {
    return this.getResourceUrl(id, 'thumb')
  }

-  getDownloadUrl(id: number): string {
-    return this.getResourceUrl(id, 'download')
+  getDownloadUrl(id: number, original: boolean = false): string {
+    let url = this.getResourceUrl(id, 'download')
+    if (original) {
+      url += "?original=true"
+    }
+    return url
  }

  uploadDocument(formData) {
-    return this.http.post(this.getResourceUrl(null, 'post_document'), formData)
+    return this.http.post(this.getResourceUrl(null, 'post_document'), formData, {reportProgress: true, observe: "events"})
+  }
+
+  getMetadata(id: number): Observable<PaperlessDocumentMetadata> {
+    return this.http.get<PaperlessDocumentMetadata>(this.getResourceUrl(id, 'metadata'))
  }

 }
--- a/src-ui/src/app/services/rest/search.service.ts
+++ b/src-ui/src/app/services/rest/search.service.ts
@@ -1,9 +1,11 @@
 import { HttpClient, HttpParams } from '@angular/common/http';
 import { Injectable } from '@angular/core';
 import { Observable } from 'rxjs';
+import { map } from 'rxjs/operators';
 import { PaperlessDocument } from 'src/app/data/paperless-document';
 import { SearchResult } from 'src/app/data/search-result';
 import { environment } from 'src/environments/environment';
+import { DocumentService } from './document.service';


@Injectable({
@@ -11,14 +13,19 @@ import { environment } from 'src/environments/environment';
 })
 export class SearchService {
  
-  constructor(private http: HttpClient) { }
+  constructor(private http: HttpClient, private documentService: DocumentService) { }

  search(query: string, page?: number): Observable<SearchResult> {
    let httpParams = new HttpParams().set('query', query)
    if (page) {
      httpParams = httpParams.set('page', page.toString())
    }
-    return this.http.get<SearchResult>(`${environment.apiBaseUrl}search/`, {params: httpParams})
+    return this.http.get<SearchResult>(`${environment.apiBaseUrl}search/`, {params: httpParams}).pipe(
+      map(result => {
+        result.results.forEach(hit => this.documentService.addObservablesToDocument(hit.document))
+        return result
+      })
+    )
  }

  autocomplete(term: string): Observable<string[]> {
--- a/src-ui/src/assets/save-filter.png
+++ b/src-ui/src/assets/save-filter.png
--- a/src-ui/src/environments/environment.prod.ts
+++ b/src-ui/src/environments/environment.prod.ts
@@ -1,4 +1,5 @@
 export const environment = {
  production: true,
-  apiBaseUrl: "/api/"
+  apiBaseUrl: "/api/",
+  appTitle: "Paperless-ng"
 };
--- a/src-ui/src/environments/environment.ts
+++ b/src-ui/src/environments/environment.ts
@@ -4,7 +4,8 @@

 export const environment = {
  production: false,
-  apiBaseUrl: "http://localhost:8000/api/"
+  apiBaseUrl: "http://localhost:8000/api/",
+  appTitle: "DEVELOPMENT P-NG"
 };

 /*
--- a/src/documents/admin.py
+++ b/src/documents/admin.py
@@ -17,8 +17,6 @@ class CorrespondentAdmin(admin.ModelAdmin):
    list_filter = ("matching_algorithm",)
    list_editable = ("match", "matching_algorithm")

-    readonly_fields = ("slug",)
-

 class TagAdmin(admin.ModelAdmin):

@@ -31,8 +29,6 @@ class TagAdmin(admin.ModelAdmin):
    list_filter = ("colour", "matching_algorithm")
    list_editable = ("colour", "match", "matching_algorithm")

-    readonly_fields = ("slug", )
-

 class DocumentTypeAdmin(admin.ModelAdmin):

@@ -44,13 +40,16 @@ class DocumentTypeAdmin(admin.ModelAdmin):
    list_filter = ("matching_algorithm",)
    list_editable = ("match", "matching_algorithm")

-    readonly_fields = ("slug",)
-

 class DocumentAdmin(admin.ModelAdmin):

    search_fields = ("correspondent__name", "title", "content", "tags__name")
-    readonly_fields = ("added", "mime_type", "storage_type", "filename")
+    readonly_fields = (
+        "added",
+        "modified",
+        "mime_type",
+        "storage_type",
+        "filename")

    list_display_links = ("title",)

@@ -101,7 +100,7 @@ class DocumentAdmin(admin.ModelAdmin):
        for tag in obj.tags.all():
            r += self._html_tag(
                "span",
-                tag.slug + ", "
+                tag.name + ", "
            )
        return r

--- a/src/documents/consumer.py
+++ b/src/documents/consumer.py
@@ -6,14 +6,16 @@ import os
 import magic
 from django.conf import settings
 from django.db import transaction
+from django.db.models import Q
 from django.utils import timezone
+from filelock import FileLock

 from .classifier import DocumentClassifier, IncompatibleClassifierVersionError
-from .file_handling import create_source_path_directory
+from .file_handling import create_source_path_directory, \
+    generate_unique_filename
 from .loggers import LoggingMixin
 from .models import Document, FileInfo, Correspondent, DocumentType, Tag
-from .parsers import ParseError, get_parser_class_for_mime_type, \
-    get_supported_file_extensions
+from .parsers import ParseError, get_parser_class_for_mime_type, parse_date
 from .signals import (
    document_consumption_finished,
    document_consumption_started
@@ -37,30 +39,23 @@ class Consumer(LoggingMixin):

    def pre_check_file_exists(self):
        if not os.path.isfile(self.path):
+            self.log(
+                "error",
+                "Cannot consume {}: It is not a file.".format(self.path)
+            )
            raise ConsumerError("Cannot consume {}: It is not a file".format(
                self.path))

-    def pre_check_file_extension(self):
-        extensions = get_supported_file_extensions()
-        _, ext = os.path.splitext(self.filename)
-
-        if not ext:
-            raise ConsumerError(
-                f"Not consuming {self.filename}: File type unknown."
-            )
-
-        if ext not in extensions:
-            raise ConsumerError(
-                f"Not consuming {self.filename}: File extension {ext} does "
-                f"not map to any known file type ({str(extensions)})"
-            )
-
    def pre_check_duplicate(self):
        with open(self.path, "rb") as f:
            checksum = hashlib.md5(f.read()).hexdigest()
-        if Document.objects.filter(checksum=checksum).exists():
+        if Document.objects.filter(Q(checksum=checksum) | Q(archive_checksum=checksum)).exists():  # NOQA: E501
            if settings.CONSUMER_DELETE_DUPLICATES:
                os.unlink(self.path)
+            self.log(
+                "error",
+                "Not consuming {}: It is a duplicate.".format(self.filename)
+            )
            raise ConsumerError(
                "Not consuming {}: It is a duplicate.".format(self.filename)
            )
@@ -69,6 +64,7 @@ class Consumer(LoggingMixin):
        os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
        os.makedirs(settings.THUMBNAIL_DIR, exist_ok=True)
        os.makedirs(settings.ORIGINALS_DIR, exist_ok=True)
+        os.makedirs(settings.ARCHIVE_DIR, exist_ok=True)

    def try_consume_file(self,
                         path,
@@ -96,7 +92,6 @@ class Consumer(LoggingMixin):
        # Make sure that preconditions for consuming the file are met.

        self.pre_check_file_exists()
-        self.pre_check_file_extension()
        self.pre_check_directories()
        self.pre_check_duplicate()

@@ -124,7 +119,7 @@ class Consumer(LoggingMixin):

        # This doesn't parse the document yet, but gives us a parser.

-        document_parser = parser_class(self.path, self.logging_group)
+        document_parser = parser_class(self.logging_group)

        # However, this already created working directories which we have to
        # clean up.
@@ -132,13 +127,24 @@ class Consumer(LoggingMixin):
        # Parse the document. This may take some time.

        try:
-            self.log("debug", f"Generating thumbnail for {self.filename}...")
-            thumbnail = document_parser.get_optimised_thumbnail()
            self.log("debug", "Parsing {}...".format(self.filename))
+            document_parser.parse(self.path, mime_type)
+
+            self.log("debug", f"Generating thumbnail for {self.filename}...")
+            thumbnail = document_parser.get_optimised_thumbnail(
+                self.path, mime_type)
+
            text = document_parser.get_text()
            date = document_parser.get_date()
+            if not date:
+                date = parse_date(self.filename, text)
+            archive_path = document_parser.get_archive_path()
+
        except ParseError as e:
            document_parser.cleanup()
+            self.log(
+                "error",
+                f"Error while consuming document {self.filename}: {e}")
            raise ConsumerError(e)

        # Prepare the document classifier.
@@ -151,8 +157,9 @@ class Consumer(LoggingMixin):
            classifier = DocumentClassifier()
            classifier.reload()
        except (FileNotFoundError, IncompatibleClassifierVersionError) as e:
-            logging.getLogger(__name__).warning(
-                "Cannot classify documents: {}.".format(e))
+            self.log(
+                "warning",
+                f"Cannot classify documents: {e}.")
            classifier = None

        # now that everything is done, we can start to store the document
@@ -179,22 +186,39 @@ class Consumer(LoggingMixin):

                # After everything is in the database, copy the files into
                # place. If this fails, we'll also rollback the transaction.
+                with FileLock(settings.MEDIA_LOCK):
+                    document.filename = generate_unique_filename(
+                        document, settings.ORIGINALS_DIR)
+                    create_source_path_directory(document.source_path)

-                create_source_path_directory(document.source_path)
-                self._write(document, self.path, document.source_path)
-                self._write(document, thumbnail, document.thumbnail_path)
+                    self._write(document.storage_type,
+                                self.path, document.source_path)

-                # Afte performing all database operations and moving files
-                # into place, tell paperless where the file is.
-                document.filename = os.path.basename(document.source_path)
-                # Saving the document now will trigger the filename handling
-                # logic.
+                    self._write(document.storage_type,
+                                thumbnail, document.thumbnail_path)
+
+                    if archive_path and os.path.isfile(archive_path):
+                        create_source_path_directory(document.archive_path)
+                        self._write(document.storage_type,
+                                    archive_path, document.archive_path)
+
+                        with open(archive_path, 'rb') as f:
+                            document.archive_checksum = hashlib.md5(
+                                f.read()).hexdigest()
+
+                # Don't save with the lock active. Saving will cause the file
+                # renaming logic to acquire the lock as well.
                document.save()

                # Delete the file only if it was successfully consumed
                self.log("debug", "Deleting file {}".format(self.path))
                os.unlink(self.path)
        except Exception as e:
+            self.log(
+                "error",
+                f"The following error occured while consuming "
+                f"{self.filename}: {e}"
+            )
            raise ConsumerError(e)
        finally:
            document_parser.cleanup()
@@ -224,7 +248,7 @@ class Consumer(LoggingMixin):
        with open(self.path, "rb") as f:
            document = Document.objects.create(
                correspondent=file_info.correspondent,
-                title=file_info.title,
+                title=(self.override_title or file_info.title)[:127],
                content=text,
                mime_type=mime_type,
                checksum=hashlib.md5(f.read()).hexdigest(),
@@ -235,18 +259,17 @@ class Consumer(LoggingMixin):

        relevant_tags = set(file_info.tags)
        if relevant_tags:
-            tag_names = ", ".join([t.slug for t in relevant_tags])
+            tag_names = ", ".join([t.name for t in relevant_tags])
            self.log("debug", "Tagging with {}".format(tag_names))
            document.tags.add(*relevant_tags)

        self.apply_overrides(document)

+        document.save()
+
        return document

    def apply_overrides(self, document):
-        if self.override_title:
-            document.title = self.override_title
-
        if self.override_correspondent_id:
            document.correspondent = Correspondent.objects.get(
                pk=self.override_correspondent_id)
@@ -259,7 +282,7 @@ class Consumer(LoggingMixin):
            for tag_id in self.override_tag_ids:
                document.tags.add(Tag.objects.get(pk=tag_id))

-    def _write(self, document, source, target):
+    def _write(self, storage_type, source, target):
        with open(source, "rb") as read_file:
            with open(target, "wb") as write_file:
                write_file.write(read_file.read())
--- a/src/documents/file_handling.py
+++ b/src/documents/file_handling.py
@@ -1,7 +1,9 @@
+import datetime
 import logging
 import os
 from collections import defaultdict

+import pathvalidate
 from django.conf import settings
 from django.template.defaultfilters import slugify

@@ -10,10 +12,13 @@ def create_source_path_directory(source_path):
    os.makedirs(os.path.dirname(source_path), exist_ok=True)


-def delete_empty_directories(directory):
+def delete_empty_directories(directory, root):
+    if not os.path.isdir(directory):
+        return
+
    # Go up in the directory hierarchy and try to delete all directories
    directory = os.path.normpath(directory)
-    root = os.path.normpath(settings.ORIGINALS_DIR)
+    root = os.path.normpath(root)

    if not directory.startswith(root + os.path.sep):
        # don't do anything outside our originals folder.
@@ -65,21 +70,53 @@ def many_to_dictionary(field):
    return mydictionary


-def generate_filename(doc):
+def generate_unique_filename(doc, root):
+    counter = 0
+
+    while True:
+        new_filename = generate_filename(doc, counter)
+        if new_filename == doc.filename:
+            # still the same as before.
+            return new_filename
+
+        if os.path.exists(os.path.join(root, new_filename)):
+            counter += 1
+        else:
+            return new_filename
+
+
+def generate_filename(doc, counter=0):
    path = ""

    try:
        if settings.PAPERLESS_FILENAME_FORMAT is not None:
            tags = defaultdict(lambda: slugify(None),
                               many_to_dictionary(doc.tags))
+
+            if doc.correspondent:
+                correspondent = pathvalidate.sanitize_filename(
+                    doc.correspondent.name, replacement_text="-"
+                )
+            else:
+                correspondent = "none"
+
+            if doc.document_type:
+                document_type = pathvalidate.sanitize_filename(
+                    doc.document_type.name, replacement_text="-"
+                )
+            else:
+                document_type = "none"
+
            path = settings.PAPERLESS_FILENAME_FORMAT.format(
-                correspondent=slugify(doc.correspondent),
-                title=slugify(doc.title),
-                created=slugify(doc.created),
+                title=pathvalidate.sanitize_filename(
+                    doc.title, replacement_text="-"),
+                correspondent=correspondent,
+                document_type=document_type,
+                created=datetime.date.isoformat(doc.created),
                created_year=doc.created.year if doc.created else "none",
                created_month=doc.created.month if doc.created else "none",
                created_day=doc.created.day if doc.created else "none",
-                added=slugify(doc.added),
+                added=datetime.date.isoformat(doc.added),
                added_year=doc.added.year if doc.added else "none",
                added_month=doc.added.month if doc.added else "none",
                added_day=doc.added.day if doc.added else "none",
@@ -90,14 +127,19 @@ def generate_filename(doc):
            f"Invalid PAPERLESS_FILENAME_FORMAT: "
            f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default")

-    # Always append the primary key to guarantee uniqueness of filename
+    counter_str = f"_{counter:02}" if counter else ""
    if len(path) > 0:
-        filename = "%s-%07i%s" % (path, doc.pk, doc.file_type)
+        filename = f"{path}{counter_str}{doc.file_type}"
    else:
-        filename = "%07i%s" % (doc.pk, doc.file_type)
+        filename = f"{doc.pk:07}{counter_str}{doc.file_type}"

    # Append .gpg for encrypted files
    if doc.storage_type == doc.STORAGE_TYPE_GPG:
        filename += ".gpg"

    return filename
+
+
+def archive_name_from_filename(filename):
+
+    return os.path.splitext(filename)[0] + ".pdf"
--- a/src/documents/filters.py
+++ b/src/documents/filters.py
@@ -37,6 +37,10 @@ class DocumentTypeFilterSet(FilterSet):

 class TagsFilter(Filter):

+    def __init__(self, exclude=False):
+        super(TagsFilter, self).__init__()
+        self.exclude = exclude
+
    def filter(self, qs, value):
        if not value:
            return qs
@@ -47,7 +51,10 @@ class TagsFilter(Filter):
            return qs

        for tag_id in tag_ids:
-            qs = qs.filter(tags__id=tag_id)
+            if self.exclude:
+                qs = qs.exclude(tags__id=tag_id)
+            else:
+                qs = qs.filter(tags__id=tag_id)

        return qs

@@ -74,6 +81,8 @@ class DocumentFilterSet(FilterSet):

    tags__id__all = TagsFilter()

+    tags__id__none = TagsFilter(exclude=True)
+
    is_in_inbox = InboxFilter()

    class Meta:
--- a/src/documents/forms.py
+++ b/src/documents/forms.py
@@ -1,59 +0,0 @@
-import os
-import tempfile
-from datetime import datetime
-from time import mktime
-
-import magic
-from django import forms
-from django.conf import settings
-from django_q.tasks import async_task
-from pathvalidate import validate_filename, ValidationError
-
-from documents.parsers import is_mime_type_supported
-
-
-class UploadForm(forms.Form):
-
-    document = forms.FileField()
-
-    def clean_document(self):
-        document_name = self.cleaned_data.get("document").name
-
-        try:
-            validate_filename(document_name)
-        except ValidationError:
-            raise forms.ValidationError("That filename is suspicious.")
-
-        document_data = self.cleaned_data.get("document").read()
-
-        mime_type = magic.from_buffer(document_data, mime=True)
-
-        if not is_mime_type_supported(mime_type):
-            raise forms.ValidationError("This mime type is not supported.")
-
-        return document_name, document_data
-
-    def save(self):
-        """
-        Since the consumer already does a lot of work, it's easier just to save
-        to-be-consumed files to the consumption directory rather than have the
-        form do that as well.  Think of it as a poor-man's queue server.
-        """
-
-        original_filename, data = self.cleaned_data.get("document")
-
-        t = int(mktime(datetime.now().timetuple()))
-
-        os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
-
-        with tempfile.NamedTemporaryFile(prefix="paperless-upload-",
-                                         dir=settings.SCRATCH_DIR,
-                                         delete=False) as f:
-
-            f.write(data)
-            os.utime(f.name, times=(t, t))
-
-            async_task("documents.tasks.consume_file",
-                       f.name,
-                       override_filename=original_filename,
-                       task_name=os.path.basename(original_filename)[:100])
--- a/src/documents/index.py
+++ b/src/documents/index.py
@@ -82,6 +82,10 @@ def open_index(recreate=False):


 def update_document(writer, doc):
+    # TODO: this line caused many issues all around, since:
+    #  We need to make sure that this method does not get called with
+    #  deserialized documents (i.e., document objects that don't come from
+    #  Django's ORM interfaces directly).
    logger.debug("Indexing {}...".format(doc))
    tags = ",".join([t.name for t in doc.tags.all()])
    writer.update_document(
@@ -98,6 +102,7 @@ def update_document(writer, doc):


 def remove_document(writer, doc):
+    # TODO: see above.
    logger.debug("Removing {} from index...".format(doc))
    writer.delete_by_term('id', doc.pk)

--- a/src/documents/loggers.py
+++ b/src/documents/loggers.py
@@ -28,10 +28,10 @@ class LoggingMixin:
    def renew_logging_group(self):
        self.logging_group = uuid.uuid4()

-    def log(self, level, message):
+    def log(self, level, message, **kwargs):
        target = ".".join([self.__class__.__module__, self.__class__.__name__])
        logger = logging.getLogger(target)

        getattr(logger, level)(message, extra={
            "group": self.logging_group
-        })
+        }, **kwargs)
--- a/src/documents/management/commands/decrypt_documents.py
+++ b/src/documents/management/commands/decrypt_documents.py
@@ -82,7 +82,8 @@ class Command(BaseCommand):
            with open(document.thumbnail_path, "wb") as f:
                f.write(raw_thumb)

-            document.save(update_fields=("storage_type", "filename"))
+            Document.objects.filter(id=document.id).update(
+                storage_type=document.storage_type, filename=document.filename)

            for path in old_paths:
                os.unlink(path)
--- a/src/documents/management/commands/document_archiver.py
+++ b/src/documents/management/commands/document_archiver.py
@@ -0,0 +1,154 @@
+import hashlib
+import multiprocessing
+
+import logging
+import os
+import shutil
+import uuid
+
+import tqdm
+from django import db
+from django.conf import settings
+from django.core.management.base import BaseCommand
+from django.db import transaction
+from whoosh.writing import AsyncWriter
+
+from documents.models import Document
+from ... import index
+from ...file_handling import create_source_path_directory
+from ...mixins import Renderable
+from ...parsers import get_parser_class_for_mime_type
+
+
+logger = logging.getLogger(__name__)
+
+
+def handle_document(document_id):
+    """
+    Re-parse one document, store its archived version and re-index it.
+
+    Any exception raised while parsing, storing or indexing is logged and
+    swallowed; the parser's temporary files are always cleaned up.
+
+    :param document_id: primary key of the Document to process.
+    """
+    document = Document.objects.get(id=document_id)
+
+    mime_type = document.mime_type
+
+    parser_class = get_parser_class_for_mime_type(mime_type)
+
+    if not parser_class:
+        # NOTE(review): assumes a falsy result when no parser handles the
+        # mime type. Without this guard the call below fails outside the
+        # try block and the error escapes the worker.
+        logger.error(
+            f"No parser found for mime type {mime_type}, cannot archive "
+            f"document {document} (ID: {document_id})")
+        return
+
+    parser = parser_class(logging_group=uuid.uuid4())
+
+    try:
+        parser.parse(document.source_path, mime_type)
+
+        archive_source = parser.get_archive_path()
+        if archive_source:
+            with transaction.atomic():
+                with open(archive_source, 'rb') as f:
+                    checksum = hashlib.md5(f.read()).hexdigest()
+                # i'm going to save first so that in case the file move
+                # fails, the database is rolled back.
+                # we also don't use save() since that triggers the filehandling
+                # logic, and we don't want that yet (file not yet in place)
+                Document.objects.filter(pk=document.pk).update(
+                    archive_checksum=checksum,
+                    content=parser.get_text()
+                )
+                create_source_path_directory(document.archive_path)
+                shutil.move(archive_source, document.archive_path)
+
+            # update() wrote straight to the database; reload the instance
+            # so the index below is fed the stored content and checksum.
+            document.refresh_from_db()
+
+        with AsyncWriter(index.open_index()) as writer:
+            index.update_document(writer, document)
+
+    except Exception as e:
+        logger.error(f"Error while parsing document {document}: {str(e)}")
+    finally:
+        parser.cleanup()
+
+
+class Command(Renderable, BaseCommand):
+    """Management command that creates archived versions of documents."""
+
+    help = """
+        Re-parses documents and stores the archived version produced by the
+        parser. By default only documents without an archived version are
+        processed; use --overwrite to recreate existing archives, or
+        --document to process a single document.
+    """.replace("    ", "")
+
+    def __init__(self, *args, **kwargs):
+        self.verbosity = 0
+        BaseCommand.__init__(self, *args, **kwargs)
+
+    def add_arguments(self, parser):
+        parser.add_argument(
+            "-f", "--overwrite",
+            default=False,
+            action="store_true",
+            help="Recreates the archived document for documents that already "
+                 "have an archived version."
+        )
+        parser.add_argument(
+            "-d", "--document",
+            default=None,
+            type=int,
+            required=False,
+            help="Specify the ID of a document, and this command will only "
+                 "run on this specific document."
+        )
+
+    def handle(self, *args, **options):
+        """
+        Collect the IDs of the documents to process and archive them in a
+        pool of settings.TASK_WORKERS worker processes.
+        """
+
+        os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
+
+        overwrite = options["overwrite"]
+
+        if options['document']:
+            documents = Document.objects.filter(pk=options['document'])
+        else:
+            documents = Document.objects.all()
+
+        # Only the IDs are handed to the workers; each worker loads its own
+        # Document instance.
+        document_ids = [
+            doc.id for doc in documents
+            if overwrite or not doc.archive_checksum
+        ]
+
+        # Note to future self: this prevents django from reusing database
+        # connections between processes, which is bad and does not work
+        # with postgres.
+        db.connections.close_all()
+
+        try:
+
+            logging.getLogger().handlers[0].level = logging.ERROR
+            with multiprocessing.Pool(processes=settings.TASK_WORKERS) as pool:
+                list(tqdm.tqdm(
+                    pool.imap_unordered(
+                        handle_document,
+                        document_ids
+                    ),
+                    total=len(document_ids)
+                ))
+        except KeyboardInterrupt:
+            print("Aborting...")
--- a/src/documents/management/commands/document_consumer.py
+++ b/src/documents/management/commands/document_consumer.py
@@ -1,31 +1,68 @@
 import logging
 import os
+from pathlib import Path
 from time import sleep

 from django.conf import settings
 from django.core.management.base import BaseCommand, CommandError
+from django.utils.text import slugify
 from django_q.tasks import async_task
 from watchdog.events import FileSystemEventHandler
 from watchdog.observers.polling import PollingObserver

+from documents.models import Tag
+from documents.parsers import is_file_ext_supported
+
 try:
-    from inotify_simple import INotify, flags
+    from inotifyrecursive import INotify, flags
 except ImportError:
    INotify = flags = None

 logger = logging.getLogger(__name__)


-def _consume(file):
-    try:
-        if os.path.isfile(file):
-            async_task("documents.tasks.consume_file",
-                       file,
-                       task_name=os.path.basename(file)[:100])
-        else:
-            logger.debug(
-                f"Not consuming file {file}: File has moved.")
+def _tags_from_path(filepath):
+    """Return the set of Tag primary keys for the directories between
+       settings.CONSUMPTION_DIR and filepath, creating missing tags.
+    """
+    relative_dir = Path(filepath).relative_to(settings.CONSUMPTION_DIR).parent
+    found_ids = set()
+    for directory_name in relative_dir.parts:
+        tag, _ = Tag.objects.get_or_create(
+            name__iexact=directory_name,
+            defaults={"name": directory_name})
+        found_ids.add(tag.pk)

+    return found_ids
+
+
+def _consume(filepath):
+    if os.path.isdir(filepath):
+        return
+
+    if not os.path.isfile(filepath):
+        logger.debug(
+            f"Not consuming file {filepath}: File has moved.")
+        return
+
+    if not is_file_ext_supported(os.path.splitext(filepath)[1]):
+        logger.debug(
+            f"Not consuming file {filepath}: Unknown file extension.")
+        return
+
+    tag_ids = None
+    try:
+        if settings.CONSUMER_SUBDIRS_AS_TAGS:
+            tag_ids = _tags_from_path(filepath)
+    except Exception as e:
+        logger.error(
+            "Error creating tags from path: {}".format(e))
+
+    try:
+        async_task("documents.tasks.consume_file",
+                   filepath,
+                   override_tag_ids=tag_ids if tag_ids else None,
+                   task_name=os.path.basename(filepath)[:100])
    except Exception as e:
        # Catch all so that the consumer won't crash.
        # This is also what the test case is listening for to check for
@@ -94,6 +131,7 @@ class Command(BaseCommand):

    def handle(self, *args, **options):
        directory = options["directory"]
+        recursive = settings.CONSUMER_RECURSIVE

        if not directory:
            raise CommandError(
@@ -104,24 +142,30 @@ class Command(BaseCommand):
            raise CommandError(
                f"Consumption directory {directory} does not exist")

-        for entry in os.scandir(directory):
-            _consume(entry.path)
+        if recursive:
+            for dirpath, _, filenames in os.walk(directory):
+                for filename in filenames:
+                    filepath = os.path.join(dirpath, filename)
+                    _consume(filepath)
+        else:
+            for entry in os.scandir(directory):
+                _consume(entry.path)

        if options["oneshot"]:
            return

        if settings.CONSUMER_POLLING == 0 and INotify:
-            self.handle_inotify(directory)
+            self.handle_inotify(directory, recursive)
        else:
-            self.handle_polling(directory)
+            self.handle_polling(directory, recursive)

        logger.debug("Consumer exiting.")

-    def handle_polling(self, directory):
+    def handle_polling(self, directory, recursive):
        logging.getLogger(__name__).info(
            f"Polling directory for changes: {directory}")
        self.observer = PollingObserver(timeout=settings.CONSUMER_POLLING)
-        self.observer.schedule(Handler(), directory, recursive=False)
+        self.observer.schedule(Handler(), directory, recursive=recursive)
        self.observer.start()
        try:
            while self.observer.is_alive():
@@ -132,18 +176,26 @@ class Command(BaseCommand):
            self.observer.stop()
        self.observer.join()

-    def handle_inotify(self, directory):
+    def handle_inotify(self, directory, recursive):
        logging.getLogger(__name__).info(
            f"Using inotify to watch directory for changes: {directory}")

        inotify = INotify()
-        descriptor = inotify.add_watch(
-            directory, flags.CLOSE_WRITE | flags.MOVED_TO)
+        inotify_flags = flags.CLOSE_WRITE | flags.MOVED_TO
+        if recursive:
+            descriptor = inotify.add_watch_recursive(directory, inotify_flags)
+        else:
+            descriptor = inotify.add_watch(directory, inotify_flags)
+
        try:
            while not self.stop_flag:
-                for event in inotify.read(timeout=1000, read_delay=1000):
-                    file = os.path.join(directory, event.name)
-                    _consume(file)
+                for event in inotify.read(timeout=1000):
+                    if recursive:
+                        path = inotify.get_path(event.wd)
+                    else:
+                        path = directory
+                    filepath = os.path.join(path, event.name)
+                    _consume(filepath)
        except KeyboardInterrupt:
            pass

--- a/src/documents/management/commands/document_exporter.py
+++ b/src/documents/management/commands/document_exporter.py
@@ -7,7 +7,8 @@ from django.core import serializers
 from django.core.management.base import BaseCommand, CommandError

 from documents.models import Document, Correspondent, Tag, DocumentType
-from documents.settings import EXPORTER_FILE_NAME, EXPORTER_THUMBNAIL_NAME
+from documents.settings import EXPORTER_FILE_NAME, EXPORTER_THUMBNAIL_NAME, \
+    EXPORTER_ARCHIVE_NAME
 from paperless.db import GnuPG
 from ...mixins import Renderable

@@ -37,6 +38,9 @@ class Command(Renderable, BaseCommand):
        if not os.access(self.target, os.W_OK):
            raise CommandError("That path doesn't appear to be writable")

+        if os.listdir(self.target):
+            raise CommandError("That directory is not empty.")
+
        self.dump()

    def dump(self):
@@ -53,34 +57,56 @@ class Command(Renderable, BaseCommand):

            document = document_map[document_dict["pk"]]

-            unique_filename = f"{document.pk:07}_{document.file_name}"
+            print(f"Exporting: {document}")

-            file_target = os.path.join(self.target, unique_filename)
+            filename_counter = 0
+            while True:
+                original_name = document.get_public_filename(
+                    counter=filename_counter)
+                original_target = os.path.join(self.target, original_name)

-            thumbnail_name = unique_filename + "-thumbnail.png"
+                if not os.path.exists(original_target):
+                    break
+                else:
+                    filename_counter += 1
+
+            thumbnail_name = original_name + "-thumbnail.png"
            thumbnail_target = os.path.join(self.target, thumbnail_name)

-            document_dict[EXPORTER_FILE_NAME] = unique_filename
+            document_dict[EXPORTER_FILE_NAME] = original_name
            document_dict[EXPORTER_THUMBNAIL_NAME] = thumbnail_name

-            print(f"Exporting: {file_target}")
+            if os.path.exists(document.archive_path):
+                archive_name = document.get_public_filename(
+                    archive=True, counter=filename_counter, suffix="_archive")
+                archive_target = os.path.join(self.target, archive_name)
+                document_dict[EXPORTER_ARCHIVE_NAME] = archive_name
+            else:
+                archive_target = None

            t = int(time.mktime(document.created.timetuple()))
            if document.storage_type == Document.STORAGE_TYPE_GPG:

-                with open(file_target, "wb") as f:
+                with open(original_target, "wb") as f:
                    f.write(GnuPG.decrypted(document.source_file))
-                    os.utime(file_target, times=(t, t))
+                    os.utime(original_target, times=(t, t))

                with open(thumbnail_target, "wb") as f:
                    f.write(GnuPG.decrypted(document.thumbnail_file))
                    os.utime(thumbnail_target, times=(t, t))

+                if archive_target:  # file handle, as for original/thumbnail
+                    with open(archive_target, "wb") as f:
+                        f.write(GnuPG.decrypted(document.archive_file))
+                        os.utime(archive_target, times=(t, t))
            else:

-                shutil.copy(document.source_path, file_target)
+                shutil.copy(document.source_path, original_target)
                shutil.copy(document.thumbnail_path, thumbnail_target)

+                if archive_target:
+                    shutil.copy(document.archive_path, archive_target)
+
        manifest += json.loads(
            serializers.serialize("json", Correspondent.objects.all()))

--- a/src/documents/management/commands/document_importer.py
+++ b/src/documents/management/commands/document_importer.py
@@ -5,11 +5,13 @@ import shutil
 from django.conf import settings
 from django.core.management import call_command
 from django.core.management.base import BaseCommand, CommandError
+from filelock import FileLock

 from documents.models import Document
-from documents.settings import EXPORTER_FILE_NAME, EXPORTER_THUMBNAIL_NAME
-from paperless.db import GnuPG
-from ...file_handling import generate_filename, create_source_path_directory
+from documents.settings import EXPORTER_FILE_NAME, EXPORTER_THUMBNAIL_NAME, \
+    EXPORTER_ARCHIVE_NAME
+from ...file_handling import create_source_path_directory, \
+    generate_unique_filename
 from ...mixins import Renderable


@@ -79,32 +81,55 @@ class Command(Renderable, BaseCommand):
                    'appear to be in the source directory.'.format(doc_file)
                )

+            if EXPORTER_ARCHIVE_NAME in record:
+                archive_file = record[EXPORTER_ARCHIVE_NAME]
+                if not os.path.exists(os.path.join(self.source, archive_file)):
+                    raise CommandError(
+                        f"The manifest file refers to {archive_file} which "
+                        f"does not appear to be in the source directory."
+                    )
+
    def _import_files_from_manifest(self):

-        storage_type = Document.STORAGE_TYPE_UNENCRYPTED
+        os.makedirs(settings.ORIGINALS_DIR, exist_ok=True)
+        os.makedirs(settings.THUMBNAIL_DIR, exist_ok=True)
+        os.makedirs(settings.ARCHIVE_DIR, exist_ok=True)

        for record in self.manifest:

            if not record["model"] == "documents.document":
                continue

-            doc_file = record[EXPORTER_FILE_NAME]
-            thumb_file = record[EXPORTER_THUMBNAIL_NAME]
            document = Document.objects.get(pk=record["pk"])

+            doc_file = record[EXPORTER_FILE_NAME]
            document_path = os.path.join(self.source, doc_file)
+
+            thumb_file = record[EXPORTER_THUMBNAIL_NAME]
            thumbnail_path = os.path.join(self.source, thumb_file)

-            document.storage_type = storage_type
-            document.filename = generate_filename(document)
+            if EXPORTER_ARCHIVE_NAME in record:
+                archive_file = record[EXPORTER_ARCHIVE_NAME]
+                archive_path = os.path.join(self.source, archive_file)
+            else:
+                archive_path = None

-            if os.path.isfile(document.source_path):
-                raise FileExistsError(document.source_path)
+            document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED

-            create_source_path_directory(document.source_path)
+            with FileLock(settings.MEDIA_LOCK):
+                document.filename = generate_unique_filename(
+                    document, settings.ORIGINALS_DIR)

-            print(f"Moving {document_path} to {document.source_path}")
-            shutil.copy(document_path, document.source_path)
-            shutil.copy(thumbnail_path, document.thumbnail_path)
+                if os.path.isfile(document.source_path):
+                    raise FileExistsError(document.source_path)
+
+                create_source_path_directory(document.source_path)
+
+                print(f"Moving {document_path} to {document.source_path}")
+                shutil.copy(document_path, document.source_path)
+                shutil.copy(thumbnail_path, document.thumbnail_path)
+                if archive_path:
+                    create_source_path_directory(document.archive_path)
+                    shutil.copy(archive_path, document.archive_path)

            document.save()
--- a/src/documents/management/commands/document_renamer.py
+++ b/src/documents/management/commands/document_renamer.py
@@ -1,3 +1,6 @@
+import logging
+
+import tqdm
 from django.core.management.base import BaseCommand

 from documents.models import Document
@@ -18,6 +21,8 @@ class Command(Renderable, BaseCommand):

        self.verbosity = options["verbosity"]

-        for document in Document.objects.all():
+        logging.getLogger().handlers[0].level = logging.ERROR
+
+        for document in tqdm.tqdm(Document.objects.all()):
            # Saving the document again will generate a new filename and rename
            document.save()
--- a/src/documents/migrations/1005_checksums.py
+++ b/src/documents/migrations/1005_checksums.py
@@ -0,0 +1,23 @@
+# Generated by Django 3.1.3 on 2020-11-29 00:48
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    # Must be applied after 1004_sanity_check_schedule.
+    dependencies = [
+        ('documents', '1004_sanity_check_schedule'),
+    ]
+    # Add nullable archive_checksum; re-declare checksum with new help text.
+    operations = [
+        migrations.AddField(
+            model_name='document',
+            name='archive_checksum',
+            field=models.CharField(blank=True, editable=False, help_text='The checksum of the archived document.', max_length=32, null=True),
+        ),
+        migrations.AlterField(
+            model_name='document',
+            name='checksum',
+            field=models.CharField(editable=False, help_text='The checksum of the original document.', max_length=32, unique=True),
+        ),
+    ]
--- a/src/documents/migrations/1006_auto_20201208_2209.py
+++ b/src/documents/migrations/1006_auto_20201208_2209.py
@@ -0,0 +1,25 @@
+# Generated by Django 3.1.4 on 2020-12-08 22:09
+
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+    # Must be applied after 1005_checksums.
+    dependencies = [
+        ('documents', '1005_checksums'),
+    ]
+    # Remove the slug field from correspondent, documenttype and tag.
+    operations = [
+        migrations.RemoveField(
+            model_name='correspondent',
+            name='slug',
+        ),
+        migrations.RemoveField(
+            model_name='documenttype',
+            name='slug',
+        ),
+        migrations.RemoveField(
+            model_name='tag',
+            name='slug',
+        ),
+    ]
--- a/src/documents/models.py
+++ b/src/documents/models.py
@@ -1,16 +1,19 @@
 # coding=utf-8
-
+import datetime
 import logging
 import os
 import re
 from collections import OrderedDict

+import pathvalidate
+
 import dateutil.parser
 from django.conf import settings
 from django.db import models
 from django.utils import timezone
 from django.utils.text import slugify

+from documents.file_handling import archive_name_from_filename
 from documents.parsers import get_default_file_extension


@@ -33,7 +36,6 @@ class MatchingModel(models.Model):
    )

    name = models.CharField(max_length=128, unique=True)
-    slug = models.SlugField(blank=True, editable=False)

    match = models.CharField(max_length=256, blank=True)
    matching_algorithm = models.PositiveIntegerField(
@@ -66,7 +68,6 @@ class MatchingModel(models.Model):
    def save(self, *args, **kwargs):

        self.match = self.match.lower()
-        self.slug = slugify(self.name)

        models.Model.save(self, *args, **kwargs)

@@ -158,13 +159,20 @@ class Document(models.Model):
        max_length=32,
        editable=False,
        unique=True,
-        help_text="The checksum of the original document (before it was "
-                  "encrypted).  We use this to prevent duplicate document "
-                  "imports."
+        help_text="The checksum of the original document."
+    )
+
+    archive_checksum = models.CharField(
+        max_length=32,
+        editable=False,
+        blank=True,
+        null=True,
+        help_text="The checksum of the archived document."
    )

    created = models.DateTimeField(
        default=timezone.now, db_index=True)
+
    modified = models.DateTimeField(
        auto_now=True, editable=False, db_index=True)

@@ -199,13 +207,11 @@ class Document(models.Model):
        ordering = ("correspondent", "title")

    def __str__(self):
-        created = self.created.strftime("%Y%m%d")
+        created = datetime.date.isoformat(self.created)
        if self.correspondent and self.title:
-            return "{}: {} - {}".format(
-                created, self.correspondent, self.title)
-        if self.correspondent or self.title:
-            return "{}: {}".format(created, self.correspondent or self.title)
-        return str(created)
+            return f"{created} {self.correspondent} {self.title}"
+        else:
+            return f"{created} {self.title}"

    @property
    def source_path(self):
@@ -226,8 +232,36 @@ class Document(models.Model):
        return open(self.source_path, "rb")

    @property
-    def file_name(self):
-        return slugify(str(self)) + self.file_type
+    def archive_path(self):
+        if self.filename:
+            fname = archive_name_from_filename(self.filename)
+        else:
+            fname = "{:07}.pdf".format(self.pk)
+
+        return os.path.join(
+            settings.ARCHIVE_DIR,
+            fname
+        )
+
+    @property
+    def archive_file(self):  # opens a new binary read handle per access
+        return open(self.archive_path, "rb")
+
+    def get_public_filename(self, archive=False, counter=0, suffix=None):
+        """
+        Build a sanitized, human readable file name for this document:
+        str(self), "_<counter>" (zero-padded to two digits, only if counter
+        is non-zero), suffix (if given), then ".pdf" or self.file_type.
+        """
+        name_parts = [
+            str(self),
+            f"_{counter:02}" if counter else "",
+            suffix if suffix else "",
+            ".pdf" if archive else self.file_type,
+        ]
+        raw_name = "".join(name_parts)
+
+        return pathvalidate.sanitize_filename(raw_name, replacement_text="-")

    @property
    def file_type(self):
@@ -348,9 +382,7 @@ class FileInfo:
    def _get_correspondent(cls, name):
        if not name:
            return None
-        return Correspondent.objects.get_or_create(name=name, defaults={
-            "slug": slugify(name)
-        })[0]
+        return Correspondent.objects.get_or_create(name=name)[0]

    @classmethod
    def _get_title(cls, title):
@@ -360,10 +392,7 @@ class FileInfo:
    def _get_tags(cls, tags):
        r = []
        for t in tags.split(","):
-            r.append(Tag.objects.get_or_create(
-                slug=slugify(t),
-                defaults={"name": t}
-            )[0])
+            r.append(Tag.objects.get_or_create(name=t)[0])
        return tuple(r)

    @classmethod
--- a/src/documents/parsers.py
+++ b/src/documents/parsers.py
@@ -51,7 +51,18 @@ def get_default_file_extension(mime_type):
        if mime_type in supported_mime_types:
            return supported_mime_types[mime_type]

-    return None
+    ext = mimetypes.guess_extension(mime_type)
+    if ext:
+        return ext
+    else:
+        return ""
+
+
+def is_file_ext_supported(ext):
+    """Case-insensitively check ext against the supported extensions."""
+    if not ext:
+        return False
+    return ext.lower() in get_supported_file_extensions()


 def get_supported_file_extensions():
@@ -131,21 +142,59 @@ def run_convert(input_file,
        raise ParseError("Convert failed at {}".format(args))


-def run_unpaper(pnm, logging_group=None):
-    pnm_out = pnm.replace(".pnm", ".unpaper.pnm")
+def parse_date(filename, text):
+    """
+    Returns the date of the document.
+    """

-    command_args = (settings.UNPAPER_BINARY, "--overwrite", "--quiet", pnm,
-                    pnm_out)
+    def __parser(ds, date_order):
+        """
+        Call dateparser.parse with a particular date ordering
+        """
+        return dateparser.parse(
+            ds,
+            settings={
+                "DATE_ORDER": date_order,
+                "PREFER_DAY_OF_MONTH": "first",
+                "RETURN_AS_TIMEZONE_AWARE":
+                True
+            }
+        )

-    logger.debug(f"Execute: {' '.join(command_args)}",
-                 extra={'group': logging_group})
+    date = None

-    if not subprocess.Popen(command_args,
-                            stdout=subprocess.DEVNULL,
-                            stderr=subprocess.DEVNULL).wait() == 0:
-        raise ParseError(f"Unpaper failed at {command_args}")
+    next_year = timezone.now().year + 5  # Arbitrary 5 year future limit

-    return pnm_out
+    # if filename date parsing is enabled, search there first:
+    if settings.FILENAME_DATE_ORDER:
+        for m in re.finditer(DATE_REGEX, filename):
+            date_string = m.group(0)
+
+            try:
+                date = __parser(date_string, settings.FILENAME_DATE_ORDER)
+            except (TypeError, ValueError):
+                # Skip all matches that do not parse to a proper date
+                continue
+
+            if date is not None and next_year > date.year > 1900:
+                return date
+
+    # Iterate through all regex matches in text and try to parse the date
+    for m in re.finditer(DATE_REGEX, text):
+        date_string = m.group(0)
+
+        try:
+            date = __parser(date_string, settings.DATE_ORDER)
+        except (TypeError, ValueError):
+            # Skip all matches that do not parse to a proper date
+            continue
+
+        if date is not None and next_year > date.year > 1900:
+            break
+        else:
+            date = None
+
+    return date


 class ParseError(Exception):
@@ -158,26 +207,35 @@ class DocumentParser(LoggingMixin):
    `paperless_tesseract.parsers` for inspiration.
    """

-    def __init__(self, path, logging_group):
+    def __init__(self, logging_group):
        super().__init__()
        self.logging_group = logging_group
-        self.document_path = path
        self.tempdir = tempfile.mkdtemp(
            prefix="paperless-", dir=settings.SCRATCH_DIR)

-    def get_thumbnail(self):
+        self.archive_path = None
+        self.text = None
+        self.date = None
+
+    def parse(self, document_path, mime_type):
+        raise NotImplementedError()
+
+    def get_archive_path(self):
+        return self.archive_path
+
+    def get_thumbnail(self, document_path, mime_type):
        """
        Returns the path to a file we can use as a thumbnail for this document.
        """
        raise NotImplementedError()

-    def optimise_thumbnail(self, in_path):
-
+    def get_optimised_thumbnail(self, document_path, mime_type):
+        thumbnail = self.get_thumbnail(document_path, mime_type)
        if settings.OPTIMIZE_THUMBNAILS:
-            out_path = os.path.join(self.tempdir, "optipng.png")
+            out_path = os.path.join(self.tempdir, "thumb_optipng.png")

            args = (settings.OPTIPNG_BINARY,
-                    "-silent", "-o5", in_path, "-out", out_path)
+                    "-silent", "-o5", thumbnail, "-out", out_path)

            self.log('debug', f"Execute: {' '.join(args)}")

@@ -186,97 +244,13 @@ class DocumentParser(LoggingMixin):

            return out_path
        else:
-            return in_path
-
-    def get_optimised_thumbnail(self):
-        return self.optimise_thumbnail(self.get_thumbnail())
+            return thumbnail

    def get_text(self):
-        """
-        Returns the text from the document and only the text.
-        """
-        raise NotImplementedError()
+        return self.text

    def get_date(self):
-        """
-        Returns the date of the document.
-        """
-
-        def __parser(ds, date_order):
-            """
-            Call dateparser.parse with a particular date ordering
-            """
-            return dateparser.parse(
-                ds,
-                settings={
-                    "DATE_ORDER": date_order,
-                    "PREFER_DAY_OF_MONTH": "first",
-                    "RETURN_AS_TIMEZONE_AWARE":
-                    True
-                }
-            )
-
-        date = None
-        date_string = None
-
-        next_year = timezone.now().year + 5  # Arbitrary 5 year future limit
-        title = os.path.basename(self.document_path)
-
-        # if filename date parsing is enabled, search there first:
-        if settings.FILENAME_DATE_ORDER:
-            self.log("info", "Checking document title for date")
-            for m in re.finditer(DATE_REGEX, title):
-                date_string = m.group(0)
-
-                try:
-                    date = __parser(date_string, settings.FILENAME_DATE_ORDER)
-                except (TypeError, ValueError):
-                    # Skip all matches that do not parse to a proper date
-                    continue
-
-                if date is not None and next_year > date.year > 1900:
-                    self.log(
-                        "info",
-                        "Detected document date {} based on string {} "
-                        "from document title"
-                        "".format(date.isoformat(), date_string)
-                    )
-                    return date
-
-        try:
-            # getting text after checking filename will save time if only
-            # looking at the filename instead of the whole text
-            text = self.get_text()
-        except ParseError:
-            return None
-
-        # Iterate through all regex matches in text and try to parse the date
-        for m in re.finditer(DATE_REGEX, text):
-            date_string = m.group(0)
-
-            try:
-                date = __parser(date_string, settings.DATE_ORDER)
-            except (TypeError, ValueError):
-                # Skip all matches that do not parse to a proper date
-                continue
-
-            if date is not None and next_year > date.year > 1900:
-                break
-            else:
-                date = None
-
-        if date is not None:
-            self.log(
-                "info",
-                "Detected document date {} based on string {}".format(
-                    date.isoformat(),
-                    date_string
-                )
-            )
-        else:
-            self.log("info", "Unable to detect date for document")
-
-        return date
+        return self.date

    def cleanup(self):
        self.log("debug", "Deleting directory {}".format(self.tempdir))
--- a/src/documents/sanity_checker.py
+++ b/src/documents/sanity_checker.py
@@ -46,8 +46,12 @@ def check_sanity():
        for f in files:
            present_files.append(os.path.normpath(os.path.join(root, f)))

+    lockfile = os.path.normpath(settings.MEDIA_LOCK)
+    if lockfile in present_files:
+        present_files.remove(lockfile)
+
    for doc in Document.objects.all():
-        # Check thumbnail
+        # Check sanity of the thumbnail
        if not os.path.isfile(doc.thumbnail_path):
            messages.append(SanityError(
                f"Thumbnail of document {doc.pk} does not exist."))
@@ -61,26 +65,49 @@ def check_sanity():
                    f"Cannot read thumbnail file of document {doc.pk}: {e}"
                ))

-        # Check document
+        # Check sanity of the original file
+        # TODO: extract method
        if not os.path.isfile(doc.source_path):
            messages.append(SanityError(
                f"Original of document {doc.pk} does not exist."))
        else:
            present_files.remove(os.path.normpath(doc.source_path))
-            checksum = None
            try:
                with doc.source_file as f:
                    checksum = hashlib.md5(f.read()).hexdigest()
            except OSError as e:
                messages.append(SanityError(
                    f"Cannot read original file of document {doc.pk}: {e}"))
+            else:
+                if not checksum == doc.checksum:
+                    messages.append(SanityError(
+                        f"Checksum mismatch of document {doc.pk}. "
+                        f"Stored: {doc.checksum}, actual: {checksum}."
+                    ))

-            if checksum and not checksum == doc.checksum:
+        # Check sanity of the archive file (only if an archive checksum is set).
+        if doc.archive_checksum:
+            if not os.path.isfile(doc.archive_path):
                messages.append(SanityError(
-                    f"Checksum mismatch of document {doc.pk}. "
-                    f"Stored: {doc.checksum}, actual: {checksum}."
+                    f"Archived version of document {doc.pk} does not exist."
                ))
+            else:
+                present_files.remove(os.path.normpath(doc.archive_path))
+                try:
+                    with doc.archive_file as f:
+                        checksum = hashlib.md5(f.read()).hexdigest()
+                except OSError as e:
+                    messages.append(SanityError(
+                        f"Cannot read archive file of document {doc.pk}: {e}"
+                    ))
+                else:
+                    if not checksum == doc.archive_checksum:
+                        messages.append(SanityError(
+                            f"Checksum mismatch of archive {doc.pk}. "
+                            f"Stored: {doc.archive_checksum}, actual: {checksum}."
+                        ))

+        # other document checks
        if not doc.content:
            messages.append(SanityWarning(
                f"Document {doc.pk} has no content."
--- a/src/documents/serialisers.py
+++ b/src/documents/serialisers.py
@@ -1,14 +1,23 @@
+import magic
+from django.utils.text import slugify
+from pathvalidate import validate_filename, ValidationError
 from rest_framework import serializers
+from rest_framework.fields import SerializerMethodField

 from .models import Correspondent, Tag, Document, Log, DocumentType
+from .parsers import is_mime_type_supported


-class CorrespondentSerializer(serializers.HyperlinkedModelSerializer):
+class CorrespondentSerializer(serializers.ModelSerializer):

    document_count = serializers.IntegerField(read_only=True)

    last_correspondence = serializers.DateTimeField(read_only=True)

+    def get_slug(self, obj):
+        return slugify(obj.name)
+    slug = SerializerMethodField()
+
    class Meta:
        model = Correspondent
        fields = (
@@ -23,10 +32,14 @@ class CorrespondentSerializer(serializers.HyperlinkedModelSerializer):
        )


-class DocumentTypeSerializer(serializers.HyperlinkedModelSerializer):
+class DocumentTypeSerializer(serializers.ModelSerializer):

    document_count = serializers.IntegerField(read_only=True)

+    def get_slug(self, obj):
+        return slugify(obj.name)
+    slug = SerializerMethodField()
+
    class Meta:
        model = DocumentType
        fields = (
@@ -40,10 +53,14 @@ class DocumentTypeSerializer(serializers.HyperlinkedModelSerializer):
        )


-class TagSerializer(serializers.HyperlinkedModelSerializer):
+class TagSerializer(serializers.ModelSerializer):

    document_count = serializers.IntegerField(read_only=True)

+    def get_slug(self, obj):
+        return slugify(obj.name)
+    slug = SerializerMethodField()
+
    class Meta:
        model = Tag
        fields = (
@@ -76,11 +93,21 @@ class DocumentTypeField(serializers.PrimaryKeyRelatedField):

 class DocumentSerializer(serializers.ModelSerializer):

-    correspondent_id = CorrespondentField(
-        allow_null=True, source='correspondent')
-    tags_id = TagsField(many=True, source='tags')
-    document_type_id = DocumentTypeField(
-        allow_null=True, source='document_type')
+    correspondent = CorrespondentField(allow_null=True)
+    tags = TagsField(many=True)
+    document_type = DocumentTypeField(allow_null=True)
+
+    original_file_name = SerializerMethodField()
+    archived_file_name = SerializerMethodField()
+
+    def get_original_file_name(self, obj):
+        return obj.get_public_filename()
+
+    def get_archived_file_name(self, obj):
+        if obj.archive_checksum:
+            return obj.get_public_filename(archive=True)
+        else:
+            return None

    class Meta:
        model = Document
@@ -88,17 +115,16 @@ class DocumentSerializer(serializers.ModelSerializer):
        fields = (
            "id",
            "correspondent",
-            "correspondent_id",
            "document_type",
-            "document_type_id",
            "title",
            "content",
            "tags",
-            "tags_id",
            "created",
            "modified",
            "added",
-            "archive_serial_number"
+            "archive_serial_number",
+            "original_file_name",
+            "archived_file_name",
        )


@@ -113,3 +139,82 @@ class LogSerializer(serializers.ModelSerializer):
            "group",
            "level"
        )
+
+
+class PostDocumentSerializer(serializers.Serializer):
+
+    document = serializers.FileField(
+        label="Document",
+        write_only=True,
+    )
+
+    title = serializers.CharField(
+        label="Title",
+        write_only=True,
+        required=False,
+    )
+
+    correspondent = serializers.PrimaryKeyRelatedField(
+        queryset=Correspondent.objects.all(),
+        label="Correspondent",
+        allow_null=True,
+        write_only=True,
+        required=False,
+    )
+
+    document_type = serializers.PrimaryKeyRelatedField(
+        queryset=DocumentType.objects.all(),
+        label="Document type",
+        allow_null=True,
+        write_only=True,
+        required=False,
+    )
+
+    tags = serializers.PrimaryKeyRelatedField(
+        many=True,
+        queryset=Tag.objects.all(),
+        label="Tags",
+        write_only=True,
+        required=False,
+    )
+
+    def validate_document(self, document):
+
+        try:
+            validate_filename(document.name)
+        except ValidationError:
+            raise serializers.ValidationError("Invalid filename.")
+
+        document_data = document.file.read()
+        mime_type = magic.from_buffer(document_data, mime=True)
+
+        if not is_mime_type_supported(mime_type):
+            raise serializers.ValidationError(
+                "This file type is not supported.")
+
+        return document.name, document_data
+
+    def validate_title(self, title):
+        if title:
+            return title
+        else:
+            # do not return empty strings.
+            return None
+
+    def validate_correspondent(self, correspondent):
+        if correspondent:
+            return correspondent.id
+        else:
+            return None
+
+    def validate_document_type(self, document_type):
+        if document_type:
+            return document_type.id
+        else:
+            return None
+
+    def validate_tags(self, tags):
+        if tags:
+            return [tag.id for tag in tags]
+        else:
+            return None
--- a/src/documents/settings.py
+++ b/src/documents/settings.py
@@ -2,3 +2,4 @@
 # for exporting/importing commands
 EXPORTER_FILE_NAME = "__exported_file_name__"
 EXPORTER_THUMBNAIL_NAME = "__exported_thumbnail_name__"
+EXPORTER_ARCHIVE_NAME = "__exported_archive_name__"
--- a/src/documents/signals/handlers.py
+++ b/src/documents/signals/handlers.py
@@ -9,11 +9,13 @@ from django.contrib.contenttypes.models import ContentType
 from django.db import models, DatabaseError
 from django.dispatch import receiver
 from django.utils import timezone
+from filelock import FileLock
 from rest_framework.reverse import reverse

 from .. import index, matching
-from ..file_handling import delete_empty_directories, generate_filename, \
-    create_source_path_directory
+from ..file_handling import delete_empty_directories, \
+    create_source_path_directory, archive_name_from_filename, \
+    generate_unique_filename
 from ..models import Document, Tag


@@ -134,7 +136,7 @@ def set_tags(sender,

    message = 'Tagging "{}" with "{}"'
    logger(
-        message.format(document, ", ".join([t.slug for t in relevant_tags])),
+        message.format(document, ", ".join([t.name for t in relevant_tags])),
        logging_group
    )

@@ -157,25 +159,59 @@ def run_post_consume_script(sender, document, **kwargs):
    Popen((
        settings.POST_CONSUME_SCRIPT,
        str(document.pk),
-        document.file_name,
+        document.get_public_filename(),
        os.path.normpath(document.source_path),
        os.path.normpath(document.thumbnail_path),
        reverse("document-download", kwargs={"pk": document.pk}),
        reverse("document-thumb", kwargs={"pk": document.pk}),
        str(document.correspondent),
-        str(",".join(document.tags.all().values_list("slug", flat=True)))
+        str(",".join(document.tags.all().values_list("name", flat=True)))
    )).wait()


@receiver(models.signals.post_delete, sender=Document)
 def cleanup_document_deletion(sender, instance, using, **kwargs):
-    for f in (instance.source_path, instance.thumbnail_path):
-        try:
-            os.unlink(f)
-        except FileNotFoundError:
-            pass  # The file's already gone, so we're cool with it.
+    with FileLock(settings.MEDIA_LOCK):
+        for f in (instance.source_path,
+                  instance.archive_path,
+                  instance.thumbnail_path):
+            if os.path.isfile(f):
+                try:
+                    os.unlink(f)
+                    logging.getLogger(__name__).debug(
+                        f"Deleted file {f}.")
+                except OSError as e:
+                    logging.getLogger(__name__).warning(
+                        f"While deleting document {str(instance)}, the file "
+                        f"{f} could not be deleted: {e}"
+                    )

-    delete_empty_directories(os.path.dirname(instance.source_path))
+        delete_empty_directories(
+            os.path.dirname(instance.source_path),
+            root=settings.ORIGINALS_DIR
+        )
+
+        delete_empty_directories(
+            os.path.dirname(instance.archive_path),
+            root=settings.ARCHIVE_DIR
+        )
+
+
+def validate_move(instance, old_path, new_path):
+    if not os.path.isfile(old_path):
+        # Can't do anything if the old file does not exist anymore.
+        logging.getLogger(__name__).fatal(
+            f"Document {str(instance)}: File {old_path} has gone.")
+        return False
+
+    if os.path.isfile(new_path):
+        # Can't do anything if the new file already exists. Skip updating file.
+        logging.getLogger(__name__).warning(
+            f"Document {str(instance)}: Cannot rename file "
+            f"since target path {new_path} already exists.")
+        return False
+
+    return True


@receiver(models.signals.m2m_changed, sender=Document.tags.through)
@@ -183,55 +219,104 @@ def cleanup_document_deletion(sender, instance, using, **kwargs):
 def update_filename_and_move_files(sender, instance, **kwargs):

    if not instance.filename:
-        # Can't update the filename if there is not filename to begin with
-        # This happens after the consumer creates a new document.
-        # The PK needs to be set first by saving the document once. When this
-        # happens, the file is not yet in the ORIGINALS_DIR, and thus can't be
-        # renamed anyway. In all other cases, instance.filename will be set.
+        # Can't update the filename if there is no filename to begin with
+        # This happens when the consumer creates a new document.
+        # The document is modified and saved multiple times, and only after
+        # everything is done (i.e., the generated filename is final),
+        # filename will be set to the location where the consumer has put
+        # the file.
+        #
+        # This will in turn cause this logic to move the file where it belongs.
        return

-    old_filename = instance.filename
-    old_path = instance.source_path
-    new_filename = generate_filename(instance)
+    with FileLock(settings.MEDIA_LOCK):
+        old_filename = instance.filename
+        new_filename = generate_unique_filename(
+            instance, settings.ORIGINALS_DIR)

-    if new_filename == instance.filename:
-        # Don't do anything if its the same.
-        return
+        if new_filename == instance.filename:
+            # Don't do anything if its the same.
+            return

-    new_path = os.path.join(settings.ORIGINALS_DIR, new_filename)
+        old_source_path = instance.source_path
+        new_source_path = os.path.join(settings.ORIGINALS_DIR, new_filename)

-    if not os.path.isfile(old_path):
-        # Can't do anything if the old file does not exist anymore.
-        logging.getLogger(__name__).fatal(
-            f"Document {str(instance)}: File {old_path} has gone.")
-        return
+        if not validate_move(instance, old_source_path, new_source_path):
+            return

-    if os.path.isfile(new_path):
-        # Can't do anything if the new file already exists. Skip updating file.
-        logging.getLogger(__name__).warning(
-            f"Document {str(instance)}: Cannot rename file "
-            f"since target path {new_path} already exists.")
-        return
+        # archive files are optional, archive checksum tells us if we have one,
+        # since this is None for documents without archived files.
+        if instance.archive_checksum:
+            new_archive_filename = archive_name_from_filename(new_filename)
+            old_archive_path = instance.archive_path
+            new_archive_path = os.path.join(settings.ARCHIVE_DIR,
+                                            new_archive_filename)

-    create_source_path_directory(new_path)
+            if not validate_move(instance, old_archive_path, new_archive_path):
+                return

-    try:
-        os.rename(old_path, new_path)
-        instance.filename = new_filename
-        # Don't save here to prevent infinite recursion.
-        Document.objects.filter(pk=instance.pk).update(filename=new_filename)
+            create_source_path_directory(new_archive_path)
+        else:
+            old_archive_path = None
+            new_archive_path = None

-        logging.getLogger(__name__).debug(
-            f"Moved file {old_path} to {new_path}.")
+        create_source_path_directory(new_source_path)

-    except OSError as e:
-        instance.filename = old_filename
-    except DatabaseError as e:
-        os.rename(new_path, old_path)
-        instance.filename = old_filename
+        try:
+            os.rename(old_source_path, new_source_path)
+            if instance.archive_checksum:
+                os.rename(old_archive_path, new_archive_path)
+            instance.filename = new_filename

-    if not os.path.isfile(old_path):
-        delete_empty_directories(os.path.dirname(old_path))
+            # Don't save() here to prevent infinite recursion.
+            Document.objects.filter(pk=instance.pk).update(
+                filename=new_filename)
+
+            logging.getLogger(__name__).debug(
+                f"Moved file {old_source_path} to {new_source_path}.")
+
+            if instance.archive_checksum:
+                logging.getLogger(__name__).debug(
+                    f"Moved file {old_archive_path} to {new_archive_path}.")
+
+        except OSError as e:
+            instance.filename = old_filename
+            # this happens when we can't move a file. If that's the case for
+            # the archive file, we try our best to revert the changes.
+            # no need to save the instance, the update() has not happened yet.
+            try:
+                os.rename(new_source_path, old_source_path)
+                os.rename(new_archive_path, old_archive_path)
+            except Exception as e:
+                # This is fine, since:
+                # A: if we managed to move source from A to B, we will also
+                #  manage to move it from B to A. If not, we have a serious
+                #  issue that's going to get caught by the sanity checker.
+                #  All files remain in place and will never be overwritten,
+                #  so this is not the end of the world.
+                # B: if moving the original file failed, nothing has changed
+                #  anyway.
+                pass
+        except DatabaseError as e:
+            # this happens after moving files, so move them back into place.
+            # since moving them once succeeded, it's very likely going to
+            # succeed again.
+            os.rename(new_source_path, old_source_path)
+            if instance.archive_checksum:
+                os.rename(new_archive_path, old_archive_path)
+            instance.filename = old_filename
+            # again, no need to save the instance, since the actual update()
+            # operation failed.
+
+        # finally, remove any empty sub folders. This will do nothing if
+        # something has failed above.
+        if not os.path.isfile(old_source_path):
+            delete_empty_directories(os.path.dirname(old_source_path),
+                                     root=settings.ORIGINALS_DIR)
+
+        if old_archive_path and not os.path.isfile(old_archive_path):
+            delete_empty_directories(os.path.dirname(old_archive_path),
+                                     root=settings.ARCHIVE_DIR)


 def set_log_entry(sender, document=None, logging_group=None, **kwargs):
--- a/src/documents/tasks.py
+++ b/src/documents/tasks.py
@@ -1,5 +1,6 @@
 import logging

+import tqdm
 from django.conf import settings
 from whoosh.writing import AsyncWriter

@@ -12,7 +13,9 @@ from documents.sanity_checker import SanityFailedError


 def index_optimize():
-    index.open_index().optimize()
+    ix = index.open_index()
+    writer = AsyncWriter(ix)
+    writer.commit(optimize=True)


 def index_reindex():
@@ -21,7 +24,7 @@ def index_reindex():
    ix = index.open_index(recreate=True)

    with AsyncWriter(ix) as writer:
-        for document in documents:
+        for document in tqdm.tqdm(documents):
            index.update_document(writer, document)


--- a/src/paperless_tesseract/tests/samples/no-text.png
+++ b/src/paperless_tesseract/tests/samples/no-text.png
--- a/src/documents/tests/samples/documents/archive/0000001.pdf
+++ b/src/documents/tests/samples/documents/archive/0000001.pdf
--- a/src/documents/tests/test_api.py
+++ b/src/documents/tests/test_api.py
@@ -1,4 +1,5 @@
 import os
+import shutil
 import tempfile
 from unittest import mock

@@ -41,20 +42,13 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
        returned_doc = response.data['results'][0]
        self.assertEqual(returned_doc['id'], doc.id)
        self.assertEqual(returned_doc['title'], doc.title)
-        self.assertEqual(returned_doc['correspondent']['name'], c.name)
-        self.assertEqual(returned_doc['document_type']['name'], dt.name)
-        self.assertEqual(returned_doc['correspondent']['id'], c.id)
-        self.assertEqual(returned_doc['document_type']['id'], dt.id)
-        self.assertEqual(returned_doc['correspondent']['id'], returned_doc['correspondent_id'])
-        self.assertEqual(returned_doc['document_type']['id'], returned_doc['document_type_id'])
-        self.assertEqual(len(returned_doc['tags']), 1)
-        self.assertEqual(returned_doc['tags'][0]['name'], tag.name)
-        self.assertEqual(returned_doc['tags'][0]['id'], tag.id)
-        self.assertListEqual(returned_doc['tags_id'], [tag.id])
+        self.assertEqual(returned_doc['correspondent'], c.id)
+        self.assertEqual(returned_doc['document_type'], dt.id)
+        self.assertListEqual(returned_doc['tags'], [tag.id])

        c2 = Correspondent.objects.create(name="c2")

-        returned_doc['correspondent_id'] = c2.pk
+        returned_doc['correspondent'] = c2.pk
        returned_doc['title'] = "the new title"

        response = self.client.put('/api/documents/{}/'.format(doc.pk), returned_doc, format='json')
@@ -100,6 +94,44 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.content, content_thumbnail)

+    def test_download_with_archive(self):
+
+        _, filename = tempfile.mkstemp(dir=self.dirs.originals_dir)
+
+        content = b"This is a test"
+        content_archive = b"This is the same test but archived"
+
+        with open(filename, "wb") as f:
+            f.write(content)
+
+        filename = os.path.basename(filename)
+
+        doc = Document.objects.create(title="none", filename=filename,
+                                      mime_type="application/pdf")
+
+        with open(doc.archive_path, "wb") as f:
+            f.write(content_archive)
+
+        response = self.client.get('/api/documents/{}/download/'.format(doc.pk))
+
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(response.content, content_archive)
+
+        response = self.client.get('/api/documents/{}/download/?original=true'.format(doc.pk))
+
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(response.content, content)
+
+        response = self.client.get('/api/documents/{}/preview/'.format(doc.pk))
+
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(response.content, content_archive)
+
+        response = self.client.get('/api/documents/{}/preview/?original=true'.format(doc.pk))
+
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(response.content, content)
+
    def test_document_actions_not_existing_file(self):

        doc = Document.objects.create(title="none", filename=os.path.basename("asd"), mime_type="application/pdf")
@@ -164,6 +196,24 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
        results = response.data['results']
        self.assertEqual(len(results), 3)

+        response = self.client.get("/api/documents/?tags__id__none={}".format(tag_3.id))
+        self.assertEqual(response.status_code, 200)
+        results = response.data['results']
+        self.assertEqual(len(results), 2)
+        self.assertEqual(results[0]['id'], doc1.id)
+        self.assertEqual(results[1]['id'], doc2.id)
+
+        response = self.client.get("/api/documents/?tags__id__none={},{}".format(tag_3.id, tag_2.id))
+        self.assertEqual(response.status_code, 200)
+        results = response.data['results']
+        self.assertEqual(len(results), 1)
+        self.assertEqual(results[0]['id'], doc1.id)
+
+        response = self.client.get("/api/documents/?tags__id__none={},{}".format(tag_2.id, tag_inbox.id))
+        self.assertEqual(response.status_code, 200)
+        results = response.data['results']
+        self.assertEqual(len(results), 0)
+
    def test_search_no_query(self):
        response = self.client.get("/api/search/")
        results = response.data['results']
@@ -320,7 +370,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
        self.assertEqual(response.data['documents_total'], 3)
        self.assertEqual(response.data['documents_inbox'], 1)

-    @mock.patch("documents.forms.async_task")
+    @mock.patch("documents.views.async_task")
    def test_upload(self, m):

        with open(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb") as f:
@@ -332,8 +382,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):

        args, kwargs = m.call_args
        self.assertEqual(kwargs['override_filename'], "simple.pdf")
+        self.assertIsNone(kwargs['override_title'])
+        self.assertIsNone(kwargs['override_correspondent_id'])
+        self.assertIsNone(kwargs['override_document_type_id'])
+        self.assertIsNone(kwargs['override_tag_ids'])

-    @mock.patch("documents.forms.async_task")
+    @mock.patch("documents.views.async_task")
    def test_upload_invalid_form(self, m):

        with open(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb") as f:
@@ -341,7 +395,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
        self.assertEqual(response.status_code, 400)
        m.assert_not_called()

-    @mock.patch("documents.forms.async_task")
+    @mock.patch("documents.views.async_task")
    def test_upload_invalid_file(self, m):

        with open(os.path.join(os.path.dirname(__file__), "samples", "simple.zip"), "rb") as f:
@@ -349,8 +403,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
        self.assertEqual(response.status_code, 400)
        m.assert_not_called()

-    @mock.patch("documents.forms.async_task")
-    @mock.patch("documents.forms.validate_filename")
+    @mock.patch("documents.views.async_task")
+    @mock.patch("documents.serialisers.validate_filename")
    def test_upload_invalid_filename(self, validate_filename, async_task):
        validate_filename.side_effect = ValidationError()
        with open(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb") as f:
@@ -358,3 +412,116 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
        self.assertEqual(response.status_code, 400)

        async_task.assert_not_called()
+
+    @mock.patch("documents.views.async_task")
+    def test_upload_with_title(self, async_task):
+        with open(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb") as f:
+            response = self.client.post("/api/documents/post_document/", {"document": f, "title": "my custom title"})
+        self.assertEqual(response.status_code, 200)
+
+        async_task.assert_called_once()
+
+        args, kwargs = async_task.call_args
+
+        self.assertEqual(kwargs['override_title'], "my custom title")
+
+    @mock.patch("documents.views.async_task")
+    def test_upload_with_correspondent(self, async_task):
+        c = Correspondent.objects.create(name="test-corres")
+        with open(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb") as f:
+            response = self.client.post("/api/documents/post_document/", {"document": f, "correspondent": c.id})
+        self.assertEqual(response.status_code, 200)
+
+        async_task.assert_called_once()
+
+        args, kwargs = async_task.call_args
+
+        self.assertEqual(kwargs['override_correspondent_id'], c.id)
+
+    @mock.patch("documents.views.async_task")
+    def test_upload_with_invalid_correspondent(self, async_task):
+        with open(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb") as f:
+            response = self.client.post("/api/documents/post_document/", {"document": f, "correspondent": 3456})
+        self.assertEqual(response.status_code, 400)
+
+        async_task.assert_not_called()
+
+    @mock.patch("documents.views.async_task")
+    def test_upload_with_document_type(self, async_task):
+        dt = DocumentType.objects.create(name="invoice")
+        with open(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb") as f:
+            response = self.client.post("/api/documents/post_document/", {"document": f, "document_type": dt.id})
+        self.assertEqual(response.status_code, 200)
+
+        async_task.assert_called_once()
+
+        args, kwargs = async_task.call_args
+
+        self.assertEqual(kwargs['override_document_type_id'], dt.id)
+
+    @mock.patch("documents.views.async_task")
+    def test_upload_with_invalid_document_type(self, async_task):
+        with open(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb") as f:
+            response = self.client.post("/api/documents/post_document/", {"document": f, "document_type": 34578})
+        self.assertEqual(response.status_code, 400)
+
+        async_task.assert_not_called()
+
+    @mock.patch("documents.views.async_task")
+    def test_upload_with_tags(self, async_task):
+        t1 = Tag.objects.create(name="tag1")
+        t2 = Tag.objects.create(name="tag2")
+        with open(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb") as f:
+            response = self.client.post(
+                "/api/documents/post_document/",
+                {"document": f, "tags": [t2.id, t1.id]})
+        self.assertEqual(response.status_code, 200)
+
+        async_task.assert_called_once()
+
+        args, kwargs = async_task.call_args
+
+        self.assertCountEqual(kwargs['override_tag_ids'], [t1.id, t2.id])
+
+    @mock.patch("documents.views.async_task")
+    def test_upload_with_invalid_tags(self, async_task):
+        t1 = Tag.objects.create(name="tag1")
+        t2 = Tag.objects.create(name="tag2")
+        with open(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb") as f:
+            response = self.client.post(
+                "/api/documents/post_document/",
+                {"document": f, "tags": [t2.id, t1.id, 734563]})
+        self.assertEqual(response.status_code, 400)
+
+        async_task.assert_not_called()
+
+    def test_get_metadata(self):
+        doc = Document.objects.create(title="test", filename="file.pdf", mime_type="image/png", archive_checksum="A")
+
+        shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "thumbnails", "0000001.png"), doc.source_path)
+        shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), doc.archive_path)
+
+        response = self.client.get(f"/api/documents/{doc.pk}/metadata/")
+        self.assertEqual(response.status_code, 200)
+
+        meta = response.data
+
+        self.assertEqual(meta['original_mime_type'], "image/png")
+        self.assertTrue(meta['has_archive_version'])
+        self.assertEqual(len(meta['original_metadata']), 0)
+        self.assertGreater(len(meta['archive_metadata']), 0)
+
+    def test_get_metadata_no_archive(self):
+        doc = Document.objects.create(title="test", filename="file.pdf", mime_type="application/pdf")
+
+        shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), doc.source_path)
+
+        response = self.client.get(f"/api/documents/{doc.pk}/metadata/")
+        self.assertEqual(response.status_code, 200)
+
+        meta = response.data
+
+        self.assertEqual(meta['original_mime_type'], "application/pdf")
+        self.assertFalse(meta['has_archive_version'])
+        self.assertGreater(len(meta['original_metadata']), 0)
+        self.assertIsNone(meta['archive_metadata'])
--- a/src/documents/tests/test_consumer.py
+++ b/src/documents/tests/test_consumer.py
@@ -1,5 +1,6 @@
 import os
 import re
+import shutil
 import tempfile
 from unittest import mock
 from unittest.mock import MagicMock
@@ -26,7 +27,7 @@ class TestAttributes(TestCase):

        self.assertEqual(file_info.title, title, filename)

-        self.assertEqual(tuple([t.slug for t in file_info.tags]), tags, filename)
+        self.assertEqual(tuple([t.name for t in file_info.tags]), tags, filename)

    def test_guess_attributes_from_name0(self):
        self._test_guess_attributes_from_name(
@@ -187,7 +188,7 @@ class TestFieldPermutations(TestCase):
            self.assertEqual(info.tags, (), filename)
        else:
            self.assertEqual(
-                [t.slug for t in info.tags], tags.split(','),
+                [t.name for t in info.tags], tags.split(','),
                filename
            )

@@ -341,8 +342,8 @@ class TestFieldPermutations(TestCase):
            info = FileInfo.from_filename(filename)
            self.assertEqual(info.title, "0001")
            self.assertEqual(len(info.tags), 2)
-            self.assertEqual(info.tags[0].slug, "tag1")
-            self.assertEqual(info.tags[1].slug, "tag2")
+            self.assertEqual(info.tags[0].name, "tag1")
+            self.assertEqual(info.tags[1].name, "tag2")
            self.assertIsNone(info.created)

        # Complex transformation with date in replacement string
@@ -355,8 +356,8 @@ class TestFieldPermutations(TestCase):
            info = FileInfo.from_filename(filename)
            self.assertEqual(info.title, "0001")
            self.assertEqual(len(info.tags), 2)
-            self.assertEqual(info.tags[0].slug, "tag1")
-            self.assertEqual(info.tags[1].slug, "tag2")
+            self.assertEqual(info.tags[0].name, "tag1")
+            self.assertEqual(info.tags[1].name, "tag2")
            self.assertEqual(info.created.year, 2019)
            self.assertEqual(info.created.month, 9)
            self.assertEqual(info.created.day, 8)
@@ -364,35 +365,36 @@ class TestFieldPermutations(TestCase):

 class DummyParser(DocumentParser):

-    def get_thumbnail(self):
+    def get_thumbnail(self, document_path, mime_type):
        # not important during tests
        raise NotImplementedError()

-    def __init__(self, path, logging_group, scratch_dir):
-        super(DummyParser, self).__init__(path, logging_group)
+    def __init__(self, logging_group, scratch_dir, archive_path):
+        super(DummyParser, self).__init__(logging_group)
        _, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=scratch_dir)
+        self.archive_path = archive_path

-    def get_optimised_thumbnail(self):
+    def get_optimised_thumbnail(self, document_path, mime_type):
        return self.fake_thumb

-    def get_text(self):
-        return "The Text"
+    def parse(self, document_path, mime_type):
+        self.text = "The Text"


 class FaultyParser(DocumentParser):

-    def get_thumbnail(self):
+    def get_thumbnail(self, document_path, mime_type):
        # not important during tests
        raise NotImplementedError()

-    def __init__(self, path, logging_group, scratch_dir):
-        super(FaultyParser, self).__init__(path, logging_group)
+    def __init__(self, logging_group, scratch_dir):
+        super(FaultyParser, self).__init__(logging_group)
        _, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=scratch_dir)

-    def get_optimised_thumbnail(self):
+    def get_optimised_thumbnail(self, document_path, mime_type):
        return self.fake_thumb

-    def get_text(self):
+    def parse(self, document_path, mime_type):
        raise ParseError("Does not compute.")


@@ -410,11 +412,11 @@ def fake_magic_from_file(file, mime=False):
 @mock.patch("documents.consumer.magic.from_file", fake_magic_from_file)
 class TestConsumer(DirectoriesMixin, TestCase):

-    def make_dummy_parser(self, path, logging_group):
-        return DummyParser(path, logging_group, self.dirs.scratch_dir)
+    def make_dummy_parser(self, logging_group):
+        return DummyParser(logging_group, self.dirs.scratch_dir, self.get_test_archive_file())

-    def make_faulty_parser(self, path, logging_group):
-        return FaultyParser(path, logging_group, self.dirs.scratch_dir)
+    def make_faulty_parser(self, logging_group):
+        return FaultyParser(logging_group, self.dirs.scratch_dir)

    def setUp(self):
        super(TestConsumer, self).setUp()
@@ -432,8 +434,16 @@ class TestConsumer(DirectoriesMixin, TestCase):
        self.consumer = Consumer()

    def get_test_file(self):
-        fd, f = tempfile.mkstemp(suffix=".pdf", dir=self.dirs.scratch_dir)
-        return f
+        src = os.path.join(os.path.dirname(__file__), "samples", "documents", "originals", "0000001.pdf")
+        dst = os.path.join(self.dirs.scratch_dir, "sample.pdf")
+        shutil.copy(src, dst)
+        return dst
+
+    def get_test_archive_file(self):
+        src = os.path.join(os.path.dirname(__file__), "samples", "documents", "archive", "0000001.pdf")
+        dst = os.path.join(self.dirs.scratch_dir, "sample_archive.pdf")
+        shutil.copy(src, dst)
+        return dst

    @override_settings(PAPERLESS_FILENAME_FORMAT=None)
    def testNormalOperation(self):
@@ -455,6 +465,13 @@ class TestConsumer(DirectoriesMixin, TestCase):
            document.thumbnail_path
        ))

+        self.assertTrue(os.path.isfile(
+            document.archive_path
+        ))
+
+        self.assertEqual(document.checksum, "42995833e01aea9b3edee44bbfdd7ce1")
+        self.assertEqual(document.archive_checksum, "62acb0bcbfbcaa62ca6ad3668e4e404b")
+
        self.assertFalse(os.path.isfile(filename))

    def testOverrideFilename(self):
@@ -502,7 +519,7 @@ class TestConsumer(DirectoriesMixin, TestCase):

        self.fail("Should throw exception")

-    def testDuplicates(self):
+    def testDuplicates1(self):
        self.consumer.try_consume_file(self.get_test_file())

        try:
@@ -513,6 +530,21 @@ class TestConsumer(DirectoriesMixin, TestCase):

        self.fail("Should throw exception")

+    def testDuplicates2(self):
+        self.consumer.try_consume_file(self.get_test_file())
+
+        try:
+            self.consumer.try_consume_file(self.get_test_archive_file())
+        except ConsumerError as e:
+            self.assertTrue(str(e).endswith("It is a duplicate."))
+            return
+
+        self.fail("Should throw exception")
+
+    def testDuplicates3(self):
+        self.consumer.try_consume_file(self.get_test_archive_file())
+        self.consumer.try_consume_file(self.get_test_file())
+
    @mock.patch("documents.parsers.document_consumer_declaration.send")
    def testNoParsers(self, m):
        m.return_value = []
@@ -520,7 +552,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
        try:
            self.consumer.try_consume_file(self.get_test_file())
        except ConsumerError as e:
-            self.assertTrue("File extension .pdf does not map to any" in str(e))
+            self.assertTrue("No parsers abvailable for" in str(e))
            return

        self.fail("Should throw exception")
@@ -566,10 +598,10 @@ class TestConsumer(DirectoriesMixin, TestCase):

        self.assertEqual(document.title, "new docs")
        self.assertEqual(document.correspondent.name, "Bank")
-        self.assertEqual(document.filename, "bank/new-docs-0000001.pdf")
+        self.assertEqual(document.filename, "Bank/new docs.pdf")

    @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
-    @mock.patch("documents.signals.handlers.generate_filename")
+    @mock.patch("documents.signals.handlers.generate_unique_filename")
    def testFilenameHandlingUnstableFormat(self, m):

        filenames = ["this", "that", "now this", "i cant decide"]
@@ -579,7 +611,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
            filenames.insert(0, f)
            return f

-        m.side_effect = lambda f: get_filename()
+        m.side_effect = lambda f, root: get_filename()

        filename = self.get_test_file()

--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
jonaswinkler	48e785f7b6	Merge branch 'dev'	2020-12-10 02:24:58 +01:00
jonaswinkler	0cc22017de	revert last commit.	2020-12-10 02:24:36 +01:00
jonaswinkler	3584f732a7	added another library that's required to get this running on raspberry pi	2020-12-10 02:14:26 +01:00
jonaswinkler	476beacd7f	changelog	2020-12-10 01:12:30 +01:00
jonaswinkler	69c6d68219	a print() command somehow sneaked past my commit checks.	2020-12-10 00:59:03 +01:00
jonaswinkler	24d8a50f01	fixed an issue with the docker entrypoint script.	2020-12-10 00:54:37 +01:00
jonaswinkler	2df1894683	changelog	2020-12-10 00:30:35 +01:00
jonaswinkler	3f03cbf66c	excluded the lockfile from the sanity checker.	2020-12-10 00:29:47 +01:00
jonaswinkler	b3daf0efc3	added progress bar to the document renamer.	2020-12-10 00:10:36 +01:00
jonaswinkler	46c0ab943f	added a progress bar to the reindex command.	2020-12-10 00:02:45 +01:00
jonaswinkler	2b57b80656	fixes #113	2020-12-09 23:45:53 +01:00
jonaswinkler	0b1b9de3cc	layout fix	2020-12-09 22:38:52 +01:00
jonaswinkler	20c46278dc	removed a janky test case that caused other test cases to fail	2020-12-09 22:18:03 +01:00
jonaswinkler	70cbdbf23b	locking media directory while deleting files	2020-12-09 22:17:23 +01:00
jonaswinkler	6003122b06	fixes #112	2020-12-09 22:16:57 +01:00
jonaswinkler	8ca97924be	shadows	2020-12-09 13:44:37 +01:00
jonaswinkler	2be0ba9f72	fixed test case. fixed bug with the decryption logic.	2020-12-09 13:27:02 +01:00
jonaswinkler	0a0d462938	tags from folders: case insensitive	2020-12-09 00:07:22 +01:00
jonaswinkler	74a99cf330	removed slugs entirely, since their only purpose was purely cosmetic anyway.	2020-12-09 00:04:37 +01:00
jonaswinkler	5753c83618	version bump	2020-12-08 21:20:05 +01:00
jonaswinkler	550a74347c	a test that "verifies" that the file renaming lock works and no inconsistencies are created.	2020-12-08 21:08:44 +01:00
jonaswinkler	e428a8a008	file upload improvements	2020-12-08 17:35:51 +01:00
jonaswinkler	001ab88fff	docs	2020-12-08 17:34:38 +01:00
jonaswinkler	d3cf85b9e9	Added a section on best practices.	2020-12-08 17:34:29 +01:00
jonaswinkler	91c722feff	Merge branch 'master' into dev	2020-12-08 16:46:13 +01:00
jonaswinkler	871e22e3a3	documentation	2020-12-08 16:45:22 +01:00
jonaswinkler	bf3b2249c5	Metadata documentation	2020-12-08 16:36:14 +01:00
jonaswinkler	6613104b4f	date and time in metadata	2020-12-08 16:21:38 +01:00
jonaswinkler	0028fde2fd	more metadata #32	2020-12-08 16:09:47 +01:00
jonaswinkler	ad527fe97c	reading and displaying PDF metadata	2020-12-08 15:45:02 +01:00
jonaswinkler	9da11f29c7	fixes #90	2020-12-08 13:54:49 +01:00
jonaswinkler	c240fa1883	changelog	2020-12-08 11:53:58 +01:00
jonaswinkler	bb33ac5e9e	fixees #77	2020-12-08 01:12:03 +01:00
jonaswinkler	30f200ad39	fix z-order on the edit page.	2020-12-08 00:45:23 +01:00
jonaswinkler	5321ff1f20	upload status addresses #100	2020-12-08 00:45:11 +01:00
jonaswinkler	c4a939dbcc	addresses #104	2020-12-08 00:09:36 +01:00
jonaswinkler	3f05fe45bb	Addresses #99 entirely.	2020-12-07 23:42:18 +01:00
jonaswinkler	dfd844124d	addresses #107	2020-12-07 22:29:51 +01:00
jonaswinkler	d4febbc40f	codestyle	2020-12-07 22:17:47 +01:00
jonaswinkler	dc36e8566a	addresses #106	2020-12-07 22:15:56 +01:00
jonaswinkler	87fa118de0	added filenames to the API #108	2020-12-07 21:52:26 +01:00
jonaswinkler	35c3d5c0b5	Merge branch 'dev' of github.com:jonaswinkler/paperless-ng into dev	2020-12-07 15:27:36 +01:00
jonaswinkler	9e46afafd7	fixes #102	2020-12-07 15:25:06 +01:00
jonaswinkler	56acd4f320	fixes #105	2020-12-07 12:46:46 +01:00
Jonas Winkler	2bbeb8ffe0	Update CONTRIBUTING.md	2020-12-06 23:30:51 +01:00
Jonas Winkler	5e188c0203	Merge pull request #97 from trahflow/dev Dockerfile: Add libqpdf-dev to build dependencies	2020-12-06 23:25:34 +01:00
Wolfhart Feldmeier	8b637214b4	Dockerfile: Add libqpdf-dev to build dependencies	2020-12-06 23:00:20 +01:00
jonaswinkler	eede5595e9	better error messages for file uploads. adresses #91	2020-12-06 22:31:12 +01:00
jonaswinkler	c1fc8b2dac	codestyle	2020-12-06 19:04:32 +01:00
jonaswinkler	28622d700d	changed the way public filenames (i.e., for download and exporting) are generated. #94	2020-12-06 19:03:45 +01:00
jonaswinkler	a079c310b4	changes to filename generation, partially addresses #90	2020-12-06 16:13:37 +01:00
jonaswinkler	278f6da16a	documentation.	2020-12-06 14:41:14 +01:00
jonaswinkler	45e39d04ae	fixes #87	2020-12-06 01:37:44 +01:00
jonaswinkler	e46353cee8	added a welcome widget	2020-12-06 01:25:12 +01:00
jonaswinkler	65816a434c	Update README.md	2020-12-05 15:55:25 +01:00
jonaswinkler	e3104d34fa	Update README.md	2020-12-05 15:40:51 +01:00
jonaswinkler	891bd2de7f	Update README.md	2020-12-05 15:37:23 +01:00
jonaswinkler	805e3d51e8	Merge remote-tracking branch 'origin/master' into dev	2020-12-05 14:54:28 +01:00
jonaswinkler	f1f9a076c9	Merge pull request #86 from bauerj/docs-migration Add missing step to migration guide	2020-12-05 14:36:32 +01:00
jonaswinkler	55cc49cd88	dependencies	2020-12-05 14:00:27 +01:00
jonaswinkler	d52260468c	docs	2020-12-05 14:00:02 +01:00
jonaswinkler	aacd362203	docs config	2020-12-05 13:53:11 +01:00
jonaswinkler	38a651c42a	docs	2020-12-05 13:53:03 +01:00
Johann Bauer	55cc93e5e9	Add missing step to migration guide	2020-12-05 13:41:15 +01:00
jonaswinkler	9ee21f081f	versions	2020-12-05 13:22:08 +01:00
jonaswinkler	bfbdd6e198	testing the importer	2020-12-05 13:19:14 +01:00
jonaswinkler	1e9e347f15	documentation	2020-12-05 12:52:49 +01:00
jonaswinkler	782dbee3a0	removed obsolete option	2020-12-05 01:23:17 +01:00
jonaswinkler	316ee72177	bugfix	2020-12-05 01:21:16 +01:00
jonaswinkler	f88cf69173	bugfix	2020-12-05 00:37:05 +01:00
jonaswinkler	e9758d5224	bugfix	2020-12-04 23:16:04 +01:00
jonaswinkler	5456d5eafa	bugfix	2020-12-04 23:07:11 +01:00
jonaswinkler	dab4b1253a	fixes for the parser.	2020-12-04 16:44:34 +01:00
jonaswinkler	34bc4020c9	documentation	2020-12-04 16:07:31 +01:00
jonaswinkler	ab871d67fc	more tests	2020-12-04 15:56:26 +01:00
jonaswinkler	371745b6dc	mail handling: When exceptions occur during account/rule/message handling, paperless will continue with the next account/rule/message. mail handling: When paperless encounters a very long fixes #82	2020-12-04 15:42:05 +01:00
jonaswinkler	991a46c4f0	disabled thumbnail trimming.	2020-12-04 12:44:02 +01:00
jonaswinkler	eb5bdc48aa	API now supports setting metadata when POSTing documents.	2020-12-04 12:09:21 +01:00
jonaswinkler	3634dfbcf8	Update README.md	2020-12-04 11:12:59 +01:00
jonaswinkler	57ad485913	add observables to search results	2020-12-04 01:26:27 +01:00
jonaswinkler	ceaade29a6	bugfix	2020-12-04 01:26:12 +01:00
jonaswinkler	9a4d410f66	use the observables everywhere in the application.	2020-12-04 01:25:52 +01:00
jonaswinkler	34f353f399	document service adds observables for linked data to its results	2020-12-04 01:24:07 +01:00
jonaswinkler	1d8765100c	caching for listAll methods	2020-12-04 01:22:14 +01:00
jonaswinkler	5bdb57a392	fix a test case.	2020-12-04 01:20:42 +01:00
jonaswinkler	0a18c819d4	remove _object from document results, which makes the API about 33% faster.	2020-12-04 01:17:55 +01:00
jonaswinkler	982ea84906	adjustments of the front end for API changes.	2020-12-03 20:28:17 +01:00
jonaswinkler	68c233005e	api changes.	2020-12-03 19:56:52 +01:00
jonaswinkler	62cc4a7a54	docs	2020-12-03 19:56:33 +01:00
jonaswinkler	6d3e5b0a1b	update dependencies.	2020-12-03 19:56:24 +01:00
jonaswinkler	c4d13b5802	improvements to the filter.	2020-12-03 19:55:42 +01:00
jonaswinkler	c263d8e8f1	docs	2020-12-03 19:02:46 +01:00
jonaswinkler	2a9e6f7a58	Update README.md	2020-12-03 18:59:43 +01:00
jonaswinkler	c02813623d	layout changes	2020-12-03 18:37:58 +01:00
jonaswinkler	1b5b07a020	bugfix	2020-12-03 18:37:25 +01:00
jonaswinkler	8b16cd99dc	updated the API, it now supports tags, correspondents, types and title when uploading documents.	2020-12-03 18:36:23 +01:00
jonaswinkler	20fc065567	hide the filter when it's cleared.	2020-12-03 15:02:27 +01:00
jonaswinkler	802e389198	document count	2020-12-03 01:24:57 +01:00
jonaswinkler	72a4ff0fca	proper document archiver with progress bar.	2020-12-03 01:04:52 +01:00
jonaswinkler	e22769ca63	fixed a test case.	2020-12-03 01:04:13 +01:00
jonaswinkler	d661f87f63	reorganized logging.	2020-12-03 01:03:56 +01:00
jonaswinkler	748b27c680	small fix.	2020-12-03 01:03:28 +01:00
jonaswinkler	6a04e95f69	catch encrypted pdf documents	2020-12-03 01:02:37 +01:00
jonaswinkler	9e9b9ae631	updated dependencies.	2020-12-03 01:01:49 +01:00
jonaswinkler	a47623dbaf	documentation	2020-12-03 00:15:03 +01:00
jonaswinkler	657c41ab37	test fixes and changelog	2020-12-02 22:44:18 +01:00
jonaswinkler	4548cf08c7	fixes #78	2020-12-02 18:00:49 +01:00
jonaswinkler	e3ce573fbb	a couple fixes and more supported image files	2020-12-02 17:39:49 +01:00
jonaswinkler	5e1543bad5	more test	2020-12-02 01:23:55 +01:00
jonaswinkler	282e6f453f	clickable fast filters now disregard any unapplied changes in the filter editor	2020-12-02 01:19:28 +01:00
jonaswinkler	a4f60c48ea	testing and fixing the sanity checker	2020-12-02 01:18:11 +01:00
jonaswinkler	f3f5227776	fix some tests.	2020-12-01 23:54:33 +01:00
jonaswinkler	19bb29d5cd	documentation	2020-12-01 23:38:42 +01:00
jonaswinkler	8cad12b154	documentation.	2020-12-01 15:26:22 +01:00
jonaswinkler	384d381acf	more testing.	2020-12-01 15:26:15 +01:00
jonaswinkler	834352130c	checking file types against parsers in the consumer.	2020-12-01 15:26:05 +01:00
jonaswinkler	b0c8ade241	code style	2020-12-01 14:33:37 +01:00
jonaswinkler	a33082235b	Merge branch 'feature-ocrmypdf' into dev	2020-12-01 14:32:09 +01:00
jonaswinkler	ec6d01f7a5	better indication of what item is selected in the menu	2020-12-01 14:31:50 +01:00
jonaswinkler	1ce6466ef8	tests for mail tasks	2020-12-01 14:31:36 +01:00
jonaswinkler	f677ed8798	small fix.	2020-12-01 14:30:35 +01:00
jonaswinkler	12fa844c7f	testing the new noarchive option.	2020-12-01 14:30:13 +01:00
jonaswinkler	fd3df1ec58	some more tests.	2020-12-01 14:15:43 +01:00
jonaswinkler	24b8c358cc	Merge branch 'dev' into feature-ocrmypdf	2020-11-30 23:53:19 +01:00
jonaswinkler	d58706a34b	pipfile update.	2020-11-30 23:45:21 +01:00
jonaswinkler	3168602610	Merge branch 'master' into dev	2020-11-30 23:03:13 +01:00
jonaswinkler	756c80690d	fix for the docs.	2020-11-30 23:02:59 +01:00
jonaswinkler	c5dbd7a6fb	Merge pull request #69 from jayme-github/feature-directory-tags Create tags from sub directories	2020-11-30 22:53:52 +01:00
jonaswinkler	8a5c782425	filename handling for archive files.	2020-11-30 21:38:42 +01:00
jonaswinkler	aaa6599283	Merge branch 'dev' into feature-ocrmypdf	2020-11-30 16:48:09 +01:00
jayme-github	fa9a5cc247	Create tags from sub directories The names of sub directories in the consumer directory will be added as tags for the document to be consumed. To enable this, set: PAPERLESS_CONSUMER_RECURSIVE=1 PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS=1 Fixes #50	2020-11-30 14:22:35 +01:00
jonaswinkler	ac1b701000	more tests!	2020-11-29 19:58:48 +01:00
jonaswinkler	a3143ec512	more tests!	2020-11-29 19:22:49 +01:00
jonaswinkler	39c682dc07	Merge branch 'dev' into feature-ocrmypdf	2020-11-29 18:37:38 +01:00
jonaswinkler	eeb63693c9	tests	2020-11-29 18:37:31 +01:00
jonaswinkler	51851ff15a	tests	2020-11-29 13:37:39 +01:00
jonaswinkler	490f59451b	Merge branch 'dev' into feature-ocrmypdf	2020-11-29 13:08:32 +01:00
jonaswinkler	fca98b411e	reorganised settings documentation and added OCR_USER_ARGS	2020-11-29 12:38:32 +01:00
jonaswinkler	2f7396e2aa	code style.	2020-11-29 12:37:22 +01:00
jonaswinkler	9677631bb2	error logging.	2020-11-29 12:37:11 +01:00
jonaswinkler	0565118a01	fixed checking the installed languages.	2020-11-29 12:31:42 +01:00
jonaswinkler	24767f62c7	added checksums for archived documents.	2020-11-29 12:31:26 +01:00
jonaswinkler	fdaf419a7e	Merge branch 'dev' into feature-ocrmypdf	2020-11-29 01:35:37 +01:00
jonaswinkler	bc3ae34c26	Merge branch 'dev' into feature-ocrmypdf	2020-11-28 13:17:18 +01:00
jonaswinkler	074b682312	added a simple document archiver that produces archived versions of all originals.	2020-11-28 11:49:07 +01:00
jonaswinkler	f7e554a3c1	Adjusted the exporter and importer so that they take archived documents into account.	2020-11-28 11:24:59 +01:00
jonaswinkler	5b020bb8d2	Adjusted the sanity checker so that it takes archived documents into account.	2020-11-28 11:24:19 +01:00
jonaswinkler	6388d19f7a	Merge branch 'dev' into feature-ocrmypdf	2020-11-27 19:16:59 +01:00
jonaswinkler	06cfc3113a	test case fixes.	2020-11-27 14:06:37 +01:00
jonaswinkler	ea9de1bcf1	Merge branch 'dev' into feature-ocrmypdf	2020-11-27 14:03:19 +01:00
jonaswinkler	89a31443a5	use a more recent version of ubuntu in travis so that tesseract 4.0 is installed.	2020-11-27 12:08:23 +01:00
jonaswinkler	202b88632c	updated docs	2020-11-27 12:02:36 +01:00
jonaswinkler	8bcc40a182	Pipfile.lock post merge	2020-11-27 00:10:40 +01:00
jonaswinkler	24381ad5dc	Merge branch 'dev' into feature-ocrmypdf	2020-11-27 00:06:20 +01:00
jonaswinkler	7bb1982c48	fixed lockfile due to merge.	2020-11-26 18:40:51 +01:00
jonaswinkler	f956073f4a	Merge branch 'dev' into feature-ocrmypdf	2020-11-26 18:40:01 +01:00
Jonas Winkler	e87575240d	more tests of the new parser	2020-11-26 00:08:23 +01:00
Jonas Winkler	39fa02dcb1	more test	2020-11-25 21:38:19 +01:00
Jonas Winkler	7e84863beb	Merge branch 'dev' into feature-ocrmypdf	2020-11-25 21:13:02 +01:00
Jonas Winkler	6f30ceea38	GnuPG for archive file.	2020-11-25 20:16:27 +01:00
Jonas Winkler	f51d2be303	fixed the test cases	2020-11-25 19:51:09 +01:00
Jonas Winkler	9bd0bee2f6	codestyle	2020-11-25 19:51:02 +01:00
Jonas Winkler	a60a4babf6	OMP_THREAD_LIMIT	2020-11-25 19:37:59 +01:00
Jonas Winkler	a03315102a	added image DPI detection to the tesseract parser.	2020-11-25 19:37:48 +01:00
Jonas Winkler	df801d17e1	reworked the interface of the parsers.	2020-11-25 19:36:39 +01:00
Jonas Winkler	d3c13f6c93	removed unused settings.	2020-11-25 19:30:11 +01:00
Jonas Winkler	fe7aa10d2c	frontend support for downloading originals	2020-11-25 18:01:43 +01:00
Jonas Winkler	17a581495c	proper filenames for originals and archived documents	2020-11-25 18:01:29 +01:00
Jonas Winkler	64180b5668	fixed up a test case	2020-11-25 17:28:49 +01:00
Jonas Winkler	81aaadb2a3	codestyle	2020-11-25 17:23:57 +01:00
Jonas Winkler	b1110f7291	update git ignore	2020-11-25 17:22:51 +01:00
Jonas Winkler	cb9e5b5ee3	Add metadata field: has archive version	2020-11-25 17:18:57 +01:00
Jonas Winkler	af99cbccd9	Merge branch 'dev' into feature-ocrmypdf	2020-11-25 17:17:14 +01:00
Jonas Winkler	b269af7572	Merge branch 'dev' into feature-ocrmypdf	2020-11-25 16:58:20 +01:00
Jonas Winkler	56ce267f89	removed obsolete tests.	2020-11-25 14:51:32 +01:00
Jonas Winkler	ef6690905e	todo note.	2020-11-25 14:51:00 +01:00
Jonas Winkler	2d559d330d	reworked PDF parser that uses OCRmyPDF and produces archive files.	2020-11-25 14:50:43 +01:00
Jonas Winkler	95ec520f13	api serves archive files by default.	2020-11-25 14:48:36 +01:00
Jonas Winkler	8069c2eb6a	add support for archive files.	2020-11-25 14:47:17 +01:00
Jonas Winkler	9a33f191a7	added archive directory.	2020-11-25 14:45:21 +01:00