Merge remote-tracking branch 'upstream/master'

2026-01-30 23:08:59 -06:00 · 2018-09-11 14:43:59 +02:00
parent c765ef5eeb 2edf65dd1e
commit e72735c4f0
20 changed files with 348 additions and 74 deletions
--- a/2
+++ b/2
@@ -36,3 +36,5 @@ pytest-xdist = "*"
 [dev-packages]
 ipython = "*"
 sphinx = "*"
 tox = "*"
--- a/Pipfile.lock
+++ b/Pipfile.lock
@@ -1,7 +1,7 @@
 {
    "_meta": {
        "hash": {
-            "sha256": "e20c2294bcafd346ee57901df94a515a12976ed192dc37df848b39b56bdd1f4b"
+            "sha256": "6d8bad24aa5d0c102b13b5ae27acba04836cd5a07a4003cb2763de1e0a3406b7"
        },
        "pipfile-spec": 6,
        "requires": {},
@@ -19,7 +19,7 @@
                "sha256:37228cda29411948b422fae072f57e31d3396d2ee1c9783775980ee9c9990af6",
                "sha256:58587dd4dc3daefad0487f6d9ae32b4542b185e1c36db6993290e7c41ca2b47c"
            ],
-            "markers": "python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.3.*' and python_version != '3.2.*' and python_version != '3.1.*'",
+            "markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'",
            "version": "==1.5"
        },
        "atomicwrites": {
@@ -27,7 +27,7 @@
                "sha256:0312ad34fcad8fac3704d441f7b317e50af620823353ec657a53e981f92920c0",
                "sha256:ec9ae8adaae229e4f8446952d204a3e4b5fdd2d099f9be3aaf556120135fb3ee"
            ],
-            "markers": "python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.3.*' and python_version != '3.2.*' and python_version != '3.1.*'",
+            "markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'",
            "version": "==1.2.1"
        },
        "attrs": {
@@ -85,7 +85,7 @@
                "sha256:e05cb4d9aad6233d67e0541caa7e511fa4047ed7750ec2510d466e806e0255d6",
                "sha256:f3f501f345f24383c0000395b26b726e46758b71393267aeae0bd36f8b3ade80"
            ],
-            "markers": "python_version >= '2.6' and python_version != '3.0.*' and python_version != '3.2.*' and python_version < '4' and python_version != '3.1.*'",
+            "markers": "python_version >= '2.6' and python_version != '3.2.*' and python_version != '3.0.*' and python_version != '3.1.*' and python_version < '4'",
            "version": "==4.5.1"
        },
        "coveralls": {
@@ -163,7 +163,7 @@
                "sha256:a7a84d5fa07a089186a329528f127c9d73b9de57f1a1131b82bb5320ee651f6a",
                "sha256:fc155a6b553c66c838d1a22dba1dc9f5f505c43285a878c6f74a79c024750b83"
            ],
-            "markers": "python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.3.*' and python_version != '3.2.*' and python_version != '3.1.*'",
+            "markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'",
            "version": "==1.5.0"
        },
        "factory-boy": {
@@ -179,6 +179,7 @@
                "sha256:ea7cfd3aeb1544732d08bd9cfba40c5b78e3a91e17b1a0698ab81bfc5554c628",
                "sha256:f6d67f04abfb2b4bea7afc7fa6c18cf4c523a67956e455668be9ae42bccc21ad"
            ],
            "markers": "python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.2.*' and python_version >= '2.7'",
            "version": "==0.9.0"
        },
        "filemagic": {
@@ -282,7 +283,7 @@
                "sha256:6e3836e39f4d36ae72840833db137f7b7d35105079aee6ec4a62d9f80d594dd1",
                "sha256:95eb8364a4708392bae89035f45341871286a333f749c3141c20573d2b3876e1"
            ],
-            "markers": "python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.3.*' and python_version != '3.2.*' and python_version != '3.1.*'",
+            "markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'",
            "version": "==0.7.1"
        },
        "py": {
@@ -290,7 +291,7 @@
                "sha256:06a30435d058473046be836d3fc4f27167fd84c45b99704f2fb5509ef61f9af1",
                "sha256:50402e9d1c9005d759426988a492e0edaadb7f4e68bcddfea586bc7432d009c6"
            ],
-            "markers": "python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.3.*' and python_version != '3.2.*' and python_version != '3.1.*'",
+            "markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'",
            "version": "==1.6.0"
        },
        "pycodestyle": {
@@ -303,26 +304,26 @@
        },
        "pyocr": {
            "hashes": [
-                "sha256:bdc4d43bf9b63c2a9a4b2c9a1a623a0e63c8e6600eede5dbe866b31f3a5f2207"
+                "sha256:b6ba6263fd92da56627dff6d263d991a2246aacd117d1788f11b93f419ca395f"
            ],
            "index": "pypi",
-            "version": "==0.5.2"
+            "version": "==0.5.3"
        },
        "pytest": {
            "hashes": [
-                "sha256:2d7c49e931316cc7d1638a3e5f54f5d7b4e5225972b3c9838f3584788d27f349",
+                "sha256:453cbbbe5ce6db38717d282b758b917de84802af4288910c12442984bde7b823",
-                "sha256:ad0c7db7b5d4081631e0155f5c61b80ad76ce148551aaafe3a718d65a7508b18"
+                "sha256:a8a07f84e680482eb51e244370aaf2caa6301ef265f37c2bdefb3dd3b663f99d"
            ],
            "index": "pypi",
-            "version": "==3.7.4"
+            "version": "==3.8.0"
        },
        "pytest-cov": {
            "hashes": [
-                "sha256:03aa752cf11db41d281ea1d807d954c4eda35cfa1b21d6971966cc041bbf6e2d",
+                "sha256:513c425e931a0344944f84ea47f3956be0e416d95acbd897a44970c8d926d5d7",
-                "sha256:890fe5565400902b0c78b5357004aab1c814115894f4f21370e2433256a3eeec"
+                "sha256:e360f048b7dae3f2f2a9a4d067b2dd6b6a015d384d1577c994a43f3f7cbad762"
            ],
            "index": "pypi",
-            "version": "==2.5.1"
+            "version": "==2.6.0"
        },
        "pytest-django": {
            "hashes": [
@@ -344,6 +345,7 @@
                "sha256:e4500cd0509ec4a26535f7d4112a8cc0f17d3a41c29ffd4eab479d2a55b30805",
                "sha256:f275cb48a73fc61a6710726348e1da6d68a978f0ec0c54ece5a5fae5977e5a08"
            ],
            "markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'",
            "version": "==0.2"
        },
        "pytest-sugar": {
@@ -457,7 +459,7 @@
                "sha256:a68ac5e15e76e7e5dd2b8f94007233e01effe3e50e8daddf69acfd81cb686baf",
                "sha256:b5725a0bd4ba422ab0e66e89e030c806576753ea3ee08554382c14e685d117b5"
            ],
-            "markers": "python_version >= '2.6' and python_version != '3.3.*' and python_version < '4' and python_version != '3.1.*' and python_version != '3.2.*' and python_version != '3.0.*'",
+            "markers": "python_version >= '2.6' and python_version != '3.2.*' and python_version != '3.0.*' and python_version != '3.1.*' and python_version < '4' and python_version != '3.3.*'",
            "version": "==1.23"
        }
    },
@@ -521,10 +523,11 @@
        },
        "imagesize": {
            "hashes": [
-                "sha256:3620cc0cadba3f7475f9940d22431fc4d407269f1be59ec9b8edcca26440cf18",
+                "sha256:3f349de3eb99145973fefb7dbe38554414e5c30abd0c8e4b970a7c9d09f3a1d8",
-                "sha256:5b326e4678b6925158ccc66a9fa3122b6106d7c876ee32d7de6ce59385b96315"
+                "sha256:f3832918bc3c66617f92e35f5d70729187676313caa60c187eb0f28b8fe5e3b5"
            ],
-            "version": "==1.0.0"
+            "markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'",
            "version": "==1.1.0"
        },
        "ipython": {
            "hashes": [
@@ -590,6 +593,14 @@
            ],
            "version": "==0.7.4"
        },
        "pluggy": {
            "hashes": [
                "sha256:6e3836e39f4d36ae72840833db137f7b7d35105079aee6ec4a62d9f80d594dd1",
                "sha256:95eb8364a4708392bae89035f45341871286a333f749c3141c20573d2b3876e1"
            ],
            "markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'",
            "version": "==0.7.1"
        },
        "prompt-toolkit": {
            "hashes": [
                "sha256:1df952620eccb399c53ebb359cc7d9a8d3a9538cb34c5a1344bdbeb29fbcc381",
@@ -605,6 +616,14 @@
            ],
            "version": "==0.6.0"
        },
        "py": {
            "hashes": [
                "sha256:06a30435d058473046be836d3fc4f27167fd84c45b99704f2fb5509ef61f9af1",
                "sha256:50402e9d1c9005d759426988a492e0edaadb7f4e68bcddfea586bc7432d009c6"
            ],
            "markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'",
            "version": "==1.6.0"
        },
        "pygments": {
            "hashes": [
                "sha256:78f3f434bcc5d6ee09020f92ba487f95ba50f1e3ef83ae96b9d5ffa1bab25c5d",
@@ -656,20 +675,28 @@
        },
        "sphinx": {
            "hashes": [
-                "sha256:a07050845cc9a2f4026a6035cc8ed795a5ce7be6528bbc82032385c10807dfe7",
+                "sha256:217a7705adcb573da5bbe1e0f5cab4fa0bd89fd9342c9159121746f593c2d5a4",
-                "sha256:d719de667218d763e8fd144b7fcfeefd8d434a6201f76bf9f0f0c1fa6f47fcdb"
+                "sha256:a602513f385f1d5785ff1ca420d9c7eb1a1b63381733b2f0ea8188a391314a86"
            ],
            "index": "pypi",
-            "version": "==1.7.8"
+            "version": "==1.7.9"
        },
        "sphinxcontrib-websupport": {
            "hashes": [
                "sha256:68ca7ff70785cbe1e7bccc71a48b5b6d965d79ca50629606c7861a21b206d9dd",
                "sha256:9de47f375baf1ea07cdb3436ff39d7a9c76042c10a769c52353ec46e4e8fc3b9"
            ],
-            "markers": "python_version != '3.3.*' and python_version >= '2.7' and python_version != '3.1.*' and python_version != '3.2.*' and python_version != '3.0.*'",
+            "markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'",
            "version": "==1.1.0"
        },
        "tox": {
            "hashes": [
                "sha256:37cf240781b662fb790710c6998527e65ca6851eace84d1595ee71f7af4e85f7",
                "sha256:eb61aa5bcce65325538686f09848f04ef679b5cd9b83cc491272099b28739600"
            ],
            "index": "pypi",
            "version": "==3.2.1"
        },
        "traitlets": {
            "hashes": [
                "sha256:9c4bd2d267b7153df9152698efb1050a5d84982d3384a37b2c1f7723ba3e7835",
@@ -682,9 +709,17 @@
                "sha256:a68ac5e15e76e7e5dd2b8f94007233e01effe3e50e8daddf69acfd81cb686baf",
                "sha256:b5725a0bd4ba422ab0e66e89e030c806576753ea3ee08554382c14e685d117b5"
            ],
-            "markers": "python_version >= '2.6' and python_version != '3.3.*' and python_version < '4' and python_version != '3.1.*' and python_version != '3.2.*' and python_version != '3.0.*'",
+            "markers": "python_version >= '2.6' and python_version != '3.2.*' and python_version != '3.0.*' and python_version != '3.1.*' and python_version < '4' and python_version != '3.3.*'",
            "version": "==1.23"
        },
        "virtualenv": {
            "hashes": [
                "sha256:2ce32cd126117ce2c539f0134eb89de91a8413a29baac49cbab3eb50e2026669",
                "sha256:ca07b4c0b54e14a91af9f34d0919790b016923d157afda5efdde55c96718f752"
            ],
            "markers": "python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.2.*' and python_version != '3.1.*'",
            "version": "==16.0.0"
        },
        "wcwidth": {
            "hashes": [
                "sha256:3df37372226d6e63e1b1e1eda15c594bca98a22d33a23832a90998faa96bc65e",
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -1,6 +1,23 @@
 Changelog
 #########
 2.3.0
 =====
 * Support for consuming plain text & markdown documents was added by
  `Joshua Taillon`_!  This was a long-requested feature, and its addition is
  likely to be greatly appreciated by the community: `#395`_  Thanks also to
  `David Martin`_ for his assistance on the issue.
 * `dubit0`_ found & fixed a bug that prevented management commands from running
  before we had an operational database: `#396`_
 * Joshua also added a simple update to the thumbnail generation process to
  improve performance: `#399`_
 * As his last bit of effort on this release, Joshua also added some code to
  allow you to view the documents inline rather than download them as an
  attachment. `#400`_
 * Finally, `ahyear`_ found a slip in the Docker documentation and patched it. `#401`_
 2.2.1
 =====
@@ -19,6 +36,10 @@ Changelog
  easier on those of us with lots of different tags: `#391`_.
 * `Kilian Koeltzsch`_ noticed a bug in how we capture & automatically create
  tags, so that's fixed now too: `#384`_.
 * `erikarvstedt`_ tweaked the behaviour of the test suite to be better behaved
  for packaging environments: `#383`_.
 * `Lukasz Soluch`_ added CORS support to make building a new Javascript-based front-end
  cleaner & easier: `#387`_.
 2.1.0
@@ -476,6 +497,10 @@ bulk of the work on this big change.
 .. _Tim Brooks: https://github.com/brookst
 .. _Stéphane Brunner: https://github.com/sbrunner
 .. _Kilian Koeltzsch: https://github.com/kiliankoe
 .. _Lukasz Soluch: https://github.com/LukaszSolo
 .. _Joshua Taillon: https://github.com/jat255
 .. _dubit0:  https://github.com/dubit0
 .. _ahyear:  https://github.com/ahyear
 .. _#20: https://github.com/danielquinn/paperless/issues/20
 .. _#44: https://github.com/danielquinn/paperless/issues/44
@@ -550,11 +575,18 @@ bulk of the work on this big change.
 .. _#374: https://github.com/danielquinn/paperless/pull/374
 .. _#375: https://github.com/danielquinn/paperless/pull/375
 .. _#376: https://github.com/danielquinn/paperless/pull/376
 .. _#383: https://github.com/danielquinn/paperless/pull/383
 .. _#384: https://github.com/danielquinn/paperless/issues/384
 .. _#386: https://github.com/danielquinn/paperless/issues/386
 .. _#387: https://github.com/danielquinn/paperless/pull/387
 .. _#391: https://github.com/danielquinn/paperless/pull/391
 .. _#390: https://github.com/danielquinn/paperless/pull/390
 .. _#392: https://github.com/danielquinn/paperless/issues/392
 .. _#395: https://github.com/danielquinn/paperless/pull/395
 .. _#396: https://github.com/danielquinn/paperless/pull/396
 .. _#399: https://github.com/danielquinn/paperless/pull/399
 .. _#400: https://github.com/danielquinn/paperless/pull/400
 .. _#401: https://github.com/danielquinn/paperless/pull/401
 .. _pipenv: https://docs.pipenv.org/
 .. _a new home on Docker Hub: https://hub.docker.com/r/danielquinn/paperless/
--- a/docs/migrating.rst
+++ b/docs/migrating.rst
@@ -101,6 +101,7 @@ is similar:
    $ cd /path/to/project
    $ git pull
    $ docker build -t paperless .
    $ docker-compose run --rm consumer migrate
    $ docker-compose up -d
 If ``git pull`` doesn't report any changes, there is no need to continue with
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
--- a/paperless.conf.example
+++ b/paperless.conf.example
@@ -89,9 +89,10 @@ PAPERLESS_EMAIL_SECRET=""
 # as is "example.com,www.example.com", but NOT " example.com" or "example.com,"
 #PAPERLESS_ALLOWED_HOSTS="example.com,www.example.com"
-# If you decide to use Paperless APIs in an ajax calls, you need to add your
+# If you decide to use the Paperless API in an ajax call, you need to add your
-# servers to the allowed hosts that can do CORS calls. By default Paperless allows 
+# servers to the list of allowed hosts that can do CORS calls. By default
-# calls from localhost:8080. The same rules as above how the list should look like.
+# Paperless allows calls from localhost:8080, but if you'd like to change that,
 # you can set this value to a comma-separated list.
 #PAPERLESS_CORS_ALLOWED_HOSTS="localhost:8080,example.com,localhost:8000"
 # To host paperless under a subpath url like example.com/paperless you set
@@ -116,6 +117,10 @@ PAPERLESS_EMAIL_SECRET=""
 # http://paperless.readthedocs.org/en/latest/consumption.html#hooking-into-the-consumption-process
 #PAPERLESS_POST_CONSUME_SCRIPT="/path/to/an/arbitrary/script.sh"
 # By default, when clicking on a document within the web interface, the
 # browser will prompt the user to save the document to disk. By setting this to
 # "true", the document will instead be opened in the browser, if possible.
 #PAPERLESS_INLINE_DOC="false"
 #
 # The following values use sensible defaults for modern systems, but if you're
--- a/requirements.txt
+++ b/requirements.txt
@@ -29,7 +29,7 @@ pillow==5.2.0
 pluggy==0.7.1; python_version != '3.1.*'
 py==1.6.0; python_version != '3.1.*'
 pycodestyle==2.4.0
-pyocr==0.5.2
+pyocr==0.5.3
 pytest-cov==2.5.1
 pytest-django==3.4.2
 pytest-env==0.6.2
--- a/src/documents/checks.py
+++ b/src/documents/checks.py
@@ -2,7 +2,7 @@ import textwrap
 from django.conf import settings
 from django.core.checks import Error, register
-from django.db.utils import OperationalError
+from django.db.utils import OperationalError, ProgrammingError
@register()
@@ -14,7 +14,7 @@ def changed_password_check(app_configs, **kwargs):
    try:
        encrypted_doc = Document.objects.filter(
            storage_type=Document.STORAGE_TYPE_GPG).first()
-    except OperationalError:
+    except (OperationalError, ProgrammingError):
        return []  # No documents table yet
    if encrypted_doc:
--- a/src/documents/models.py
+++ b/src/documents/models.py
@@ -1,24 +1,24 @@
 # coding=utf-8
 import dateutil.parser
 import logging
 import os
 import re
 import uuid
 from collections import OrderedDict
 import dateutil.parser
 from django.conf import settings
 from django.db import models
 from django.template.defaultfilters import slugify
 from django.utils import timezone
 from fuzzywuzzy import fuzz
-from django.conf import settings
+from .managers import LogManager
 try:
    from django.core.urlresolvers import reverse
 except ImportError:
    from django.urls import reverse
 from django.db import models
 from django.template.defaultfilters import slugify
 from django.utils import timezone
 from .managers import LogManager
 class MatchingModel(models.Model):
@@ -135,7 +135,7 @@ class MatchingModel(models.Model):
        Example:
          '  some random  words "with   quotes  " and   spaces'
            ==>
-          ["some", "random", "words", "with\s+quotes", "and", "spaces"]
+          ["some", "random", "words", "with+quotes", "and", "spaces"]
        """
        findterms = re.compile(r'"([^"]+)"|(\S+)').findall
        normspace = re.compile(r"\s+").sub
@@ -192,7 +192,11 @@ class Document(models.Model):
    TYPE_JPG = "jpg"
    TYPE_GIF = "gif"
    TYPE_TIF = "tiff"
-    TYPES = (TYPE_PDF, TYPE_PNG, TYPE_JPG, TYPE_GIF, TYPE_TIF,)
+    TYPE_TXT = "txt"
    TYPE_CSV = "csv"
    TYPE_MD = "md"
    TYPES = (TYPE_PDF, TYPE_PNG, TYPE_JPG, TYPE_GIF, TYPE_TIF,
             TYPE_TXT, TYPE_CSV, TYPE_MD)
    STORAGE_TYPE_UNENCRYPTED = "unencrypted"
    STORAGE_TYPE_GPG = "gpg"
@@ -365,51 +369,52 @@ class FileInfo:
        )
    )
    formats = "pdf|jpe?g|png|gif|tiff?|te?xt|md|csv"
    REGEXES = OrderedDict([
        ("created-correspondent-title-tags", re.compile(
            r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
            r"(?P<correspondent>.*) - "
            r"(?P<title>.*) - "
            r"(?P<tags>[a-z0-9\-,]*)"
-            r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$",
+            r"\.(?P<extension>{})$".format(formats),
            flags=re.IGNORECASE
        )),
        ("created-title-tags", re.compile(
            r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
            r"(?P<title>.*) - "
            r"(?P<tags>[a-z0-9\-,]*)"
-            r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$",
+            r"\.(?P<extension>{})$".format(formats),
            flags=re.IGNORECASE
        )),
        ("created-correspondent-title", re.compile(
            r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
            r"(?P<correspondent>.*) - "
            r"(?P<title>.*)"
-            r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$",
+            r"\.(?P<extension>{})$".format(formats),
            flags=re.IGNORECASE
        )),
        ("created-title", re.compile(
            r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
            r"(?P<title>.*)"
-            r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$",
+            r"\.(?P<extension>{})$".format(formats),
            flags=re.IGNORECASE
        )),
        ("correspondent-title-tags", re.compile(
            r"(?P<correspondent>.*) - "
            r"(?P<title>.*) - "
            r"(?P<tags>[a-z0-9\-,]*)"
-            r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$",
+            r"\.(?P<extension>{})$".format(formats),
            flags=re.IGNORECASE
        )),
        ("correspondent-title", re.compile(
            r"(?P<correspondent>.*) - "
            r"(?P<title>.*)?"
-            r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$",
+            r"\.(?P<extension>{})$".format(formats),
            flags=re.IGNORECASE
        )),
        ("title", re.compile(
            r"(?P<title>.*)"
-            r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$",
+            r"\.(?P<extension>{})$".format(formats),
            flags=re.IGNORECASE
        ))
    ])
--- a/src/documents/parsers.py
+++ b/src/documents/parsers.py
@@ -1,9 +1,25 @@
 import logging
 import shutil
 import tempfile
 import re
 from django.conf import settings
 # This regular expression will try to find dates in the document at
 # hand and will match the following formats:
 # - XX.YY.ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
 # - XX/YY/ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
 # - XX-YY-ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
 # - XX. MONTH ZZZZ with XX being 1 or 2 and ZZZZ being 2 or 4 digits
 # - MONTH ZZZZ, with ZZZZ being 4 digits
 # - MONTH XX, ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits
 DATE_REGEX = re.compile(
    r'\b([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})\b|' +
    r'\b([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))\b|' +
    r'\b([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))\b|' +
    r'\b([^\W\d_]{3,9} [0-9]{4})\b'
 )
 class ParseError(Exception):
    """Raised when parsing or converting a document fails."""
--- a/src/documents/tests/test_matchables.py
+++ b/src/documents/tests/test_matchables.py
@@ -166,7 +166,7 @@ class TestMatching(TestCase):
    def test_match_regex(self):
        self._test_matching(
-            "alpha\w+gamma",
+            r"alpha\w+gamma",
            "MATCH_REGEX",
            (
                "I have alpha_and_gamma in me",
--- a/src/documents/views.py
+++ b/src/documents/views.py
@@ -1,6 +1,8 @@
 from django.http import HttpResponse, HttpResponseBadRequest
 from django.views.generic import DetailView, FormView, TemplateView
 from django_filters.rest_framework import DjangoFilterBackend
 from django.conf import settings
 from paperless.db import GnuPG
 from paperless.mixins import SessionOrBasicAuthMixin
 from paperless.views import StandardPagination
@@ -48,6 +50,9 @@ class FetchView(SessionOrBasicAuthMixin, DetailView):
            Document.TYPE_JPG: "image/jpeg",
            Document.TYPE_GIF: "image/gif",
            Document.TYPE_TIF: "image/tiff",
            Document.TYPE_CSV: "text/csv",
            Document.TYPE_MD:  "text/markdown",
            Document.TYPE_TXT: "text/plain"
        }
        if self.kwargs["kind"] == "thumb":
@@ -60,8 +65,11 @@ class FetchView(SessionOrBasicAuthMixin, DetailView):
            self._get_raw_data(self.object.source_file),
            content_type=content_types[self.object.file_type]
        )
-        response["Content-Disposition"] = 'attachment; filename="{}"'.format(
+
-            self.object.file_name)
+        DISPOSITION = 'inline' if settings.INLINE_DOC else 'attachment'
        response["Content-Disposition"] = '{}; filename="{}"'.format(
            DISPOSITION, self.object.file_name)
        return response
--- a/src/paperless/settings.py
+++ b/src/paperless/settings.py
@@ -22,6 +22,14 @@ elif os.path.exists("/usr/local/etc/paperless.conf"):
    load_dotenv("/usr/local/etc/paperless.conf")
 def __get_boolean(key, default="NO"):
    """
    Return a boolean value based on whatever the user has supplied in the
    environment based on whether the value "looks like" it's True or not.
    ``key`` is the name of the environment variable to read.
    ``default`` is the string used when the variable is not set; it is
    interpreted by the same rule, so pass "YES" for a setting that should
    stay enabled unless the user explicitly turns it off.
    Only "yes", "y", "1", "t" and "true" (in any letter case) count as True;
    every other value is False.
    """
    return os.getenv(key, default).lower() in ("yes", "y", "1", "t", "true")
 # Build paths inside the project like this: os.path.join(BASE_DIR, ...)
 BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
@@ -67,6 +75,7 @@ INSTALLED_APPS = [
    "documents.apps.DocumentsConfig",
    "reminders.apps.RemindersConfig",
    "paperless_tesseract.apps.PaperlessTesseractConfig",
    "paperless_text.apps.PaperlessTextConfig",
    "django.contrib.admin",
@@ -221,12 +230,12 @@ OCR_LANGUAGE = os.getenv("PAPERLESS_OCR_LANGUAGE", "eng")
 OCR_THREADS = os.getenv("PAPERLESS_OCR_THREADS")
 # OCR all documents?
-OCR_ALWAYS = bool(os.getenv("PAPERLESS_OCR_ALWAYS", "NO").lower() in ("yes", "y", "1", "t", "true"))  # NOQA
+OCR_ALWAYS = __get_boolean("PAPERLESS_OCR_ALWAYS")
 # If this is true, any failed attempts to OCR a PDF will result in the PDF
 # being indexed anyway, with whatever we could get.  If it's False, the file
 # will simply be left in the CONSUMPTION_DIR.
-FORGIVING_OCR = bool(os.getenv("PAPERLESS_FORGIVING_OCR", "YES").lower() in ("yes", "y", "1", "t", "true"))  # NOQA
+FORGIVING_OCR = __get_boolean("PAPERLESS_FORGIVING_OCR")
 # GNUPG needs a home directory for some reason
 GNUPG_HOME = os.getenv("HOME", "/tmp")
@@ -270,6 +279,9 @@ PASSPHRASE = os.getenv("PAPERLESS_PASSPHRASE")
 PRE_CONSUME_SCRIPT = os.getenv("PAPERLESS_PRE_CONSUME_SCRIPT")
 POST_CONSUME_SCRIPT = os.getenv("PAPERLESS_POST_CONSUME_SCRIPT")
 # Whether to display a selected document inline, or download it as attachment:
 INLINE_DOC = __get_boolean("PAPERLESS_INLINE_DOC")
 # The number of items on each page in the web UI.  This value must be a
 # positive integer, but if you don't define one in paperless.conf, a default of
 # 100 will be used.
--- a/src/paperless/version.py
+++ b/src/paperless/version.py
@@ -1 +1 @@
-__version__ = (2, 2, 1)
+__version__ = (2, 3, 0)
--- a/src/paperless_tesseract/parsers.py
+++ b/src/paperless_tesseract/parsers.py
@@ -14,7 +14,7 @@ from pyocr.libtesseract.tesseract_raw import \
 from pyocr.tesseract import TesseractError
 import pdftotext
-from documents.parsers import DocumentParser, ParseError
+from documents.parsers import DocumentParser, ParseError, DATE_REGEX
 from .languages import ISO639
@@ -50,10 +50,11 @@ class RasterisedDocumentParser(DocumentParser):
            self.CONVERT,
            "-scale", "500x5000",
            "-alpha", "remove",
-            self.document_path, os.path.join(self.tempdir, "convert-%04d.png")
+            "{}[0]".format(self.document_path),
            os.path.join(self.tempdir, "convert.png")
        )
-        return os.path.join(self.tempdir, "convert-0000.png")
+        return os.path.join(self.tempdir, "convert.png")
    def _is_ocred(self):
@@ -210,22 +211,8 @@ class RasterisedDocumentParser(DocumentParser):
        except ParseError as e:
            return None
        # This regular expression will try to find dates in the document at
        # hand and will match the following formats:
        # - XX.YY.ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
        # - XX/YY/ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
        # - XX-YY-ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
        # - XX. MONTH ZZZZ with XX being 1 or 2 and ZZZZ being 2 or 4 digits
        # - MONTH ZZZZ, with ZZZZ being 4 digits
        # - MONTH XX, ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits
        pattern = re.compile(
            r'\b([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})\b|' +
            r'\b([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))\b|' +
            r'\b([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))\b|' +
            r'\b([^\W\d_]{3,9} [0-9]{4})\b')
        # Iterate through all regex matches and try to parse the date
-        for m in re.finditer(pattern, text):
+        for m in re.finditer(DATE_REGEX, text):
            datestring = m.group(0)
            try:
@@ -272,8 +259,9 @@ def run_unpaper(args):
 def strip_excess_whitespace(text):
    collapsed_spaces = re.sub(r"([^\S\r\n]+)", " ", text)
    no_leading_whitespace = re.sub(
-        "([\n\r]+)([^\S\n\r]+)", '\\1', collapsed_spaces)
+        r"([\n\r]+)([^\S\n\r]+)", '\\1', collapsed_spaces)
-    no_trailing_whitespace = re.sub("([^\S\n\r]+)$", '', no_leading_whitespace)
+    no_trailing_whitespace = re.sub(
        r"([^\S\n\r]+)$", '', no_leading_whitespace)
    return no_trailing_whitespace
--- a/src/paperless_tesseract/signals.py
+++ b/src/paperless_tesseract/signals.py
@@ -5,7 +5,7 @@ from .parsers import RasterisedDocumentParser
 class ConsumerDeclaration:
-    MATCHING_FILES = re.compile("^.*\.(pdf|jpe?g|gif|png|tiff?|pnm|bmp)$")
+    MATCHING_FILES = re.compile(r"^.*\.(pdf|jpe?g|gif|png|tiff?|pnm|bmp)$")
    @classmethod
    def handle(cls, sender, **kwargs):
--- a/src/paperless_text/init.py
+++ b/src/paperless_text/init.py
--- a/src/paperless_text/apps.py
+++ b/src/paperless_text/apps.py
@@ -0,0 +1,16 @@
 from django.apps import AppConfig
 class PaperlessTextConfig(AppConfig):
    """App configuration for the ``paperless_text`` application."""
    name = "paperless_text"
    def ready(self):
        """
        Connect this app's ConsumerDeclaration.handle to the
        document_consumer_declaration signal, then run the base class hook.
        """
        # Both imports are deliberately local to this method, as in the
        # original code.
        # NOTE(review): presumably because these modules cannot be imported
        # before the app registry is ready - confirm.
        from documents.signals import (
            document_consumer_declaration as declaration_signal
        )
        from .signals import ConsumerDeclaration as TextDeclaration
        declaration_signal.connect(TextDeclaration.handle)
        super().ready()
--- a/src/paperless_text/parsers.py
+++ b/src/paperless_text/parsers.py
@@ -0,0 +1,131 @@
 import os
 import re
 import subprocess
 import dateparser
 from django.conf import settings
 from documents.parsers import DocumentParser, ParseError, DATE_REGEX
 class TextDocumentParser(DocumentParser):
    """
    This parser directly parses a text document (.txt, .md, or .csv)
    """
    # Settings are read once, when the class body is executed.
    # NOTE(review): only CONVERT and DATE_ORDER are used inside this class;
    # the other attributes are kept so the class interface is unchanged.
    CONVERT = settings.CONVERT_BINARY
    THREADS = int(settings.OCR_THREADS) if settings.OCR_THREADS else None
    UNPAPER = settings.UNPAPER_BINARY
    DATE_ORDER = settings.DATE_ORDER
    DEFAULT_OCR_LANGUAGE = settings.OCR_LANGUAGE
    OCR_ALWAYS = settings.OCR_ALWAYS
    def __init__(self, path):
        super().__init__(path)
        # Cache for get_text(); None means the file has not been read yet.
        self._text = None
    def get_thumbnail(self):
        """
        The thumbnail of a txt is just a 500px wide image of the text
        rendered onto a letter-sized page.
        Returns the path of the generated PNG inside self.tempdir.
        run_command raises ParseError if any convert call exits non-zero.
        """
        # Local import: shlex is only needed to build the shell command lines.
        import shlex
        # The below is heavily cribbed from https://askubuntu.com/a/590951
        bg_color = "white"  # bg color
        text_color = "black"  # text color
        psize = [500, 647]  # icon size
        n_lines = 50  # number of lines to show
        # run_command hands one space-joined string to the shell, so every
        # path is quoted in case the temporary directory contains spaces.
        output_file = shlex.quote(
            os.path.join(self.tempdir, "convert-txt.png"))
        temp_bg = shlex.quote(os.path.join(self.tempdir, "bg.png"))
        temp_txlayer = shlex.quote(os.path.join(self.tempdir, "tx.png"))
        picsize = "x".join(str(n) for n in psize)
        txsize = "x".join(str(n - 8) for n in psize)
        def create_bg():
            # White rounded rectangle that the text layer is merged onto.
            work_size = ",".join(str(n - 1) for n in psize)
            r = str(round(psize[0] / 10))
            rounded = ",".join([r, r])
            run_command(self.CONVERT, "-size ", picsize, ' xc:none -draw ',
                        '"fill ', bg_color, ' roundrectangle 0,0,',
                        work_size, ",", rounded, '" ', temp_bg)
        def read_text():
            # Only the first n_lines lines are rendered, so stop reading
            # there instead of loading the whole document.
            with open(self.document_path, 'r') as src:
                lines = [line.strip() for _, line in zip(range(n_lines), src)]
            return "\n".join(lines)
        def create_txlayer():
            # The document text is untrusted. Inside the double quotes used
            # previously the shell still expanded $(...) and backticks, so a
            # consumed file could run arbitrary commands. shlex.quote makes
            # the whole caption one literal shell word.
            # The space after "caption:" is kept from the old command line.
            # NOTE(review): ImageMagick reads a caption beginning with "@"
            # from a file; the space should prevent that - confirm.
            caption = shlex.quote("caption: " + read_text())
            run_command(self.CONVERT,
                        "-background none",
                        "-fill",
                        text_color,
                        "-pointsize", "12",
                        "-border 4 -bordercolor none",
                        "-size ", txsize,
                        caption,
                        temp_txlayer)
        create_txlayer()
        create_bg()
        run_command(self.CONVERT, temp_bg, temp_txlayer,
                    "-background None -layers merge ", output_file)
        return os.path.join(self.tempdir, "convert-txt.png")
    def get_text(self):
        """Return the document's full text, reading the file only once."""
        if self._text is not None:
            return self._text
        with open(self.document_path, 'r') as f:
            self._text = f.read()
        return self._text
    def get_date(self):
        """
        Return the first date found in the text that dateparser can parse,
        or None when the text cannot be read or holds no parseable date.
        """
        date = None
        datestring = None
        try:
            text = self.get_text()
        except ParseError:
            return None
        # Iterate through all regex matches and try to parse the date
        for m in re.finditer(DATE_REGEX, text):
            datestring = m.group(0)
            try:
                date = dateparser.parse(
                           datestring,
                           settings={'DATE_ORDER': self.DATE_ORDER,
                                     'PREFER_DAY_OF_MONTH': 'first',
                                     'RETURN_AS_TIMEZONE_AWARE': True})
            except TypeError:
                # Skip all matches that do not parse to a proper date
                continue
            if date is not None:
                break
        if date is not None:
            self.log("info", "Detected document date " + date.isoformat() +
                             " based on string " + datestring)
        else:
            self.log("info", "Unable to detect date for document")
        return date
 def run_command(*args):
    """
    Join ``args`` with single spaces and run the result through the shell.
    MAGICK_MEMORY_LIMIT and MAGICK_TMPDIR are added to the child's
    environment when the matching CONVERT_* settings are set.
    Raises ParseError if the command exits with a non-zero status.
    NOTE: the arguments are not escaped in any way, so callers must quote
    anything that did not come from trusted code (e.g. with shlex.quote).
    """
    child_env = os.environ.copy()
    memory_limit = settings.CONVERT_MEMORY_LIMIT
    if memory_limit:
        child_env["MAGICK_MEMORY_LIMIT"] = memory_limit
    scratch_dir = settings.CONVERT_TMPDIR
    if scratch_dir:
        child_env["MAGICK_TMPDIR"] = scratch_dir
    command_line = ' '.join(args)
    process = subprocess.Popen(command_line, env=child_env, shell=True)
    exit_status = process.wait()
    if exit_status != 0:
        raise ParseError("Convert failed at {}".format(args))
--- a/src/paperless_text/signals.py
+++ b/src/paperless_text/signals.py
@@ -0,0 +1,23 @@
 import re
 from .parsers import TextDocumentParser
 class ConsumerDeclaration:
    """Offers TextDocumentParser for files ending in .txt, .text, .md or .csv."""
    MATCHING_FILES = re.compile(r"^.*\.(te?xt|md|csv)$")
    @classmethod
    def handle(cls, sender, **kwargs):
        """Return the callable that decides whether a document is ours."""
        return cls.test
    @classmethod
    def test(cls, doc):
        """
        Return a dict naming the parser and its weight when ``doc`` has a
        supported extension, otherwise None. ``doc`` is lower-cased before
        matching, so the extension check is case-insensitive.
        """
        lowered = doc.lower()
        if not cls.MATCHING_FILES.match(lowered):
            return None
        declaration = {
            "parser": TextDocumentParser,
            "weight": 10
        }
        return declaration
`@@ -1 +1 @@`
	`__version__ = (2, 2, 1)`	`__version__ = (2, 3, 0)`