mirror of https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-09 09:58:20 -05:00

commit f5e0a89a3f
.gitignore (vendored, 7 changes)

@@ -57,7 +57,9 @@ docs/_build/
 target/
 
 # Stored PDFs
-media/*
+media/documents/*.gpg
+media/documents/thumbnails/*.gpg
+media/documents/originals/*.gpg
 
 # Sqlite database
 db.sqlite3
@@ -68,8 +70,9 @@ db.sqlite3
 # Other stuff that doesn't belong
 virtualenv
 .vagrant
+docker-compose.yml
+docker-compose.env
 
 # Used for development
 scripts/import-for-development
 environment
-
.travis.yml (new file, 18 lines)

@@ -0,0 +1,18 @@
+language: python
+
+sudo: false
+
+matrix:
+  include:
+    - python: 3.4
+      env: TOXENV=py34
+    - python: 3.5
+      env: TOXENV=py35
+    - python: 3.5
+      env: TOXENV=pep8
+
+install:
+  - pip install --requirement requirements.txt
+  - pip install tox
+
+script: tox -c src/tox.ini
Dockerfile (new file, 46 lines)

@@ -0,0 +1,46 @@
+FROM python:3.5.1
+MAINTAINER Pit Kleyersburg <pitkley@googlemail.com>
+
+# Install dependencies
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends \
+        sudo \
+        tesseract-ocr tesseract-ocr-eng imagemagick ghostscript unpaper \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install python dependencies
+RUN mkdir -p /usr/src/paperless
+WORKDIR /usr/src/paperless
+COPY requirements.txt /usr/src/paperless/
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy application
+RUN mkdir -p /usr/src/paperless/src
+RUN mkdir -p /usr/src/paperless/data
+RUN mkdir -p /usr/src/paperless/media
+COPY src/ /usr/src/paperless/src/
+COPY data/ /usr/src/paperless/data/
+COPY media/ /usr/src/paperless/media/
+
+# Set consumption directory
+ENV PAPERLESS_CONSUMPTION_DIR /consume
+RUN mkdir -p $PAPERLESS_CONSUMPTION_DIR
+
+# Migrate database
+WORKDIR /usr/src/paperless/src
+RUN ./manage.py migrate
+
+# Create user
+RUN groupadd -g 1000 paperless \
+    && useradd -u 1000 -g 1000 -d /usr/src/paperless paperless \
+    && chown -Rh paperless:paperless /usr/src/paperless
+
+# Setup entrypoint
+COPY scripts/docker-entrypoint.sh /sbin/docker-entrypoint.sh
+RUN chmod 755 /sbin/docker-entrypoint.sh
+
+# Mount volumes
+VOLUME ["/usr/src/paperless/data", "/usr/src/paperless/media", "/consume"]
+
+ENTRYPOINT ["/sbin/docker-entrypoint.sh"]
+CMD ["--help"]
README.rst

@@ -3,6 +3,7 @@ Paperless
 
 |Documentation|
 |Chat|
+|Travis|
 
 Scan, index, and archive all of your paper documents
 
@@ -55,6 +56,7 @@ powerful tools.
 
 * `ImageMagick`_ converts the images between colour and greyscale.
 * `Tesseract`_ does the character recognition.
+* `Unpaper`_ despeckles and deskews the scanned image.
 * `GNU Privacy Guard`_ is used as the encryption backend.
 * `Python 3`_ is the language of the project.
 
@@ -92,6 +94,7 @@ home.
 .. _this one: http://www.brother.ca/en-CA/Scanners/11/ProductDetail/ADS1500W?ProductDetail=productdetail
 .. _ImageMagick: http://imagemagick.org/
 .. _Tesseract: https://github.com/tesseract-ocr
+.. _Unpaper: https://www.flameeyes.eu/projects/unpaper
 .. _GNU Privacy Guard: https://gnupg.org/
 .. _Python 3: https://python.org/
 .. _Pillow: https://pypi.python.org/pypi/pillowfight/
@@ -105,4 +108,5 @@ home.
 .. |Chat| image:: https://badges.gitter.im/danielquinn/paperless.svg
    :alt: Join the chat at https://gitter.im/danielquinn/paperless
    :target: https://gitter.im/danielquinn/paperless?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge
+.. |Travis| image:: https://travis-ci.org/danielquinn/paperless.svg?branch=master
+   :target: https://travis-ci.org/danielquinn/paperless
docker-compose.env.example (new file, 15 lines)

@@ -0,0 +1,15 @@
+# Environment variables to set for Paperless
+# Commented out variables will be replaced by a default within Paperless.
+
+# Passphrase Paperless uses to encrypt and decrypt your documents
+PAPERLESS_PASSPHRASE=CHANGE_ME
+
+# The number of threads to use for text recognition
+# PAPERLESS_OCR_THREADS=4
+
+# Additional languages to install for text recognition
+# PAPERLESS_OCR_LANGUAGES=deu ita
+
+# You can change the default user and group id to a custom one
+# USERMAP_UID=1000
+# USERMAP_GID=1000
docker-compose.yml.example (new file, 37 lines)

@@ -0,0 +1,37 @@
+version: '2'
+
+services:
+  webserver:
+    image: paperless
+    ports:
+      # You can adapt the port you want Paperless to listen on by
+      # modifying the part before the `:`.
+      - "8000:8000"
+    volumes:
+      - data:/usr/src/paperless/data
+      - media:/usr/src/paperless/media
+    env_file: docker-compose.env
+    environment:
+      - PAPERLESS_OCR_LANGUAGES=
+    command: ["runserver", "0.0.0.0:8000"]
+
+  consumer:
+    image: paperless
+    volumes:
+      - data:/usr/src/paperless/data
+      - media:/usr/src/paperless/media
+      # You have to adapt the local path you want the consumption
+      # directory to mount to by modifying the part before the ':'.
+      - /path/to/arbitrary/place:/consume
+      # Likewise, you can add a local path to mount a directory for
+      # exporting.  This is not strictly needed for paperless to
+      # function, only if you're exporting your files: uncomment
+      # it and fill in a local path if you know you're going to
+      # want to export your documents.
+      # - /path/to/another/arbitrary/place:/export
+    env_file: docker-compose.env
+    command: ["document_consumer"]
+
+volumes:
+  data:
+  media:
docs/Dockerfile (new file, 18 lines)

@@ -0,0 +1,18 @@
+FROM python:3.5.1
+MAINTAINER Pit Kleyersburg <pitkley@googlemail.com>
+
+# Install Sphinx and Pygments
+RUN pip install Sphinx Pygments
+
+# Setup directories, copy data
+RUN mkdir /build
+COPY . /build
+WORKDIR /build/docs
+
+# Build documentation
+RUN make html
+
+# Start webserver
+WORKDIR /build/docs/_build/html
+EXPOSE 8000/tcp
+CMD ["python3", "-m", "http.server"]
docs/api.rst (new file, 23 lines)

@@ -0,0 +1,23 @@
+.. _api:
+
+The REST API
+############
+
+Paperless makes use of the `Django REST Framework`_ standard API interface
+because of its inherent awesomeness.  Conveniently, the system is also
+self-documenting, so to learn more about the access points, schema, what's
+accepted and what isn't, you need only visit ``/api`` on your local Paperless
+installation.
+
+.. _Django REST Framework: http://django-rest-framework.org/
+
+
+.. _api-uploading:
+
+Uploading
+---------
+
+File uploads in an API are hard and so far as I've been able to tell, there's
+no standard way of accepting them, so rather than crowbar file uploads into the
+REST API and endure that headache, I've left that process to a simple HTTP
+POST, documented on the :ref:`consumption page <consumption-http>`.
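Since this is a stock Django REST Framework setup, you can also poke at the API programmatically. A minimal sketch with ``requests``: the root listing behaviour is standard DRF, but the port, credentials, and what endpoints appear are assumptions about your particular installation.

.. code-block:: python

    import requests

    # Browse the self-documenting API root; with DRF's default router this
    # returns a JSON map of the available endpoints.  The credentials are
    # whatever superuser you created -- "paperless"/"secret" is made up here.
    response = requests.get(
        "http://localhost:8000/api/",
        auth=("paperless", "secret"),
    )
    response.raise_for_status()
    print(response.json())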
docs/changelog.rst

@@ -1,10 +1,51 @@
 Changelog
 #########
 
+* 0.1.1
+
+  * Potentially **Breaking Change**: All references to "sender" in the code
+    have been renamed to "correspondent" to better reflect the nature of the
+    property (one could quite reasonably scan a document before sending it to
+    someone.)
+  * `#67`_: Rewrote the document exporter and added a new importer that allows
+    for full metadata retention without depending on the file name and
+    modification time.  A big thanks to `Tikitu de Jager`_, `Pit`_,
+    `Florian Jung`_, and `Christopher Luu`_ for their code snippets and
+    contributing conversation that led to this change.
+  * `#20`_: Added *unpaper* support to help in cleaning up the scanned image
+    before it's OCR'd.  Thanks to `Pit`_ for this one.
+  * `#71`_: Added (encrypted) thumbnails in anticipation of a proper UI.
+  * `#68`_: Added support for using a proper config file at
+    ``/etc/paperless.conf`` and modified the systemd unit files to use it.
+  * Refactored the Vagrant installation process to use environment variables
+    rather than asking the user to modify ``settings.py``.
+  * `#44`_: Harmonise environment variable names with constant names.
+  * `#60`_: Setup logging to actually use the Python native logging framework.
+  * `#53`_: Fixed an annoying bug that caused ``.jpeg`` and ``.JPG`` images
+    to be imported but made unavailable.
+
+* 0.1.0
+
+  * Docker support!  Big thanks to `Wayne Werner`_, `Brian Conn`_, and
+    `Tikitu de Jager`_ for this one, and especially to `Pit`_, who
+    spearheaded this effort.
+  * A simple REST API is in place, but it should be considered unstable.
+  * Cleaned up the consumer to use temporary directories instead of a single
+    scratch space.  (Thanks `Pit`_)
+  * Improved the efficiency of the consumer by parsing pages more intelligently
+    and introducing a threaded OCR process (thanks again `Pit`_).
+  * `#45`_: Cleaned up the logic for tag matching.  Reported by `darkmatter`_.
+  * `#47`_: Auto-rotate landscape documents.  Reported by `Paul`_ and fixed by
+    `Pit`_.
+  * `#48`_: Matching algorithms should do so on a word boundary (`darkmatter`_)
+  * `#54`_: Documented the re-tagger (`zedster`_)
+  * `#57`_: Make sure file is preserved on import failure (`darkmatter`_)
+  * Added tox with pep8 checking
+
 * 0.0.6
 
-  * Added support for parallel OCR (significant work from pitkley)
-  * Sped up the language detection (significant work from pitkley)
+  * Added support for parallel OCR (significant work from `Pit`_)
+  * Sped up the language detection (significant work from `Pit`_)
   * Added simple logging
 
 * 0.0.5
@@ -35,3 +76,26 @@ Changelog
 * 0.0.1
 
   * Initial release
+
+.. _Brian Conn: https://github.com/TheConnMan
+.. _Christopher Luu: https://github.com/nuudles
+.. _Florian Jung: https://github.com/the01
+.. _Tikitu de Jager: https://github.com/tikitu
+.. _Paul: https://github.com/polo2ro
+.. _Pit: https://github.com/pitkley
+.. _Wayne Werner: https://github.com/waynew
+.. _darkmatter: https://github.com/darkmatter
+.. _zedster: https://github.com/zedster
+
+.. _#20: https://github.com/danielquinn/paperless/issues/20
+.. _#44: https://github.com/danielquinn/paperless/issues/44
+.. _#45: https://github.com/danielquinn/paperless/issues/45
+.. _#47: https://github.com/danielquinn/paperless/issues/47
+.. _#48: https://github.com/danielquinn/paperless/issues/48
+.. _#53: https://github.com/danielquinn/paperless/issues/53
+.. _#54: https://github.com/danielquinn/paperless/issues/54
+.. _#57: https://github.com/danielquinn/paperless/issues/57
+.. _#60: https://github.com/danielquinn/paperless/issues/60
+.. _#67: https://github.com/danielquinn/paperless/issues/67
+.. _#68: https://github.com/danielquinn/paperless/issues/68
+.. _#71: https://github.com/danielquinn/paperless/issues/71
docs/consumption.rst

@@ -40,14 +40,14 @@ follow the :ref:`consumer <utilities-consumer>` instructions to get it running.
 A Note on File Naming
 ---------------------
 
-Any document you put into the consumption directory will be consumed, but if you
-name the file right, it'll automatically set some values in the database for
-you. This is is the logic the consumer follows:
+Any document you put into the consumption directory will be consumed, but if
+you name the file right, it'll automatically set some values in the database
+for you.  This is the logic the consumer follows:
 
-1. Try to find the sender, title, and tags in the file name following the
-   pattern: ``Sender - Title - tag,tag,tag.pdf``.
-2. If that doesn't work, try to find the sender and title in the file name
-   following the pattern: ``Sender - Title.pdf``.
+1. Try to find the correspondent, title, and tags in the file name following
+   the pattern: ``Correspondent - Title - tag,tag,tag.pdf``.
+2. If that doesn't work, try to find the correspondent and title in the file
+   name following the pattern: ``Correspondent - Title.pdf``.
 3. If that doesn't work, just assume that the name of the file is the title.
 
 So given the above, the following examples would work as you'd expect:
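The three-step fallback above is easy to picture as a tiny parser. A sketch of the described logic, assuming a plain ``" - "`` split rather than whatever regular expressions the consumer actually uses:

.. code-block:: python

    import os

    def parse_filename(path):
        """Illustrates the three-step fallback described above; the real
        consumer uses its own matching code, so this is only a sketch."""
        name, _ = os.path.splitext(os.path.basename(path))
        parts = name.split(" - ")
        if len(parts) == 3:  # Correspondent - Title - tag,tag,tag
            correspondent, title, tags = parts
            return correspondent, title, tags.split(",")
        if len(parts) == 2:  # Correspondent - Title
            return parts[0], parts[1], []
        return None, name, []  # fall back to using the file name as the title

    print(parse_filename("Amazon - Invoice - money,orders.pdf"))
    # ('Amazon', 'Invoice', ['money', 'orders'])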
@@ -97,9 +97,9 @@ So, with all that in mind, here's what you do to get it running:
    the configured email account every 10 minutes for something new and pull down
    whatever it finds.
 4. Send yourself an email!  Note that the subject is treated as the file name,
-   so if you set the subject to ``Sender - Title - tag,tag,tag``, you'll get
-   what you expect. Also, you must include the aforementioned secret string in
-   every email so the fetcher knows that it's safe to import.
+   so if you set the subject to ``Correspondent - Title - tag,tag,tag``, you'll
+   get what you expect.  Also, you must include the aforementioned secret
+   string in every email so the fetcher knows that it's safe to import.
 5. After a few minutes, the consumer will poll your mailbox, pull down the
    message, and place the attachment in the consumption directory with the
    appropriate name.  A few minutes later, the consumer will import it like any
@@ -111,23 +111,22 @@ So, with all that in mind, here's what you do to get it running:
 HTTP POST
 =========
 
-Currently, the API is limited to only handling file uploads, it doesn't do tags
-yet, and the URL schema isn't concrete, but it's a start. It's also not much of
-a real API, it's just a URL that accepts an HTTP POST.
+You can also submit a document via HTTP POST.  It doesn't do tags yet, and the
+URL schema isn't concrete, but it's a start.
 
-To push your document to *Paperless*, send an HTTP POST to the server with the
+To push your document to Paperless, send an HTTP POST to the server with the
 following name/value pairs:
 
-* ``sender``: The name of the document's sender.  Note that there are
-  restrictions on what characters you can use here.  Specifically, alphanumeric
-  characters, `-`, `,`, `.`, and `'` are ok, everything else it out.  You also
-  can't use the sequence ` - ` (space, dash, space).
-* ``title``: The title of the document.  The rules for characters is the same
-  here as the sender.
-* ``signature``: For security reasons, we have the sender send a signature using
-  a "shared secret" method to make sure that random strangers don't start
-  uploading stuff to your server.  The means of generating this signature is
-  defined below.
+* ``correspondent``: The name of the document's correspondent.  Note that there
+  are restrictions on what characters you can use here.  Specifically,
+  alphanumeric characters, `-`, `,`, `.`, and `'` are ok; everything else is
+  out.  You also can't use the sequence ` - ` (space, dash, space).
+* ``title``: The title of the document.  The rules for characters are the same
+  here as for the correspondent.
+* ``signature``: For security reasons, we have the correspondent send a
+  signature using a "shared secret" method to make sure that random strangers
+  don't start uploading stuff to your server.  The means of generating this
+  signature is defined below.
 
 Specify ``enctype="multipart/form-data"``, and then POST your file with::
 
@@ -146,12 +145,12 @@ verification.
 
 In the case of *Paperless*, you configure the server with the secret by setting
 ``UPLOAD_SHARED_SECRET``.  Then on your client, you generate your signature by
-concatenating the sender, title, and the secret, and then using sha256 to
-generate a hexdigest.
+concatenating the correspondent, title, and the secret, and then using sha256
+to generate a hexdigest.
 
 If you're using Python, this is what that looks like:
 
 .. code:: python
 
     from hashlib import sha256
-    signature = sha256(sender + title + secret).hexdigest()
+    signature = sha256(correspondent + title + secret).hexdigest()
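One wrinkle the snippet glosses over: in Python 3, ``sha256()`` requires bytes, so the concatenated string has to be encoded first. Putting the whole exchange together with ``requests``; note the endpoint URL and the file-field name below are hypothetical, since they're elided from this page:

.. code-block:: python

    from hashlib import sha256

    import requests

    correspondent = "Amazon"
    title = "Invoice"
    secret = "your-shared-secret"  # must match UPLOAD_SHARED_SECRET on the server

    # Python 3 needs bytes here; the snippet above assumes Python 2 strings.
    signature = sha256(
        (correspondent + title + secret).encode("utf-8")
    ).hexdigest()

    with open("Invoice.pdf", "rb") as fh:
        requests.post(
            "http://localhost:8000/push",       # hypothetical URL
            data={
                "correspondent": correspondent,
                "title": title,
                "signature": signature,
            },
            files={"document": fh},             # hypothetical field name
        )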
docs/index.rst

@@ -30,6 +30,7 @@ Contents
    requirements
    setup
    consumption
+   api
    utilities
    migrating
    changelog
docs/migrating.rst

@@ -4,31 +4,10 @@ Migrating, Updates, and Backups
 ===============================
 
 As *Paperless* is still under active development, there's a lot that can change
-as software updates roll out.  The thing you just need to remember for all of
-this is that for the most part, **the database is expendable** so long as you
-have your files.  This is because the file name of the exported files includes
-the name of the sender, the title, and the tags (if any) on each file.
-
-
-.. _migrating-updates:
-
-Updates
--------
-
-For the most part, all you have to do to update *Paperless* is run ``git pull``
-on the directory containing the project files, and then use Django's ``migrate``
-command to execute any database schema updates that might have been rolled in
-as part of the update:
-
-.. code:: bash
-
-    $ cd /path/to/project
-    $ git pull
-    $ cd src
-    $ ./manage.py migrate
-
-Note that it's possible (even likely) that while ``git pull`` may update some
-files, the ``migrate`` step may not update anything.  This is totally normal.
+as software updates roll out.  You should back up often, so if anything goes
+wrong during an update, you at least have a means of restoring to something
+usable.  Thankfully, there are automated ways of backing up, restoring, and
+updating the software.
 
 
 .. _migrating-backup:
@@ -38,20 +17,8 @@ Backing Up
 
 So you're bored of this whole project, or you want to make a remote backup of
 the unencrypted files for whatever reason.  This is easy to do: simply use the
-:ref:`exporter <utilities-exporter>` to dump your documents out into an
-arbitrary directory.
-
-Additionally however, you'll need to back up the tags themselves.  The file
-names contain the tag names, but you still need to define the tags and their
-matching algorithms in the database for things to work properly.  We do this
-with Django's ``dumpdata`` command, which produces JSON output.
-
-.. code:: bash
-
-    $ cd /path/to/project
-    $ cd src
-    $ ./manage.py document_export /path/to/arbitrary/place/
-    $ ./manage.py dumpdata documents.Tag > /path/to/arbitrary/place/tags.json
+:ref:`exporter <utilities-exporter>` to dump your documents and database out
+into an arbitrary directory.
 
 
 .. _migrating-restoring:
@@ -66,7 +33,7 @@ create an empty database (just follow the
 ``tags.json`` file you created as part of your backup.  Lastly, copy your
 exported documents into the consumption directory and start up the consumer.
 
-.. code:: bash
+.. code-block:: shell-session
 
     $ cd /path/to/project
     $ rm data/db.sqlite3  # Delete the database
@@ -77,3 +44,60 @@ exported documents into the consumption directory and start up the consumer.
     $ cp /path/to/exported/docs/* /path/to/consumption/dir/
     $ ./manage.py document_consumer
 
+Importing your data if you are :ref:`using Docker <setup-installation-docker>`
+is almost as simple:
+
+.. code-block:: shell-session
+
+    # Stop and remove your current containers
+    $ docker-compose stop
+    $ docker-compose rm -f
+
+    # Recreate them, add the superuser
+    $ docker-compose up -d
+    $ docker-compose run --rm webserver createsuperuser
+
+    # Load the tags
+    $ cat /path/to/arbitrary/place/tags.json | docker-compose run --rm webserver loaddata_stdin -
+
+    # Load your exported documents into the consumption directory
+    # (How you do this highly depends on how you have set this up)
+    $ cp /path/to/exported/docs/* /path/to/mounted/consumption/dir/
+
+After loading the documents into the consumption directory, the consumer will
+immediately start consuming the documents.
+
+
+.. _migrating-updates:
+
+Updates
+-------
+
+For the most part, all you have to do to update *Paperless* is run ``git pull``
+on the directory containing the project files, and then use Django's ``migrate``
+command to execute any database schema updates that might have been rolled in
+as part of the update:
+
+.. code-block:: shell-session
+
+    $ cd /path/to/project
+    $ git pull
+    $ cd src
+    $ ./manage.py migrate
+
+Note that it's possible (even likely) that while ``git pull`` may update some
+files, the ``migrate`` step may not update anything.  This is totally normal.
+
+If you are :ref:`using Docker <setup-installation-docker>`, the update process
+requires only one additional step:
+
+.. code-block:: shell-session
+
+    $ cd /path/to/project
+    $ git pull
+    $ docker build -t paperless .
+    $ docker-compose up -d
+    $ docker-compose run --rm webserver migrate
+
+If ``git pull`` doesn't report any changes, there is no need to continue with
+the remaining steps.
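The ``loaddata_stdin`` command used above ships with the Paperless image, but its implementation isn't part of this commit. The following is only a sketch of what such a Django management command can look like, not the shipped code:

.. code-block:: python

    import sys
    import tempfile

    from django.core.management import call_command
    from django.core.management.base import BaseCommand


    class Command(BaseCommand):
        """Illustrative only: feed a fixture from stdin to Django's loaddata."""

        help = "Load a JSON fixture piped in on stdin."

        def handle(self, *args, **options):
            # loaddata wants a file path, so buffer stdin into a temp file.
            with tempfile.NamedTemporaryFile(suffix=".json") as fixture:
                fixture.write(sys.stdin.buffer.read())
                fixture.flush()
                call_command("loaddata", fixture.name)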
docs/requirements.rst

@@ -10,11 +10,13 @@ should work) that has the following software installed on it:
 * `GNU Privacy Guard`_
 * `Tesseract`_
 * `Imagemagick`_
+* `unpaper`_
 
 .. _Python3: https://python.org/
 .. _GNU Privacy Guard: https://gnupg.org
 .. _Tesseract: https://github.com/tesseract-ocr
 .. _Imagemagick: http://imagemagick.org/
+.. _unpaper: https://www.flameeyes.eu/projects/unpaper
 
 Notably, you should confirm how you access your Python3 installation.  Many
 Linux distributions will install Python3 in parallel to Python2, using the names
@@ -101,3 +103,16 @@ you'd like to generate your own docs locally, you'll need to:
     $ pip install sphinx
 
 and then cd into the ``docs`` directory and type ``make html``.
+
+If you are using Docker, you can use the following commands to build the
+documentation and run a webserver serving it on `port 8001`_:
+
+.. code:: bash
+
+    $ pwd
+    /path/to/paperless
+
+    $ docker build -t paperless:docs -f docs/Dockerfile .
+    $ docker run --rm -it -p "8001:8000" paperless:docs
+
+.. _port 8001: http://127.0.0.1:8001
docs/setup.rst (215 changes)

@@ -37,11 +37,19 @@ or just download the tarball and go that route:
 Installation & Configuration
 ----------------------------
 
-You can go two routes with setting up and running Paperless.  The *Vagrant*
-route is quick & easy, but means you're running a VM which comes with memory
-consumption etc.  Alternatively the standard, "bare metal" approach is a little
-more complicated.
+You can go multiple routes with setting up and running Paperless.  The `Vagrant
+route`_ is quick & easy, but means you're running a VM which comes with memory
+consumption etc.  We also `support Docker`_, which you can use natively under
+Linux and in a VM with `Docker Machine`_ (this guide was written for native
+Docker usage under Linux, you might have to adapt it for Docker Machine.)
+Alternatively the standard, `bare metal`_ approach is a little more complicated,
+but worth it because it makes it easier should you want to contribute some
+code back.
+
+.. _Vagrant route: setup-installation-vagrant_
+.. _support Docker: setup-installation-docker_
+.. _bare metal: setup-installation-standard_
+.. _Docker Machine: https://docs.docker.com/machine/
 
 .. _setup-installation-standard:
 
@@ -91,33 +99,188 @@ Vagrant Method
 2. Run ``vagrant up``.  An instance will start up for you.  When it's ready and
    provisioned...
 3. Run ``vagrant ssh`` and once inside your new vagrant box, edit
-   ``/opt/paperless/src/paperless/settings.py`` and set the values for:
-   * ``CONSUMPTION_DIR``: this is where your documents will be dumped to be
-     consumed by Paperless.
-   * ``PASSPHRASE``: this is the passphrase Paperless uses to encrypt/decrypt
-     the original document.  The default value attempts to source the
-     passphrase from the environment, so if you don't set it to a static value
-     here, you must set ``PAPERLESS_PASSPHRASE=some-secret-string`` on the
-     command line whenever invoking the consumer or webserver.
-4. Initialise the database with ``/opt/paperless/src/manage.py migrate``.
-5. Still inside your vagrant box, create a user for your Paperless instance with
-   ``/opt/paperless/src/manage.py createsuperuser``.  Follow the prompts to
+   ``/etc/paperless.conf`` and set the values for:
+   * ``PAPERLESS_CONSUMPTION_DIR``: this is where your documents will be
+     dumped to be consumed by Paperless.
+   * ``PAPERLESS_PASSPHRASE``: this is the passphrase Paperless uses to
+     encrypt/decrypt the original document.
+   * ``PAPERLESS_SHARED_SECRET``: this is the "magic word" used when consuming
+     documents from mail or via the API.  If you don't use either, leaving it
+     blank is just fine.
+4. Exit the vagrant box and re-enter it with ``vagrant ssh`` again.  This
+   updates the environment to make use of the changes you made to the config
+   file.
+5. Initialise the database with ``/opt/paperless/src/manage.py migrate``.
+6. Still inside your vagrant box, create a user for your Paperless instance
+   with ``/opt/paperless/src/manage.py createsuperuser``.  Follow the prompts to
    create your user.
-6. Start the webserver with ``/opt/paperless/src/manage.py runserver 0.0.0.0:8000``.
-   You should now be able to visit your (empty) `Paperless webserver`_ at
-   ``172.28.128.4:8000``.  You can login with the user/pass you created in #5.
-7. In a separate window, run ``vagrant ssh`` again, but this time once inside
+7. Start the webserver with
+   ``/opt/paperless/src/manage.py runserver 0.0.0.0:8000``.  You should now be
+   able to visit your (empty) `Paperless webserver`_ at ``172.28.128.4:8000``.
+   You can login with the user/pass you created in #6.
+8. In a separate window, run ``vagrant ssh`` again, but this time once inside
    your vagrant instance, you should start the consumer script with
    ``/opt/paperless/src/manage.py document_consumer``.
-8. Scan something.  Put it in the ``CONSUMPTION_DIR``.
-9. Wait a few minutes
-10. Visit the document list on your webserver, and it should be there, indexed
-    and downloadable.
+9. Scan something.  Put it in the ``CONSUMPTION_DIR``.
+10. Wait a few minutes
+11. Visit the document list on your webserver, and it should be there, indexed
+    and downloadable.
 
 .. _Vagrant: https://vagrantup.com/
 .. _Paperless server: http://172.28.128.4:8000
 
 
+.. _setup-installation-docker:
+
+Docker Method
+.............
+
+1. Install `Docker`_.
+
+   .. caution::
+
+      As mentioned earlier, this guide assumes that you use Docker natively
+      under Linux.  If you are using `Docker Machine`_ under Mac OS X or
+      Windows, you will have to adapt IP addresses, volume-mounting, command
+      execution and maybe more.
+
+2. Install `docker-compose`_. [#compose]_
+
+   .. caution::
+
+      If you want to use the included ``docker-compose.yml.example`` file, you
+      need to have at least Docker version **1.10.0** and docker-compose
+      version **1.6.0**.
+
+      See the `Docker installation guide`_ on how to install the current
+      version of Docker for your operating system or Linux distribution of
+      choice.  To get an up-to-date version of docker-compose, follow the
+      `docker-compose installation guide`_ if your package repository doesn't
+      include it.
+
+      .. _Docker installation guide: https://docs.docker.com/engine/installation/
+      .. _docker-compose installation guide: https://docs.docker.com/compose/install/
+
+3. Create a copy of ``docker-compose.yml.example`` as ``docker-compose.yml`` and
+   a copy of ``docker-compose.env.example`` as ``docker-compose.env``.  You'll be
+   editing both these files: taking a copy ensures that you can ``git pull`` to
+   receive updates without risking merge conflicts with your modified versions
+   of the configuration files.
+4. Modify ``docker-compose.yml`` to your preferences, following the instructions
+   in comments in the file.  The only change that is a hard requirement is to
+   specify where the consumption directory should mount.
+5. Modify ``docker-compose.env`` and adapt the following environment variables
+   (a sketch of the likely ``PAPERLESS_OCR_THREADS`` default follows this
+   section):
+
+   ``PAPERLESS_PASSPHRASE``
+      This is the passphrase Paperless uses to encrypt/decrypt the original
+      document.
+
+   ``PAPERLESS_OCR_THREADS``
+      This is the number of threads the OCR process will spawn to process
+      document pages in parallel.  If the variable is not set, Python determines
+      the core-count of your CPU and uses that value.
+
+   ``PAPERLESS_OCR_LANGUAGES``
+      If you want the OCR to recognize other languages in addition to the
+      default English, set this parameter to a space separated list of
+      three-letter language-codes after `ISO 639-2/T`_.  For a list of
+      available languages -- including their three letter codes -- see the
+      `Debian packagelist`_.
+
+   ``USERMAP_UID`` and ``USERMAP_GID``
+      If you want to mount the consumption volume (directory ``/consume`` within
+      the containers) to a host-directory -- which you probably want to do --
+      access rights might be an issue.  The default user and group ``paperless``
+      in the containers have an id of 1000.  The containers will enforce that
+      the owning group of the consumption directory will be ``paperless`` to be
+      able to delete consumed documents.  If your host-system has a group with
+      an id of 1000 and you don't want this group to have access rights to the
+      consumption directory, you can use ``USERMAP_GID`` to change the id in the
+      container and thus the one of the consumption directory.  Furthermore, you
+      can change the id of the default user as well using ``USERMAP_UID``.
+6. Run ``docker-compose up -d``.  This will create and start the necessary
+   containers.
+7. To be able to login, you will need a super user.  To create it, execute the
+   following command:
+
+   .. code-block:: shell-session
+
+      $ docker-compose run --rm webserver createsuperuser
+
+   This will prompt you to set a username (default ``paperless``), an optional
+   e-mail address and finally a password.
+8. The default ``docker-compose.yml`` exports the webserver on your local port
+   8000.  If you haven't adapted this, you should now be able to visit your
+   `Paperless webserver`_ at ``http://127.0.0.1:8000``.  You can login with the
+   user and password you just created.
+9. Add files to the consumption directory the way you prefer to.  Following are
+   two possible options:
+
+   1. Mount the consumption directory to a local host path by modifying your
+      ``docker-compose.yml``:
+
+      .. code-block:: diff
+
+         diff --git a/docker-compose.yml b/docker-compose.yml
+         --- a/docker-compose.yml
+         +++ b/docker-compose.yml
+         @@ -17,9 +18,8 @@ services:
+              volumes:
+                - paperless-data:/usr/src/paperless/data
+                - paperless-media:/usr/src/paperless/media
+         -      - /consume
+         +      - /local/path/you/choose:/consume
+
+      .. danger::
+
+         While the consumption container will ensure at startup that it can
+         **delete** a consumed file from a host-mounted directory, it might not
+         be able to **read** the document in the first place if the access
+         rights to the file are incorrect.
+
+         Make sure that the documents you put into the consumption directory
+         will either be readable by everyone (``chmod o+r file.pdf``) or
+         readable by the default user or group id 1000 (or the one you have set
+         with ``USERMAP_UID`` or ``USERMAP_GID`` respectively).
+
+   2. Use ``docker cp`` to copy your files directly into the container:
+
+      .. code-block:: shell-session
+
+         $ # Identify your containers
+         $ docker-compose ps
+                 Name                       Command                State     Ports
+         -------------------------------------------------------------------------
+         paperless_consumer_1    /sbin/docker-entrypoint.sh ...   Exit 0
+         paperless_webserver_1   /sbin/docker-entrypoint.sh ...   Exit 0
+
+         $ docker cp /path/to/your/file.pdf paperless_consumer_1:/consume
+
+      ``docker cp`` is a one-shot command, just like ``cp``.  This means that
+      every time you want to consume a new document, you will have to execute
+      ``docker cp`` again.  You can of course automate this process, but option
+      1 is generally the preferred one.
+
+      .. danger::
+
+         ``docker cp`` will change the owning user and group of a copied file
+         to the acting user at the destination, which will be ``root``.
+
+         You therefore need to ensure that the documents you want to copy into
+         the container are readable by everyone (``chmod o+r file.pdf``) before
+         copying them.
+
+
+.. _Docker: https://www.docker.com/
+.. _docker-compose: https://docs.docker.com/compose/install/
+.. _ISO 639-2/T: https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes
+.. _Debian packagelist: https://packages.debian.org/search?suite=jessie&searchon=names&keywords=tesseract-ocr-
+
+.. [#compose] You of course don't have to use docker-compose, but it
+   simplifies deployment immensely.  If you know your way around Docker, feel
+   free to tinker around without using compose!
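About the ``PAPERLESS_OCR_THREADS`` default mentioned in step 5: "Python determines the core-count of your CPU" most likely means something equivalent to the standard-library call below. This is an assumption; the exact call Paperless uses isn't shown in this commit.

.. code-block:: python

    import multiprocessing

    # The likely fallback when PAPERLESS_OCR_THREADS is unset: one OCR
    # worker per CPU core reported by the OS.
    print(multiprocessing.cpu_count())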
 .. _making-things-a-little-more-permanent:
 
 Making Things a Little more Permanent
 
@@ -126,5 +289,9 @@ Making Things a Little more Permanent
 Once you've tested things and are happy with the work flow, you can automate
 the process of starting the webserver and consumer.  If you're running on a
 bare metal system that's using Systemd, you can use the service unit files in
-the ``scripts`` directory to set this up.  If you're on a SysV or other
-startup system (like the Vagrant box), then you're currently on your own.
+the ``scripts`` directory to set this up.  If you're on another startup system
+or are using a Vagrant box, then you're currently on your own.  If you are
+using Docker, you can set a restart-policy_ in the ``docker-compose.yml`` to
+have the containers automatically start with the Docker daemon.
+
+.. _restart-policy: https://docs.docker.com/engine/reference/commandline/run/#restart-policies-restart
docs/utilities.rst

@@ -26,7 +26,7 @@ How to Use It
 
 The webserver is started via the ``manage.py`` script:
 
-.. code:: bash
+.. code-block:: shell-session
 
     $ /path/to/paperless/src/manage.py runserver
 
@@ -64,7 +64,7 @@ How to Use It
 
 The consumer is started via the ``manage.py`` script:
 
-.. code:: bash
+.. code-block:: shell-session
 
     $ /path/to/paperless/src/manage.py document_consumer
 
@@ -95,13 +95,110 @@ How to Use It
 
 This too is done via the ``manage.py`` script:
 
+.. code-block:: shell-session
+
+    $ /path/to/paperless/src/manage.py document_exporter /path/to/somewhere/
+
+This will dump all of your unencrypted PDFs into ``/path/to/somewhere`` for you
+to do with as you please.  The files are accompanied by a special file,
+``manifest.json``, which can be used to
+:ref:`import the files <utilities-importer>` at a later date if you wish.
+
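The exact schema of ``manifest.json`` isn't documented in this commit, so if you want to see what an export produced, the safe move is to load it and look; nothing below assumes particular keys:

.. code-block:: python

    import json

    # Inspect the export manifest without assuming anything about its
    # schema, since the schema isn't documented in this commit.
    with open("/path/to/somewhere/manifest.json") as fh:
        manifest = json.load(fh)

    print(json.dumps(manifest, indent=2)[:500])  # peek at the first entries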
+.. _utilities-exporter-howto-docker:
+
+Docker
+______
+
+If you are :ref:`using Docker <setup-installation-docker>`, running the
+exporter is almost as easy.  To mount a volume for exports, follow the
+instructions in the ``docker-compose.yml.example`` file for the ``/export``
+volume (making the changes in your own ``docker-compose.yml`` file, of course).
+Once you have the volume mounted, the command to run an export is:
+
+.. code-block:: shell-session
+
+    $ docker-compose run --rm consumer document_exporter /export
+
+If you prefer to use ``docker run`` directly, supply the necessary command-line
+options:
+
+.. code-block:: shell-session
+
+    $ # Identify your containers
+    $ docker-compose ps
+            Name                       Command                State     Ports
+    -------------------------------------------------------------------------
+    paperless_consumer_1    /sbin/docker-entrypoint.sh ...   Exit 0
+    paperless_webserver_1   /sbin/docker-entrypoint.sh ...   Exit 0
+
+    $ # Make sure to replace your passphrase and remove or adapt the id mapping
+    $ docker run --rm \
+        --volumes-from paperless_data_1 \
+        --volume /path/to/arbitrary/place:/export \
+        -e PAPERLESS_PASSPHRASE=YOUR_PASSPHRASE \
+        -e USERMAP_UID=1000 -e USERMAP_GID=1000 \
+        paperless document_exporter /export
+
+
+.. _utilities-importer:
+
+The Importer
+------------
+
+Looking to transfer Paperless data from one instance to another, or just want
+to restore from a backup?  This is your go-to toy.
+
+
+.. _utilities-importer-howto:
+
+How to Use It
+.............
+
+The importer works just like the exporter.  You point it at a directory, and
+the script does the rest of the work:
+
+.. code-block:: shell-session
+
+    $ /path/to/paperless/src/manage.py document_importer /path/to/somewhere/
+
+Docker
+______
+
+Assuming that you've already gone through the steps above in the
+:ref:`export <utilities-exporter-howto-docker>` section, then the easiest thing
+to do is just re-use the ``/export`` path you already set up:
+
+.. code-block:: shell-session
+
+    $ docker-compose run --rm consumer document_importer /export
+
+Similarly, if you're not using docker-compose, you can adjust the export
+instructions above to do the import.
+
+
+.. _utilities-retagger:
+
+The Re-tagger
+-------------
+
+Say you've imported a few hundred documents and now want to introduce a tag
+and apply its matching to all of the currently-imported docs.  This problem is
+common enough that there's a tool for it.
+
+
+.. _utilities-retagger-howto:
+
+How to Use It
+.............
+
+This too is done via the ``manage.py`` script:
+
 .. code:: bash
 
-    $ /path/to/paperless/src/manage.py document_exporter /path/to/somewhere
+    $ /path/to/paperless/src/manage.py document_retagger
 
-This will dump all of your PDFs into ``/path/to/somewhere`` for you to do with
-as you please.  The naming scheme on export is identical to that used for
-import, so should you can now safely delete the entire project directly,
-database, encrypted PDFs and all, and later create it all again simply by
-running the consumer again and dumping all of these files into
-``CONSUMPTION_DIR``.
+That's it.  It'll loop over all of the documents in your database and attempt
+to match all of your tags to them.  If one matches, it'll be applied.  And
+don't worry, you can run this as often as you like, it won't double-tag
+a document.
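As for what "matching" means here: per issue `#48`_ in the changelog, tag matching is done on word boundaries. The real matching code isn't part of this commit, but the behaviour is roughly this (a sketch, not the shipped implementation):

.. code-block:: python

    import re

    # Word-boundary matching as described in #48; illustrative only.
    def tag_matches(match_word, content):
        return re.search(
            r"\b{}\b".format(re.escape(match_word)),
            content,
            re.IGNORECASE,
        ) is not None

    print(tag_matches("tax", "pre-tax income"))    # True: boundary match
    print(tag_matches("tax", "taxonomy lecture"))  # False: no word boundary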
paperless.conf.example (new file, 33 lines)

@@ -0,0 +1,33 @@
+# Sample paperless.conf
+# Copy this file to /etc/paperless.conf and modify it to suit your needs.
+
+# This is where your documents should go to be consumed.  Make sure that it
+# exists and that the user running the paperless service can read/write its
+# contents before you start Paperless.
+PAPERLESS_CONSUMPTION_DIR=""
+
+# These values are required if you want paperless to check a particular email
+# box every 10 minutes and attempt to consume documents from there.  If you
+# don't define a HOST, mail checking will just be disabled.
+PAPERLESS_CONSUME_MAIL_HOST=""
+PAPERLESS_CONSUME_MAIL_PORT=""
+PAPERLESS_CONSUME_MAIL_USER=""
+PAPERLESS_CONSUME_MAIL_PASS=""
+
+# You must have a passphrase in order for Paperless to work at all.  If you set
+# this to "", GnuPG will "encrypt" your PDF by writing it out as a zero-byte
+# file.
+#
+# The passphrase you use here will be used when storing your documents in
+# Paperless, but you can always export them in an unencrypted format by using
+# the document exporter.  See the documentation for more information.
+#
+# One final note about the passphrase.  Once you've consumed a document with
+# one passphrase, DON'T CHANGE IT.  Paperless assumes this to be a constant and
+# can't properly export documents that were encrypted with an old passphrase if
+# you've since changed it to a new one.
+PAPERLESS_PASSPHRASE="secret"
+
+# If you intend to consume documents either via HTTP POST or by email, you must
+# have a shared secret here.
+PAPERLESS_SHARED_SECRET=""
requirements.txt

@@ -1,8 +1,10 @@
-Django==1.9
+Django==1.9.2
 django-extensions==1.6.1
+djangorestframework==3.3.2
+python-dotenv==0.3.0
 filemagic==1.6
 langdetect==1.0.5
-Pillow==3.0.0
+Pillow==3.1.1
 pyocr==0.3.1
 python-dateutil==2.4.2
 python-gnupg==0.3.8
scripts/docker-entrypoint.sh (new file, 74 lines)

@@ -0,0 +1,74 @@
+#!/bin/bash
+set -e
+
+# Source: https://github.com/sameersbn/docker-gitlab/
+map_uidgid() {
+    USERMAP_ORIG_UID=$(id -u paperless)
+    USERMAP_ORIG_GID=$(id -g paperless)
+    USERMAP_GID=${USERMAP_GID:-${USERMAP_UID:-$USERMAP_ORIG_GID}}
+    USERMAP_UID=${USERMAP_UID:-$USERMAP_ORIG_UID}
+    if [[ ${USERMAP_UID} != ${USERMAP_ORIG_UID} || ${USERMAP_GID} != ${USERMAP_ORIG_GID} ]]; then
+        echo "Mapping UID and GID for paperless:paperless to $USERMAP_UID:$USERMAP_GID"
+        groupmod -g ${USERMAP_GID} paperless
+        sed -i -e "s|:${USERMAP_ORIG_UID}:${USERMAP_ORIG_GID}:|:${USERMAP_UID}:${USERMAP_GID}:|" /etc/passwd
+    fi
+}
+
+set_permissions() {
+    # Set permissions for consumption directory
+    chgrp paperless "$PAPERLESS_CONSUMPTION_DIR"
+    chmod g+x "$PAPERLESS_CONSUMPTION_DIR"
+
+    # Set permissions for application directory
+    chown -Rh paperless:paperless /usr/src/paperless
+}
+
+initialize() {
+    map_uidgid
+    set_permissions
+}
+
+install_languages() {
+    local langs="$1"
+    read -ra langs <<<"$langs"
+
+    # Check that it is not empty
+    if [ ${#langs[@]} -eq 0 ]; then
+        return
+    fi
+
+    # Update apt-lists
+    apt-get update
+
+    # Loop over languages to be installed
+    for lang in "${langs[@]}"; do
+        pkg="tesseract-ocr-$lang"
+        # Skip languages that are already installed
+        if dpkg -s "$pkg" > /dev/null 2>&1; then
+            continue
+        fi
+        # Skip languages for which no package exists
+        if ! apt-cache show "$pkg" > /dev/null 2>&1; then
+            continue
+        fi
+
+        apt-get install -y "$pkg"
+    done
+
+    # Remove apt lists
+    rm -rf /var/lib/apt/lists/*
+}
+
+
+if [[ "$1" != "/"* ]]; then
+    initialize
+
+    # Install additional languages if specified
+    if [ ! -z "$PAPERLESS_OCR_LANGUAGES" ]; then
+        install_languages "$PAPERLESS_OCR_LANGUAGES"
+    fi
+
+    exec sudo -HEu paperless "/usr/src/paperless/src/manage.py" "$@"
+fi
+
+exec "$@"
scripts/paperless-consumer.service

@@ -2,10 +2,9 @@
 Description=Paperless consumer
 
 [Service]
-EnvironmentFile=/etc/conf.d/paperless
 User=paperless
 Group=paperless
-ExecStart=/home/paperless/project/virtualenv/bin/python /home/paperless/project/src/manage.py document_consumer -v $PAPERLESS_CONSUMPTION_VERBOSITY
+ExecStart=/home/paperless/project/virtualenv/bin/python /home/paperless/project/src/manage.py document_consumer
 
 [Install]
 WantedBy=multi-user.target
scripts/paperless-webserver.service

@@ -2,7 +2,6 @@
 Description=Paperless webserver
 
 [Service]
-EnvironmentFile=/etc/conf.d/paperless
 User=paperless
 Group=paperless
 ExecStart=/home/paperless/project/virtualenv/bin/python /home/paperless/project/src/manage.py runserver 0.0.0.0:8000
scripts/vagrant-provision

@@ -1,13 +1,31 @@
 #!/bin/bash
 
-# install packages
-sudo apt-get update
-sudo apt-get build-dep -y python-imaging
-sudo apt-get install -y libjpeg8 libjpeg62-dev libfreetype6 libfreetype6-dev
-sudo apt-get install -y build-essential python3-dev python3-pip sqlite3 libsqlite3-dev git
-sudo apt-get install -y tesseract-ocr tesseract-ocr-eng imagemagick
+# Install packages
+apt-get update
+apt-get build-dep -y python-imaging
+apt-get install -y libjpeg8 libjpeg62-dev libfreetype6 libfreetype6-dev
+apt-get install -y build-essential python3-dev python3-pip sqlite3 libsqlite3-dev git
+apt-get install -y tesseract-ocr tesseract-ocr-eng imagemagick unpaper
 
-# setup python project
-pushd /opt/paperless
-sudo pip3 install -r requirements.txt
-popd
+# Python dependencies
+pip3 install -r /opt/paperless/requirements.txt
+
+# Create the environment file
+cat /opt/paperless/paperless.conf.example | sed -e 's#CONSUMPTION_DIR=""#CONSUMPTION_DIR="/home/vagrant/consumption"#' > /etc/paperless.conf
+chmod 0640 /etc/paperless.conf
+chown root:vagrant /etc/paperless.conf
+
+# Create the consumption directory
+mkdir /home/vagrant/consumption
+chown vagrant:vagrant /home/vagrant/consumption
+
+echo "
+
+
+Now follow the remaining steps in the Vagrant section of the setup
+documentation to complete the process:
+
+http://paperless.readthedocs.org/en/latest/setup.html#setup-installation-vagrant
+
+
+"
@@ -3,7 +3,7 @@ from django.contrib.auth.models import User, Group
 from django.core.urlresolvers import reverse
 from django.templatetags.static import static
 
-from .models import Sender, Tag, Document
+from .models import Correspondent, Tag, Document, Log
 
 
 class MonthListFilter(admin.SimpleListFilter):
@@ -45,39 +45,73 @@ class DocumentAdmin(admin.ModelAdmin):
             "all": ("paperless.css",)
         }
 
-    search_fields = ("sender__name", "title", "content")
-    list_display = ("created", "sender", "title", "tags_", "document")
-    list_filter = ("tags", "sender", MonthListFilter)
+    search_fields = ("correspondent__name", "title", "content")
+    list_display = ("created_", "correspondent", "title", "tags_", "document")
+    list_filter = ("tags", "correspondent", MonthListFilter)
     list_per_page = 25
 
+    def created_(self, obj):
+        return obj.created.date().strftime("%Y-%m-%d")
+
     def tags_(self, obj):
         r = ""
         for tag in obj.tags.all():
-            r += '<a class="tag" style="background-color: {};" href="{}">{}</a>'.format(
-                tag.get_colour_display(),
-                "{}?tags__id__exact={}".format(
-                    reverse("admin:documents_document_changelist"),
-                    tag.pk
-                ),
-                tag.slug
+            colour = tag.get_colour_display()
+            r += self._html_tag(
+                "a",
+                tag.slug,
+                **{
+                    "class": "tag",
+                    "style": "background-color: {};".format(colour),
+                    "href": "{}?tags__id__exact={}".format(
+                        reverse("admin:documents_document_changelist"),
+                        tag.pk
+                    )
+                }
             )
         return r
     tags_.allow_tags = True
 
     def document(self, obj):
-        return '<a href="{}">' \
-               '<img src="{}" width="22" height="22" alt="{} icon" title="{}">' \
-               '</a>'.format(
-                   obj.download_url,
-                   static("documents/img/{}.png".format(obj.file_type)),
-                   obj.file_type,
-                   obj.file_name
-               )
+        return self._html_tag(
+            "a",
+            self._html_tag(
+                "img",
+                src=static("documents/img/{}.png".format(obj.file_type)),
+                width=22,
+                height=22,
+                alt=obj.file_type,
+                title=obj.file_name
+            ),
+            href=obj.download_url
+        )
     document.allow_tags = True
 
-admin.site.register(Sender)
+    @staticmethod
+    def _html_tag(kind, inside=None, **kwargs):
+
+        attributes = []
+        for lft, rgt in kwargs.items():
+            attributes.append('{}="{}"'.format(lft, rgt))
+
+        if inside is not None:
+            return "<{kind} {attributes}>{inside}</{kind}>".format(
+                kind=kind, attributes=" ".join(attributes), inside=inside)
+
+        return "<{} {}/>".format(kind, " ".join(attributes))
+
+
+class LogAdmin(admin.ModelAdmin):
+
+    list_display = ("message", "level", "component")
+    list_filter = ("level", "component",)
+
+
+admin.site.register(Correspondent)
 admin.site.register(Tag, TagAdmin)
 admin.site.register(Document, DocumentAdmin)
+admin.site.register(Log, LogAdmin)
 
 
 # Unless we implement multi-user, these default registrations don't make sense.
 admin.site.unregister(Group)
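For reference, a quick sketch of what the new `_html_tag` helper produces, worked out from the method above. The sample attribute values are invented for illustration:

    # Hypothetical calls mirroring DocumentAdmin._html_tag above;
    # the sample values are made up.
    # _html_tag("a", "bills", **{"class": "tag", "href": "/?tags__id__exact=1"})
    #   -> '<a class="tag" href="/?tags__id__exact=1">bills</a>'
    # _html_tag("img", src="documents/img/pdf.png", width=22, height=22)
    #   -> '<img src="documents/img/pdf.png" width="22" height="22"/>'
    # Keyword order drives attribute order, which dicts only guarantee on
    # Python 3.7+; for admin display that is harmless.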
@@ -1,5 +1,8 @@
 import datetime
+import logging
 import tempfile
+import uuid
 
 from multiprocessing.pool import Pool
 
 import itertools
@@ -17,20 +20,14 @@ from PIL import Image
 from django.conf import settings
 from django.utils import timezone
 from django.template.defaultfilters import slugify
+from pyocr.tesseract import TesseractError
 
-from logger.models import Log
 from paperless.db import GnuPG
 
-from .models import Sender, Tag, Document
+from .models import Correspondent, Tag, Document, Log
 from .languages import ISO639
 
 
-def image_to_string(args):
-    self, png, lang = args
-    with Image.open(os.path.join(self.SCRATCH, png)) as f:
-        return self.OCR.image_to_string(f, lang=lang)
-
-
 class OCRError(Exception):
     pass
 
@@ -42,8 +39,8 @@ class ConsumerError(Exception):
 class Consumer(object):
     """
    Loop over every file found in CONSUMPTION_DIR and:
-      1. Convert it to a greyscale png
-      2. Use tesseract on the png
+      1. Convert it to a greyscale pnm
+      2. Use tesseract on the pnm
       3. Encrypt and store the document in the MEDIA_ROOT
       4. Store the OCR'd text in the database
       5. Delete the document and image(s)
@@ -51,28 +48,29 @@ class Consumer(object):
 
     SCRATCH = settings.SCRATCH_DIR
     CONVERT = settings.CONVERT_BINARY
+    UNPAPER = settings.UNPAPER_BINARY
     CONSUME = settings.CONSUMPTION_DIR
     THREADS = int(settings.OCR_THREADS) if settings.OCR_THREADS else None
 
-    OCR = pyocr.get_available_tools()[0]
     DEFAULT_OCR_LANGUAGE = settings.OCR_LANGUAGE
 
     REGEX_TITLE = re.compile(
         r"^.*/(.*)\.(pdf|jpe?g|png|gif|tiff)$",
         flags=re.IGNORECASE
     )
-    REGEX_SENDER_TITLE = re.compile(
+    REGEX_CORRESPONDENT_TITLE = re.compile(
         r"^.*/(.+) - (.*)\.(pdf|jpe?g|png|gif|tiff)$",
         flags=re.IGNORECASE
     )
-    REGEX_SENDER_TITLE_TAGS = re.compile(
+    REGEX_CORRESPONDENT_TITLE_TAGS = re.compile(
         r"^.*/(.*) - (.*) - ([a-z0-9\-,]*)\.(pdf|jpe?g|png|gif|tiff)$",
         flags=re.IGNORECASE
     )
 
-    def __init__(self, verbosity=1):
+    def __init__(self):
 
-        self.verbosity = verbosity
+        self.logger = logging.getLogger(__name__)
+        self.logging_group = None
 
         try:
             os.makedirs(self.SCRATCH)
@@ -92,6 +90,12 @@ class Consumer(object):
             raise ConsumerError(
                 "Consumption directory {} does not exist".format(self.CONSUME))
 
+    def log(self, level, message):
+        getattr(self.logger, level)(message, extra={
+            "group": self.logging_group,
+            "component": Log.COMPONENT_CONSUMER
+        })
+
     def consume(self):
 
         for doc in os.listdir(self.CONSUME):
@@ -110,122 +114,156 @@ class Consumer(object):
             if self._is_ready(doc):
                 continue
 
-            Log.info("Consuming {}".format(doc), Log.COMPONENT_CONSUMER)
+            self.logging_group = uuid.uuid4()
+
+            self.log("info", "Consuming {}".format(doc))
 
             tempdir = tempfile.mkdtemp(prefix="paperless", dir=self.SCRATCH)
-            pngs = self._get_greyscale(tempdir, doc)
+            imgs = self._get_greyscale(tempdir, doc)
+            thumbnail = self._get_thumbnail(tempdir, doc)
 
             try:
-                text = self._get_ocr(pngs)
-                self._store(text, doc)
-            except OCRError:
+                text = self._get_ocr(imgs)
+                self._store(text, doc, thumbnail)
+            except OCRError as e:
                 self._ignore.append(doc)
-                Log.error("OCR FAILURE: {}".format(doc), Log.COMPONENT_CONSUMER)
+                self.log("error", "OCR FAILURE for {}: {}".format(doc, e))
+                self._cleanup_tempdir(tempdir)
                 continue
-            finally:
-                self._cleanup(tempdir, doc)
+            else:
+                self._cleanup_tempdir(tempdir)
+                self._cleanup_doc(doc)
 
     def _get_greyscale(self, tempdir, doc):
+        """
+        Greyscale images are easier for Tesseract to OCR
+        """
 
-        Log.debug(
-            "Generating greyscale image from {}".format(doc),
-            Log.COMPONENT_CONSUMER
-        )
+        self.log("info", "Generating greyscale image from {}".format(doc))
 
-        png = os.path.join(tempdir, "convert-%04d.jpg")
+        # Convert PDF to multiple PNMs
+        pnm = os.path.join(tempdir, "convert-%04d.pnm")
         subprocess.Popen((
             self.CONVERT, "-density", "300", "-depth", "8",
-            "-type", "grayscale", doc, png
+            "-type", "grayscale", doc, pnm
         )).wait()
 
-        pngs = [os.path.join(tempdir, f) for f in os.listdir(tempdir) if f.startswith("convert")]
-        return sorted(filter(lambda f: os.path.isfile(f), pngs))
+        # Get a list of converted images
+        pnms = []
+        for f in os.listdir(tempdir):
+            if f.endswith(".pnm"):
+                pnms.append(os.path.join(tempdir, f))
 
-    @staticmethod
-    def _guess_language(text):
+        # Run unpaper in parallel on converted images
+        with Pool(processes=self.THREADS) as pool:
+            pool.map(run_unpaper, itertools.product([self.UNPAPER], pnms))
+
+        # Return list of converted images, processed with unpaper
+        pnms = []
+        for f in os.listdir(tempdir):
+            if f.endswith(".unpaper.pnm"):
+                pnms.append(os.path.join(tempdir, f))
+
+        return sorted(filter(lambda __: os.path.isfile(__), pnms))
+
+    def _get_thumbnail(self, tempdir, doc):
+        """
+        The thumbnail of a PDF is just a 500px wide image of the first page.
+        """
+
+        self.log("info", "Generating the thumbnail")
+
+        subprocess.Popen((
+            self.CONVERT,
+            "-scale", "500x5000",
+            "-alpha", "remove",
+            doc,
+            os.path.join(tempdir, "convert-%04d.png")
+        )).wait()
+
+        return os.path.join(tempdir, "convert-0000.png")
+
+    def _guess_language(self, text):
         try:
             guess = langdetect.detect(text)
-            Log.debug(
-                "Language detected: {}".format(guess),
-                Log.COMPONENT_CONSUMER
-            )
+            self.log("debug", "Language detected: {}".format(guess))
             return guess
         except Exception as e:
-            Log.warning(
-                "Language detection error: {}".format(e), Log.COMPONENT_MAIL)
+            self.log("warning", "Language detection error: {}".format(e))
 
-    def _get_ocr(self, pngs):
+    def _get_ocr(self, imgs):
         """
         Attempts to do the best job possible OCR'ing the document based on
         simple language detection trial & error.
         """
 
-        if not pngs:
-            raise OCRError
+        if not imgs:
+            raise OCRError("No images found")
 
-        Log.debug("OCRing the document", Log.COMPONENT_CONSUMER)
+        self.log("info", "OCRing the document")
 
         # Since the division gets rounded down by int, this calculation works
         # for every edge-case, i.e. 1
-        middle = int(len(pngs) / 2)
-        raw_text = self._ocr([pngs[middle]], self.DEFAULT_OCR_LANGUAGE)
+        middle = int(len(imgs) / 2)
+        raw_text = self._ocr([imgs[middle]], self.DEFAULT_OCR_LANGUAGE)
 
         guessed_language = self._guess_language(raw_text)
 
         if not guessed_language or guessed_language not in ISO639:
-            Log.warning("Language detection failed!", Log.COMPONENT_CONSUMER)
+            self.log("warning", "Language detection failed!")
             if settings.FORGIVING_OCR:
-                Log.warning(
-                    "As FORGIVING_OCR is enabled, we're going to make the best "
-                    "with what we have.",
-                    Log.COMPONENT_CONSUMER
+                self.log(
+                    "warning",
+                    "As FORGIVING_OCR is enabled, we're going to make the "
+                    "best with what we have."
                 )
-                raw_text = self._assemble_ocr_sections(pngs, middle, raw_text)
+                raw_text = self._assemble_ocr_sections(imgs, middle, raw_text)
                 return raw_text
-            raise OCRError
+            raise OCRError("Language detection failed")
 
         if ISO639[guessed_language] == self.DEFAULT_OCR_LANGUAGE:
-            raw_text = self._assemble_ocr_sections(pngs, middle, raw_text)
+            raw_text = self._assemble_ocr_sections(imgs, middle, raw_text)
             return raw_text
 
         try:
-            return self._ocr(pngs, ISO639[guessed_language])
+            return self._ocr(imgs, ISO639[guessed_language])
         except pyocr.pyocr.tesseract.TesseractError:
             if settings.FORGIVING_OCR:
-                Log.warning(
+                self.log(
+                    "warning",
                     "OCR for {} failed, but we're going to stick with what "
                     "we've got since FORGIVING_OCR is enabled.".format(
                         guessed_language
-                    ),
-                    Log.COMPONENT_CONSUMER
+                    )
                 )
-                raw_text = self._assemble_ocr_sections(pngs, middle, raw_text)
+                raw_text = self._assemble_ocr_sections(imgs, middle, raw_text)
                 return raw_text
-            raise OCRError
+            raise OCRError(
+                "The guessed language is not available in this instance of "
+                "Tesseract."
+            )
 
-    def _assemble_ocr_sections(self, pngs, middle, text):
+    def _assemble_ocr_sections(self, imgs, middle, text):
         """
         Given a `middle` value and the text that middle page represents, we OCR
         the remainder of the document and return the whole thing.
         """
-        text = self._ocr(pngs[:middle], self.DEFAULT_OCR_LANGUAGE) + text
-        text += self._ocr(pngs[middle+1:], self.DEFAULT_OCR_LANGUAGE)
+        text = self._ocr(imgs[:middle], self.DEFAULT_OCR_LANGUAGE) + text
+        text += self._ocr(imgs[middle + 1:], self.DEFAULT_OCR_LANGUAGE)
         return text
 
-    def _ocr(self, pngs, lang):
+    def _ocr(self, imgs, lang):
         """
         Performs a single OCR attempt.
         """
 
-        if not pngs:
+        if not imgs:
             return ""
 
-        Log.debug("Parsing for {}".format(lang), Log.COMPONENT_CONSUMER)
+        self.log("info", "Parsing for {}".format(lang))
 
         with Pool(processes=self.THREADS) as pool:
-            r = pool.map(
-                image_to_string, itertools.product([self], pngs, [lang]))
+            r = pool.map(image_to_string, itertools.product(imgs, [lang]))
             r = " ".join(r)
 
         # Strip out excess white space to allow matching to go smoother
@@ -233,16 +271,18 @@ class Consumer(object):
 
     def _guess_attributes_from_name(self, parseable):
         """
-        We use a crude naming convention to make handling the sender, title, and
-        tags easier:
-          "<sender> - <title> - <tags>.<suffix>"
-          "<sender> - <title>.<suffix>"
+        We use a crude naming convention to make handling the correspondent,
+        title, and tags easier:
+          "<correspondent> - <title> - <tags>.<suffix>"
+          "<correspondent> - <title>.<suffix>"
           "<title>.<suffix>"
         """
 
-        def get_sender(sender_name):
-            return Sender.objects.get_or_create(
-                name=sender_name, defaults={"slug": slugify(sender_name)})[0]
+        def get_correspondent(correspondent_name):
+            return Correspondent.objects.get_or_create(
                name=correspondent_name,
                defaults={"slug": slugify(correspondent_name)}
            )[0]
 
         def get_tags(tags):
             r = []
@@ -251,40 +291,47 @@ class Consumer(object):
                     Tag.objects.get_or_create(slug=t, defaults={"name": t})[0])
             return tuple(r)
 
-        # First attempt: "<sender> - <title> - <tags>.<suffix>"
-        m = re.match(self.REGEX_SENDER_TITLE_TAGS, parseable)
+        def get_suffix(suffix):
+            suffix = suffix.lower()
+            if suffix == "jpeg":
+                return "jpg"
+            return suffix
+
+        # First attempt: "<correspondent> - <title> - <tags>.<suffix>"
+        m = re.match(self.REGEX_CORRESPONDENT_TITLE_TAGS, parseable)
         if m:
             return (
-                get_sender(m.group(1)),
+                get_correspondent(m.group(1)),
                 m.group(2),
                 get_tags(m.group(3)),
-                m.group(4)
+                get_suffix(m.group(4))
             )
 
-        # Second attempt: "<sender> - <title>.<suffix>"
-        m = re.match(self.REGEX_SENDER_TITLE, parseable)
+        # Second attempt: "<correspondent> - <title>.<suffix>"
+        m = re.match(self.REGEX_CORRESPONDENT_TITLE, parseable)
         if m:
-            return get_sender(m.group(1)), m.group(2), (), m.group(3)
+            return (
+                get_correspondent(m.group(1)),
+                m.group(2),
+                (),
+                get_suffix(m.group(3))
+            )
 
-        # That didn't work, so we assume sender and tags are None
+        # That didn't work, so we assume correspondent and tags are None
         m = re.match(self.REGEX_TITLE, parseable)
-        return None, m.group(1), (), m.group(2)
+        return None, m.group(1), (), get_suffix(m.group(2))
 
-    def _store(self, text, doc):
+    def _store(self, text, doc, thumbnail):
 
         sender, title, tags, file_type = self._guess_attributes_from_name(doc)
-        tags = list(tags)
-
-        lower_text = text.lower()
-        relevant_tags = set(
-            [t for t in Tag.objects.all() if t.matches(lower_text)] + tags)
+        relevant_tags = set(list(Tag.match_all(text)) + list(tags))
 
         stats = os.stat(doc)
 
-        Log.debug("Saving record to database", Log.COMPONENT_CONSUMER)
+        self.log("debug", "Saving record to database")
 
         document = Document.objects.create(
-            sender=sender,
+            correspondent=sender,
             title=title,
             content=text,
             file_type=file_type,
@@ -296,22 +343,29 @@ class Consumer(object):
 
         if relevant_tags:
             tag_names = ", ".join([t.slug for t in relevant_tags])
-            Log.debug(
-                "Tagging with {}".format(tag_names), Log.COMPONENT_CONSUMER)
+            self.log("debug", "Tagging with {}".format(tag_names))
             document.tags.add(*relevant_tags)
 
+        # Encrypt and store the actual document
         with open(doc, "rb") as unencrypted:
             with open(document.source_path, "wb") as encrypted:
-                Log.debug("Encrypting", Log.COMPONENT_CONSUMER)
+                self.log("debug", "Encrypting the document")
                 encrypted.write(GnuPG.encrypted(unencrypted))
 
-    def _cleanup(self, tempdir, doc):
-        # Remove temporary directory recursively
-        Log.debug("Deleting directory {}".format(tempdir), Log.COMPONENT_CONSUMER)
-        shutil.rmtree(tempdir)
+        # Encrypt and store the thumbnail
+        with open(thumbnail, "rb") as unencrypted:
+            with open(document.thumbnail_path, "wb") as encrypted:
+                self.log("debug", "Encrypting the thumbnail")
+                encrypted.write(GnuPG.encrypted(unencrypted))
 
-        # Remove doc
-        Log.debug("Deleting document {}".format(doc), Log.COMPONENT_CONSUMER)
+        self.log("info", "Completed")
+
+    def _cleanup_tempdir(self, d):
+        self.log("debug", "Deleting directory {}".format(d))
+        shutil.rmtree(d)
+
+    def _cleanup_doc(self, doc):
+        self.log("debug", "Deleting document {}".format(doc))
         os.unlink(doc)
 
     def _is_ready(self, doc):
@@ -329,3 +383,23 @@ class Consumer(object):
             self.stats[doc] = t
 
         return False
+
+
+def image_to_string(args):
+    img, lang = args
+    ocr = pyocr.get_available_tools()[0]
+    with Image.open(os.path.join(Consumer.SCRATCH, img)) as f:
+        if ocr.can_detect_orientation():
+            try:
+                orientation = ocr.detect_orientation(f, lang=lang)
+                f = f.rotate(orientation["angle"], expand=1)
+            except TesseractError:
+                pass
+        return ocr.image_to_string(f, lang=lang)
+
+
+def run_unpaper(args):
+    unpaper, pnm = args
+    subprocess.Popen((
+        unpaper, pnm, pnm.replace(".pnm", ".unpaper.pnm")
+    )).wait()
|
|||||||
from django import forms
|
from django import forms
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
|
|
||||||
from .models import Document, Sender
|
from .models import Document, Correspondent
|
||||||
from .consumer import Consumer
|
from .consumer import Consumer
|
||||||
|
|
||||||
|
|
||||||
class UploadForm(forms.Form):
|
class UploadForm(forms.Form):
|
||||||
|
|
||||||
SECRET = settings.UPLOAD_SHARED_SECRET
|
SECRET = settings.SHARED_SECRET
|
||||||
TYPE_LOOKUP = {
|
TYPE_LOOKUP = {
|
||||||
"application/pdf": Document.TYPE_PDF,
|
"application/pdf": Document.TYPE_PDF,
|
||||||
"image/png": Document.TYPE_PNG,
|
"image/png": Document.TYPE_PNG,
|
||||||
@ -23,31 +23,36 @@ class UploadForm(forms.Form):
|
|||||||
"image/tiff": Document.TYPE_TIF,
|
"image/tiff": Document.TYPE_TIF,
|
||||||
}
|
}
|
||||||
|
|
||||||
sender = forms.CharField(
|
correspondent = forms.CharField(
|
||||||
max_length=Sender._meta.get_field("name").max_length, required=False)
|
max_length=Correspondent._meta.get_field("name").max_length,
|
||||||
|
required=False
|
||||||
|
)
|
||||||
title = forms.CharField(
|
title = forms.CharField(
|
||||||
max_length=Document._meta.get_field("title").max_length, required=False)
|
max_length=Document._meta.get_field("title").max_length,
|
||||||
|
required=False
|
||||||
|
)
|
||||||
document = forms.FileField()
|
document = forms.FileField()
|
||||||
signature = forms.CharField(max_length=256)
|
signature = forms.CharField(max_length=256)
|
||||||
|
|
||||||
def clean_sender(self):
|
def clean_correspondent(self):
|
||||||
"""
|
"""
|
||||||
I suppose it might look cleaner to use .get_or_create() here, but that
|
I suppose it might look cleaner to use .get_or_create() here, but that
|
||||||
would also allow someone to fill up the db with bogus senders before all
|
would also allow someone to fill up the db with bogus correspondents
|
||||||
validation was met.
|
before all validation was met.
|
||||||
"""
|
"""
|
||||||
sender = self.cleaned_data.get("sender")
|
corresp = self.cleaned_data.get("correspondent")
|
||||||
if not sender:
|
if not corresp:
|
||||||
return None
|
return None
|
||||||
if not Sender.SAFE_REGEX.match(sender) or " - " in sender:
|
if not Correspondent.SAFE_REGEX.match(corresp) or " - " in corresp:
|
||||||
raise forms.ValidationError("That sender name is suspicious.")
|
raise forms.ValidationError(
|
||||||
return sender
|
"That correspondent name is suspicious.")
|
||||||
|
return corresp
|
||||||
|
|
||||||
def clean_title(self):
|
def clean_title(self):
|
||||||
title = self.cleaned_data.get("title")
|
title = self.cleaned_data.get("title")
|
||||||
if not title:
|
if not title:
|
||||||
return None
|
return None
|
||||||
if not Sender.SAFE_REGEX.match(title) or " - " in title:
|
if not Correspondent.SAFE_REGEX.match(title) or " - " in title:
|
||||||
raise forms.ValidationError("That title is suspicious.")
|
raise forms.ValidationError("That title is suspicious.")
|
||||||
|
|
||||||
def clean_document(self):
|
def clean_document(self):
|
||||||
@ -59,10 +64,10 @@ class UploadForm(forms.Form):
|
|||||||
return document, self.TYPE_LOOKUP[file_type]
|
return document, self.TYPE_LOOKUP[file_type]
|
||||||
|
|
||||||
def clean(self):
|
def clean(self):
|
||||||
sender = self.clened_data("sender")
|
corresp = self.clened_data("correspondent")
|
||||||
title = self.cleaned_data("title")
|
title = self.cleaned_data("title")
|
||||||
signature = self.cleaned_data("signature")
|
signature = self.cleaned_data("signature")
|
||||||
if sha256(sender + title + self.SECRET).hexdigest() == signature:
|
if sha256(corresp + title + self.SECRET).hexdigest() == signature:
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@ -73,13 +78,15 @@ class UploadForm(forms.Form):
|
|||||||
form do that as well. Think of it as a poor-man's queue server.
|
form do that as well. Think of it as a poor-man's queue server.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
sender = self.clened_data("sender")
|
correspondent = self.clened_data("correspondent")
|
||||||
title = self.cleaned_data("title")
|
title = self.cleaned_data("title")
|
||||||
document, file_type = self.cleaned_data.get("document")
|
document, file_type = self.cleaned_data.get("document")
|
||||||
|
|
||||||
t = int(mktime(datetime.now()))
|
t = int(mktime(datetime.now()))
|
||||||
file_name = os.path.join(
|
file_name = os.path.join(
|
||||||
Consumer.CONSUME, "{} - {}.{}".format(sender, title, file_type))
|
Consumer.CONSUME,
|
||||||
|
"{} - {}.{}".format(correspondent, title, file_type)
|
||||||
|
)
|
||||||
|
|
||||||
with open(file_name, "wb") as f:
|
with open(file_name, "wb") as f:
|
||||||
f.write(document)
|
f.write(document)
|
||||||
|
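Given the `clean()` above, an uploading client has to send a signature that is the SHA-256 of correspondent + title + shared secret. A hedged sketch of the client side; the `.encode()` call is an assumption, since hashlib needs bytes on Python 3, and the sample values are invented:

    from hashlib import sha256

    correspondent = "ACME Inc"      # sample value
    title = "Invoice 0042"          # sample value
    shared_secret = "change-me"     # stands in for the configured SHARED_SECRET

    signature = sha256(
        (correspondent + title + shared_secret).encode("utf-8")).hexdigest()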
@@ -185,10 +185,10 @@ ISO639 = {
     "yo": "yor",
     "za": "zha",
 
-    # Tessdata contains two values for Chinese, "chi_sim" and "chi_tra". I have
-    # no idea which one is better, so I just picked the bigger file.
+    # Tessdata contains two values for Chinese, "chi_sim" and "chi_tra". I
+    # have no idea which one is better, so I just picked the bigger file.
     "zh": "chi_tra",
 
     "zu": "zul"
 
 }
30
src/documents/loggers.py
Normal file
@@ -0,0 +1,30 @@
import logging


class PaperlessLogger(logging.StreamHandler):
    """
    A logger smart enough to know to log some kinds of messages to the database
    for later retrieval in a pretty interface.
    """

    def emit(self, record):

        logging.StreamHandler.emit(self, record)

        if not hasattr(record, "component"):
            return

        # We have to do the import here or Django will barf when it tries to
        # load this because the apps aren't loaded at that point
        from .models import Log

        kwargs = {
            "message": record.msg,
            "component": record.component,
            "level": record.levelno,
        }

        if hasattr(record, "group"):
            kwargs["group"] = record.group

        Log.objects.create(**kwargs)
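PaperlessLogger is a plain `logging.StreamHandler` subclass, so it would presumably be wired up through Django's LOGGING setting. A sketch under that assumption; the logger names and levels here are not taken from the project's actual settings module:

    LOGGING = {
        "version": 1,
        "disable_existing_loggers": False,
        "handlers": {
            "db": {"class": "documents.loggers.PaperlessLogger"},
        },
        "loggers": {
            "documents": {"handlers": ["db"], "level": "DEBUG"},
        },
    }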
@@ -1,8 +1,10 @@
 import datetime
 import imaplib
+import logging
 import os
 import re
 import time
+import uuid
 
 from base64 import b64decode
 from email import policy
@@ -11,10 +13,8 @@ from dateutil import parser
 
 from django.conf import settings
 
-from logger.models import Log
-
 from .consumer import Consumer
-from .models import Sender
+from .models import Correspondent, Log
 
 
 class MailFetcherError(Exception):
@@ -25,21 +25,34 @@ class InvalidMessageError(Exception):
     pass
 
 
-class Message(object):
+class Loggable(object):
+
+    def __init__(self, group=None):
+        self.logger = logging.getLogger(__name__)
+        self.logging_group = group or uuid.uuid4()
+
+    def log(self, level, message):
+        getattr(self.logger, level)(message, extra={
+            "group": self.logging_group,
+            "component": Log.COMPONENT_MAIL
+        })
+
+
+class Message(Loggable):
     """
     A crude, but simple email message class. We assume that there's a subject
     and n attachments, and that we don't care about the message body.
     """
 
-    SECRET = settings.UPLOAD_SHARED_SECRET
+    SECRET = settings.SHARED_SECRET
 
-    def __init__(self, data, verbosity=1):
+    def __init__(self, data, group=None):
         """
         Cribbed heavily from
         https://www.ianlewis.org/en/parsing-email-attachments-python
         """
 
-        self.verbosity = verbosity
+        Loggable.__init__(self, group=group)
 
         self.subject = None
         self.time = None
@@ -54,8 +67,7 @@ class Message(object):
 
         self._set_time(message)
 
-        Log.info(
-            'Importing email: "{}"'.format(self.subject), Log.COMPONENT_MAIL)
+        self.log("info", 'Importing email: "{}"'.format(self.subject))
 
         attachments = []
         for part in message.walk():
@@ -91,7 +103,7 @@ class Message(object):
     def check_subject(self):
         if self.subject is None:
             raise InvalidMessageError("Message does not have a subject")
-        if not Sender.SAFE_REGEX.match(self.subject):
+        if not Correspondent.SAFE_REGEX.match(self.subject):
             raise InvalidMessageError("Message subject is unsafe: {}".format(
                 self.subject))
 
@@ -134,9 +146,11 @@ class Attachment(object):
         return self.data
 
 
-class MailFetcher(object):
+class MailFetcher(Loggable):
 
-    def __init__(self, verbosity=1):
+    def __init__(self):
+
+        Loggable.__init__(self)
 
         self._connection = None
         self._host = settings.MAIL_CONSUMPTION["HOST"]
@@ -148,7 +162,6 @@ class MailFetcher(object):
         self._enabled = bool(self._host)
 
         self.last_checked = datetime.datetime.now()
-        self.verbosity = verbosity
 
     def pull(self):
         """
@@ -159,14 +172,14 @@ class MailFetcher(object):
 
         if self._enabled:
 
-            Log.info("Checking mail", Log.COMPONENT_MAIL)
+            # Reset the grouping id for each fetch
+            self.logging_group = uuid.uuid4()
+
+            self.log("debug", "Checking mail")
 
             for message in self._get_messages():
 
-                Log.debug(
-                    'Storing email: "{}"'.format(message.subject),
-                    Log.COMPONENT_MAIL
-                )
+                self.log("info", 'Storing email: "{}"'.format(message.subject))
 
                 t = int(time.mktime(message.time.timetuple()))
                 file_name = os.path.join(Consumer.CONSUME, message.file_name)
@@ -193,7 +206,7 @@ class MailFetcher(object):
             self._connection.logout()
 
         except Exception as e:
-            Log.error(e, Log.COMPONENT_MAIL)
+            self.log("error", str(e))
 
         return r
 
@@ -218,9 +231,9 @@ class MailFetcher(object):
 
         message = None
         try:
-            message = Message(data[0][1], self.verbosity)
+            message = Message(data[0][1], self.logging_group)
         except InvalidMessageError as e:
-            Log.error(e, Log.COMPONENT_MAIL)
+            self.log("error", str(e))
         else:
             self._connection.store(num, "+FLAGS", "\\Deleted")
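The new Loggable base class boils down to standard-library logging with a per-run UUID passed through `extra`. In miniature (a standalone illustration, not project code):

    import logging
    import uuid

    logger = logging.getLogger("documents.mail")
    group = uuid.uuid4()
    # component 2 is "Mail Fetcher" per the Log migration further down
    logger.info("Checking mail", extra={"group": group, "component": 2})
    logger.info("Storing email", extra={"group": group, "component": 2})
    # Both records share record.group, which PaperlessLogger copies into
    # the Log table so a UI can collate one fetch run's messages.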
@@ -1,10 +1,12 @@
 import datetime
+import logging
 import os
 import time
 
 from django.conf import settings
 from django.core.management.base import BaseCommand, CommandError
 
+from ...models import Log
 from ...consumer import Consumer, ConsumerError
 from ...mail import MailFetcher, MailFetcherError
 
@@ -34,7 +36,7 @@ class Command(BaseCommand):
         self.verbosity = options["verbosity"]
 
         try:
-            self.file_consumer = Consumer(verbosity=self.verbosity)
+            self.file_consumer = Consumer()
             self.mail_fetcher = MailFetcher()
         except (ConsumerError, MailFetcherError) as e:
             raise CommandError(e)
@@ -44,6 +46,13 @@ class Command(BaseCommand):
         except FileExistsError:
             pass
 
+        logging.getLogger(__name__).info(
+            "Starting document consumer at {}".format(
+                settings.CONSUMPTION_DIR
+            ),
+            extra={"component": Log.COMPONENT_CONSUMER}
+        )
+
         try:
             while True:
                 self.loop()
@@ -1,10 +1,12 @@
+import json
 import os
 import time
 
 from django.conf import settings
 from django.core.management.base import BaseCommand, CommandError
+from django.core import serializers
 
-from documents.models import Document
+from documents.models import Document, Correspondent, Tag
 from paperless.db import GnuPG
 
 from ...mixins import Renderable
@@ -14,21 +16,26 @@ class Command(Renderable, BaseCommand):
 
     help = """
         Decrypt and rename all files in our collection into a given target
-        directory. Note that we don't export any of the parsed data since
-        that can always be re-collected via the consumer.
+        directory. And include a manifest file containing document data for
+        easy import.
    """.replace("    ", "")
 
     def add_arguments(self, parser):
         parser.add_argument("target")
+        parser.add_argument(
+            "--legacy",
+            action="store_true",
+            help="Don't try to export all of the document data, just dump the "
+                 "original document files out in a format that makes "
+                 "re-consuming them easy."
+        )
 
     def __init__(self, *args, **kwargs):
-        self.verbosity = 0
-        self.target = None
         BaseCommand.__init__(self, *args, **kwargs)
+        self.target = None
 
     def handle(self, *args, **options):
 
-        self.verbosity = options["verbosity"]
         self.target = options["target"]
 
         if not os.path.exists(self.target):
@@ -40,9 +47,22 @@ class Command(Renderable, BaseCommand):
         if not settings.PASSPHRASE:
             settings.PASSPHRASE = input("Please enter the passphrase: ")
 
-        for document in Document.objects.all():
+        if options["legacy"]:
+            self.dump_legacy()
+        else:
+            self.dump()
+
+    def dump(self):
+
+        documents = Document.objects.all()
+        document_map = {d.pk: d for d in documents}
+        manifest = json.loads(serializers.serialize("json", documents))
+        for document_dict in manifest:
+
+            document = document_map[document_dict["pk"]]
 
             target = os.path.join(self.target, document.file_name)
+            document_dict["__exported_file_name__"] = target
 
             print("Exporting: {}".format(target))
 
@@ -50,3 +70,37 @@ class Command(Renderable, BaseCommand):
                 f.write(GnuPG.decrypted(document.source_file))
             t = int(time.mktime(document.created.timetuple()))
             os.utime(target, times=(t, t))
+
+        manifest += json.loads(
+            serializers.serialize("json", Correspondent.objects.all()))
+
+        manifest += json.loads(serializers.serialize(
+            "json", Tag.objects.all()))
+
+        with open(os.path.join(self.target, "manifest.json"), "w") as f:
+            json.dump(manifest, f, indent=2)
+
+    def dump_legacy(self):
+
+        for document in Document.objects.all():
+
+            target = os.path.join(
+                self.target, self._get_legacy_file_name(document))
+
+            print("Exporting: {}".format(target))
+
+            with open(target, "wb") as f:
+                f.write(GnuPG.decrypted(document.source_file))
+            t = int(time.mktime(document.created.timetuple()))
+            os.utime(target, times=(t, t))
+
+    @staticmethod
+    def _get_legacy_file_name(doc):
+        if doc.correspondent and doc.title:
+            tags = ",".join([t.slug for t in doc.tags.all()])
+            if tags:
+                return "{} - {} - {}.{}".format(
+                    doc.correspondent, doc.title, tags, doc.file_type)
+            return "{} - {}.{}".format(
+                doc.correspondent, doc.title, doc.file_type)
+        return os.path.basename(doc.source_path)
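For orientation, the inferred shape of one document record in the resulting manifest.json: Django's serializer output plus the `__exported_file_name__` key added in `dump()`. The field values here are invented:

    {
        "model": "documents.document",
        "pk": 1,
        "fields": {
            "correspondent": 3,
            "title": "Invoice 0042",
            "file_type": "pdf"
        },
        "__exported_file_name__": "/mnt/export/ACME Inc - Invoice 0042.pdf"
    }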
99
src/documents/management/commands/document_importer.py
Normal file
@@ -0,0 +1,99 @@
import json
import os

from django.conf import settings
from django.core.management.base import BaseCommand, CommandError
from django.core.management import call_command

from documents.models import Document
from paperless.db import GnuPG

from ...mixins import Renderable


class Command(Renderable, BaseCommand):

    help = """
        Using a manifest.json file, load the data from there, and import the
        documents it refers to.
    """.replace("    ", "")

    def add_arguments(self, parser):
        parser.add_argument("source")

    def __init__(self, *args, **kwargs):
        BaseCommand.__init__(self, *args, **kwargs)
        self.source = None
        self.manifest = None

    def handle(self, *args, **options):

        self.source = options["source"]

        if not os.path.exists(self.source):
            raise CommandError("That path doesn't exist")

        if not os.access(self.source, os.R_OK):
            raise CommandError("That path doesn't appear to be readable")

        manifest_path = os.path.join(self.source, "manifest.json")
        self._check_manifest_exists(manifest_path)

        with open(manifest_path) as f:
            self.manifest = json.load(f)

        self._check_manifest()

        if not settings.PASSPHRASE:
            raise CommandError(
                "You need to define a passphrase before continuing. Please "
                "consult the documentation for setting up Paperless."
            )

        # Fill up the database with whatever is in the manifest
        call_command("loaddata", manifest_path)

        self._import_files_from_manifest()

    @staticmethod
    def _check_manifest_exists(path):
        if not os.path.exists(path):
            raise CommandError(
                "That directory doesn't appear to contain a manifest.json "
                "file."
            )

    def _check_manifest(self):

        for record in self.manifest:

            if not record["model"] == "documents.document":
                continue

            if "__exported_file_name__" not in record:
                raise CommandError(
                    'The manifest file contains a record which does not '
                    'refer to an actual document file.'
                )

            doc_file = record["__exported_file_name__"]
            if not os.path.exists(os.path.join(self.source, doc_file)):
                raise CommandError(
                    'The manifest file refers to "{}" which does not '
                    'appear to be in the source directory.'.format(doc_file)
                )

    def _import_files_from_manifest(self):

        for record in self.manifest:

            if not record["model"] == "documents.document":
                continue

            doc_file = record["__exported_file_name__"]
            document = Document.objects.get(pk=record["pk"])
            with open(doc_file, "rb") as unencrypted:
                with open(document.source_path, "wb") as encrypted:
                    print("Encrypting {} and saving it to {}".format(
                        doc_file, document.source_path))
                    encrypted.write(GnuPG.encrypted(unencrypted))
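A hedged usage sketch of the export/import pair; the paths are placeholders:

    from django.core.management import call_command

    # On the old machine:
    call_command("document_exporter", "/mnt/backup/paperless")
    # Copy the directory across, then on the new machine:
    call_command("document_importer", "/mnt/backup/paperless")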
@@ -10,8 +10,8 @@ class Command(Renderable, BaseCommand):
     help = """
         Using the current set of tagging rules, apply said rules to all
         documents in the database, effectively allowing you to back-tag all
-        previously indexed documents with tags created (or modified) after their
-        initial import.
+        previously indexed documents with tags created (or modified) after
+        their initial import.
    """.replace("    ", "")
 
     def __init__(self, *args, **kwargs):
@@ -23,9 +23,10 @@ class Command(Renderable, BaseCommand):
         self.verbosity = options["verbosity"]
 
         for document in Document.objects.all():
+
             tags = Tag.objects.exclude(
                 pk__in=document.tags.values_list("pk", flat=True))
-            for tag in tags:
-                if tag.matches(document.content):
-                    print('Tagging {} with "{}"'.format(document, tag))
-                    document.tags.add(tag)
+            for tag in Tag.match_all(document.content, tags):
+                print('Tagging {} with "{}"'.format(document, tag))
+                document.tags.add(tag)
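Both this command and the consumer now delegate to `Tag.match_all`, which is not part of this diff. From its two call sites it is presumably a classmethod along these lines (a guess, not project code):

    # Guessed shape of Tag.match_all, inferred from its call sites only:
    #
    #     @classmethod
    #     def match_all(cls, text, tags=None):
    #         if tags is None:
    #             tags = cls.objects.all()
    #         for tag in tags:
    #             if tag.matches(text.lower()):
    #                 yield tag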
20
src/documents/management/commands/loaddata_stdin.py
Normal file
@@ -0,0 +1,20 @@
import sys

from django.core.management.commands.loaddata import Command as LoadDataCommand


class Command(LoadDataCommand):
    """
    Allow the loading of data from standard in. Sourced originally from:
    https://gist.github.com/bmispelon/ad5a2c333443b3a1d051 (MIT licensed)
    """

    def parse_name(self, fixture_name):
        self.compression_formats['stdin'] = (lambda x, y: sys.stdin, None)
        if fixture_name == '-':
            return '-', 'json', 'stdin'

    def find_fixtures(self, fixture_label):
        if fixture_label == '-':
            return [('-', None, '-')]
        return super(Command, self).find_fixtures(fixture_label)
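The point of the '-' fixture label is to let a fixture arrive over a pipe. The shell line in the comment below is an assumption based on the handling above:

    # cat manifest.json | ./manage.py loaddata_stdin -
    #
    # or programmatically, with sys.stdin already pointed at the fixture:
    from django.core.management import call_command

    call_command("loaddata_stdin", "-")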
70
src/documents/managers.py
Normal file
@@ -0,0 +1,70 @@
from django.conf import settings

from django.db import models
from django.db.models.aggregates import Max


class GroupConcat(models.Aggregate):
    """
    Theoretically, this should work in Sqlite, PostgreSQL, and MySQL, but I've
    only ever tested it in Sqlite.
    """

    ENGINE_SQLITE = 1
    ENGINE_POSTGRESQL = 2
    ENGINE_MYSQL = 3
    ENGINES = {
        "django.db.backends.sqlite3": ENGINE_SQLITE,
        "django.db.backends.postgresql_psycopg2": ENGINE_POSTGRESQL,
        "django.db.backends.postgresql": ENGINE_POSTGRESQL,
        "django.db.backends.mysql": ENGINE_MYSQL
    }

    def __init__(self, expression, separator="\n", **extra):

        self.engine = self._get_engine()
        self.function = self._get_function()
        self.template = self._get_template(separator)

        models.Aggregate.__init__(
            self,
            expression,
            output_field=models.CharField(),
            **extra
        )

    def _get_engine(self):
        engine = settings.DATABASES["default"]["ENGINE"]
        try:
            return self.ENGINES[engine]
        except KeyError:
            raise NotImplementedError(
                "There's currently no support for {} when it comes to group "
                "concatenation in Paperless".format(engine)
            )

    def _get_function(self):
        if self.engine == self.ENGINE_POSTGRESQL:
            return "STRING_AGG"
        return "GROUP_CONCAT"

    def _get_template(self, separator):
        if self.engine == self.ENGINE_MYSQL:
            return "%(function)s(%(expressions)s, SEPARATOR '{}')".format(
                separator)
        return "%(function)s(%(expressions)s, '{}')".format(separator)


class LogQuerySet(models.query.QuerySet):

    def by_group(self):
        return self.values("group").annotate(
            time=Max("modified"),
            messages=GroupConcat("message"),
        ).order_by("-time")


class LogManager(models.Manager):

    def get_queryset(self):
        return LogQuerySet(self.model, using=self._db)
@@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-# Generated by Django 1.9 on 2016-02-14 16:08
+# Generated by Django 1.9 on 2016-02-27 17:54
 from __future__ import unicode_literals
 
 from django.db import migrations, models
@@ -7,9 +7,8 @@ from django.db import migrations, models
 
 class Migration(migrations.Migration):
 
-    initial = True
-
     dependencies = [
+        ('documents', '0009_auto_20160214_0040'),
     ]
 
     operations = [
@@ -17,14 +16,15 @@ class Migration(migrations.Migration):
             name='Log',
             fields=[
                 ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
-                ('time', models.DateTimeField(auto_now_add=True)),
+                ('group', models.UUIDField(blank=True)),
                 ('message', models.TextField()),
-                ('level', models.PositiveIntegerField(choices=[(1, 'Error'), (2, 'Warning'), (3, 'Informational'), (4, 'Debugging')], default=3)),
+                ('level', models.PositiveIntegerField(choices=[(10, 'Debugging'), (20, 'Informational'), (30, 'Warning'), (40, 'Error'), (50, 'Critical')], default=20)),
                 ('component', models.PositiveIntegerField(choices=[(1, 'Consumer'), (2, 'Mail Fetcher')])),
+                ('created', models.DateTimeField(auto_now_add=True)),
+                ('modified', models.DateTimeField(auto_now=True)),
             ],
-        ),
-        migrations.AlterModelOptions(
-            name='log',
-            options={'ordering': ('-time',)},
+            options={
+                'ordering': ('-modified',),
+            },
         ),
     ]
28
src/documents/migrations/0011_auto_20160303_1929.py
Normal file
@@ -0,0 +1,28 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.9.2 on 2016-03-03 19:29
from __future__ import unicode_literals

from django.db import migrations


class Migration(migrations.Migration):

    dependencies = [
        ('documents', '0010_log'),
    ]

    operations = [
        migrations.RenameModel(
            old_name='Sender',
            new_name='Correspondent',
        ),
        migrations.AlterModelOptions(
            name='document',
            options={'ordering': ('correspondent', 'title')},
        ),
        migrations.RenameField(
            model_name='document',
            old_name='sender',
            new_name='correspondent',
        ),
    ]
119
src/documents/migrations/0012_auto_20160305_0040.py
Normal file
119
src/documents/migrations/0012_auto_20160305_0040.py
Normal file
@ -0,0 +1,119 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.9.2 on 2016-03-05 00:40
from __future__ import unicode_literals

import gnupg
import os
import re
import shutil
import subprocess
import tempfile

from django.conf import settings
from django.db import migrations
from django.utils.termcolors import colorize as colourise  # Spelling hurts me


class GnuPG(object):
    """
    A handy singleton to use when handling encrypted files.
    """

    gpg = gnupg.GPG(gnupghome=settings.GNUPG_HOME)

    @classmethod
    def decrypted(cls, file_handle):
        return cls.gpg.decrypt_file(
            file_handle, passphrase=settings.PASSPHRASE).data

    @classmethod
    def encrypted(cls, file_handle):
        return cls.gpg.encrypt_file(
            file_handle,
            recipients=None,
            passphrase=settings.PASSPHRASE,
            symmetric=True
        ).data


def move_documents_and_create_thumbnails(apps, schema_editor):

    documents = os.listdir(os.path.join(settings.MEDIA_ROOT, "documents"))

    if set(documents) == {"originals", "thumbnails"}:
        return

    print(colourise(
        "\n\n"
        "  This is a one-time only migration to generate thumbnails for all of your\n"
        "  documents so that future UIs will have something to work with.  If you have\n"
        "  a lot of documents though, this may take a while, so a coffee break may be\n"
        "  in order."
        "\n", opts=("bold",)
    ))

    try:
        os.makedirs(settings.SCRATCH_DIR)
    except FileExistsError:
        pass

    for f in sorted(documents):

        if not f.endswith("gpg"):
            continue

        print("    {} {} {}".format(
            colourise("*", fg="green"),
            colourise("Generating a thumbnail for", fg="white"),
            colourise(f, fg="cyan")
        ))

        thumb_temp = tempfile.mkdtemp(
            prefix="paperless", dir=settings.SCRATCH_DIR)
        orig_temp = tempfile.mkdtemp(
            prefix="paperless", dir=settings.SCRATCH_DIR)

        orig_source = os.path.join(settings.MEDIA_ROOT, "documents", f)
        orig_target = os.path.join(orig_temp, f.replace(".gpg", ""))

        with open(orig_source, "rb") as encrypted:
            with open(orig_target, "wb") as unencrypted:
                unencrypted.write(GnuPG.decrypted(encrypted))

        subprocess.Popen((
            settings.CONVERT_BINARY,
            "-scale", "500x5000",
            "-alpha", "remove",
            orig_target,
            os.path.join(thumb_temp, "convert-%04d.png")
        )).wait()

        thumb_source = os.path.join(thumb_temp, "convert-0000.png")
        thumb_target = os.path.join(
            settings.MEDIA_ROOT,
            "documents",
            "thumbnails",
            re.sub(r"(\d+)\.\w+(\.gpg)", "\\1.png\\2", f)
        )
        with open(thumb_source, "rb") as unencrypted:
            with open(thumb_target, "wb") as encrypted:
                encrypted.write(GnuPG.encrypted(unencrypted))

        shutil.rmtree(thumb_temp)
        shutil.rmtree(orig_temp)

        shutil.move(
            os.path.join(settings.MEDIA_ROOT, "documents", f),
            os.path.join(settings.MEDIA_ROOT, "documents", "originals", f),
        )


class Migration(migrations.Migration):

    dependencies = [
        ('documents', '0011_auto_20160303_1929'),
    ]

    operations = [
        migrations.RunPython(move_documents_and_create_thumbnails),
    ]
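A note on the thumbnail naming above: the re.sub() call is what maps an
encrypted original's file name onto the matching thumbnail name. A minimal
sketch in a Python shell (the sample file name is made up for illustration):

    import re

    # Hypothetical stored original: zero-padded pk, original extension, .gpg suffix
    f = "0000042.pdf.gpg"
    re.sub(r"(\d+)\.\w+(\.gpg)", "\\1.png\\2", f)  # -> '0000042.png.gpg'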
@ -1,7 +1,7 @@
 class Renderable(object):
     """
-    A handy mixin to make it easier/cleaner to print output based on a verbosity
-    value.
+    A handy mixin to make it easier/cleaner to print output based on a
+    verbosity value.
     """

     def _render(self, text, verbosity):
@ -1,5 +1,7 @@
+import logging
 import os
 import re
+import uuid

 from django.conf import settings
 from django.core.urlresolvers import reverse
@ -7,6 +9,8 @@ from django.db import models
 from django.template.defaultfilters import slugify
 from django.utils import timezone

+from .managers import LogManager
+
+
 class SluggedModel(models.Model):
@ -25,7 +29,7 @@ class SluggedModel(models.Model):
         return self.name


-class Sender(SluggedModel):
+class Correspondent(SluggedModel):

     # This regex is probably more restrictive than it needs to be, but it's
     # better safe than sorry.
@ -36,7 +40,7 @@ class Sender(SluggedModel):


 class Tag(SluggedModel):

     COLOURS = (
         (1, "#a6cee3"),
         (2, "#1f78b4"),
@ -71,9 +75,9 @@ class Tag(SluggedModel):
         default=MATCH_ANY,
         help_text=(
             "Which algorithm you want to use when matching text to the OCR'd "
-            "PDF. Here, \"any\" looks for any occurrence of any word provided "
-            "in the PDF, while \"all\" requires that every word provided "
-            "appear in the PDF, albeit not in the order provided. A "
+            "PDF. Here, \"any\" looks for any occurrence of any word "
+            "provided in the PDF, while \"all\" requires that every word "
+            "provided appear in the PDF, albeit not in the order provided. A "
             "\"literal\" match means that the text you enter must appear in "
             "the PDF exactly as you've entered it, and \"regular expression\" "
             "uses a regex to match the PDF. If you don't know what a regex "
@ -86,28 +90,40 @@ class Tag(SluggedModel):
         return "{}: \"{}\" ({})".format(
             self.name, self.match, self.get_matching_algorithm_display())

+    @classmethod
+    def match_all(cls, text, tags=None):
+
+        if tags is None:
+            tags = cls.objects.all()
+
+        text = text.lower()
+        for tag in tags:
+            if tag.matches(text):
+                yield tag
+
     def matches(self, text):

         # Check that match is not empty
         if self.match.strip() == "":
             return False

         if self.matching_algorithm == self.MATCH_ALL:
             for word in self.match.split(" "):
-                if word not in text:
+                if not re.search(r"\b{}\b".format(word), text):
                     return False
             return True

         if self.matching_algorithm == self.MATCH_ANY:
             for word in self.match.split(" "):
-                if word in text:
+                if re.search(r"\b{}\b".format(word), text):
                     return True
             return False

         if self.matching_algorithm == self.MATCH_LITERAL:
-            return self.match in text
+            return bool(re.search(r"\b{}\b".format(self.match), text))

         if self.matching_algorithm == self.MATCH_REGEX:
-            return re.search(re.compile(self.match), text)
+            return bool(re.search(re.compile(self.match), text))

         raise NotImplementedError("Unsupported matching algorithm")

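The change from substring checks to word-boundary regexes above is
behavioural, not cosmetic; a minimal sketch of the difference (the sample
string is illustrative):

    import re

    text = "I have alphas in me"

    "alpha" in text                      # True  -- old substring behaviour
    bool(re.search(r"\balpha\b", text))  # False -- new whole-word behaviour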
@ -125,8 +141,8 @@ class Document(models.Model):
     TYPE_TIF = "tiff"
     TYPES = (TYPE_PDF, TYPE_PNG, TYPE_JPG, TYPE_GIF, TYPE_TIF,)

-    sender = models.ForeignKey(
-        Sender, blank=True, null=True, related_name="documents")
+    correspondent = models.ForeignKey(
+        Correspondent, blank=True, null=True, related_name="documents")
     title = models.CharField(max_length=128, blank=True, db_index=True)
     content = models.TextField(db_index=True)
     file_type = models.CharField(
@ -140,14 +156,15 @@ class Document(models.Model):
     modified = models.DateTimeField(auto_now=True, editable=False)

     class Meta(object):
-        ordering = ("sender", "title")
+        ordering = ("correspondent", "title")

     def __str__(self):
-        created = self.created.strftime("%Y-%m-%d")
-        if self.sender and self.title:
-            return "{}: {}, {}".format(created, self.sender, self.title)
-        if self.sender or self.title:
-            return "{}: {}".format(created, self.sender or self.title)
+        created = self.created.strftime("%Y%m%d%H%M%S")
+        if self.correspondent and self.title:
+            return "{}: {} - {}".format(
+                created, self.correspondent, self.title)
+        if self.correspondent or self.title:
+            return "{}: {}".format(created, self.correspondent or self.title)
         return str(created)

     @property
@ -155,6 +172,7 @@ class Document(models.Model):
         return os.path.join(
             settings.MEDIA_ROOT,
             "documents",
+            "originals",
             "{:07}.{}.gpg".format(self.pk, self.file_type)
         )
@ -164,14 +182,71 @@ class Document(models.Model):

     @property
     def file_name(self):
-        if self.sender and self.title:
-            tags = ",".join([t.slug for t in self.tags.all()])
-            if tags:
-                return "{} - {} - {}.{}".format(
-                    self.sender, self.title, tags, self.file_type)
-            return "{} - {}.{}".format(self.sender, self.title, self.file_type)
-        return os.path.basename(self.source_path)
+        return slugify(str(self)) + "." + self.file_type

     @property
     def download_url(self):
-        return reverse("fetch", kwargs={"pk": self.pk})
+        return reverse("fetch", kwargs={"kind": "doc", "pk": self.pk})

+    @property
+    def thumbnail_path(self):
+        return os.path.join(
+            settings.MEDIA_ROOT,
+            "documents",
+            "thumbnails",
+            "{:07}.png.gpg".format(self.pk)
+        )
+
+    @property
+    def thumbnail_file(self):
+        return open(self.thumbnail_path, "rb")
+
+    @property
+    def thumbnail_url(self):
+        return reverse("fetch", kwargs={"kind": "thumb", "pk": self.pk})
+
+
+class Log(models.Model):
+
+    LEVELS = (
+        (logging.DEBUG, "Debugging"),
+        (logging.INFO, "Informational"),
+        (logging.WARNING, "Warning"),
+        (logging.ERROR, "Error"),
+        (logging.CRITICAL, "Critical"),
+    )
+
+    COMPONENT_CONSUMER = 1
+    COMPONENT_MAIL = 2
+    COMPONENTS = (
+        (COMPONENT_CONSUMER, "Consumer"),
+        (COMPONENT_MAIL, "Mail Fetcher")
+    )
+
+    group = models.UUIDField(blank=True)
+    message = models.TextField()
+    level = models.PositiveIntegerField(choices=LEVELS, default=logging.INFO)
+    component = models.PositiveIntegerField(choices=COMPONENTS)
+    created = models.DateTimeField(auto_now_add=True)
+    modified = models.DateTimeField(auto_now=True)
+
+    objects = LogManager()
+
+    class Meta(object):
+        ordering = ("-modified",)
+
+    def __str__(self):
+        return self.message
+
+    def save(self, *args, **kwargs):
+        """
+        To allow for the case where we don't want to group the message, we
+        shouldn't force the caller to specify a one-time group value. However,
+        allowing group=None means that the manager can't differentiate the
+        different un-grouped messages, so instead we set a random one here.
+        """
+
+        if not self.group:
+            self.group = uuid.uuid4()
+
+        models.Model.save(self, *args, **kwargs)
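The save() override above means every Log row ends up with a group UUID. A
rough sketch of how a caller would feed the group and component through the
standard logging framework, assuming the "documents" logger is wired to the
custom handler configured in settings:

    import logging
    import uuid

    from documents.models import Log

    logger = logging.getLogger(
        "documents.management.commands.document_consumer")

    group = uuid.uuid4()
    logger.info("Consumption started",
                extra={"group": group, "component": Log.COMPONENT_CONSUMER})
    logger.info("Consumption finished",
                extra={"group": group, "component": Log.COMPONENT_CONSUMER})
    # Both rows share one group, so Log.objects.all().by_group() can collapse them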
55
src/documents/serialisers.py
Normal file
@ -0,0 +1,55 @@
from rest_framework import serializers

from .models import Correspondent, Tag, Document, Log


class CorrespondentSerializer(serializers.HyperlinkedModelSerializer):

    class Meta(object):
        model = Correspondent
        fields = ("id", "slug", "name")


class TagSerializer(serializers.HyperlinkedModelSerializer):

    class Meta(object):
        model = Tag
        fields = (
            "id", "slug", "name", "colour", "match", "matching_algorithm")


class DocumentSerializer(serializers.ModelSerializer):

    correspondent = serializers.HyperlinkedRelatedField(
        read_only=True, view_name="drf:correspondent-detail", allow_null=True)
    tags = serializers.HyperlinkedRelatedField(
        read_only=True, view_name="drf:tag-detail", many=True)

    class Meta(object):
        model = Document
        fields = (
            "id",
            "correspondent",
            "title",
            "content",
            "file_type",
            "tags",
            "created",
            "modified",
            "file_name",
            "download_url",
            "thumbnail_url",
        )


class LogSerializer(serializers.ModelSerializer):

    time = serializers.DateTimeField()
    messages = serializers.CharField()

    class Meta(object):
        model = Log
        fields = (
            "time",
            "messages"
        )
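A rough sketch of exercising DocumentSerializer from a shell; the hyperlinked
fields need a request in the serializer context to build URLs, and the pk is
illustrative:

    from rest_framework.test import APIRequestFactory

    from documents.models import Document
    from documents.serialisers import DocumentSerializer

    request = APIRequestFactory().get("/api/documents/")
    doc = Document.objects.get(pk=1)
    data = DocumentSerializer(doc, context={"request": request}).data
    # data["download_url"] -> "/fetch/doc/1"; data["correspondent"] -> hyperlink or None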
10
src/documents/templates/documents/index.html
Normal file
@ -0,0 +1,10 @@
<!DOCTYPE html>

<html lang="en-gb">
  <head>
    <title>Paperless</title>
    <meta charset="utf-8">
  </head>
  <body>
  </body>
</html>
@ -4,18 +4,26 @@ from ..consumer import Consumer


 class TestAttachment(TestCase):

     TAGS = ("tag1", "tag2", "tag3")
     CONSUMER = Consumer()
+    SUFFIXES = (
+        "pdf", "png", "jpg", "jpeg", "gif",
+        "PDF", "PNG", "JPG", "JPEG", "GIF",
+        "PdF", "PnG", "JpG", "JPeG", "GiF",
+    )

     def _test_guess_attributes_from_name(self, path, sender, title, tags):
-        for suffix in ("pdf", "png", "jpg", "jpeg", "gif"):
+        for suffix in self.SUFFIXES:
             f = path.format(suffix)
             results = self.CONSUMER._guess_attributes_from_name(f)
             self.assertEqual(results[0].name, sender, f)
             self.assertEqual(results[1], title, f)
             self.assertEqual(tuple([t.slug for t in results[2]]), tags, f)
-            self.assertEqual(results[3], suffix, f)
+            if suffix.lower() == "jpeg":
+                self.assertEqual(results[3], "jpg", f)
+            else:
+                self.assertEqual(results[3], suffix.lower(), f)

     def test_guess_attributes_from_name0(self):
         self._test_guess_attributes_from_name(
36
src/documents/tests/test_importer.py
Normal file
@ -0,0 +1,36 @@
from django.core.management.base import CommandError
from django.test import TestCase

from ..management.commands.document_importer import Command


class TestImporter(TestCase):

    def __init__(self, *args, **kwargs):
        TestCase.__init__(self, *args, **kwargs)

    def test_check_manifest_exists(self):
        cmd = Command()
        self.assertRaises(
            CommandError, cmd._check_manifest_exists, "/tmp/manifest.json")

    def test_check_manifest(self):

        cmd = Command()
        cmd.source = "/tmp"

        cmd.manifest = [{"model": "documents.document"}]
        with self.assertRaises(CommandError) as cm:
            cmd._check_manifest()
        self.assertTrue(
            'The manifest file contains a record' in str(cm.exception))

        cmd.manifest = [{
            "model": "documents.document",
            "__exported_file_name__": "noexist.pdf"
        }]
        # self.assertRaises(CommandError, cmd._check_manifest)
        with self.assertRaises(CommandError) as cm:
            cmd._check_manifest()
        self.assertTrue(
            'The manifest file refers to "noexist.pdf"' in str(cm.exception))
142
src/documents/tests/test_logger.py
Normal file
@ -0,0 +1,142 @@
import logging
import uuid

from unittest import mock

from django.test import TestCase

from ..models import Log


class TestPaperlessLog(TestCase):

    def __init__(self, *args, **kwargs):
        TestCase.__init__(self, *args, **kwargs)
        self.logger = logging.getLogger(
            "documents.management.commands.document_consumer")

    def test_ignored(self):
        with mock.patch("logging.StreamHandler.emit") as __:
            self.assertEqual(Log.objects.all().count(), 0)
            self.logger.info("This is an informational message")
            self.logger.warning("This is an informational message")
            self.logger.error("This is an informational message")
            self.logger.critical("This is an informational message")
            self.assertEqual(Log.objects.all().count(), 0)

    def test_that_it_saves_at_all(self):

        kw = {
            "group": uuid.uuid4(),
            "component": Log.COMPONENT_MAIL
        }

        self.assertEqual(Log.objects.all().count(), 0)

        with mock.patch("logging.StreamHandler.emit") as __:

            # Debug messages are ignored by default
            self.logger.debug("This is a debugging message", extra=kw)
            self.assertEqual(Log.objects.all().count(), 0)

            self.logger.info("This is an informational message", extra=kw)
            self.assertEqual(Log.objects.all().count(), 1)

            self.logger.warning("This is an warning message", extra=kw)
            self.assertEqual(Log.objects.all().count(), 2)

            self.logger.error("This is an error message", extra=kw)
            self.assertEqual(Log.objects.all().count(), 3)

            self.logger.critical("This is a critical message", extra=kw)
            self.assertEqual(Log.objects.all().count(), 4)

    def test_groups(self):

        kw1 = {
            "group": uuid.uuid4(),
            "component": Log.COMPONENT_MAIL
        }
        kw2 = {
            "group": uuid.uuid4(),
            "component": Log.COMPONENT_MAIL
        }

        self.assertEqual(Log.objects.all().count(), 0)

        with mock.patch("logging.StreamHandler.emit") as __:

            # Debug messages are ignored by default
            self.logger.debug("This is a debugging message", extra=kw1)
            self.assertEqual(Log.objects.all().count(), 0)

            self.logger.info("This is an informational message", extra=kw2)
            self.assertEqual(Log.objects.all().count(), 1)
            self.assertEqual(Log.objects.filter(group=kw2["group"]).count(), 1)

            self.logger.warning("This is an warning message", extra=kw1)
            self.assertEqual(Log.objects.all().count(), 2)
            self.assertEqual(Log.objects.filter(group=kw1["group"]).count(), 1)

            self.logger.error("This is an error message", extra=kw2)
            self.assertEqual(Log.objects.all().count(), 3)
            self.assertEqual(Log.objects.filter(group=kw2["group"]).count(), 2)

            self.logger.critical("This is a critical message", extra=kw1)
            self.assertEqual(Log.objects.all().count(), 4)
            self.assertEqual(Log.objects.filter(group=kw1["group"]).count(), 2)

    def test_components(self):

        c1 = Log.COMPONENT_CONSUMER
        c2 = Log.COMPONENT_MAIL
        kw1 = {
            "group": uuid.uuid4(),
            "component": c1
        }
        kw2 = {
            "group": kw1["group"],
            "component": c2
        }

        self.assertEqual(Log.objects.all().count(), 0)

        with mock.patch("logging.StreamHandler.emit") as __:

            # Debug messages are ignored by default
            self.logger.debug("This is a debugging message", extra=kw1)
            self.assertEqual(Log.objects.all().count(), 0)

            self.logger.info("This is an informational message", extra=kw2)
            self.assertEqual(Log.objects.all().count(), 1)
            self.assertEqual(Log.objects.filter(component=c2).count(), 1)

            self.logger.warning("This is an warning message", extra=kw1)
            self.assertEqual(Log.objects.all().count(), 2)
            self.assertEqual(Log.objects.filter(component=c1).count(), 1)

            self.logger.error("This is an error message", extra=kw2)
            self.assertEqual(Log.objects.all().count(), 3)
            self.assertEqual(Log.objects.filter(component=c2).count(), 2)

            self.logger.critical("This is a critical message", extra=kw1)
            self.assertEqual(Log.objects.all().count(), 4)
            self.assertEqual(Log.objects.filter(component=c1).count(), 2)

    def test_groupped_query(self):

        kw = {
            "group": uuid.uuid4(),
            "component": Log.COMPONENT_MAIL
        }
        with mock.patch("logging.StreamHandler.emit") as __:
            self.logger.info("Message 0", extra=kw)
            self.logger.info("Message 1", extra=kw)
            self.logger.info("Message 2", extra=kw)
            self.logger.info("Message 3", extra=kw)

        self.assertEqual(Log.objects.all().by_group().count(), 1)
        self.assertEqual(
            Log.objects.all().by_group()[0]["messages"],
            "Message 0\nMessage 1\nMessage 2\nMessage 3"
        )
@ -3,6 +3,7 @@ import os
 import magic

 from hashlib import md5
+from unittest import mock

 from django.conf import settings
 from django.test import TestCase
@ -27,7 +28,8 @@ class TestMessage(TestCase):

         with open(self.sample, "rb") as f:

-            message = Message(f.read(), verbosity=0)
+            with mock.patch("logging.StreamHandler.emit") as __:
+                message = Message(f.read())

         self.assertTrue(message)
         self.assertEqual(message.subject, "Test 0")
119
src/documents/tests/test_tags.py
Normal file
@ -0,0 +1,119 @@
from django.test import TestCase

from ..models import Tag


class TestTagMatching(TestCase):

    def test_match_all(self):

        t = Tag.objects.create(
            name="Test 0",
            match="alpha charlie gamma",
            matching_algorithm=Tag.MATCH_ALL
        )
        self.assertFalse(t.matches("I have alpha in me"))
        self.assertFalse(t.matches("I have charlie in me"))
        self.assertFalse(t.matches("I have gamma in me"))
        self.assertFalse(t.matches("I have alpha and charlie in me"))
        self.assertTrue(t.matches("I have alpha, charlie, and gamma in me"))
        self.assertFalse(t.matches("I have alphas, charlie, and gamma in me"))
        self.assertFalse(t.matches("I have alphas in me"))
        self.assertFalse(t.matches("I have bravo in me"))

        t = Tag.objects.create(
            name="Test 1",
            match="12 34 56",
            matching_algorithm=Tag.MATCH_ALL
        )
        self.assertFalse(t.matches("I have 12 in me"))
        self.assertFalse(t.matches("I have 34 in me"))
        self.assertFalse(t.matches("I have 56 in me"))
        self.assertFalse(t.matches("I have 12 and 34 in me"))
        self.assertTrue(t.matches("I have 12 34, and 56 in me"))
        self.assertFalse(t.matches("I have 120, 34, and 56 in me"))
        self.assertFalse(t.matches("I have 123456 in me"))
        self.assertFalse(t.matches("I have 01234567 in me"))

    def test_match_any(self):

        t = Tag.objects.create(
            name="Test 0",
            match="alpha charlie gamma",
            matching_algorithm=Tag.MATCH_ANY
        )

        self.assertTrue(t.matches("I have alpha in me"))
        self.assertTrue(t.matches("I have charlie in me"))
        self.assertTrue(t.matches("I have gamma in me"))
        self.assertTrue(t.matches("I have alpha and charlie in me"))
        self.assertFalse(t.matches("I have alphas in me"))
        self.assertFalse(t.matches("I have bravo in me"))

        t = Tag.objects.create(
            name="Test 1",
            match="12 34 56",
            matching_algorithm=Tag.MATCH_ANY
        )
        self.assertTrue(t.matches("I have 12 in me"))
        self.assertTrue(t.matches("I have 34 in me"))
        self.assertTrue(t.matches("I have 56 in me"))
        self.assertTrue(t.matches("I have 12 and 34 in me"))
        self.assertTrue(t.matches("I have 12 34, and 56 in me"))
        self.assertTrue(t.matches("I have 120, 34, and 560 in me"))
        self.assertFalse(t.matches("I have 120, 340, and 560 in me"))
        self.assertFalse(t.matches("I have 123456 in me"))
        self.assertFalse(t.matches("I have 01234567 in me"))

    def test_match_literal(self):

        t = Tag.objects.create(
            name="Test 0",
            match="alpha charlie gamma",
            matching_algorithm=Tag.MATCH_LITERAL
        )

        self.assertFalse(t.matches("I have alpha in me"))
        self.assertFalse(t.matches("I have charlie in me"))
        self.assertFalse(t.matches("I have gamma in me"))
        self.assertFalse(t.matches("I have alpha and charlie in me"))
        self.assertFalse(t.matches("I have alpha, charlie, and gamma in me"))
        self.assertFalse(t.matches("I have alphas, charlie, and gamma in me"))
        self.assertTrue(t.matches("I have 'alpha charlie gamma' in me"))
        self.assertFalse(t.matches("I have alphas in me"))
        self.assertFalse(t.matches("I have bravo in me"))

        t = Tag.objects.create(
            name="Test 1",
            match="12 34 56",
            matching_algorithm=Tag.MATCH_LITERAL
        )
        self.assertFalse(t.matches("I have 12 in me"))
        self.assertFalse(t.matches("I have 34 in me"))
        self.assertFalse(t.matches("I have 56 in me"))
        self.assertFalse(t.matches("I have 12 and 34 in me"))
        self.assertFalse(t.matches("I have 12 34, and 56 in me"))
        self.assertFalse(t.matches("I have 120, 34, and 560 in me"))
        self.assertFalse(t.matches("I have 120, 340, and 560 in me"))
        self.assertFalse(t.matches("I have 123456 in me"))
        self.assertFalse(t.matches("I have 01234567 in me"))
        self.assertTrue(t.matches("I have 12 34 56 in me"))

    def test_match_regex(self):

        t = Tag.objects.create(
            name="Test 0",
            match="alpha\w+gamma",
            matching_algorithm=Tag.MATCH_REGEX
        )

        self.assertFalse(t.matches("I have alpha in me"))
        self.assertFalse(t.matches("I have gamma in me"))
        self.assertFalse(t.matches("I have alpha and charlie in me"))
        self.assertTrue(t.matches("I have alpha_and_gamma in me"))
        self.assertTrue(t.matches("I have alphas_and_gamma in me"))
        self.assertFalse(t.matches("I have alpha,and,gamma in me"))
        self.assertFalse(t.matches("I have alpha and gamma in me"))
        self.assertFalse(t.matches("I have alpha, charlie, and gamma in me"))
        self.assertFalse(t.matches("I have alphas, charlie, and gamma in me"))
        self.assertFalse(t.matches("I have alphas in me"))
@ -1,21 +1,41 @@
+from django.contrib.auth.mixins import LoginRequiredMixin
 from django.http import HttpResponse
-from django.template.defaultfilters import slugify
 from django.views.decorators.csrf import csrf_exempt
-from django.views.generic import FormView, DetailView
+from django.views.generic import FormView, DetailView, TemplateView

+from rest_framework.mixins import (
+    RetrieveModelMixin, UpdateModelMixin, DestroyModelMixin, ListModelMixin)
+from rest_framework.pagination import PageNumberPagination
+from rest_framework.permissions import IsAuthenticated
+from rest_framework.viewsets import (
+    ModelViewSet, ReadOnlyModelViewSet, GenericViewSet)
+
 from paperless.db import GnuPG

-from .models import Document
 from .forms import UploadForm
+from .models import Correspondent, Tag, Document, Log
+from .serialisers import (
+    CorrespondentSerializer, TagSerializer, DocumentSerializer, LogSerializer)


-class PdfView(DetailView):
+class IndexView(TemplateView):
+
+    template_name = "documents/index.html"
+
+    def get_context_data(self, **kwargs):
+        print(kwargs)
+        print(self.request.GET)
+        print(self.request.POST)
+        return TemplateView.get_context_data(self, **kwargs)
+
+
+class FetchView(DetailView):

     model = Document

     def render_to_response(self, context, **response_kwargs):
         """
-        Override the default to return the unencrypted PDF as raw data.
+        Override the default to return the unencrypted image/PDF as raw data.
         """

         content_types = {
@ -26,19 +46,25 @@ class PdfView(DetailView):
             Document.TYPE_TIF: "image/tiff",
         }

+        if self.kwargs["kind"] == "thumb":
+            return HttpResponse(
+                GnuPG.decrypted(self.object.thumbnail_file),
+                content_type=content_types[Document.TYPE_PNG]
+            )
+
         response = HttpResponse(
             GnuPG.decrypted(self.object.source_file),
             content_type=content_types[self.object.file_type]
         )
         response["Content-Disposition"] = 'attachment; filename="{}"'.format(
-            slugify(str(self.object)) + "." + self.object.file_type)
+            self.object.file_name)

         return response


-class PushView(FormView):
+class PushView(LoginRequiredMixin, FormView):
     """
-    A crude REST API for creating documents.
+    A crude REST-ish API for creating documents.
     """

     form_class = UploadForm
@ -52,3 +78,45 @@ class PushView(FormView):

     def form_invalid(self, form):
         return HttpResponse("0")
+
+
+class StandardPagination(PageNumberPagination):
+    page_size = 25
+    page_size_query_param = "page-size"
+    max_page_size = 100000
+
+
+class CorrespondentViewSet(ModelViewSet):
+    model = Correspondent
+    queryset = Correspondent.objects.all()
+    serializer_class = CorrespondentSerializer
+    pagination_class = StandardPagination
+    permission_classes = (IsAuthenticated,)
+
+
+class TagViewSet(ModelViewSet):
+    model = Tag
+    queryset = Tag.objects.all()
+    serializer_class = TagSerializer
+    pagination_class = StandardPagination
+    permission_classes = (IsAuthenticated,)
+
+
+class DocumentViewSet(RetrieveModelMixin,
+                      UpdateModelMixin,
+                      DestroyModelMixin,
+                      ListModelMixin,
+                      GenericViewSet):
+    model = Document
+    queryset = Document.objects.all()
+    serializer_class = DocumentSerializer
+    pagination_class = StandardPagination
+    permission_classes = (IsAuthenticated,)
+
+
+class LogViewSet(ReadOnlyModelViewSet):
+    model = Log
+    queryset = Log.objects.all().by_group()
+    serializer_class = LogSerializer
+    pagination_class = StandardPagination
+    permission_classes = (IsAuthenticated,)
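A minimal sketch of hitting the reworked FetchView with Django's test client
(the pk is illustrative); "doc" streams the decrypted original as an
attachment, while "thumb" returns the decrypted PNG:

    from django.test import Client

    c = Client()
    original = c.get("/fetch/doc/1")     # decrypted original, Content-Disposition: attachment
    thumbnail = c.get("/fetch/thumb/1")  # decrypted PNG thumbnail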
@ -1,12 +0,0 @@
-from django.contrib import admin
-
-from .models import Log
-
-
-class LogAdmin(admin.ModelAdmin):
-
-    list_display = ("message", "level", "component")
-    list_filter = ("level", "component",)
-
-
-admin.site.register(Log, LogAdmin)
@ -1,5 +0,0 @@
-from django.apps import AppConfig
-
-
-class LoggerConfig(AppConfig):
-    name = 'logger'
@ -1,50 +0,0 @@
-from django.db import models
-
-
-class Log(models.Model):
-
-    LEVEL_ERROR = 1
-    LEVEL_WARNING = 2
-    LEVEL_INFO = 3
-    LEVEL_DEBUG = 4
-    LEVELS = (
-        (LEVEL_ERROR, "Error"),
-        (LEVEL_WARNING, "Warning"),
-        (LEVEL_INFO, "Informational"),
-        (LEVEL_DEBUG, "Debugging"),
-    )
-
-    COMPONENT_CONSUMER = 1
-    COMPONENT_MAIL = 2
-    COMPONENTS = (
-        (COMPONENT_CONSUMER, "Consumer"),
-        (COMPONENT_MAIL, "Mail Fetcher")
-    )
-
-    time = models.DateTimeField(auto_now_add=True)
-    message = models.TextField()
-    level = models.PositiveIntegerField(choices=LEVELS, default=LEVEL_INFO)
-    component = models.PositiveIntegerField(choices=COMPONENTS)
-
-    class Meta(object):
-        ordering = ("-time",)
-
-    @classmethod
-    def error(cls, message, component):
-        cls.objects.create(
-            message=message, level=cls.LEVEL_ERROR, component=component)
-
-    @classmethod
-    def warning(cls, message, component):
-        cls.objects.create(
-            message=message, level=cls.LEVEL_WARNING, component=component)
-
-    @classmethod
-    def info(cls, message, component):
-        cls.objects.create(
-            message=message, level=cls.LEVEL_INFO, component=component)
-
-    @classmethod
-    def debug(cls, message, component):
-        cls.objects.create(
-            message=message, level=cls.LEVEL_DEBUG, component=component)
@ -1,3 +0,0 @@
-from django.test import TestCase
-
-# Create your tests here.
@ -1,3 +0,0 @@
-from django.shortcuts import render
-
-# Create your views here.
@ -12,6 +12,8 @@ https://docs.djangoproject.com/en/1.9/ref/settings/

 import os

+from dotenv import load_dotenv
+
 # Build paths inside the project like this: os.path.join(BASE_DIR, ...)
 BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
@ -42,7 +44,8 @@ INSTALLED_APPS = [
     "django_extensions",

     "documents",
-    "logger",
+
+    "rest_framework",

 ]
@ -87,12 +90,12 @@ DATABASES = {
         "NAME": os.path.join(BASE_DIR, "..", "data", "db.sqlite3"),
     }
 }
-if os.environ.get("PAPERLESS_DBUSER") and os.environ.get("PAPERLESS_DBPASS"):
+if os.getenv("PAPERLESS_DBUSER") and os.getenv("PAPERLESS_DBPASS"):
     DATABASES["default"] = {
         "ENGINE": "django.db.backends.postgresql_psycopg2",
-        "NAME": os.environ.get("PAPERLESS_DBNAME", "paperless"),
-        "USER": os.environ.get("PAPERLESS_DBUSER"),
-        "PASSWORD": os.environ.get("PAPERLESS_DBPASS")
+        "NAME": os.getenv("PAPERLESS_DBNAME", "paperless"),
+        "USER": os.getenv("PAPERLESS_DBUSER"),
+        "PASSWORD": os.getenv("PAPERLESS_DBPASS")
     }
@ -139,55 +142,119 @@ STATIC_URL = '/static/'
 MEDIA_URL = "/media/"


-# Paperless-specific stuffs
-# Change these paths if yours are different
+# Paperless-specific stuff
+# You shouldn't have to edit any of these values. Rather, you can set these
+# values in /etc/paperless.conf instead.
 # ----------------------------------------------------------------------------

+# Tap paperless.conf if it's available
+if os.path.exists("/etc/paperless.conf"):
+    load_dotenv("/etc/paperless.conf")
+
+
+# Logging
+
+LOGGING = {
+    "version": 1,
+    "disable_existing_loggers": False,
+    "handlers": {
+        "consumer": {
+            "class": "documents.loggers.PaperlessLogger",
+        }
+    },
+    "loggers": {
+        "documents": {
+            "handlers": ["consumer"],
+            "level": os.getenv("PAPERLESS_CONSUMER_LOG_LEVEL", "INFO"),
+        },
+    },
+}
+
+
 # The default language that tesseract will attempt to use when parsing
 # documents. It should be a 3-letter language code consistent with ISO 639.
 OCR_LANGUAGE = "eng"

 # The amount of threads to use for OCR
-OCR_THREADS = os.environ.get("PAPERLESS_OCR_THREADS")
+OCR_THREADS = os.getenv("PAPERLESS_OCR_THREADS")

-# If this is true, any failed attempts to OCR a PDF will result in the PDF being
-# indexed anyway, with whatever we could get. If it's False, the file will
-# simply be left in the CONSUMPTION_DIR.
-FORGIVING_OCR = True
+# If this is true, any failed attempts to OCR a PDF will result in the PDF
+# being indexed anyway, with whatever we could get.  If it's False, the file
+# will simply be left in the CONSUMPTION_DIR.
+FORGIVING_OCR = bool(os.getenv("PAPERLESS_FORGIVING_OCR", "YES").lower() in ("yes", "y", "1", "t", "true"))

 # GNUPG needs a home directory for some reason
-GNUPG_HOME = os.environ.get("HOME", "/dev/null")
+GNUPG_HOME = os.getenv("HOME", "/tmp")

-# Convert is part of the Imagemagick package
-CONVERT_BINARY = "/usr/bin/convert"
+# Convert is part of the ImageMagick package
+CONVERT_BINARY = os.getenv("PAPERLESS_CONVERT_BINARY")
+
+# Unpaper
+UNPAPER_BINARY = os.getenv("PAPERLESS_UNPAPER_BINARY", "unpaper")

 # This will be created if it doesn't exist
-SCRATCH_DIR = "/tmp/paperless"
+SCRATCH_DIR = os.getenv("PAPERLESS_SCRATCH_DIR", "/tmp/paperless")

 # This is where Paperless will look for PDFs to index
-CONSUMPTION_DIR = os.environ.get("PAPERLESS_CONSUME")
+CONSUMPTION_DIR = os.getenv("PAPERLESS_CONSUMPTION_DIR")

 # If you want to use IMAP mail consumption, populate this with useful values.
-# If you leave HOST set to None, we assume you're not going to use this feature.
+# If you leave HOST set to None, we assume you're not going to use this
+# feature.
 MAIL_CONSUMPTION = {
-    "HOST": os.environ.get("PAPERLESS_CONSUME_MAIL_HOST"),
-    "PORT": os.environ.get("PAPERLESS_CONSUME_MAIL_PORT"),
-    "USERNAME": os.environ.get("PAPERLESS_CONSUME_MAIL_USER"),
-    "PASSWORD": os.environ.get("PAPERLESS_CONSUME_MAIL_PASS"),
+    "HOST": os.getenv("PAPERLESS_CONSUME_MAIL_HOST"),
+    "PORT": os.getenv("PAPERLESS_CONSUME_MAIL_PORT"),
+    "USERNAME": os.getenv("PAPERLESS_CONSUME_MAIL_USER"),
+    "PASSWORD": os.getenv("PAPERLESS_CONSUME_MAIL_PASS"),
     "USE_SSL": True,  # If True, use SSL/TLS to connect
     "INBOX": "INBOX"  # The name of the inbox on the server
 }

-# This is used to encrypt the original documents and decrypt them later when you
-# want to download them. Set it and change the permissions on this file to
+# This is used to encrypt the original documents and decrypt them later when
+# you want to download them.  Set it and change the permissions on this file
+# to 0600, or set it to `None` and you'll be prompted for the passphrase at
-# 0600, or set it to `None` and you'll be prompted for the passphrase at
 # runtime.  The default looks for an environment variable.
 # DON'T FORGET TO SET THIS as leaving it blank may cause some strange things
 # with GPG, including an interesting case where it may "encrypt" zero-byte
 # files.
-PASSPHRASE = os.environ.get("PAPERLESS_PASSPHRASE")
+PASSPHRASE = os.getenv("PAPERLESS_PASSPHRASE")

-# If you intend to use the "API" to push files into the consumer, you'll need to
-# provide a shared secret here. Leaving this as the default will disable the
-# API.
-UPLOAD_SHARED_SECRET = os.environ.get("PAPERLESS_SECRET", "")
+# If you intend to use the "API" to push files into the consumer, you'll need
+# to provide a shared secret here.  Leaving this as the default will disable
+# the API.
+SHARED_SECRET = os.getenv("PAPERLESS_SHARED_SECRET", "")
+
+#
+# TODO: Remove after 1.2
+#
+# This logic is here to address issue #44, wherein we were using inconsistent
+# constant names vs. environment variables.  If you're using Paperless for the
+# first time, you can safely ignore everything from here on, so long as you're
+# correctly defining the variables as per the documentation.
+#
+
+
+def deprecated(before, after):
+    print(
+        "\n\n"
+        "WARNING: {before} has been renamed to {after}.\n"
+        "WARNING: Use of {before} will not work as of version 1.2."
+        "\n\n".format(
+            before=before,
+            after=after
+        )
+    )
+
+if not CONVERT_BINARY:
+    CONVERT_BINARY = "convert"
+if os.getenv("PAPERLESS_CONVERT"):
+    deprecated("PAPERLESS_CONVERT", "PAPERLESS_CONVERT_BINARY")
+    CONVERT_BINARY = os.getenv("PAPERLESS_CONVERT", CONVERT_BINARY)
+
+if not CONSUMPTION_DIR and os.getenv("PAPERLESS_CONSUME"):
+    deprecated("PAPERLESS_CONSUME", "PAPERLESS_CONSUMPTION_DIR")
+    CONSUMPTION_DIR = os.getenv("PAPERLESS_CONSUME")
+
+if not SHARED_SECRET and os.getenv("PAPERLESS_SECRET"):
+    deprecated("PAPERLESS_SECRET", "PAPERLESS_SHARED_SECRET")
+    SHARED_SECRET = os.getenv("PAPERLESS_SECRET", "")
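The FORGIVING_OCR line above packs its boolean parsing into one expression;
pulled apart, the accepted truthy spellings look like this (the helper name
and sample values are just for illustration):

    import os

    def env_flag(name, default="YES"):
        # Mirrors the parsing above: case-insensitive yes/y/1/t/true
        return os.getenv(name, default).lower() in ("yes", "y", "1", "t", "true")

    os.environ["PAPERLESS_FORGIVING_OCR"] = "true"
    env_flag("PAPERLESS_FORGIVING_OCR")  # True

    os.environ["PAPERLESS_FORGIVING_OCR"] = "no"
    env_flag("PAPERLESS_FORGIVING_OCR")  # False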
@ -15,15 +15,46 @@ Including another URLconf
 3. Add a URL to urlpatterns:  url(r'^blog/', include(blog_urls))
 """
 from django.conf import settings
-from django.conf.urls import url, static
+from django.conf.urls import url, static, include
 from django.contrib import admin

-from documents.views import PdfView, PushView
+from rest_framework.routers import DefaultRouter
+
+from documents.views import (
+    IndexView, FetchView, PushView,
+    CorrespondentViewSet, TagViewSet, DocumentViewSet, LogViewSet
+)
+
+router = DefaultRouter()
+router.register(r'correspondents', CorrespondentViewSet)
+router.register(r'tags', TagViewSet)
+router.register(r'documents', DocumentViewSet)
+router.register(r'logs', LogViewSet)

 urlpatterns = [
-    url(r"^fetch/(?P<pk>\d+)$", PdfView.as_view(), name="fetch"),
-    url(r'', admin.site.urls),
+
+    # API
+    url(
+        r"^api/auth/",
+        include('rest_framework.urls', namespace="rest_framework")
+    ),
+    url(r"^api/", include(router.urls, namespace="drf")),
+
+    # Normal pages (coming soon)
+    # url(r"^$", IndexView.as_view(), name="index"),
+
+    # File downloads
+    url(
+        r"^fetch/(?P<kind>doc|thumb)/(?P<pk>\d+)$",
+        FetchView.as_view(),
+        name="fetch"
+    ),
+
+    # The Django admin
+    url(r"admin/", admin.site.urls),
+    url(r"", admin.site.urls),  # This is going away
+
 ] + static.static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT)

-if settings.UPLOAD_SHARED_SECRET:
+if settings.SHARED_SECRET:
     urlpatterns.insert(0, url(r"^push$", PushView.as_view(), name="push"))
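With the kind segment in the fetch route, both download styles resolve through
one view, and the router namespace gives the API predictable names; a small
sketch of resolving them (the pk is illustrative, and the drf: names follow
the default router's conventions):

    from django.core.urlresolvers import reverse

    reverse("fetch", kwargs={"kind": "doc", "pk": 1})    # '/fetch/doc/1'
    reverse("fetch", kwargs={"kind": "thumb", "pk": 1})  # '/fetch/thumb/1'
    reverse("drf:document-list")                         # '/api/documents/'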
@ -1 +1 @@
-__version__ = (0, 0, 6)
+__version__ = (0, 1, 1)
23
src/tox.ini
Normal file
@ -0,0 +1,23 @@
# Tox (http://tox.testrun.org/) is a tool for running tests
# in multiple virtualenvs. This configuration file will run the
# test suite on all supported python versions. To use it, "pip install tox"
# and then run "tox" from this directory.

[tox]
skipsdist = True
envlist = py34, py35, pep8

[testenv]
commands = {envpython} manage.py test
deps = -r{toxinidir}/../requirements.txt
setenv =
    PAPERLESS_CONSUME=/tmp
    PAPERLESS_PASSPHRASE=THISISNOTASECRET
    PAPERLESS_SECRET=paperless

[testenv:pep8]
commands=pep8
deps=pep8

[pep8]
exclude=.tox,migrations,paperless/settings.py