Merge branch 'dev' into feature-bulk-edit

2025-08-24 01:06:17 +00:00 · 2020-12-10 15:56:03 +01:00
parent 5369e0be03 2f7bb01f34
commit abd54eeb3a
78 changed files with 1375 additions and 541 deletions
--- a/2
+++ b/2
@@ -19,6 +19,7 @@ django-extensions = "*"
 django-filter = "~=2.4.0"
 django-q = "~=1.3.4"
 djangorestframework = "~=3.12.2"
+filelock = "*"
 fuzzywuzzy = "*"
 gunicorn = "*"
 imap-tools = "*"
@@ -26,6 +27,7 @@ langdetect = "*"
 pdftotext = "*"
 pathvalidate = "*"
 pillow = "*"
+pikepdf = "*"
 python-gnupg = "*"
 python-dotenv = "*"
 python-dateutil = "*"
--- a/Pipfile.lock
+++ b/Pipfile.lock
@@ -1,7 +1,7 @@
 {
    "_meta": {
        "hash": {
-            "sha256": "b10db53eb22d917723aa6107ff0970dc4e2aa886ee03d3ae08a994a856d57986"
+            "sha256": "3d576f289958226a7583e4c471c7f8c11bff6933bf093185f623cfb381a92412"
        },
        "pipfile-spec": 6,
        "requires": {
@@ -197,6 +197,14 @@
            "index": "pypi",
            "version": "==3.12.2"
        },
+        "filelock": {
+            "hashes": [
+                "sha256:18d82244ee114f543149c66a6e0c14e9c4f8a1044b5cdaadd0f82159d6a6ff59",
+                "sha256:929b7d63ec5b7d6b71b0fa5ac14e030b3f70b75747cef1b10da9b879fef15836"
+            ],
+            "index": "pypi",
+            "version": "==3.0.12"
+        },
        "fuzzywuzzy": {
            "hashes": [
                "sha256:45016e92264780e58972dca1b3d939ac864b78437422beecebb3095f8efd00e8",
@@ -425,7 +433,7 @@
                "sha256:fe0ca120e3347c851c34a91041d574f3c588d832023906d8ae18d66d042e8a52",
                "sha256:fe8e0152672f24d8bfdecc725f97e9013f2de1b41849150959526ca3562bd3ef"
            ],
-            "markers": "python_version < '3.9'",
+            "index": "pypi",
            "version": "==2.2.0"
        },
        "pillow": {
@@ -858,10 +866,10 @@
        },
        "certifi": {
            "hashes": [
-                "sha256:1f422849db327d534e3d0c5f02a263458c3955ec0aae4ff09b95f195c59f4edd",
-                "sha256:f05def092c44fbf25834a51509ef6e631dc19765ab8a57b4e7ab85531f0a9cf4"
+                "sha256:1a4995114262bffbc2413b159f2a1a480c969de6e6eb13ee966d470af86af59c",
+                "sha256:719a74fb9e33b9bd44cc7f3a8d94bc35e4049deebe19ba7d8e108280cfd59830"
            ],
-            "version": "==2020.11.8"
+            "version": "==2020.12.5"
        },
        "chardet": {
            "hashes": [
@@ -961,17 +969,18 @@
        },
        "faker": {
            "hashes": [
-                "sha256:7bca5b074299ac6532be2f72979e6793f1a2403ca8105cb4cf0b385a964469c4",
-                "sha256:fb21a76064847561033d8cab1cfd11af436ddf2c6fe72eb51b3cda51dff86bdc"
+                "sha256:1fcb415562ee6e2395b041e85fa6901d4708d30b84d54015226fa754ed0822c3",
+                "sha256:e8beccb398ee9b8cc1a91d9295121d66512b6753b4846eb1e7370545d46b3311"
            ],
-            "markers": "python_version >= '3.5'",
-            "version": "==5.0.0"
+            "markers": "python_version >= '3.6'",
+            "version": "==5.0.1"
        },
        "filelock": {
            "hashes": [
                "sha256:18d82244ee114f543149c66a6e0c14e9c4f8a1044b5cdaadd0f82159d6a6ff59",
                "sha256:929b7d63ec5b7d6b71b0fa5ac14e030b3f70b75747cef1b10da9b879fef15836"
            ],
+            "index": "pypi",
            "version": "==3.0.12"
        },
        "idna": {
@@ -1100,11 +1109,11 @@
        },
        "pygments": {
            "hashes": [
-                "sha256:381985fcc551eb9d37c52088a32914e00517e57f4a21609f48141ba08e193fa0",
-                "sha256:88a0bbcd659fcb9573703957c6b9cff9fab7295e6e76db54c9d00ae42df32773"
+                "sha256:ccf3acacf3782cbed4a989426012f1c535c9a90d3a7fc3f16d231b9372d2b716",
+                "sha256:f275b6c0909e5dafd2d6269a656aa90fa58ebf4a74f8fcf9053195d226b24a08"
            ],
            "markers": "python_version >= '3.5'",
-            "version": "==2.7.2"
+            "version": "==2.7.3"
        },
        "pyparsing": {
            "hashes": [
@@ -1313,11 +1322,11 @@
        },
        "virtualenv": {
            "hashes": [
-                "sha256:07cff122e9d343140366055f31be4dcd61fd598c69d11cd33a9d9c8df4546dd7",
-                "sha256:e0aac7525e880a429764cefd3aaaff54afb5d9f25c82627563603f5d7de5a6e5"
+                "sha256:54b05fc737ea9c9ee9f8340f579e5da5b09fb64fd010ab5757eb90268616907c",
+                "sha256:b7a8ec323ee02fb2312f098b6b4c9de99559b462775bc8fe3627a73706603c1b"
            ],
            "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
-            "version": "==20.2.1"
+            "version": "==20.2.2"
        },
        "zipp": {
            "hashes": [
--- a/README.md
+++ b/README.md
@@ -38,6 +38,7 @@ Here's what you get:
 	* When adding documents from mails, paperless can move these mails to a new folder, mark them as read, flag them or delete them.
 * Machine learning powered document matching.
 	* Paperless learns from your documents and will be able to automatically assign tags, correspondents and types to documents once you've stored a few documents in paperless.
+* We have a mobile app that offers a 'Share with paperless' option over at https://github.com/qcasey/paperless_share. You can use that in combination with any of the mobile scanning apps out there. It's still a little rough around the edges, but it works!
 * A task processor that processes documents in parallel and also tells you when something goes wrong. On modern multi core systems, consumption is blazing fast.
 * Code cleanup in many, MANY areas. Some of the code from OG paperless was just overly complicated.
 * More tests, more stability.
@@ -50,7 +51,6 @@ For a complete list of changes from paperless, check out the [changelog](https:/

 - Make the front end nice (except mobile).
 - Test coverage at 90%.
- Store archived documents with an embedded OCR text layer, while keeping originals available. Making good progress in the `feature-ocrmypdf` branch.
 - Fix whatever bugs I and you find.

 ## Roadmap for versions beyond 1.0
--- a/docker/docker-entrypoint.sh
+++ b/docker/docker-entrypoint.sh
@@ -25,6 +25,11 @@ wait_for_postgres() {
 	host="${PAPERLESS_DBHOST}"
 	port="${PAPERLESS_DBPORT}"

+	if [[ -z $port ]] ;
+	then
+		port="5432"
+	fi
+
 	while !</dev/tcp/$host/$port ;
 	do

@@ -114,13 +119,13 @@ install_languages() {
    done
 }

-initialize
-
 # Install additional languages if specified
 if [[ ! -z "$PAPERLESS_OCR_LANGUAGES"  ]]; then
 		install_languages "$PAPERLESS_OCR_LANGUAGES"
 fi

+initialize
+
 if [[ "$1" != "/"* ]]; then
 	exec sudo -HEu paperless python3 manage.py "$@"
 else
--- a/docker/hub/docker-compose.postgres.yml
+++ b/docker/hub/docker-compose.postgres.yml
@@ -15,7 +15,7 @@ services:
      POSTGRES_PASSWORD: paperless

  webserver:
-    image: jonaswinkler/paperless-ng:0.9.5
+    image: jonaswinkler/paperless-ng:0.9.6
    restart: always
    depends_on:
      - db
--- a/docker/hub/docker-compose.sqlite.yml
+++ b/docker/hub/docker-compose.sqlite.yml
@@ -5,7 +5,7 @@ services:
    restart: always

  webserver:
-    image: jonaswinkler/paperless-ng:0.9.5
+    image: jonaswinkler/paperless-ng:0.9.6
    restart: always
    depends_on:
      - broker
--- a/docker/local/Dockerfile
+++ b/docker/local/Dockerfile
@@ -18,6 +18,7 @@ RUN apt-get update \
 		libmagic-dev \
 		libpoppler-cpp-dev \
 		libpq-dev \
+		libqpdf-dev \
 		libxml2 \
 		optipng \
 		pngquant \
@@ -34,7 +35,7 @@ RUN apt-get update \
 		zlib1g \
 	&& pip3 install --upgrade supervisor setuptools \
 	&& pip install --no-cache-dir -r requirements.txt \
-	&& apt-get -y purge build-essential \
+	&& apt-get -y purge build-essential libqpdf-dev \
 	&& apt-get -y autoremove --purge \
 	&& rm -rf /var/lib/apt/lists/* \
 	&& mkdir /var/log/supervisord /var/run/supervisord
--- a/docs/administration.rst
+++ b/docs/administration.rst
@@ -119,8 +119,11 @@ Updating paperless without docker

 After grabbing the new release and unpacking the contents, do the following:

-1.  Update python requirements. Paperless uses
-    `Pipenv`_ for managing dependencies:
+1.  Update dependencies. New paperless versions may require additional
+    dependencies. The dependencies required are listed in the section about 
+    :ref:`bare metal installations <setup-bare_metal>`.
+
+2.  Update python requirements. If you use Pipenv, this is done with the following steps.

    .. code:: shell-session

@@ -132,14 +135,14 @@ After grabbing the new release and unpacking the contents, do the following:
    This creates a new virtual environment (or uses your existing environment)
    and installs all dependencies into it.

-2.  Collect static files.
+3.  Collect static files.

    .. code:: shell-session

        $ cd src
        $ pipenv run python3 manage.py collectstatic --clear
    
-3.  Migrate the database.
+4.  Migrate the database.

    .. code:: shell-session

@@ -153,14 +156,14 @@ Management utilities
 Paperless comes with some management commands that perform various maintenance
 tasks on your paperless instance. You can invoke these commands either by

-.. code:: bash
+.. code:: shell-session

    $ cd /path/to/paperless
    $ docker-compose run --rm webserver <command> <arguments>

 or

-.. code:: bash
+.. code:: shell-session

    $ cd /path/to/paperless/src
    $ pipenv run python manage.py <command> <arguments>
@@ -366,7 +369,7 @@ is specified, the archiver will only process that document.
 .. note::

    Some documents will cause errors and cannot be converted into PDF/A documents,
-    such as encrypted PDF documents. The archiver will skip over these Documents
+    such as encrypted PDF documents. The archiver will skip over these documents
    each time it sees them.

 .. _utilities-encyption:
--- a/docs/advanced_usage.rst
+++ b/docs/advanced_usage.rst
@@ -298,6 +298,7 @@ avoids filename clashes.
 Paperless provides the following placeholders withing filenames:

 * ``{correspondent}``: The name of the correspondent, or "none".
+* ``{document_type}``: The name of the document type, or "none".
 * ``{title}``: The title of the document.
 * ``{created}``: The full date and time the document was created.
 * ``{created_year}``: Year created only.
@@ -307,7 +308,6 @@ Paperless provides the following placeholders withing filenames:
 * ``{added_year}``: Year added only.
 * ``{added_month}``: Month added only (number 1-12).
 * ``{added_day}``: Day added only (number 1-31).
-* ``{tags}``: I don't know how this works. Look at the source.

 Paperless will convert all values for the placeholders into values which are safe
 for use in filenames.
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -13,23 +13,55 @@ available filters and ordering fields.

 The API provides 5 main endpoints:

+*   ``/api/documents/``: Full CRUD support, except POSTing new documents. See below.
 *   ``/api/correspondents/``: Full CRUD support.
 *   ``/api/document_types/``: Full CRUD support.
-*   ``/api/documents/``: Full CRUD support, except POSTing new documents. See below.
 *   ``/api/logs/``: Read-Only.
 *   ``/api/tags/``: Full CRUD support.

-All of these endpoints except for the logging endpoint 
+All of these endpoints except for the logging endpoint
 allow you to fetch, edit and delete individual objects
 by appending their primary key to the path, for example ``/api/documents/454/``.

+The objects served by the document endpoint contain the following fields:
+
+*   ``id``: ID of the document. Read-only.
+*   ``title``: Title of the document.
+*   ``content``: Plain text content of the document.
+*   ``tags``: List of IDs of tags assigned to this document, or empty list.
+*   ``document_type``: Document type of this document, or null.
+*   ``correspondent``:  Correspondent of this document or null.
+*   ``created``: The date at which this document was created.
+*   ``modified``: The date at which this document was last edited in paperless. Read-only.
+*   ``added``: The date at which this document was added to paperless. Read-only.
+*   ``archive_serial_number``: The identifier of this document in a physical document archive.
+*   ``original_file_name``: Verbose filename of the original document. Read-only.
+*   ``archived_file_name``: Verbose filename of the archived document. Read-only. Null if no archived document is available.
+
+
+Downloading documents
+#####################
+
 In addition to that, the document endpoint offers these additional actions on
 individual documents:

-*   ``/api/documents/<pk>/download/``: Download the original document.
-*   ``/api/documents/<pk>/thumb/``: Download the PNG thumbnail of a document.
-*   ``/api/documents/<pk>/preview/``: Display the original document inline,
+*   ``/api/documents/<pk>/download/``: Download the document.
+*   ``/api/documents/<pk>/preview/``: Display the document inline,
    without downloading it.
+*   ``/api/documents/<pk>/thumb/``: Download the PNG thumbnail of a document.
+
+Paperless generates archived PDF/A documents from consumed files and stores both
+the original files as well as the archived files. By default, the endpoints
+for previews and downloads serve the archived file, if it is available.
+Otherwise, the original file is served.
+Some documents cannot be archived.
+
+The endpoints correctly serve the response header fields ``Content-Disposition``
+and ``Content-Type`` to indicate the filename for download and the type of content of
+the document.
+
+In order to download or preview the original document when an archived document is available,
+supply the query parameter ``original=true``.

 .. hint::

@@ -38,13 +70,43 @@ individual documents:
    are in place. However, if you use these old URLs to access documents, you
    should update your app or script to use the new URLs.

-.. note::

-    The document endpoint provides tags, document types and correspondents as
-    ids in their corresponding fields. These are writeable. Paperless also
-    offers read-only objects for assigned tags, types and correspondents,
-    however, these might be removed in the future. As for now, the front end
-    requires them.
+Getting document metadata
+#########################
+
+The api also has an endpoint to retrieve read-only metadata about specific documents. This
+information is not served along with the document objects, since it requires reading
+files and would therefore slow down document lists considerably.
+
+Access the metadata of a document with an ID ``id`` at ``/api/documents/<id>/metadata/``.
+
+The endpoint reports the following data:
+
+*   ``original_checksum``: MD5 checksum of the original document.
+*   ``original_size``: Size of the original document, in bytes.
+*   ``original_mime_type``: Mime type of the original document.
+*   ``media_filename``: Current filename of the document, under which it is stored inside the media directory.
+*   ``has_archive_version``: True, if this document is archived, false otherwise.
+*   ``original_metadata``: A list of metadata associated with the original document. See below.
+*   ``archive_checksum``: MD5 checksum of the archived document, or null.
+*   ``archive_size``: Size of the archived document in bytes, or null.
+*   ``archive_metadata``: Metadata associated with the archived document, or null. See below.
+
+File metadata is reported as a list of objects in the following form:
+
+.. code:: json
+
+    [
+        {
+            "namespace": "http://ns.adobe.com/pdf/1.3/",
+            "prefix": "pdf",
+            "key": "Producer",
+            "value": "SparklePDF, Fancy edition"
+        }
+    ]
+
+``namespace`` and ``prefix`` can be null. The actual metadata reported depends on the file type and the metadata
+available in that specific document. Paperless only reports PDF metadata at this point.

 Authorization
 #############
@@ -54,11 +116,11 @@ The REST api provides three different forms of authentication.
 1.  Basic authentication

    Authorize by providing a HTTP header in the form
-    
+
    .. code::

        Authorization: Basic <credentials>
-    
+
    where ``credentials`` is a base64-encoded string of ``<username>:<password>``

 2.  Session authentication
@@ -79,7 +141,7 @@ The REST api provides three different forms of authentication.
    .. code::

        Authorization: Token <token>
-    
+
    Tokens can be managed and revoked in the paperless admin.

 Searching for documents
@@ -111,7 +173,7 @@ Result list object returned by the endpoint:
        "page_count": 1,
        "corrected_query": "",
        "results": [
-            
+
        ]
    }

@@ -131,12 +193,12 @@ Result object:
    {
        "id": 1,
        "highlights": [
-            
+
        ],
        "score": 6.34234,
        "rank": 23,
        "document": {
-            
+
        }
    }

@@ -168,7 +230,7 @@ Each fragment contains a list of strings, and some of them are marked as a highl
            {"text": " fragment with a highlight."}
        ]
    ]
-    
+


 When ``term`` is present within a string, the word within ``text`` should be highlighted.
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -5,6 +5,47 @@
 Changelog
 *********

+paperless-ng 0.9.6
+##################
+
+This release focusses primarily on many small issues with the UI.
+
+* Front end
+
+  * Paperless now has proper window titles.
+  * Fixed an issue with the small cards when more than 7 tags were used.
+  * Navigation of the "Show all" links adjusted. They navigate to the saved view now, if available in the sidebar.
+  * Some indication on the document lists that a filter is active was added.
+  * There's a new filter to filter for documents that do *not* have a certain tag.
+  * The file upload box now shows upload progress.
+  * The document edit page was reorganized.
+  * The document edit page shows various information about a document.
+  * An issue with the height of the preview was fixed.
+  * Table issues with too long document titles fixed.
+
+* API
+
+  * The API now serves file names with documents.
+  * The API now serves various metadata about documents.
+  * API documentation updated.
+
+* Other
+
+  * Fixed an issue with the docker image when a non-standard PostgreSQL port was used.
+  * The docker image was trying to check for installed languages before actually installing them.
+  * ``FILENAME_FORMAT`` placeholder for document types.
+  * The filename formatter is now less restrictive with file names and tries to
+    conserve the original correspondents, types and titles as much as possible.
+  * The filename formatter does not include the document ID in filenames anymore. It will
+    rather append ``_01``, ``_02``, etc when it detects duplicate filenames.
+
+.. note::
+
+  The changes to the filename format will apply to newly added documents and changed documents.
+  If you want all files to reflect these changes, execute the ``document_renamer`` management
+  command.
+
+
 paperless-ng 0.9.5
 ##################

--- a/docs/extending.rst
+++ b/docs/extending.rst
@@ -118,114 +118,80 @@ This will test and assemble everything and also build and tag a docker image.
 Extending Paperless
 ===================

-.. warning::
+Paperless does not have any fancy plugin systems and will probably never have. However,
+some parts of the application have been designed to allow easy integration of additional
+features without any modification to the base code.

-    This section is not updated to paperless-ng yet.
+Making custom parsers
+---------------------

-For the most part, Paperless is monolithic, so extending it is often best
-managed by way of modifying the code directly and issuing a pull request on
-`GitHub`_.  However, over time the project has been evolving to be a little
-more "pluggable" so that users can write their own stuff that talks to it.
+Paperless uses parsers to add documents to paperless. A parser is responsible for:

-.. _GitHub: https://github.com/the-paperless-project/paperless
+*   Retrieving the content from the original
+*   Creating a thumbnail
+*   Optional: Retrieving a created date from the original
+*   Optional: Creating an archived document from the original

+Custom parsers can be added to paperless to support more file types. In order to do that,
+you need to write the parser itself and announce its existence to paperless.

-.. _extending-parsers:
-
-Parsers
-------
-
-You can leverage Paperless' consumption model to have it consume files *other*
-than ones handled by default like ``.pdf``, ``.jpg``, and ``.tiff``.  To do so,
-you simply follow Django's convention of creating a new app, with a few key
-requirements.
-
-
-.. _extending-parsers-parserspy:
-
-parsers.py
-..........
-
-In this file, you create a class that extends
-``documents.parsers.DocumentParser`` and go about implementing the three
-required methods:
-
-* ``get_thumbnail()``: Returns the path to a file we can use as a thumbnail for
-  this document.
-* ``get_text()``: Returns the text from the document and only the text.
-* ``get_date()``: If possible, this returns the date of the document, otherwise
-  it should return ``None``.
-
-
-.. _extending-parsers-signalspy:
-
-signals.py
-..........
-
-At consumption time, Paperless emits a ``document_consumer_declaration``
-signal which your module has to react to in order to let the consumer know
-whether or not it's capable of handling a particular file.  Think of it like
-this:
-
-1. Consumer finds a file in the consumption directory.
-2. It asks all the available parsers: *"Hey, can you handle this file?"*
-3. Each parser responds with either ``None`` meaning they can't handle the
-   file, or a dictionary in the following format:
+The parser itself must extend ``documents.parsers.DocumentParser`` and must implement the
+methods ``parse`` and ``get_thumbnail``. You can provide your own implementation of
+``get_date`` if you don't want to rely on paperless' default date guessing mechanisms.

 .. code:: python

-    {
-        "parser": <the class name>,
-        "weight": <an integer>
-    }
+    class MyCustomParser(DocumentParser):

-The consumer compares the ``weight`` values from all respondents and uses the
-class with the highest value to consume the document.  The default parser,
-``RasterisedDocumentParser`` has a weight of ``0``.
+        def parse(self, document_path, mime_type):
+            # This method does not return anything. Rather, you should assign
+            # whatever you got from the document to the following fields:

+            # The content of the document.
+            self.text = "content"
+            
+            # Optional: path to a PDF document that you created from the original.
+            self.archive_path = os.path.join(self.tempdir, "archived.pdf")

-.. _extending-parsers-appspy:
+            # Optional: "created" date of the document.
+            self.date = get_created_from_metadata(document_path)

-apps.py
-.......
+        def get_thumbnail(self, document_path, mime_type):
+            # This should return the path to a thumbnail you created for this
+            # document.
+            return os.path.join(self.tempdir, "thumb.png")

-This is a standard Django file, but you'll need to add some code to it to
-connect your parser to the ``document_consumer_declaration`` signal.
+If you encounter any issues during parsing, raise a ``documents.parsers.ParseError``.

+The ``self.tempdir`` directory is a temporary directory that is guaranteed to be empty
+and is removed after consumption has finished. You can use that directory to store any
+intermediate files and also use it to store the thumbnail / archived document.

-.. _extending-parsers-finally:
-
-Finally
-.......
-
-The last step is to update ``settings.py`` to include your new module.
-Eventually, this will be dynamic, but at the moment, you have to edit the
-``INSTALLED_APPS`` section manually.  Simply add the path to your AppConfig to
-the list like this:
+After that, you need to announce your parser to paperless. You need to connect a
+handler to the ``document_consumer_declaration`` signal. Have a look in the file
+``src/paperless_tesseract/apps.py`` on how that's done. The handler is a method
+that returns information about your parser:

 .. code:: python

-    INSTALLED_APPS = [
-        ...
-        "my_module.apps.MyModuleConfig",
-        ...
-    ]
+    def myparser_consumer_declaration(sender, **kwargs):
+        return {
+            "parser": MyCustomParser,
+            "weight": 0,
+            "mime_types": {
+                "application/pdf": ".pdf",
+                "image/jpeg": ".jpg",
+            }
+        }

-Order doesn't matter, but generally it's a good idea to place your module lower
-in the list so that you don't end up accidentally overriding project defaults
-somewhere.
+*   ``parser`` is a reference to a class that extends ``DocumentParser``.

+*   ``weight`` is used whenever two or more parsers are able to parse a file: The parser with
+    the higher weight wins. This can be used to override the parsers provided by
+    paperless.

-.. _extending-parsers-example:
-
-An Example
-..........
-
-The core Paperless functionality is based on this design, so if you want to see
-what a parser module should look like, have a look at `parsers.py`_,
-`signals.py`_, and `apps.py`_ in the `paperless_tesseract`_ module.
-
-.. _parsers.py: https://github.com/the-paperless-project/paperless/blob/master/src/paperless_tesseract/parsers.py
-.. _signals.py: https://github.com/the-paperless-project/paperless/blob/master/src/paperless_tesseract/signals.py
-.. _apps.py: https://github.com/the-paperless-project/paperless/blob/master/src/paperless_tesseract/apps.py
-.. _paperless_tesseract: https://github.com/the-paperless-project/paperless/blob/master/src/paperless_tesseract/
+*   ``mime_types`` is a dictionary. The keys are the mime types your parser supports and the value
+    is the default file extension that paperless should use when storing files and serving them for
+    download. We could guess that from the file extensions, but some mime types have many extensions
+    associated with them and the python methods responsible for guessing the extension do not always
+    return the same value.
--- a/docs/faq.rst
+++ b/docs/faq.rst
@@ -73,7 +73,7 @@ in your browser and paperless has to do much less work to serve the data.

 **Q:** *How do I install paperless-ng on Raspberry Pi?*

-**A:** There is not docker image for ARM available. If you know how to build
+**A:** There is no docker image for ARM available. If you know how to build
 that automatically, I'm all ears. For now, you have to grab the latest release
 archive from the project page and build the image yourself. The release comes
 with the front end already compiled, so you don't have to do this on the Pi.
--- a/docs/usage_overview.rst
+++ b/docs/usage_overview.rst
@@ -57,7 +57,7 @@ Adding documents to paperless
 #############################

 Once you've got Paperless setup, you need to start feeding documents into it.
-Currently, there are three options: the consumption directory, IMAP (email), and
+Currently, there are four options: the consumption directory, the dashboard, IMAP (email), and
 HTTP POST.

 When adding documents to paperless, it will perform the following operations on
@@ -82,8 +82,7 @@ your documents:
    No matter which options you choose, Paperless will always store the original
    document that it found in the consumption directory or in the mail and
    will never overwrite that document. Archived versions are stored alongside the
-    digital versions.
-
+    original versions.


 The consumption directory
@@ -107,6 +106,12 @@ files from the scanner.  Typically, you're looking at an FTP server like

 .. TODO: hyperref to configuration of the location of this magic folder.

+Dashboard upload
+================
+
+The dashboard has a file drop field to upload documents to paperless. Simply drag a file
+onto this field or select a file with the file dialog. Multiple files are supported.
+
 .. _usage-email:

 IMAP (Email)
@@ -183,6 +188,63 @@ You can also submit a document using the REST API, see :ref:`api-file_uploads` f

 .. _basic-searching:

+
+Best practices
+##############
+
+Paperless offers a couple tools that help you organize your document collection. However,
+it is up to you to use them in a way that helps you organize documents and find specific
+documents when you need them. This section offers a couple ideas for managing your collection.
+
+Document types allow you to classify documents according to what they are. You can define
+types such as "Receipt", "Invoice", or "Contract". If you used to collect all your receipts
+in a single binder, you can recreate that system in paperless by defining a document type,
+assigning documents to that type and then filtering by that type to only see all receipts.
+
+Not all documents need document types. Sometimes it's hard to determine what the type of a
+document is or it is hard to justify creating a document type that you only need once or twice.
+This is okay. As long as the types you define help you organize your collection in the way
+you want, paperless is doing its job.
+
+Tags can be used in many different ways. Think of tags as more versatile folders or binders.
+If you have a binder for documents related to university / your car or health care, you can
+create these binders in paperless by creating tags and assigning them to relevant documents.
+Just as with document types, you can filter the document list by tags and only see documents of
+a certain topic.
+
+With physical documents, you'll often need to decide which folder the document belongs to.
+The advantage of tags over folders and binders is that a single document can have multiple
+tags. A physical document cannot magically appear in two different folders, but with tags,
+this is entirely possible.
+
+.. hint::
+
+  This can be used in many different ways. One example: Imagine you're working on a particular
+  task, such as signing up for university. Usually you'll need to collect a bunch of different
+  documents that are already sorted into various folders. With the tag system of paperless,
+  you can create a new group of documents that are relevant to this task without destroying
+  the already existing organization. When you're done with the task, you could delete the
+  tag again, which would be equal to sorting documents back into the folders they belong in.
+  Or keep the tag, up to you.
+
+All of the logic above applies to correspondents as well. Attach them to documents if you
+feel that they help you organize your collection.
+
+When you've started organizing your documents, create a couple saved views for document collections
+you regularly access. This is equal to having labeled physical binders on your desk, except
+that these saved views are dynamic and simply update themselves as you add documents to the system.
+
+Here are a couple examples of tags and types that you could use in your collection.
+
+* An ``inbox`` tag for newly added documents that you haven't manually edited yet.
+* A tag ``car`` for everything car related (repairs, registration, insurance, etc)
+* A tag ``todo`` for documents that you still need to do something with, such as reply, or
+  perform some task online.
+* A tag ``bank account x`` for all bank statements related to that account.
+* A tag ``mail`` for anything that you added to paperless via its mail processing capabilities.
+* A tag ``missing_metadata`` when you still need to add some metadata to a document, but can't
+  or don't want to do this right now.
+
 Searching
 #########

--- a/src-ui/src/app/app.module.ts
+++ b/src-ui/src/app/app.module.ts
@@ -46,6 +46,8 @@ import { StatisticsWidgetComponent } from './components/dashboard/widgets/statis
 import { UploadFileWidgetComponent } from './components/dashboard/widgets/upload-file-widget/upload-file-widget.component';
 import { WidgetFrameComponent } from './components/dashboard/widgets/widget-frame/widget-frame.component';
 import { WelcomeWidgetComponent } from './components/dashboard/widgets/welcome-widget/welcome-widget.component';
+import { YesNoPipe } from './pipes/yes-no.pipe';
+import { FileSizePipe } from './pipes/file-size.pipe';

@NgModule({
  declarations: [
@@ -84,7 +86,9 @@ import { WelcomeWidgetComponent } from './components/dashboard/widgets/welcome-w
    StatisticsWidgetComponent,
    UploadFileWidgetComponent,
    WidgetFrameComponent,
-    WelcomeWidgetComponent
+    WelcomeWidgetComponent,
+    YesNoPipe,
+    FileSizePipe
  ],
  imports: [
    BrowserModule,
--- a/src-ui/src/app/components/common/input/date-time/date-time.component.html
+++ b/src-ui/src/app/components/common/input/date-time/date-time.component.html
@@ -3,11 +3,10 @@
      <label for="created_date">{{titleDate}}</label>
      <input type="date" class="form-control" id="created_date" [(ngModel)]="dateValue" (change)="dateOrTimeChanged()">
  </div>
-  <div class="form-group col">
+  <div class="form-group col" *ngIf="titleTime">
      <label for="created_time">{{titleTime}}</label>
      <input type="time" class="form-control" id="created_time" [(ngModel)]="timeValue" (change)="dateOrTimeChanged()">
  </div>
-
 </div>


--- a/src-ui/src/app/components/common/input/date-time/date-time.component.ts
+++ b/src-ui/src/app/components/common/input/date-time/date-time.component.ts
@@ -40,7 +40,7 @@ export class DateTimeComponent implements OnInit,ControlValueAccessor  {
  titleDate: string = "Date"

  @Input()
-  titleTime: string = "Time"
+  titleTime: string

  @Input()
  disabled: boolean = false
--- a/src-ui/src/app/components/common/input/tags/tags.component.html
+++ b/src-ui/src/app/components/common/input/tags/tags.component.html
@@ -8,7 +8,7 @@

    <div class="input-group-append" ngbDropdown placement="top-right">
      <button class="btn btn-outline-secondary" type="button" ngbDropdownToggle></button>
-      <div ngbDropdownMenu class="scrollable-menu">
+      <div ngbDropdownMenu class="scrollable-menu shadow">
        <button type="button" *ngFor="let tag of tags" ngbDropdownItem (click)="addTag(tag.id)">
          <app-tag [tag]="tag"></app-tag>
        </button>
--- a/src-ui/src/app/components/common/input/text/text.component.ts
+++ b/src-ui/src/app/components/common/input/text/text.component.ts
@@ -1,6 +1,5 @@
-import { Component, forwardRef, Input, OnInit } from '@angular/core';
-import { ControlValueAccessor, NG_VALUE_ACCESSOR } from '@angular/forms';
-import { v4 as uuidv4 } from 'uuid';
+import { Component, forwardRef } from '@angular/core';
+import { NG_VALUE_ACCESSOR } from '@angular/forms';
 import { AbstractInputComponent } from '../abstract-input';

@Component({
--- a/src-ui/src/app/components/dashboard/dashboard.component.ts
+++ b/src-ui/src/app/components/dashboard/dashboard.component.ts
@@ -1,5 +1,7 @@
 import { Component, OnInit } from '@angular/core';
+import { Title } from '@angular/platform-browser';
 import { SavedViewConfigService } from 'src/app/services/saved-view-config.service';
+import { environment } from 'src/environments/environment';


@Component({
@@ -10,13 +12,15 @@ import { SavedViewConfigService } from 'src/app/services/saved-view-config.servi
 export class DashboardComponent implements OnInit {

  constructor(
-    public savedViewConfigService: SavedViewConfigService) { }
+    public savedViewConfigService: SavedViewConfigService,
+    private titleService: Title) { }


  savedViews = []

  ngOnInit(): void {
    this.savedViews = this.savedViewConfigService.getDashboardConfigs()
+    this.titleService.setTitle(`Dashboard - ${environment.appTitle}`)
  }

 }
--- a/src-ui/src/app/components/dashboard/widgets/saved-view-widget/saved-view-widget.component.ts
+++ b/src-ui/src/app/components/dashboard/widgets/saved-view-widget/saved-view-widget.component.ts
@@ -29,8 +29,12 @@ export class SavedViewWidgetComponent implements OnInit {
  }

  showAll() {
-    this.list.load(this.savedView)
-    this.router.navigate(["documents"])
+    if (this.savedView.showInSideBar) {
+      this.router.navigate(['view', this.savedView.id])
+    } else {
+      this.list.load(this.savedView)
+      this.router.navigate(["documents"])
+    }
  }

 }
--- a/src-ui/src/app/components/dashboard/widgets/upload-file-widget/upload-file-widget.component.html
+++ b/src-ui/src/app/components/dashboard/widgets/upload-file-widget/upload-file-widget.component.html
@@ -1,15 +1,18 @@
 <app-widget-frame title="Upload new documents">

-  <form content>
-    <ngx-file-drop 
-      dropZoneLabel="Drop documents here or" (onFileDrop)="dropped($event)"
-      (onFileOver)="fileOver($event)" (onFileLeave)="fileLeave($event)"
-      dropZoneClassName="bg-light card"
-      multiple="true"
-      contentClassName="justify-content-center d-flex align-items-center p-5"
-      [showBrowseBtn]=true
-      browseBtnClassName="btn btn-sm btn-outline-primary ml-2">
+  <div content>
+    <form>
+      <ngx-file-drop dropZoneLabel="Drop documents here or" (onFileDrop)="dropped($event)"
+        (onFileOver)="fileOver($event)" (onFileLeave)="fileLeave($event)" dropZoneClassName="bg-light card"
+        multiple="true" contentClassName="justify-content-center d-flex align-items-center p-5" [showBrowseBtn]=true
+        browseBtnClassName="btn btn-sm btn-outline-primary ml-2">

-    </ngx-file-drop>
-  </form>
+      </ngx-file-drop>
+    </form>
+    <div *ngIf="uploadVisible" class="mt-3">
+      <p>Uploading {{uploadStatus.length}} file(s)</p>
+      <ngb-progressbar [value]="loadedSum" [max]="totalSum" [striped]="true" [animated]="uploadStatus.length > 0">
+      </ngb-progressbar>
+    </div>
+  </div>
 </app-widget-frame>
--- a/src-ui/src/app/components/dashboard/widgets/upload-file-widget/upload-file-widget.component.ts
+++ b/src-ui/src/app/components/dashboard/widgets/upload-file-widget/upload-file-widget.component.ts
@@ -1,8 +1,15 @@
+import { HttpEventType } from '@angular/common/http';
 import { Component, OnInit } from '@angular/core';
 import { FileSystemFileEntry, NgxFileDropEntry } from 'ngx-file-drop';
 import { DocumentService } from 'src/app/services/rest/document.service';
 import { Toast, ToastService } from 'src/app/services/toast.service';

+
+interface UploadStatus {
+  loaded: number
+  total: number 
+}
+
@Component({
  selector: 'app-upload-file-widget',
  templateUrl: './upload-file-widget.component.html',
@@ -16,26 +23,59 @@ export class UploadFileWidgetComponent implements OnInit {
  }

  public fileOver(event){
-    console.log(event);
  }
- 
+
  public fileLeave(event){
-    console.log(event);
  }
- 
+
+  uploadStatus: UploadStatus[] = []
+  completedFiles = 0
+
+  uploadVisible = false
+
+  get loadedSum() {
+    return this.uploadStatus.map(s => s.loaded).reduce((a,b) => a+b, this.completedFiles > 0 ? 1 : 0)
+  }
+
+  get totalSum() {
+    return this.uploadStatus.map(s => s.total).reduce((a,b) => a+b, 1)
+  }
+
  public dropped(files: NgxFileDropEntry[]) {
    for (const droppedFile of files) {
      if (droppedFile.fileEntry.isFile) {
-        const fileEntry = droppedFile.fileEntry as FileSystemFileEntry;
-        console.log(fileEntry)
+      let uploadStatusObject: UploadStatus = {loaded: 0, total: 1}
+      this.uploadStatus.push(uploadStatusObject)
+      this.uploadVisible = true
+
+      const fileEntry = droppedFile.fileEntry as FileSystemFileEntry;
        fileEntry.file((file: File) => {
-          console.log(file)
-          const formData = new FormData()
+          let formData = new FormData()
          formData.append('document', file, file.name)
-          this.documentService.uploadDocument(formData).subscribe(result => {
-            this.toastService.showToast(Toast.make("Information", "The document has been uploaded and will be processed by the consumer shortly."))
+
+          this.documentService.uploadDocument(formData).subscribe(event => {
+            if (event.type == HttpEventType.UploadProgress) {
+              uploadStatusObject.loaded = event.loaded
+              uploadStatusObject.total = event.total
+            } else if (event.type == HttpEventType.Response) {
+              this.uploadStatus.splice(this.uploadStatus.indexOf(uploadStatusObject), 1)
+              this.completedFiles += 1
+              this.toastService.showToast(Toast.make("Information", "The document has been uploaded and will be processed by the consumer shortly."))
+            }
+            
          }, error => {
-            this.toastService.showToast(Toast.makeError("An error has occured while uploading the document. Sorry!"))
+            this.uploadStatus.splice(this.uploadStatus.indexOf(uploadStatusObject), 1)
+            this.completedFiles += 1
+            switch (error.status) {
+              case 400: {
+                this.toastService.showToast(Toast.makeError(`There was an error while uploading the document: ${error.error.document}`))
+                break;
+              }
+              default: {
+                this.toastService.showToast(Toast.makeError("An error has occurred while uploading the document. Sorry!"))
+                break;
+              }
+            }
          })
        });
      }
--- a/src-ui/src/app/components/dashboard/widgets/widget-frame/widget-frame.component.html
+++ b/src-ui/src/app/components/dashboard/widgets/widget-frame/widget-frame.component.html
@@ -1,4 +1,4 @@
-<div class="card mb-3 shadow">
+<div class="card mb-3 shadow-sm">
  <div class="card-header">
    <div class="d-flex justify-content-between align-items-center">
      <h5 class="card-title mb-0">{{title}}</h5>
--- a/src-ui/src/app/components/document-detail/document-detail.component.html
+++ b/src-ui/src/app/components/document-detail/document-detail.component.html
@@ -14,15 +14,15 @@
            </svg>
            <span class="d-none d-lg-inline"> Download</span>
        </a>
-    
-        <div class="btn-group" ngbDropdown role="group" *ngIf="metadata?.paperless__has_archive_version">
-          <button class="btn btn-sm btn-outline-primary dropdown-toggle-split" ngbDropdownToggle></button>
-          <div class="dropdown-menu" ngbDropdownMenu>
-            <a ngbDropdownItem [href]="downloadOriginalUrl">Download original</a>
-          </div>
+
+        <div class="btn-group" ngbDropdown role="group" *ngIf="metadata?.has_archive_version">
+            <button class="btn btn-sm btn-outline-primary dropdown-toggle-split" ngbDropdownToggle></button>
+            <div class="dropdown-menu shadow" ngbDropdownMenu>
+                <a ngbDropdownItem [href]="downloadOriginalUrl">Download original</a>
+            </div>
        </div>
-    
-      </div>
+
+    </div>


    <button type="button" class="btn btn-sm btn-outline-primary" (click)="close()">
@@ -36,40 +36,146 @@

 <div class="row">
    <div class="col-xl">
+
        <form [formGroup]='documentForm' (ngSubmit)="save()">

-            <app-input-text title="Title" formControlName="title"></app-input-text>
+            <ul ngbNav #nav="ngbNav" class="nav-tabs">
+                <li [ngbNavItem]="1">
+                    <a ngbNavLink>Details</a>
+                    <ng-template ngbNavContent>

-            <div class="form-group">
-                <label for="archive_serial_number">Archive Serial Number</label>
-                <input type="number" class="form-control" id="archive_serial_number"
-                    formControlName='archive_serial_number'>
-            </div>
+                        <app-input-text title="Title" formControlName="title"></app-input-text>
+                        <div class="form-group">
+                            <label for="archive_serial_number">Archive Serial Number</label>
+                            <input type="number" class="form-control" id="archive_serial_number"
+                                formControlName='archive_serial_number'>
+                        </div>
+                        <app-input-date-time titleDate="Date created" formControlName="created"></app-input-date-time>
+                        <app-input-select [items]="correspondents" title="Correspondent" formControlName="correspondent"
+                            allowNull="true" (createNew)="createCorrespondent()"></app-input-select>
+                        <app-input-select [items]="documentTypes" title="Document type" formControlName="document_type"
+                            allowNull="true" (createNew)="createDocumentType()"></app-input-select>
+                        <app-input-tags formControlName="tags" title="Tags"></app-input-tags>

-            <app-input-date-time title="Date created" titleTime="Time created" formControlName="created"></app-input-date-time>
+                    </ng-template>
+                </li>

-            <div class="form-group">
-                <label for="content">Content</label>
-                <textarea class="form-control" id="content" rows="5" formControlName='content'></textarea>
-            </div>
+                <li [ngbNavItem]="2">
+                    <a ngbNavLink>Content</a>
+                    <ng-template ngbNavContent>
+                        <div class="form-group">
+                            <textarea class="form-control" id="content" rows="20" formControlName='content'></textarea>
+                        </div>
+                    </ng-template>
+                </li>

-            <app-input-select [items]="correspondents" title="Correspondent" formControlName="correspondent" allowNull="true" (createNew)="createCorrespondent()"></app-input-select>
+                <li [ngbNavItem]="3">
+                    <a ngbNavLink>Metadata</a>
+                    <ng-template ngbNavContent>

-            <app-input-select [items]="documentTypes" title="Document type" formControlName="document_type" allowNull="true" (createNew)="createDocumentType()"></app-input-select>
+                        <table class="table table-borderless">
+                            <tbody>
+                                <tr>
+                                    <td>Date modified</td>
+                                    <td>{{document.modified | date:'medium'}}</td>
+                                </tr>
+                                <tr>
+                                    <td>Date added</td>
+                                    <td>{{document.added | date:'medium'}}</td>
+                                </tr>
+                                <tr>
+                                    <td>Media filename</td>
+                                    <td>{{metadata?.media_filename}}</td>
+                                </tr>
+                                <tr>
+                                    <td>Original MD5 Checksum</td>
+                                    <td>{{metadata?.original_checksum}}</td>
+                                </tr>
+                                <tr>
+                                    <td>Original file size</td>
+                                    <td>{{metadata?.original_size | fileSize}}</td>
+                                </tr>
+                                <tr>
+                                    <td>Original mime type</td>
+                                    <td>{{metadata?.original_mime_type}}</td>
+                                </tr>
+                                <tr *ngIf="metadata?.has_archive_version">
+                                    <td>Archive MD5 Checksum</td>
+                                    <td>{{metadata?.archive_checksum}}</td>
+                                </tr>
+                                <tr *ngIf="metadata?.has_archive_version">
+                                    <td>Archive file size</td>
+                                    <td>{{metadata?.archive_size | fileSize}}</td>
+                                </tr>
+                            </tbody>
+                        </table>

-            <app-input-tags formControlName="tags" title="Tags"></app-input-tags>
+                        <h6 *ngIf="metadata?.original_metadata.length > 0">
+                            <button type="button" class="btn btn-outline-secondary btn-sm mr-2"
+                                (click)="expandOriginalMetadata = !expandOriginalMetadata" aria-controls="collapseExample">
+                                <svg class="buttonicon" fill="currentColor" *ngIf="!expandOriginalMetadata">
+                                    <use xlink:href="assets/bootstrap-icons.svg#caret-down" />
+                                </svg>
+                                <svg class="buttonicon" fill="currentColor" *ngIf="expandOriginalMetadata">
+                                    <use xlink:href="assets/bootstrap-icons.svg#caret-up" />
+                                </svg>
+                            </button>
+                            Original document metadata
+                        </h6>
+
+                        <div #collapse="ngbCollapse" [ngbCollapse]="!expandOriginalMetadata">
+                            <table class="table table-borderless">
+                                <tbody>
+                                    <tr *ngFor="let m of metadata?.original_metadata">
+                                        <td>{{m.prefix}}:{{m.key}}</td>
+                                        <td>{{m.value}}</td>
+                                    </tr>
+                                </tbody>
+                            </table>
+                        </div>
+
+                        <h6 *ngIf="metadata?.has_archive_version && metadata?.archive_metadata.length > 0">
+                            <button type="button" class="btn btn-outline-secondary btn-sm mr-2"
+                                (click)="expandArchivedMetadata = !expandArchivedMetadata" aria-controls="collapseExample">
+                                <svg class="buttonicon" fill="currentColor" *ngIf="!expandArchivedMetadata">
+                                    <use xlink:href="assets/bootstrap-icons.svg#caret-down" />
+                                </svg>
+                                <svg class="buttonicon" fill="currentColor" *ngIf="expandArchivedMetadata">
+                                    <use xlink:href="assets/bootstrap-icons.svg#caret-up" />
+                                </svg>
+                            </button>
+                            Archived document metadata
+                        </h6>
+
+                        <div #collapse="ngbCollapse" [ngbCollapse]="!expandArchivedMetadata">
+                            <table class="table table-borderless">
+                                <tbody>
+                                    <tr *ngFor="let m of metadata?.archive_metadata">
+                                        <td>{{m.prefix}}:{{m.key}}</td>
+                                        <td>{{m.value}}</td>
+                                    </tr>
+                                </tbody>
+                            </table>
+                        </div>
+
+                    </ng-template>
+                </li>
+            </ul>
+
+            <div [ngbNavOutlet]="nav" class="mt-2"></div>

            <button type="button" class="btn btn-outline-secondary" (click)="discard()">Discard</button>&nbsp;
-            <button type="button" class="btn btn-outline-primary" (click)="saveEditNext()" *ngIf="hasNext()">Save & edit next</button>&nbsp;
+            <button type="button" class="btn btn-outline-primary" (click)="saveEditNext()" *ngIf="hasNext()">Save & edit
+                next</button>&nbsp;
            <button type="submit" class="btn btn-primary">Save</button>&nbsp;
        </form>
    </div>

-    <div class="col-xl">
+    <div class="col-xl d-none d-xl-block document-preview">
        <object [data]="previewUrl | safe" type="application/pdf" width="100%" height="100%">
            <p>Your browser does not support PDFs.
                <a href="previewUrl">Download the PDF</a>.</p>
        </object>

    </div>
-</div>
+</div>
--- a/src-ui/src/app/components/document-detail/document-detail.component.scss
+++ b/src-ui/src/app/components/document-detail/document-detail.component.scss
@@ -0,0 +1,5 @@
+.document-preview {
+  height: calc(100vh - 180px);
+  top: 70px;
+  position: sticky;
+}
--- a/src-ui/src/app/components/document-detail/document-detail.component.ts
+++ b/src-ui/src/app/components/document-detail/document-detail.component.ts
@@ -1,5 +1,6 @@
 import { Component, OnInit } from '@angular/core';
 import { FormControl, FormGroup } from '@angular/forms';
+import { Title } from '@angular/platform-browser';
 import { ActivatedRoute, Router } from '@angular/router';
 import { NgbModal } from '@ng-bootstrap/ng-bootstrap';
 import { PaperlessCorrespondent } from 'src/app/data/paperless-correspondent';
@@ -11,6 +12,7 @@ import { OpenDocumentsService } from 'src/app/services/open-documents.service';
 import { CorrespondentService } from 'src/app/services/rest/correspondent.service';
 import { DocumentTypeService } from 'src/app/services/rest/document-type.service';
 import { DocumentService } from 'src/app/services/rest/document.service';
+import { environment } from 'src/environments/environment';
 import { DeleteDialogComponent } from '../common/delete-dialog/delete-dialog.component';
 import { CorrespondentEditDialogComponent } from '../manage/correspondent-list/correspondent-edit-dialog/correspondent-edit-dialog.component';
 import { DocumentTypeEditDialogComponent } from '../manage/document-type-list/document-type-edit-dialog/document-type-edit-dialog.component';
@@ -22,6 +24,9 @@ import { DocumentTypeEditDialogComponent } from '../manage/document-type-list/do
 })
 export class DocumentDetailComponent implements OnInit {

+  public expandOriginalMetadata = false;
+  public expandArchivedMetadata = false;
+
  documentId: number
  document: PaperlessDocument
  metadata: PaperlessDocumentMetadata
@@ -51,7 +56,8 @@ export class DocumentDetailComponent implements OnInit {
    private router: Router,
    private modalService: NgbModal,
    private openDocumentService: OpenDocumentsService,
-    private documentListViewService: DocumentListViewService) { }
+    private documentListViewService: DocumentListViewService,
+    private titleService: Title) { }

  ngOnInit(): void {
    this.documentForm.valueChanges.subscribe(wow => {
@@ -80,6 +86,7 @@ export class DocumentDetailComponent implements OnInit {

  updateComponent(doc: PaperlessDocument) {
    this.document = doc
+    this.titleService.setTitle(`${doc.title} - ${environment.appTitle}`)
    this.documentsService.getMetadata(doc.id).subscribe(result => {
      this.metadata = result
    })
--- a/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.html
+++ b/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.html
@@ -1,8 +1,14 @@
 <div class="col p-2 h-100" style="width: 16rem;">
  <div class="card h-100 shadow-sm">
-    <div class=" border-bottom doc-img pr-1" [ngStyle]="{'background-image': 'url(' + getThumbUrl() + ')'}">
-      <div class="row" *ngFor="let t of document.tags$ | async">
-        <app-tag style="font-size: large;" [tag]="t" class="col text-right" (click)="clickTag.emit(t.id)" [clickable]="true" linkTitle="Filter by tag"></app-tag>
+    <div class="border-bottom">
+      <img class="card-img doc-img" [src]="getThumbUrl()">
+      <div style="top: 0; right: 0; font-size: large" class="text-right position-absolute mr-1">
+        <div *ngFor="let t of getTagsLimited$() | async">
+          <app-tag [tag]="t" (click)="clickTag.emit(t.id)" [clickable]="true" linkTitle="Filter by tag"></app-tag>
+        </div>
+        <div *ngIf="moreTags">
+          <span class="badge badge-secondary">+ {{moreTags}}</span>
+        </div>
      </div>
    </div>
    
--- a/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.scss
+++ b/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.scss
@@ -1,5 +1,5 @@
 .doc-img {
-  background-size: cover;
-  background-position: top;
+  object-fit: cover;
+  object-position: top;
  height: 200px;
 }
--- a/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.ts
+++ b/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.ts
@@ -1,4 +1,5 @@
 import { Component, EventEmitter, Input, OnInit, Output } from '@angular/core';
+import { map } from 'rxjs/operators';
 import { PaperlessDocument } from 'src/app/data/paperless-document';
 import { PaperlessTag } from 'src/app/data/paperless-tag';
 import { DocumentService } from 'src/app/services/rest/document.service';
@@ -21,6 +22,8 @@ export class DocumentCardSmallComponent implements OnInit {
  @Output()
  clickCorrespondent = new EventEmitter<number>()

+  moreTags: number = null
+
  ngOnInit(): void {
  }

@@ -35,4 +38,18 @@ export class DocumentCardSmallComponent implements OnInit {
  getPreviewUrl() {
    return this.documentService.getPreviewUrl(this.document.id)
  }
+
+  getTagsLimited$() {
+    // With more than 7 tags only the first 6 are emitted and `moreTags` holds the
+    // number of hidden ones (the template renders it as a "+ N" badge).
+    return this.document.tags$.pipe(
+      map(tags => {
+        const overflow = tags.length > 7
+        // Reset to null when everything fits so a stale badge never lingers.
+        this.moreTags = overflow ? tags.length - 6 : null
+        return overflow ? tags.slice(0, 6) : tags
+      })
+    )
+  }
+
 }
--- a/src-ui/src/app/components/document-list/document-list.component.html
+++ b/src-ui/src/app/components/document-list/document-list.component.html
@@ -50,7 +50,7 @@
  <div class="btn-group btn-group-toggle ml-2" ngbRadioGroup [(ngModel)]="list.sortDirection">
    <div ngbDropdown class="btn-group">
      <button class="btn btn-outline-primary btn-sm" id="dropdownBasic1" ngbDropdownToggle>Sort by</button>
-      <div ngbDropdownMenu aria-labelledby="dropdownBasic1">
+      <div ngbDropdownMenu aria-labelledby="dropdownBasic1" class="shadow">
        <button *ngFor="let f of getSortFields()" ngbDropdownItem (click)="list.sortField = f.field"
          [class.active]="list.sortField == f.field">{{f.name}}</button>
      </div>
@@ -70,7 +70,7 @@
  </div>
  <div class="btn-group ml-2">

-    <button type="button" class="btn btn-sm btn-outline-primary" (click)="showFilter=!showFilter">
+    <button type="button" class="btn btn-sm" [ngClass]="isFiltered ? 'btn-primary' : 'btn-outline-primary'" (click)="showFilter=!showFilter">
      <svg class="toolbaricon" fill="currentColor">
        <use xlink:href="assets/bootstrap-icons.svg#funnel" />
      </svg>
@@ -79,7 +79,7 @@

    <div class="btn-group" ngbDropdown role="group">
      <button class="btn btn-sm btn-outline-primary dropdown-toggle-split" ngbDropdownToggle></button>
-      <div class="dropdown-menu" ngbDropdownMenu>
+      <div class="dropdown-menu shadow" ngbDropdownMenu>
        <ng-container *ngIf="!list.savedViewId" >
          <button ngbDropdownItem *ngFor="let config of savedViewConfigService.getConfigs()" (click)="loadViewConfig(config)">{{config.title}}</button>
          <div class="dropdown-divider" *ngIf="savedViewConfigService.getConfigs().length > 0"></div>
@@ -101,7 +101,7 @@
 </div>

 <div class="d-flex justify-content-between align-items-center">
-  <p>{{list.collectionSize || 0}} document(s)</p>
+  <p>{{list.collectionSize || 0}} document(s) <span *ngIf="isFiltered">(filtered)</span></p>
  <ngb-pagination [pageSize]="list.currentPageSize" [collectionSize]="list.collectionSize" [(page)]="list.currentPage" [maxSize]="5"
  [rotate]="true" (pageChange)="list.reload()" aria-label="Default pagination"></ngb-pagination>
 </div>
@@ -111,7 +111,7 @@
  </app-document-card-large>
 </div>

-<table class="table table-sm border shadow" *ngIf="displayMode == 'details'">
+<table class="table table-sm border shadow-sm" *ngIf="displayMode == 'details'">
  <thead>
    <th class="d-none d-lg-table-cell">ASN</th>
    <th class="d-none d-md-table-cell">Correspondent</th>
@@ -131,7 +131,7 @@
        </ng-container>
      </td>
      <td>
-        <a routerLink="/documents/{{d.id}}" title="Edit document">{{d.title}}</a>
+        <a routerLink="/documents/{{d.id}}" title="Edit document" style="overflow-wrap: anywhere;">{{d.title}}</a>
        <app-tag [tag]="t" *ngFor="let t of d.tags$ | async" class="ml-1" clickable="true" linkTitle="Filter by tag" (click)="filterByTag(t.id)"></app-tag>
      </td>
      <td class="d-none d-xl-table-cell">
--- a/src-ui/src/app/components/document-list/document-list.component.ts
+++ b/src-ui/src/app/components/document-list/document-list.component.ts
@@ -1,4 +1,5 @@
 import { Component, OnInit } from '@angular/core';
+import { Title } from '@angular/platform-browser';
 import { ActivatedRoute } from '@angular/router';
 import { NgbModal } from '@ng-bootstrap/ng-bootstrap';
 import { cloneFilterRules, FilterRule } from 'src/app/data/filter-rule';
@@ -8,6 +9,7 @@ import { DocumentListViewService } from 'src/app/services/document-list-view.ser
 import { DOCUMENT_SORT_FIELDS } from 'src/app/services/rest/document.service';
 import { SavedViewConfigService } from 'src/app/services/saved-view-config.service';
 import { Toast, ToastService } from 'src/app/services/toast.service';
+import { environment } from 'src/environments/environment';
 import { SaveViewConfigDialogComponent } from './save-view-config-dialog/save-view-config-dialog.component';

@Component({
@@ -22,13 +24,18 @@ export class DocumentListComponent implements OnInit {
    public savedViewConfigService: SavedViewConfigService,
    public route: ActivatedRoute,
    private toastService: ToastService,
-    public modalService: NgbModal) { }
+    public modalService: NgbModal,
+    private titleService: Title) { }

  displayMode = 'smallCards' // largeCards, smallCards, details

  filterRules: FilterRule[] = []
  showFilter = false

+  get isFiltered() {
+    return this.list.filterRules?.length > 0
+  }
+
  getTitle() {
    return this.list.savedViewTitle || "Documents"
  }
@@ -50,10 +57,12 @@ export class DocumentListComponent implements OnInit {
        this.list.savedView = this.savedViewConfigService.getConfig(params.get('id'))
        this.filterRules = this.list.filterRules
        this.showFilter = false
+        this.titleService.setTitle(`${this.list.savedView.title} - ${environment.appTitle}`)
      } else {
        this.list.savedView = null
        this.filterRules = this.list.filterRules
        this.showFilter = this.filterRules.length > 0
+        this.titleService.setTitle(`Documents - ${environment.appTitle}`)
      }
      this.list.clear()
      this.list.reload()
--- a/src-ui/src/app/components/filter-editor/filter-editor.component.ts
+++ b/src-ui/src/app/components/filter-editor/filter-editor.component.ts
@@ -34,7 +34,7 @@ export class FilterEditorComponent implements OnInit {
  documentTypes: PaperlessDocumentType[] = []

  newRuleClicked() {
-    this.filterRules.push({type: this.selectedRuleType, value: null})
+    this.filterRules.push({type: this.selectedRuleType, value: this.selectedRuleType?.default})
    this.selectedRuleType = this.getRuleTypes().length > 0 ? this.getRuleTypes()[0] : null
  }

--- a/src-ui/src/app/components/manage/correspondent-list/correspondent-list.component.ts
+++ b/src-ui/src/app/components/manage/correspondent-list/correspondent-list.component.ts
@@ -1,7 +1,9 @@
-import { Component } from '@angular/core';
+import { Component, OnInit } from '@angular/core';
+import { Title } from '@angular/platform-browser';
 import { NgbModal } from '@ng-bootstrap/ng-bootstrap';
 import { PaperlessCorrespondent } from 'src/app/data/paperless-correspondent';
 import { CorrespondentService } from 'src/app/services/rest/correspondent.service';
+import { environment } from 'src/environments/environment';
 import { GenericListComponent } from '../generic-list/generic-list.component';
 import { CorrespondentEditDialogComponent } from './correspondent-edit-dialog/correspondent-edit-dialog.component';

@@ -10,14 +12,19 @@ import { CorrespondentEditDialogComponent } from './correspondent-edit-dialog/co
  templateUrl: './correspondent-list.component.html',
  styleUrls: ['./correspondent-list.component.scss']
 })
-export class CorrespondentListComponent extends GenericListComponent<PaperlessCorrespondent> {
+export class CorrespondentListComponent extends GenericListComponent<PaperlessCorrespondent> implements OnInit {

-  constructor(correspondentsService: CorrespondentService,
-    modalService: NgbModal) { 
-      super(correspondentsService,modalService,CorrespondentEditDialogComponent)
-    }
+  constructor(correspondentsService: CorrespondentService, modalService: NgbModal, private titleService: Title) { 
+    super(correspondentsService,modalService,CorrespondentEditDialogComponent)
+  }
+
+  getObjectName(object: PaperlessCorrespondent) {
+    return `correspondent '${object.name}'`
+  }
+
+  ngOnInit(): void {
+    super.ngOnInit()
+    this.titleService.setTitle(`Correspondents - ${environment.appTitle}`)
+  }

-    getObjectName(object: PaperlessCorrespondent) {
-      return `correspondent '${object.name}'`
-    }
 }
--- a/src-ui/src/app/components/manage/document-type-list/document-type-list.component.ts
+++ b/src-ui/src/app/components/manage/document-type-list/document-type-list.component.ts
@@ -1,7 +1,9 @@
 import { Component, OnInit } from '@angular/core';
+import { Title } from '@angular/platform-browser';
 import { NgbModal } from '@ng-bootstrap/ng-bootstrap';
 import { PaperlessDocumentType } from 'src/app/data/paperless-document-type';
 import { DocumentTypeService } from 'src/app/services/rest/document-type.service';
+import { environment } from 'src/environments/environment';
 import { GenericListComponent } from '../generic-list/generic-list.component';
 import { DocumentTypeEditDialogComponent } from './document-type-edit-dialog/document-type-edit-dialog.component';

@@ -10,13 +12,18 @@ import { DocumentTypeEditDialogComponent } from './document-type-edit-dialog/doc
  templateUrl: './document-type-list.component.html',
  styleUrls: ['./document-type-list.component.scss']
 })
-export class DocumentTypeListComponent extends GenericListComponent<PaperlessDocumentType> {
+export class DocumentTypeListComponent extends GenericListComponent<PaperlessDocumentType> implements OnInit {

-  constructor(service: DocumentTypeService, modalService: NgbModal) {
+  constructor(service: DocumentTypeService, modalService: NgbModal, private titleService: Title) {
    super(service, modalService, DocumentTypeEditDialogComponent)
-   }
+  }

-   getObjectName(object: PaperlessDocumentType) {
+  getObjectName(object: PaperlessDocumentType) {
    return `document type '${object.name}'`
  }
+
+  ngOnInit(): void {
+    super.ngOnInit()
+    this.titleService.setTitle(`Document types - ${environment.appTitle}`)
+  }
 }
--- a/src-ui/src/app/components/manage/logs/logs.component.ts
+++ b/src-ui/src/app/components/manage/logs/logs.component.ts
@@ -1,7 +1,8 @@
 import { Component, OnInit } from '@angular/core';
-import { kMaxLength } from 'buffer';
+import { Title } from '@angular/platform-browser';
 import { LOG_LEVELS, LOG_LEVEL_INFO, PaperlessLog } from 'src/app/data/paperless-log';
 import { LogService } from 'src/app/services/rest/log.service';
+import { environment } from 'src/environments/environment';

@Component({
  selector: 'app-logs',
@@ -10,13 +11,14 @@ import { LogService } from 'src/app/services/rest/log.service';
 })
 export class LogsComponent implements OnInit {

-  constructor(private logService: LogService) { }
+  constructor(private logService: LogService, private titleService: Title) { }

  logs: PaperlessLog[] = []
  level: number = LOG_LEVEL_INFO

  ngOnInit(): void {
    this.reload()
+    this.titleService.setTitle(`Logs - ${environment.appTitle}`)
  }

  reload() {
--- a/src-ui/src/app/components/manage/settings/settings.component.html
+++ b/src-ui/src/app/components/manage/settings/settings.component.html
@@ -46,8 +46,8 @@
          <tbody>
            <tr *ngFor="let config of savedViewConfigService.getConfigs()">
              <td>{{ config.title }}</td>
-              <td>{{ config.showInDashboard }}</td>
-              <td>{{ config.showInSideBar }}</td>
+              <td>{{ config.showInDashboard | yesno }}</td>
+              <td>{{ config.showInSideBar | yesno }}</td>
              <td><button type="button" class="btn btn-sm btn-outline-danger" (click)="deleteViewConfig(config)">Delete</button></td>
            </tr>
          </tbody>
--- a/src-ui/src/app/components/manage/settings/settings.component.ts
+++ b/src-ui/src/app/components/manage/settings/settings.component.ts
@@ -1,9 +1,11 @@
 import { Component, OnInit } from '@angular/core';
 import { FormControl, FormGroup } from '@angular/forms';
+import { Title } from '@angular/platform-browser';
 import { SavedViewConfig } from 'src/app/data/saved-view-config';
 import { GENERAL_SETTINGS } from 'src/app/data/storage-keys';
 import { DocumentListViewService } from 'src/app/services/document-list-view.service';
 import { SavedViewConfigService } from 'src/app/services/saved-view-config.service';
+import { environment } from 'src/environments/environment';

@Component({
  selector: 'app-settings',
@@ -18,10 +20,12 @@ export class SettingsComponent implements OnInit {

  constructor(
    private savedViewConfigService: SavedViewConfigService,
-    private documentListViewService: DocumentListViewService
+    private documentListViewService: DocumentListViewService,
+    private titleService: Title
  ) { }

  ngOnInit(): void {
+    this.titleService.setTitle(`Settings - ${environment.appTitle}`)
  }

  deleteViewConfig(config: SavedViewConfig) {
--- a/src-ui/src/app/components/manage/tag-list/tag-list.component.ts
+++ b/src-ui/src/app/components/manage/tag-list/tag-list.component.ts
@@ -1,8 +1,9 @@
-import { Component } from '@angular/core';
+import { Component, OnInit } from '@angular/core';
+import { Title } from '@angular/platform-browser';
 import { NgbModal } from '@ng-bootstrap/ng-bootstrap';
 import { TAG_COLOURS, PaperlessTag } from 'src/app/data/paperless-tag';
 import { TagService } from 'src/app/services/rest/tag.service';
-import { CorrespondentEditDialogComponent } from '../correspondent-list/correspondent-edit-dialog/correspondent-edit-dialog.component';
+import { environment } from 'src/environments/environment';
 import { GenericListComponent } from '../generic-list/generic-list.component';
 import { TagEditDialogComponent } from './tag-edit-dialog/tag-edit-dialog.component';

@@ -11,11 +12,17 @@ import { TagEditDialogComponent } from './tag-edit-dialog/tag-edit-dialog.compon
  templateUrl: './tag-list.component.html',
  styleUrls: ['./tag-list.component.scss']
 })
-export class TagListComponent extends GenericListComponent<PaperlessTag> {
+export class TagListComponent extends GenericListComponent<PaperlessTag> implements OnInit {

-  constructor(tagService: TagService, modalService: NgbModal) {
+  constructor(tagService: TagService, modalService: NgbModal, private titleService: Title) {
    super(tagService, modalService, TagEditDialogComponent)
-   }
+  }
+
+
+  ngOnInit(): void {
+    super.ngOnInit()
+    this.titleService.setTitle(`Tags - ${environment.appTitle}`)
+  }

  getColor(id) {
    return TAG_COLOURS.find(c => c.id == id)
--- a/src-ui/src/app/components/search/search.component.ts
+++ b/src-ui/src/app/components/search/search.component.ts
@@ -1,7 +1,9 @@
 import { Component, OnInit } from '@angular/core';
+import { Title } from '@angular/platform-browser';
 import { ActivatedRoute, Router } from '@angular/router';
 import { SearchHit } from 'src/app/data/search-result';
 import { SearchService } from 'src/app/services/rest/search.service';
+import { environment } from 'src/environments/environment';

@Component({
  selector: 'app-search',
@@ -26,7 +28,7 @@ export class SearchComponent implements OnInit {

  errorMessage: string

-  constructor(private searchService: SearchService, private route: ActivatedRoute, private router: Router) { }
+  constructor(private searchService: SearchService, private route: ActivatedRoute, private router: Router, private titleService: Title) { }

  ngOnInit(): void {
    this.route.queryParamMap.subscribe(paramMap => {
@@ -34,6 +36,7 @@ export class SearchComponent implements OnInit {
      this.searching = true
      this.currentPage = 1
      this.loadPage()
+      this.titleService.setTitle(`Search: ${this.query} - ${environment.appTitle}`)
    })

  }
--- a/src-ui/src/app/data/filter-rule-type.ts
+++ b/src-ui/src/app/data/filter-rule-type.ts
@@ -16,19 +16,22 @@ export const FILTER_ADDED_AFTER = 14
 export const FILTER_MODIFIED_BEFORE = 15
 export const FILTER_MODIFIED_AFTER = 16

+export const FILTER_DOES_NOT_HAVE_TAG = 17
+
 export const FILTER_RULE_TYPES: FilterRuleType[] = [

-  {id: FILTER_TITLE, name: "Title contains", filtervar: "title__icontains", datatype: "string", multi: false},
-  {id: FILTER_CONTENT, name: "Content contains", filtervar: "content__icontains", datatype: "string", multi: false},
+  {id: FILTER_TITLE, name: "Title contains", filtervar: "title__icontains", datatype: "string", multi: false, default: ""},
+  {id: FILTER_CONTENT, name: "Content contains", filtervar: "content__icontains", datatype: "string", multi: false, default: ""},
  
  {id: FILTER_ASN, name: "ASN is", filtervar: "archive_serial_number", datatype: "number", multi: false},
  
  {id: FILTER_CORRESPONDENT, name: "Correspondent is", filtervar: "correspondent__id", datatype: "correspondent", multi: false},
  {id: FILTER_DOCUMENT_TYPE, name: "Document type is", filtervar: "document_type__id", datatype: "document_type", multi: false},

-  {id: FILTER_IS_IN_INBOX, name: "Is in Inbox", filtervar: "is_in_inbox", datatype: "boolean", multi: false},  
+  {id: FILTER_IS_IN_INBOX, name: "Is in Inbox", filtervar: "is_in_inbox", datatype: "boolean", multi: false, default: true},  
  {id: FILTER_HAS_TAG, name: "Has tag", filtervar: "tags__id__all", datatype: "tag", multi: true},  
-  {id: FILTER_HAS_ANY_TAG, name: "Has any tag", filtervar: "is_tagged", datatype: "boolean", multi: false},
+  {id: FILTER_DOES_NOT_HAVE_TAG, name: "Does not have tag", filtervar: "tags__id__none", datatype: "tag", multi: true},  
+  {id: FILTER_HAS_ANY_TAG, name: "Has any tag", filtervar: "is_tagged", datatype: "boolean", multi: false, default: true},

  {id: FILTER_CREATED_BEFORE, name: "Created before", filtervar: "created__date__lt", datatype: "date", multi: false},
  {id: FILTER_CREATED_AFTER, name: "Created after", filtervar: "created__date__gt", datatype: "date", multi: false},
@@ -50,4 +53,5 @@ export interface FilterRuleType {
  filtervar: string
  datatype: string //number, string, boolean, date
  multi: boolean
+  default?: any
 }
--- a/src-ui/src/app/data/paperless-document-metadata.ts
+++ b/src-ui/src/app/data/paperless-document-metadata.ts
@@ -1,11 +1,13 @@
 export interface PaperlessDocumentMetadata {
    
-  paperless__checksum?: string
+  original_checksum?: string

-  paperless__mime_type?: string
+  archived_checksum?: string

-  paperless__filename?: string
+  original_mime_type?: string

-  paperless__has_archive_version?: boolean
+  media_filename?: string
+
+  has_archive_version?: boolean

 }
--- a/src-ui/src/app/pipes/file-size.pipe.spec.ts
+++ b/src-ui/src/app/pipes/file-size.pipe.spec.ts
@@ -0,0 +1,8 @@
+import { FileSizePipe } from './file-size.pipe';
+
+describe('FileSizePipe', () => {
+  it('create an instance', () => {
+    const pipe = new FileSizePipe();
+    expect(pipe).toBeTruthy();
+  });
+});
--- a/src-ui/src/app/pipes/file-size.pipe.ts
+++ b/src-ui/src/app/pipes/file-size.pipe.ts
@@ -0,0 +1,77 @@
+/**
+ * https://gist.github.com/JonCatmull/ecdf9441aaa37336d9ae2c7f9cb7289a
+ * 
+ * @license
+ * Copyright (c) 2019 Jonathan Catmull.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+import { Pipe, PipeTransform } from '@angular/core';
+
+type unit = 'bytes' | 'KB' | 'MB' | 'GB' | 'TB' | 'PB';
+type unitPrecisionMap = {
+  [u in unit]: number;
+};
+
+const defaultPrecisionMap: unitPrecisionMap = {
+  bytes: 0,
+  KB: 0,
+  MB: 1,
+  GB: 1,
+  TB: 2,
+  PB: 2
+};
+
+/*
+ * Convert bytes into largest possible unit.
+ * Takes a precision argument that can be a number or a map for each unit.
+ * Usage:
+ *   bytes | fileSize:precision
+ * @example
+ * // returns 1 KB
+ * {{ 1500 | fileSize }}
+ * @example
+ * // returns 2.0 GB
+ * {{ 2100000000 | fileSize }}
+ * @example
+ * // returns 1.46 KB
+ * {{ 1500 | fileSize:2 }}
+ */
+@Pipe({ name: 'fileSize' })
+export class FileSizePipe implements PipeTransform {
+  private readonly units: unit[] = ['bytes', 'KB', 'MB', 'GB', 'TB', 'PB'];
+  // transform(bytes, precision): formats a byte count as "<value> <unit>"; returns '?' for non-numeric or non-finite input.
+  transform(bytes: number = 0, precision: number | unitPrecisionMap = defaultPrecisionMap): string {
+    if (isNaN(parseFloat(String(bytes))) || !isFinite(bytes)) return '?';
+
+    let unitIndex = 0;
+    // Stop at the last known unit (PB): larger values stay in PB instead of indexing past `units`.
+    while (bytes >= 1024 && unitIndex < this.units.length - 1) {
+      bytes /= 1024;
+      unitIndex++;
+    }
+
+    const unit = this.units[unitIndex];
+    // A numeric precision applies to every unit; otherwise look up the per-unit precision.
+    if (typeof precision === 'number') {
+      return `${bytes.toFixed(+precision)} ${unit}`;
+    }
+    return `${bytes.toFixed(precision[unit])} ${unit}`;
+  }
+}
--- a/src-ui/src/app/pipes/yes-no.pipe.spec.ts
+++ b/src-ui/src/app/pipes/yes-no.pipe.spec.ts
@@ -0,0 +1,8 @@
+import { YesNoPipe } from './yes-no.pipe';
+
+describe('YesNoPipe', () => {
+  it('create an instance', () => {
+    const pipe = new YesNoPipe();
+    expect(pipe).toBeTruthy();
+  });
+});
--- a/src-ui/src/app/pipes/yes-no.pipe.ts
+++ b/src-ui/src/app/pipes/yes-no.pipe.ts
@@ -0,0 +1,12 @@
+import { Pipe, PipeTransform } from '@angular/core';
+
+@Pipe({
+  name: 'yesno'
+})
+export class YesNoPipe implements PipeTransform {
+  // Renders a boolean as the label "Yes" or "No"; any falsy value yields "No".
+  transform(value: boolean): string {
+    return value ? "Yes" : "No"
+  }
+
+}
--- a/src-ui/src/app/services/rest/document.service.ts
+++ b/src-ui/src/app/services/rest/document.service.ts
@@ -94,7 +94,7 @@ export class DocumentService extends AbstractPaperlessService<PaperlessDocument>
  }

  uploadDocument(formData) {
-    return this.http.post(this.getResourceUrl(null, 'post_document'), formData)
+    return this.http.post(this.getResourceUrl(null, 'post_document'), formData, {reportProgress: true, observe: "events"})
  }

  getMetadata(id: number): Observable<PaperlessDocumentMetadata> {
--- a/src-ui/src/environments/environment.prod.ts
+++ b/src-ui/src/environments/environment.prod.ts
@@ -1,4 +1,5 @@
 export const environment = {
  production: true,
-  apiBaseUrl: "/api/"
+  apiBaseUrl: "/api/",
+  appTitle: "Paperless-ng"
 };
--- a/src-ui/src/environments/environment.ts
+++ b/src-ui/src/environments/environment.ts
@@ -4,7 +4,8 @@

 export const environment = {
  production: false,
-  apiBaseUrl: "http://localhost:8000/api/"
+  apiBaseUrl: "http://localhost:8000/api/",
+  appTitle: "DEVELOPMENT P-NG"
 };

 /*
--- a/src/documents/admin.py
+++ b/src/documents/admin.py
@@ -17,8 +17,6 @@ class CorrespondentAdmin(admin.ModelAdmin):
    list_filter = ("matching_algorithm",)
    list_editable = ("match", "matching_algorithm")

-    readonly_fields = ("slug",)
-

 class TagAdmin(admin.ModelAdmin):

@@ -31,8 +29,6 @@ class TagAdmin(admin.ModelAdmin):
    list_filter = ("colour", "matching_algorithm")
    list_editable = ("colour", "match", "matching_algorithm")

-    readonly_fields = ("slug", )
-

 class DocumentTypeAdmin(admin.ModelAdmin):

@@ -44,13 +40,16 @@ class DocumentTypeAdmin(admin.ModelAdmin):
    list_filter = ("matching_algorithm",)
    list_editable = ("match", "matching_algorithm")

-    readonly_fields = ("slug",)
-

 class DocumentAdmin(admin.ModelAdmin):

    search_fields = ("correspondent__name", "title", "content", "tags__name")
-    readonly_fields = ("added", "mime_type", "storage_type", "filename")
+    readonly_fields = (
+        "added",
+        "modified",
+        "mime_type",
+        "storage_type",
+        "filename")

    list_display_links = ("title",)

@@ -101,7 +100,7 @@ class DocumentAdmin(admin.ModelAdmin):
        for tag in obj.tags.all():
            r += self._html_tag(
                "span",
-                tag.slug + ", "
+                tag.name + ", "
            )
        return r

--- a/src/documents/consumer.py
+++ b/src/documents/consumer.py
@@ -8,13 +8,14 @@ from django.conf import settings
 from django.db import transaction
 from django.db.models import Q
 from django.utils import timezone
+from filelock import FileLock

 from .classifier import DocumentClassifier, IncompatibleClassifierVersionError
-from .file_handling import create_source_path_directory
+from .file_handling import create_source_path_directory, \
+    generate_unique_filename
 from .loggers import LoggingMixin
 from .models import Document, FileInfo, Correspondent, DocumentType, Tag
-from .parsers import ParseError, get_parser_class_for_mime_type, \
-    get_supported_file_extensions, parse_date
+from .parsers import ParseError, get_parser_class_for_mime_type, parse_date
 from .signals import (
    document_consumption_finished,
    document_consumption_started
@@ -38,6 +39,10 @@ class Consumer(LoggingMixin):

    def pre_check_file_exists(self):
        if not os.path.isfile(self.path):
+            self.log(
+                "error",
+                "Cannot consume {}: It is not a file.".format(self.path)
+            )
            raise ConsumerError("Cannot consume {}: It is not a file".format(
                self.path))

@@ -47,6 +52,10 @@ class Consumer(LoggingMixin):
        if Document.objects.filter(Q(checksum=checksum) | Q(archive_checksum=checksum)).exists():  # NOQA: E501
            if settings.CONSUMER_DELETE_DUPLICATES:
                os.unlink(self.path)
+            self.log(
+                "error",
+                "Not consuming {}: It is a duplicate.".format(self.filename)
+            )
            raise ConsumerError(
                "Not consuming {}: It is a duplicate.".format(self.filename)
            )
@@ -148,8 +157,9 @@ class Consumer(LoggingMixin):
            classifier = DocumentClassifier()
            classifier.reload()
        except (FileNotFoundError, IncompatibleClassifierVersionError) as e:
-            logging.getLogger(__name__).warning(
-                "Cannot classify documents: {}.".format(e))
+            self.log(
+                "warning",
+                f"Cannot classify documents: {e}.")
            classifier = None

        # now that everything is done, we can start to store the document
@@ -176,31 +186,28 @@ class Consumer(LoggingMixin):

                # After everything is in the database, copy the files into
                # place. If this fails, we'll also rollback the transaction.
+                with FileLock(settings.MEDIA_LOCK):
+                    document.filename = generate_unique_filename(
+                        document, settings.ORIGINALS_DIR)
+                    create_source_path_directory(document.source_path)

-                # TODO: not required, since this is done by the file handling
-                #  logic
-                create_source_path_directory(document.source_path)
-
-                self._write(document.storage_type,
-                            self.path, document.source_path)
-
-                self._write(document.storage_type,
-                            thumbnail, document.thumbnail_path)
-
-                if archive_path and os.path.isfile(archive_path):
                    self._write(document.storage_type,
-                                archive_path, document.archive_path)
+                                self.path, document.source_path)

-                    with open(archive_path, 'rb') as f:
-                        document.archive_checksum = hashlib.md5(
-                            f.read()).hexdigest()
-                        document.save()
+                    self._write(document.storage_type,
+                                thumbnail, document.thumbnail_path)

-                # Afte performing all database operations and moving files
-                # into place, tell paperless where the file is.
-                document.filename = os.path.basename(document.source_path)
-                # Saving the document now will trigger the filename handling
-                # logic.
+                    if archive_path and os.path.isfile(archive_path):
+                        create_source_path_directory(document.archive_path)
+                        self._write(document.storage_type,
+                                    archive_path, document.archive_path)
+
+                        with open(archive_path, 'rb') as f:
+                            document.archive_checksum = hashlib.md5(
+                                f.read()).hexdigest()
+
+                # Don't save with the lock active. Saving will cause the file
+                # renaming logic to acquire the lock as well.
                document.save()

                # Delete the file only if it was successfully consumed
@@ -241,7 +248,7 @@ class Consumer(LoggingMixin):
        with open(self.path, "rb") as f:
            document = Document.objects.create(
                correspondent=file_info.correspondent,
-                title=file_info.title,
+                title=(self.override_title or file_info.title)[:127],
                content=text,
                mime_type=mime_type,
                checksum=hashlib.md5(f.read()).hexdigest(),
@@ -252,18 +259,17 @@ class Consumer(LoggingMixin):

        relevant_tags = set(file_info.tags)
        if relevant_tags:
-            tag_names = ", ".join([t.slug for t in relevant_tags])
+            tag_names = ", ".join([t.name for t in relevant_tags])
            self.log("debug", "Tagging with {}".format(tag_names))
            document.tags.add(*relevant_tags)

        self.apply_overrides(document)

+        document.save()
+
        return document

    def apply_overrides(self, document):
-        if self.override_title:
-            document.title = self.override_title
-
        if self.override_correspondent_id:
            document.correspondent = Correspondent.objects.get(
                pk=self.override_correspondent_id)
--- a/src/documents/file_handling.py
+++ b/src/documents/file_handling.py
@@ -1,7 +1,9 @@
+import datetime
 import logging
 import os
 from collections import defaultdict

+import pathvalidate
 from django.conf import settings
 from django.template.defaultfilters import slugify

@@ -68,21 +70,53 @@ def many_to_dictionary(field):
    return mydictionary


-def generate_filename(doc):
+def generate_unique_filename(doc, root):
+    counter = 0
+
+    while True:
+        new_filename = generate_filename(doc, counter)
+        if new_filename == doc.filename:
+            # still the same as before.
+            return new_filename
+
+        if os.path.exists(os.path.join(root, new_filename)):
+            counter += 1
+        else:
+            return new_filename
+
+
+def generate_filename(doc, counter=0):
    path = ""

    try:
        if settings.PAPERLESS_FILENAME_FORMAT is not None:
            tags = defaultdict(lambda: slugify(None),
                               many_to_dictionary(doc.tags))
+
+            if doc.correspondent:
+                correspondent = pathvalidate.sanitize_filename(
+                    doc.correspondent.name, replacement_text="-"
+                )
+            else:
+                correspondent = "none"
+
+            if doc.document_type:
+                document_type = pathvalidate.sanitize_filename(
+                    doc.document_type.name, replacement_text="-"
+                )
+            else:
+                document_type = "none"
+
            path = settings.PAPERLESS_FILENAME_FORMAT.format(
-                correspondent=slugify(doc.correspondent),
-                title=slugify(doc.title),
-                created=slugify(doc.created),
+                title=pathvalidate.sanitize_filename(
+                    doc.title, replacement_text="-"),
+                correspondent=correspondent,
+                document_type=document_type,
+                created=datetime.date.isoformat(doc.created),
                created_year=doc.created.year if doc.created else "none",
                created_month=doc.created.month if doc.created else "none",
                created_day=doc.created.day if doc.created else "none",
-                added=slugify(doc.added),
+                added=datetime.date.isoformat(doc.added),
                added_year=doc.added.year if doc.added else "none",
                added_month=doc.added.month if doc.added else "none",
                added_day=doc.added.day if doc.added else "none",
@@ -93,11 +127,11 @@ def generate_filename(doc):
            f"Invalid PAPERLESS_FILENAME_FORMAT: "
            f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default")

-    # Always append the primary key to guarantee uniqueness of filename
+    counter_str = f"_{counter:02}" if counter else ""
    if len(path) > 0:
-        filename = "%s-%07i%s" % (path, doc.pk, doc.file_type)
+        filename = f"{path}{counter_str}{doc.file_type}"
    else:
-        filename = "%07i%s" % (doc.pk, doc.file_type)
+        filename = f"{doc.pk:07}{counter_str}{doc.file_type}"

    # Append .gpg for encrypted files
    if doc.storage_type == doc.STORAGE_TYPE_GPG:
--- a/src/documents/filters.py
+++ b/src/documents/filters.py
@@ -37,6 +37,10 @@ class DocumentTypeFilterSet(FilterSet):

 class TagsFilter(Filter):

+    def __init__(self, exclude=False):
+        super(TagsFilter, self).__init__()
+        self.exclude = exclude
+
    def filter(self, qs, value):
        if not value:
            return qs
@@ -47,7 +51,10 @@ class TagsFilter(Filter):
            return qs

        for tag_id in tag_ids:
-            qs = qs.filter(tags__id=tag_id)
+            if self.exclude:
+                qs = qs.exclude(tags__id=tag_id)
+            else:
+                qs = qs.filter(tags__id=tag_id)

        return qs

@@ -74,6 +81,8 @@ class DocumentFilterSet(FilterSet):

    tags__id__all = TagsFilter()

+    tags__id__none = TagsFilter(exclude=True)
+
    is_in_inbox = InboxFilter()

    class Meta:
--- a/src/documents/management/commands/decrypt_documents.py
+++ b/src/documents/management/commands/decrypt_documents.py
@@ -82,7 +82,8 @@ class Command(BaseCommand):
            with open(document.thumbnail_path, "wb") as f:
                f.write(raw_thumb)

-            document.save(update_fields=("storage_type", "filename"))
+            Document.objects.filter(id=document.id).update(
+                storage_type=document.storage_type, filename=document.filename)

            for path in old_paths:
                os.unlink(path)
--- a/src/documents/management/commands/document_consumer.py
+++ b/src/documents/management/commands/document_consumer.py
@@ -29,10 +29,9 @@ def _tags_from_path(filepath):
    path_parts = Path(filepath).relative_to(
                settings.CONSUMPTION_DIR).parent.parts
    for part in path_parts:
-        tag_ids.add(Tag.objects.get_or_create(
-            slug=slugify(part),
-            defaults={"name": part},
-        )[0].pk)
+        tag_ids.add(Tag.objects.get_or_create(name__iexact=part, defaults={
+            "name": part
+        })[0].pk)

    return tag_ids

--- a/src/documents/management/commands/document_exporter.py
+++ b/src/documents/management/commands/document_exporter.py
@@ -38,6 +38,9 @@ class Command(Renderable, BaseCommand):
        if not os.access(self.target, os.W_OK):
            raise CommandError("That path doesn't appear to be writable")

+        if os.listdir(self.target):
+            raise CommandError("That directory is not empty.")
+
        self.dump()

    def dump(self):
@@ -54,31 +57,39 @@ class Command(Renderable, BaseCommand):

            document = document_map[document_dict["pk"]]

-            unique_filename = f"{document.pk:07}_{document.file_name}"
-            file_target = os.path.join(self.target, unique_filename)
+            print(f"Exporting: {document}")

-            thumbnail_name = unique_filename + "-thumbnail.png"
+            filename_counter = 0
+            while True:
+                original_name = document.get_public_filename(
+                    counter=filename_counter)
+                original_target = os.path.join(self.target, original_name)
+
+                if not os.path.exists(original_target):
+                    break
+                else:
+                    filename_counter += 1
+
+            thumbnail_name = original_name + "-thumbnail.png"
            thumbnail_target = os.path.join(self.target, thumbnail_name)

-            document_dict[EXPORTER_FILE_NAME] = unique_filename
+            document_dict[EXPORTER_FILE_NAME] = original_name
            document_dict[EXPORTER_THUMBNAIL_NAME] = thumbnail_name

            if os.path.exists(document.archive_path):
-                archive_name = \
-                    f"{document.pk:07}_archive_{document.archive_file_name}"
+                archive_name = document.get_public_filename(
+                    archive=True, counter=filename_counter, suffix="_archive")
                archive_target = os.path.join(self.target, archive_name)
                document_dict[EXPORTER_ARCHIVE_NAME] = archive_name
            else:
                archive_target = None

-            print(f"Exporting: {file_target}")
-
            t = int(time.mktime(document.created.timetuple()))
            if document.storage_type == Document.STORAGE_TYPE_GPG:

-                with open(file_target, "wb") as f:
+                with open(original_target, "wb") as f:
                    f.write(GnuPG.decrypted(document.source_file))
-                    os.utime(file_target, times=(t, t))
+                    os.utime(original_target, times=(t, t))

                with open(thumbnail_target, "wb") as f:
                    f.write(GnuPG.decrypted(document.thumbnail_file))
@@ -90,7 +101,7 @@ class Command(Renderable, BaseCommand):
                        os.utime(archive_target, times=(t, t))
            else:

-                shutil.copy(document.source_path, file_target)
+                shutil.copy(document.source_path, original_target)
                shutil.copy(document.thumbnail_path, thumbnail_target)

                if archive_target:
--- a/src/documents/management/commands/document_importer.py
+++ b/src/documents/management/commands/document_importer.py
@@ -5,11 +5,13 @@ import shutil
 from django.conf import settings
 from django.core.management import call_command
 from django.core.management.base import BaseCommand, CommandError
+from filelock import FileLock

 from documents.models import Document
 from documents.settings import EXPORTER_FILE_NAME, EXPORTER_THUMBNAIL_NAME, \
    EXPORTER_ARCHIVE_NAME
-from ...file_handling import generate_filename, create_source_path_directory
+from ...file_handling import create_source_path_directory, \
+    generate_unique_filename
 from ...mixins import Renderable


@@ -114,17 +116,20 @@ class Command(Renderable, BaseCommand):

            document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED

-            document.filename = generate_filename(document)
+            with FileLock(settings.MEDIA_LOCK):
+                document.filename = generate_unique_filename(
+                    document, settings.ORIGINALS_DIR)

-            if os.path.isfile(document.source_path):
-                raise FileExistsError(document.source_path)
+                if os.path.isfile(document.source_path):
+                    raise FileExistsError(document.source_path)

-            create_source_path_directory(document.source_path)
+                create_source_path_directory(document.source_path)

-            print(f"Moving {document_path} to {document.source_path}")
-            shutil.copy(document_path, document.source_path)
-            shutil.copy(thumbnail_path, document.thumbnail_path)
-            if archive_path:
-                shutil.copy(archive_path, document.archive_path)
+                print(f"Moving {document_path} to {document.source_path}")
+                shutil.copy(document_path, document.source_path)
+                shutil.copy(thumbnail_path, document.thumbnail_path)
+                if archive_path:
+                    create_source_path_directory(document.archive_path)
+                    shutil.copy(archive_path, document.archive_path)

            document.save()
--- a/src/documents/management/commands/document_renamer.py
+++ b/src/documents/management/commands/document_renamer.py
@@ -1,3 +1,6 @@
+import logging
+
+import tqdm
 from django.core.management.base import BaseCommand

 from documents.models import Document
@@ -18,6 +21,8 @@ class Command(Renderable, BaseCommand):

        self.verbosity = options["verbosity"]

-        for document in Document.objects.all():
+        logging.getLogger().handlers[0].level = logging.ERROR
+
+        for document in tqdm.tqdm(Document.objects.all()):
            # Saving the document again will generate a new filename and rename
            document.save()
--- a/src/documents/migrations/1006_auto_20201208_2209.py
+++ b/src/documents/migrations/1006_auto_20201208_2209.py
@@ -0,0 +1,25 @@
+# Generated by Django 3.1.4 on 2020-12-08 22:09
+
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('documents', '1005_checksums'),
+    ]
+
+    operations = [
+        migrations.RemoveField(
+            model_name='correspondent',
+            name='slug',
+        ),
+        migrations.RemoveField(
+            model_name='documenttype',
+            name='slug',
+        ),
+        migrations.RemoveField(
+            model_name='tag',
+            name='slug',
+        ),
+    ]
--- a/src/documents/models.py
+++ b/src/documents/models.py
@@ -1,10 +1,12 @@
 # coding=utf-8
-
+import datetime
 import logging
 import os
 import re
 from collections import OrderedDict

+import pathvalidate
+
 import dateutil.parser
 from django.conf import settings
 from django.db import models
@@ -34,7 +36,6 @@ class MatchingModel(models.Model):
    )

    name = models.CharField(max_length=128, unique=True)
-    slug = models.SlugField(blank=True, editable=False)

    match = models.CharField(max_length=256, blank=True)
    matching_algorithm = models.PositiveIntegerField(
@@ -67,7 +68,6 @@ class MatchingModel(models.Model):
    def save(self, *args, **kwargs):

        self.match = self.match.lower()
-        self.slug = slugify(self.name)

        models.Model.save(self, *args, **kwargs)

@@ -172,6 +172,7 @@ class Document(models.Model):

    created = models.DateTimeField(
        default=timezone.now, db_index=True)
+
    modified = models.DateTimeField(
        auto_now=True, editable=False, db_index=True)

@@ -206,13 +207,11 @@ class Document(models.Model):
        ordering = ("correspondent", "title")

    def __str__(self):
-        created = self.created.strftime("%Y%m%d")
+        created = datetime.date.isoformat(self.created)
        if self.correspondent and self.title:
-            return "{}: {} - {}".format(
-                created, self.correspondent, self.title)
-        if self.correspondent or self.title:
-            return "{}: {}".format(created, self.correspondent or self.title)
-        return str(created)
+            return f"{created} {self.correspondent} {self.title}"
+        else:
+            return f"{created} {self.title}"

    @property
    def source_path(self):
@@ -248,13 +247,21 @@ class Document(models.Model):
    def archive_file(self):
        return open(self.archive_path, "rb")

-    @property
-    def file_name(self):
-        return slugify(str(self)) + self.file_type
+    def get_public_filename(self, archive=False, counter=0, suffix=None):
+        result = str(self)

-    @property
-    def archive_file_name(self):
-        return slugify(str(self)) + ".pdf"
+        if counter:
+            result += f"_{counter:02}"
+
+        if suffix:
+            result += suffix
+
+        if archive:
+            result += ".pdf"
+        else:
+            result += self.file_type
+
+        return pathvalidate.sanitize_filename(result, replacement_text="-")

    @property
    def file_type(self):
@@ -375,9 +382,7 @@ class FileInfo:
    def _get_correspondent(cls, name):
        if not name:
            return None
-        return Correspondent.objects.get_or_create(name=name, defaults={
-            "slug": slugify(name)
-        })[0]
+        return Correspondent.objects.get_or_create(name=name)[0]

    @classmethod
    def _get_title(cls, title):
@@ -387,10 +392,7 @@ class FileInfo:
    def _get_tags(cls, tags):
        r = []
        for t in tags.split(","):
-            r.append(Tag.objects.get_or_create(
-                slug=slugify(t),
-                defaults={"name": t}
-            )[0])
+            r.append(Tag.objects.get_or_create(name=t)[0])
        return tuple(r)

    @classmethod
--- a/src/documents/parsers.py
+++ b/src/documents/parsers.py
@@ -210,6 +210,7 @@ class DocumentParser(LoggingMixin):
    def __init__(self, logging_group):
        super().__init__()
        self.logging_group = logging_group
+        os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
        self.tempdir = tempfile.mkdtemp(
            prefix="paperless-", dir=settings.SCRATCH_DIR)

@@ -217,6 +218,9 @@ class DocumentParser(LoggingMixin):
        self.text = None
        self.date = None

+    def extract_metadata(self, document_path, mime_type):
+        return []
+
    def parse(self, document_path, mime_type):
        raise NotImplementedError()

--- a/src/documents/sanity_checker.py
+++ b/src/documents/sanity_checker.py
@@ -46,6 +46,10 @@ def check_sanity():
        for f in files:
            present_files.append(os.path.normpath(os.path.join(root, f)))

+    lockfile = os.path.normpath(settings.MEDIA_LOCK)
+    if lockfile in present_files:
+        present_files.remove(lockfile)
+
    for doc in Document.objects.all():
        # Check sanity of the thumbnail
        if not os.path.isfile(doc.thumbnail_path):
--- a/src/documents/serialisers.py
+++ b/src/documents/serialisers.py
@@ -1,17 +1,23 @@
 import magic
+from django.utils.text import slugify
 from pathvalidate import validate_filename, ValidationError
 from rest_framework import serializers
+from rest_framework.fields import SerializerMethodField

 from .models import Correspondent, Tag, Document, Log, DocumentType
 from .parsers import is_mime_type_supported


-class CorrespondentSerializer(serializers.HyperlinkedModelSerializer):
+class CorrespondentSerializer(serializers.ModelSerializer):

    document_count = serializers.IntegerField(read_only=True)

    last_correspondence = serializers.DateTimeField(read_only=True)

+    def get_slug(self, obj):
+        return slugify(obj.name)
+    slug = SerializerMethodField()
+
    class Meta:
        model = Correspondent
        fields = (
@@ -26,10 +32,14 @@ class CorrespondentSerializer(serializers.HyperlinkedModelSerializer):
        )


-class DocumentTypeSerializer(serializers.HyperlinkedModelSerializer):
+class DocumentTypeSerializer(serializers.ModelSerializer):

    document_count = serializers.IntegerField(read_only=True)

+    def get_slug(self, obj):
+        return slugify(obj.name)
+    slug = SerializerMethodField()
+
    class Meta:
        model = DocumentType
        fields = (
@@ -43,10 +53,14 @@ class DocumentTypeSerializer(serializers.HyperlinkedModelSerializer):
        )


-class TagSerializer(serializers.HyperlinkedModelSerializer):
+class TagSerializer(serializers.ModelSerializer):

    document_count = serializers.IntegerField(read_only=True)

+    def get_slug(self, obj):
+        return slugify(obj.name)
+    slug = SerializerMethodField()
+
    class Meta:
        model = Tag
        fields = (
@@ -83,6 +97,18 @@ class DocumentSerializer(serializers.ModelSerializer):
    tags = TagsField(many=True)
    document_type = DocumentTypeField(allow_null=True)

+    original_file_name = SerializerMethodField()
+    archived_file_name = SerializerMethodField()
+
+    def get_original_file_name(self, obj):
+        return obj.get_public_filename()
+
+    def get_archived_file_name(self, obj):
+        if obj.archive_checksum:
+            return obj.get_public_filename(archive=True)
+        else:
+            return None
+
    class Meta:
        model = Document
        depth = 1
@@ -96,7 +122,9 @@ class DocumentSerializer(serializers.ModelSerializer):
            "created",
            "modified",
            "added",
-            "archive_serial_number"
+            "archive_serial_number",
+            "original_file_name",
+            "archived_file_name",
        )


@@ -178,8 +206,7 @@ class PostDocumentSerializer(serializers.Serializer):
        required=False,
    )

-    def validate(self, attrs):
-        document = attrs.get('document')
+    def validate_document(self, document):

        try:
            validate_filename(document.name)
@@ -191,32 +218,31 @@ class PostDocumentSerializer(serializers.Serializer):

        if not is_mime_type_supported(mime_type):
            raise serializers.ValidationError(
-                "This mime type is not supported.")
+                "This file type is not supported.")

-        attrs['document_data'] = document_data
+        return document.name, document_data

-        title = attrs.get('title')
+    def validate_title(self, title):
+        if title:
+            return title
+        else:
+            # do not return empty strings.
+            return None

-        if not title:
-            attrs['title'] = None
-
-        correspondent = attrs.get('correspondent')
+    def validate_correspondent(self, correspondent):
        if correspondent:
-            attrs['correspondent_id'] = correspondent.id
+            return correspondent.id
        else:
-            attrs['correspondent_id'] = None
+            return None

-        document_type = attrs.get('document_type')
+    def validate_document_type(self, document_type):
        if document_type:
-            attrs['document_type_id'] = document_type.id
+            return document_type.id
        else:
-            attrs['document_type_id'] = None
+            return None

-        tags = attrs.get('tags')
+    def validate_tags(self, tags):
        if tags:
-            tag_ids = [tag.id for tag in tags]
-            attrs['tag_ids'] = tag_ids
+            return [tag.id for tag in tags]
        else:
-            attrs['tag_ids'] = None
-
-        return attrs
+            return None
--- a/src/documents/signals/handlers.py
+++ b/src/documents/signals/handlers.py
@@ -9,11 +9,13 @@ from django.contrib.contenttypes.models import ContentType
 from django.db import models, DatabaseError
 from django.dispatch import receiver
 from django.utils import timezone
+from filelock import FileLock
 from rest_framework.reverse import reverse

 from .. import index, matching
-from ..file_handling import delete_empty_directories, generate_filename, \
-    create_source_path_directory, archive_name_from_filename
+from ..file_handling import delete_empty_directories, \
+    create_source_path_directory, archive_name_from_filename, \
+    generate_unique_filename
 from ..models import Document, Tag


@@ -134,7 +136,7 @@ def set_tags(sender,

    message = 'Tagging "{}" with "{}"'
    logger(
-        message.format(document, ", ".join([t.slug for t in relevant_tags])),
+        message.format(document, ", ".join([t.name for t in relevant_tags])),
        logging_group
    )

@@ -157,41 +159,42 @@ def run_post_consume_script(sender, document, **kwargs):
    Popen((
        settings.POST_CONSUME_SCRIPT,
        str(document.pk),
-        document.file_name,
+        document.get_public_filename(),
        os.path.normpath(document.source_path),
        os.path.normpath(document.thumbnail_path),
        reverse("document-download", kwargs={"pk": document.pk}),
        reverse("document-thumb", kwargs={"pk": document.pk}),
        str(document.correspondent),
-        str(",".join(document.tags.all().values_list("slug", flat=True)))
+        str(",".join(document.tags.all().values_list("name", flat=True)))
    )).wait()


@receiver(models.signals.post_delete, sender=Document)
 def cleanup_document_deletion(sender, instance, using, **kwargs):
-    for f in (instance.source_path,
-              instance.archive_path,
-              instance.thumbnail_path):
-        if os.path.isfile(f):
-            try:
-                os.unlink(f)
-                logging.getLogger(__name__).debug(
-                    f"Deleted file {f}.")
-            except OSError as e:
-                logging.getLogger(__name__).warning(
-                    f"While deleting document {instance.file_name}, the file "
-                    f"{f} could not be deleted: {e}"
-                )
+    with FileLock(settings.MEDIA_LOCK):
+        for f in (instance.source_path,
+                  instance.archive_path,
+                  instance.thumbnail_path):
+            if os.path.isfile(f):
+                try:
+                    os.unlink(f)
+                    logging.getLogger(__name__).debug(
+                        f"Deleted file {f}.")
+                except OSError as e:
+                    logging.getLogger(__name__).warning(
+                        f"While deleting document {str(instance)}, the file "
+                        f"{f} could not be deleted: {e}"
+                    )

-    delete_empty_directories(
-        os.path.dirname(instance.source_path),
-        root=settings.ORIGINALS_DIR
-    )
+        delete_empty_directories(
+            os.path.dirname(instance.source_path),
+            root=settings.ORIGINALS_DIR
+        )

-    delete_empty_directories(
-        os.path.dirname(instance.archive_path),
-        root=settings.ARCHIVE_DIR
-    )
+        delete_empty_directories(
+            os.path.dirname(instance.archive_path),
+            root=settings.ARCHIVE_DIR
+        )


 def validate_move(instance, old_path, new_path):
@@ -226,81 +229,94 @@ def update_filename_and_move_files(sender, instance, **kwargs):
        # This will in turn cause this logic to move the file where it belongs.
        return

-    old_filename = instance.filename
-    new_filename = generate_filename(instance)
+    with FileLock(settings.MEDIA_LOCK):
+        old_filename = instance.filename
+        new_filename = generate_unique_filename(
+            instance, settings.ORIGINALS_DIR)

-    if new_filename == instance.filename:
-        # Don't do anything if its the same.
-        return
-
-    old_source_path = instance.source_path
-    new_source_path = os.path.join(settings.ORIGINALS_DIR, new_filename)
-
-    if not validate_move(instance, old_source_path, new_source_path):
-        return
-
-    # archive files are optional, archive checksum tells us if we have one,
-    # since this is None for documents without archived files.
-    if instance.archive_checksum:
-        new_archive_filename = archive_name_from_filename(new_filename)
-        old_archive_path = instance.archive_path
-        new_archive_path = os.path.join(settings.ARCHIVE_DIR,
-                                        new_archive_filename)
-
-        if not validate_move(instance, old_archive_path, new_archive_path):
+        if new_filename == instance.filename:
+            # Don't do anything if it's the same.
            return

-        create_source_path_directory(new_archive_path)
-    else:
-        old_archive_path = None
-        new_archive_path = None
+        old_source_path = instance.source_path
+        new_source_path = os.path.join(settings.ORIGINALS_DIR, new_filename)

-    create_source_path_directory(new_source_path)
+        if not validate_move(instance, old_source_path, new_source_path):
+            return

-    try:
-        os.rename(old_source_path, new_source_path)
+        # archive files are optional, archive checksum tells us if we have one,
+        # since this is None for documents without archived files.
        if instance.archive_checksum:
-            os.rename(old_archive_path, new_archive_path)
-        instance.filename = new_filename
-        # Don't save here to prevent infinite recursion.
-        Document.objects.filter(pk=instance.pk).update(filename=new_filename)
+            new_archive_filename = archive_name_from_filename(new_filename)
+            old_archive_path = instance.archive_path
+            new_archive_path = os.path.join(settings.ARCHIVE_DIR,
+                                            new_archive_filename)

-        logging.getLogger(__name__).debug(
-            f"Moved file {old_source_path} to {new_source_path}.")
+            if not validate_move(instance, old_archive_path, new_archive_path):
+                return

-        if instance.archive_checksum:
-            logging.getLogger(__name__).debug(
-                f"Moved file {old_archive_path} to {new_archive_path}.")
+            create_source_path_directory(new_archive_path)
+        else:
+            old_archive_path = None
+            new_archive_path = None
+
+        create_source_path_directory(new_source_path)

-    except OSError as e:
-        instance.filename = old_filename
-        # this happens when we can't move a file. If that's the case for the
-        # archive file, we try our best to revert the changes.
        try:
+            os.rename(old_source_path, new_source_path)
+            if instance.archive_checksum:
+                os.rename(old_archive_path, new_archive_path)
+            instance.filename = new_filename
+
+            # Don't save() here to prevent infinite recursion.
+            Document.objects.filter(pk=instance.pk).update(
+                filename=new_filename)
+
+            logging.getLogger(__name__).debug(
+                f"Moved file {old_source_path} to {new_source_path}.")
+
+            if instance.archive_checksum:
+                logging.getLogger(__name__).debug(
+                    f"Moved file {old_archive_path} to {new_archive_path}.")
+
+        except OSError as e:
+            instance.filename = old_filename
+            # this happens when we can't move a file. If that's the case for
+            # the archive file, we try our best to revert the changes.
+            # no need to save the instance, the update() has not happened yet.
+            try:
+                os.rename(new_source_path, old_source_path)
+                os.rename(new_archive_path, old_archive_path)
+            except Exception as e:
+                # This is fine, since:
+                # A: if we managed to move source from A to B, we will also
+                #  manage to move it from B to A. If not, we have a serious
+                #  issue that's going to get caught by the sanity checker.
+                #  All files remain in place and will never be overwritten,
+                #  so this is not the end of the world.
+                # B: if moving the original file failed, nothing has changed
+                #  anyway.
+                pass
+        except DatabaseError as e:
+            # this happens after moving files, so move them back into place.
+            # since moving them once succeeded, it's very likely going to
+            # succeed again.
            os.rename(new_source_path, old_source_path)
-            os.rename(new_archive_path, old_archive_path)
-        except Exception as e:
-            # This is fine, since:
-            # A: if we managed to move source from A to B, we will also manage
-            #  to move it from B to A. If not, we have a serious issue
-            #  that's going to get caught by the santiy checker.
-            #  all files remain in place and will never be overwritten,
-            #  so this is not the end of the world.
-            # B: if moving the orignal file failed, nothing has changed anyway.
-            pass
-    except DatabaseError as e:
-        os.rename(new_source_path, old_source_path)
-        if instance.archive_checksum:
-            os.rename(new_archive_path, old_archive_path)
-        instance.filename = old_filename
+            if instance.archive_checksum:
+                os.rename(new_archive_path, old_archive_path)
+            instance.filename = old_filename
+            # again, no need to save the instance, since the actual update()
+            # operation failed.

-    if not os.path.isfile(old_source_path):
-        delete_empty_directories(os.path.dirname(old_source_path),
-                                 root=settings.ORIGINALS_DIR)
+        # finally, remove any empty sub folders. This will do nothing if
+        # something has failed above.
+        if not os.path.isfile(old_source_path):
+            delete_empty_directories(os.path.dirname(old_source_path),
+                                     root=settings.ORIGINALS_DIR)

-    if old_archive_path and not os.path.isfile(old_archive_path):
-        delete_empty_directories(os.path.dirname(old_archive_path),
-                                 root=settings.ARCHIVE_DIR)
+        if old_archive_path and not os.path.isfile(old_archive_path):
+            delete_empty_directories(os.path.dirname(old_archive_path),
+                                     root=settings.ARCHIVE_DIR)


 def set_log_entry(sender, document=None, logging_group=None, **kwargs):
--- a/src/documents/tasks.py
+++ b/src/documents/tasks.py
@@ -1,5 +1,6 @@
 import logging

+import tqdm
 from django.conf import settings
 from whoosh.writing import AsyncWriter

@@ -23,7 +24,7 @@ def index_reindex():
    ix = index.open_index(recreate=True)

    with AsyncWriter(ix) as writer:
-        for document in documents:
+        for document in tqdm.tqdm(documents):
            index.update_document(writer, document)


--- a/src/documents/tests/test_api.py
+++ b/src/documents/tests/test_api.py
@@ -1,4 +1,5 @@
 import os
+import shutil
 import tempfile
 from unittest import mock

@@ -195,6 +196,24 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
        results = response.data['results']
        self.assertEqual(len(results), 3)

+        response = self.client.get("/api/documents/?tags__id__none={}".format(tag_3.id))
+        self.assertEqual(response.status_code, 200)
+        results = response.data['results']
+        self.assertEqual(len(results), 2)
+        self.assertEqual(results[0]['id'], doc1.id)
+        self.assertEqual(results[1]['id'], doc2.id)
+
+        response = self.client.get("/api/documents/?tags__id__none={},{}".format(tag_3.id, tag_2.id))
+        self.assertEqual(response.status_code, 200)
+        results = response.data['results']
+        self.assertEqual(len(results), 1)
+        self.assertEqual(results[0]['id'], doc1.id)
+
+        response = self.client.get("/api/documents/?tags__id__none={},{}".format(tag_2.id, tag_inbox.id))
+        self.assertEqual(response.status_code, 200)
+        results = response.data['results']
+        self.assertEqual(len(results), 0)
+
    def test_search_no_query(self):
        response = self.client.get("/api/search/")
        results = response.data['results']
@@ -475,3 +494,34 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
        self.assertEqual(response.status_code, 400)

        async_task.assert_not_called()
+
+    def test_get_metadata(self):
+        doc = Document.objects.create(title="test", filename="file.pdf", mime_type="image/png", archive_checksum="A")
+
+        shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "thumbnails", "0000001.png"), doc.source_path)
+        shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), doc.archive_path)
+
+        response = self.client.get(f"/api/documents/{doc.pk}/metadata/")
+        self.assertEqual(response.status_code, 200)
+
+        meta = response.data
+
+        self.assertEqual(meta['original_mime_type'], "image/png")
+        self.assertTrue(meta['has_archive_version'])
+        self.assertEqual(len(meta['original_metadata']), 0)
+        self.assertGreater(len(meta['archive_metadata']), 0)
+
+    def test_get_metadata_no_archive(self):
+        doc = Document.objects.create(title="test", filename="file.pdf", mime_type="application/pdf")
+
+        shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), doc.source_path)
+
+        response = self.client.get(f"/api/documents/{doc.pk}/metadata/")
+        self.assertEqual(response.status_code, 200)
+
+        meta = response.data
+
+        self.assertEqual(meta['original_mime_type'], "application/pdf")
+        self.assertFalse(meta['has_archive_version'])
+        self.assertGreater(len(meta['original_metadata']), 0)
+        self.assertIsNone(meta['archive_metadata'])
--- a/src/documents/tests/test_consumer.py
+++ b/src/documents/tests/test_consumer.py
@@ -27,7 +27,7 @@ class TestAttributes(TestCase):

        self.assertEqual(file_info.title, title, filename)

-        self.assertEqual(tuple([t.slug for t in file_info.tags]), tags, filename)
+        self.assertEqual(tuple([t.name for t in file_info.tags]), tags, filename)

    def test_guess_attributes_from_name0(self):
        self._test_guess_attributes_from_name(
@@ -188,7 +188,7 @@ class TestFieldPermutations(TestCase):
            self.assertEqual(info.tags, (), filename)
        else:
            self.assertEqual(
-                [t.slug for t in info.tags], tags.split(','),
+                [t.name for t in info.tags], tags.split(','),
                filename
            )

@@ -342,8 +342,8 @@ class TestFieldPermutations(TestCase):
            info = FileInfo.from_filename(filename)
            self.assertEqual(info.title, "0001")
            self.assertEqual(len(info.tags), 2)
-            self.assertEqual(info.tags[0].slug, "tag1")
-            self.assertEqual(info.tags[1].slug, "tag2")
+            self.assertEqual(info.tags[0].name, "tag1")
+            self.assertEqual(info.tags[1].name, "tag2")
            self.assertIsNone(info.created)

        # Complex transformation with date in replacement string
@@ -356,8 +356,8 @@ class TestFieldPermutations(TestCase):
            info = FileInfo.from_filename(filename)
            self.assertEqual(info.title, "0001")
            self.assertEqual(len(info.tags), 2)
-            self.assertEqual(info.tags[0].slug, "tag1")
-            self.assertEqual(info.tags[1].slug, "tag2")
+            self.assertEqual(info.tags[0].name, "tag1")
+            self.assertEqual(info.tags[1].name, "tag2")
            self.assertEqual(info.created.year, 2019)
            self.assertEqual(info.created.month, 9)
            self.assertEqual(info.created.day, 8)
@@ -598,10 +598,10 @@ class TestConsumer(DirectoriesMixin, TestCase):

        self.assertEqual(document.title, "new docs")
        self.assertEqual(document.correspondent.name, "Bank")
-        self.assertEqual(document.filename, "bank/new-docs-0000001.pdf")
+        self.assertEqual(document.filename, "Bank/new docs.pdf")

    @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
-    @mock.patch("documents.signals.handlers.generate_filename")
+    @mock.patch("documents.signals.handlers.generate_unique_filename")
    def testFilenameHandlingUnstableFormat(self, m):

        filenames = ["this", "that", "now this", "i cant decide"]
@@ -611,7 +611,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
            filenames.insert(0, f)
            return f

-        m.side_effect = lambda f: get_filename()
+        m.side_effect = lambda f, root: get_filename()

        filename = self.get_test_file()

--- a/src/documents/tests/test_document_model.py
+++ b/src/documents/tests/test_document_model.py
@@ -48,19 +48,19 @@ class TestDocument(TestCase):
    def test_file_name(self):

        doc = Document(mime_type="application/pdf", title="test", created=datetime(2020, 12, 25))
-        self.assertEqual(doc.file_name, "20201225-test.pdf")
+        self.assertEqual(doc.get_public_filename(), "2020-12-25 test.pdf")

    def test_file_name_jpg(self):

        doc = Document(mime_type="image/jpeg", title="test", created=datetime(2020, 12, 25))
-        self.assertEqual(doc.file_name, "20201225-test.jpg")
+        self.assertEqual(doc.get_public_filename(), "2020-12-25 test.jpg")

    def test_file_name_unknown(self):

        doc = Document(mime_type="application/zip", title="test", created=datetime(2020, 12, 25))
-        self.assertEqual(doc.file_name, "20201225-test.zip")
+        self.assertEqual(doc.get_public_filename(), "2020-12-25 test.zip")

-    def test_file_name_invalid(self):
+    def test_file_name_invalid_type(self):

        doc = Document(mime_type="image/jpegasd", title="test", created=datetime(2020, 12, 25))
-        self.assertEqual(doc.file_name, "20201225-test")
+        self.assertEqual(doc.get_public_filename(), "2020-12-25 test")
--- a/src/documents/tests/test_file_handling.py
+++ b/src/documents/tests/test_file_handling.py
@@ -1,5 +1,8 @@
+import datetime
+import hashlib
 import os
-import shutil
+import random
+import uuid
 from pathlib import Path
 from unittest import mock

@@ -8,7 +11,8 @@ from django.db import DatabaseError
 from django.test import TestCase, override_settings

 from .utils import DirectoriesMixin
-from ..file_handling import generate_filename, create_source_path_directory, delete_empty_directories
+from ..file_handling import generate_filename, create_source_path_directory, delete_empty_directories, \
+    generate_unique_filename
 from ..models import Document, Correspondent


@@ -40,13 +44,13 @@ class TestFileHandling(DirectoriesMixin, TestCase):
        document.filename = generate_filename(document)

        # Ensure that filename is properly generated
-        self.assertEqual(document.filename, "none/none-{:07d}.pdf".format(document.pk))
+        self.assertEqual(document.filename, "none/none.pdf")

        # Enable encryption and check again
        document.storage_type = Document.STORAGE_TYPE_GPG
        document.filename = generate_filename(document)
        self.assertEqual(document.filename,
-                         "none/none-{:07d}.pdf.gpg".format(document.pk))
+                         "none/none.pdf.gpg")

        document.save()

@@ -62,7 +66,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
        # Check proper handling of files
        self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/test"), True)
        self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
-        self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/test/test-{:07d}.pdf.gpg".format(document.pk)), True)
+        self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/test/test.pdf.gpg"), True)

    @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
    def test_file_renaming_missing_permissions(self):
@@ -74,12 +78,12 @@ class TestFileHandling(DirectoriesMixin, TestCase):
        # Ensure that filename is properly generated
        document.filename = generate_filename(document)
        self.assertEqual(document.filename,
-                         "none/none-{:07d}.pdf".format(document.pk))
+                         "none/none.pdf")
        create_source_path_directory(document.source_path)
        Path(document.source_path).touch()

        # Test source_path
-        self.assertEqual(document.source_path, settings.ORIGINALS_DIR + "/none/none-{:07d}.pdf".format(document.pk))
+        self.assertEqual(document.source_path, settings.ORIGINALS_DIR + "/none/none.pdf")

        # Make the folder read- and execute-only (no writing and no renaming)
        os.chmod(settings.ORIGINALS_DIR + "/none", 0o555)
@@ -89,8 +93,8 @@ class TestFileHandling(DirectoriesMixin, TestCase):
        document.save()

        # Check proper handling of files
-        self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none-{:07d}.pdf".format(document.pk)), True)
-        self.assertEqual(document.filename, "none/none-{:07d}.pdf".format(document.pk))
+        self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), True)
+        self.assertEqual(document.filename, "none/none.pdf")

        os.chmod(settings.ORIGINALS_DIR + "/none", 0o777)

@@ -108,7 +112,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
        # Ensure that filename is properly generated
        document.filename = generate_filename(document)
        self.assertEqual(document.filename,
-                         "none/none-{:07d}.pdf".format(document.pk))
+                         "none/none.pdf")
        create_source_path_directory(document.source_path)
        Path(document.source_path).touch()

@@ -125,8 +129,8 @@ class TestFileHandling(DirectoriesMixin, TestCase):

            # Check proper handling of files
            self.assertTrue(os.path.isfile(document.source_path))
-            self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none-{:07d}.pdf".format(document.pk)), True)
-            self.assertEqual(document.filename, "none/none-{:07d}.pdf".format(document.pk))
+            self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), True)
+            self.assertEqual(document.filename, "none/none.pdf")

    @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
    def test_document_delete(self):
@@ -138,7 +142,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
        # Ensure that filename is properly generated
        document.filename = generate_filename(document)
        self.assertEqual(document.filename,
-                         "none/none-{:07d}.pdf".format(document.pk))
+                         "none/none.pdf")

        create_source_path_directory(document.source_path)
        Path(document.source_path).touch()
@@ -146,7 +150,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
        # Ensure file deletion after delete
        pk = document.pk
        document.delete()
-        self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none-{:07d}.pdf".format(pk)), False)
+        self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), False)
        self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)

    @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
@@ -168,7 +172,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
        # Ensure that filename is properly generated
        document.filename = generate_filename(document)
        self.assertEqual(document.filename,
-                         "none/none-{:07d}.pdf".format(document.pk))
+                         "none/none.pdf")

        create_source_path_directory(document.source_path)

@@ -199,7 +203,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):

        # Ensure that filename is properly generated
        self.assertEqual(generate_filename(document),
-                         "demo-{:07d}.pdf".format(document.pk))
+                         "demo.pdf")

    @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
    def test_tags_with_dash(self):
@@ -215,7 +219,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):

        # Ensure that filename is properly generated
        self.assertEqual(generate_filename(document),
-                         "demo-{:07d}.pdf".format(document.pk))
+                         "demo.pdf")

    @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
    def test_tags_malformed(self):
@@ -231,7 +235,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):

        # Ensure that filename is properly generated
        self.assertEqual(generate_filename(document),
-                         "none-{:07d}.pdf".format(document.pk))
+                         "none.pdf")

    @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[0]}")
    def test_tags_all(self):
@@ -246,7 +250,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):

        # Ensure that filename is properly generated
        self.assertEqual(generate_filename(document),
-                         "demo-{:07d}.pdf".format(document.pk))
+                         "demo.pdf")

    @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[1]}")
    def test_tags_out_of_bounds(self):
@@ -261,7 +265,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):

        # Ensure that filename is properly generated
        self.assertEqual(generate_filename(document),
-                         "none-{:07d}.pdf".format(document.pk))
+                         "none.pdf")

    @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}/{correspondent}")
    def test_nested_directory_cleanup(self):
@@ -272,7 +276,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):

        # Ensure that filename is properly generated
        document.filename = generate_filename(document)
-        self.assertEqual(document.filename, "none/none/none-{:07d}.pdf".format(document.pk))
+        self.assertEqual(document.filename, "none/none/none.pdf")
        create_source_path_directory(document.source_path)
        Path(document.source_path).touch()

@@ -282,7 +286,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
        pk = document.pk
        document.delete()

-        self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none/none-{:07d}.pdf".format(pk)), False)
+        self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none/none.pdf"), False)
        self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none/none"), False)
        self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
        self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR), True)
@@ -330,6 +334,48 @@ class TestFileHandling(DirectoriesMixin, TestCase):

        self.assertEqual(generate_filename(document), "0000001.pdf")

+    @override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
+    def test_duplicates(self):
+        """Colliding names get a _01 suffix that is dropped once the name is free."""
+        document = Document.objects.create(mime_type="application/pdf", title="qwe", checksum="A", pk=1)
+        document2 = Document.objects.create(mime_type="application/pdf", title="qwe", checksum="B", pk=2)
+        Path(document.source_path).touch()
+        Path(document2.source_path).touch()
+        document.filename = "0000001.pdf"
+        document.save()
+
+        self.assertTrue(os.path.isfile(document.source_path))
+        self.assertEqual(document.filename, "qwe.pdf")
+
+        document2.filename = "0000002.pdf"
+        document2.save()
+
+        self.assertTrue(os.path.isfile(document.source_path))
+        self.assertEqual(document2.filename, "qwe_01.pdf")
+
+        # saving should not change the file names.
+
+        document.save()
+
+        self.assertTrue(os.path.isfile(document.source_path))
+        self.assertEqual(document.filename, "qwe.pdf")
+
+        document2.save()
+
+        self.assertTrue(os.path.isfile(document.source_path))
+        self.assertEqual(document2.filename, "qwe_01.pdf")
+
+        document.delete()
+
+        self.assertFalse(os.path.isfile(document.source_path))
+
+        # filename free, should remove _01 suffix
+
+        document2.save()
+
+        self.assertTrue(os.path.isfile(document2.source_path))
+        self.assertEqual(document2.filename, "qwe.pdf")
+

 class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):

@@ -358,15 +404,14 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
        self.assertFalse(os.path.isfile(archive))
        self.assertTrue(os.path.isfile(doc.source_path))
        self.assertTrue(os.path.isfile(doc.archive_path))
-        self.assertEqual(doc.source_path, os.path.join(settings.ORIGINALS_DIR, "none", "my_doc-0000001.pdf"))
-        self.assertEqual(doc.archive_path, os.path.join(settings.ARCHIVE_DIR, "none", "my_doc-0000001.pdf"))
+        self.assertEqual(doc.source_path, os.path.join(settings.ORIGINALS_DIR, "none", "my_doc.pdf"))
+        self.assertEqual(doc.archive_path, os.path.join(settings.ARCHIVE_DIR, "none", "my_doc.pdf"))

    @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
    def test_move_archive_gone(self):
        original = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
        archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
        Path(original).touch()
-        #Path(archive).touch()
        doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B")

        self.assertTrue(os.path.isfile(original))
@@ -381,7 +426,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
        Path(original).touch()
        Path(archive).touch()
        os.makedirs(os.path.join(settings.ARCHIVE_DIR, "none"))
-        Path(os.path.join(settings.ARCHIVE_DIR, "none", "my_doc-0000001.pdf")).touch()
+        Path(os.path.join(settings.ARCHIVE_DIR, "none", "my_doc.pdf")).touch()
        doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B")

        self.assertTrue(os.path.isfile(original))
@@ -485,3 +530,44 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
        self.assertTrue(os.path.isfile(archive))
        self.assertTrue(os.path.isfile(doc.source_path))
        self.assertTrue(os.path.isfile(doc.archive_path))
+
+class TestFilenameGeneration(TestCase):
+    """Checks generate_filename() against the {title} and {created} formats."""
+
+    @override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
+    def test_invalid_characters(self):
+        """A harmless title is kept; separators and ':' become dashes."""
+        plain = Document.objects.create(
+            title="This. is the title.", mime_type="application/pdf", pk=1, checksum="1")
+        self.assertEqual(generate_filename(plain), "This. is the title.pdf")
+
+        unsafe = Document.objects.create(
+            title="my\\invalid/../title:yay", mime_type="application/pdf", pk=2, checksum="2")
+        self.assertEqual(generate_filename(unsafe), "my-invalid-..-title-yay.pdf")
+
+    @override_settings(PAPERLESS_FILENAME_FORMAT="{created}")
+    def test_date(self):
+        """Only the date part of ``created`` ends up in the file name."""
+        doc = Document.objects.create(title="does not matter", created=datetime.datetime(2020, 5, 21, 7, 36, 51, 153), mime_type="application/pdf", pk=2, checksum="2")
+        self.assertEqual(generate_filename(doc), "2020-05-21.pdf")
+
+
+def run():
+    """Create a document with files on disk, then re-save it 30 times with random titles."""
+    doc = Document.objects.create(checksum=str(uuid.uuid4()), title=str(uuid.uuid4()), content="wow")
+    doc.filename = generate_unique_filename(doc, settings.ORIGINALS_DIR)
+    Path(doc.thumbnail_path).touch()
+
+    original_bytes = str(uuid.uuid4()).encode()
+    Path(doc.source_path).write_bytes(original_bytes)
+    doc.checksum = hashlib.md5(original_bytes).hexdigest()
+
+    archive_bytes = str(uuid.uuid4()).encode()
+    Path(doc.archive_path).write_bytes(archive_bytes)
+    doc.archive_checksum = hashlib.md5(archive_bytes).hexdigest()
+
+    doc.save()
+
+    for _ in range(30):
+        doc.title = str(random.randrange(1, 5))
+        doc.save()
--- a/src/documents/tests/test_management_archiver.py
+++ b/src/documents/tests/test_management_archiver.py
@@ -16,25 +16,23 @@ sample_file = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
 class TestArchiver(DirectoriesMixin, TestCase):

    def make_models(self):
-        self.d1 = Document.objects.create(checksum="A", title="A", content="first document", pk=1, mime_type="application/pdf")
-        #self.d2 = Document.objects.create(checksum="B", title="B", content="second document")
-        #self.d3 = Document.objects.create(checksum="C", title="C", content="unrelated document")
+        return Document.objects.create(checksum="A", title="A", content="first document", mime_type="application/pdf")

    def test_archiver(self):

-        shutil.copy(sample_file, os.path.join(self.dirs.originals_dir, "0000001.pdf"))
-        self.make_models()
+        doc = self.make_models()
+        shutil.copy(sample_file, os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf"))

        call_command('document_archiver')

    def test_handle_document(self):

-        shutil.copy(sample_file, os.path.join(self.dirs.originals_dir, "0000001.pdf"))
-        self.make_models()
+        doc = self.make_models()
+        shutil.copy(sample_file, os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf"))

-        handle_document(self.d1.pk)
+        handle_document(doc.pk)

-        doc = Document.objects.get(id=self.d1.id)
+        doc = Document.objects.get(id=doc.id)

        self.assertIsNotNone(doc.checksum)
        self.assertTrue(os.path.isfile(doc.archive_path))
--- a/src/documents/tests/test_management_consumer.py
+++ b/src/documents/tests/test_management_consumer.py
@@ -230,7 +230,7 @@ class TestConsumerTags(DirectoriesMixin, ConsumerMixin, TransactionTestCase):

        tag_names = ("existingTag", "Space Tag")
        # Create a Tag prior to consuming a file using it in path
-        tag_ids = [Tag.objects.create(name=tag_names[0]).pk,]
+        tag_ids = [Tag.objects.create(name="existingtag").pk,]

        self.t_start()

--- a/src/documents/tests/test_management_decrypt.py
+++ b/src/documents/tests/test_management_decrypt.py
@@ -35,20 +35,20 @@ class TestDecryptDocuments(TestCase):
            PASSPHRASE="test"
        ).enable()

-        shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "originals", "0000002.pdf.gpg"), os.path.join(originals_dir, "0000002.pdf.gpg"))
-        shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "thumbnails", "0000002.png.gpg"), os.path.join(thumb_dir, "0000002.png.gpg"))
+        doc = Document.objects.create(checksum="9c9691e51741c1f4f41a20896af31770", title="wow", filename="0000002.pdf.gpg",  mime_type="application/pdf", storage_type=Document.STORAGE_TYPE_GPG)

-        Document.objects.create(checksum="9c9691e51741c1f4f41a20896af31770", title="wow", filename="0000002.pdf.gpg", id=2, mime_type="application/pdf", storage_type=Document.STORAGE_TYPE_GPG)
+        shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "originals", "0000002.pdf.gpg"), os.path.join(originals_dir, "0000002.pdf.gpg"))
+        shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "thumbnails", f"0000002.png.gpg"), os.path.join(thumb_dir, f"{doc.id:07}.png.gpg"))

        call_command('decrypt_documents')

-        doc = Document.objects.get(id=2)
+        doc.refresh_from_db()

        self.assertEqual(doc.storage_type, Document.STORAGE_TYPE_UNENCRYPTED)
        self.assertEqual(doc.filename, "0000002.pdf")
        self.assertTrue(os.path.isfile(os.path.join(originals_dir, "0000002.pdf")))
        self.assertTrue(os.path.isfile(doc.source_path))
-        self.assertTrue(os.path.isfile(os.path.join(thumb_dir, "0000002.png")))
+        self.assertTrue(os.path.isfile(os.path.join(thumb_dir, f"{doc.id:07}.png")))
        self.assertTrue(os.path.isfile(doc.thumbnail_path))

        with doc.source_file as f:
--- a/src/documents/tests/test_management_exporter.py
+++ b/src/documents/tests/test_management_exporter.py
@@ -24,13 +24,14 @@ class TestExportImport(DirectoriesMixin, TestCase):

        file = os.path.join(self.dirs.originals_dir, "0000001.pdf")

-        Document.objects.create(content="Content", checksum="42995833e01aea9b3edee44bbfdd7ce1", archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b", title="wow", filename="0000001.pdf", id=1, mime_type="application/pdf")
-        Document.objects.create(content="Content", checksum="9c9691e51741c1f4f41a20896af31770", title="wow", filename="0000002.pdf.gpg", id=2, mime_type="application/pdf", storage_type=Document.STORAGE_TYPE_GPG)
+        Document.objects.create(content="Content", checksum="42995833e01aea9b3edee44bbfdd7ce1", archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b", title="wow", filename="0000001.pdf", mime_type="application/pdf")
+        Document.objects.create(content="Content", checksum="9c9691e51741c1f4f41a20896af31770", title="wow", filename="0000002.pdf.gpg", mime_type="application/pdf", storage_type=Document.STORAGE_TYPE_GPG)
        Tag.objects.create(name="t")
        DocumentType.objects.create(name="dt")
        Correspondent.objects.create(name="c")

        target = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, target)

        call_command('document_exporter', target)

@@ -66,6 +67,6 @@ class TestExportImport(DirectoriesMixin, TestCase):
    def test_export_missing_files(self):

        target = tempfile.mkdtemp()
-        call_command('document_exporter', target)
-        Document.objects.create(checksum="AAAAAAAAAAAAAAAAA", title="wow", filename="0000004.pdf", id=3, mime_type="application/pdf")
+        self.addCleanup(shutil.rmtree, target)
+        Document.objects.create(checksum="AAAAAAAAAAAAAAAAA", title="wow", filename="0000004.pdf", mime_type="application/pdf")
        self.assertRaises(FileNotFoundError, call_command, 'document_exporter', target)
--- a/src/documents/views.py
+++ b/src/documents/views.py
@@ -40,6 +40,7 @@ from .filters import (
    LogFilterSet
 )
 from .models import Correspondent, Document, Log, Tag, DocumentType
+from .parsers import get_parser_class_for_mime_type
 from .serialisers import (
    CorrespondentSerializer,
    DocumentSerializer,
@@ -151,11 +152,11 @@ class DocumentViewSet(RetrieveModelMixin,
        doc = Document.objects.get(id=pk)
        if not self.original_requested(request) and os.path.isfile(doc.archive_path):  # NOQA: E501
            file_handle = doc.archive_file
-            filename = doc.archive_file_name
+            filename = doc.get_public_filename(archive=True)
            mime_type = 'application/pdf'
        else:
            file_handle = doc.source_file
-            filename = doc.file_name
+            filename = doc.get_public_filename()
            mime_type = doc.mime_type

        if doc.storage_type == Document.STORAGE_TYPE_GPG:
@@ -166,17 +167,43 @@ class DocumentViewSet(RetrieveModelMixin,
            disposition, filename)
        return response

+    def get_metadata(self, file, mime_type):
+        """Return parser metadata of file; None if missing, [] if no parser."""
+        if not os.path.isfile(file):
+            return None
+
+        parser_class = get_parser_class_for_mime_type(mime_type)
+        if not parser_class:
+            return []
+        parser = parser_class(logging_group=None)
+        return parser.extract_metadata(file, mime_type)
+
    @action(methods=['get'], detail=True)
    def metadata(self, request, pk=None):
+        """Respond with checksum, size and metadata of a document's files."""
        try:
            doc = Document.objects.get(pk=pk)
-            return Response({
-                "paperless__checksum": doc.checksum,
-                "paperless__mime_type": doc.mime_type,
-                "paperless__filename": doc.filename,
-                "paperless__has_archive_version":
-                    os.path.isfile(doc.archive_path)
-            })
+            meta = {
+                "original_checksum": doc.checksum,
+                "original_size": os.stat(doc.source_path).st_size,
+                "original_mime_type": doc.mime_type,
+                "media_filename": doc.filename,
+                "has_archive_version": os.path.isfile(doc.archive_path),
+                "original_metadata": self.get_metadata(
+                    doc.source_path, doc.mime_type)
+            }
+
+            if doc.archive_checksum and os.path.isfile(doc.archive_path):
+                meta['archive_checksum'] = doc.archive_checksum
+                meta['archive_size'] = os.stat(doc.archive_path).st_size
+                meta['archive_metadata'] = self.get_metadata(
+                    doc.archive_path, "application/pdf")
+            else:
+                meta['archive_checksum'] = None
+                meta['archive_size'] = None
+                meta['archive_metadata'] = None
+
+            return Response(meta)
        except Document.DoesNotExist:
            raise Http404()

@@ -263,12 +290,11 @@ class PostDocumentView(APIView):
        serializer = self.get_serializer(data=request.data)
        serializer.is_valid(raise_exception=True)

-        document = serializer.validated_data['document']
-        document_data = serializer.validated_data['document_data']
-        correspondent_id = serializer.validated_data['correspondent_id']
-        document_type_id = serializer.validated_data['document_type_id']
-        tag_ids = serializer.validated_data['tag_ids']
-        title = serializer.validated_data['title']
+        doc_name, doc_data = serializer.validated_data.get('document')
+        correspondent_id = serializer.validated_data.get('correspondent')
+        document_type_id = serializer.validated_data.get('document_type')
+        tag_ids = serializer.validated_data.get('tags')
+        title = serializer.validated_data.get('title')

        t = int(mktime(datetime.now().timetuple()))

@@ -277,17 +303,17 @@ class PostDocumentView(APIView):
        with tempfile.NamedTemporaryFile(prefix="paperless-upload-",
                                         dir=settings.SCRATCH_DIR,
                                         delete=False) as f:
-            f.write(document_data)
+            f.write(doc_data)
            os.utime(f.name, times=(t, t))

            async_task("documents.tasks.consume_file",
                       f.name,
-                       override_filename=document.name,
+                       override_filename=doc_name,
                       override_title=title,
                       override_correspondent_id=correspondent_id,
                       override_document_type_id=document_type_id,
                       override_tag_ids=tag_ids,
-                       task_name=os.path.basename(document.name)[:100])
+                       task_name=os.path.basename(doc_name)[:100])
        return Response("OK")


--- a/src/paperless/settings.py
+++ b/src/paperless/settings.py
@@ -53,6 +53,10 @@ ARCHIVE_DIR = os.path.join(MEDIA_ROOT, "documents", "archive")
 THUMBNAIL_DIR = os.path.join(MEDIA_ROOT, "documents", "thumbnails")

 DATA_DIR = os.getenv('PAPERLESS_DATA_DIR', os.path.join(BASE_DIR, "..", "data"))
+
+# Lock file for synchronizing changes to the MEDIA directory across multiple
+# threads.
+MEDIA_LOCK = os.path.join(MEDIA_ROOT, "media.lock")
 INDEX_DIR = os.path.join(DATA_DIR, "index")
 MODEL_FILE = os.path.join(DATA_DIR, "classification_model.pickle")

--- a/src/paperless/version.py
+++ b/src/paperless/version.py
@@ -1 +1 @@
-__version__ = (0, 9, 5)
+__version__ = (0, 9, 6)
--- a/src/paperless_mail/mail.py
+++ b/src/paperless_mail/mail.py
@@ -103,10 +103,7 @@ class MailAccountHandler(LoggingMixin):

    def _correspondent_from_name(self, name):
        try:
-            return Correspondent.objects.get_or_create(
-                name=name, defaults={
-                    "slug": slugify(name)
-                })[0]
+            return Correspondent.objects.get_or_create(name=name)[0]
        except DatabaseError as e:
            self.log(
                "error",
--- a/src/paperless_tesseract/parsers.py
+++ b/src/paperless_tesseract/parsers.py
@@ -5,6 +5,7 @@ import subprocess

 import ocrmypdf
 import pdftotext
+import pikepdf
 from PIL import Image
 from django.conf import settings
 from ocrmypdf import InputFileError, EncryptedPdfError
@@ -18,6 +19,33 @@ class RasterisedDocumentParser(DocumentParser):
    image, whether it's a PDF, or other graphical format (JPEG, TIFF, etc.)
    """

+    def extract_metadata(self, document_path, mime_type):
+        """Return the PDF's metadata as a list of dicts ([] for non-PDF)."""
+        namespace_pattern = re.compile(r"\{(.*)\}(.*)")
+
+        result = []
+        if mime_type != 'application/pdf':
+            return result
+        # Open via context manager so the PDF file handle is always closed.
+        with pikepdf.open(document_path) as pdf:
+            meta = pdf.open_metadata()
+            for key, value in meta.items():
+                if isinstance(value, list):
+                    value = " ".join([str(e) for e in value])
+                value = str(value)
+                try:
+                    m = namespace_pattern.match(key)
+                    result.append({
+                        "namespace": m.group(1),
+                        "prefix": meta.REVERSE_NS[m.group(1)],
+                        "key": m.group(2),
+                        "value": value
+                    })
+                except Exception as e:
+                    self.log("warning", f"Error while reading metadata "
+                                        f"{key}: {value}. Error: {e}")
+        return result
+
    def get_thumbnail(self, document_path, mime_type):
        """
        The thumbnail of a PDF is just a 500px wide image of the first page.