Merge pull request #1240 from paperless-ngx/beta

[Beta] Paperless-ngx v1.8.0 Release Candidate 1
2026-01-28 22:59:03 -06:00 · 2022-07-28 15:17:30 -07:00
parent 278cedf3d0 a722bfd099
commit 5fe435048b
277 changed files with 56739 additions and 28967 deletions
--- a/docs/Dockerfile
+++ b/docs/Dockerfile
@@ -1,17 +0,0 @@
-FROM python:3.5.1
-
-# Install Sphinx and Pygments
-RUN pip install --no-cache-dir Sphinx Pygments \
-  # Setup directories, copy data
-  && mkdir /build
-
-COPY . /build
-WORKDIR /build/docs
-
-# Build documentation
-RUN make html
-
-# Start webserver
-WORKDIR /build/docs/_build/html
-EXPOSE 8000/tcp
-CMD ["python3", "-m", "http.server"]
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -24,6 +24,7 @@ I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
 help:
 	@echo "Please use \`make <target>' where <target> is one of"
 	@echo "  html       to make standalone HTML files"
+	@echo "  livehtml   to preview changes with live reload in your browser"
 	@echo "  dirhtml    to make HTML files named index.html in directories"
 	@echo "  singlehtml to make a single large HTML file"
 	@echo "  pickle     to make pickle files"
@@ -54,6 +55,9 @@ html:
 	@echo
 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."

+livehtml:
+	sphinx-autobuild "./" "$(BUILDDIR)" $(O)
+
 dirhtml:
 	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
 	@echo
--- a/docs/_static/js/darkmode.js
+++ b/docs/_static/js/darkmode.js
@@ -1,47 +1,47 @@
-let toggleButton;
-let icon;
+let toggleButton
+let icon

 function load() {
-	"use strict";
+	'use strict'

-	toggleButton = document.createElement("button");
-	toggleButton.setAttribute("title", "Toggle dark mode");
-	toggleButton.classList.add("dark-mode-toggle");
-	icon = document.createElement("i");
-	icon.classList.add("fa", darkModeState ? "fa-sun-o" : "fa-moon-o");
-	toggleButton.appendChild(icon);
-	document.body.prepend(toggleButton);
+	toggleButton = document.createElement('button')
+	toggleButton.setAttribute('title', 'Toggle dark mode')
+	toggleButton.classList.add('dark-mode-toggle')
+	icon = document.createElement('i')
+	icon.classList.add('fa', darkModeState ? 'fa-sun-o' : 'fa-moon-o')
+	toggleButton.appendChild(icon)
+	document.body.prepend(toggleButton)

 	// Listen for changes in the OS settings
 	// addListener is used because older versions of Safari don't support addEventListener
 	// prefersDarkQuery set in <head>
 	if (prefersDarkQuery) {
 		prefersDarkQuery.addListener(function (evt) {
-			toggleDarkMode(evt.matches);
-		});
+			toggleDarkMode(evt.matches)
+		})
 	}

 	// Initial setting depending on the prefers-color-mode or localstorage
 	// darkModeState should be set in the document <head> to prevent flash
-	if (darkModeState == undefined) darkModeState = false;
-	toggleDarkMode(darkModeState);
+	if (darkModeState == undefined) darkModeState = false
+	toggleDarkMode(darkModeState)

 	// Toggles the "dark-mode" class on click and sets localStorage state
-	toggleButton.addEventListener("click", () => {
-		darkModeState = !darkModeState;
+	toggleButton.addEventListener('click', () => {
+		darkModeState = !darkModeState

-		toggleDarkMode(darkModeState);
-		localStorage.setItem("dark-mode", darkModeState);
-	});
+		toggleDarkMode(darkModeState)
+		localStorage.setItem('dark-mode', darkModeState)
+	})
 }

 function toggleDarkMode(state) {
-	document.documentElement.classList.toggle("dark-mode", state);
-	document.documentElement.classList.toggle("light-mode", !state);
-	icon.classList.remove("fa-sun-o");
-	icon.classList.remove("fa-moon-o");
-	icon.classList.add(state ? "fa-sun-o" : "fa-moon-o");
-	darkModeState = state;
+	document.documentElement.classList.toggle('dark-mode', state)
+	document.documentElement.classList.toggle('light-mode', !state)
+	icon.classList.remove('fa-sun-o')
+	icon.classList.remove('fa-moon-o')
+	icon.classList.add(state ? 'fa-sun-o' : 'fa-moon-o')
+	darkModeState = state
 }

-document.addEventListener("DOMContentLoaded", load);
+document.addEventListener('DOMContentLoaded', load)
--- a/docs/administration.rst
+++ b/docs/administration.rst
@@ -287,6 +287,10 @@ When you use the provided docker compose script, put the export inside the
 ``export`` folder in your paperless source directory. Specify ``../export``
 as the ``source``.

+.. note::
+
+    Importing from a previous version of Paperless may work, but for best results
+    it is suggested to match the versions.

 .. _utilities-retagger:

--- a/docs/advanced_usage.rst
+++ b/docs/advanced_usage.rst
@@ -7,12 +7,12 @@ easier.

 .. _advanced-matching:

-Matching tags, correspondents and document types
-################################################
+Matching tags, correspondents, document types, and storage paths
+################################################################

-Paperless will compare the matching algorithms defined by every tag and
-correspondent already set in your database to see if they apply to the text in
-a document.  In other words, if you defined a tag called ``Home Utility``
+Paperless will compare the matching algorithms defined by every tag, correspondent,
+document type, and storage path in your database to see if they apply to the text
+in a document. In other words, if you define a tag called ``Home Utility``
 that had a ``match`` property of ``bc hydro`` and a ``matching_algorithm`` of
 ``literal``, Paperless will automatically tag your newly-consumed document with
 your ``Home Utility`` tag so long as the text ``bc hydro`` appears in the body
@@ -22,10 +22,10 @@ The matching logic is quite powerful. It supports searching the text of your
 document with different algorithms, and as such, some experimentation may be
 necessary to get things right.

-In order to have a tag, correspondent, or type assigned automatically to newly
-consumed documents, assign a match and matching algorithm using the web
-interface. These settings define when to assign correspondents, tags, and types
-to documents.
+In order to have a tag, correspondent, document type, or storage path assigned
+automatically to newly consumed documents, assign a match and matching algorithm
+using the web interface. These settings define when to assign tags, correspondents,
+document types, and storage paths to documents.

 The following algorithms are available:

@@ -37,7 +37,7 @@ The following algorithms are available:
 * **Literal:** Matches only if the match appears exactly as provided (i.e. preserve ordering) in the PDF.
 * **Regular expression:** Parses the match as a regular expression and tries to
  find a match within the document.
-* **Fuzzy match:** I dont know. Look at the source.
+* **Fuzzy match:** I don't know. Look at the source.
 * **Auto:** Tries to automatically match new documents. This does not require you
  to set a match. See the notes below.

@@ -47,9 +47,9 @@ defining a match text of ``"Bank of America" BofA`` using the *any* algorithm,
 will match documents that contain either "Bank of America" or "BofA", but will
 not match documents containing "Bank of South America".

-Then just save your tag/correspondent and run another document through the
-consumer.  Once complete, you should see the newly-created document,
-automatically tagged with the appropriate data.
+Then just save your tag, correspondent, document type, or storage path and run
+another document through the consumer.  Once complete, you should see the
+newly-created document, automatically tagged with the appropriate data.


 .. _advanced-automatic_matching:
@@ -58,9 +58,9 @@ Automatic matching
 ==================

 Paperless-ngx comes with a new matching algorithm called *Auto*. This matching
-algorithm tries to assign tags, correspondents, and document types to your
-documents based on how you have already assigned these on existing documents. It
-uses a neural network under the hood.
+algorithm tries to assign tags, correspondents, document types, and storage paths
+to your documents based on how you have already assigned these on existing documents.
+It uses a neural network under the hood.

 If, for example, all your bank statements of your account 123 at the Bank of
 America are tagged with the tag "bofa_123" and the matching algorithm of this
@@ -80,20 +80,21 @@ feature:
  that the neural network only learns from documents which you have correctly
  tagged before.
 * The matching algorithm can only work if there is a correlation between the
-  tag, correspondent, or document type and the document itself. Your bank
-  statements usually contain your bank account number and the name of the bank,
-  so this works reasonably well, However, tags such as "TODO" cannot be
-  automatically assigned.
+  tag, correspondent, document type, or storage path and the document itself.
+  Your bank statements usually contain your bank account number and the name
+  of the bank, so this works reasonably well. However, tags such as "TODO"
+  cannot be automatically assigned.
 * The matching algorithm needs a reasonable number of documents to identify when
-  to assign tags, correspondents, and types. If one out of a thousand documents
-  has the correspondent "Very obscure web shop I bought something five years
-  ago", it will probably not assign this correspondent automatically if you buy
-  something from them again. The more documents, the better.
+  to assign tags, correspondents, document types, and storage paths. If one out of a
+  thousand documents has the correspondent "Very obscure web shop I bought
+  something five years ago", it will probably not assign this correspondent
+  automatically if you buy something from them again. The more documents, the better.
 * Paperless also needs a reasonable amount of negative examples to decide when
-  not to assign a certain tag, correspondent or type. This will usually be the
-  case as you start filling up paperless with documents. Example: If all your
-  documents are either from "Webshop" and "Bank", paperless will assign one of
-  these correspondents to ANY new document, if both are set to automatic matching.
+  not to assign a certain tag, correspondent, document type, or storage path. This will
+  usually be the case as you start filling up paperless with documents.
+  Example: If all your documents are either from "Webshop" or "Bank", paperless
+  will assign one of these correspondents to ANY new document, if both are set
+  to automatic matching.

 Hooking into the consumption process
 ####################################
@@ -120,10 +121,10 @@ Pre-consumption script
 ======================

 Executed after the consumer sees a new document in the consumption folder, but
-before any processing of the document is performed. This script receives exactly
-one argument:
+before any processing of the document is performed. This script can access the
+following relevant environment variables:

-* Document file name
+* ``DOCUMENT_SOURCE_PATH``

 A simple but common example for this would be creating a simple script like
 this:
@@ -133,7 +134,7 @@ this:
 .. code:: bash

    #!/usr/bin/env bash
-    pdf2pdfocr.py -i ${1}
+    pdf2pdfocr.py -i ${DOCUMENT_SOURCE_PATH}

 ``/etc/paperless.conf``

@@ -156,16 +157,20 @@ Post-consumption script
 =======================

 Executed after the consumer has successfully processed a document and has moved it
-into paperless. It receives the following arguments:
+into paperless. It receives the following environment variables:

-* Document id
-* Generated file name
-* Source path
-* Thumbnail path
-* Download URL
-* Thumbnail URL
-* Correspondent
-* Tags
+* ``DOCUMENT_ID``
+* ``DOCUMENT_FILE_NAME``
+* ``DOCUMENT_CREATED``
+* ``DOCUMENT_MODIFIED``
+* ``DOCUMENT_ADDED``
+* ``DOCUMENT_SOURCE_PATH``
+* ``DOCUMENT_ARCHIVE_PATH``
+* ``DOCUMENT_THUMBNAIL_PATH``
+* ``DOCUMENT_DOWNLOAD_URL``
+* ``DOCUMENT_THUMBNAIL_URL``
+* ``DOCUMENT_CORRESPONDENT``
+* ``DOCUMENT_TAGS``

 The script can be in any language, but for a simple shell script
 example, you can take a look at `post-consumption-example.sh`_ in this project.
@@ -268,6 +273,17 @@ If paperless detects that two documents share the same filename, paperless will
 append ``_01``, ``_02``, etc to the filename. This happens if all the placeholders in a filename
 evaluate to the same value.

+.. hint::
+    You can affect how empty placeholders are treated by changing the following setting to
+    `true`.
+
+    .. code::
+
+        PAPERLESS_FILENAME_FORMAT_REMOVE_NONE=True
+
+    Doing this results in all empty placeholders resolving to "" instead of "none" as stated above.
+    Spaces before empty placeholders are removed as well, and empty directories are omitted.
+
 .. hint::

    Paperless checks the filename of a document whenever it is saved. Therefore,
@@ -290,3 +306,59 @@ evaluate to the same value.

    However, keep in mind that inside docker, if files get stored outside of the
    predefined volumes, they will be lost after a restart of paperless.
+
+
+Storage paths
+#############
+
+One of the best things in Paperless is that you can not only access the documents via the
+web interface, but also via the file system.
+
+When a single storage layout is not sufficient for your use case, storage paths come to
+the rescue. Storage paths allow you to configure more precisely where each document is stored
+in the file system.
+
+- Each storage path is a `PAPERLESS_FILENAME_FORMAT` and follows the rules described above
+- Each document is assigned a storage path using the matching algorithms described above, but
+  can be overwritten at any time
+
+For example, you could define the following two storage paths:
+
+1. Normal communications are put into a folder structure sorted by `year/correspondent`
+2. Communications with insurance companies are stored in a flat structure with longer file names,
+   but containing the full date of the correspondence.
+
+.. code::
+
+    By Year = {created_year}/{correspondent}/{title}
+    Insurances = Insurances/{correspondent}/{created_year}-{created_month}-{created_day} {title}
+
+
+If you then map these storage paths to the documents, you might get the following result.
+For simplicity, `By Year` defines the same structure as in the previous example above.
+
+.. code:: text
+
+    2019/                                  # By Year
+      My bank/
+        Statement January.pdf
+        Statement February.pdf
+
+    Insurances/                           # Insurances
+      Healthcare 123/
+        2022-01-01 Statement January.pdf
+        2022-02-02 Letter.pdf
+        2022-02-03 Letter.pdf
+      Dental 456/
+        2021-12-01 New Conditions.pdf
+
+
+.. hint::
+
+    Defining a storage path is optional. If no storage path is defined for a document, the global
+    `PAPERLESS_FILENAME_FORMAT` is applied.
+
+.. caution::
+
+    If you adjust the format of an existing storage path, old documents don't get relocated automatically.
+    You need to run the :ref:`document renamer <utilities-renamer>` to adjust their paths.
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -31,7 +31,8 @@ The objects served by the document endpoint contain the following fields:
 *   ``tags``: List of IDs of tags assigned to this document, or empty list.
 *   ``document_type``: Document type of this document, or null.
 *   ``correspondent``:  Correspondent of this document or null.
-*   ``created``: The date at which this document was created.
+*   ``created``: The date time at which this document was created.
+*   ``created_date``: The date (YYYY-MM-DD) at which this document was created. Optional. Ignored if ``created`` is also passed.
 *   ``modified``: The date at which this document was last edited in paperless. Read-only.
 *   ``added``: The date at which this document was added to paperless. Read-only.
 *   ``archive_serial_number``: The identifier of this document in a physical document archive.
@@ -240,11 +241,13 @@ be instructed to consume the document from there.
 The endpoint supports the following optional form fields:

 *   ``title``: Specify a title that the consumer should use for the document.
+*   ``created``: Specify a DateTime when the document was created (e.g. "2016-04-19" or "2016-04-19 06:15:00+02:00").
 *   ``correspondent``: Specify the ID of a correspondent that the consumer should use for the document.
 *   ``document_type``: Similar to correspondent.
 *   ``tags``: Similar to correspondent. Specify this multiple times to have multiple tags added
    to the document.

+
 The endpoint will immediately return "OK" if the document consumption process
 was started successfully. No additional status information about the consumption
 process itself is available, since that happens in a different process.
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@@ -31,7 +31,7 @@ PAPERLESS_REDIS=<url>

 PAPERLESS_DBHOST=<hostname>
    By default, sqlite is used as the database backend. This can be changed here.
-    Set PAPERLESS_DBHOST and PostgreSQL will be used instead of mysql.
+    Set PAPERLESS_DBHOST and PostgreSQL will be used instead of sqlite.

 PAPERLESS_DBPORT=<port>
    Adjust port if necessary.
@@ -60,6 +60,13 @@ PAPERLESS_DBSSLMODE=<mode>

    Default is ``prefer``.

+PAPERLESS_DB_TIMEOUT=<float>
+    Amount of time for a database connection to wait for the database to unlock.
+    Mostly applicable for an sqlite based installation; consider changing to PostgreSQL
+    if you need to increase this.
+
+    Defaults to unset, keeping the Django defaults.
+
 Paths and folders
 #################

@@ -111,6 +118,14 @@ PAPERLESS_FILENAME_FORMAT=<format>

    Default is none, which disables this feature.

+PAPERLESS_FILENAME_FORMAT_REMOVE_NONE=<bool>
+    Tells paperless to omit placeholders in `PAPERLESS_FILENAME_FORMAT` that would resolve
+    to 'none' from the resulting filename. This also holds true for directory
+    names.
+    See :ref:`advanced-file_name_handling` for details.
+
+    Defaults to `false` which disables this feature.
+
 PAPERLESS_LOGGING_DIR=<path>
    This is where paperless will store log files.

@@ -416,14 +431,23 @@ PAPERLESS_OCR_IMAGE_DPI=<num>
    the produced PDF documents are A4 sized.

 PAPERLESS_OCR_MAX_IMAGE_PIXELS=<num>
-    Paperless will not OCR images that have more pixels than this limit.
-    This is intended to prevent decompression bombs from overloading paperless.
-    Increasing this limit is desired if you face a DecompressionBombError despite
-    the concerning file not being malicious; this could e.g. be caused by invalidly
-    recognized metadata.
-    If you have enough resources or if you are certain that your uploaded files
-    are not malicious you can increase this value to your needs.
-    The default value is 256000000, an image with more pixels than that would not be parsed.
+    Paperless will raise a warning when OCRing images which are over this limit and
+    will not OCR images which are more than twice this limit.  Note this does not
+    prevent the document from being consumed, but could result in missing text content.
+
+    If unset, will default to the value determined by
+    `Pillow <https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.MAX_IMAGE_PIXELS>`_.
+
+    .. note::
+
+        Increasing this limit could cause Paperless to consume additional resources
+        when consuming a file.  Be sure you have sufficient system resources.
+
+    .. caution::
+
+        The limit is intended to prevent malicious files from consuming system resources
+        and causing crashes and other errors.  Only increase this value if you are certain
+        your documents are not malicious and you need the text which was not OCRed.

 PAPERLESS_OCR_USER_ARGS=<json>
    OCRmyPDF offers many more options. Use this parameter to specify any
@@ -519,6 +543,8 @@ PAPERLESS_TASK_WORKERS=<num>
    maintain the automatic matching algorithm, check emails, consume documents,
    etc. This variable specifies how many things it will do in parallel.

+    Defaults to 1
+

 PAPERLESS_THREADS_PER_WORKER=<num>
    Furthermore, paperless uses multiple threads when consuming documents to
@@ -590,6 +616,28 @@ PAPERLESS_CONSUMER_POLLING=<num>

    Defaults to 0, which disables polling and uses filesystem notifications.

+PAPERLESS_CONSUMER_POLLING_RETRY_COUNT=<num>
+    If consumer polling is enabled, sets the number of times paperless will check for a
+    file to remain unmodified.
+
+    Defaults to 5.
+
+PAPERLESS_CONSUMER_POLLING_DELAY=<num>
+    If consumer polling is enabled, sets the delay in seconds between each check (above) paperless
+    will do while waiting for a file to remain unmodified.
+
+    Defaults to 5.
+
+.. _configuration-inotify:
+
+PAPERLESS_CONSUMER_INOTIFY_DELAY=<num>
+    Sets the time in seconds the consumer will wait for additional events
+    from inotify before the consumer will consider a file ready and begin consumption.
+    Certain scanners or network setups may generate multiple events for a single file,
+    leading to multiple consumers working on the same file.  Configure this to
+    prevent that.
+
+    Defaults to 0.5 seconds.

 PAPERLESS_CONSUMER_DELETE_DUPLICATES=<bool>
    When the consumer detects a duplicate document, it will not touch the
@@ -650,7 +698,6 @@ PAPERLESS_CONSUMER_BARCODE_STRING=PATCHT

  Defaults to "PATCHT"

-
 PAPERLESS_CONVERT_MEMORY_LIMIT=<num>
    On smaller systems, or even in the case of Very Large Documents, the consumer
    may explode, complaining about how it's "unable to extend pixel cache".  In
@@ -674,13 +721,6 @@ PAPERLESS_CONVERT_TMPDIR=<path>

    Default is none, which disables the temporary directory.

-PAPERLESS_OPTIMIZE_THUMBNAILS=<bool>
-    Use optipng to optimize thumbnails. This usually reduces the size of
-    thumbnails by about 20%, but uses considerable compute time during
-    consumption.
-
-    Defaults to true.
-
 PAPERLESS_POST_CONSUME_SCRIPT=<filename>
    After a document is consumed, Paperless can trigger an arbitrary script if
    you like.  This script will be passed a number of arguments for you to work
@@ -696,6 +736,9 @@ PAPERLESS_FILENAME_DATE_ORDER=<format>
    The filename will be checked first, and if nothing is found, the document
    text will be checked as normal.

+    A date in a filename must have some separators (`.`, `-`, `/`, etc)
+    for it to be parsed.
+
    Defaults to none, which disables this feature.

 PAPERLESS_THUMBNAIL_FONT_NAME=<filename>
@@ -713,10 +756,7 @@ PAPERLESS_IGNORE_DATES=<string>
    this process. This is useful for special dates (like date of birth) that appear
    in documents regularly but are very unlikely to be the documents creation date.

-    You may specify dates in a multitude of formats supported by dateparser (see
-    https://dateparser.readthedocs.io/en/latest/#popular-formats) but as the dates
-    need to be comma separated, the options are limited.
-    Example: "2020-12-02,22.04.1999"
+    The date is parsed using the order specified in PAPERLESS_DATE_ORDER.

    Defaults to an empty string to not ignore any dates.

@@ -751,9 +791,6 @@ PAPERLESS_CONVERT_BINARY=<path>
 PAPERLESS_GS_BINARY=<path>
    Defaults to "/usr/bin/gs".

-PAPERLESS_OPTIPNG_BINARY=<path>
-    Defaults to "/usr/bin/optipng".
-

 .. _configuration-docker:

@@ -769,9 +806,7 @@ PAPERLESS_WEBSERVER_WORKERS=<num>
    also loads the entire application into memory separately, so increasing this value
    will increase RAM usage.

-    Consider configuring this to 1 on low power devices with limited amount of RAM.
-
-    Defaults to 2.
+    Defaults to 1.

 PAPERLESS_PORT=<port>
    The port number the webserver will listen on inside the container. There are
--- a/docs/scanners.rst
+++ b/docs/scanners.rst
@@ -88,7 +88,7 @@ Physical scanners

 .. [1] Scanners with API Integration allow to push scanned documents directly to :ref:`Paperless API <api-file_uploads>`, sometimes referred to as Webhook or Document POST.
 .. [2] Canon Multi Function Printers show strange behavior over SMB. They close and reopen the file after every page. It's recommended to tune the
-       :ref:`polling <configuration-polling>` configuration values for your scanner. The scanner timeout is 3 minutes, so ``180`` is a good starting point.
+       :ref:`polling <configuration-polling>` and :ref:`inotify <configuration-inotify>` configuration values for your scanner. The scanner timeout is 3 minutes, so ``180`` is a good starting point.

 Mobile phone software
 =====================
--- a/docs/setup.rst
+++ b/docs/setup.rst
@@ -184,6 +184,25 @@ Install Paperless from Docker Hub
    port 8000. Modifying the part before the colon will map requests on another
    port to the webserver running on the default port.

+    **Rootless**
+
+    If you want to run Paperless as a rootless container, you will need to do the
+    following in your ``docker-compose.yml``:
+
+    - set the ``user`` running the container to map to the ``paperless`` user in the
+      container.
+      This value (``user_id`` below) should be the same id that ``USERMAP_UID`` and
+      ``USERMAP_GID`` are set to in the next step.
+      See ``USERMAP_UID`` and ``USERMAP_GID`` :ref:`here <configuration-docker>`.
+
+    Your entry for Paperless should contain something like:
+
+    .. code::
+
+        webserver:
+          image: ghcr.io/paperless-ngx/paperless-ngx:latest
+          user: <user_id>
+
 5.  Modify ``docker-compose.env``, following the comments in the file. The
    most important change is to set ``USERMAP_UID`` and ``USERMAP_GID``
    to the uid and gid of your user on the host system. Use ``id -u`` and
@@ -200,6 +219,19 @@ Install Paperless from Docker Hub
        You can copy any setting from the file ``paperless.conf.example`` and paste it here.
        Have a look at :ref:`configuration` to see what's available.

+    .. note::
+
+        You can utilize Docker secrets for some configuration settings by
+        appending `_FILE` to some configuration values.  This is supported currently
+        only by:
+          * PAPERLESS_DBUSER
+          * PAPERLESS_DBPASS
+          * PAPERLESS_SECRET_KEY
+          * PAPERLESS_AUTO_LOGIN_USERNAME
+          * PAPERLESS_ADMIN_USER
+          * PAPERLESS_ADMIN_MAIL
+          * PAPERLESS_ADMIN_PASSWORD
+
    .. caution::

        Some file systems such as NFS network shares don't support file system
@@ -286,7 +318,6 @@ writing. Windows is not and will never be supported.

    *   ``fonts-liberation`` for generating thumbnails for plain text files
    *   ``imagemagick`` >= 6 for PDF conversion
-    *   ``optipng`` for optimizing thumbnails
    *   ``gnupg`` for handling encrypted documents
    *   ``libpq-dev`` for PostgreSQL
    *   ``libmagic-dev`` for mime type detection
@@ -298,7 +329,7 @@ writing. Windows is not and will never be supported.

    .. code::

-        python3 python3-pip python3-dev imagemagick fonts-liberation optipng gnupg libpq-dev libmagic-dev mime-support libzbar0 poppler-utils
+        python3 python3-pip python3-dev imagemagick fonts-liberation gnupg libpq-dev libmagic-dev mime-support libzbar0 poppler-utils

    These dependencies are required for OCRmyPDF, which is used for text recognition.

@@ -308,7 +339,7 @@ writing. Windows is not and will never be supported.
    *   ``qpdf``
    *   ``liblept5``
    *   ``libxml2``
-    *   ``pngquant``
+    *   ``pngquant`` (suggested for certain PDF image optimizations)
    *   ``zlib1g``
    *   ``tesseract-ocr`` >= 4.0.0 for OCR
    *   ``tesseract-ocr`` language packs (``tesseract-ocr-eng``, ``tesseract-ocr-deu``, etc)
@@ -332,6 +363,12 @@ writing. Windows is not and will never be supported.
 3.  Optional. Install ``postgresql`` and configure a database, user and password for paperless. If you do not wish
    to use PostgreSQL, SQLite is available as well.

+    .. note::
+
+        On bare-metal installations using SQLite, ensure the
+        `JSON1 extension <https://code.djangoproject.com/wiki/JSON1Extension>`_ is enabled. This is
+        usually the case, but not always.
+
 4.  Get the release archive from `<https://github.com/paperless-ngx/paperless-ngx/releases>`_.
    If you clone the git repo as it is, you also have to compile the front end by yourself.
    Extract the archive to a place from where you wish to execute it, such as ``/opt/paperless``.
@@ -724,8 +761,6 @@ configuring some options in paperless can help improve performance immensely:
 *   If you want to perform OCR on the device, consider using ``PAPERLESS_OCR_CLEAN=none``.
    This will speed up OCR times and use less memory at the expense of slightly worse
    OCR results.
-*   Set ``PAPERLESS_OPTIMIZE_THUMBNAILS`` to 'false' if you want faster consumption
-    times. Thumbnails will be about 20% larger.
 *   If using docker, consider setting ``PAPERLESS_WEBSERVER_WORKERS`` to
    1. This will save some memory.

--- a/docs/troubleshooting.rst
+++ b/docs/troubleshooting.rst
@@ -235,3 +235,85 @@ You might find messages like these in your log files:
 This indicates that paperless failed to read PDF metadata from one of your documents. This happens when you
 open the affected documents in paperless for editing. Paperless will continue to work, and will simply not
 show the invalid metadata.
+
+Consumer fails with a FileNotFoundError
+#######################################
+
+You might find messages like these in your log files:
+
+.. code::
+
+    [ERROR] [paperless.consumer] Error while consuming document SCN_0001.pdf: FileNotFoundError: [Errno 2] No such file or directory: '/tmp/ocrmypdf.io.yhk3zbv0/origin.pdf'
+    Traceback (most recent call last):
+      File "/app/paperless/src/paperless_tesseract/parsers.py", line 261, in parse
+        ocrmypdf.ocr(**args)
+      File "/usr/local/lib/python3.8/dist-packages/ocrmypdf/api.py", line 337, in ocr
+        return run_pipeline(options=options, plugin_manager=plugin_manager, api=True)
+      File "/usr/local/lib/python3.8/dist-packages/ocrmypdf/_sync.py", line 385, in run_pipeline
+        exec_concurrent(context, executor)
+      File "/usr/local/lib/python3.8/dist-packages/ocrmypdf/_sync.py", line 302, in exec_concurrent
+        pdf = post_process(pdf, context, executor)
+      File "/usr/local/lib/python3.8/dist-packages/ocrmypdf/_sync.py", line 235, in post_process
+        pdf_out = metadata_fixup(pdf_out, context)
+      File "/usr/local/lib/python3.8/dist-packages/ocrmypdf/_pipeline.py", line 798, in metadata_fixup
+        with pikepdf.open(context.origin) as original, pikepdf.open(working_file) as pdf:
+      File "/usr/local/lib/python3.8/dist-packages/pikepdf/_methods.py", line 923, in open
+        pdf = Pdf._open(
+    FileNotFoundError: [Errno 2] No such file or directory: '/tmp/ocrmypdf.io.yhk3zbv0/origin.pdf'
+
+This probably indicates paperless tried to consume the same file twice.  This can happen for a number of reasons,
+depending on how documents are placed into the consume folder.  If paperless is using inotify (the default) to
+check for documents, try adjusting the :ref:`inotify configuration <configuration-inotify>`.  If polling is enabled,
+try adjusting the :ref:`polling configuration <configuration-polling>`.
+
+Consumer fails waiting for file to remain unmodified.
+#####################################################
+
+You might find messages like these in your log files:
+
+.. code::
+
+    [ERROR] [paperless.management.consumer] Timeout while waiting on file /usr/src/paperless/src/../consume/SCN_0001.pdf to remain unmodified.
+
+This indicates paperless timed out while waiting for the file to be completely written to the consume folder.
+Adjusting :ref:`polling configuration <configuration-polling>` values should resolve the issue.
+
+.. note::
+
+    The user will need to manually move the file out of the consume folder and
+    back in, for the initial failing file to be consumed.
+
+Consumer fails reporting "OS reports file as busy still".
+#########################################################
+
+You might find messages like these in your log files:
+
+.. code::
+
+    [WARNING] [paperless.management.consumer] Not consuming file /usr/src/paperless/src/../consume/SCN_0001.pdf: OS reports file as busy still
+
+This indicates paperless was unable to open the file, as the OS reported the file as still being in use.  To prevent a
+crash, paperless did not try to consume the file.  If paperless is using inotify (the default) to
+check for documents, try adjusting the :ref:`inotify configuration <configuration-inotify>`.  If polling is enabled,
+try adjusting the :ref:`polling configuration <configuration-polling>`.
+
+.. note::
+
+    The user will need to manually move the file out of the consume folder and
+    back in, for the initial failing file to be consumed.
+
+Log reports "Creating PaperlessTask failed".
+#########################################################
+
+You might find messages like these in your log files:
+
+.. code::
+
+    [ERROR] [paperless.management.consumer] Creating PaperlessTask failed: db locked
+
+You are likely using an sqlite based installation, with an increased number of workers and are running into sqlite's concurrency limitations.
+Uploading or consuming multiple files at once results in many workers attempting to access the database simultaneously.
+
+Consider changing to the PostgreSQL database if you will be processing many documents at once often.  Otherwise,
+try tweaking the ``PAPERLESS_DB_TIMEOUT`` setting to allow more time for the database to unlock.  This may have
+minor performance implications.
--- a/docs/usage_overview.rst
+++ b/docs/usage_overview.rst
@@ -161,6 +161,9 @@ These are as follows:
    will not consume flagged mails.
 *   **Move to folder:** Moves consumed mails out of the way so that paperless wont
    consume them again.
+*   **Add custom Tag:** Adds a custom tag to mails with consumed documents (the IMAP
+    standard calls these "keywords"). Paperless will not consume mails already tagged.
+    Not all mail servers support this feature!

 .. caution::