Feature: Dynamic document storage pathes (#916)

* Added devcontainer * Add feature storage pathes * Exclude tests and add versioning * Check escaping * Check escaping * Check quoting * Echo * Escape * Escape : * Double escape \ * Escaping * Remove if * Escape colon * Missing \ * Esacpe : * Escape all * test * Remove sed * Fix exclude * Remove SED command * Add LD_LIBRARY_PATH * Adjusted to v1.7 * Updated test-cases * Remove devcontainer * Removed internal build-file * Run pre-commit * Corrected flak8 error * Adjusted to v1.7 * Updated test-cases * Corrected flak8 error * Adjusted to new plural translations * Small adjustments due to code-review backend * Adjusted line-break * Removed PAPERLESS prefix from settings variables * Corrected style change due to search+replace * First documentation draft * Revert changes to Pipfile * Add sphinx-autobuild with keep-outdated * Revert merge error that results in wrong storage path is evaluated * Adjust styles of generated files ... * Adds additional testing to cover dynamic storage path functionality * Remove unnecessary condition * Add hint to edit storage path dialog * Correct spelling of pathes to paths * Minor documentation tweaks * Minor typo * improving wrapping of filter editor buttons with new storage path button * Update .gitignore * Fix select border radius in non input-groups * Better storage path edit hint * Add note to edit storage path dialog re document_renamer * Add note to bulk edit storage path re document_renamer * Rename FILTER_STORAGE_DIRECTORY to PATH * Fix broken filter rule parsing * Show default storage if unspecified * Remove note re storage path on bulk edit * Add basic validation of filename variables Co-authored-by: Markus Kling <markus@markus-kling.net> Co-authored-by: Trenton Holmes <holmes.trenton@gmail.com> Co-authored-by: Michael Shamoon <4887959+shamoon@users.noreply.github.com> Co-authored-by: Quinn Casey <quinn@quinncasey.com>
2025-12-14 01:21:14 -06:00 · 2022-05-19 23:42:25 +02:00
parent c5e03c7f28
commit dd3b5c129c
67 changed files with 1427 additions and 203 deletions
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -24,6 +24,7 @@ I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
 help:
 	@echo "Please use \`make <target>' where <target> is one of"
 	@echo "  html       to make standalone HTML files"
+	@echo "  livehtml   to preview changes with live reload in your browser"
 	@echo "  dirhtml    to make HTML files named index.html in directories"
 	@echo "  singlehtml to make a single large HTML file"
 	@echo "  pickle     to make pickle files"
@@ -54,6 +55,9 @@ html:
 	@echo
 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."

+livehtml:
+	sphinx-autobuild "./" "$(BUILDDIR)" $(O)
+
 dirhtml:
 	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
 	@echo
--- a/docs/_static/js/darkmode.js
+++ b/docs/_static/js/darkmode.js
@@ -1,47 +1,47 @@
-let toggleButton;
-let icon;
+let toggleButton
+let icon

 function load() {
-	"use strict";
+	'use strict'

-	toggleButton = document.createElement("button");
-	toggleButton.setAttribute("title", "Toggle dark mode");
-	toggleButton.classList.add("dark-mode-toggle");
-	icon = document.createElement("i");
-	icon.classList.add("fa", darkModeState ? "fa-sun-o" : "fa-moon-o");
-	toggleButton.appendChild(icon);
-	document.body.prepend(toggleButton);
+	toggleButton = document.createElement('button')
+	toggleButton.setAttribute('title', 'Toggle dark mode')
+	toggleButton.classList.add('dark-mode-toggle')
+	icon = document.createElement('i')
+	icon.classList.add('fa', darkModeState ? 'fa-sun-o' : 'fa-moon-o')
+	toggleButton.appendChild(icon)
+	document.body.prepend(toggleButton)

 	// Listen for changes in the OS settings
 	// addListener is used because older versions of Safari don't support addEventListener
 	// prefersDarkQuery set in <head>
 	if (prefersDarkQuery) {
 		prefersDarkQuery.addListener(function (evt) {
-			toggleDarkMode(evt.matches);
-		});
+			toggleDarkMode(evt.matches)
+		})
 	}

 	// Initial setting depending on the prefers-color-mode or localstorage
 	// darkModeState should be set in the document <head> to prevent flash
-	if (darkModeState == undefined) darkModeState = false;
-	toggleDarkMode(darkModeState);
+	if (darkModeState == undefined) darkModeState = false
+	toggleDarkMode(darkModeState)

 	// Toggles the "dark-mode" class on click and sets localStorage state
-	toggleButton.addEventListener("click", () => {
-		darkModeState = !darkModeState;
+	toggleButton.addEventListener('click', () => {
+		darkModeState = !darkModeState

-		toggleDarkMode(darkModeState);
-		localStorage.setItem("dark-mode", darkModeState);
-	});
+		toggleDarkMode(darkModeState)
+		localStorage.setItem('dark-mode', darkModeState)
+	})
 }

 function toggleDarkMode(state) {
-	document.documentElement.classList.toggle("dark-mode", state);
-	document.documentElement.classList.toggle("light-mode", !state);
-	icon.classList.remove("fa-sun-o");
-	icon.classList.remove("fa-moon-o");
-	icon.classList.add(state ? "fa-sun-o" : "fa-moon-o");
-	darkModeState = state;
+	document.documentElement.classList.toggle('dark-mode', state)
+	document.documentElement.classList.toggle('light-mode', !state)
+	icon.classList.remove('fa-sun-o')
+	icon.classList.remove('fa-moon-o')
+	icon.classList.add(state ? 'fa-sun-o' : 'fa-moon-o')
+	darkModeState = state
 }

-document.addEventListener("DOMContentLoaded", load);
+document.addEventListener('DOMContentLoaded', load)
--- a/docs/advanced_usage.rst
+++ b/docs/advanced_usage.rst
@@ -7,12 +7,12 @@ easier.

 .. _advanced-matching:

-Matching tags, correspondents and document types
-################################################
+Matching tags, correspondents, document types, and storage paths
+################################################################

-Paperless will compare the matching algorithms defined by every tag and
-correspondent already set in your database to see if they apply to the text in
-a document.  In other words, if you defined a tag called ``Home Utility``
+Paperless will compare the matching algorithms defined by every tag, correspondent,
+document type, and storage path in your database to see if they apply to the text
+in a document. In other words, if you define a tag called ``Home Utility``
 that had a ``match`` property of ``bc hydro`` and a ``matching_algorithm`` of
 ``literal``, Paperless will automatically tag your newly-consumed document with
 your ``Home Utility`` tag so long as the text ``bc hydro`` appears in the body
@@ -22,10 +22,10 @@ The matching logic is quite powerful. It supports searching the text of your
 document with different algorithms, and as such, some experimentation may be
 necessary to get things right.

-In order to have a tag, correspondent, or type assigned automatically to newly
-consumed documents, assign a match and matching algorithm using the web
-interface. These settings define when to assign correspondents, tags, and types
-to documents.
+In order to have a tag, correspondent, document type, or storage path assigned
+automatically to newly consumed documents, assign a match and matching algorithm
+using the web interface. These settings define when to assign tags, correspondents,
+document types, and storage paths to documents.

 The following algorithms are available:

@@ -37,7 +37,7 @@ The following algorithms are available:
 * **Literal:** Matches only if the match appears exactly as provided (i.e. preserve ordering) in the PDF.
 * **Regular expression:** Parses the match as a regular expression and tries to
  find a match within the document.
-* **Fuzzy match:** I dont know. Look at the source.
+* **Fuzzy match:** I don't know. Look at the source.
 * **Auto:** Tries to automatically match new documents. This does not require you
  to set a match. See the notes below.

@@ -47,9 +47,9 @@ defining a match text of ``"Bank of America" BofA`` using the *any* algorithm,
 will match documents that contain either "Bank of America" or "BofA", but will
 not match documents containing "Bank of South America".

-Then just save your tag/correspondent and run another document through the
-consumer.  Once complete, you should see the newly-created document,
-automatically tagged with the appropriate data.
+Then just save your tag, correspondent, document type, or storage path and run
+another document through the consumer.  Once complete, you should see the
+newly-created document, automatically tagged with the appropriate data.


 .. _advanced-automatic_matching:
@@ -58,9 +58,9 @@ Automatic matching
 ==================

 Paperless-ngx comes with a new matching algorithm called *Auto*. This matching
-algorithm tries to assign tags, correspondents, and document types to your
-documents based on how you have already assigned these on existing documents. It
-uses a neural network under the hood.
+algorithm tries to assign tags, correspondents, document types, and storage paths
+to your documents based on how you have already assigned these on existing documents.
+It uses a neural network under the hood.

 If, for example, all your bank statements of your account 123 at the Bank of
 America are tagged with the tag "bofa_123" and the matching algorithm of this
@@ -80,20 +80,21 @@ feature:
  that the neural network only learns from documents which you have correctly
  tagged before.
 * The matching algorithm can only work if there is a correlation between the
-  tag, correspondent, or document type and the document itself. Your bank
-  statements usually contain your bank account number and the name of the bank,
-  so this works reasonably well, However, tags such as "TODO" cannot be
-  automatically assigned.
+  tag, correspondent, document type, or storage path and the document itself.
+  Your bank statements usually contain your bank account number and the name
+  of the bank, so this works reasonably well. However, tags such as "TODO"
+  cannot be automatically assigned.
 * The matching algorithm needs a reasonable number of documents to identify when
-  to assign tags, correspondents, and types. If one out of a thousand documents
-  has the correspondent "Very obscure web shop I bought something five years
-  ago", it will probably not assign this correspondent automatically if you buy
-  something from them again. The more documents, the better.
+  to assign tags, correspondents, storage paths, and types. If one out of a
+  thousand documents has the correspondent "Very obscure web shop I bought
+  something five years ago", it will probably not assign this correspondent
+  automatically if you buy something from them again. The more documents, the better.
 * Paperless also needs a reasonable amount of negative examples to decide when
-  not to assign a certain tag, correspondent or type. This will usually be the
-  case as you start filling up paperless with documents. Example: If all your
-  documents are either from "Webshop" and "Bank", paperless will assign one of
-  these correspondents to ANY new document, if both are set to automatic matching.
+  not to assign a certain tag, correspondent, document type, or storage path. This will
+  usually be the case as you start filling up paperless with documents.
+  Example: If all your documents are either from "Webshop" or "Bank", paperless
+  will assign one of these correspondents to ANY new document, if both are set
+  to automatic matching.

 Hooking into the consumption process
 ####################################
@@ -268,6 +269,17 @@ If paperless detects that two documents share the same filename, paperless will
 append ``_01``, ``_02``, etc to the filename. This happens if all the placeholders in a filename
 evaluate to the same value.

+.. hint::
+    You can affect how empty placeholders are treated by changing the following setting to
+    `true`.
+
+    .. code::
+
+        PAPERLESS_FILENAME_FORMAT_REMOVE_NONE=True
+
+    Doing this results in all empty placeholders resolving to "" instead of "none" as stated above.
+    Spaces before empty placeholders are removed as well, and empty directories are omitted.
+
 .. hint::

    Paperless checks the filename of a document whenever it is saved. Therefore,
@@ -290,3 +302,59 @@ evaluate to the same value.

    However, keep in mind that inside docker, if files get stored outside of the
    predefined volumes, they will be lost after a restart of paperless.
+
+
+Storage paths
+#############
+
+One of the best things in Paperless is that you can not only access the documents via the
+web interface, but also via the file system.
+
+When a single storage layout is not sufficient for your use case, storage paths come to
+the rescue. Storage paths allow you to configure more precisely where each document is stored
+in the file system.
+
+- Each storage path is a `PAPERLESS_FILENAME_FORMAT` and follows the rules described above
+- Each document is assigned a storage path using the matching algorithms described above, but
+  can be overwritten at any time
+
+For example, you could define the following two storage paths:
+
+1. Normal communications are put into a folder structure sorted by `year/correspondent`
+2. Communications with insurance companies are stored in a flat structure with longer file names,
+   but containing the full date of the correspondence.
+
+.. code::
+
+    By Year = {created_year}/{correspondent}/{title}
+    Insurances = Insurances/{correspondent}/{created_year}-{created_month}-{created_day} {title}
+
+
+If you then map these storage paths to the documents, you might get the following result.
+For simplicity, `By Year` defines the same structure as in the previous example above.
+
+.. code:: text
+
+   2019/                                   # By Year
+      My bank/
+        Statement January.pdf
+        Statement February.pdf
+
+   Insurances/                             # Insurances
+      Healthcare 123/
+        2022-01-01 Statement January.pdf
+        2022-02-02 Letter.pdf
+        2022-02-03 Letter.pdf
+      Dental 456/
+        2021-12-01 New Conditions.pdf
+
+
+.. hint::
+
+    Defining a storage path is optional. If no storage path is defined for a document, the global
+    `PAPERLESS_FILENAME_FORMAT` is applied.
+
+.. caution::
+
+    If you adjust the format of an existing storage path, old documents don't get relocated automatically.
+    You need to run the :ref:`document renamer <utilities-renamer>` to adjust their paths.
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@@ -111,6 +111,14 @@ PAPERLESS_FILENAME_FORMAT=<format>

    Default is none, which disables this feature.

+PAPERLESS_FILENAME_FORMAT_REMOVE_NONE=<bool>
+    Tells paperless to omit placeholders in `PAPERLESS_FILENAME_FORMAT` that would resolve
+    to 'none' from the resulting filename. This also holds true for directory
+    names.
+    See :ref:`advanced-file_name_handling` for details.
+
+    Defaults to `false` which disables this feature.
+
 PAPERLESS_LOGGING_DIR=<path>
    This is where paperless will store log files.