mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-07-28 18:24:38 -05:00
Merge pull request #1240 from paperless-ngx/beta
[Beta] Paperless-ngx v1.8.0 Release Candidate 1
This commit is contained in:
@@ -1,17 +0,0 @@
|
||||
FROM python:3.5.1
|
||||
|
||||
# Install Sphinx and Pygments
|
||||
RUN pip install --no-cache-dir Sphinx Pygments \
|
||||
# Setup directories, copy data
|
||||
&& mkdir /build
|
||||
|
||||
COPY . /build
|
||||
WORKDIR /build/docs
|
||||
|
||||
# Build documentation
|
||||
RUN make html
|
||||
|
||||
# Start webserver
|
||||
WORKDIR /build/docs/_build/html
|
||||
EXPOSE 8000/tcp
|
||||
CMD ["python3", "-m", "http.server"]
|
@@ -24,6 +24,7 @@ I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
|
||||
help:
|
||||
@echo "Please use \`make <target>' where <target> is one of"
|
||||
@echo " html to make standalone HTML files"
|
||||
@echo " livehtml to preview changes with live reload in your browser"
|
||||
@echo " dirhtml to make HTML files named index.html in directories"
|
||||
@echo " singlehtml to make a single large HTML file"
|
||||
@echo " pickle to make pickle files"
|
||||
@@ -54,6 +55,9 @@ html:
|
||||
@echo
|
||||
@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
|
||||
|
||||
livehtml:
|
||||
sphinx-autobuild "./" "$(BUILDDIR)" $(O)
|
||||
|
||||
dirhtml:
|
||||
$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
|
||||
@echo
|
||||
|
52
docs/_static/js/darkmode.js
vendored
52
docs/_static/js/darkmode.js
vendored
@@ -1,47 +1,47 @@
|
||||
let toggleButton;
|
||||
let icon;
|
||||
let toggleButton
|
||||
let icon
|
||||
|
||||
function load() {
|
||||
"use strict";
|
||||
'use strict'
|
||||
|
||||
toggleButton = document.createElement("button");
|
||||
toggleButton.setAttribute("title", "Toggle dark mode");
|
||||
toggleButton.classList.add("dark-mode-toggle");
|
||||
icon = document.createElement("i");
|
||||
icon.classList.add("fa", darkModeState ? "fa-sun-o" : "fa-moon-o");
|
||||
toggleButton.appendChild(icon);
|
||||
document.body.prepend(toggleButton);
|
||||
toggleButton = document.createElement('button')
|
||||
toggleButton.setAttribute('title', 'Toggle dark mode')
|
||||
toggleButton.classList.add('dark-mode-toggle')
|
||||
icon = document.createElement('i')
|
||||
icon.classList.add('fa', darkModeState ? 'fa-sun-o' : 'fa-moon-o')
|
||||
toggleButton.appendChild(icon)
|
||||
document.body.prepend(toggleButton)
|
||||
|
||||
// Listen for changes in the OS settings
|
||||
// addListener is used because older versions of Safari don't support addEventListener
|
||||
// prefersDarkQuery set in <head>
|
||||
if (prefersDarkQuery) {
|
||||
prefersDarkQuery.addListener(function (evt) {
|
||||
toggleDarkMode(evt.matches);
|
||||
});
|
||||
toggleDarkMode(evt.matches)
|
||||
})
|
||||
}
|
||||
|
||||
// Initial setting depending on the prefers-color-mode or localstorage
|
||||
// darkModeState should be set in the document <head> to prevent flash
|
||||
if (darkModeState == undefined) darkModeState = false;
|
||||
toggleDarkMode(darkModeState);
|
||||
if (darkModeState == undefined) darkModeState = false
|
||||
toggleDarkMode(darkModeState)
|
||||
|
||||
// Toggles the "dark-mode" class on click and sets localStorage state
|
||||
toggleButton.addEventListener("click", () => {
|
||||
darkModeState = !darkModeState;
|
||||
toggleButton.addEventListener('click', () => {
|
||||
darkModeState = !darkModeState
|
||||
|
||||
toggleDarkMode(darkModeState);
|
||||
localStorage.setItem("dark-mode", darkModeState);
|
||||
});
|
||||
toggleDarkMode(darkModeState)
|
||||
localStorage.setItem('dark-mode', darkModeState)
|
||||
})
|
||||
}
|
||||
|
||||
function toggleDarkMode(state) {
|
||||
document.documentElement.classList.toggle("dark-mode", state);
|
||||
document.documentElement.classList.toggle("light-mode", !state);
|
||||
icon.classList.remove("fa-sun-o");
|
||||
icon.classList.remove("fa-moon-o");
|
||||
icon.classList.add(state ? "fa-sun-o" : "fa-moon-o");
|
||||
darkModeState = state;
|
||||
document.documentElement.classList.toggle('dark-mode', state)
|
||||
document.documentElement.classList.toggle('light-mode', !state)
|
||||
icon.classList.remove('fa-sun-o')
|
||||
icon.classList.remove('fa-moon-o')
|
||||
icon.classList.add(state ? 'fa-sun-o' : 'fa-moon-o')
|
||||
darkModeState = state
|
||||
}
|
||||
|
||||
document.addEventListener("DOMContentLoaded", load);
|
||||
document.addEventListener('DOMContentLoaded', load)
|
||||
|
@@ -287,6 +287,10 @@ When you use the provided docker compose script, put the export inside the
|
||||
``export`` folder in your paperless source directory. Specify ``../export``
|
||||
as the ``source``.
|
||||
|
||||
.. note::
|
||||
|
||||
Importing from a previous version of Paperless may work, but for best results
|
||||
it is suggested to match the versions.
|
||||
|
||||
.. _utilities-retagger:
|
||||
|
||||
|
@@ -7,12 +7,12 @@ easier.
|
||||
|
||||
.. _advanced-matching:
|
||||
|
||||
Matching tags, correspondents and document types
|
||||
################################################
|
||||
Matching tags, correspondents, document types, and storage paths
|
||||
################################################################
|
||||
|
||||
Paperless will compare the matching algorithms defined by every tag and
|
||||
correspondent already set in your database to see if they apply to the text in
|
||||
a document. In other words, if you defined a tag called ``Home Utility``
|
||||
Paperless will compare the matching algorithms defined by every tag, correspondent,
|
||||
document type, and storage path in your database to see if they apply to the text
|
||||
in a document. In other words, if you define a tag called ``Home Utility``
|
||||
that had a ``match`` property of ``bc hydro`` and a ``matching_algorithm`` of
|
||||
``literal``, Paperless will automatically tag your newly-consumed document with
|
||||
your ``Home Utility`` tag so long as the text ``bc hydro`` appears in the body
|
||||
@@ -22,10 +22,10 @@ The matching logic is quite powerful. It supports searching the text of your
|
||||
document with different algorithms, and as such, some experimentation may be
|
||||
necessary to get things right.
|
||||
|
||||
In order to have a tag, correspondent, or type assigned automatically to newly
|
||||
consumed documents, assign a match and matching algorithm using the web
|
||||
interface. These settings define when to assign correspondents, tags, and types
|
||||
to documents.
|
||||
In order to have a tag, correspondent, document type, or storage path assigned
|
||||
automatically to newly consumed documents, assign a match and matching algorithm
|
||||
using the web interface. These settings define when to assign tags, correspondents,
|
||||
document types, and storage paths to documents.
|
||||
|
||||
The following algorithms are available:
|
||||
|
||||
@@ -37,7 +37,7 @@ The following algorithms are available:
|
||||
* **Literal:** Matches only if the match appears exactly as provided (i.e. preserve ordering) in the PDF.
|
||||
* **Regular expression:** Parses the match as a regular expression and tries to
|
||||
find a match within the document.
|
||||
* **Fuzzy match:** I dont know. Look at the source.
|
||||
* **Fuzzy match:** I don't know. Look at the source.
|
||||
* **Auto:** Tries to automatically match new documents. This does not require you
|
||||
to set a match. See the notes below.
|
||||
|
||||
@@ -47,9 +47,9 @@ defining a match text of ``"Bank of America" BofA`` using the *any* algorithm,
|
||||
will match documents that contain either "Bank of America" or "BofA", but will
|
||||
not match documents containing "Bank of South America".
|
||||
|
||||
Then just save your tag/correspondent and run another document through the
|
||||
consumer. Once complete, you should see the newly-created document,
|
||||
automatically tagged with the appropriate data.
|
||||
Then just save your tag, correspondent, document type, or storage path and run
|
||||
another document through the consumer. Once complete, you should see the
|
||||
newly-created document, automatically tagged with the appropriate data.
|
||||
|
||||
|
||||
.. _advanced-automatic_matching:
|
||||
@@ -58,9 +58,9 @@ Automatic matching
|
||||
==================
|
||||
|
||||
Paperless-ngx comes with a new matching algorithm called *Auto*. This matching
|
||||
algorithm tries to assign tags, correspondents, and document types to your
|
||||
documents based on how you have already assigned these on existing documents. It
|
||||
uses a neural network under the hood.
|
||||
algorithm tries to assign tags, correspondents, document types, and storage paths
|
||||
to your documents based on how you have already assigned these on existing documents.
|
||||
It uses a neural network under the hood.
|
||||
|
||||
If, for example, all your bank statements of your account 123 at the Bank of
|
||||
America are tagged with the tag "bofa_123" and the matching algorithm of this
|
||||
@@ -80,20 +80,21 @@ feature:
|
||||
that the neural network only learns from documents which you have correctly
|
||||
tagged before.
|
||||
* The matching algorithm can only work if there is a correlation between the
|
||||
tag, correspondent, or document type and the document itself. Your bank
|
||||
statements usually contain your bank account number and the name of the bank,
|
||||
so this works reasonably well, However, tags such as "TODO" cannot be
|
||||
automatically assigned.
|
||||
tag, correspondent, document type, or storage path and the document itself.
|
||||
Your bank statements usually contain your bank account number and the name
|
||||
of the bank, so this works reasonably well. However, tags such as "TODO"
|
||||
cannot be automatically assigned.
|
||||
* The matching algorithm needs a reasonable number of documents to identify when
|
||||
to assign tags, correspondents, and types. If one out of a thousand documents
|
||||
has the correspondent "Very obscure web shop I bought something five years
|
||||
ago", it will probably not assign this correspondent automatically if you buy
|
||||
something from them again. The more documents, the better.
|
||||
to assign tags, correspondents, storage paths, and types. If one out of a
|
||||
thousand documents has the correspondent "Very obscure web shop I bought
|
||||
something five years ago", it will probably not assign this correspondent
|
||||
automatically if you buy something from them again. The more documents, the better.
|
||||
* Paperless also needs a reasonable amount of negative examples to decide when
|
||||
not to assign a certain tag, correspondent or type. This will usually be the
|
||||
case as you start filling up paperless with documents. Example: If all your
|
||||
documents are either from "Webshop" and "Bank", paperless will assign one of
|
||||
these correspondents to ANY new document, if both are set to automatic matching.
|
||||
not to assign a certain tag, correspondent, document type, or storage path. This will
|
||||
usually be the case as you start filling up paperless with documents.
|
||||
Example: If all your documents are either from "Webshop" or "Bank", paperless
|
||||
will assign one of these correspondents to ANY new document, if both are set
|
||||
to automatic matching.
|
||||
|
||||
Hooking into the consumption process
|
||||
####################################
|
||||
@@ -120,10 +121,10 @@ Pre-consumption script
|
||||
======================
|
||||
|
||||
Executed after the consumer sees a new document in the consumption folder, but
|
||||
before any processing of the document is performed. This script receives exactly
|
||||
one argument:
|
||||
before any processing of the document is performed. This script can access the
|
||||
following relevant environment variables:
|
||||
|
||||
* Document file name
|
||||
* ``DOCUMENT_SOURCE_PATH``
|
||||
|
||||
A simple but common example for this would be creating a simple script like
|
||||
this:
|
||||
@@ -133,7 +134,7 @@ this:
|
||||
.. code:: bash
|
||||
|
||||
#!/usr/bin/env bash
|
||||
pdf2pdfocr.py -i ${1}
|
||||
pdf2pdfocr.py -i ${DOCUMENT_SOURCE_PATH}
|
||||
|
||||
``/etc/paperless.conf``
|
||||
|
||||
@@ -156,16 +157,20 @@ Post-consumption script
|
||||
=======================
|
||||
|
||||
Executed after the consumer has successfully processed a document and has moved it
|
||||
into paperless. It receives the following arguments:
|
||||
into paperless. It receives the following environment variables:
|
||||
|
||||
* Document id
|
||||
* Generated file name
|
||||
* Source path
|
||||
* Thumbnail path
|
||||
* Download URL
|
||||
* Thumbnail URL
|
||||
* Correspondent
|
||||
* Tags
|
||||
* ``DOCUMENT_ID``
|
||||
* ``DOCUMENT_FILE_NAME``
|
||||
* ``DOCUMENT_CREATED``
|
||||
* ``DOCUMENT_MODIFIED``
|
||||
* ``DOCUMENT_ADDED``
|
||||
* ``DOCUMENT_SOURCE_PATH``
|
||||
* ``DOCUMENT_ARCHIVE_PATH``
|
||||
* ``DOCUMENT_THUMBNAIL_PATH``
|
||||
* ``DOCUMENT_DOWNLOAD_URL``
|
||||
* ``DOCUMENT_THUMBNAIL_URL``
|
||||
* ``DOCUMENT_CORRESPONDENT``
|
||||
* ``DOCUMENT_TAGS``
|
||||
|
||||
The script can be in any language, but for a simple shell script
|
||||
example, you can take a look at `post-consumption-example.sh`_ in this project.
|
||||
@@ -268,6 +273,17 @@ If paperless detects that two documents share the same filename, paperless will
|
||||
append ``_01``, ``_02``, etc to the filename. This happens if all the placeholders in a filename
|
||||
evaluate to the same value.
|
||||
|
||||
.. hint::
|
||||
You can affect how empty placeholders are treated by changing the following setting to
|
||||
`true`.
|
||||
|
||||
.. code::
|
||||
|
||||
PAPERLESS_FILENAME_FORMAT_REMOVE_NONE=True
|
||||
|
||||
Doing this results in all empty placeholders resolving to "" instead of "none" as stated above.
|
||||
Spaces before empty placeholders are removed as well, and empty directories are omitted.
|
||||
|
||||
.. hint::
|
||||
|
||||
Paperless checks the filename of a document whenever it is saved. Therefore,
|
||||
@@ -290,3 +306,59 @@ evaluate to the same value.
|
||||
|
||||
However, keep in mind that inside docker, if files get stored outside of the
|
||||
predefined volumes, they will be lost after a restart of paperless.
|
||||
|
||||
|
||||
Storage paths
|
||||
#############
|
||||
|
||||
One of the best things in Paperless is that you can not only access the documents via the
|
||||
web interface, but also via the file system.
|
||||
|
||||
When a single storage layout is not sufficient for your use case, storage paths come to
|
||||
the rescue. Storage paths allow you to configure more precisely where each document is stored
|
||||
in the file system.
|
||||
|
||||
- Each storage path is a `PAPERLESS_FILENAME_FORMAT` and follows the rules described above
|
||||
- Each document is assigned a storage path using the matching algorithms described above, but
|
||||
can be overwritten at any time
|
||||
|
||||
For example, you could define the following two storage paths:
|
||||
|
||||
1. Normal communications are put into a folder structure sorted by `year/correspondent`
|
||||
2. Communications with insurance companies are stored in a flat structure with longer file names,
|
||||
but containing the full date of the correspondence.
|
||||
|
||||
.. code::
|
||||
|
||||
By Year = {created_year}/{correspondent}/{title}
|
||||
Insurances = Insurances/{correspondent}/{created_year}-{created_month}-{created_day} {title}
|
||||
|
||||
|
||||
If you then map these storage paths to the documents, you might get the following result.
|
||||
For simplicity, `By Year` defines the same structure as in the previous example above.
|
||||
|
||||
.. code:: text
|
||||
|
||||
2019/ # By Year
|
||||
My bank/
|
||||
Statement January.pdf
|
||||
Statement February.pdf
|
||||
|
||||
Insurances/ # Insurances
|
||||
Healthcare 123/
|
||||
2022-01-01 Statement January.pdf
|
||||
2022-02-02 Letter.pdf
|
||||
2022-02-03 Letter.pdf
|
||||
Dental 456/
|
||||
2021-12-01 New Conditions.pdf
|
||||
|
||||
|
||||
.. hint::
|
||||
|
||||
Defining a storage path is optional. If no storage path is defined for a document, the global
|
||||
`PAPERLESS_FILENAME_FORMAT` is applied.
|
||||
|
||||
.. caution::
|
||||
|
||||
If you adjust the format of an existing storage path, old documents don't get relocated automatically.
|
||||
You need to run the :ref:`document renamer <utilities-renamer>` to adjust their paths.
|
||||
|
@@ -31,7 +31,8 @@ The objects served by the document endpoint contain the following fields:
|
||||
* ``tags``: List of IDs of tags assigned to this document, or empty list.
|
||||
* ``document_type``: Document type of this document, or null.
|
||||
* ``correspondent``: Correspondent of this document or null.
|
||||
* ``created``: The date at which this document was created.
|
||||
* ``created``: The date and time at which this document was created.
|
||||
* ``created_date``: The date (YYYY-MM-DD) at which this document was created. Optional. If also passed with created, this is ignored.
|
||||
* ``modified``: The date at which this document was last edited in paperless. Read-only.
|
||||
* ``added``: The date at which this document was added to paperless. Read-only.
|
||||
* ``archive_serial_number``: The identifier of this document in a physical document archive.
|
||||
@@ -240,11 +241,13 @@ be instructed to consume the document from there.
|
||||
The endpoint supports the following optional form fields:
|
||||
|
||||
* ``title``: Specify a title that the consumer should use for the document.
|
||||
* ``created``: Specify the DateTime at which the document was created (e.g. "2016-04-19" or "2016-04-19 06:15:00+02:00").
|
||||
* ``correspondent``: Specify the ID of a correspondent that the consumer should use for the document.
|
||||
* ``document_type``: Similar to correspondent.
|
||||
* ``tags``: Similar to correspondent. Specify this multiple times to have multiple tags added
|
||||
to the document.
|
||||
|
||||
|
||||
The endpoint will immediately return "OK" if the document consumption process
|
||||
was started successfully. No additional status information about the consumption
|
||||
process itself is available, since that happens in a different process.
|
||||
|
@@ -31,7 +31,7 @@ PAPERLESS_REDIS=<url>
|
||||
|
||||
PAPERLESS_DBHOST=<hostname>
|
||||
By default, sqlite is used as the database backend. This can be changed here.
|
||||
Set PAPERLESS_DBHOST and PostgreSQL will be used instead of mysql.
|
||||
Set PAPERLESS_DBHOST and PostgreSQL will be used instead of sqlite.
|
||||
|
||||
PAPERLESS_DBPORT=<port>
|
||||
Adjust port if necessary.
|
||||
@@ -60,6 +60,13 @@ PAPERLESS_DBSSLMODE=<mode>
|
||||
|
||||
Default is ``prefer``.
|
||||
|
||||
PAPERLESS_DB_TIMEOUT=<float>
|
||||
Amount of time for a database connection to wait for the database to unlock.
|
||||
Mostly applicable for an sqlite based installation; consider changing to postgresql
|
||||
if you need to increase this.
|
||||
|
||||
Defaults to unset, keeping the Django defaults.
|
||||
|
||||
Paths and folders
|
||||
#################
|
||||
|
||||
@@ -111,6 +118,14 @@ PAPERLESS_FILENAME_FORMAT=<format>
|
||||
|
||||
Default is none, which disables this feature.
|
||||
|
||||
PAPERLESS_FILENAME_FORMAT_REMOVE_NONE=<bool>
|
||||
Tells paperless to omit placeholders in `PAPERLESS_FILENAME_FORMAT` that would resolve
|
||||
to 'none' from the resulting filename. This also holds true for directory
|
||||
names.
|
||||
See :ref:`advanced-file_name_handling` for details.
|
||||
|
||||
Defaults to `false` which disables this feature.
|
||||
|
||||
PAPERLESS_LOGGING_DIR=<path>
|
||||
This is where paperless will store log files.
|
||||
|
||||
@@ -416,14 +431,23 @@ PAPERLESS_OCR_IMAGE_DPI=<num>
|
||||
the produced PDF documents are A4 sized.
|
||||
|
||||
PAPERLESS_OCR_MAX_IMAGE_PIXELS=<num>
|
||||
Paperless will not OCR images that have more pixels than this limit.
|
||||
This is intended to prevent decompression bombs from overloading paperless.
|
||||
Increasing this limit is desired if you face a DecompressionBombError despite
|
||||
the concerning file not being malicious; this could e.g. be caused by invalidly
|
||||
recognized metadata.
|
||||
If you have enough resources or if you are certain that your uploaded files
|
||||
are not malicious you can increase this value to your needs.
|
||||
The default value is 256000000, an image with more pixels than that would not be parsed.
|
||||
Paperless will raise a warning when OCRing images which are over this limit and
|
||||
will not OCR images which are more than twice this limit. Note this does not
|
||||
prevent the document from being consumed, but could result in missing text content.
|
||||
|
||||
If unset, will default to the value determined by
|
||||
`Pillow <https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.MAX_IMAGE_PIXELS>`_.
|
||||
|
||||
.. note::
|
||||
|
||||
Increasing this limit could cause Paperless to consume additional resources
|
||||
when consuming a file. Be sure you have sufficient system resources.
|
||||
|
||||
.. caution::
|
||||
|
||||
The limit is intended to prevent malicious files from consuming system resources
|
||||
and causing crashes and other errors. Only increase this value if you are certain
|
||||
your documents are not malicious and you need the text which was not OCRed.
|
||||
|
||||
PAPERLESS_OCR_USER_ARGS=<json>
|
||||
OCRmyPDF offers many more options. Use this parameter to specify any
|
||||
@@ -519,6 +543,8 @@ PAPERLESS_TASK_WORKERS=<num>
|
||||
maintain the automatic matching algorithm, check emails, consume documents,
|
||||
etc. This variable specifies how many things it will do in parallel.
|
||||
|
||||
Defaults to 1.
|
||||
|
||||
|
||||
PAPERLESS_THREADS_PER_WORKER=<num>
|
||||
Furthermore, paperless uses multiple threads when consuming documents to
|
||||
@@ -590,6 +616,28 @@ PAPERLESS_CONSUMER_POLLING=<num>
|
||||
|
||||
Defaults to 0, which disables polling and uses filesystem notifications.
|
||||
|
||||
PAPERLESS_CONSUMER_POLLING_RETRY_COUNT=<num>
|
||||
If consumer polling is enabled, sets the number of times paperless will check for a
|
||||
file to remain unmodified.
|
||||
|
||||
Defaults to 5.
|
||||
|
||||
PAPERLESS_CONSUMER_POLLING_DELAY=<num>
|
||||
If consumer polling is enabled, sets the delay in seconds between each check (above) paperless
|
||||
will do while waiting for a file to remain unmodified.
|
||||
|
||||
Defaults to 5.
|
||||
|
||||
.. _configuration-inotify:
|
||||
|
||||
PAPERLESS_CONSUMER_INOTIFY_DELAY=<num>
|
||||
Sets the time in seconds the consumer will wait for additional events
|
||||
from inotify before the consumer will consider a file ready and begin consumption.
|
||||
Certain scanners or network setups may generate multiple events for a single file,
|
||||
leading to multiple consumers working on the same file. Configure this to
|
||||
prevent that.
|
||||
|
||||
Defaults to 0.5 seconds.
|
||||
|
||||
PAPERLESS_CONSUMER_DELETE_DUPLICATES=<bool>
|
||||
When the consumer detects a duplicate document, it will not touch the
|
||||
@@ -650,7 +698,6 @@ PAPERLESS_CONSUMER_BARCODE_STRING=PATCHT
|
||||
|
||||
Defaults to "PATCHT"
|
||||
|
||||
|
||||
PAPERLESS_CONVERT_MEMORY_LIMIT=<num>
|
||||
On smaller systems, or even in the case of Very Large Documents, the consumer
|
||||
may explode, complaining about how it's "unable to extend pixel cache". In
|
||||
@@ -674,13 +721,6 @@ PAPERLESS_CONVERT_TMPDIR=<path>
|
||||
|
||||
Default is none, which disables the temporary directory.
|
||||
|
||||
PAPERLESS_OPTIMIZE_THUMBNAILS=<bool>
|
||||
Use optipng to optimize thumbnails. This usually reduces the size of
|
||||
thumbnails by about 20%, but uses considerable compute time during
|
||||
consumption.
|
||||
|
||||
Defaults to true.
|
||||
|
||||
PAPERLESS_POST_CONSUME_SCRIPT=<filename>
|
||||
After a document is consumed, Paperless can trigger an arbitrary script if
|
||||
you like. This script will be passed a number of arguments for you to work
|
||||
@@ -696,6 +736,9 @@ PAPERLESS_FILENAME_DATE_ORDER=<format>
|
||||
The filename will be checked first, and if nothing is found, the document
|
||||
text will be checked as normal.
|
||||
|
||||
A date in a filename must have some separators (`.`, `-`, `/`, etc)
|
||||
for it to be parsed.
|
||||
|
||||
Defaults to none, which disables this feature.
|
||||
|
||||
PAPERLESS_THUMBNAIL_FONT_NAME=<filename>
|
||||
@@ -713,10 +756,7 @@ PAPERLESS_IGNORE_DATES=<string>
|
||||
this process. This is useful for special dates (like date of birth) that appear
|
||||
in documents regularly but are very unlikely to be the documents creation date.
|
||||
|
||||
You may specify dates in a multitude of formats supported by dateparser (see
|
||||
https://dateparser.readthedocs.io/en/latest/#popular-formats) but as the dates
|
||||
need to be comma separated, the options are limited.
|
||||
Example: "2020-12-02,22.04.1999"
|
||||
The date is parsed using the order specified in PAPERLESS_DATE_ORDER
|
||||
|
||||
Defaults to an empty string to not ignore any dates.
|
||||
|
||||
@@ -751,9 +791,6 @@ PAPERLESS_CONVERT_BINARY=<path>
|
||||
PAPERLESS_GS_BINARY=<path>
|
||||
Defaults to "/usr/bin/gs".
|
||||
|
||||
PAPERLESS_OPTIPNG_BINARY=<path>
|
||||
Defaults to "/usr/bin/optipng".
|
||||
|
||||
|
||||
.. _configuration-docker:
|
||||
|
||||
@@ -769,9 +806,7 @@ PAPERLESS_WEBSERVER_WORKERS=<num>
|
||||
also loads the entire application into memory separately, so increasing this value
|
||||
will increase RAM usage.
|
||||
|
||||
Consider configuring this to 1 on low power devices with limited amount of RAM.
|
||||
|
||||
Defaults to 2.
|
||||
Defaults to 1.
|
||||
|
||||
PAPERLESS_PORT=<port>
|
||||
The port number the webserver will listen on inside the container. There are
|
||||
|
@@ -88,7 +88,7 @@ Physical scanners
|
||||
|
||||
.. [1] Scanners with API Integration allow you to push scanned documents directly to :ref:`Paperless API <api-file_uploads>`, sometimes referred to as Webhook or Document POST.
|
||||
.. [2] Canon Multi Function Printers show strange behavior over SMB. They close and reopen the file after every page. It's recommended to tune the
|
||||
:ref:`polling <configuration-polling>` configuration values for your scanner. The scanner timeout is 3 minutes, so ``180`` is a good starting point.
|
||||
:ref:`polling <configuration-polling>` and :ref:`inotify <configuration-inotify>` configuration values for your scanner. The scanner timeout is 3 minutes, so ``180`` is a good starting point.
|
||||
|
||||
Mobile phone software
|
||||
=====================
|
||||
|
@@ -184,6 +184,25 @@ Install Paperless from Docker Hub
|
||||
port 8000. Modifying the part before the colon will map requests on another
|
||||
port to the webserver running on the default port.
|
||||
|
||||
**Rootless**
|
||||
|
||||
If you want to run Paperless as a rootless container, you will need to do the
|
||||
following in your ``docker-compose.yml``:
|
||||
|
||||
- set the ``user`` running the container to map to the ``paperless`` user in the
|
||||
container.
|
||||
This value (``user_id`` below), should be the same id that ``USERMAP_UID`` and
|
||||
``USERMAP_GID`` are set to in the next step.
|
||||
See ``USERMAP_UID`` and ``USERMAP_GID`` :ref:`here <configuration-docker>`.
|
||||
|
||||
Your entry for Paperless should contain something like:
|
||||
|
||||
.. code::
|
||||
|
||||
webserver:
|
||||
image: ghcr.io/paperless-ngx/paperless-ngx:latest
|
||||
user: <user_id>
|
||||
|
||||
5. Modify ``docker-compose.env``, following the comments in the file. The
|
||||
most important change is to set ``USERMAP_UID`` and ``USERMAP_GID``
|
||||
to the uid and gid of your user on the host system. Use ``id -u`` and
|
||||
@@ -200,6 +219,19 @@ Install Paperless from Docker Hub
|
||||
You can copy any setting from the file ``paperless.conf.example`` and paste it here.
|
||||
Have a look at :ref:`configuration` to see what's available.
|
||||
|
||||
.. note::
|
||||
|
||||
You can utilize Docker secrets for some configuration settings by
|
||||
appending `_FILE` to some configuration values. This is supported currently
|
||||
only by:
|
||||
* PAPERLESS_DBUSER
|
||||
* PAPERLESS_DBPASS
|
||||
* PAPERLESS_SECRET_KEY
|
||||
* PAPERLESS_AUTO_LOGIN_USERNAME
|
||||
* PAPERLESS_ADMIN_USER
|
||||
* PAPERLESS_ADMIN_MAIL
|
||||
* PAPERLESS_ADMIN_PASSWORD
|
||||
|
||||
.. caution::
|
||||
|
||||
Some file systems such as NFS network shares don't support file system
|
||||
@@ -286,7 +318,6 @@ writing. Windows is not and will never be supported.
|
||||
|
||||
* ``fonts-liberation`` for generating thumbnails for plain text files
|
||||
* ``imagemagick`` >= 6 for PDF conversion
|
||||
* ``optipng`` for optimizing thumbnails
|
||||
* ``gnupg`` for handling encrypted documents
|
||||
* ``libpq-dev`` for PostgreSQL
|
||||
* ``libmagic-dev`` for mime type detection
|
||||
@@ -298,7 +329,7 @@ writing. Windows is not and will never be supported.
|
||||
|
||||
.. code::
|
||||
|
||||
python3 python3-pip python3-dev imagemagick fonts-liberation optipng gnupg libpq-dev libmagic-dev mime-support libzbar0 poppler-utils
|
||||
python3 python3-pip python3-dev imagemagick fonts-liberation gnupg libpq-dev libmagic-dev mime-support libzbar0 poppler-utils
|
||||
|
||||
These dependencies are required for OCRmyPDF, which is used for text recognition.
|
||||
|
||||
@@ -308,7 +339,7 @@ writing. Windows is not and will never be supported.
|
||||
* ``qpdf``
|
||||
* ``liblept5``
|
||||
* ``libxml2``
|
||||
* ``pngquant``
|
||||
* ``pngquant`` (suggested for certain PDF image optimizations)
|
||||
* ``zlib1g``
|
||||
* ``tesseract-ocr`` >= 4.0.0 for OCR
|
||||
* ``tesseract-ocr`` language packs (``tesseract-ocr-eng``, ``tesseract-ocr-deu``, etc)
|
||||
@@ -332,6 +363,12 @@ writing. Windows is not and will never be supported.
|
||||
3. Optional. Install ``postgresql`` and configure a database, user and password for paperless. If you do not wish
|
||||
to use PostgreSQL, SQLite is available as well.
|
||||
|
||||
.. note::
|
||||
|
||||
On bare-metal installations using SQLite, ensure the
|
||||
`JSON1 extension <https://code.djangoproject.com/wiki/JSON1Extension>`_ is enabled. This is
|
||||
usually the case, but not always.
|
||||
|
||||
4. Get the release archive from `<https://github.com/paperless-ngx/paperless-ngx/releases>`_.
|
||||
If you clone the git repo as it is, you also have to compile the front end by yourself.
|
||||
Extract the archive to a place from where you wish to execute it, such as ``/opt/paperless``.
|
||||
@@ -724,8 +761,6 @@ configuring some options in paperless can help improve performance immensely:
|
||||
* If you want to perform OCR on the device, consider using ``PAPERLESS_OCR_CLEAN=none``.
|
||||
This will speed up OCR times and use less memory at the expense of slightly worse
|
||||
OCR results.
|
||||
* Set ``PAPERLESS_OPTIMIZE_THUMBNAILS`` to 'false' if you want faster consumption
|
||||
times. Thumbnails will be about 20% larger.
|
||||
* If using docker, consider setting ``PAPERLESS_WEBSERVER_WORKERS`` to
|
||||
1. This will save some memory.
|
||||
|
||||
|
@@ -235,3 +235,85 @@ You might find messages like these in your log files:
|
||||
This indicates that paperless failed to read PDF metadata from one of your documents. This happens when you
|
||||
open the affected documents in paperless for editing. Paperless will continue to work, and will simply not
|
||||
show the invalid metadata.
|
||||
|
||||
Consumer fails with a FileNotFoundError
|
||||
#######################################
|
||||
|
||||
You might find messages like these in your log files:
|
||||
|
||||
.. code::
|
||||
|
||||
[ERROR] [paperless.consumer] Error while consuming document SCN_0001.pdf: FileNotFoundError: [Errno 2] No such file or directory: '/tmp/ocrmypdf.io.yhk3zbv0/origin.pdf'
|
||||
Traceback (most recent call last):
|
||||
File "/app/paperless/src/paperless_tesseract/parsers.py", line 261, in parse
|
||||
ocrmypdf.ocr(**args)
|
||||
File "/usr/local/lib/python3.8/dist-packages/ocrmypdf/api.py", line 337, in ocr
|
||||
return run_pipeline(options=options, plugin_manager=plugin_manager, api=True)
|
||||
File "/usr/local/lib/python3.8/dist-packages/ocrmypdf/_sync.py", line 385, in run_pipeline
|
||||
exec_concurrent(context, executor)
|
||||
File "/usr/local/lib/python3.8/dist-packages/ocrmypdf/_sync.py", line 302, in exec_concurrent
|
||||
pdf = post_process(pdf, context, executor)
|
||||
File "/usr/local/lib/python3.8/dist-packages/ocrmypdf/_sync.py", line 235, in post_process
|
||||
pdf_out = metadata_fixup(pdf_out, context)
|
||||
File "/usr/local/lib/python3.8/dist-packages/ocrmypdf/_pipeline.py", line 798, in metadata_fixup
|
||||
with pikepdf.open(context.origin) as original, pikepdf.open(working_file) as pdf:
|
||||
File "/usr/local/lib/python3.8/dist-packages/pikepdf/_methods.py", line 923, in open
|
||||
pdf = Pdf._open(
|
||||
FileNotFoundError: [Errno 2] No such file or directory: '/tmp/ocrmypdf.io.yhk3zbv0/origin.pdf'
|
||||
|
||||
This probably indicates paperless tried to consume the same file twice. This can happen for a number of reasons,
|
||||
depending on how documents are placed into the consume folder. If paperless is using inotify (the default) to
|
||||
check for documents, try adjusting the :ref:`inotify configuration <configuration-inotify>`. If polling is enabled,
|
||||
try adjusting the :ref:`polling configuration <configuration-polling>`.
|
||||
|
||||
Consumer fails waiting for file to remain unmodified.
|
||||
#####################################################
|
||||
|
||||
You might find messages like these in your log files:
|
||||
|
||||
.. code::
|
||||
|
||||
[ERROR] [paperless.management.consumer] Timeout while waiting on file /usr/src/paperless/src/../consume/SCN_0001.pdf to remain unmodified.
|
||||
|
||||
This indicates paperless timed out while waiting for the file to be completely written to the consume folder.
|
||||
Adjusting :ref:`polling configuration <configuration-polling>` values should resolve the issue.
|
||||
|
||||
.. note::
|
||||
|
||||
The user will need to manually move the file out of the consume folder and
|
||||
back in, for the initial failing file to be consumed.
|
||||
|
||||
Consumer fails reporting "OS reports file as busy still".
|
||||
#########################################################
|
||||
|
||||
You might find messages like these in your log files:
|
||||
|
||||
.. code::
|
||||
|
||||
[WARNING] [paperless.management.consumer] Not consuming file /usr/src/paperless/src/../consume/SCN_0001.pdf: OS reports file as busy still
|
||||
|
||||
This indicates paperless was unable to open the file, as the OS reported the file as still being in use. To prevent a
|
||||
crash, paperless did not try to consume the file. If paperless is using inotify (the default) to
|
||||
check for documents, try adjusting the :ref:`inotify configuration <configuration-inotify>`. If polling is enabled,
|
||||
try adjusting the :ref:`polling configuration <configuration-polling>`.
|
||||
|
||||
.. note::
|
||||
|
||||
The user will need to manually move the file out of the consume folder and
|
||||
back in, for the initial failing file to be consumed.
|
||||
|
||||
Log reports "Creating PaperlessTask failed".
|
||||
#########################################################
|
||||
|
||||
You might find messages like these in your log files:
|
||||
|
||||
.. code::
|
||||
|
||||
[ERROR] [paperless.management.consumer] Creating PaperlessTask failed: db locked
|
||||
|
||||
You are likely using an SQLite-based installation with an increased number of workers, and are running into SQLite's concurrency limitations.
|
||||
Uploading or consuming multiple files at once results in many workers attempting to access the database simultaneously.
|
||||
|
||||
Consider changing to the PostgreSQL database if you will often be processing many documents at once. Otherwise,
|
||||
try tweaking the ``PAPERLESS_DB_TIMEOUT`` setting to allow more time for the database to unlock. This may have
|
||||
minor performance implications.
|
||||
|
@@ -161,6 +161,9 @@ These are as follows:
|
||||
will not consume flagged mails.
|
||||
* **Move to folder:** Moves consumed mails out of the way so that paperless won't
|
||||
consume them again.
|
||||
* **Add custom Tag:** Adds a custom tag to mails with consumed documents (the IMAP
|
||||
standard calls these "keywords"). Paperless will not consume mails already tagged.
|
||||
Not all mail servers support this feature!
|
||||
|
||||
.. caution::
|
||||
|
||||
|
Reference in New Issue
Block a user