mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Merge branch 'dev'
This commit is contained in:
commit
5573a84335
4
Pipfile
4
Pipfile
@ -8,6 +8,9 @@ url = "https://www.piwheels.org/simple"
|
|||||||
verify_ssl = true
|
verify_ssl = true
|
||||||
name = "piwheels"
|
name = "piwheels"
|
||||||
|
|
||||||
|
[requires]
|
||||||
|
python_version = "3.6"
|
||||||
|
|
||||||
[packages]
|
[packages]
|
||||||
dateparser = "~=0.7.6"
|
dateparser = "~=0.7.6"
|
||||||
django = "~=3.1.3"
|
django = "~=3.1.3"
|
||||||
@ -35,6 +38,7 @@ scikit-learn="~=0.23.2"
|
|||||||
whitenoise = "~=5.2.0"
|
whitenoise = "~=5.2.0"
|
||||||
watchdog = "*"
|
watchdog = "*"
|
||||||
whoosh="~=2.7.4"
|
whoosh="~=2.7.4"
|
||||||
|
inotify-simple = "*"
|
||||||
|
|
||||||
[dev-packages]
|
[dev-packages]
|
||||||
coveralls = "*"
|
coveralls = "*"
|
||||||
|
50
Pipfile.lock
generated
50
Pipfile.lock
generated
@ -1,10 +1,12 @@
|
|||||||
{
|
{
|
||||||
"_meta": {
|
"_meta": {
|
||||||
"hash": {
|
"hash": {
|
||||||
"sha256": "ae2643b9cf0cf5741ae149fb6bc0c480de41329ce48e773eb4b5d760bc5e2244"
|
"sha256": "d6432a18280c092c108e998f00bcd377c0c55ef18f26cb0b8eb64f9618b9f383"
|
||||||
},
|
},
|
||||||
"pipfile-spec": 6,
|
"pipfile-spec": 6,
|
||||||
"requires": {},
|
"requires": {
|
||||||
|
"python_version": "3.6"
|
||||||
|
},
|
||||||
"sources": [
|
"sources": [
|
||||||
{
|
{
|
||||||
"name": "pypi",
|
"name": "pypi",
|
||||||
@ -129,6 +131,14 @@
|
|||||||
"index": "pypi",
|
"index": "pypi",
|
||||||
"version": "==0.32.0"
|
"version": "==0.32.0"
|
||||||
},
|
},
|
||||||
|
"inotify-simple": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:8440ffe49c4ae81a8df57c1ae1eb4b6bfa7acb830099bfb3e305b383005cc128",
|
||||||
|
"sha256:854f9ac752cc1fcff6ca34e9d3d875c9a94c9b7d6eb377f63be2d481a566c6ee"
|
||||||
|
],
|
||||||
|
"index": "pypi",
|
||||||
|
"version": "==1.3.5"
|
||||||
|
},
|
||||||
"joblib": {
|
"joblib": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:698c311779f347cf6b7e6b8a39bb682277b8ee4aba8cf9507bc0cf4cd4737b72",
|
"sha256:698c311779f347cf6b7e6b8a39bb682277b8ee4aba8cf9507bc0cf4cd4737b72",
|
||||||
@ -663,11 +673,11 @@
|
|||||||
},
|
},
|
||||||
"faker": {
|
"faker": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:3f5d379e4b5ce92a8afe3c2ce59d7c43886370dd3bf9495a936b91888debfc81",
|
"sha256:5398268e1d751ffdb3ed36b8a790ed98659200599b368eec38a02eed15bce997",
|
||||||
"sha256:8c0e8a06acef4b9312902e2ce18becabe62badd3a6632180bd0680c6ee111473"
|
"sha256:d4183b8f57316de3be27cd6c3b40e9f9343d27c95c96179f027316c58c2c239e"
|
||||||
],
|
],
|
||||||
"markers": "python_version >= '3.5'",
|
"markers": "python_version >= '3.5'",
|
||||||
"version": "==4.17.0"
|
"version": "==4.17.1"
|
||||||
},
|
},
|
||||||
"filelock": {
|
"filelock": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
@ -693,6 +703,22 @@
|
|||||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
|
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
|
||||||
"version": "==1.2.0"
|
"version": "==1.2.0"
|
||||||
},
|
},
|
||||||
|
"importlib-metadata": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:030f3b1bdb823ecbe4a9659e14cc861ce5af403fe99863bae173ec5fe00ab132",
|
||||||
|
"sha256:caeee3603f5dcf567864d1be9b839b0bcfdf1383e3e7be33ce2dead8144ff19c"
|
||||||
|
],
|
||||||
|
"markers": "python_version < '3.8'",
|
||||||
|
"version": "==2.1.0"
|
||||||
|
},
|
||||||
|
"importlib-resources": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:7b51f0106c8ec564b1bef3d9c588bc694ce2b92125bbb6278f4f2f5b54ec3592",
|
||||||
|
"sha256:a3d34a8464ce1d5d7c92b0ea4e921e696d86f2aa212e684451cb1482c8d84ed5"
|
||||||
|
],
|
||||||
|
"markers": "python_version < '3.7'",
|
||||||
|
"version": "==3.3.0"
|
||||||
|
},
|
||||||
"iniconfig": {
|
"iniconfig": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3",
|
"sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3",
|
||||||
@ -999,11 +1025,19 @@
|
|||||||
},
|
},
|
||||||
"virtualenv": {
|
"virtualenv": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:b0011228208944ce71052987437d3843e05690b2f23d1c7da4263fde104c97a2",
|
"sha256:07cff122e9d343140366055f31be4dcd61fd598c69d11cd33a9d9c8df4546dd7",
|
||||||
"sha256:b8d6110f493af256a40d65e29846c69340a947669eec8ce784fcf3dd3af28380"
|
"sha256:e0aac7525e880a429764cefd3aaaff54afb5d9f25c82627563603f5d7de5a6e5"
|
||||||
],
|
],
|
||||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
|
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
|
||||||
"version": "==20.1.0"
|
"version": "==20.2.1"
|
||||||
|
},
|
||||||
|
"zipp": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:102c24ef8f171fd729d46599845e95c7ab894a4cf45f5de11a44cc7444fb1108",
|
||||||
|
"sha256:ed5eee1974372595f9e416cc7bbeeb12335201d8081ca8a0743c954d4446e5cb"
|
||||||
|
],
|
||||||
|
"markers": "python_version < '3.8'",
|
||||||
|
"version": "==3.4.0"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -15,7 +15,7 @@ services:
|
|||||||
POSTGRES_PASSWORD: paperless
|
POSTGRES_PASSWORD: paperless
|
||||||
|
|
||||||
webserver:
|
webserver:
|
||||||
image: jonaswinkler/paperless-ng:0.9.2
|
image: jonaswinkler/paperless-ng:0.9.3
|
||||||
restart: always
|
restart: always
|
||||||
depends_on:
|
depends_on:
|
||||||
- db
|
- db
|
||||||
|
@ -5,7 +5,7 @@ services:
|
|||||||
restart: always
|
restart: always
|
||||||
|
|
||||||
webserver:
|
webserver:
|
||||||
image: jonaswinkler/paperless-ng:0.9.2
|
image: jonaswinkler/paperless-ng:0.9.3
|
||||||
restart: always
|
restart: always
|
||||||
depends_on:
|
depends_on:
|
||||||
- broker
|
- broker
|
||||||
|
@ -30,7 +30,7 @@ Options available to docker installations:
|
|||||||
Paperless uses 3 volumes:
|
Paperless uses 3 volumes:
|
||||||
|
|
||||||
* ``paperless_media``: This is where your documents are stored.
|
* ``paperless_media``: This is where your documents are stored.
|
||||||
* ``paperless_data``: This is where auxilliary data is stored. This
|
* ``paperless_data``: This is where auxiliary data is stored. This
|
||||||
folder also contains the SQLite database, if you use it.
|
folder also contains the SQLite database, if you use it.
|
||||||
* ``paperless_pgdata``: Exists only if you use PostgreSQL and contains
|
* ``paperless_pgdata``: Exists only if you use PostgreSQL and contains
|
||||||
the database.
|
the database.
|
||||||
@ -109,7 +109,7 @@ B. If you built the image yourself, grab the new archive and replace your curre
|
|||||||
.. hint::
|
.. hint::
|
||||||
|
|
||||||
You can usually keep your ``docker-compose.env`` file, since this file will
|
You can usually keep your ``docker-compose.env`` file, since this file will
|
||||||
never include mandantory configuration options. However, it is worth checking
|
never include mandatory configuration options. However, it is worth checking
|
||||||
out the new version of this file, since it might have new recommendations
|
out the new version of this file, since it might have new recommendations
|
||||||
on what to configure.
|
on what to configure.
|
||||||
|
|
||||||
@ -126,8 +126,8 @@ After grabbing the new release and unpacking the contents, do the following:
|
|||||||
|
|
||||||
$ pip install --upgrade pipenv
|
$ pip install --upgrade pipenv
|
||||||
$ cd /path/to/paperless
|
$ cd /path/to/paperless
|
||||||
$ pipenv install
|
|
||||||
$ pipenv clean
|
$ pipenv clean
|
||||||
|
$ pipenv install
|
||||||
|
|
||||||
This creates a new virtual environment (or uses your existing environment)
|
This creates a new virtual environment (or uses your existing environment)
|
||||||
and installs all dependencies into it.
|
and installs all dependencies into it.
|
||||||
@ -247,12 +247,12 @@ your already processed documents.
|
|||||||
|
|
||||||
When multiple document types or correspondents match a single document,
|
When multiple document types or correspondents match a single document,
|
||||||
the retagger won't assign these to the document. Specify ``--use-first``
|
the retagger won't assign these to the document. Specify ``--use-first``
|
||||||
to override this behaviour and just use the first correspondent or type
|
to override this behavior and just use the first correspondent or type
|
||||||
it finds. This option does not apply to tags, since any number of tags
|
it finds. This option does not apply to tags, since any number of tags
|
||||||
can be applied to a document.
|
can be applied to a document.
|
||||||
|
|
||||||
Finally, ``-f`` specifies that you wish to overwrite already assigned
|
Finally, ``-f`` specifies that you wish to overwrite already assigned
|
||||||
correspondents, types and/or tags. The default behaviour is to not
|
correspondents, types and/or tags. The default behavior is to not
|
||||||
assign correspondents and types to documents that have this data already
|
assign correspondents and types to documents that have this data already
|
||||||
assigned. ``-f`` works differently for tags: By default, only additional tags get
|
assigned. ``-f`` works differently for tags: By default, only additional tags get
|
||||||
added to documents, no tags will be removed. With ``-f``, tags that don't
|
added to documents, no tags will be removed. With ``-f``, tags that don't
|
||||||
@ -341,7 +341,7 @@ Documents can be stored in Paperless using GnuPG encryption.
|
|||||||
|
|
||||||
.. danger::
|
.. danger::
|
||||||
|
|
||||||
Encryption is depreceated since paperless-ng 0.9 and doesn't really provide any
|
Encryption is deprecated since paperless-ng 0.9 and doesn't really provide any
|
||||||
additional security, since you have to store the passphrase in a configuration
|
additional security, since you have to store the passphrase in a configuration
|
||||||
file on the same system as the encrypted documents for paperless to work.
|
file on the same system as the encrypted documents for paperless to work.
|
||||||
Furthermore, the entire text content of the documents is stored plain in the
|
Furthermore, the entire text content of the documents is stored plain in the
|
||||||
@ -353,39 +353,23 @@ Documents can be stored in Paperless using GnuPG encryption.
|
|||||||
Consider running paperless on an encrypted filesystem instead, which will then
|
Consider running paperless on an encrypted filesystem instead, which will then
|
||||||
at least provide security against physical hardware theft.
|
at least provide security against physical hardware theft.
|
||||||
|
|
||||||
.. code::
|
|
||||||
|
|
||||||
change_storage_type [--passphrase PASSPHRASE] {gpg,unencrypted} {gpg,unencrypted}
|
|
||||||
|
|
||||||
positional arguments:
|
|
||||||
{gpg,unencrypted} The state you want to change your documents from
|
|
||||||
{gpg,unencrypted} The state you want to change your documents to
|
|
||||||
|
|
||||||
optional arguments:
|
|
||||||
--passphrase PASSPHRASE
|
|
||||||
|
|
||||||
Enabling encryption
|
Enabling encryption
|
||||||
-------------------
|
-------------------
|
||||||
|
|
||||||
Basic usage to enable encryption of your document store (**USE A MORE SECURE PASSPHRASE**):
|
Enabling encryption is no longer supported.
|
||||||
|
|
||||||
(Note: If ``PAPERLESS_PASSPHRASE`` isn't set already, you need to specify it here)
|
|
||||||
|
|
||||||
.. code::
|
|
||||||
|
|
||||||
change_storage_type [--passphrase SECR3TP4SSPHRA$E] unencrypted gpg
|
|
||||||
|
|
||||||
|
|
||||||
Disabling encryption
|
Disabling encryption
|
||||||
--------------------
|
--------------------
|
||||||
|
|
||||||
Basic usage to enable encryption of your document store:
|
Basic usage to disable encryption of your document store:
|
||||||
|
|
||||||
(Note: Again, if ``PAPERLESS_PASSPHRASE`` isn't set already, you need to specify it here)
|
(Note: If ``PAPERLESS_PASSPHRASE`` isn't set already, you need to specify it here)
|
||||||
|
|
||||||
.. code::
|
.. code::
|
||||||
|
|
||||||
change_storage_type [--passphrase SECR3TP4SSPHRA$E] gpg unencrypted
|
decrypt_documents [--passphrase SECR3TP4SSPHRA$E]
|
||||||
|
|
||||||
|
|
||||||
.. _Pipenv: https://pipenv.pypa.io/en/latest/
|
.. _Pipenv: https://pipenv.pypa.io/en/latest/
|
@ -84,6 +84,8 @@ to the filename.
|
|||||||
PAPERLESS_FILENAME_PARSE_TRANSFORMS=[{"pattern":"^([a-z]+)_(\\d{8})_(\\d{6})_([0-9]+)\\.", "repl":"\\2\\3Z - \\4 - \\1."}, {"pattern":"^([a-z]+)_([0-9]+)\\.", "repl":" - \\2 - \\1."}]
|
PAPERLESS_FILENAME_PARSE_TRANSFORMS=[{"pattern":"^([a-z]+)_(\\d{8})_(\\d{6})_([0-9]+)\\.", "repl":"\\2\\3Z - \\4 - \\1."}, {"pattern":"^([a-z]+)_([0-9]+)\\.", "repl":" - \\2 - \\1."}]
|
||||||
|
|
||||||
|
|
||||||
|
.. _advanced-matching:
|
||||||
|
|
||||||
Matching tags, correspondents and document types
|
Matching tags, correspondents and document types
|
||||||
################################################
|
################################################
|
||||||
|
|
||||||
@ -145,7 +147,9 @@ America are tagged with the tag "bofa_123" and the matching algorithm of this
|
|||||||
tag is set to *Auto*, this neural network will examine your documents and
|
tag is set to *Auto*, this neural network will examine your documents and
|
||||||
automatically learn when to assign this tag.
|
automatically learn when to assign this tag.
|
||||||
|
|
||||||
There are a couple caveats you need to keep in mind when using this feature:
|
Paperless tries to hide much of the involved complexity with this approach.
|
||||||
|
However, there are a couple caveats you need to keep in mind when using this
|
||||||
|
feature:
|
||||||
|
|
||||||
* Changes to your documents are not immediately reflected by the matching
|
* Changes to your documents are not immediately reflected by the matching
|
||||||
algorithm. The neural network needs to be *trained* on your documents after
|
algorithm. The neural network needs to be *trained* on your documents after
|
||||||
@ -165,6 +169,11 @@ There are a couple caveats you need to keep in mind when using this feature:
|
|||||||
has the correspondent "Very obscure web shop I bought something five years
|
has the correspondent "Very obscure web shop I bought something five years
|
||||||
ago", it will probably not assign this correspondent automatically if you buy
|
ago", it will probably not assign this correspondent automatically if you buy
|
||||||
something from them again. The more documents, the better.
|
something from them again. The more documents, the better.
|
||||||
|
* Paperless also needs a reasonable amount of negative examples to decide when
|
||||||
|
not to assign a certain tag, correspondent or type. This will usually be the
|
||||||
|
case as you start filling up paperless with documents. Example: If all your
|
||||||
|
documents are either from "Webshop" or "Bank", paperless will assign one of
|
||||||
|
these correspondents to ANY new document, if both are set to automatic matching.
|
||||||
|
|
||||||
Hooking into the consumption process
|
Hooking into the consumption process
|
||||||
####################################
|
####################################
|
||||||
@ -253,7 +262,7 @@ By default, paperless stores your documents in the media directory and renames t
|
|||||||
using the identifier which it has assigned to each document. You will end up getting
|
using the identifier which it has assigned to each document. You will end up getting
|
||||||
files like ``0000123.pdf`` in your media directory. This isn't necessarily a bad
|
files like ``0000123.pdf`` in your media directory. This isn't necessarily a bad
|
||||||
thing, because you normally don't have to access these files manually. However, if
|
thing, because you normally don't have to access these files manually. However, if
|
||||||
you wish to name your files differently, you can do that by adjustng the
|
you wish to name your files differently, you can do that by adjusting the
|
||||||
``PAPERLESS_FILENAME_FORMAT`` settings variable.
|
``PAPERLESS_FILENAME_FORMAT`` settings variable.
|
||||||
|
|
||||||
This variable allows you to configure the filename (folders are allowed!) using
|
This variable allows you to configure the filename (folders are allowed!) using
|
||||||
@ -278,7 +287,7 @@ will create a directory structure as follows:
|
|||||||
my_new_shoes-0000004.pdf
|
my_new_shoes-0000004.pdf
|
||||||
|
|
||||||
Paperless appends the unique identifier of each document to the filename. This
|
Paperless appends the unique identifier of each document to the filename. This
|
||||||
avoides filename clashes.
|
avoids filename clashes.
|
||||||
|
|
||||||
.. danger::
|
.. danger::
|
||||||
|
|
||||||
|
@ -94,7 +94,7 @@ Result object:
|
|||||||
}
|
}
|
||||||
|
|
||||||
* ``id``: the primary key of the found document
|
* ``id``: the primary key of the found document
|
||||||
* ``highlights``: an object containing parseable highlights for the result.
|
* ``highlights``: an object containing parsable highlights for the result.
|
||||||
See below.
|
See below.
|
||||||
* ``score``: The score assigned to the document. A higher score indicates a
|
* ``score``: The score assigned to the document. A higher score indicates a
|
||||||
better match with the query. Search results are sorted descending by score.
|
better match with the query. Search results are sorted descending by score.
|
||||||
|
@ -5,6 +5,24 @@
|
|||||||
Changelog
|
Changelog
|
||||||
*********
|
*********
|
||||||
|
|
||||||
|
paperless-ng 0.9.3
|
||||||
|
##################
|
||||||
|
|
||||||
|
* Setting ``PAPERLESS_AUTO_LOGIN_USERNAME`` replaces ``PAPERLESS_DISABLE_LOGIN``.
|
||||||
|
You have to specify your username.
|
||||||
|
* Added a simple sanity checker that checks your documents for missing or orphaned files,
|
||||||
|
files with wrong checksums, inaccessible files, and documents with empty content.
|
||||||
|
* It is no longer possible to encrypt your documents. For the time being, paperless will
|
||||||
|
continue to operate with already encrypted documents.
|
||||||
|
* Fixes:
|
||||||
|
|
||||||
|
* Paperless now uses inotify again, since the watchdog was causing issues which I was not
|
||||||
|
aware of.
|
||||||
|
* Issue with the automatic classifier not working with only one tag.
|
||||||
|
* A couple issues with the search index being opened too eagerly.
|
||||||
|
|
||||||
|
* Added lots of tests for various parts of the application.
|
||||||
|
|
||||||
paperless-ng 0.9.2
|
paperless-ng 0.9.2
|
||||||
##################
|
##################
|
||||||
|
|
||||||
@ -52,7 +70,7 @@ paperless-ng 0.9.0
|
|||||||
* **Added:** New frontend. Features:
|
* **Added:** New frontend. Features:
|
||||||
|
|
||||||
* Single page application: It's much more responsive than the django admin pages.
|
* Single page application: It's much more responsive than the django admin pages.
|
||||||
* Dashboard. Shows recently scanned documents, or todos, or other documents
|
* Dashboard. Shows recently scanned documents, or todo notes, or other documents
|
||||||
at wish. Allows uploading of documents. Shows basic statistics.
|
at wish. Allows uploading of documents. Shows basic statistics.
|
||||||
* Better document list with multiple display options.
|
* Better document list with multiple display options.
|
||||||
* Full text search with result highlighting, auto completion and scoring based
|
* Full text search with result highlighting, auto completion and scoring based
|
||||||
@ -102,7 +120,7 @@ paperless-ng 0.9.0
|
|||||||
|
|
||||||
* **Modified [breaking]:** PostgreSQL:
|
* **Modified [breaking]:** PostgreSQL:
|
||||||
|
|
||||||
* If ``PAPERLESS_DBHOST`` is specified in the settings, paperless uses postgresql instead of sqlite.
|
* If ``PAPERLESS_DBHOST`` is specified in the settings, paperless uses PostgreSQL instead of SQLite.
|
||||||
Username, database and password all default to ``paperless`` if not specified.
|
Username, database and password all default to ``paperless`` if not specified.
|
||||||
|
|
||||||
* **Modified [breaking]:** document_retagger management command rework. See
|
* **Modified [breaking]:** document_retagger management command rework. See
|
||||||
@ -130,7 +148,7 @@ paperless-ng 0.9.0
|
|||||||
Certain language specifics such as umlauts may not get picked up properly.
|
Certain language specifics such as umlauts may not get picked up properly.
|
||||||
* ``PAPERLESS_DEBUG`` defaults to ``false``.
|
* ``PAPERLESS_DEBUG`` defaults to ``false``.
|
||||||
* The presence of ``PAPERLESS_DBHOST`` now determines whether to use PostgreSQL or
|
* The presence of ``PAPERLESS_DBHOST`` now determines whether to use PostgreSQL or
|
||||||
sqlite.
|
SQLite.
|
||||||
* ``PAPERLESS_OCR_THREADS`` is gone and replaced with ``PAPERLESS_TASK_WORKERS`` and
|
* ``PAPERLESS_OCR_THREADS`` is gone and replaced with ``PAPERLESS_TASK_WORKERS`` and
|
||||||
``PAPERLESS_THREADS_PER_WORKER``. Refer to the config example for details.
|
``PAPERLESS_THREADS_PER_WORKER``. Refer to the config example for details.
|
||||||
* ``PAPERLESS_OPTIMIZE_THUMBNAILS`` allows you to disable or enable thumbnail
|
* ``PAPERLESS_OPTIMIZE_THUMBNAILS`` allows you to disable or enable thumbnail
|
||||||
@ -138,8 +156,11 @@ paperless-ng 0.9.0
|
|||||||
|
|
||||||
* Many more small changes here and there. The usual stuff.
|
* Many more small changes here and there. The usual stuff.
|
||||||
|
|
||||||
|
Paperless
|
||||||
|
#########
|
||||||
|
|
||||||
2.7.0
|
2.7.0
|
||||||
#####
|
=====
|
||||||
|
|
||||||
* `syntonym`_ submitted a pull request to catch IMAP connection errors `#475`_.
|
* `syntonym`_ submitted a pull request to catch IMAP connection errors `#475`_.
|
||||||
* `Stéphane Brunner`_ added ``psycopg2`` to the Pipfile `#489`_. He also fixed
|
* `Stéphane Brunner`_ added ``psycopg2`` to the Pipfile `#489`_. He also fixed
|
||||||
@ -156,7 +177,7 @@ paperless-ng 0.9.0
|
|||||||
|
|
||||||
|
|
||||||
2.6.1
|
2.6.1
|
||||||
#####
|
=====
|
||||||
|
|
||||||
* We now have a logo, complete with a favicon :-)
|
* We now have a logo, complete with a favicon :-)
|
||||||
* Removed some problematic tests.
|
* Removed some problematic tests.
|
||||||
@ -168,7 +189,7 @@ paperless-ng 0.9.0
|
|||||||
|
|
||||||
|
|
||||||
2.6.0
|
2.6.0
|
||||||
#####
|
=====
|
||||||
|
|
||||||
* Allow an infinite number of logs to be deleted. Thanks to `Ulli`_ for noting
|
* Allow an infinite number of logs to be deleted. Thanks to `Ulli`_ for noting
|
||||||
the problem in `#433`_.
|
the problem in `#433`_.
|
||||||
@ -189,7 +210,7 @@ paperless-ng 0.9.0
|
|||||||
|
|
||||||
|
|
||||||
2.5.0
|
2.5.0
|
||||||
#####
|
=====
|
||||||
|
|
||||||
* **New dependency**: Paperless now optimises thumbnail generation with
|
* **New dependency**: Paperless now optimises thumbnail generation with
|
||||||
`optipng`_, so you'll need to install that somewhere in your PATH or declare
|
`optipng`_, so you'll need to install that somewhere in your PATH or declare
|
||||||
@ -233,7 +254,7 @@ paperless-ng 0.9.0
|
|||||||
|
|
||||||
|
|
||||||
2.4.0
|
2.4.0
|
||||||
#####
|
=====
|
||||||
|
|
||||||
* A new set of actions are now available thanks to `jonaswinkler`_'s very first
|
* A new set of actions are now available thanks to `jonaswinkler`_'s very first
|
||||||
pull request! You can now do nifty things like tag documents in bulk, or set
|
pull request! You can now do nifty things like tag documents in bulk, or set
|
||||||
@ -254,7 +275,7 @@ paperless-ng 0.9.0
|
|||||||
|
|
||||||
|
|
||||||
2.3.0
|
2.3.0
|
||||||
#####
|
=====
|
||||||
|
|
||||||
* Support for consuming plain text & markdown documents was added by
|
* Support for consuming plain text & markdown documents was added by
|
||||||
`Joshua Taillon`_! This was a long-requested feature, and its addition is
|
`Joshua Taillon`_! This was a long-requested feature, and its addition is
|
||||||
@ -272,14 +293,14 @@ paperless-ng 0.9.0
|
|||||||
|
|
||||||
|
|
||||||
2.2.1
|
2.2.1
|
||||||
#####
|
=====
|
||||||
|
|
||||||
* `Kyle Lucy`_ reported a bug quickly after the release of 2.2.0 where we broke
|
* `Kyle Lucy`_ reported a bug quickly after the release of 2.2.0 where we broke
|
||||||
the ``DISABLE_LOGIN`` feature: `#392`_.
|
the ``DISABLE_LOGIN`` feature: `#392`_.
|
||||||
|
|
||||||
|
|
||||||
2.2.0
|
2.2.0
|
||||||
#####
|
=====
|
||||||
|
|
||||||
* Thanks to `dadosch`_, `Wolfgang Mader`_, and `Tim Brooks`_ this is the first
|
* Thanks to `dadosch`_, `Wolfgang Mader`_, and `Tim Brooks`_ this is the first
|
||||||
version of Paperless that supports Django 2.0! As a result of their hard
|
version of Paperless that supports Django 2.0! As a result of their hard
|
||||||
@ -296,7 +317,7 @@ paperless-ng 0.9.0
|
|||||||
|
|
||||||
|
|
||||||
2.1.0
|
2.1.0
|
||||||
#####
|
=====
|
||||||
|
|
||||||
* `Enno Lohmeier`_ added three simple features that make Paperless a lot more
|
* `Enno Lohmeier`_ added three simple features that make Paperless a lot more
|
||||||
user (and developer) friendly:
|
user (and developer) friendly:
|
||||||
@ -315,7 +336,7 @@ paperless-ng 0.9.0
|
|||||||
|
|
||||||
|
|
||||||
2.0.0
|
2.0.0
|
||||||
#####
|
=====
|
||||||
|
|
||||||
This is a big release as we've changed a core-functionality of Paperless: we no
|
This is a big release as we've changed a core-functionality of Paperless: we no
|
||||||
longer encrypt files with GPG by default.
|
longer encrypt files with GPG by default.
|
||||||
@ -347,7 +368,7 @@ Special thanks to `erikarvstedt`_, `matthewmoto`_, and `mcronce`_ who did the
|
|||||||
bulk of the work on this big change.
|
bulk of the work on this big change.
|
||||||
|
|
||||||
1.4.0
|
1.4.0
|
||||||
#####
|
=====
|
||||||
|
|
||||||
* `Quentin Dawans`_ has refactored the document consumer to allow for some
|
* `Quentin Dawans`_ has refactored the document consumer to allow for some
|
||||||
command-line options. Notably, you can now direct it to consume from a
|
command-line options. Notably, you can now direct it to consume from a
|
||||||
@ -382,7 +403,7 @@ bulk of the work on this big change.
|
|||||||
to some excellent work from `erikarvstedt`_ on `#351`_
|
to some excellent work from `erikarvstedt`_ on `#351`_
|
||||||
|
|
||||||
1.3.0
|
1.3.0
|
||||||
#####
|
=====
|
||||||
|
|
||||||
* You can now run Paperless without a login, though you'll still have to create
|
* You can now run Paperless without a login, though you'll still have to create
|
||||||
at least one user. This is thanks to a pull-request from `matthewmoto`_:
|
at least one user. This is thanks to a pull-request from `matthewmoto`_:
|
||||||
@ -405,7 +426,7 @@ bulk of the work on this big change.
|
|||||||
problem and helping me find where to fix it.
|
problem and helping me find where to fix it.
|
||||||
|
|
||||||
1.2.0
|
1.2.0
|
||||||
#####
|
=====
|
||||||
|
|
||||||
* New Docker image, now based on Alpine, thanks to the efforts of `addadi`_
|
* New Docker image, now based on Alpine, thanks to the efforts of `addadi`_
|
||||||
and `Pit`_. This new image is dramatically smaller than the Debian-based
|
and `Pit`_. This new image is dramatically smaller than the Debian-based
|
||||||
@ -424,7 +445,7 @@ bulk of the work on this big change.
|
|||||||
in the document text.
|
in the document text.
|
||||||
|
|
||||||
1.1.0
|
1.1.0
|
||||||
#####
|
=====
|
||||||
|
|
||||||
* Fix for `#283`_, a redirect bug which broke interactions with
|
* Fix for `#283`_, a redirect bug which broke interactions with
|
||||||
paperless-desktop. Thanks to `chris-aeviator`_ for reporting it.
|
paperless-desktop. Thanks to `chris-aeviator`_ for reporting it.
|
||||||
@ -434,7 +455,7 @@ bulk of the work on this big change.
|
|||||||
`Dan Panzarella`_
|
`Dan Panzarella`_
|
||||||
|
|
||||||
1.0.0
|
1.0.0
|
||||||
#####
|
=====
|
||||||
|
|
||||||
* Upgrade to Django 1.11. **You'll need to run
|
* Upgrade to Django 1.11. **You'll need to run
|
||||||
``pip install -r requirements.txt`` after the usual ``git pull`` to
|
``pip install -r requirements.txt`` after the usual ``git pull`` to
|
||||||
@ -453,14 +474,14 @@ bulk of the work on this big change.
|
|||||||
`Lukas Winkler`_'s issue `#278`_
|
`Lukas Winkler`_'s issue `#278`_
|
||||||
|
|
||||||
0.8.0
|
0.8.0
|
||||||
#####
|
=====
|
||||||
|
|
||||||
* Paperless can now run in a subdirectory on a host (``/paperless``), rather
|
* Paperless can now run in a subdirectory on a host (``/paperless``), rather
|
||||||
than always running in the root (``/``) thanks to `maphy-psd`_'s work on
|
than always running in the root (``/``) thanks to `maphy-psd`_'s work on
|
||||||
`#255`_.
|
`#255`_.
|
||||||
|
|
||||||
0.7.0
|
0.7.0
|
||||||
#####
|
=====
|
||||||
|
|
||||||
* **Potentially breaking change**: As per `#235`_, Paperless will no longer
|
* **Potentially breaking change**: As per `#235`_, Paperless will no longer
|
||||||
automatically delete documents attached to correspondents when those
|
automatically delete documents attached to correspondents when those
|
||||||
@ -472,7 +493,7 @@ bulk of the work on this big change.
|
|||||||
`Kusti Skytén`_ for posting the correct solution in the Github issue.
|
`Kusti Skytén`_ for posting the correct solution in the Github issue.
|
||||||
|
|
||||||
0.6.0
|
0.6.0
|
||||||
#####
|
=====
|
||||||
|
|
||||||
* Abandon the shared-secret trick we were using for the POST API in favour
|
* Abandon the shared-secret trick we were using for the POST API in favour
|
||||||
of BasicAuth or Django session.
|
of BasicAuth or Django session.
|
||||||
@ -486,7 +507,7 @@ bulk of the work on this big change.
|
|||||||
the help with this feature.
|
the help with this feature.
|
||||||
|
|
||||||
0.5.0
|
0.5.0
|
||||||
#####
|
=====
|
||||||
|
|
||||||
* Support for fuzzy matching in the auto-tagger & auto-correspondent systems
|
* Support for fuzzy matching in the auto-tagger & auto-correspondent systems
|
||||||
thanks to `Jake Gysland`_'s patch `#220`_.
|
thanks to `Jake Gysland`_'s patch `#220`_.
|
||||||
@ -504,13 +525,13 @@ bulk of the work on this big change.
|
|||||||
* Amended the Django Admin configuration to have nice headers (`#230`_)
|
* Amended the Django Admin configuration to have nice headers (`#230`_)
|
||||||
|
|
||||||
0.4.1
|
0.4.1
|
||||||
#####
|
=====
|
||||||
|
|
||||||
* Fix for `#206`_ wherein the pluggable parser didn't recognise files with
|
* Fix for `#206`_ wherein the pluggable parser didn't recognise files with
|
||||||
all-caps suffixes like ``.PDF``
|
all-caps suffixes like ``.PDF``
|
||||||
|
|
||||||
0.4.0
|
0.4.0
|
||||||
#####
|
=====
|
||||||
|
|
||||||
* Introducing reminders. See `#199`_ for more information, but the short
|
* Introducing reminders. See `#199`_ for more information, but the short
|
||||||
explanation is that you can now attach simple notes & times to documents
|
explanation is that you can now attach simple notes & times to documents
|
||||||
@ -520,7 +541,7 @@ bulk of the work on this big change.
|
|||||||
like to make use of this feature in his project.
|
like to make use of this feature in his project.
|
||||||
|
|
||||||
0.3.6
|
0.3.6
|
||||||
#####
|
=====
|
||||||
|
|
||||||
* Fix for `#200`_ (!!) where the API wasn't configured to allow updating the
|
* Fix for `#200`_ (!!) where the API wasn't configured to allow updating the
|
||||||
correspondent or the tags for a document.
|
correspondent or the tags for a document.
|
||||||
@ -534,7 +555,7 @@ bulk of the work on this big change.
|
|||||||
documentation is on its way.
|
documentation is on its way.
|
||||||
|
|
||||||
0.3.5
|
0.3.5
|
||||||
#####
|
=====
|
||||||
|
|
||||||
* A serious facelift for the documents listing page wherein we drop the
|
* A serious facelift for the documents listing page wherein we drop the
|
||||||
tabular layout in favour of a tiled interface.
|
tabular layout in favour of a tiled interface.
|
||||||
@ -545,7 +566,7 @@ bulk of the work on this big change.
|
|||||||
consumption.
|
consumption.
|
||||||
|
|
||||||
0.3.4
|
0.3.4
|
||||||
#####
|
=====
|
||||||
|
|
||||||
* Removal of django-suit due to a licensing conflict I bumped into in 0.3.3.
|
* Removal of django-suit due to a licensing conflict I bumped into in 0.3.3.
|
||||||
Note that you *can* use Django Suit with Paperless, but only in a
|
Note that you *can* use Django Suit with Paperless, but only in a
|
||||||
@ -558,26 +579,26 @@ bulk of the work on this big change.
|
|||||||
API thanks to @thomasbrueggemann. See `#179`_.
|
API thanks to @thomasbrueggemann. See `#179`_.
|
||||||
|
|
||||||
0.3.3
|
0.3.3
|
||||||
#####
|
=====
|
||||||
|
|
||||||
* Thumbnails in the UI and a Django-suit -based face-lift courtesy of @ekw!
|
* Thumbnails in the UI and a Django-suit -based face-lift courtesy of @ekw!
|
||||||
* Timezone, items per page, and default language are now all configurable,
|
* Timezone, items per page, and default language are now all configurable,
|
||||||
also thanks to @ekw.
|
also thanks to @ekw.
|
||||||
|
|
||||||
0.3.2
|
0.3.2
|
||||||
#####
|
=====
|
||||||
|
|
||||||
* Fix for `#172`_: defaulting ALLOWED_HOSTS to ``["*"]`` and allowing the
|
* Fix for `#172`_: defaulting ALLOWED_HOSTS to ``["*"]`` and allowing the
|
||||||
user to set her own value via ``PAPERLESS_ALLOWED_HOSTS`` should the need
|
user to set her own value via ``PAPERLESS_ALLOWED_HOSTS`` should the need
|
||||||
arise.
|
arise.
|
||||||
|
|
||||||
0.3.1
|
0.3.1
|
||||||
#####
|
=====
|
||||||
|
|
||||||
* Added a default value for ``CONVERT_BINARY``
|
* Added a default value for ``CONVERT_BINARY``
|
||||||
|
|
||||||
0.3.0
|
0.3.0
|
||||||
#####
|
=====
|
||||||
|
|
||||||
* Updated to using django-filter 1.x
|
* Updated to using django-filter 1.x
|
||||||
* Added some system checks so new users aren't confused by misconfigurations.
|
* Added some system checks so new users aren't confused by misconfigurations.
|
||||||
@ -590,7 +611,7 @@ bulk of the work on this big change.
|
|||||||
``PAPERLESS_SHARED_SECRET`` respectively instead.
|
``PAPERLESS_SHARED_SECRET`` respectively instead.
|
||||||
|
|
||||||
0.2.0
|
0.2.0
|
||||||
#####
|
=====
|
||||||
|
|
||||||
* `#150`_: The media root is now a variable you can set in
|
* `#150`_: The media root is now a variable you can set in
|
||||||
``paperless.conf``.
|
``paperless.conf``.
|
||||||
@ -618,7 +639,7 @@ bulk of the work on this big change.
|
|||||||
to `Martin Honermeyer`_ and `Tim White`_ for working with me on this.
|
to `Martin Honermeyer`_ and `Tim White`_ for working with me on this.
|
||||||
|
|
||||||
0.1.1
|
0.1.1
|
||||||
#####
|
=====
|
||||||
|
|
||||||
* Potentially **Breaking Change**: All references to "sender" in the code
|
* Potentially **Breaking Change**: All references to "sender" in the code
|
||||||
have been renamed to "correspondent" to better reflect the nature of the
|
have been renamed to "correspondent" to better reflect the nature of the
|
||||||
@ -642,7 +663,7 @@ bulk of the work on this big change.
|
|||||||
to be imported but made unavailable.
|
to be imported but made unavailable.
|
||||||
|
|
||||||
0.1.0
|
0.1.0
|
||||||
#####
|
=====
|
||||||
|
|
||||||
* Docker support! Big thanks to `Wayne Werner`_, `Brian Conn`_, and
|
* Docker support! Big thanks to `Wayne Werner`_, `Brian Conn`_, and
|
||||||
`Tikitu de Jager`_ for this one, and especially to `Pit`_
|
`Tikitu de Jager`_ for this one, and especially to `Pit`_
|
||||||
@ -661,14 +682,14 @@ bulk of the work on this big change.
|
|||||||
* Added tox with pep8 checking
|
* Added tox with pep8 checking
|
||||||
|
|
||||||
0.0.6
|
0.0.6
|
||||||
#####
|
=====
|
||||||
|
|
||||||
* Added support for parallel OCR (significant work from `Pit`_)
|
* Added support for parallel OCR (significant work from `Pit`_)
|
||||||
* Sped up the language detection (significant work from `Pit`_)
|
* Sped up the language detection (significant work from `Pit`_)
|
||||||
* Added simple logging
|
* Added simple logging
|
||||||
|
|
||||||
0.0.5
|
0.0.5
|
||||||
#####
|
=====
|
||||||
|
|
||||||
* Added support for image files as documents (png, jpg, gif, tiff)
|
* Added support for image files as documents (png, jpg, gif, tiff)
|
||||||
* Added a crude means of HTTP POST for document imports
|
* Added a crude means of HTTP POST for document imports
|
||||||
@ -677,7 +698,7 @@ bulk of the work on this big change.
|
|||||||
* Documentation for the above as well as data migration
|
* Documentation for the above as well as data migration
|
||||||
|
|
||||||
0.0.4
|
0.0.4
|
||||||
#####
|
=====
|
||||||
|
|
||||||
* Added automated tagging based on keyword matching
|
* Added automated tagging based on keyword matching
|
||||||
* Cleaned up the document listing page
|
* Cleaned up the document listing page
|
||||||
@ -685,19 +706,19 @@ bulk of the work on this big change.
|
|||||||
* Added ``pytz`` to the list of requirements
|
* Added ``pytz`` to the list of requirements
|
||||||
|
|
||||||
0.0.3
|
0.0.3
|
||||||
#####
|
=====
|
||||||
|
|
||||||
* Added basic tagging
|
* Added basic tagging
|
||||||
|
|
||||||
0.0.2
|
0.0.2
|
||||||
#####
|
=====
|
||||||
|
|
||||||
* Added language detection
|
* Added language detection
|
||||||
* Added datestamps to ``document_exporter``.
|
* Added datestamps to ``document_exporter``.
|
||||||
* Changed ``settings.TESSERACT_LANGUAGE`` to ``settings.OCR_LANGUAGE``.
|
* Changed ``settings.TESSERACT_LANGUAGE`` to ``settings.OCR_LANGUAGE``.
|
||||||
|
|
||||||
0.0.1
|
0.0.1
|
||||||
#####
|
=====
|
||||||
|
|
||||||
* Initial release
|
* Initial release
|
||||||
|
|
||||||
|
@ -69,7 +69,7 @@ PAPERLESS_CONSUMPTION_DIR=<path>
|
|||||||
Defaults to "../consume", relative to the "src" directory.
|
Defaults to "../consume", relative to the "src" directory.
|
||||||
|
|
||||||
PAPERLESS_DATA_DIR=<path>
|
PAPERLESS_DATA_DIR=<path>
|
||||||
This is where paperless stores all its data (search index, sqlite database,
|
This is where paperless stores all its data (search index, SQLite database,
|
||||||
classification model, etc).
|
classification model, etc).
|
||||||
|
|
||||||
Defaults to "../data", relative to the "src" directory.
|
Defaults to "../data", relative to the "src" directory.
|
||||||
@ -100,7 +100,7 @@ Hosting & Security
|
|||||||
##################
|
##################
|
||||||
|
|
||||||
PAPERLESS_SECRET_KEY=<key>
|
PAPERLESS_SECRET_KEY=<key>
|
||||||
Paperless uses this to make session tokens. If you exose paperless on the
|
Paperless uses this to make session tokens. If you expose paperless on the
|
||||||
internet, you need to change this, since the default secret is well known.
|
internet, you need to change this, since the default secret is well known.
|
||||||
|
|
||||||
Use any sequence of characters. The more, the better. You don't need to
|
Use any sequence of characters. The more, the better. You don't need to
|
||||||
@ -141,6 +141,16 @@ PAPERLESS_STATIC_URL=<path>
|
|||||||
|
|
||||||
Defaults to "/static/".
|
Defaults to "/static/".
|
||||||
|
|
||||||
|
PAPERLESS_AUTO_LOGIN_USERNAME=<username>
|
||||||
|
Specify a username here so that paperless will automatically perform login
|
||||||
|
with the selected user.
|
||||||
|
|
||||||
|
.. danger::
|
||||||
|
|
||||||
|
Do not use this when exposing paperless on the internet. There are no
|
||||||
|
checks in place that would prevent you from doing this.
|
||||||
|
|
||||||
|
Defaults to none, which disables this feature.
|
||||||
|
|
||||||
Software tweaks
|
Software tweaks
|
||||||
###############
|
###############
|
||||||
@ -220,7 +230,7 @@ PAPERLESS_CONSUMER_POLLING=<num>
|
|||||||
specify a polling interval in seconds here, which will then cause paperless
|
specify a polling interval in seconds here, which will then cause paperless
|
||||||
to periodically check your consumption directory for changes.
|
to periodically check your consumption directory for changes.
|
||||||
|
|
||||||
Defaults to 0, which disables polling and uses filesystem notifiactions.
|
Defaults to 0, which disables polling and uses filesystem notifications.
|
||||||
|
|
||||||
PAPERLESS_CONSUMER_DELETE_DUPLICATES=<bool>
|
PAPERLESS_CONSUMER_DELETE_DUPLICATES=<bool>
|
||||||
When the consumer detects a duplicate document, it will not touch the
|
When the consumer detects a duplicate document, it will not touch the
|
||||||
@ -264,7 +274,7 @@ PAPERLESS_CONVERT_DENSITY=<num>
|
|||||||
Default is 300.
|
Default is 300.
|
||||||
|
|
||||||
PAPERLESS_OPTIMIZE_THUMBNAILS=<bool>
|
PAPERLESS_OPTIMIZE_THUMBNAILS=<bool>
|
||||||
Use optipng to optimize thumbnails. This usually reduces the sice of
|
Use optipng to optimize thumbnails. This usually reduces the size of
|
||||||
thumbnails by about 20%, but uses considerable compute time during
|
thumbnails by about 20%, but uses considerable compute time during
|
||||||
consumption.
|
consumption.
|
||||||
|
|
||||||
|
@ -85,7 +85,7 @@ quoted, or triple-quoted string will do:
|
|||||||
problematic_string = 'This is a "string" with "quotes" in it'
|
problematic_string = 'This is a "string" with "quotes" in it'
|
||||||
|
|
||||||
In HTML templates, please use double-quotes for tag attributes, and single
|
In HTML templates, please use double-quotes for tag attributes, and single
|
||||||
quotes for arguments passed to Django tempalte tags:
|
quotes for arguments passed to Django template tags:
|
||||||
|
|
||||||
.. code:: html
|
.. code:: html
|
||||||
|
|
||||||
|
@ -17,7 +17,7 @@ is
|
|||||||
|
|
||||||
.. caution::
|
.. caution::
|
||||||
|
|
||||||
Dont mess with this folder. Don't change permissions and don't move
|
Do not mess with this folder. Don't change permissions and don't move
|
||||||
files around manually. This folder is meant to be entirely managed by docker
|
files around manually. This folder is meant to be entirely managed by docker
|
||||||
and paperless.
|
and paperless.
|
||||||
|
|
||||||
@ -36,7 +36,7 @@ file extensions do not matter.
|
|||||||
|
|
||||||
**A:** The short answer is yes. I've tested it on a Raspberry Pi 3 B.
|
**A:** The short answer is yes. I've tested it on a Raspberry Pi 3 B.
|
||||||
The long answer is that certain parts of
|
The long answer is that certain parts of
|
||||||
Paperless will run very slow, such as the tesseract OCR. On Rasperry Pi,
|
Paperless will run very slow, such as the tesseract OCR. On Raspberry Pi,
|
||||||
try to OCR documents before feeding them into paperless so that paperless can
|
try to OCR documents before feeding them into paperless so that paperless can
|
||||||
reuse the text. The web interface should be a lot snappier, since it runs
|
reuse the text. The web interface should be a lot snappier, since it runs
|
||||||
in your browser and paperless has to do much less work to serve the data.
|
in your browser and paperless has to do much less work to serve the data.
|
||||||
|
@ -8,7 +8,7 @@ Scanner recommendations
|
|||||||
As Paperless operates by watching a folder for new files, it doesn't care what
|
As Paperless operates by watching a folder for new files, it doesn't care what
|
||||||
scanner you use, but sometimes finding a scanner that will write to an FTP,
|
scanner you use, but sometimes finding a scanner that will write to an FTP,
|
||||||
NFS, or SMB server can be difficult. This page is here to help you find one
|
NFS, or SMB server can be difficult. This page is here to help you find one
|
||||||
that works right for you based on recommentations from other Paperless users.
|
that works right for you based on recommendations from other Paperless users.
|
||||||
|
|
||||||
+---------+----------------+-----+-----+-----+----------------+
|
+---------+----------------+-----+-----+-----+----------------+
|
||||||
| Brand | Model | Supports | Recommended By |
|
| Brand | Model | Supports | Recommended By |
|
||||||
|
@ -21,7 +21,7 @@ Extensive filtering mechanisms:
|
|||||||
|
|
||||||
.. image:: _static/screenshots/documents-filter.png
|
.. image:: _static/screenshots/documents-filter.png
|
||||||
|
|
||||||
Side-by-side editing of documents. Optmized for 1080p.
|
Side-by-side editing of documents. Optimized for 1080p.
|
||||||
|
|
||||||
.. image:: _static/screenshots/editing.png
|
.. image:: _static/screenshots/editing.png
|
||||||
|
|
||||||
|
@ -265,15 +265,17 @@ Migration to paperless-ng is then performed in a few simple steps:
|
|||||||
``docker-compose.env`` to your needs.
|
``docker-compose.env`` to your needs.
|
||||||
See `docker route`_ for details on which edits are advised.
|
See `docker route`_ for details on which edits are advised.
|
||||||
|
|
||||||
6. Start paperless-ng.
|
6. In order to find your existing documents with the new search feature, you need
|
||||||
|
to invoke a one-time operation that will create the search index:
|
||||||
|
|
||||||
.. code:: bash
|
.. code:: shell-session
|
||||||
|
|
||||||
$ docker-compose up
|
$ docker-compose run --rm webserver document_index reindex
|
||||||
|
|
||||||
If you see everything working (you should see some migrations getting
|
This will migrate your database and create the search index. After that,
|
||||||
applied, for instance), you can gracefully stop paperless-ng with Ctrl-C
|
paperless will take care of maintaining the index by itself.
|
||||||
and then start paperless-ng as usual with
|
|
||||||
|
7. Start paperless-ng.
|
||||||
|
|
||||||
.. code:: bash
|
.. code:: bash
|
||||||
|
|
||||||
@ -281,11 +283,11 @@ Migration to paperless-ng is then performed in a few simple steps:
|
|||||||
|
|
||||||
This will run paperless in the background and automatically start it on system boot.
|
This will run paperless in the background and automatically start it on system boot.
|
||||||
|
|
||||||
7. Paperless installed a permanent redirect to ``admin/`` in your browser. This
|
8. Paperless installed a permanent redirect to ``admin/`` in your browser. This
|
||||||
redirect is still in place and prevents access to the new UI. Clear
|
redirect is still in place and prevents access to the new UI. Clear
|
||||||
browsing cache in order to fix this.
|
browsing cache in order to fix this.
|
||||||
|
|
||||||
8. Optionally, follow the instructions below to migrate your existing data to PostgreSQL.
|
9. Optionally, follow the instructions below to migrate your existing data to PostgreSQL.
|
||||||
|
|
||||||
|
|
||||||
.. _setup-sqlite_to_psql:
|
.. _setup-sqlite_to_psql:
|
||||||
@ -322,7 +324,7 @@ management commands as below.
|
|||||||
$ cd /path/to/paperless
|
$ cd /path/to/paperless
|
||||||
$ docker-compose run --rm webserver /bin/bash
|
$ docker-compose run --rm webserver /bin/bash
|
||||||
|
|
||||||
This will lauch the container and initialize the PostgreSQL database.
|
This will launch the container and initialize the PostgreSQL database.
|
||||||
|
|
||||||
b) Without docker, open a shell in your virtual environment, switch to
|
b) Without docker, open a shell in your virtual environment, switch to
|
||||||
the ``src`` directory and create the database schema:
|
the ``src`` directory and create the database schema:
|
||||||
@ -357,6 +359,35 @@ management commands as below.
|
|||||||
7. Start paperless.
|
7. Start paperless.
|
||||||
|
|
||||||
|
|
||||||
|
Moving back to paperless
|
||||||
|
========================
|
||||||
|
|
||||||
|
Let's say you migrated to Paperless-ng and used it for a while, but decided that
|
||||||
|
you don't like it and want to move back (If you do, send me a mail about what
|
||||||
|
part you didn't like!), you can totally do that with a few simple steps.
|
||||||
|
|
||||||
|
Paperless-ng modified the database schema slightly, however, these changes can
|
||||||
|
be reverted while keeping your current data, so that your current data will
|
||||||
|
be compatible with original Paperless.
|
||||||
|
|
||||||
|
Execute this:
|
||||||
|
|
||||||
|
.. code:: shell-session
|
||||||
|
|
||||||
|
$ cd /path/to/paperless
|
||||||
|
$ docker-compose run --rm webserver migrate documents 0023
|
||||||
|
|
||||||
|
Or without docker:
|
||||||
|
|
||||||
|
.. code:: shell-session
|
||||||
|
|
||||||
|
$ cd /path/to/paperless/src
|
||||||
|
$ python3 manage.py migrate documents 0023
|
||||||
|
|
||||||
|
After that, you need to clear your cookies (Paperless-ng comes with updated
|
||||||
|
dependencies that do cookie-processing differently) and probably your cache
|
||||||
|
as well.
|
||||||
|
|
||||||
.. _setup-less_powerful_devices:
|
.. _setup-less_powerful_devices:
|
||||||
|
|
||||||
|
|
||||||
@ -372,7 +403,7 @@ configuring some options in paperless can help improve performance immensely:
|
|||||||
* ``PAPERLESS_TASK_WORKERS`` and ``PAPERLESS_THREADS_PER_WORKER`` are configured
|
* ``PAPERLESS_TASK_WORKERS`` and ``PAPERLESS_THREADS_PER_WORKER`` are configured
|
||||||
to use all cores. The Raspberry Pi models 3 and up have 4 cores, meaning that
|
to use all cores. The Raspberry Pi models 3 and up have 4 cores, meaning that
|
||||||
paperless will use 2 workers and 2 threads per worker. This may result in
|
paperless will use 2 workers and 2 threads per worker. This may result in
|
||||||
slugish response times during consumption, so you might want to lower these
|
sluggish response times during consumption, so you might want to lower these
|
||||||
settings (example: 2 workers and 1 thread to always have some computing power
|
settings (example: 2 workers and 1 thread to always have some computing power
|
||||||
left for other tasks).
|
left for other tasks).
|
||||||
* Keep ``PAPERLESS_OCR_ALWAYS`` at its default value 'false' and consider OCR'ing
|
* Keep ``PAPERLESS_OCR_ALWAYS`` at its default value 'false' and consider OCR'ing
|
||||||
|
@ -5,13 +5,13 @@ Usage Overview
|
|||||||
Paperless is an application that manages your personal documents. With
|
Paperless is an application that manages your personal documents. With
|
||||||
the help of a document scanner (see :ref:`scanners`), paperless transforms
|
the help of a document scanner (see :ref:`scanners`), paperless transforms
|
||||||
your unwieldy physical document binders into a searchable archive and
|
your unwieldy physical document binders into a searchable archive and
|
||||||
provices many utilities for finding and managing your documents.
|
provides many utilities for finding and managing your documents.
|
||||||
|
|
||||||
|
|
||||||
Terms and definitions
|
Terms and definitions
|
||||||
#####################
|
#####################
|
||||||
|
|
||||||
Paperless esentially consists of two different parts for managing your
|
Paperless essentially consists of two different parts for managing your
|
||||||
documents:
|
documents:
|
||||||
|
|
||||||
* The *consumer* watches a specified folder and adds all documents in that
|
* The *consumer* watches a specified folder and adds all documents in that
|
||||||
@ -30,12 +30,12 @@ Each document has a couple of fields that you can assign to them:
|
|||||||
tag, however, a single document can also have multiple tags. This is not
|
tag, however, a single document can also have multiple tags. This is not
|
||||||
possible with folders. The reason folders are not implemented in paperless
|
possible with folders. The reason folders are not implemented in paperless
|
||||||
is simply that tags are much more versatile than folders.
|
is simply that tags are much more versatile than folders.
|
||||||
* A *document type* is used to demarkate the type of a document such as letter,
|
* A *document type* is used to demarcate the type of a document such as letter,
|
||||||
bank statement, invoice, contract, etc. It is used to identify what a document
|
bank statement, invoice, contract, etc. It is used to identify what a document
|
||||||
is about.
|
is about.
|
||||||
* The *date added* of a document is the date the document was scanned into
|
* The *date added* of a document is the date the document was scanned into
|
||||||
paperless. You cannot and should not change this date.
|
paperless. You cannot and should not change this date.
|
||||||
* The *date created* of a document is the date the document was intially issued.
|
* The *date created* of a document is the date the document was initially issued.
|
||||||
This can be the date you bought a product, the date you signed a contract, or
|
This can be the date you bought a product, the date you signed a contract, or
|
||||||
the date a letter was sent to you.
|
the date a letter was sent to you.
|
||||||
* The *archive serial number* (short: ASN) of a document is the identifier of
|
* The *archive serial number* (short: ASN) of a document is the identifier of
|
||||||
@ -131,7 +131,7 @@ These are as follows:
|
|||||||
|
|
||||||
With the correct set of rules, you can completely automate your email documents.
|
With the correct set of rules, you can completely automate your email documents.
|
||||||
Create rules for every correspondent you receive digital documents from and
|
Create rules for every correspondent you receive digital documents from and
|
||||||
paperless will read them automatically. The default acion "mark as read" is
|
paperless will read them automatically. The default action "mark as read" is
|
||||||
pretty tame and will not cause any damage or data loss whatsoever.
|
pretty tame and will not cause any damage or data loss whatsoever.
|
||||||
|
|
||||||
You can also set up a special folder in your mail account for paperless and use
|
You can also set up a special folder in your mail account for paperless and use
|
||||||
@ -182,7 +182,7 @@ Processing of the physical documents
|
|||||||
====================================
|
====================================
|
||||||
|
|
||||||
Keep a physical inbox. Whenever you receive a document that you need to
|
Keep a physical inbox. Whenever you receive a document that you need to
|
||||||
archive, put it into your inbox. Regulary, do the following for all documents
|
archive, put it into your inbox. Regularly, do the following for all documents
|
||||||
in your inbox:
|
in your inbox:
|
||||||
|
|
||||||
1. For each document, decide if you need to keep the document in physical
|
1. For each document, decide if you need to keep the document in physical
|
||||||
@ -217,18 +217,24 @@ Once you have scanned in a document, proceed in paperless as follows.
|
|||||||
|
|
||||||
1. If the document has an ASN, assign the ASN to the document.
|
1. If the document has an ASN, assign the ASN to the document.
|
||||||
2. Assign a correspondent to the document (i.e., your employer, bank, etc)
|
2. Assign a correspondent to the document (i.e., your employer, bank, etc)
|
||||||
This isnt strictly necessary but helps in finding a document when you need
|
This isn't strictly necessary but helps in finding a document when you need
|
||||||
it.
|
it.
|
||||||
3. Assign a document type (i.e., invoice, bank statement, etc) to the document
|
3. Assign a document type (i.e., invoice, bank statement, etc) to the document
|
||||||
This isnt strictly necessary but helps in finding a document when you need
|
This isn't strictly necessary but helps in finding a document when you need
|
||||||
it.
|
it.
|
||||||
4. Assign a proper title to the document (the name of an item you bought, the
|
4. Assign a proper title to the document (the name of an item you bought, the
|
||||||
subject of the letter, etc)
|
subject of the letter, etc)
|
||||||
5. Check that the date of the document is corrent. Paperless tries to read
|
5. Check that the date of the document is correct. Paperless tries to read
|
||||||
the date from the content of the document, but this fails sometimes if the
|
the date from the content of the document, but this fails sometimes if the
|
||||||
OCR is bad or multiple dates appear on the document.
|
OCR is bad or multiple dates appear on the document.
|
||||||
6. Remove inbox tags from the documents.
|
6. Remove inbox tags from the documents.
|
||||||
|
|
||||||
|
.. hint::
|
||||||
|
|
||||||
|
You can set up manual matching rules for your correspondents and tags and
|
||||||
|
paperless will assign them automatically. After consuming a couple documents,
|
||||||
|
you can even ask paperless to *learn* when to assign tags and correspondents
|
||||||
|
by itself. For details on this feature, see :ref:`advanced-matching`.
|
||||||
|
|
||||||
Task management
|
Task management
|
||||||
===============
|
===============
|
||||||
|
@ -29,6 +29,7 @@
|
|||||||
#PAPERLESS_CORS_ALLOWED_HOSTS=localhost:8080,example.com,localhost:8000
|
#PAPERLESS_CORS_ALLOWED_HOSTS=localhost:8080,example.com,localhost:8000
|
||||||
#PAPERLESS_FORCE_SCRIPT_NAME=
|
#PAPERLESS_FORCE_SCRIPT_NAME=
|
||||||
#PAPERLESS_STATIC_URL=/static/
|
#PAPERLESS_STATIC_URL=/static/
|
||||||
|
#PAPERLESS_AUTO_LOGIN_USERNAME=
|
||||||
|
|
||||||
# Software tweaks
|
# Software tweaks
|
||||||
|
|
||||||
|
@ -1,5 +1,19 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
|
# Release checklist
|
||||||
|
# - wait for travis build.
|
||||||
|
# adjust src/paperless/version.py
|
||||||
|
# changelog in the documentation
|
||||||
|
# adjust versions in docker/hub/*
|
||||||
|
# If docker-compose was modified: all compose files are the same.
|
||||||
|
|
||||||
|
# Steps:
|
||||||
|
# run release script "dev", push
|
||||||
|
# if it works: new tag, merge into master
|
||||||
|
# on master: make release "latest", push
|
||||||
|
# on master: make release "version-tag", push
|
||||||
|
# publish release files
|
||||||
|
|
||||||
set -e
|
set -e
|
||||||
|
|
||||||
|
|
||||||
|
@ -23,7 +23,7 @@ import { TagEditDialogComponent } from './components/manage/tag-list/tag-edit-di
|
|||||||
import { DocumentTypeEditDialogComponent } from './components/manage/document-type-list/document-type-edit-dialog/document-type-edit-dialog.component';
|
import { DocumentTypeEditDialogComponent } from './components/manage/document-type-list/document-type-edit-dialog/document-type-edit-dialog.component';
|
||||||
import { TagComponent } from './components/common/tag/tag.component';
|
import { TagComponent } from './components/common/tag/tag.component';
|
||||||
import { SearchComponent } from './components/search/search.component';
|
import { SearchComponent } from './components/search/search.component';
|
||||||
import { ResultHightlightComponent } from './components/search/result-hightlight/result-hightlight.component';
|
import { ResultHighlightComponent } from './components/search/result-highlight/result-highlight.component';
|
||||||
import { PageHeaderComponent } from './components/common/page-header/page-header.component';
|
import { PageHeaderComponent } from './components/common/page-header/page-header.component';
|
||||||
import { AppFrameComponent } from './components/app-frame/app-frame.component';
|
import { AppFrameComponent } from './components/app-frame/app-frame.component';
|
||||||
import { ToastsComponent } from './components/common/toasts/toasts.component';
|
import { ToastsComponent } from './components/common/toasts/toasts.component';
|
||||||
@ -65,7 +65,7 @@ import { WidgetFrameComponent } from './components/dashboard/widgets/widget-fram
|
|||||||
DocumentTypeEditDialogComponent,
|
DocumentTypeEditDialogComponent,
|
||||||
TagComponent,
|
TagComponent,
|
||||||
SearchComponent,
|
SearchComponent,
|
||||||
ResultHightlightComponent,
|
ResultHighlightComponent,
|
||||||
PageHeaderComponent,
|
PageHeaderComponent,
|
||||||
AppFrameComponent,
|
AppFrameComponent,
|
||||||
ToastsComponent,
|
ToastsComponent,
|
||||||
|
@ -11,7 +11,7 @@
|
|||||||
<h5 class="card-title" *ngIf="document.archive_serial_number">#{{document.archive_serial_number}}</h5>
|
<h5 class="card-title" *ngIf="document.archive_serial_number">#{{document.archive_serial_number}}</h5>
|
||||||
</div>
|
</div>
|
||||||
<p class="card-text">
|
<p class="card-text">
|
||||||
<app-result-hightlight *ngIf="getDetailsAsHighlight()" class="result-content" [highlights]="getDetailsAsHighlight()"></app-result-hightlight>
|
<app-result-highlight *ngIf="getDetailsAsHighlight()" class="result-content" [highlights]="getDetailsAsHighlight()"></app-result-highlight>
|
||||||
<span *ngIf="getDetailsAsString()" class="result-content">{{getDetailsAsString()}}</span>
|
<span *ngIf="getDetailsAsString()" class="result-content">{{getDetailsAsString()}}</span>
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
|
@ -1,20 +1,20 @@
|
|||||||
import { ComponentFixture, TestBed } from '@angular/core/testing';
|
import { ComponentFixture, TestBed } from '@angular/core/testing';
|
||||||
|
|
||||||
import { ResultHightlightComponent } from './result-hightlight.component';
|
import { ResultHighlightComponent } from './result-highlight.component';
|
||||||
|
|
||||||
describe('ResultHightlightComponent', () => {
|
describe('ResultHighlightComponent', () => {
|
||||||
let component: ResultHightlightComponent;
|
let component: ResultHighlightComponent;
|
||||||
let fixture: ComponentFixture<ResultHightlightComponent>;
|
let fixture: ComponentFixture<ResultHighlightComponent>;
|
||||||
|
|
||||||
beforeEach(async () => {
|
beforeEach(async () => {
|
||||||
await TestBed.configureTestingModule({
|
await TestBed.configureTestingModule({
|
||||||
declarations: [ ResultHightlightComponent ]
|
declarations: [ ResultHighlightComponent ]
|
||||||
})
|
})
|
||||||
.compileComponents();
|
.compileComponents();
|
||||||
});
|
});
|
||||||
|
|
||||||
beforeEach(() => {
|
beforeEach(() => {
|
||||||
fixture = TestBed.createComponent(ResultHightlightComponent);
|
fixture = TestBed.createComponent(ResultHighlightComponent);
|
||||||
component = fixture.componentInstance;
|
component = fixture.componentInstance;
|
||||||
fixture.detectChanges();
|
fixture.detectChanges();
|
||||||
});
|
});
|
@ -2,11 +2,11 @@ import { Component, Input, OnInit } from '@angular/core';
|
|||||||
import { SearchHitHighlight } from 'src/app/data/search-result';
|
import { SearchHitHighlight } from 'src/app/data/search-result';
|
||||||
|
|
||||||
@Component({
|
@Component({
|
||||||
selector: 'app-result-hightlight',
|
selector: 'app-result-highlight',
|
||||||
templateUrl: './result-hightlight.component.html',
|
templateUrl: './result-highlight.component.html',
|
||||||
styleUrls: ['./result-hightlight.component.scss']
|
styleUrls: ['./result-highlight.component.scss']
|
||||||
})
|
})
|
||||||
export class ResultHightlightComponent implements OnInit {
|
export class ResultHighlightComponent implements OnInit {
|
||||||
|
|
||||||
constructor() { }
|
constructor() { }
|
||||||
|
|
@ -1 +1,2 @@
|
|||||||
from .checks import changed_password_check
|
# this is here so that django finds the checks.
|
||||||
|
from .checks import *
|
||||||
|
@ -4,12 +4,13 @@ import os
|
|||||||
import pickle
|
import pickle
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
from sklearn.feature_extraction.text import CountVectorizer
|
from sklearn.feature_extraction.text import CountVectorizer
|
||||||
from sklearn.neural_network import MLPClassifier
|
from sklearn.neural_network import MLPClassifier
|
||||||
from sklearn.preprocessing import MultiLabelBinarizer
|
from sklearn.preprocessing import MultiLabelBinarizer, LabelBinarizer
|
||||||
|
from sklearn.utils.multiclass import type_of_target
|
||||||
|
|
||||||
from documents.models import Document, MatchingModel
|
from documents.models import Document, MatchingModel
|
||||||
from paperless import settings
|
|
||||||
|
|
||||||
|
|
||||||
class IncompatibleClassifierVersionError(Exception):
|
class IncompatibleClassifierVersionError(Exception):
|
||||||
@ -27,7 +28,7 @@ def preprocess_content(content):
|
|||||||
|
|
||||||
class DocumentClassifier(object):
|
class DocumentClassifier(object):
|
||||||
|
|
||||||
FORMAT_VERSION = 5
|
FORMAT_VERSION = 6
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
# mtime of the model file on disk. used to prevent reloading when
|
# mtime of the model file on disk. used to prevent reloading when
|
||||||
@ -54,6 +55,8 @@ class DocumentClassifier(object):
|
|||||||
"Cannor load classifier, incompatible versions.")
|
"Cannor load classifier, incompatible versions.")
|
||||||
else:
|
else:
|
||||||
if self.classifier_version > 0:
|
if self.classifier_version > 0:
|
||||||
|
# Don't be confused by this check. It's simply here
|
||||||
|
# so that we won't log anything on initial reload.
|
||||||
logger.info("Classifier updated on disk, "
|
logger.info("Classifier updated on disk, "
|
||||||
"reloading classifier models")
|
"reloading classifier models")
|
||||||
self.data_hash = pickle.load(f)
|
self.data_hash = pickle.load(f)
|
||||||
@ -122,9 +125,14 @@ class DocumentClassifier(object):
|
|||||||
labels_tags_unique = set([tag for tags in labels_tags for tag in tags])
|
labels_tags_unique = set([tag for tags in labels_tags for tag in tags])
|
||||||
|
|
||||||
num_tags = len(labels_tags_unique)
|
num_tags = len(labels_tags_unique)
|
||||||
|
|
||||||
# subtract 1 since -1 (null) is also part of the classes.
|
# subtract 1 since -1 (null) is also part of the classes.
|
||||||
num_correspondents = len(set(labels_correspondent)) - 1
|
|
||||||
num_document_types = len(set(labels_document_type)) - 1
|
# union with {-1} accounts for cases where all documents have
|
||||||
|
# correspondents and types assigned, so -1 isn't part of labels_x, which
|
||||||
|
# it usually is.
|
||||||
|
num_correspondents = len(set(labels_correspondent) | {-1}) - 1
|
||||||
|
num_document_types = len(set(labels_document_type) | {-1}) - 1
|
||||||
|
|
||||||
logging.getLogger(__name__).debug(
|
logging.getLogger(__name__).debug(
|
||||||
"{} documents, {} tag(s), {} correspondent(s), "
|
"{} documents, {} tag(s), {} correspondent(s), "
|
||||||
@ -145,12 +153,23 @@ class DocumentClassifier(object):
|
|||||||
)
|
)
|
||||||
data_vectorized = self.data_vectorizer.fit_transform(data)
|
data_vectorized = self.data_vectorizer.fit_transform(data)
|
||||||
|
|
||||||
self.tags_binarizer = MultiLabelBinarizer()
|
|
||||||
labels_tags_vectorized = self.tags_binarizer.fit_transform(labels_tags)
|
|
||||||
|
|
||||||
# Step 3: train the classifiers
|
# Step 3: train the classifiers
|
||||||
if num_tags > 0:
|
if num_tags > 0:
|
||||||
logging.getLogger(__name__).debug("Training tags classifier...")
|
logging.getLogger(__name__).debug("Training tags classifier...")
|
||||||
|
|
||||||
|
if num_tags == 1:
|
||||||
|
# Special case where only one tag has auto:
|
||||||
|
# Fallback to binary classification.
|
||||||
|
labels_tags = [label[0] if len(label) == 1 else -1
|
||||||
|
for label in labels_tags]
|
||||||
|
self.tags_binarizer = LabelBinarizer()
|
||||||
|
labels_tags_vectorized = self.tags_binarizer.fit_transform(
|
||||||
|
labels_tags).ravel()
|
||||||
|
else:
|
||||||
|
self.tags_binarizer = MultiLabelBinarizer()
|
||||||
|
labels_tags_vectorized = self.tags_binarizer.fit_transform(
|
||||||
|
labels_tags)
|
||||||
|
|
||||||
self.tags_classifier = MLPClassifier(tol=0.01)
|
self.tags_classifier = MLPClassifier(tol=0.01)
|
||||||
self.tags_classifier.fit(data_vectorized, labels_tags_vectorized)
|
self.tags_classifier.fit(data_vectorized, labels_tags_vectorized)
|
||||||
else:
|
else:
|
||||||
@ -222,6 +241,16 @@ class DocumentClassifier(object):
|
|||||||
X = self.data_vectorizer.transform([preprocess_content(content)])
|
X = self.data_vectorizer.transform([preprocess_content(content)])
|
||||||
y = self.tags_classifier.predict(X)
|
y = self.tags_classifier.predict(X)
|
||||||
tags_ids = self.tags_binarizer.inverse_transform(y)[0]
|
tags_ids = self.tags_binarizer.inverse_transform(y)[0]
|
||||||
return tags_ids
|
if type_of_target(y).startswith('multilabel'):
|
||||||
|
# the usual case when there are multiple tags.
|
||||||
|
return list(tags_ids)
|
||||||
|
elif type_of_target(y) == 'binary' and tags_ids != -1:
|
||||||
|
# This is for when we have binary classification with only one
|
||||||
|
# tag and the result is to assign this tag.
|
||||||
|
return [tags_ids]
|
||||||
|
else:
|
||||||
|
# Usually binary as well with -1 as the result, but we're
|
||||||
|
# going to catch everything else here as well.
|
||||||
|
return []
|
||||||
else:
|
else:
|
||||||
return []
|
return []
|
||||||
|
@ -8,7 +8,6 @@ from django.conf import settings
|
|||||||
from django.db import transaction
|
from django.db import transaction
|
||||||
from django.utils import timezone
|
from django.utils import timezone
|
||||||
|
|
||||||
from paperless.db import GnuPG
|
|
||||||
from .classifier import DocumentClassifier, IncompatibleClassifierVersionError
|
from .classifier import DocumentClassifier, IncompatibleClassifierVersionError
|
||||||
from .file_handling import generate_filename, create_source_path_directory
|
from .file_handling import generate_filename, create_source_path_directory
|
||||||
from .loggers import LoggingMixin
|
from .loggers import LoggingMixin
|
||||||
@ -40,17 +39,6 @@ class Consumer(LoggingMixin):
|
|||||||
raise ConsumerError("Cannot consume {}: It is not a file".format(
|
raise ConsumerError("Cannot consume {}: It is not a file".format(
|
||||||
self.path))
|
self.path))
|
||||||
|
|
||||||
def pre_check_consumption_dir(self):
|
|
||||||
if not settings.CONSUMPTION_DIR:
|
|
||||||
raise ConsumerError(
|
|
||||||
"The CONSUMPTION_DIR settings variable does not appear to be "
|
|
||||||
"set.")
|
|
||||||
|
|
||||||
if not os.path.isdir(settings.CONSUMPTION_DIR):
|
|
||||||
raise ConsumerError(
|
|
||||||
"Consumption directory {} does not exist".format(
|
|
||||||
settings.CONSUMPTION_DIR))
|
|
||||||
|
|
||||||
def pre_check_duplicate(self):
|
def pre_check_duplicate(self):
|
||||||
with open(self.path, "rb") as f:
|
with open(self.path, "rb") as f:
|
||||||
checksum = hashlib.md5(f.read()).hexdigest()
|
checksum = hashlib.md5(f.read()).hexdigest()
|
||||||
@ -92,7 +80,6 @@ class Consumer(LoggingMixin):
|
|||||||
# Make sure that preconditions for consuming the file are met.
|
# Make sure that preconditions for consuming the file are met.
|
||||||
|
|
||||||
self.pre_check_file_exists()
|
self.pre_check_file_exists()
|
||||||
self.pre_check_consumption_dir()
|
|
||||||
self.pre_check_directories()
|
self.pre_check_directories()
|
||||||
self.pre_check_duplicate()
|
self.pre_check_duplicate()
|
||||||
|
|
||||||
@ -208,9 +195,6 @@ class Consumer(LoggingMixin):
|
|||||||
created = file_info.created or date or timezone.make_aware(
|
created = file_info.created or date or timezone.make_aware(
|
||||||
datetime.datetime.fromtimestamp(stats.st_mtime))
|
datetime.datetime.fromtimestamp(stats.st_mtime))
|
||||||
|
|
||||||
if settings.PASSPHRASE:
|
|
||||||
storage_type = Document.STORAGE_TYPE_GPG
|
|
||||||
else:
|
|
||||||
storage_type = Document.STORAGE_TYPE_UNENCRYPTED
|
storage_type = Document.STORAGE_TYPE_UNENCRYPTED
|
||||||
|
|
||||||
with open(self.path, "rb") as f:
|
with open(self.path, "rb") as f:
|
||||||
@ -260,8 +244,4 @@ class Consumer(LoggingMixin):
|
|||||||
def _write(self, document, source, target):
|
def _write(self, document, source, target):
|
||||||
with open(source, "rb") as read_file:
|
with open(source, "rb") as read_file:
|
||||||
with open(target, "wb") as write_file:
|
with open(target, "wb") as write_file:
|
||||||
if document.storage_type == Document.STORAGE_TYPE_UNENCRYPTED:
|
|
||||||
write_file.write(read_file.read())
|
write_file.write(read_file.read())
|
||||||
return
|
|
||||||
self.log("debug", "Encrypting")
|
|
||||||
write_file.write(GnuPG.encrypted(read_file))
|
|
||||||
|
@ -64,12 +64,12 @@ def get_schema():
|
|||||||
|
|
||||||
|
|
||||||
def open_index(recreate=False):
|
def open_index(recreate=False):
|
||||||
|
try:
|
||||||
if exists_in(settings.INDEX_DIR) and not recreate:
|
if exists_in(settings.INDEX_DIR) and not recreate:
|
||||||
return open_dir(settings.INDEX_DIR)
|
return open_dir(settings.INDEX_DIR)
|
||||||
else:
|
except Exception as e:
|
||||||
# TODO: this is not thread safe. If 2 instances try to create the index
|
logger.error(f"Error while opening the index: {e}, recreating.")
|
||||||
# at the same time, this fails. This currently prevents parallel
|
|
||||||
# tests.
|
|
||||||
if not os.path.isdir(settings.INDEX_DIR):
|
if not os.path.isdir(settings.INDEX_DIR):
|
||||||
os.makedirs(settings.INDEX_DIR, exist_ok=True)
|
os.makedirs(settings.INDEX_DIR, exist_ok=True)
|
||||||
return create_in(settings.INDEX_DIR, get_schema())
|
return create_in(settings.INDEX_DIR, get_schema())
|
||||||
|
@ -1,9 +1,14 @@
|
|||||||
import logging
|
import logging
|
||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
|
|
||||||
class PaperlessHandler(logging.Handler):
|
class PaperlessHandler(logging.Handler):
|
||||||
def emit(self, record):
|
def emit(self, record):
|
||||||
|
if settings.DISABLE_DBHANDLER:
|
||||||
|
return
|
||||||
|
|
||||||
# We have to do the import here or Django will barf when it tries to
|
# We have to do the import here or Django will barf when it tries to
|
||||||
# load this because the apps aren't loaded at that point
|
# load this because the apps aren't loaded at that point
|
||||||
from .models import Log
|
from .models import Log
|
||||||
|
@ -17,16 +17,6 @@ class Command(BaseCommand):
|
|||||||
|
|
||||||
def add_arguments(self, parser):
|
def add_arguments(self, parser):
|
||||||
|
|
||||||
parser.add_argument(
|
|
||||||
"from",
|
|
||||||
choices=("gpg", "unencrypted"),
|
|
||||||
help="The state you want to change your documents from"
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"to",
|
|
||||||
choices=("gpg", "unencrypted"),
|
|
||||||
help="The state you want to change your documents to"
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--passphrase",
|
"--passphrase",
|
||||||
help="If PAPERLESS_PASSPHRASE isn't set already, you need to "
|
help="If PAPERLESS_PASSPHRASE isn't set already, you need to "
|
||||||
@ -50,11 +40,6 @@ class Command(BaseCommand):
|
|||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
return
|
return
|
||||||
|
|
||||||
if options["from"] == options["to"]:
|
|
||||||
raise CommandError(
|
|
||||||
'The "from" and "to" values can\'t be the same.'
|
|
||||||
)
|
|
||||||
|
|
||||||
passphrase = options["passphrase"] or settings.PASSPHRASE
|
passphrase = options["passphrase"] or settings.PASSPHRASE
|
||||||
if not passphrase:
|
if not passphrase:
|
||||||
raise CommandError(
|
raise CommandError(
|
||||||
@ -62,10 +47,7 @@ class Command(BaseCommand):
|
|||||||
"by declaring it in your environment or your config."
|
"by declaring it in your environment or your config."
|
||||||
)
|
)
|
||||||
|
|
||||||
if options["from"] == "gpg" and options["to"] == "unencrypted":
|
|
||||||
self.__gpg_to_unencrypted(passphrase)
|
self.__gpg_to_unencrypted(passphrase)
|
||||||
elif options["from"] == "unencrypted" and options["to"] == "gpg":
|
|
||||||
self.__unencrypted_to_gpg(passphrase)
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def __gpg_to_unencrypted(passphrase):
|
def __gpg_to_unencrypted(passphrase):
|
||||||
@ -79,42 +61,28 @@ class Command(BaseCommand):
|
|||||||
document).encode('utf-8'), "green"))
|
document).encode('utf-8'), "green"))
|
||||||
|
|
||||||
old_paths = [document.source_path, document.thumbnail_path]
|
old_paths = [document.source_path, document.thumbnail_path]
|
||||||
|
|
||||||
raw_document = GnuPG.decrypted(document.source_file, passphrase)
|
raw_document = GnuPG.decrypted(document.source_file, passphrase)
|
||||||
raw_thumb = GnuPG.decrypted(document.thumbnail_file, passphrase)
|
raw_thumb = GnuPG.decrypted(document.thumbnail_file, passphrase)
|
||||||
|
|
||||||
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
|
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
|
||||||
|
|
||||||
|
ext = os.path.splitext(document.filename)[1]
|
||||||
|
|
||||||
|
if not ext == '.gpg':
|
||||||
|
raise CommandError(
|
||||||
|
f"Abort: encrypted file {document.source_path} does not "
|
||||||
|
f"end with .gpg")
|
||||||
|
|
||||||
|
document.filename = os.path.splitext(document.filename)[0]
|
||||||
|
|
||||||
with open(document.source_path, "wb") as f:
|
with open(document.source_path, "wb") as f:
|
||||||
f.write(raw_document)
|
f.write(raw_document)
|
||||||
|
|
||||||
with open(document.thumbnail_path, "wb") as f:
|
with open(document.thumbnail_path, "wb") as f:
|
||||||
f.write(raw_thumb)
|
f.write(raw_thumb)
|
||||||
|
|
||||||
document.save(update_fields=("storage_type",))
|
document.save(update_fields=("storage_type", "filename"))
|
||||||
|
|
||||||
for path in old_paths:
|
|
||||||
os.unlink(path)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def __unencrypted_to_gpg(passphrase):
|
|
||||||
|
|
||||||
unencrypted_files = Document.objects.filter(
|
|
||||||
storage_type=Document.STORAGE_TYPE_UNENCRYPTED)
|
|
||||||
|
|
||||||
for document in unencrypted_files:
|
|
||||||
|
|
||||||
print(coloured("Encrypting {}".format(document), "green"))
|
|
||||||
|
|
||||||
old_paths = [document.source_path, document.thumbnail_path]
|
|
||||||
with open(document.source_path, "rb") as raw_document:
|
|
||||||
with open(document.thumbnail_path, "rb") as raw_thumb:
|
|
||||||
document.storage_type = Document.STORAGE_TYPE_GPG
|
|
||||||
with open(document.source_path, "wb") as f:
|
|
||||||
f.write(GnuPG.encrypted(raw_document, passphrase))
|
|
||||||
with open(document.thumbnail_path, "wb") as f:
|
|
||||||
f.write(GnuPG.encrypted(raw_thumb, passphrase))
|
|
||||||
|
|
||||||
document.save(update_fields=("storage_type",))
|
|
||||||
|
|
||||||
for path in old_paths:
|
for path in old_paths:
|
||||||
os.unlink(path)
|
os.unlink(path)
|
@ -1,11 +1,11 @@
|
|||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
from time import sleep
|
||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from django.core.management.base import BaseCommand
|
from django.core.management.base import BaseCommand, CommandError
|
||||||
from django_q.tasks import async_task
|
from django_q.tasks import async_task
|
||||||
from watchdog.events import FileSystemEventHandler
|
from watchdog.events import FileSystemEventHandler
|
||||||
from watchdog.observers import Observer
|
|
||||||
from watchdog.observers.polling import PollingObserver
|
from watchdog.observers.polling import PollingObserver
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@ -13,25 +13,54 @@ try:
|
|||||||
except ImportError:
|
except ImportError:
|
||||||
INotify = flags = None
|
INotify = flags = None
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
class Handler(FileSystemEventHandler):
|
|
||||||
|
|
||||||
def _consume(self, file):
|
def _consume(file):
|
||||||
if os.path.isfile(file):
|
|
||||||
try:
|
try:
|
||||||
|
if os.path.isfile(file):
|
||||||
async_task("documents.tasks.consume_file",
|
async_task("documents.tasks.consume_file",
|
||||||
file,
|
file,
|
||||||
task_name=os.path.basename(file)[:100])
|
task_name=os.path.basename(file)[:100])
|
||||||
|
else:
|
||||||
|
logger.debug(
|
||||||
|
f"Not consuming file {file}: File has moved.")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# Catch all so that the consumer won't crash.
|
# Catch all so that the consumer won't crash.
|
||||||
logging.getLogger(__name__).error(
|
# This is also what the test case is listening for to check for
|
||||||
|
# errors.
|
||||||
|
logger.error(
|
||||||
"Error while consuming document: {}".format(e))
|
"Error while consuming document: {}".format(e))
|
||||||
|
|
||||||
|
|
||||||
|
def _consume_wait_unmodified(file, num_tries=20, wait_time=1):
    """Hand ``file`` to ``_consume`` once its mtime stops changing.

    The file is polled up to ``num_tries`` times, sleeping ``wait_time``
    seconds between polls. As soon as two consecutive polls report the same
    modification time the file is passed to ``_consume``.

    If the file disappears while waiting, this is logged at debug level and
    nothing is consumed. If the mtime never settles within ``num_tries``
    polls, an error is logged and nothing is consumed.
    """
    # Value compared against on the first poll, before any real mtime
    # has been observed.
    last_seen_mtime = -1
    tries_left = num_tries

    while tries_left > 0:
        try:
            current_mtime = os.stat(file).st_mtime
        except FileNotFoundError:
            logger.debug(f"File {file} moved while waiting for it to remain "
                         f"unmodified.")
            return

        if current_mtime == last_seen_mtime:
            # Unchanged since the previous poll: treat the file as complete.
            _consume(file)
            return

        last_seen_mtime = current_mtime
        sleep(wait_time)
        tries_left -= 1

    logger.error(f"Timeout while waiting on file {file} to remain unmodified.")
|
||||||
|
|
||||||
|
|
||||||
|
class Handler(FileSystemEventHandler):
|
||||||
|
|
||||||
def on_created(self, event):
|
def on_created(self, event):
|
||||||
self._consume(event.src_path)
|
_consume_wait_unmodified(event.src_path)
|
||||||
|
|
||||||
def on_moved(self, event):
|
def on_moved(self, event):
|
||||||
self._consume(event.src_path)
|
_consume_wait_unmodified(event.dest_path)
|
||||||
|
|
||||||
|
|
||||||
class Command(BaseCommand):
|
class Command(BaseCommand):
|
||||||
@ -40,12 +69,15 @@ class Command(BaseCommand):
|
|||||||
consumption directory.
|
consumption directory.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
# This is here primarily for the tests and is irrelevant in production.
|
||||||
|
stop_flag = False
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
|
|
||||||
self.verbosity = 0
|
|
||||||
self.logger = logging.getLogger(__name__)
|
self.logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
BaseCommand.__init__(self, *args, **kwargs)
|
BaseCommand.__init__(self, *args, **kwargs)
|
||||||
|
self.observer = None
|
||||||
|
|
||||||
def add_arguments(self, parser):
|
def add_arguments(self, parser):
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
@ -54,38 +86,66 @@ class Command(BaseCommand):
|
|||||||
nargs="?",
|
nargs="?",
|
||||||
help="The consumption directory."
|
help="The consumption directory."
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--oneshot",
|
||||||
|
action="store_true",
|
||||||
|
help="Run only once."
|
||||||
|
)
|
||||||
|
|
||||||
def handle(self, *args, **options):
|
def handle(self, *args, **options):
|
||||||
|
|
||||||
self.verbosity = options["verbosity"]
|
|
||||||
directory = options["directory"]
|
directory = options["directory"]
|
||||||
|
|
||||||
logging.getLogger(__name__).info(
|
if not directory:
|
||||||
"Starting document consumer at {}".format(
|
raise CommandError(
|
||||||
directory
|
"CONSUMPTION_DIR does not appear to be set."
|
||||||
)
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# Consume all files as this is not done initially by the watchdog
|
if not os.path.isdir(directory):
|
||||||
|
raise CommandError(
|
||||||
|
f"Consumption directory {directory} does not exist")
|
||||||
|
|
||||||
for entry in os.scandir(directory):
|
for entry in os.scandir(directory):
|
||||||
if entry.is_file():
|
_consume(entry.path)
|
||||||
async_task("documents.tasks.consume_file",
|
|
||||||
entry.path,
|
|
||||||
task_name=os.path.basename(entry.path)[:100])
|
|
||||||
|
|
||||||
# Start the watchdog. Woof!
|
if options["oneshot"]:
|
||||||
if settings.CONSUMER_POLLING > 0:
|
return
|
||||||
logging.getLogger(__name__).info(
|
|
||||||
"Using polling instead of file system notifications.")
|
if settings.CONSUMER_POLLING == 0 and INotify:
|
||||||
observer = PollingObserver(timeout=settings.CONSUMER_POLLING)
|
self.handle_inotify(directory)
|
||||||
else:
|
else:
|
||||||
observer = Observer()
|
self.handle_polling(directory)
|
||||||
event_handler = Handler()
|
|
||||||
observer.schedule(event_handler, directory, recursive=True)
|
logger.debug("Consumer exiting.")
|
||||||
observer.start()
|
|
||||||
|
def handle_polling(self, directory):
|
||||||
|
logging.getLogger(__name__).info(
|
||||||
|
f"Polling directory for changes: {directory}")
|
||||||
|
self.observer = PollingObserver(timeout=settings.CONSUMER_POLLING)
|
||||||
|
self.observer.schedule(Handler(), directory, recursive=False)
|
||||||
|
self.observer.start()
|
||||||
try:
|
try:
|
||||||
while observer.is_alive():
|
while self.observer.is_alive():
|
||||||
observer.join(1)
|
self.observer.join(1)
|
||||||
|
if self.stop_flag:
|
||||||
|
self.observer.stop()
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
observer.stop()
|
self.observer.stop()
|
||||||
observer.join()
|
self.observer.join()
|
||||||
|
|
||||||
|
def handle_inotify(self, directory):
    """Watch ``directory`` with inotify and consume files as they arrive.

    A watch for ``CLOSE_WRITE`` and ``MOVED_TO`` events is placed on
    ``directory`` (not on its subdirectories). Every reported file name is
    joined with ``directory`` and handed to ``_consume``.

    The loop runs until ``self.stop_flag`` becomes true or the process
    receives a ``KeyboardInterrupt``. The inotify instance is closed on
    every exit path, including unexpected exceptions.
    """
    logging.getLogger(__name__).info(
        f"Using inotify to watch directory for changes: {directory}")

    inotify = INotify()
    try:
        descriptor = inotify.add_watch(
            directory, flags.CLOSE_WRITE | flags.MOVED_TO)
        try:
            while not self.stop_flag:
                # read() is bounded by its timeout, so stop_flag gets
                # re-checked regularly even when no events arrive.
                for event in inotify.read(timeout=1000, read_delay=1000):
                    file = os.path.join(directory, event.name)
                    _consume(file)
        except KeyboardInterrupt:
            # Ctrl-C is the regular way to stop the consumer.
            pass

        inotify.rm_watch(descriptor)
    finally:
        # Always release the inotify file descriptor, even if add_watch()
        # or the event loop raised something unexpected.
        inotify.close()
|
||||||
|
@ -22,13 +22,6 @@ class Command(Renderable, BaseCommand):
|
|||||||
|
|
||||||
def add_arguments(self, parser):
|
def add_arguments(self, parser):
|
||||||
parser.add_argument("target")
|
parser.add_argument("target")
|
||||||
parser.add_argument(
|
|
||||||
"--legacy",
|
|
||||||
action="store_true",
|
|
||||||
help="Don't try to export all of the document data, just dump the "
|
|
||||||
"original document files out in a format that makes "
|
|
||||||
"re-consuming them easy."
|
|
||||||
)
|
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
BaseCommand.__init__(self, *args, **kwargs)
|
BaseCommand.__init__(self, *args, **kwargs)
|
||||||
@ -44,9 +37,6 @@ class Command(Renderable, BaseCommand):
|
|||||||
if not os.access(self.target, os.W_OK):
|
if not os.access(self.target, os.W_OK):
|
||||||
raise CommandError("That path doesn't appear to be writable")
|
raise CommandError("That path doesn't appear to be writable")
|
||||||
|
|
||||||
if options["legacy"]:
|
|
||||||
self.dump_legacy()
|
|
||||||
else:
|
|
||||||
self.dump()
|
self.dump()
|
||||||
|
|
||||||
def dump(self):
|
def dump(self):
|
||||||
@ -102,33 +92,3 @@ class Command(Renderable, BaseCommand):
|
|||||||
|
|
||||||
with open(os.path.join(self.target, "manifest.json"), "w") as f:
|
with open(os.path.join(self.target, "manifest.json"), "w") as f:
|
||||||
json.dump(manifest, f, indent=2)
|
json.dump(manifest, f, indent=2)
|
||||||
|
|
||||||
def dump_legacy(self):
|
|
||||||
|
|
||||||
for document in Document.objects.all():
|
|
||||||
|
|
||||||
target = os.path.join(
|
|
||||||
self.target, self._get_legacy_file_name(document))
|
|
||||||
|
|
||||||
print("Exporting: {}".format(target))
|
|
||||||
|
|
||||||
with open(target, "wb") as f:
|
|
||||||
f.write(GnuPG.decrypted(document.source_file))
|
|
||||||
t = int(time.mktime(document.created.timetuple()))
|
|
||||||
os.utime(target, times=(t, t))
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def _get_legacy_file_name(doc):
|
|
||||||
|
|
||||||
if not doc.correspondent and not doc.title:
|
|
||||||
return os.path.basename(doc.source_path)
|
|
||||||
|
|
||||||
created = doc.created.strftime("%Y%m%d%H%M%SZ")
|
|
||||||
tags = ",".join([t.slug for t in doc.tags.all()])
|
|
||||||
|
|
||||||
if tags:
|
|
||||||
return "{} - {} - {} - {}{}".format(
|
|
||||||
created, doc.correspondent, doc.title, tags, doc.file_type)
|
|
||||||
|
|
||||||
return "{} - {} - {}{}".format(
|
|
||||||
created, doc.correspondent, doc.title, doc.file_type)
|
|
||||||
|
@ -82,8 +82,6 @@ class Command(Renderable, BaseCommand):
|
|||||||
def _import_files_from_manifest(self):
|
def _import_files_from_manifest(self):
|
||||||
|
|
||||||
storage_type = Document.STORAGE_TYPE_UNENCRYPTED
|
storage_type = Document.STORAGE_TYPE_UNENCRYPTED
|
||||||
if settings.PASSPHRASE:
|
|
||||||
storage_type = Document.STORAGE_TYPE_GPG
|
|
||||||
|
|
||||||
for record in self.manifest:
|
for record in self.manifest:
|
||||||
|
|
||||||
@ -105,21 +103,6 @@ class Command(Renderable, BaseCommand):
|
|||||||
|
|
||||||
create_source_path_directory(document.source_path)
|
create_source_path_directory(document.source_path)
|
||||||
|
|
||||||
if settings.PASSPHRASE:
|
|
||||||
|
|
||||||
with open(document_path, "rb") as unencrypted:
|
|
||||||
with open(document.source_path, "wb") as encrypted:
|
|
||||||
print("Encrypting {} and saving it to {}".format(
|
|
||||||
doc_file, document.source_path))
|
|
||||||
encrypted.write(GnuPG.encrypted(unencrypted))
|
|
||||||
|
|
||||||
with open(thumbnail_path, "rb") as unencrypted:
|
|
||||||
with open(document.thumbnail_path, "wb") as encrypted:
|
|
||||||
print("Encrypting {} and saving it to {}".format(
|
|
||||||
thumb_file, document.thumbnail_path))
|
|
||||||
encrypted.write(GnuPG.encrypted(unencrypted))
|
|
||||||
|
|
||||||
else:
|
|
||||||
print(f"Moving {document_path} to {document.source_path}")
|
print(f"Moving {document_path} to {document.source_path}")
|
||||||
shutil.copy(document_path, document.source_path)
|
shutil.copy(document_path, document.source_path)
|
||||||
shutil.copy(thumbnail_path, document.thumbnail_path)
|
shutil.copy(thumbnail_path, document.thumbnail_path)
|
||||||
|
@ -5,23 +5,6 @@ from django.db import migrations, models
|
|||||||
import django.db.models.deletion
|
import django.db.models.deletion
|
||||||
|
|
||||||
|
|
||||||
def make_index(apps, schema_editor):
|
|
||||||
Document = apps.get_model("documents", "Document")
|
|
||||||
documents = Document.objects.all()
|
|
||||||
print()
|
|
||||||
try:
|
|
||||||
print(" --> Creating document index...")
|
|
||||||
from whoosh.writing import AsyncWriter
|
|
||||||
from documents import index
|
|
||||||
ix = index.open_index(recreate=True)
|
|
||||||
with AsyncWriter(ix) as writer:
|
|
||||||
for document in documents:
|
|
||||||
index.update_document(writer, document)
|
|
||||||
except ImportError:
|
|
||||||
# index may not be relevant anymore
|
|
||||||
print(" --> Cannot create document index.")
|
|
||||||
|
|
||||||
|
|
||||||
def logs_set_default_group(apps, schema_editor):
|
def logs_set_default_group(apps, schema_editor):
|
||||||
Log = apps.get_model('documents', 'Log')
|
Log = apps.get_model('documents', 'Log')
|
||||||
for log in Log.objects.all():
|
for log in Log.objects.all():
|
||||||
@ -99,8 +82,4 @@ class Migration(migrations.Migration):
|
|||||||
code=django.db.migrations.operations.special.RunPython.noop,
|
code=django.db.migrations.operations.special.RunPython.noop,
|
||||||
reverse_code=logs_set_default_group
|
reverse_code=logs_set_default_group
|
||||||
),
|
),
|
||||||
migrations.RunPython(
|
|
||||||
code=make_index,
|
|
||||||
reverse_code=django.db.migrations.operations.special.RunPython.noop,
|
|
||||||
),
|
|
||||||
]
|
]
|
||||||
|
26
src/documents/migrations/1004_sanity_check_schedule.py
Normal file
26
src/documents/migrations/1004_sanity_check_schedule.py
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
# Generated by Django 3.1.3 on 2020-11-25 14:53
|
||||||
|
|
||||||
|
from django.db import migrations
|
||||||
|
from django.db.migrations import RunPython
|
||||||
|
from django_q.models import Schedule
|
||||||
|
from django_q.tasks import schedule
|
||||||
|
|
||||||
|
|
||||||
|
def add_schedules(apps, schema_editor):
    """Forward step: register the weekly sanity check as a django-q schedule."""
    schedule(
        'documents.tasks.sanity_check',
        name="Perform sanity check",
        schedule_type=Schedule.WEEKLY,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def remove_schedules(apps, schema_editor):
    """Reverse step: delete every schedule that runs the sanity check task."""
    sanity_check_schedules = Schedule.objects.filter(
        func='documents.tasks.sanity_check')
    sanity_check_schedules.delete()
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """Data migration that adds (and on reversal removes) the sanity check
    schedule."""

    dependencies = [
        ('documents', '1003_mime_types'),
        ('django_q', '0013_task_attempt_count'),
    ]

    operations = [
        RunPython(code=add_schedules, reverse_code=remove_schedules),
    ]
|
@ -230,6 +230,7 @@ class Document(models.Model):
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def file_type(self):
|
def file_type(self):
|
||||||
|
# TODO: this is not stable across python versions
|
||||||
return mimetypes.guess_extension(str(self.mime_type))
|
return mimetypes.guess_extension(str(self.mime_type))
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
94
src/documents/sanity_checker.py
Normal file
94
src/documents/sanity_checker.py
Normal file
@ -0,0 +1,94 @@
|
|||||||
|
import hashlib
|
||||||
|
import os
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
|
from documents.models import Document
|
||||||
|
|
||||||
|
|
||||||
|
class SanityMessage:
    """Common base type for findings produced by the sanity checker."""

    # Text of the finding; stays None unless a subclass or instance sets it.
    message = None
|
||||||
|
|
||||||
|
|
||||||
|
class SanityWarning(SanityMessage):
    """A sanity checker finding reported as a warning."""

    def __init__(self, message):
        self.message = message

    def __str__(self):
        text = self.message
        return f"Warning: {text}"
|
||||||
|
|
||||||
|
|
||||||
|
class SanityError(SanityMessage):
    """A sanity checker finding reported as an error."""

    def __init__(self, message):
        self.message = message

    def __str__(self):
        text = self.message
        return f"ERROR: {text}"
|
||||||
|
|
||||||
|
|
||||||
|
class SanityFailedError(Exception):
    """Exception carrying the findings of a failed sanity check.

    ``messages`` is the iterable of findings; each one is rendered with
    ``str()`` on its own line when the exception itself is converted to a
    string.
    """

    def __init__(self, messages):
        self.messages = messages

    def __str__(self):
        # One finding per line, followed by a visual separator.
        message_string = "\n".join(str(m) for m in self.messages)
        return (
            f"The following issues were found by the sanity checker:\n"
            f"{message_string}\n\n===============\n\n")
|
||||||
|
|
||||||
|
|
||||||
|
def check_sanity():
    """Check every document's files on disk and report what is wrong.

    For each document this verifies that

    * the thumbnail exists and can be read,
    * the original exists, can be read, and its MD5 checksum matches the
      checksum stored on the document,
    * the document has content (reported as a warning only).

    Files found below ``settings.MEDIA_ROOT`` that no document refers to
    are reported as orphaned (warning).

    Returns:
        A list of ``SanityError`` / ``SanityWarning`` objects; the list is
        empty when nothing was found.
    """
    messages = []

    # All files currently below MEDIA_ROOT. Paths are removed as documents
    # claim them; whatever is left at the end is orphaned. A set is used so
    # that a path which is missing from the walk, or claimed by more than
    # one document, does not abort the whole check.
    unclaimed_files = {
        os.path.normpath(os.path.join(root, name))
        for root, _subdirs, names in os.walk(settings.MEDIA_ROOT)
        for name in names
    }

    for doc in Document.objects.all():
        # Check thumbnail
        if not os.path.isfile(doc.thumbnail_path):
            messages.append(SanityError(
                f"Thumbnail of document {doc.pk} does not exist."))
        else:
            unclaimed_files.discard(os.path.normpath(doc.thumbnail_path))
            try:
                # Reading the file is the actual check; the data is unused.
                with doc.thumbnail_file as f:
                    f.read()
            except OSError as e:
                messages.append(SanityError(
                    f"Cannot read thumbnail file of document {doc.pk}: {e}"
                ))

        # Check document
        if not os.path.isfile(doc.source_path):
            messages.append(SanityError(
                f"Original of document {doc.pk} does not exist."))
        else:
            unclaimed_files.discard(os.path.normpath(doc.source_path))
            checksum = None
            try:
                with doc.source_file as f:
                    checksum = hashlib.md5(f.read()).hexdigest()
            except OSError as e:
                messages.append(SanityError(
                    f"Cannot read original file of document {doc.pk}: {e}"))

            # Only compare when the file could actually be read.
            if checksum and not checksum == doc.checksum:
                messages.append(SanityError(
                    f"Checksum mismatch of document {doc.pk}. "
                    f"Stored: {doc.checksum}, actual: {checksum}."
                ))

        if not doc.content:
            messages.append(SanityWarning(
                f"Document {doc.pk} has no content."
            ))

    # Sorted so that the report is stable between runs.
    for extra_file in sorted(unclaimed_files):
        messages.append(SanityWarning(
            f"Orphaned file in media dir: {extra_file}"
        ))

    return messages
|
@ -93,14 +93,11 @@ class DocumentSerializer(serializers.ModelSerializer):
|
|||||||
"document_type_id",
|
"document_type_id",
|
||||||
"title",
|
"title",
|
||||||
"content",
|
"content",
|
||||||
"mime_type",
|
|
||||||
"tags",
|
"tags",
|
||||||
"tags_id",
|
"tags_id",
|
||||||
"checksum",
|
|
||||||
"created",
|
"created",
|
||||||
"modified",
|
"modified",
|
||||||
"added",
|
"added",
|
||||||
"file_name",
|
|
||||||
"archive_serial_number"
|
"archive_serial_number"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -3,11 +3,12 @@ import logging
|
|||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from whoosh.writing import AsyncWriter
|
from whoosh.writing import AsyncWriter
|
||||||
|
|
||||||
from documents import index
|
from documents import index, sanity_checker
|
||||||
from documents.classifier import DocumentClassifier, \
|
from documents.classifier import DocumentClassifier, \
|
||||||
IncompatibleClassifierVersionError
|
IncompatibleClassifierVersionError
|
||||||
from documents.consumer import Consumer, ConsumerError
|
from documents.consumer import Consumer, ConsumerError
|
||||||
from documents.models import Document
|
from documents.models import Document
|
||||||
|
from documents.sanity_checker import SanityFailedError
|
||||||
|
|
||||||
|
|
||||||
def index_optimize():
|
def index_optimize():
|
||||||
@ -74,3 +75,12 @@ def consume_file(path,
|
|||||||
else:
|
else:
|
||||||
raise ConsumerError("Unknown error: Returned document was null, but "
|
raise ConsumerError("Unknown error: Returned document was null, but "
|
||||||
"no error message was given.")
|
"no error message was given.")
|
||||||
|
|
||||||
|
|
||||||
|
def sanity_check():
|
||||||
|
messages = sanity_checker.check_sanity()
|
||||||
|
|
||||||
|
if len(messages) > 0:
|
||||||
|
raise SanityFailedError(messages)
|
||||||
|
else:
|
||||||
|
return "No issues detected."
|
||||||
|
BIN
src/documents/tests/samples/originals/0000001.pdf
Normal file
BIN
src/documents/tests/samples/originals/0000001.pdf
Normal file
Binary file not shown.
BIN
src/documents/tests/samples/originals/0000002.pdf.gpg
Normal file
BIN
src/documents/tests/samples/originals/0000002.pdf.gpg
Normal file
Binary file not shown.
BIN
src/documents/tests/samples/simple.pdf
Normal file
BIN
src/documents/tests/samples/simple.pdf
Normal file
Binary file not shown.
BIN
src/documents/tests/samples/simple.zip
Normal file
BIN
src/documents/tests/samples/simple.zip
Normal file
Binary file not shown.
BIN
src/documents/tests/samples/thumb/0000001.png
Normal file
BIN
src/documents/tests/samples/thumb/0000001.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 7.7 KiB |
BIN
src/documents/tests/samples/thumb/0000002.png.gpg
Normal file
BIN
src/documents/tests/samples/thumb/0000002.png.gpg
Normal file
Binary file not shown.
@ -1,40 +1,24 @@
|
|||||||
import os
|
import os
|
||||||
import shutil
|
|
||||||
import tempfile
|
import tempfile
|
||||||
from unittest import mock
|
from unittest import mock
|
||||||
|
|
||||||
from django.contrib.auth.models import User
|
from django.contrib.auth.models import User
|
||||||
from django.test import override_settings
|
from pathvalidate import ValidationError
|
||||||
from rest_framework.test import APITestCase
|
from rest_framework.test import APITestCase
|
||||||
|
|
||||||
|
from documents import index
|
||||||
from documents.models import Document, Correspondent, DocumentType, Tag
|
from documents.models import Document, Correspondent, DocumentType, Tag
|
||||||
|
from documents.tests.utils import DirectoriesMixin
|
||||||
|
|
||||||
|
|
||||||
class DocumentApiTest(APITestCase):
|
class DocumentApiTest(DirectoriesMixin, APITestCase):
|
||||||
|
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
self.scratch_dir = tempfile.mkdtemp()
|
super(DocumentApiTest, self).setUp()
|
||||||
self.media_dir = tempfile.mkdtemp()
|
|
||||||
self.originals_dir = os.path.join(self.media_dir, "documents", "originals")
|
|
||||||
self.thumbnail_dir = os.path.join(self.media_dir, "documents", "thumbnails")
|
|
||||||
|
|
||||||
os.makedirs(self.originals_dir, exist_ok=True)
|
|
||||||
os.makedirs(self.thumbnail_dir, exist_ok=True)
|
|
||||||
|
|
||||||
override_settings(
|
|
||||||
SCRATCH_DIR=self.scratch_dir,
|
|
||||||
MEDIA_ROOT=self.media_dir,
|
|
||||||
ORIGINALS_DIR=self.originals_dir,
|
|
||||||
THUMBNAIL_DIR=self.thumbnail_dir
|
|
||||||
).enable()
|
|
||||||
|
|
||||||
user = User.objects.create_superuser(username="temp_admin")
|
user = User.objects.create_superuser(username="temp_admin")
|
||||||
self.client.force_login(user=user)
|
self.client.force_login(user=user)
|
||||||
|
|
||||||
def tearDown(self):
|
|
||||||
shutil.rmtree(self.scratch_dir, ignore_errors=True)
|
|
||||||
shutil.rmtree(self.media_dir, ignore_errors=True)
|
|
||||||
|
|
||||||
def testDocuments(self):
|
def testDocuments(self):
|
||||||
|
|
||||||
response = self.client.get("/api/documents/").data
|
response = self.client.get("/api/documents/").data
|
||||||
@ -87,7 +71,7 @@ class DocumentApiTest(APITestCase):
|
|||||||
|
|
||||||
def test_document_actions(self):
|
def test_document_actions(self):
|
||||||
|
|
||||||
_, filename = tempfile.mkstemp(dir=self.originals_dir)
|
_, filename = tempfile.mkstemp(dir=self.dirs.originals_dir)
|
||||||
|
|
||||||
content = b"This is a test"
|
content = b"This is a test"
|
||||||
content_thumbnail = b"thumbnail content"
|
content_thumbnail = b"thumbnail content"
|
||||||
@ -97,7 +81,7 @@ class DocumentApiTest(APITestCase):
|
|||||||
|
|
||||||
doc = Document.objects.create(title="none", filename=os.path.basename(filename), mime_type="application/pdf")
|
doc = Document.objects.create(title="none", filename=os.path.basename(filename), mime_type="application/pdf")
|
||||||
|
|
||||||
with open(os.path.join(self.thumbnail_dir, "{:07d}.png".format(doc.pk)), "wb") as f:
|
with open(os.path.join(self.dirs.thumbnail_dir, "{:07d}.png".format(doc.pk)), "wb") as f:
|
||||||
f.write(content_thumbnail)
|
f.write(content_thumbnail)
|
||||||
|
|
||||||
response = self.client.get('/api/documents/{}/download/'.format(doc.pk))
|
response = self.client.get('/api/documents/{}/download/'.format(doc.pk))
|
||||||
@ -179,6 +163,109 @@ class DocumentApiTest(APITestCase):
|
|||||||
results = response.data['results']
|
results = response.data['results']
|
||||||
self.assertEqual(len(results), 3)
|
self.assertEqual(len(results), 3)
|
||||||
|
|
||||||
|
def test_search_no_query(self):
|
||||||
|
response = self.client.get("/api/search/")
|
||||||
|
results = response.data['results']
|
||||||
|
|
||||||
|
self.assertEqual(len(results), 0)
|
||||||
|
|
||||||
|
def test_search(self):
|
||||||
|
d1=Document.objects.create(title="invoice", content="the thing i bought at a shop and paid with bank account", checksum="A", pk=1)
|
||||||
|
d2=Document.objects.create(title="bank statement 1", content="things i paid for in august", pk=2, checksum="B")
|
||||||
|
d3=Document.objects.create(title="bank statement 3", content="things i paid for in september", pk=3, checksum="C")
|
||||||
|
with index.open_index(False).writer() as writer:
|
||||||
|
# Note to future self: there is a reason we dont use a model signal handler to update the index: some operations edit many documents at once
|
||||||
|
# (retagger, renamer) and we don't want to open a writer for each of these, but rather perform the entire operation with one writer.
|
||||||
|
# That's why we cant open the writer in a model on_save handler or something.
|
||||||
|
index.update_document(writer, d1)
|
||||||
|
index.update_document(writer, d2)
|
||||||
|
index.update_document(writer, d3)
|
||||||
|
response = self.client.get("/api/search/?query=bank")
|
||||||
|
results = response.data['results']
|
||||||
|
self.assertEqual(response.data['count'], 3)
|
||||||
|
self.assertEqual(response.data['page'], 1)
|
||||||
|
self.assertEqual(response.data['page_count'], 1)
|
||||||
|
self.assertEqual(len(results), 3)
|
||||||
|
|
||||||
|
response = self.client.get("/api/search/?query=september")
|
||||||
|
results = response.data['results']
|
||||||
|
self.assertEqual(response.data['count'], 1)
|
||||||
|
self.assertEqual(response.data['page'], 1)
|
||||||
|
self.assertEqual(response.data['page_count'], 1)
|
||||||
|
self.assertEqual(len(results), 1)
|
||||||
|
|
||||||
|
response = self.client.get("/api/search/?query=statement")
|
||||||
|
results = response.data['results']
|
||||||
|
self.assertEqual(response.data['count'], 2)
|
||||||
|
self.assertEqual(response.data['page'], 1)
|
||||||
|
self.assertEqual(response.data['page_count'], 1)
|
||||||
|
self.assertEqual(len(results), 2)
|
||||||
|
|
||||||
|
response = self.client.get("/api/search/?query=sfegdfg")
|
||||||
|
results = response.data['results']
|
||||||
|
self.assertEqual(response.data['count'], 0)
|
||||||
|
self.assertEqual(response.data['page'], 0)
|
||||||
|
self.assertEqual(response.data['page_count'], 0)
|
||||||
|
self.assertEqual(len(results), 0)
|
||||||
|
|
||||||
|
def test_search_multi_page(self):
|
||||||
|
with index.open_index(False).writer() as writer:
|
||||||
|
for i in range(55):
|
||||||
|
doc = Document.objects.create(checksum=str(i), pk=i+1, title=f"Document {i+1}", content="content")
|
||||||
|
index.update_document(writer, doc)
|
||||||
|
|
||||||
|
# This is here so that we test that no document gets returned twice (might happen if the paging is not working)
|
||||||
|
seen_ids = []
|
||||||
|
|
||||||
|
for i in range(1, 6):
|
||||||
|
response = self.client.get(f"/api/search/?query=content&page={i}")
|
||||||
|
results = response.data['results']
|
||||||
|
self.assertEqual(response.data['count'], 55)
|
||||||
|
self.assertEqual(response.data['page'], i)
|
||||||
|
self.assertEqual(response.data['page_count'], 6)
|
||||||
|
self.assertEqual(len(results), 10)
|
||||||
|
|
||||||
|
for result in results:
|
||||||
|
self.assertNotIn(result['id'], seen_ids)
|
||||||
|
seen_ids.append(result['id'])
|
||||||
|
|
||||||
|
response = self.client.get(f"/api/search/?query=content&page=6")
|
||||||
|
results = response.data['results']
|
||||||
|
self.assertEqual(response.data['count'], 55)
|
||||||
|
self.assertEqual(response.data['page'], 6)
|
||||||
|
self.assertEqual(response.data['page_count'], 6)
|
||||||
|
self.assertEqual(len(results), 5)
|
||||||
|
|
||||||
|
for result in results:
|
||||||
|
self.assertNotIn(result['id'], seen_ids)
|
||||||
|
seen_ids.append(result['id'])
|
||||||
|
|
||||||
|
response = self.client.get(f"/api/search/?query=content&page=7")
|
||||||
|
results = response.data['results']
|
||||||
|
self.assertEqual(response.data['count'], 55)
|
||||||
|
self.assertEqual(response.data['page'], 6)
|
||||||
|
self.assertEqual(response.data['page_count'], 6)
|
||||||
|
self.assertEqual(len(results), 5)
|
||||||
|
|
||||||
|
def test_search_invalid_page(self):
|
||||||
|
with index.open_index(False).writer() as writer:
|
||||||
|
for i in range(15):
|
||||||
|
doc = Document.objects.create(checksum=str(i), pk=i+1, title=f"Document {i+1}", content="content")
|
||||||
|
index.update_document(writer, doc)
|
||||||
|
|
||||||
|
first_page = self.client.get(f"/api/search/?query=content&page=1").data
|
||||||
|
second_page = self.client.get(f"/api/search/?query=content&page=2").data
|
||||||
|
should_be_first_page_1 = self.client.get(f"/api/search/?query=content&page=0").data
|
||||||
|
should_be_first_page_2 = self.client.get(f"/api/search/?query=content&page=dgfd").data
|
||||||
|
should_be_first_page_3 = self.client.get(f"/api/search/?query=content&page=").data
|
||||||
|
should_be_first_page_4 = self.client.get(f"/api/search/?query=content&page=-7868").data
|
||||||
|
|
||||||
|
self.assertDictEqual(first_page, should_be_first_page_1)
|
||||||
|
self.assertDictEqual(first_page, should_be_first_page_2)
|
||||||
|
self.assertDictEqual(first_page, should_be_first_page_3)
|
||||||
|
self.assertDictEqual(first_page, should_be_first_page_4)
|
||||||
|
self.assertNotEqual(len(first_page['results']), len(second_page['results']))
|
||||||
|
|
||||||
@mock.patch("documents.index.autocomplete")
|
@mock.patch("documents.index.autocomplete")
|
||||||
def test_search_autocomplete(self, m):
|
def test_search_autocomplete(self, m):
|
||||||
m.side_effect = lambda ix, term, limit: [term for _ in range(limit)]
|
m.side_effect = lambda ix, term, limit: [term for _ in range(limit)]
|
||||||
@ -215,3 +302,42 @@ class DocumentApiTest(APITestCase):
|
|||||||
self.assertEqual(response.status_code, 200)
|
self.assertEqual(response.status_code, 200)
|
||||||
self.assertEqual(response.data['documents_total'], 3)
|
self.assertEqual(response.data['documents_total'], 3)
|
||||||
self.assertEqual(response.data['documents_inbox'], 1)
|
self.assertEqual(response.data['documents_inbox'], 1)
|
||||||
|
|
||||||
|
@mock.patch("documents.forms.async_task")
|
||||||
|
def test_upload(self, m):
|
||||||
|
|
||||||
|
with open(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb") as f:
|
||||||
|
response = self.client.post("/api/documents/post_document/", {"document": f})
|
||||||
|
|
||||||
|
self.assertEqual(response.status_code, 200)
|
||||||
|
|
||||||
|
m.assert_called_once()
|
||||||
|
|
||||||
|
args, kwargs = m.call_args
|
||||||
|
self.assertEqual(kwargs['override_filename'], "simple.pdf")
|
||||||
|
|
||||||
|
@mock.patch("documents.forms.async_task")
|
||||||
|
def test_upload_invalid_form(self, m):
|
||||||
|
|
||||||
|
with open(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb") as f:
|
||||||
|
response = self.client.post("/api/documents/post_document/", {"documenst": f})
|
||||||
|
self.assertEqual(response.status_code, 400)
|
||||||
|
m.assert_not_called()
|
||||||
|
|
||||||
|
@mock.patch("documents.forms.async_task")
|
||||||
|
def test_upload_invalid_file(self, m):
|
||||||
|
|
||||||
|
with open(os.path.join(os.path.dirname(__file__), "samples", "simple.zip"), "rb") as f:
|
||||||
|
response = self.client.post("/api/documents/post_document/", {"document": f})
|
||||||
|
self.assertEqual(response.status_code, 400)
|
||||||
|
m.assert_not_called()
|
||||||
|
|
||||||
|
@mock.patch("documents.forms.async_task")
|
||||||
|
@mock.patch("documents.forms.validate_filename")
|
||||||
|
def test_upload_invalid_filename(self, validate_filename, async_task):
|
||||||
|
validate_filename.side_effect = ValidationError()
|
||||||
|
with open(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb") as f:
|
||||||
|
response = self.client.post("/api/documents/post_document/", {"document": f})
|
||||||
|
self.assertEqual(response.status_code, 400)
|
||||||
|
|
||||||
|
async_task.assert_not_called()
|
||||||
|
@ -1,24 +1,29 @@
|
|||||||
import tempfile
|
import tempfile
|
||||||
|
from time import sleep
|
||||||
|
from unittest import mock
|
||||||
|
|
||||||
from django.test import TestCase, override_settings
|
from django.test import TestCase, override_settings
|
||||||
|
|
||||||
from documents.classifier import DocumentClassifier
|
from documents.classifier import DocumentClassifier, IncompatibleClassifierVersionError
|
||||||
from documents.models import Correspondent, Document, Tag, DocumentType
|
from documents.models import Correspondent, Document, Tag, DocumentType
|
||||||
|
from documents.tests.utils import DirectoriesMixin
|
||||||
|
|
||||||
|
|
||||||
class TestClassifier(TestCase):
|
class TestClassifier(DirectoriesMixin, TestCase):
|
||||||
|
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
|
super(TestClassifier, self).setUp()
|
||||||
self.classifier = DocumentClassifier()
|
self.classifier = DocumentClassifier()
|
||||||
|
|
||||||
def generate_test_data(self):
|
def generate_test_data(self):
|
||||||
self.c1 = Correspondent.objects.create(name="c1", matching_algorithm=Correspondent.MATCH_AUTO)
|
self.c1 = Correspondent.objects.create(name="c1", matching_algorithm=Correspondent.MATCH_AUTO)
|
||||||
self.c2 = Correspondent.objects.create(name="c2")
|
self.c2 = Correspondent.objects.create(name="c2")
|
||||||
|
self.c3 = Correspondent.objects.create(name="c3", matching_algorithm=Correspondent.MATCH_AUTO)
|
||||||
self.t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)
|
self.t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)
|
||||||
self.t2 = Tag.objects.create(name="t2", matching_algorithm=Tag.MATCH_ANY, pk=34, is_inbox_tag=True)
|
self.t2 = Tag.objects.create(name="t2", matching_algorithm=Tag.MATCH_ANY, pk=34, is_inbox_tag=True)
|
||||||
self.t3 = Tag.objects.create(name="t3", matching_algorithm=Tag.MATCH_AUTO, pk=45)
|
self.t3 = Tag.objects.create(name="t3", matching_algorithm=Tag.MATCH_AUTO, pk=45)
|
||||||
self.dt = DocumentType.objects.create(name="dt", matching_algorithm=DocumentType.MATCH_AUTO)
|
self.dt = DocumentType.objects.create(name="dt", matching_algorithm=DocumentType.MATCH_AUTO)
|
||||||
|
self.dt2 = DocumentType.objects.create(name="dt2", matching_algorithm=DocumentType.MATCH_AUTO)
|
||||||
|
|
||||||
self.doc1 = Document.objects.create(title="doc1", content="this is a document from c1", correspondent=self.c1, checksum="A", document_type=self.dt)
|
self.doc1 = Document.objects.create(title="doc1", content="this is a document from c1", correspondent=self.c1, checksum="A", document_type=self.dt)
|
||||||
self.doc2 = Document.objects.create(title="doc1", content="this is another document, but from c2", correspondent=self.c2, checksum="B")
|
self.doc2 = Document.objects.create(title="doc1", content="this is another document, but from c2", correspondent=self.c2, checksum="B")
|
||||||
@ -59,8 +64,8 @@ class TestClassifier(TestCase):
|
|||||||
self.classifier.train()
|
self.classifier.train()
|
||||||
self.assertEqual(self.classifier.predict_correspondent(self.doc1.content), self.c1.pk)
|
self.assertEqual(self.classifier.predict_correspondent(self.doc1.content), self.c1.pk)
|
||||||
self.assertEqual(self.classifier.predict_correspondent(self.doc2.content), None)
|
self.assertEqual(self.classifier.predict_correspondent(self.doc2.content), None)
|
||||||
self.assertTupleEqual(self.classifier.predict_tags(self.doc1.content), (self.t1.pk,))
|
self.assertListEqual(self.classifier.predict_tags(self.doc1.content), [self.t1.pk])
|
||||||
self.assertTupleEqual(self.classifier.predict_tags(self.doc2.content), (self.t1.pk, self.t3.pk))
|
self.assertListEqual(self.classifier.predict_tags(self.doc2.content), [self.t1.pk, self.t3.pk])
|
||||||
self.assertEqual(self.classifier.predict_document_type(self.doc1.content), self.dt.pk)
|
self.assertEqual(self.classifier.predict_document_type(self.doc1.content), self.dt.pk)
|
||||||
self.assertEqual(self.classifier.predict_document_type(self.doc2.content), None)
|
self.assertEqual(self.classifier.predict_document_type(self.doc2.content), None)
|
||||||
|
|
||||||
@ -71,6 +76,44 @@ class TestClassifier(TestCase):
|
|||||||
self.assertTrue(self.classifier.train())
|
self.assertTrue(self.classifier.train())
|
||||||
self.assertFalse(self.classifier.train())
|
self.assertFalse(self.classifier.train())
|
||||||
|
|
||||||
|
def testVersionIncreased(self):
|
||||||
|
|
||||||
|
self.generate_test_data()
|
||||||
|
self.assertTrue(self.classifier.train())
|
||||||
|
self.assertFalse(self.classifier.train())
|
||||||
|
|
||||||
|
self.classifier.save_classifier()
|
||||||
|
|
||||||
|
classifier2 = DocumentClassifier()
|
||||||
|
|
||||||
|
current_ver = DocumentClassifier.FORMAT_VERSION
|
||||||
|
with mock.patch("documents.classifier.DocumentClassifier.FORMAT_VERSION", current_ver+1):
|
||||||
|
# assure that we won't load old classifiers.
|
||||||
|
self.assertRaises(IncompatibleClassifierVersionError, classifier2.reload)
|
||||||
|
|
||||||
|
self.classifier.save_classifier()
|
||||||
|
|
||||||
|
# assure that we can load the classifier after saving it.
|
||||||
|
classifier2.reload()
|
||||||
|
|
||||||
|
def testReload(self):
|
||||||
|
|
||||||
|
self.generate_test_data()
|
||||||
|
self.assertTrue(self.classifier.train())
|
||||||
|
self.classifier.save_classifier()
|
||||||
|
|
||||||
|
classifier2 = DocumentClassifier()
|
||||||
|
classifier2.reload()
|
||||||
|
v1 = classifier2.classifier_version
|
||||||
|
|
||||||
|
# change the classifier after some time.
|
||||||
|
sleep(1)
|
||||||
|
self.classifier.save_classifier()
|
||||||
|
|
||||||
|
classifier2.reload()
|
||||||
|
v2 = classifier2.classifier_version
|
||||||
|
self.assertNotEqual(v1, v2)
|
||||||
|
|
||||||
@override_settings(DATA_DIR=tempfile.mkdtemp())
|
@override_settings(DATA_DIR=tempfile.mkdtemp())
|
||||||
def testSaveClassifier(self):
|
def testSaveClassifier(self):
|
||||||
|
|
||||||
@ -83,3 +126,112 @@ class TestClassifier(TestCase):
|
|||||||
new_classifier = DocumentClassifier()
|
new_classifier = DocumentClassifier()
|
||||||
new_classifier.reload()
|
new_classifier.reload()
|
||||||
self.assertFalse(new_classifier.train())
|
self.assertFalse(new_classifier.train())
|
||||||
|
|
||||||
|
def test_one_correspondent_predict(self):
|
||||||
|
c1 = Correspondent.objects.create(name="c1", matching_algorithm=Correspondent.MATCH_AUTO)
|
||||||
|
doc1 = Document.objects.create(title="doc1", content="this is a document from c1", correspondent=c1, checksum="A")
|
||||||
|
|
||||||
|
self.classifier.train()
|
||||||
|
self.assertEqual(self.classifier.predict_correspondent(doc1.content), c1.pk)
|
||||||
|
|
||||||
|
def test_one_correspondent_predict_manydocs(self):
|
||||||
|
c1 = Correspondent.objects.create(name="c1", matching_algorithm=Correspondent.MATCH_AUTO)
|
||||||
|
doc1 = Document.objects.create(title="doc1", content="this is a document from c1", correspondent=c1, checksum="A")
|
||||||
|
doc2 = Document.objects.create(title="doc2", content="this is a document from noone", checksum="B")
|
||||||
|
|
||||||
|
self.classifier.train()
|
||||||
|
self.assertEqual(self.classifier.predict_correspondent(doc1.content), c1.pk)
|
||||||
|
self.assertIsNone(self.classifier.predict_correspondent(doc2.content))
|
||||||
|
|
||||||
|
def test_one_type_predict(self):
|
||||||
|
dt = DocumentType.objects.create(name="dt", matching_algorithm=DocumentType.MATCH_AUTO)
|
||||||
|
|
||||||
|
doc1 = Document.objects.create(title="doc1", content="this is a document from c1",
|
||||||
|
checksum="A", document_type=dt)
|
||||||
|
|
||||||
|
self.classifier.train()
|
||||||
|
self.assertEqual(self.classifier.predict_document_type(doc1.content), dt.pk)
|
||||||
|
|
||||||
|
def test_one_type_predict_manydocs(self):
|
||||||
|
dt = DocumentType.objects.create(name="dt", matching_algorithm=DocumentType.MATCH_AUTO)
|
||||||
|
|
||||||
|
doc1 = Document.objects.create(title="doc1", content="this is a document from c1",
|
||||||
|
checksum="A", document_type=dt)
|
||||||
|
|
||||||
|
doc2 = Document.objects.create(title="doc1", content="this is a document from c2",
|
||||||
|
checksum="B")
|
||||||
|
|
||||||
|
self.classifier.train()
|
||||||
|
self.assertEqual(self.classifier.predict_document_type(doc1.content), dt.pk)
|
||||||
|
self.assertIsNone(self.classifier.predict_document_type(doc2.content))
|
||||||
|
|
||||||
|
def test_one_tag_predict(self):
|
||||||
|
t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)
|
||||||
|
|
||||||
|
doc1 = Document.objects.create(title="doc1", content="this is a document from c1", checksum="A")
|
||||||
|
|
||||||
|
doc1.tags.add(t1)
|
||||||
|
self.classifier.train()
|
||||||
|
self.assertListEqual(self.classifier.predict_tags(doc1.content), [t1.pk])
|
||||||
|
|
||||||
|
def test_one_tag_predict_unassigned(self):
|
||||||
|
t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)
|
||||||
|
|
||||||
|
doc1 = Document.objects.create(title="doc1", content="this is a document from c1", checksum="A")
|
||||||
|
|
||||||
|
self.classifier.train()
|
||||||
|
self.assertListEqual(self.classifier.predict_tags(doc1.content), [])
|
||||||
|
|
||||||
|
def test_two_tags_predict_singledoc(self):
|
||||||
|
t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)
|
||||||
|
t2 = Tag.objects.create(name="t2", matching_algorithm=Tag.MATCH_AUTO, pk=121)
|
||||||
|
|
||||||
|
doc4 = Document.objects.create(title="doc1", content="this is a document from c4", checksum="D")
|
||||||
|
|
||||||
|
doc4.tags.add(t1)
|
||||||
|
doc4.tags.add(t2)
|
||||||
|
self.classifier.train()
|
||||||
|
self.assertListEqual(self.classifier.predict_tags(doc4.content), [t1.pk, t2.pk])
|
||||||
|
|
||||||
|
def test_two_tags_predict(self):
|
||||||
|
t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)
|
||||||
|
t2 = Tag.objects.create(name="t2", matching_algorithm=Tag.MATCH_AUTO, pk=121)
|
||||||
|
|
||||||
|
doc1 = Document.objects.create(title="doc1", content="this is a document from c1", checksum="A")
|
||||||
|
doc2 = Document.objects.create(title="doc1", content="this is a document from c2", checksum="B")
|
||||||
|
doc3 = Document.objects.create(title="doc1", content="this is a document from c3", checksum="C")
|
||||||
|
doc4 = Document.objects.create(title="doc1", content="this is a document from c4", checksum="D")
|
||||||
|
|
||||||
|
doc1.tags.add(t1)
|
||||||
|
doc2.tags.add(t2)
|
||||||
|
|
||||||
|
doc4.tags.add(t1)
|
||||||
|
doc4.tags.add(t2)
|
||||||
|
self.classifier.train()
|
||||||
|
self.assertListEqual(self.classifier.predict_tags(doc1.content), [t1.pk])
|
||||||
|
self.assertListEqual(self.classifier.predict_tags(doc2.content), [t2.pk])
|
||||||
|
self.assertListEqual(self.classifier.predict_tags(doc3.content), [])
|
||||||
|
self.assertListEqual(self.classifier.predict_tags(doc4.content), [t1.pk, t2.pk])
|
||||||
|
|
||||||
|
def test_one_tag_predict_multi(self):
|
||||||
|
t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)
|
||||||
|
|
||||||
|
doc1 = Document.objects.create(title="doc1", content="this is a document from c1", checksum="A")
|
||||||
|
doc2 = Document.objects.create(title="doc2", content="this is a document from c2", checksum="B")
|
||||||
|
|
||||||
|
doc1.tags.add(t1)
|
||||||
|
doc2.tags.add(t1)
|
||||||
|
self.classifier.train()
|
||||||
|
self.assertListEqual(self.classifier.predict_tags(doc1.content), [t1.pk])
|
||||||
|
self.assertListEqual(self.classifier.predict_tags(doc2.content), [t1.pk])
|
||||||
|
|
||||||
|
def test_one_tag_predict_multi_2(self):
|
||||||
|
t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)
|
||||||
|
|
||||||
|
doc1 = Document.objects.create(title="doc1", content="this is a document from c1", checksum="A")
|
||||||
|
doc2 = Document.objects.create(title="doc2", content="this is a document from c2", checksum="B")
|
||||||
|
|
||||||
|
doc1.tags.add(t1)
|
||||||
|
self.classifier.train()
|
||||||
|
self.assertListEqual(self.classifier.predict_tags(doc1.content), [t1.pk])
|
||||||
|
self.assertListEqual(self.classifier.predict_tags(doc2.content), [])
|
||||||
|
@ -1,12 +1,12 @@
|
|||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import shutil
|
|
||||||
import tempfile
|
import tempfile
|
||||||
from unittest import mock
|
from unittest import mock
|
||||||
from unittest.mock import MagicMock
|
from unittest.mock import MagicMock
|
||||||
|
|
||||||
from django.test import TestCase, override_settings
|
from django.test import TestCase, override_settings
|
||||||
|
|
||||||
|
from .utils import DirectoriesMixin
|
||||||
from ..consumer import Consumer, ConsumerError
|
from ..consumer import Consumer, ConsumerError
|
||||||
from ..models import FileInfo, Tag, Correspondent, DocumentType, Document
|
from ..models import FileInfo, Tag, Correspondent, DocumentType, Document
|
||||||
from ..parsers import DocumentParser, ParseError
|
from ..parsers import DocumentParser, ParseError
|
||||||
@ -408,26 +408,16 @@ def fake_magic_from_file(file, mime=False):
|
|||||||
|
|
||||||
|
|
||||||
@mock.patch("documents.consumer.magic.from_file", fake_magic_from_file)
|
@mock.patch("documents.consumer.magic.from_file", fake_magic_from_file)
|
||||||
class TestConsumer(TestCase):
|
class TestConsumer(DirectoriesMixin, TestCase):
|
||||||
|
|
||||||
def make_dummy_parser(self, path, logging_group):
|
def make_dummy_parser(self, path, logging_group):
|
||||||
return DummyParser(path, logging_group, self.scratch_dir)
|
return DummyParser(path, logging_group, self.dirs.scratch_dir)
|
||||||
|
|
||||||
def make_faulty_parser(self, path, logging_group):
|
def make_faulty_parser(self, path, logging_group):
|
||||||
return FaultyParser(path, logging_group, self.scratch_dir)
|
return FaultyParser(path, logging_group, self.dirs.scratch_dir)
|
||||||
|
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
self.scratch_dir = tempfile.mkdtemp()
|
super(TestConsumer, self).setUp()
|
||||||
self.media_dir = tempfile.mkdtemp()
|
|
||||||
self.consumption_dir = tempfile.mkdtemp()
|
|
||||||
|
|
||||||
override_settings(
|
|
||||||
SCRATCH_DIR=self.scratch_dir,
|
|
||||||
MEDIA_ROOT=self.media_dir,
|
|
||||||
ORIGINALS_DIR=os.path.join(self.media_dir, "documents", "originals"),
|
|
||||||
THUMBNAIL_DIR=os.path.join(self.media_dir, "documents", "thumbnails"),
|
|
||||||
CONSUMPTION_DIR=self.consumption_dir
|
|
||||||
).enable()
|
|
||||||
|
|
||||||
patcher = mock.patch("documents.parsers.document_consumer_declaration.send")
|
patcher = mock.patch("documents.parsers.document_consumer_declaration.send")
|
||||||
m = patcher.start()
|
m = patcher.start()
|
||||||
@ -441,13 +431,8 @@ class TestConsumer(TestCase):
|
|||||||
|
|
||||||
self.consumer = Consumer()
|
self.consumer = Consumer()
|
||||||
|
|
||||||
def tearDown(self):
|
|
||||||
shutil.rmtree(self.scratch_dir, ignore_errors=True)
|
|
||||||
shutil.rmtree(self.media_dir, ignore_errors=True)
|
|
||||||
shutil.rmtree(self.consumption_dir, ignore_errors=True)
|
|
||||||
|
|
||||||
def get_test_file(self):
|
def get_test_file(self):
|
||||||
fd, f = tempfile.mkstemp(suffix=".pdf", dir=self.scratch_dir)
|
fd, f = tempfile.mkstemp(suffix=".pdf", dir=self.dirs.scratch_dir)
|
||||||
return f
|
return f
|
||||||
|
|
||||||
def testNormalOperation(self):
|
def testNormalOperation(self):
|
||||||
@ -516,26 +501,6 @@ class TestConsumer(TestCase):
|
|||||||
|
|
||||||
self.fail("Should throw exception")
|
self.fail("Should throw exception")
|
||||||
|
|
||||||
@override_settings(CONSUMPTION_DIR=None)
|
|
||||||
def testConsumptionDirUnset(self):
|
|
||||||
try:
|
|
||||||
self.consumer.try_consume_file(self.get_test_file())
|
|
||||||
except ConsumerError as e:
|
|
||||||
self.assertEqual(str(e), "The CONSUMPTION_DIR settings variable does not appear to be set.")
|
|
||||||
return
|
|
||||||
|
|
||||||
self.fail("Should throw exception")
|
|
||||||
|
|
||||||
@override_settings(CONSUMPTION_DIR="asd")
|
|
||||||
def testNoConsumptionDir(self):
|
|
||||||
try:
|
|
||||||
self.consumer.try_consume_file(self.get_test_file())
|
|
||||||
except ConsumerError as e:
|
|
||||||
self.assertEqual(str(e), "Consumption directory asd does not exist")
|
|
||||||
return
|
|
||||||
|
|
||||||
self.fail("Should throw exception")
|
|
||||||
|
|
||||||
def testDuplicates(self):
|
def testDuplicates(self):
|
||||||
self.consumer.try_consume_file(self.get_test_file())
|
self.consumer.try_consume_file(self.get_test_file())
|
||||||
|
|
||||||
|
@ -2,7 +2,7 @@ import logging
|
|||||||
import uuid
|
import uuid
|
||||||
from unittest import mock
|
from unittest import mock
|
||||||
|
|
||||||
from django.test import TestCase
|
from django.test import TestCase, override_settings
|
||||||
|
|
||||||
from ..models import Log
|
from ..models import Log
|
||||||
|
|
||||||
@ -14,6 +14,7 @@ class TestPaperlessLog(TestCase):
|
|||||||
self.logger = logging.getLogger(
|
self.logger = logging.getLogger(
|
||||||
"documents.management.commands.document_consumer")
|
"documents.management.commands.document_consumer")
|
||||||
|
|
||||||
|
@override_settings(DISABLE_DBHANDLER=False)
|
||||||
def test_that_it_saves_at_all(self):
|
def test_that_it_saves_at_all(self):
|
||||||
|
|
||||||
kw = {"group": uuid.uuid4()}
|
kw = {"group": uuid.uuid4()}
|
||||||
@ -38,6 +39,7 @@ class TestPaperlessLog(TestCase):
|
|||||||
self.logger.critical("This is a critical message", extra=kw)
|
self.logger.critical("This is a critical message", extra=kw)
|
||||||
self.assertEqual(Log.objects.all().count(), 5)
|
self.assertEqual(Log.objects.all().count(), 5)
|
||||||
|
|
||||||
|
@override_settings(DISABLE_DBHANDLER=False)
|
||||||
def test_groups(self):
|
def test_groups(self):
|
||||||
|
|
||||||
kw1 = {"group": uuid.uuid4()}
|
kw1 = {"group": uuid.uuid4()}
|
||||||
|
210
src/documents/tests/test_management_consumer.py
Normal file
210
src/documents/tests/test_management_consumer.py
Normal file
@ -0,0 +1,210 @@
|
|||||||
|
import filecmp
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
from threading import Thread
|
||||||
|
from time import sleep
|
||||||
|
from unittest import mock
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
from django.core.management import call_command, CommandError
|
||||||
|
from django.test import override_settings, TestCase
|
||||||
|
|
||||||
|
from documents.consumer import ConsumerError
|
||||||
|
from documents.management.commands import document_consumer
|
||||||
|
from documents.tests.utils import DirectoriesMixin
|
||||||
|
|
||||||
|
|
||||||
|
class ConsumerThread(Thread):
    """Thread that runs the ``document_consumer`` command in the background.

    The command instance is kept on ``self.cmd`` so that ``stop()`` can
    signal it from another thread.
    """

    def __init__(self):
        super().__init__()
        self.cmd = document_consumer.Command()

    def run(self) -> None:
        """Run the consumer on the configured consumption directory."""
        options = {"directory": settings.CONSUMPTION_DIR, "oneshot": False}
        self.cmd.handle(**options)

    def stop(self):
        """Raise the command's stop flag; this does not wait for the exit."""
        # Consumer checks this every second.
        self.cmd.stop_flag = True
|
||||||
|
|
||||||
|
|
||||||
|
def chunked(size, source):
    """Yield consecutive slices of ``source`` that are ``size`` items long.

    The last slice is shorter when ``len(source)`` is not a multiple of
    ``size``. ``source`` must support ``len()`` and slicing.
    """
    yield from (
        source[start:start + size]
        for start in range(0, len(source), size)
    )
|
||||||
|
|
||||||
|
|
||||||
|
class TestConsumer(DirectoriesMixin, TestCase):
    """Tests for the ``document_consumer`` management command.

    The consumer runs in a ``ConsumerThread``; ``async_task`` is replaced
    by a mock so the tests can observe which files get handed over.
    """

    sample_file = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")

    def setUp(self) -> None:
        super().setUp()
        self.t = None
        task_patcher = mock.patch("documents.management.commands.document_consumer.async_task")
        self.task_mock = task_patcher.start()
        self.addCleanup(task_patcher.stop)

    def t_start(self):
        """Start the consumer thread and give it a moment to initialise."""
        self.t = ConsumerThread()
        self.t.start()
        # give the consumer some time to do initial work
        sleep(1)

    def tearDown(self) -> None:
        if self.t:
            # set the stop flag
            self.t.stop()
            # wait for the consumer to exit.
            self.t.join()

        super().tearDown()

    def wait_for_task_mock_call(self):
        """Poll up to 100 times for a call to the mocked ``async_task``.

        Fails the test when no call has been seen by then.
        """
        for _ in range(100):
            if self.task_mock.call_count > 0:
                # give task_mock some time to finish and raise errors
                sleep(1)
                return
            sleep(0.1)
        self.fail("async_task was never called")

    def _assert_consumed_once(self, expected_path):
        """Assert ``async_task`` was called once with ``expected_path`` as second positional argument."""
        self.task_mock.assert_called_once()

        positional, _ = self.task_mock.call_args
        self.assertEqual(positional[1], expected_path)

    def bogus_task(self, func, filename, **kwargs):
        """Stand-in for ``async_task`` that verifies the file is complete.

        Raises ``ConsumerError`` unless ``filename`` is byte-identical to
        the sample file.
        """
        if filecmp.cmp(filename, self.sample_file, shallow=False):
            print("Consumed a perfectly valid file.")
            return

        print("Consumed an INVALID file.")
        raise ConsumerError("Incomplete File READ FAILED")

    def slow_write_file(self, target, incomplete=False):
        """Write the sample file to ``target`` in 1000-byte pieces, 0.1s apart.

        With ``incomplete=True`` the last 100 bytes are left out.
        """
        with open(self.sample_file, 'rb') as src:
            pdf_bytes = src.read()

        if incomplete:
            pdf_bytes = pdf_bytes[:-100]

        with open(target, 'wb') as dst:
            # this will take 2 seconds, since the file is about 20k.
            print("Start writing file.")
            for piece in chunked(1000, pdf_bytes):
                dst.write(piece)
                sleep(0.1)
            print("file completed.")

    def test_consume_file(self):
        """A file copied into the directory while the consumer runs is consumed once."""
        self.t_start()

        f = os.path.join(self.dirs.consumption_dir, "my_file.pdf")
        shutil.copy(self.sample_file, f)

        self.wait_for_task_mock_call()

        self._assert_consumed_once(f)

    @override_settings(CONSUMER_POLLING=1)
    def test_consume_file_polling(self):
        self.test_consume_file()

    def test_consume_existing_file(self):
        """A file already present when the consumer starts is consumed once."""
        f = os.path.join(self.dirs.consumption_dir, "my_file.pdf")
        shutil.copy(self.sample_file, f)

        self.t_start()
        self._assert_consumed_once(f)

    @override_settings(CONSUMER_POLLING=1)
    def test_consume_existing_file_polling(self):
        self.test_consume_existing_file()

    @mock.patch("documents.management.commands.document_consumer.logger.error")
    def test_slow_write_pdf(self, error_logger):
        """A slowly written file is consumed complete, without logged errors."""
        self.task_mock.side_effect = self.bogus_task

        self.t_start()

        fname = os.path.join(self.dirs.consumption_dir, "my_file.pdf")

        self.slow_write_file(fname)

        self.wait_for_task_mock_call()

        error_logger.assert_not_called()

        self._assert_consumed_once(fname)

    @override_settings(CONSUMER_POLLING=1)
    def test_slow_write_pdf_polling(self):
        self.test_slow_write_pdf()

    @mock.patch("documents.management.commands.document_consumer.logger.error")
    def test_slow_write_and_move(self, error_logger):
        """A file written slowly under another name and then renamed is consumed once."""
        self.task_mock.side_effect = self.bogus_task

        self.t_start()

        fname = os.path.join(self.dirs.consumption_dir, "my_file.~df")
        fname2 = os.path.join(self.dirs.consumption_dir, "my_file.pdf")

        self.slow_write_file(fname)
        shutil.move(fname, fname2)

        self.wait_for_task_mock_call()

        self._assert_consumed_once(fname2)

        error_logger.assert_not_called()

    @override_settings(CONSUMER_POLLING=1)
    def test_slow_write_and_move_polling(self):
        self.test_slow_write_and_move()

    @mock.patch("documents.management.commands.document_consumer.logger.error")
    def test_slow_write_incomplete(self, error_logger):
        """A truncated file is handed over once and produces one logged error."""
        self.task_mock.side_effect = self.bogus_task

        self.t_start()

        fname = os.path.join(self.dirs.consumption_dir, "my_file.pdf")
        self.slow_write_file(fname, incomplete=True)

        self.wait_for_task_mock_call()

        self._assert_consumed_once(fname)

        # assert that we have an error logged with this invalid file.
        error_logger.assert_called_once()

    @override_settings(CONSUMER_POLLING=1)
    def test_slow_write_incomplete_polling(self):
        self.test_slow_write_incomplete()

    @override_settings(CONSUMPTION_DIR="does_not_exist")
    def test_consumption_directory_invalid(self):
        self.assertRaises(CommandError, call_command, 'document_consumer', '--oneshot')

    @override_settings(CONSUMPTION_DIR="")
    def test_consumption_directory_unset(self):
        self.assertRaises(CommandError, call_command, 'document_consumer', '--oneshot')
|
56
src/documents/tests/test_management_decrypt.py
Normal file
56
src/documents/tests/test_management_decrypt.py
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
import hashlib
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
import tempfile
|
||||||
|
from unittest import mock
|
||||||
|
|
||||||
|
from django.core.management import call_command
|
||||||
|
from django.test import TestCase, override_settings
|
||||||
|
|
||||||
|
from documents.management.commands import document_exporter
|
||||||
|
from documents.models import Document, Tag, DocumentType, Correspondent
|
||||||
|
|
||||||
|
|
||||||
|
class TestDecryptDocuments(TestCase):
    """Tests for the ``decrypt_documents`` management command."""

    @override_settings(
        ORIGINALS_DIR=os.path.join(os.path.dirname(__file__), "samples", "originals"),
        THUMBNAIL_DIR=os.path.join(os.path.dirname(__file__), "samples", "thumb"),
        PASSPHRASE="test"
    )
    @mock.patch("documents.management.commands.decrypt_documents.input")
    def test_decrypt(self, m):
        """Decrypt one GPG-stored document and check the files and database row.

        The encrypted samples are copied into a temporary media directory
        so the command works on copies. ``m`` is the mock that replaces
        ``input`` in the command module.
        """
        samples_dir = os.path.join(os.path.dirname(__file__), "samples")

        media_dir = tempfile.mkdtemp()
        # Remove the temporary media tree once the test is over.
        self.addCleanup(shutil.rmtree, media_dir, ignore_errors=True)

        originals_dir = os.path.join(media_dir, "documents", "originals")
        thumb_dir = os.path.join(media_dir, "documents", "thumbnails")
        os.makedirs(originals_dir, exist_ok=True)
        os.makedirs(thumb_dir, exist_ok=True)

        shutil.copy(
            os.path.join(samples_dir, "originals", "0000002.pdf.gpg"),
            os.path.join(originals_dir, "0000002.pdf.gpg"))
        shutil.copy(
            os.path.join(samples_dir, "thumb", "0000002.png.gpg"),
            os.path.join(thumb_dir, "0000002.png.gpg"))

        # A context manager pairs enable() with disable() inside the test
        # method, so this override unwinds before the decorator's override.
        with override_settings(
            ORIGINALS_DIR=originals_dir,
            THUMBNAIL_DIR=thumb_dir,
            PASSPHRASE="test"
        ):
            Document.objects.create(
                checksum="9c9691e51741c1f4f41a20896af31770",
                title="wow",
                filename="0000002.pdf.gpg",
                id=2,
                mime_type="application/pdf",
                storage_type=Document.STORAGE_TYPE_GPG)

            call_command('decrypt_documents')

            doc = Document.objects.get(id=2)

            self.assertEqual(doc.storage_type, Document.STORAGE_TYPE_UNENCRYPTED)
            self.assertEqual(doc.filename, "0000002.pdf")
            self.assertTrue(os.path.isfile(os.path.join(originals_dir, "0000002.pdf")))
            self.assertTrue(os.path.isfile(doc.source_path))
            self.assertTrue(os.path.isfile(os.path.join(thumb_dir, "0000002.png")))
            self.assertTrue(os.path.isfile(doc.thumbnail_path))

            with doc.source_file as f:
                checksum = hashlib.md5(f.read()).hexdigest()
            self.assertEqual(checksum, doc.checksum)
|
||||||
|
|
53
src/documents/tests/test_management_exporter.py
Normal file
53
src/documents/tests/test_management_exporter.py
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
import hashlib
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import tempfile
|
||||||
|
|
||||||
|
from django.core.management import call_command
|
||||||
|
from django.test import TestCase, override_settings
|
||||||
|
|
||||||
|
from documents.management.commands import document_exporter
|
||||||
|
from documents.models import Document, Tag, DocumentType, Correspondent
|
||||||
|
|
||||||
|
|
||||||
|
class TestExporter(TestCase):
    """Tests for the ``document_exporter`` management command."""

    @override_settings(
        ORIGINALS_DIR=os.path.join(os.path.dirname(__file__), "samples", "originals"),
        THUMBNAIL_DIR=os.path.join(os.path.dirname(__file__), "samples", "thumb"),
        PASSPHRASE="test"
    )
    def test_exporter(self):
        """Export two documents plus metadata and verify the manifest and files.

        Afterwards a document without a backing file is added and the
        export is expected to raise ``FileNotFoundError``.
        """
        sample_path = os.path.join(os.path.dirname(__file__), "samples", "originals", "0000001.pdf")

        with open(sample_path, "rb") as f:
            checksum = hashlib.md5(f.read()).hexdigest()

        Document.objects.create(checksum=checksum, title="wow", filename="0000001.pdf", id=1, mime_type="application/pdf")
        Document.objects.create(checksum="9c9691e51741c1f4f41a20896af31770", title="wow", filename="0000002.pdf.gpg", id=2, mime_type="application/pdf", storage_type=Document.STORAGE_TYPE_GPG)
        Tag.objects.create(name="t")
        DocumentType.objects.create(name="dt")
        Correspondent.objects.create(name="c")

        # The export directory is removed again when the test finishes.
        export_dir = tempfile.TemporaryDirectory()
        self.addCleanup(export_dir.cleanup)
        target = export_dir.name

        call_command('document_exporter', target)

        with open(os.path.join(target, "manifest.json")) as f:
            manifest = json.load(f)

        # 2 documents + 1 tag + 1 document type + 1 correspondent
        self.assertEqual(len(manifest), 5)

        for element in manifest:
            if element['model'] == 'documents.document':
                fname = os.path.join(target, element[document_exporter.EXPORTER_FILE_NAME])
                self.assertTrue(os.path.exists(fname))
                self.assertTrue(os.path.exists(os.path.join(target, element[document_exporter.EXPORTER_THUMBNAIL_NAME])))

                with open(fname, "rb") as f:
                    checksum = hashlib.md5(f.read()).hexdigest()
                self.assertEqual(checksum, element['fields']['checksum'])

        Document.objects.create(checksum="AAAAAAAAAAAAAAAAA", title="wow", filename="0000004.pdf", id=3, mime_type="application/pdf")

        self.assertRaises(FileNotFoundError, call_command, 'document_exporter', target)
|
58
src/documents/tests/test_management_retagger.py
Normal file
58
src/documents/tests/test_management_retagger.py
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
from django.core.management import call_command
|
||||||
|
from django.test import TestCase
|
||||||
|
|
||||||
|
from documents.models import Document, Tag, Correspondent, DocumentType
|
||||||
|
from documents.tests.utils import DirectoriesMixin
|
||||||
|
|
||||||
|
|
||||||
|
class TestRetagger(DirectoriesMixin, TestCase):
    """Tests for the ``document_retagger`` management command."""

    def make_models(self):
        """Create three documents and the matching objects used by the tests.

        Each tag, correspondent and document type matches either the word
        "first" or the word "second"; document "C" contains neither.
        """
        self.d1 = Document.objects.create(
            checksum="A", title="A", content="first document")
        self.d2 = Document.objects.create(
            checksum="B", title="B", content="second document")
        self.d3 = Document.objects.create(
            checksum="C", title="C", content="unrelated document")

        self.tag_first = Tag.objects.create(
            name="tag1", match="first", matching_algorithm=Tag.MATCH_ANY)
        self.tag_second = Tag.objects.create(
            name="tag2", match="second", matching_algorithm=Tag.MATCH_ANY)

        self.correspondent_first = Correspondent.objects.create(
            name="c1", match="first",
            matching_algorithm=Correspondent.MATCH_ANY)
        self.correspondent_second = Correspondent.objects.create(
            name="c2", match="second",
            matching_algorithm=Correspondent.MATCH_ANY)

        self.doctype_first = DocumentType.objects.create(
            name="dt1", match="first",
            matching_algorithm=DocumentType.MATCH_ANY)
        self.doctype_second = DocumentType.objects.create(
            name="dt2", match="second",
            matching_algorithm=DocumentType.MATCH_ANY)

    def get_updated_docs(self):
        """Re-read documents "A", "B" and "C" from the database, in that order."""
        return tuple(
            Document.objects.get(title=title) for title in ("A", "B", "C")
        )

    def setUp(self) -> None:
        super().setUp()
        self.make_models()

    def test_add_tags(self):
        """``--tags`` gives each matching document exactly its one tag."""
        call_command('document_retagger', '--tags')
        d_first, d_second, d_unrelated = self.get_updated_docs()

        self.assertEqual(d_first.tags.count(), 1)
        self.assertEqual(d_second.tags.count(), 1)
        self.assertEqual(d_unrelated.tags.count(), 0)

        self.assertEqual(d_first.tags.first(), self.tag_first)
        self.assertEqual(d_second.tags.first(), self.tag_second)

    def test_add_type(self):
        """``--document_type`` assigns the matching document types."""
        call_command('document_retagger', '--document_type')
        d_first, d_second, _ = self.get_updated_docs()

        self.assertEqual(d_first.document_type, self.doctype_first)
        self.assertEqual(d_second.document_type, self.doctype_second)

    def test_add_correspondent(self):
        """``--correspondent`` assigns the matching correspondents."""
        call_command('document_retagger', '--correspondent')
        d_first, d_second, _ = self.get_updated_docs()

        self.assertEqual(d_first.correspondent, self.correspondent_first)
        self.assertEqual(d_second.correspondent, self.correspondent_second)
|
@ -1,3 +1,5 @@
|
|||||||
|
import shutil
|
||||||
|
import tempfile
|
||||||
from random import randint
|
from random import randint
|
||||||
|
|
||||||
from django.contrib.admin.models import LogEntry
|
from django.contrib.admin.models import LogEntry
|
||||||
@ -215,6 +217,13 @@ class TestDocumentConsumptionFinishedSignal(TestCase):
|
|||||||
self.doc_contains = Document.objects.create(
|
self.doc_contains = Document.objects.create(
|
||||||
content="I contain the keyword.", mime_type="application/pdf")
|
content="I contain the keyword.", mime_type="application/pdf")
|
||||||
|
|
||||||
|
self.index_dir = tempfile.mkdtemp()
|
||||||
|
# TODO: we should not need the index here.
|
||||||
|
override_settings(INDEX_DIR=self.index_dir).enable()
|
||||||
|
|
||||||
|
def tearDown(self) -> None:
|
||||||
|
shutil.rmtree(self.index_dir, ignore_errors=True)
|
||||||
|
|
||||||
def test_tag_applied_any(self):
|
def test_tag_applied_any(self):
|
||||||
t1 = Tag.objects.create(
|
t1 = Tag.objects.create(
|
||||||
name="test", match="keyword", matching_algorithm=Tag.MATCH_ANY)
|
name="test", match="keyword", matching_algorithm=Tag.MATCH_ANY)
|
||||||
|
59
src/documents/tests/utils.py
Normal file
59
src/documents/tests/utils.py
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
import tempfile
|
||||||
|
from collections import namedtuple
|
||||||
|
|
||||||
|
from django.test import override_settings
|
||||||
|
|
||||||
|
|
||||||
|
def setup_directories():
    """Create temporary working directories and point Django's settings at them.

    Returns an object with the attributes ``data_dir``, ``scratch_dir``,
    ``media_dir``, ``consumption_dir``, ``index_dir``, ``originals_dir``
    and ``thumbnail_dir``, plus ``settings_override``. The latter is the
    enabled ``override_settings`` instance; ``remove_dirs()`` disables it.
    """
    dirs = namedtuple("Dirs", ())

    dirs.data_dir = tempfile.mkdtemp()
    dirs.scratch_dir = tempfile.mkdtemp()
    dirs.media_dir = tempfile.mkdtemp()
    dirs.consumption_dir = tempfile.mkdtemp()
    dirs.index_dir = os.path.join(dirs.data_dir, "index")
    dirs.originals_dir = os.path.join(dirs.media_dir, "documents", "originals")
    dirs.thumbnail_dir = os.path.join(dirs.media_dir, "documents", "thumbnails")

    os.makedirs(dirs.index_dir, exist_ok=True)
    os.makedirs(dirs.originals_dir, exist_ok=True)
    os.makedirs(dirs.thumbnail_dir, exist_ok=True)

    # Keep a handle on the override so it can be disabled again; otherwise
    # the settings would keep pointing at directories that no longer exist.
    dirs.settings_override = override_settings(
        DATA_DIR=dirs.data_dir,
        SCRATCH_DIR=dirs.scratch_dir,
        MEDIA_ROOT=dirs.media_dir,
        ORIGINALS_DIR=dirs.originals_dir,
        THUMBNAIL_DIR=dirs.thumbnail_dir,
        CONSUMPTION_DIR=dirs.consumption_dir,
        INDEX_DIR=dirs.index_dir,
        MODEL_FILE=os.path.join(dirs.data_dir, "classification_model.pickle")
    )
    dirs.settings_override.enable()

    return dirs


def remove_dirs(dirs):
    """Delete the directories made by ``setup_directories()`` and restore settings.

    Missing directories are ignored. If ``dirs`` has no
    ``settings_override`` attribute, only the directories are removed.
    """
    shutil.rmtree(dirs.media_dir, ignore_errors=True)
    shutil.rmtree(dirs.data_dir, ignore_errors=True)
    shutil.rmtree(dirs.scratch_dir, ignore_errors=True)
    shutil.rmtree(dirs.consumption_dir, ignore_errors=True)

    settings_override = getattr(dirs, "settings_override", None)
    if settings_override is not None:
        settings_override.disable()
|
||||||
|
|
||||||
|
|
||||||
|
class DirectoriesMixin:
    """Test-case mixin that sets up temporary directories for every test.

    ``setUp`` stores the result of ``setup_directories()`` on ``self.dirs``
    and ``tearDown`` passes it to ``remove_dirs()``. List this mixin before
    the ``TestCase`` base class so the ``super()`` calls chain correctly.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Filled in by setUp().
        self.dirs = None

    def setUp(self) -> None:
        # Create the directories first so they exist while the rest of the
        # setUp chain runs.
        self.dirs = setup_directories()
        super().setUp()

    def tearDown(self) -> None:
        # Let the rest of the tearDown chain finish before deleting.
        super().tearDown()
        remove_dirs(self.dirs)
|
@ -149,13 +149,25 @@ class DocumentViewSet(RetrieveModelMixin,
|
|||||||
else:
|
else:
|
||||||
return HttpResponseBadRequest(str(form.errors))
|
return HttpResponseBadRequest(str(form.errors))
|
||||||
|
|
||||||
|
@action(methods=['get'], detail=True)
def metadata(self, request, pk=None):
    """Return checksum, MIME type and filename of the document ``pk``.

    Raises ``Http404`` when no such document exists.
    """
    try:
        doc = Document.objects.get(pk=pk)
    except Document.DoesNotExist:
        raise Http404()

    payload = {
        "paperless__checksum": doc.checksum,
        "paperless__mime_type": doc.mime_type,
        "paperless__filename": doc.filename,
    }
    return Response(payload)
|
||||||
|
|
||||||
@action(methods=['get'], detail=True)
|
@action(methods=['get'], detail=True)
|
||||||
def preview(self, request, pk=None):
|
def preview(self, request, pk=None):
|
||||||
try:
|
try:
|
||||||
response = self.file_response(pk, "inline")
|
response = self.file_response(pk, "inline")
|
||||||
return response
|
return response
|
||||||
except FileNotFoundError:
|
except (FileNotFoundError, Document.DoesNotExist):
|
||||||
raise Http404("Document source file does not exist")
|
raise Http404()
|
||||||
|
|
||||||
@action(methods=['get'], detail=True)
|
@action(methods=['get'], detail=True)
|
||||||
@cache_control(public=False, max_age=315360000)
|
@cache_control(public=False, max_age=315360000)
|
||||||
@ -163,15 +175,15 @@ class DocumentViewSet(RetrieveModelMixin,
|
|||||||
try:
|
try:
|
||||||
return HttpResponse(Document.objects.get(id=pk).thumbnail_file,
|
return HttpResponse(Document.objects.get(id=pk).thumbnail_file,
|
||||||
content_type='image/png')
|
content_type='image/png')
|
||||||
except FileNotFoundError:
|
except (FileNotFoundError, Document.DoesNotExist):
|
||||||
raise Http404("Document thumbnail does not exist")
|
raise Http404()
|
||||||
|
|
||||||
@action(methods=['get'], detail=True)
|
@action(methods=['get'], detail=True)
|
||||||
def download(self, request, pk=None):
|
def download(self, request, pk=None):
|
||||||
try:
|
try:
|
||||||
return self.file_response(pk, "attachment")
|
return self.file_response(pk, "attachment")
|
||||||
except FileNotFoundError:
|
except (FileNotFoundError, Document.DoesNotExist):
|
||||||
raise Http404("Document source file does not exist")
|
raise Http404()
|
||||||
|
|
||||||
|
|
||||||
class LogViewSet(ReadOnlyModelViewSet):
|
class LogViewSet(ReadOnlyModelViewSet):
|
||||||
@ -190,7 +202,9 @@ class SearchView(APIView):
|
|||||||
|
|
||||||
permission_classes = (IsAuthenticated,)
|
permission_classes = (IsAuthenticated,)
|
||||||
|
|
||||||
ix = index.open_index()
|
def __init__(self, *args, **kwargs):
    """Initialise the view and open the search index for this instance."""
    super().__init__(*args, **kwargs)
    self.ix = index.open_index()
|
||||||
|
|
||||||
def add_infos_to_hit(self, r):
|
def add_infos_to_hit(self, r):
|
||||||
doc = Document.objects.get(id=r['id'])
|
doc = Document.objects.get(id=r['id'])
|
||||||
@ -210,6 +224,9 @@ class SearchView(APIView):
|
|||||||
except (ValueError, TypeError):
|
except (ValueError, TypeError):
|
||||||
page = 1
|
page = 1
|
||||||
|
|
||||||
|
if page < 1:
|
||||||
|
page = 1
|
||||||
|
|
||||||
with index.query_page(self.ix, query, page) as result_page:
|
with index.query_page(self.ix, query, page) as result_page:
|
||||||
return Response(
|
return Response(
|
||||||
{'count': len(result_page),
|
{'count': len(result_page),
|
||||||
@ -229,7 +246,9 @@ class SearchAutoCompleteView(APIView):
|
|||||||
|
|
||||||
permission_classes = (IsAuthenticated,)
|
permission_classes = (IsAuthenticated,)
|
||||||
|
|
||||||
ix = index.open_index()
|
def __init__(self, *args, **kwargs):
    """Initialise the view and open the search index for this instance."""
    super().__init__(*args, **kwargs)
    self.ix = index.open_index()
|
||||||
|
|
||||||
def get(self, request, format=None):
|
def get(self, request, format=None):
|
||||||
if 'term' in request.query_params:
|
if 'term' in request.query_params:
|
||||||
|
@ -1,8 +1,19 @@
|
|||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from django.contrib.auth.models import User
|
from django.contrib.auth.models import User
|
||||||
|
from django.utils.deprecation import MiddlewareMixin
|
||||||
from rest_framework import authentication
|
from rest_framework import authentication
|
||||||
|
|
||||||
|
|
||||||
|
class AutoLoginMiddleware(MiddlewareMixin):
    """Middleware that sets ``request.user`` to a preconfigured user.

    The user is looked up by ``settings.AUTO_LOGIN_USERNAME``. When no
    such user exists the request is left untouched.
    """

    def process_request(self, request):
        try:
            auto_user = User.objects.get(
                username=settings.AUTO_LOGIN_USERNAME)
        except User.DoesNotExist:
            # Unknown user: leave request.user as it is.
            return

        request.user = auto_user
|
||||||
|
|
||||||
|
|
||||||
class AngularApiAuthenticationOverride(authentication.BaseAuthentication):
|
class AngularApiAuthenticationOverride(authentication.BaseAuthentication):
|
||||||
""" This class is here to provide authentication to the angular dev server
|
""" This class is here to provide authentication to the angular dev server
|
||||||
during development. This is disabled in production.
|
during development. This is disabled in production.
|
||||||
|
@ -144,6 +144,15 @@ TEMPLATES = [
|
|||||||
# Security #
|
# Security #
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
|
||||||
|
AUTO_LOGIN_USERNAME = os.getenv("PAPERLESS_AUTO_LOGIN_USERNAME")
|
||||||
|
|
||||||
|
if AUTO_LOGIN_USERNAME:
|
||||||
|
_index = MIDDLEWARE.index('django.contrib.auth.middleware.AuthenticationMiddleware')
|
||||||
|
# This overrides everything the auth middleware is doing but still allows
|
||||||
|
# regular login in case the provided user does not exist.
|
||||||
|
MIDDLEWARE.insert(_index+1, 'paperless.auth.AutoLoginMiddleware')
|
||||||
|
|
||||||
|
|
||||||
if DEBUG:
|
if DEBUG:
|
||||||
X_FRAME_OPTIONS = ''
|
X_FRAME_OPTIONS = ''
|
||||||
# this should really be 'allow-from uri' but its not supported in any mayor
|
# this should really be 'allow-from uri' but its not supported in any mayor
|
||||||
@ -241,6 +250,8 @@ USE_TZ = True
|
|||||||
# Logging #
|
# Logging #
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
|
||||||
|
DISABLE_DBHANDLER = __get_boolean("PAPERLESS_DISABLE_DBHANDLER")
|
||||||
|
|
||||||
LOGGING = {
|
LOGGING = {
|
||||||
"version": 1,
|
"version": 1,
|
||||||
"disable_existing_loggers": False,
|
"disable_existing_loggers": False,
|
||||||
|
@ -1 +1 @@
|
|||||||
__version__ = (0, 9, 2)
|
__version__ = (0, 9, 3)
|
||||||
|
@ -0,0 +1,2 @@
|
|||||||
|
# this is here so that django finds the checks.
|
||||||
|
from .checks import *
|
25
src/paperless_tesseract/checks.py
Normal file
25
src/paperless_tesseract/checks.py
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
import subprocess
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
from django.core.checks import Error, register
|
||||||
|
|
||||||
|
|
||||||
|
def get_tesseract_langs():
    """Return the languages reported by ``tesseract --list-langs``.

    Runs the ``tesseract`` executable and returns its standard output as a
    list of lines with the first line removed.
    """
    completed = subprocess.run(
        ['tesseract', '--list-langs'], stdout=subprocess.PIPE)

    # The first output line is skipped; presumably it is a header rather
    # than a language code (TODO confirm against tesseract's output).
    output_lines = completed.stdout.decode().strip().split("\n")
    return output_lines[1:]
|
||||||
|
|
||||||
|
|
||||||
|
@register()
def check_default_language_available(app_configs, **kwargs):
    """Django system check: is the configured OCR language installed?

    Returns a list with one ``Error`` when ``settings.OCR_LANGUAGE`` is
    not among the languages from ``get_tesseract_langs()``, otherwise an
    empty list.
    """
    installed = get_tesseract_langs()

    if settings.OCR_LANGUAGE in installed:
        return []

    return [Error(
        f"The default ocr language {settings.OCR_LANGUAGE} is "
        f"not installed. Paperless cannot OCR your documents "
        f"without it. Please fix PAPERLESS_OCR_LANGUAGE.")]
|
@ -3,10 +3,9 @@ exclude = migrations, paperless/settings.py, .tox, */tests/*
|
|||||||
|
|
||||||
[tool:pytest]
|
[tool:pytest]
|
||||||
DJANGO_SETTINGS_MODULE=paperless.settings
|
DJANGO_SETTINGS_MODULE=paperless.settings
|
||||||
addopts = --pythonwarnings=all
|
addopts = --pythonwarnings=all --cov --cov-report=html -n auto
|
||||||
env =
|
env =
|
||||||
PAPERLESS_SECRET=paperless
|
PAPERLESS_DISABLE_DBHANDLER=true
|
||||||
PAPERLESS_EMAIL_SECRET=paperless
|
|
||||||
|
|
||||||
|
|
||||||
[coverage:run]
|
[coverage:run]
|
||||||
|
Loading…
x
Reference in New Issue
Block a user