Compare commits

...

16 Commits

Author SHA1 Message Date
jonaswinkler
5e669534f2 reorganized test case 2021-02-14 17:24:31 +01:00
jonaswinkler
98b147b622 better sanity checker that logs messages in the log files and does not fail on warnings. 2021-02-14 17:08:29 +01:00
jonaswinkler
df6c59bc4f update dependencies 2021-02-14 15:38:47 +01:00
jonaswinkler
6e48da41e5 changelog 2021-02-14 14:05:42 +01:00
Jonas Winkler
5c8a01a6e8 Merge pull request #538 from jonaswinkler/translations_src-locale-en-us-lc-messages-django-po--dev_cs
Translate '/src/locale/en-us/LC_MESSAGES/django.po' in 'cs'
2021-02-14 13:41:33 +01:00
jonaswinkler
3d0a52c25f only load channels app if DEBUG is enabled; its only purpose is to monkey-patch the runserver command. 2021-02-14 12:50:30 +01:00
jonaswinkler
43c729568b release worker memory after tasks are done. 2021-02-14 12:29:55 +01:00
transifex-integration[bot]
62caeed283 Apply translations in cs
translation completed for the source file '/src/locale/en-us/LC_MESSAGES/django.po'
on the 'cs' language.
2021-02-14 07:05:05 +00:00
jonaswinkler
12836d4c68 revert django-q configuration 2021-02-13 20:25:52 +01:00
jonaswinkler
b48e67d714 revert a faulty change that caused memory usage to explode #537 2021-02-13 19:51:04 +01:00
jonaswinkler
f91f4d71bb Merge branch 'master' into dev 2021-02-13 18:09:14 +01:00
jonaswinkler
0a1f264c71 Gotenberg troubleshooting 2021-02-13 18:09:00 +01:00
jonaswinkler
64d61ae2fa version bump 2021-02-13 18:01:19 +01:00
jonaswinkler
5f0e800f6e metadata tab not showing anything if files are missing #534 2021-02-13 16:41:03 +01:00
jonaswinkler
8b2965d55b added sanity checker management command for manual execution #534 2021-02-13 16:39:29 +01:00
jonaswinkler
ed478a1d73 change thumbnail display for extra wide images #433 2021-02-12 18:20:17 +01:00
23 changed files with 1049 additions and 202 deletions

View File

@@ -39,7 +39,7 @@ scikit-learn="==0.24.0"
# Prevent scipy updates because 1.6 is incompatible with python 3.6 # Prevent scipy updates because 1.6 is incompatible with python 3.6
scipy="~=1.5.4" scipy="~=1.5.4"
whitenoise = "~=5.2.0" whitenoise = "~=5.2.0"
watchdog = "*" watchdog = "~=1.0.0"
whoosh="~=2.7.4" whoosh="~=2.7.4"
inotifyrecursive = "~=0.3.4" inotifyrecursive = "~=0.3.4"
ocrmypdf = "~=11.6" ocrmypdf = "~=11.6"
@@ -51,7 +51,6 @@ channels = "~=3.0"
channels-redis = "*" channels-redis = "*"
uvicorn = {extras = ["standard"], version = "*"} uvicorn = {extras = ["standard"], version = "*"}
concurrent-log-handler = "*" concurrent-log-handler = "*"
django-redis = "*"
# uvloop 0.15+ incompatible with python 3.6 # uvloop 0.15+ incompatible with python 3.6
uvloop = "~=0.14.0" uvloop = "~=0.14.0"
# TODO: keep an eye on piwheel builds and update this once available (https://www.piwheels.org/project/cryptography/) # TODO: keep an eye on piwheel builds and update this once available (https://www.piwheels.org/project/cryptography/)

109
Pipfile.lock generated
View File

@@ -1,7 +1,7 @@
{ {
"_meta": { "_meta": {
"hash": { "hash": {
"sha256": "b3bed0a6b8981e8fffc1b6aa3bc35a0b1472f28e6f745c62469eb8045740e57b" "sha256": "bd8b69979d91f4d8c52cac127c891d750c52959807220a98dcf74fed126bfa26"
}, },
"pipfile-spec": 6, "pipfile-spec": 6,
"requires": {}, "requires": {},
@@ -60,11 +60,11 @@
}, },
"autobahn": { "autobahn": {
"hashes": [ "hashes": [
"sha256:93df8fc9d1821c9dabff9fed52181a9ad6eea5e9989d53102c391607d7c1666e", "sha256:41a3a3f89cde48643baf4e105d9491c566295f9abee951379e59121784044b8b",
"sha256:cceed2121b7a93024daa93c91fae33007f8346f0e522796421f36a6183abea99" "sha256:7e6b1bf95196b733978bab2d54a7ab8899c16ce11be369dc58422c07b7eea726"
], ],
"markers": "python_version >= '3.6'", "markers": "python_version >= '3.6'",
"version": "==21.1.1" "version": "==21.2.1"
}, },
"automat": { "automat": {
"hashes": [ "hashes": [
@@ -90,47 +90,47 @@
}, },
"cffi": { "cffi": {
"hashes": [ "hashes": [
"sha256:00a1ba5e2e95684448de9b89888ccd02c98d512064b4cb987d48f4b40aa0421e", "sha256:005a36f41773e148deac64b08f233873a4d0c18b053d37da83f6af4d9087b813",
"sha256:00e28066507bfc3fe865a31f325c8391a1ac2916219340f87dfad602c3e48e5d", "sha256:0857f0ae312d855239a55c81ef453ee8fd24136eaba8e87a2eceba644c0d4c06",
"sha256:045d792900a75e8b1e1b0ab6787dd733a8190ffcf80e8c8ceb2fb10a29ff238a", "sha256:1071534bbbf8cbb31b498d5d9db0f274f2f7a865adca4ae429e147ba40f73dea",
"sha256:0638c3ae1a0edfb77c6765d487fee624d2b1ee1bdfeffc1f0b58c64d149e7eec", "sha256:158d0d15119b4b7ff6b926536763dc0714313aa59e320ddf787502c70c4d4bee",
"sha256:105abaf8a6075dc96c1fe5ae7aae073f4696f2905fde6aeada4c9d2926752362", "sha256:1f436816fc868b098b0d63b8920de7d208c90a67212546d02f84fe78a9c26396",
"sha256:155136b51fd733fa94e1c2ea5211dcd4c8879869008fc811648f16541bf99668", "sha256:2894f2df484ff56d717bead0a5c2abb6b9d2bf26d6960c4604d5c48bbc30ee73",
"sha256:1a465cbe98a7fd391d47dce4b8f7e5b921e6cd805ef421d04f5f66ba8f06086c", "sha256:29314480e958fd8aab22e4a58b355b629c59bf5f2ac2492b61e3dc06d8c7a315",
"sha256:1d2c4994f515e5b485fd6d3a73d05526aa0fcf248eb135996b088d25dfa1865b", "sha256:34eff4b97f3d982fb93e2831e6750127d1355a923ebaeeb565407b3d2f8d41a1",
"sha256:23f318bf74b170c6e9adb390e8bd282457f6de46c19d03b52f3fd042b5e19654", "sha256:35f27e6eb43380fa080dccf676dece30bef72e4a67617ffda586641cd4508d49",
"sha256:2c24d61263f511551f740d1a065eb0212db1dbbbbd241db758f5244281590c06", "sha256:3d3dd4c9e559eb172ecf00a2a7517e97d1e96de2a5e610bd9b68cea3925b4892",
"sha256:51a8b381b16ddd370178a65360ebe15fbc1c71cf6f584613a7ea08bfad946698", "sha256:43e0b9d9e2c9e5d152946b9c5fe062c151614b262fda2e7b201204de0b99e482",
"sha256:594234691ac0e9b770aee9fcdb8fa02c22e43e5c619456efd0d6c2bf276f3eb2", "sha256:48e1c69bbacfc3d932221851b39d49e81567a4d4aac3b21258d9c24578280058",
"sha256:5cf4be6c304ad0b6602f5c4e90e2f59b47653ac1ed9c662ed379fe48a8f26b0c", "sha256:51182f8927c5af975fece87b1b369f722c570fe169f9880764b1ee3bca8347b5",
"sha256:64081b3f8f6f3c3de6191ec89d7dc6c86a8a43911f7ecb422c60e90c70be41c7", "sha256:5560dbf8deedbffb638d8a2da31da91094db361cc07f8a501a339b2daae2cbcc",
"sha256:6bc25fc545a6b3d57b5f8618e59fc13d3a3a68431e8ca5fd4c13241cd70d0009", "sha256:58e3f59d583d413809d60779492342801d6e82fefb89c86a38e040c16883be53",
"sha256:798caa2a2384b1cbe8a2a139d80734c9db54f9cc155c99d7cc92441a23871c03", "sha256:5de7970188bb46b7bf9858eb6890aad302577a5f6f75091fd7cdd3ef13ef3045",
"sha256:7c6b1dece89874d9541fc974917b631406233ea0440d0bdfbb8e03bf39a49b3b", "sha256:65fa59693c62cf06e45ddbb822165394a288edce9e276647f0046e1ec26920f3",
"sha256:7ef7d4ced6b325e92eb4d3502946c78c5367bc416398d387b39591532536734e", "sha256:69e395c24fc60aad6bb4fa7e583698ea6cc684648e1ffb7fe85e3c1ca131a7d5",
"sha256:840793c68105fe031f34d6a086eaea153a0cd5c491cde82a74b420edd0a2b909", "sha256:6c97d7350133666fbb5cf4abdc1178c812cb205dc6f41d174a7b0f18fb93337e",
"sha256:8d6603078baf4e11edc4168a514c5ce5b3ba6e3e9c374298cb88437957960a53", "sha256:6e4714cc64f474e4d6e37cfff31a814b509a35cb17de4fb1999907575684479c",
"sha256:9cc46bc107224ff5b6d04369e7c595acb700c3613ad7bcf2e2012f62ece80c35", "sha256:72d8d3ef52c208ee1c7b2e341f7d71c6fd3157138abf1a95166e6165dd5d4369",
"sha256:9f7a31251289b2ab6d4012f6e83e58bc3b96bd151f5b5262467f4bb6b34a7c26", "sha256:8ae6299f6c68de06f136f1f9e69458eae58f1dacf10af5c17353eae03aa0d827",
"sha256:9ffb888f19d54a4d4dfd4b3f29bc2c16aa4972f1c2ab9c4ab09b8ab8685b9c2b", "sha256:8b198cec6c72df5289c05b05b8b0969819783f9418e0409865dac47288d2a053",
"sha256:a5ed8c05548b54b998b9498753fb9cadbfd92ee88e884641377d8a8b291bcc01", "sha256:9338beed13d880320450d95c9e07ccf839faa3ea7b75d788f4ed46d845044a71",
"sha256:a7711edca4dcef1a75257b50a2fbfe92a65187c47dab5a0f1b9b332c5919a3fb", "sha256:99cd03ae7988a93dd00bcd9d0b75e1f6c426063d6f03d2f90b89e29b25b82dfa",
"sha256:af5c59122a011049aad5dd87424b8e65a80e4a6477419c0c1015f73fb5ea0293", "sha256:9cf8022fb8d07a97c178b02327b284521c7708d7c71a9c9c355c178ac4bbd3d4",
"sha256:b18e0a9ef57d2b41f5c68beefa32317d286c3d6ac0484efd10d6e07491bb95dd", "sha256:9de2e279153a443c656f2defd67769e6d1e4163952b3c622dcea5b08a6405322",
"sha256:b4e248d1087abf9f4c10f3c398896c87ce82a9856494a7155823eb45a892395d", "sha256:9e93e79c2551ff263400e1e4be085a1210e12073a31c2011dbbda14bda0c6132",
"sha256:ba4e9e0ae13fc41c6b23299545e5ef73055213e466bd107953e4a013a5ddd7e3", "sha256:9ff227395193126d82e60319a673a037d5de84633f11279e336f9c0f189ecc62",
"sha256:be8661bcee1bc2fc4b033a6ab65bd1f87ce5008492601695d0b9a4e820c3bde5", "sha256:a465da611f6fa124963b91bf432d960a555563efe4ed1cc403ba5077b15370aa",
"sha256:c6332685306b6417a91b1ff9fae889b3ba65c2292d64bd9245c093b1b284809d", "sha256:ad17025d226ee5beec591b52800c11680fca3df50b8b29fe51d882576e039ee0",
"sha256:d5ff0621c88ce83a28a10d2ce719b2ee85635e85c515f12bac99a95306da4b2e", "sha256:afb29c1ba2e5a3736f1c301d9d0abe3ec8b86957d04ddfa9d7a6a42b9367e396",
"sha256:d9efd8b7a3ef378dd61a1e77367f1924375befc2eba06168b6ebfa903a5e59ca", "sha256:b85eb46a81787c50650f2392b9b4ef23e1f126313b9e0e9013b35c15e4288e2e",
"sha256:df5169c4396adc04f9b0a05f13c074df878b6052430e03f50e68adf3a57aa28d", "sha256:bb89f306e5da99f4d922728ddcd6f7fcebb3241fc40edebcb7284d7514741991",
"sha256:ebb253464a5d0482b191274f1c8bf00e33f7e0b9c66405fbffc61ed2c839c775", "sha256:cbde590d4faaa07c72bf979734738f328d239913ba3e043b1e98fe9a39f8b2b6",
"sha256:ec80dc47f54e6e9a78181ce05feb71a0353854cc26999db963695f950b5fb375", "sha256:cd2868886d547469123fadc46eac7ea5253ea7fcb139f12e1dfc2bbd406427d1",
"sha256:f032b34669220030f905152045dfa27741ce1a6db3324a5bc0b96b6c7420c87b", "sha256:d42b11d692e11b6634f7613ad8df5d6d5f8875f5d48939520d351007b3c13406",
"sha256:f60567825f791c6f8a592f3c6e3bd93dd2934e3f9dac189308426bd76b00ef3b", "sha256:f2d45f97ab6bb54753eab54fffe75aaf3de4ff2341c9daee1987ee1837636f1d",
"sha256:f803eaa94c2fcda012c047e62bc7a51b0bdabda1cad7a92a522694ea2d76e49f" "sha256:fd78e5fee591709f32ef6edb9a015b4aa1a5022598e36227500c8f4e02328d9c"
], ],
"version": "==1.14.4" "version": "==1.14.5"
}, },
"channels": { "channels": {
"hashes": [ "hashes": [
@@ -273,15 +273,6 @@
"index": "pypi", "index": "pypi",
"version": "==1.3.4" "version": "==1.3.4"
}, },
"django-redis": {
"hashes": [
"sha256:1133b26b75baa3664164c3f44b9d5d133d1b8de45d94d79f38d1adc5b1d502e5",
"sha256:306589c7021e6468b2656edc89f62b8ba67e8d5a1c8877e2688042263daa7a63",
"sha256:f2b25b62cc95b63b7059aaf8e81710e7eea94678e545d31c46e47a6f4af99e56"
],
"index": "pypi",
"version": "==4.12.1"
},
"djangorestframework": { "djangorestframework": {
"hashes": [ "hashes": [
"sha256:0209bafcb7b5010fdfec784034f059d512256424de2a0f084cb82b096d6dd6a7", "sha256:0209bafcb7b5010fdfec784034f059d512256424de2a0f084cb82b096d6dd6a7",
@@ -1113,11 +1104,11 @@
}, },
"tqdm": { "tqdm": {
"hashes": [ "hashes": [
"sha256:2874fa525c051177583ec59c0fb4583e91f28ccd3f217ffad2acdb32d2c789ac", "sha256:11d544652edbdfc9cc41aa4c8a5c166513e279f3f2d9f1a9e1c89935b51de6ff",
"sha256:ab9b659241d82b8b51b2269ee243ec95286046bf06015c4e15a947cc15914211" "sha256:a89be573bfddb81bb0b395a416d5e55e3ecc73ce95a368a4f6360bedea33195e"
], ],
"index": "pypi", "index": "pypi",
"version": "==4.56.1" "version": "==4.56.2"
}, },
"twisted": { "twisted": {
"extras": [ "extras": [
@@ -1649,11 +1640,11 @@
}, },
"pygments": { "pygments": {
"hashes": [ "hashes": [
"sha256:bc9591213a8f0e0ca1a5e68a479b4887fdc3e75d0774e5c71c31920c427de435", "sha256:37a13ba168a02ac54cc5891a42b1caec333e59b66addb7fa633ea8a6d73445c0",
"sha256:df49d09b498e83c1a73128295860250b0b7edd4c723a32e9bc0d295c7c2ec337" "sha256:b21b072d0ccdf29297a82a2363359d99623597b8a265b8081760e4d0f7153c88"
], ],
"markers": "python_version >= '3.5'", "markers": "python_version >= '3.5'",
"version": "==2.7.4" "version": "==2.8.0"
}, },
"pyparsing": { "pyparsing": {
"hashes": [ "hashes": [

View File

@@ -1,4 +1,4 @@
for command in document_archiver document_exporter document_importer mail_fetcher document_create_classifier document_index document_renamer document_retagger document_thumbnails; for command in document_archiver document_exporter document_importer mail_fetcher document_create_classifier document_index document_renamer document_retagger document_thumbnails document_sanity_checker;
do do
echo "installing $command..." echo "installing $command..."
sed "s/management_command/$command/g" management_script.sh > /usr/local/bin/$command sed "s/management_command/$command/g" management_script.sh > /usr/local/bin/$command

View File

@@ -410,6 +410,34 @@ the naming scheme.
The command takes no arguments and processes all your documents at once. The command takes no arguments and processes all your documents at once.
.. _utilities-sanity-checker:
Sanity checker
==============
Paperless has a built-in sanity checker that inspects your document collection for issues.
The issues detected by the sanity checker are as follows:
* Missing original files.
* Missing archive files.
* Inaccessible original files due to improper permissions.
* Inaccessible archive files due to improper permissions.
* Corrupted original documents by comparing their checksum against what is stored in the database.
* Corrupted archive documents by comparing their checksum against what is stored in the database.
* Missing thumbnails.
* Inaccessible thumbnails due to improper permissions.
* Documents without any content (warning).
* Orphaned files in the media directory (warning). These are files that are not referenced by any document in paperless.
.. code::
document_sanity_checker
The command takes no arguments. Depending on the size of your document archive, this may take some time.
Fetching e-mail Fetching e-mail
=============== ===============

View File

@@ -5,6 +5,22 @@
Changelog Changelog
********* *********
paperless-ng 1.1.2
##################
* Always show top left corner of thumbnails, even for extra wide documents.
* Added a management command for executing the sanity checker directly.
See :ref:`utilities-sanity-checker`.
* The weekly sanity check now reports messages in the log files.
* Fixed an issue with the metadata tab not reporting anything in case of missing files.
* Reverted a change from 1.1.0 that caused huge memory usage due to redis caching.
* Some memory usage optimizations.
paperless-ng 1.1.1 paperless-ng 1.1.1
################## ##################

View File

@@ -94,6 +94,30 @@ If you want to get rid of the warning or actually experience issues with automat
the file ``classification_model.pickle`` in the data directory and let paperless recreate it. the file ``classification_model.pickle`` in the data directory and let paperless recreate it.
504 Server Error: Gateway Timeout when adding Office documents
##############################################################
You may experience these errors when using the optional TIKA integration:
.. code::
requests.exceptions.HTTPError: 504 Server Error: Gateway Timeout for url: http://gotenberg:3000/convert/office
Gotenberg is a server that converts Office documents into PDF documents and has a default timeout of 10 seconds.
When conversion takes longer, Gotenberg raises this error.
You can increase the timeout by configuring an environment variable for gotenberg (see also `here <https://thecodingmachine.github.io/gotenberg/#environment_variables.default_wait_timeout>`__).
If using docker-compose, this is achieved by the following configuration change in the ``docker-compose.yml`` file:
.. code:: yaml
gotenberg:
image: thecodingmachine/gotenberg
restart: unless-stopped
environment:
DISABLE_GOOGLE_CHROME: 1
DEFAULT_WAIT_TIMEOUT: 30
Permission denied errors in the consumption directory Permission denied errors in the consumption directory
##################################################### #####################################################

View File

@@ -12,11 +12,11 @@ arrow==0.17.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2,
asgiref==3.3.1; python_version >= '3.5' asgiref==3.3.1; python_version >= '3.5'
async-timeout==3.0.1; python_full_version >= '3.5.3' async-timeout==3.0.1; python_full_version >= '3.5.3'
attrs==20.3.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3' attrs==20.3.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
autobahn==21.1.1; python_version >= '3.6' autobahn==21.2.1; python_version >= '3.6'
automat==20.2.0 automat==20.2.0
blessed==1.17.12 blessed==1.17.12
certifi==2020.12.5 certifi==2020.12.5
cffi==1.14.4 cffi==1.14.5
channels-redis==3.2.0 channels-redis==3.2.0
channels==3.0.3 channels==3.0.3
chardet==4.0.0; python_version >= '3.1' chardet==4.0.0; python_version >= '3.1'
@@ -32,7 +32,6 @@ django-extensions==3.1.1
django-filter==2.4.0 django-filter==2.4.0
django-picklefield==3.0.1; python_version >= '3' django-picklefield==3.0.1; python_version >= '3'
django-q==1.3.4 django-q==1.3.4
django-redis==4.12.1
django==3.1.6 django==3.1.6
djangorestframework==3.12.2 djangorestframework==3.12.2
filelock==3.0.12 filelock==3.0.12
@@ -87,7 +86,7 @@ sortedcontainers==2.3.0
sqlparse==0.4.1; python_version >= '3.5' sqlparse==0.4.1; python_version >= '3.5'
threadpoolctl==2.1.0; python_version >= '3.5' threadpoolctl==2.1.0; python_version >= '3.5'
tika==1.24 tika==1.24
tqdm==4.56.1 tqdm==4.56.2
twisted[tls]==20.3.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' twisted[tls]==20.3.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
txaio==20.12.1; python_version >= '3.6' txaio==20.12.1; python_version >= '3.6'
tzlocal==2.1 tzlocal==2.1

View File

@@ -6,7 +6,7 @@
.doc-img { .doc-img {
object-fit: cover; object-fit: cover;
object-position: top; object-position: top left;
height: 100%; height: 100%;
position: absolute; position: absolute;
mix-blend-mode: multiply; mix-blend-mode: multiply;

View File

@@ -2,7 +2,7 @@
.doc-img { .doc-img {
object-fit: cover; object-fit: cover;
object-position: top; object-position: top left;
height: 200px; height: 200px;
mix-blend-mode: multiply; mix-blend-mode: multiply;
} }

View File

@@ -2,7 +2,7 @@ export const environment = {
production: true, production: true,
apiBaseUrl: "/api/", apiBaseUrl: "/api/",
appTitle: "Paperless-ng", appTitle: "Paperless-ng",
version: "1.1.1", version: "1.1.2",
webSocketHost: window.location.host, webSocketHost: window.location.host,
webSocketProtocol: (window.location.protocol == "https:" ? "wss:" : "ws:") webSocketProtocol: (window.location.protocol == "https:" ? "wss:" : "ws:")
}; };

View File

@@ -5,7 +5,6 @@ import pickle
import re import re
from django.conf import settings from django.conf import settings
from django.core.cache import cache
from documents.models import Document, MatchingModel from documents.models import Document, MatchingModel
@@ -31,29 +30,23 @@ def load_classifier():
) )
return None return None
version = os.stat(settings.MODEL_FILE).st_mtime classifier = DocumentClassifier()
try:
classifier.load()
classifier = cache.get("paperless-classifier", version=version) except (EOFError, IncompatibleClassifierVersionError) as e:
# there's something wrong with the model file.
if not classifier: logger.exception(
classifier = DocumentClassifier() f"Unrecoverable error while loading document "
try: f"classification model, deleting model file."
classifier.load() )
cache.set("paperless-classifier", classifier, os.unlink(settings.MODEL_FILE)
version=version, timeout=86400) classifier = None
except (EOFError, IncompatibleClassifierVersionError) as e: except OSError as e:
# there's something wrong with the model file. logger.error(
logger.exception( f"Error while loading document classification model: {str(e)}"
f"Unrecoverable error while loading document " )
f"classification model, deleting model file." classifier = None
)
os.unlink(settings.MODEL_FILE)
classifier = None
except OSError as e:
logger.error(
f"Error while loading document classification model: {str(e)}"
)
classifier = None
return classifier return classifier

View File

@@ -0,0 +1,15 @@
from django.core.management.base import BaseCommand
from documents.sanity_checker import check_sanity
class Command(BaseCommand):
help = """
This command checks your document archive for issues.
""".replace(" ", "")
def handle(self, *args, **options):
messages = check_sanity(progress=True)
messages.log_messages()

View File

@@ -1,45 +1,55 @@
import hashlib import hashlib
import logging
import os import os
from django.conf import settings from django.conf import settings
from tqdm import tqdm
from documents.models import Document from documents.models import Document
class SanityMessage: class SanityCheckMessages:
message = None
def __init__(self):
self._messages = []
def error(self, message):
self._messages.append({"level": logging.ERROR, "message": message})
def warning(self, message):
self._messages.append({"level": logging.WARNING, "message": message})
def info(self, message):
self._messages.append({"level": logging.INFO, "message": message})
def log_messages(self):
logger = logging.getLogger("paperless.sanity_checker")
if len(self._messages) == 0:
logger.info("Sanity checker detected no issues.")
else:
for msg in self._messages:
logger.log(msg['level'], msg['message'])
def __len__(self):
return len(self._messages)
def __getitem__(self, item):
return self._messages[item]
def has_error(self):
return any([msg['level'] == logging.ERROR for msg in self._messages])
def has_warning(self):
return any([msg['level'] == logging.WARNING for msg in self._messages])
class SanityWarning(SanityMessage): class SanityCheckFailedException(Exception):
def __init__(self, message): pass
self.message = message
def __str__(self):
return f"Warning: {self.message}"
class SanityError(SanityMessage): def check_sanity(progress=False):
def __init__(self, message): messages = SanityCheckMessages()
self.message = message
def __str__(self):
return f"ERROR: {self.message}"
class SanityFailedError(Exception):
def __init__(self, messages):
self.messages = messages
def __str__(self):
message_string = "\n".join([str(m) for m in self.messages])
return (
f"The following issuse were found by the sanity checker:\n"
f"{message_string}\n\n===============\n\n")
def check_sanity():
messages = []
present_files = [] present_files = []
for root, subdirs, files in os.walk(settings.MEDIA_ROOT): for root, subdirs, files in os.walk(settings.MEDIA_ROOT):
@@ -50,11 +60,15 @@ def check_sanity():
if lockfile in present_files: if lockfile in present_files:
present_files.remove(lockfile) present_files.remove(lockfile)
for doc in Document.objects.all(): if progress:
docs = tqdm(Document.objects.all())
else:
docs = Document.objects.all()
for doc in docs:
# Check sanity of the thumbnail # Check sanity of the thumbnail
if not os.path.isfile(doc.thumbnail_path): if not os.path.isfile(doc.thumbnail_path):
messages.append(SanityError( messages.error(f"Thumbnail of document {doc.pk} does not exist.")
f"Thumbnail of document {doc.pk} does not exist."))
else: else:
if os.path.normpath(doc.thumbnail_path) in present_files: if os.path.normpath(doc.thumbnail_path) in present_files:
present_files.remove(os.path.normpath(doc.thumbnail_path)) present_files.remove(os.path.normpath(doc.thumbnail_path))
@@ -62,15 +76,14 @@ def check_sanity():
with doc.thumbnail_file as f: with doc.thumbnail_file as f:
f.read() f.read()
except OSError as e: except OSError as e:
messages.append(SanityError( messages.error(
f"Cannot read thumbnail file of document {doc.pk}: {e}" f"Cannot read thumbnail file of document {doc.pk}: {e}"
)) )
# Check sanity of the original file # Check sanity of the original file
# TODO: extract method # TODO: extract method
if not os.path.isfile(doc.source_path): if not os.path.isfile(doc.source_path):
messages.append(SanityError( messages.error(f"Original of document {doc.pk} does not exist.")
f"Original of document {doc.pk} does not exist."))
else: else:
if os.path.normpath(doc.source_path) in present_files: if os.path.normpath(doc.source_path) in present_files:
present_files.remove(os.path.normpath(doc.source_path)) present_files.remove(os.path.normpath(doc.source_path))
@@ -78,31 +91,31 @@ def check_sanity():
with doc.source_file as f: with doc.source_file as f:
checksum = hashlib.md5(f.read()).hexdigest() checksum = hashlib.md5(f.read()).hexdigest()
except OSError as e: except OSError as e:
messages.append(SanityError( messages.error(
f"Cannot read original file of document {doc.pk}: {e}")) f"Cannot read original file of document {doc.pk}: {e}")
else: else:
if not checksum == doc.checksum: if not checksum == doc.checksum:
messages.append(SanityError( messages.error(
f"Checksum mismatch of document {doc.pk}. " f"Checksum mismatch of document {doc.pk}. "
f"Stored: {doc.checksum}, actual: {checksum}." f"Stored: {doc.checksum}, actual: {checksum}."
)) )
# Check sanity of the archive file. # Check sanity of the archive file.
if doc.archive_checksum and not doc.archive_filename: if doc.archive_checksum and not doc.archive_filename:
messages.append(SanityError( messages.error(
f"Document {doc.pk} has an archive file checksum, but no " f"Document {doc.pk} has an archive file checksum, but no "
f"archive filename." f"archive filename."
)) )
elif not doc.archive_checksum and doc.archive_filename: elif not doc.archive_checksum and doc.archive_filename:
messages.append(SanityError( messages.error(
f"Document {doc.pk} has an archive file, but its checksum is " f"Document {doc.pk} has an archive file, but its checksum is "
f"missing." f"missing."
)) )
elif doc.has_archive_version: elif doc.has_archive_version:
if not os.path.isfile(doc.archive_path): if not os.path.isfile(doc.archive_path):
messages.append(SanityError( messages.error(
f"Archived version of document {doc.pk} does not exist." f"Archived version of document {doc.pk} does not exist."
)) )
else: else:
if os.path.normpath(doc.archive_path) in present_files: if os.path.normpath(doc.archive_path) in present_files:
present_files.remove(os.path.normpath(doc.archive_path)) present_files.remove(os.path.normpath(doc.archive_path))
@@ -110,26 +123,23 @@ def check_sanity():
with doc.archive_file as f: with doc.archive_file as f:
checksum = hashlib.md5(f.read()).hexdigest() checksum = hashlib.md5(f.read()).hexdigest()
except OSError as e: except OSError as e:
messages.append(SanityError( messages.error(
f"Cannot read archive file of document {doc.pk}: {e}" f"Cannot read archive file of document {doc.pk}: {e}"
)) )
else: else:
if not checksum == doc.archive_checksum: if not checksum == doc.archive_checksum:
messages.append(SanityError( messages.error(
f"Checksum mismatch of archived document " f"Checksum mismatch of archived document "
f"{doc.pk}. " f"{doc.pk}. "
f"Stored: {doc.checksum}, actual: {checksum}." f"Stored: {doc.archive_checksum}, "
)) f"actual: {checksum}."
)
# other document checks # other document checks
if not doc.content: if not doc.content:
messages.append(SanityWarning( messages.info(f"Document {doc.pk} has no content.")
f"Document {doc.pk} has no content."
))
for extra_file in present_files: for extra_file in present_files:
messages.append(SanityWarning( messages.warning(f"Orphaned file in media dir: {extra_file}")
f"Orphaned file in media dir: {extra_file}"
))
return messages return messages

View File

@@ -9,8 +9,7 @@ from documents import index, sanity_checker
from documents.classifier import DocumentClassifier, load_classifier from documents.classifier import DocumentClassifier, load_classifier
from documents.consumer import Consumer, ConsumerError from documents.consumer import Consumer, ConsumerError
from documents.models import Document, Tag, DocumentType, Correspondent from documents.models import Document, Tag, DocumentType, Correspondent
from documents.sanity_checker import SanityFailedError from documents.sanity_checker import SanityCheckFailedException
logger = logging.getLogger("paperless.tasks") logger = logging.getLogger("paperless.tasks")
@@ -94,8 +93,15 @@ def consume_file(path,
def sanity_check(): def sanity_check():
messages = sanity_checker.check_sanity() messages = sanity_checker.check_sanity()
if len(messages) > 0: messages.log_messages()
raise SanityFailedError(messages)
if messages.has_error():
raise SanityCheckFailedException(
"Sanity check failed with errors. See log.")
elif messages.has_warning():
return "Sanity check exited with warnings. See log."
elif len(messages) > 0:
return "Sanity check exited with infos. See log."
else: else:
return "No issues detected." return "No issues detected."

View File

@@ -577,8 +577,11 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
def test_get_metadata(self): def test_get_metadata(self):
doc = Document.objects.create(title="test", filename="file.pdf", mime_type="image/png", archive_checksum="A", archive_filename="archive.pdf") doc = Document.objects.create(title="test", filename="file.pdf", mime_type="image/png", archive_checksum="A", archive_filename="archive.pdf")
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "thumbnails", "0000001.png"), doc.source_path) source_file = os.path.join(os.path.dirname(__file__), "samples", "documents", "thumbnails", "0000001.png")
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), doc.archive_path) archive_file = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
shutil.copy(source_file, doc.source_path)
shutil.copy(archive_file, doc.archive_path)
response = self.client.get(f"/api/documents/{doc.pk}/metadata/") response = self.client.get(f"/api/documents/{doc.pk}/metadata/")
self.assertEqual(response.status_code, 200) self.assertEqual(response.status_code, 200)
@@ -591,6 +594,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertGreater(len(meta['archive_metadata']), 0) self.assertGreater(len(meta['archive_metadata']), 0)
self.assertEqual(meta['media_filename'], "file.pdf") self.assertEqual(meta['media_filename'], "file.pdf")
self.assertEqual(meta['archive_media_filename'], "archive.pdf") self.assertEqual(meta['archive_media_filename'], "archive.pdf")
self.assertEqual(meta['original_size'], os.stat(source_file).st_size)
self.assertEqual(meta['archive_size'], os.stat(archive_file).st_size)
def test_get_metadata_invalid_doc(self): def test_get_metadata_invalid_doc(self):
response = self.client.get(f"/api/documents/34576/metadata/") response = self.client.get(f"/api/documents/34576/metadata/")
@@ -612,6 +617,21 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertIsNone(meta['archive_metadata']) self.assertIsNone(meta['archive_metadata'])
self.assertIsNone(meta['archive_media_filename']) self.assertIsNone(meta['archive_media_filename'])
def test_get_metadata_missing_files(self):
doc = Document.objects.create(title="test", filename="file.pdf", mime_type="application/pdf", archive_filename="file.pdf", archive_checksum="B", checksum="A")
response = self.client.get(f"/api/documents/{doc.pk}/metadata/")
self.assertEqual(response.status_code, 200)
meta = response.data
self.assertTrue(meta['has_archive_version'])
self.assertIsNone(meta['original_metadata'])
self.assertIsNone(meta['original_size'])
self.assertIsNone(meta['archive_metadata'])
self.assertIsNone(meta['archive_size'])
def test_get_empty_suggestions(self): def test_get_empty_suggestions(self):
doc = Document.objects.create(title="test", mime_type="application/pdf") doc = Document.objects.create(title="test", mime_type="application/pdf")

View File

@@ -3,6 +3,7 @@ import tempfile
from pathlib import Path from pathlib import Path
from unittest import mock from unittest import mock
import pytest
from django.conf import settings from django.conf import settings
from django.test import TestCase, override_settings from django.test import TestCase, override_settings
@@ -233,7 +234,6 @@ class TestClassifier(DirectoriesMixin, TestCase):
self.assertFalse(os.path.exists(settings.MODEL_FILE)) self.assertFalse(os.path.exists(settings.MODEL_FILE))
self.assertIsNone(load_classifier()) self.assertIsNone(load_classifier())
@override_settings(CACHES={'default': {'BACKEND': 'django.core.cache.backends.dummy.DummyCache'}})
@mock.patch("documents.classifier.DocumentClassifier.load") @mock.patch("documents.classifier.DocumentClassifier.load")
def test_load_classifier(self, load): def test_load_classifier(self, load):
Path(settings.MODEL_FILE).touch() Path(settings.MODEL_FILE).touch()
@@ -242,6 +242,7 @@ class TestClassifier(DirectoriesMixin, TestCase):
@override_settings(CACHES={'default': {'BACKEND': 'django.core.cache.backends.locmem.LocMemCache'}}) @override_settings(CACHES={'default': {'BACKEND': 'django.core.cache.backends.locmem.LocMemCache'}})
@override_settings(MODEL_FILE=os.path.join(os.path.dirname(__file__), "data", "model.pickle")) @override_settings(MODEL_FILE=os.path.join(os.path.dirname(__file__), "data", "model.pickle"))
@pytest.mark.skip(reason="Disabled caching due to high memory usage - need to investigate.")
def test_load_classifier_cached(self): def test_load_classifier_cached(self):
classifier = load_classifier() classifier = load_classifier()
self.assertIsNotNone(classifier) self.assertIsNotNone(classifier)
@@ -250,7 +251,6 @@ class TestClassifier(DirectoriesMixin, TestCase):
classifier2 = load_classifier() classifier2 = load_classifier()
load.assert_not_called() load.assert_not_called()
@override_settings(CACHES={'default': {'BACKEND': 'django.core.cache.backends.dummy.DummyCache'}})
@mock.patch("documents.classifier.DocumentClassifier.load") @mock.patch("documents.classifier.DocumentClassifier.load")
def test_load_classifier_incompatible_version(self, load): def test_load_classifier_incompatible_version(self, load):
Path(settings.MODEL_FILE).touch() Path(settings.MODEL_FILE).touch()
@@ -260,7 +260,6 @@ class TestClassifier(DirectoriesMixin, TestCase):
self.assertIsNone(load_classifier()) self.assertIsNone(load_classifier())
self.assertFalse(os.path.exists(settings.MODEL_FILE)) self.assertFalse(os.path.exists(settings.MODEL_FILE))
@override_settings(CACHES={'default': {'BACKEND': 'django.core.cache.backends.dummy.DummyCache'}})
@mock.patch("documents.classifier.DocumentClassifier.load") @mock.patch("documents.classifier.DocumentClassifier.load")
def test_load_classifier_os_error(self, load): def test_load_classifier_os_error(self, load):
Path(settings.MODEL_FILE).touch() Path(settings.MODEL_FILE).touch()

View File

@@ -65,6 +65,7 @@ class TestArchiver(DirectoriesMixin, TestCase):
self.assertEqual(doc1.archive_filename, "document.pdf") self.assertEqual(doc1.archive_filename, "document.pdf")
self.assertEqual(doc2.archive_filename, "document_01.pdf") self.assertEqual(doc2.archive_filename, "document_01.pdf")
class TestDecryptDocuments(TestCase): class TestDecryptDocuments(TestCase):
@override_settings( @override_settings(
@@ -154,3 +155,24 @@ class TestCreateClassifier(TestCase):
call_command("document_create_classifier") call_command("document_create_classifier")
m.assert_called_once() m.assert_called_once()
class TestSanityChecker(DirectoriesMixin, TestCase):
def test_no_issues(self):
with self.assertLogs() as capture:
call_command("document_sanity_checker")
self.assertEqual(len(capture.output), 1)
self.assertIn("Sanity checker detected no issues.", capture.output[0])
def test_errors(self):
doc = Document.objects.create(title="test", content="test", filename="test.pdf", checksum="abc")
Path(doc.source_path).touch()
Path(doc.thumbnail_path).touch()
with self.assertLogs() as capture:
call_command("document_sanity_checker")
self.assertEqual(len(capture.output), 1)
self.assertIn("Checksum mismatch of document", capture.output[0])

View File

@@ -1,3 +1,4 @@
import logging
import os import os
import shutil import shutil
from pathlib import Path from pathlib import Path
@@ -7,10 +8,59 @@ from django.conf import settings
from django.test import TestCase from django.test import TestCase
from documents.models import Document from documents.models import Document
from documents.sanity_checker import check_sanity, SanityFailedError from documents.sanity_checker import check_sanity, SanityCheckMessages
from documents.tests.utils import DirectoriesMixin from documents.tests.utils import DirectoriesMixin
class TestSanityCheckMessages(TestCase):
def test_no_messages(self):
messages = SanityCheckMessages()
self.assertEqual(len(messages), 0)
self.assertFalse(messages.has_error())
self.assertFalse(messages.has_warning())
with self.assertLogs() as capture:
messages.log_messages()
self.assertEqual(len(capture.output), 1)
self.assertEqual(capture.records[0].levelno, logging.INFO)
self.assertEqual(capture.records[0].message, "Sanity checker detected no issues.")
def test_info(self):
messages = SanityCheckMessages()
messages.info("Something might be wrong")
self.assertEqual(len(messages), 1)
self.assertFalse(messages.has_error())
self.assertFalse(messages.has_warning())
with self.assertLogs() as capture:
messages.log_messages()
self.assertEqual(len(capture.output), 1)
self.assertEqual(capture.records[0].levelno, logging.INFO)
self.assertEqual(capture.records[0].message, "Something might be wrong")
def test_warning(self):
messages = SanityCheckMessages()
messages.warning("Something is wrong")
self.assertEqual(len(messages), 1)
self.assertFalse(messages.has_error())
self.assertTrue(messages.has_warning())
with self.assertLogs() as capture:
messages.log_messages()
self.assertEqual(len(capture.output), 1)
self.assertEqual(capture.records[0].levelno, logging.WARNING)
self.assertEqual(capture.records[0].message, "Something is wrong")
def test_error(self):
messages = SanityCheckMessages()
messages.error("Something is seriously wrong")
self.assertEqual(len(messages), 1)
self.assertTrue(messages.has_error())
self.assertFalse(messages.has_warning())
with self.assertLogs() as capture:
messages.log_messages()
self.assertEqual(len(capture.output), 1)
self.assertEqual(capture.records[0].levelno, logging.ERROR)
self.assertEqual(capture.records[0].message, "Something is seriously wrong")
class TestSanityCheck(DirectoriesMixin, TestCase): class TestSanityCheck(DirectoriesMixin, TestCase):
def make_test_data(self): def make_test_data(self):
@@ -23,6 +73,11 @@ class TestSanityCheck(DirectoriesMixin, TestCase):
return Document.objects.create(title="test", checksum="42995833e01aea9b3edee44bbfdd7ce1", archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b", content="test", pk=1, filename="0000001.pdf", mime_type="application/pdf", archive_filename="0000001.pdf") return Document.objects.create(title="test", checksum="42995833e01aea9b3edee44bbfdd7ce1", archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b", content="test", pk=1, filename="0000001.pdf", mime_type="application/pdf", archive_filename="0000001.pdf")
def assertSanityError(self, messageRegex):
messages = check_sanity()
self.assertTrue(messages.has_error())
self.assertRegex(messages[0]['message'], messageRegex)
def test_no_docs(self): def test_no_docs(self):
self.assertEqual(len(check_sanity()), 0) self.assertEqual(len(check_sanity()), 0)
@@ -33,72 +88,75 @@ class TestSanityCheck(DirectoriesMixin, TestCase):
def test_no_thumbnail(self): def test_no_thumbnail(self):
doc = self.make_test_data() doc = self.make_test_data()
os.remove(doc.thumbnail_path) os.remove(doc.thumbnail_path)
self.assertEqual(len(check_sanity()), 1) self.assertSanityError("Thumbnail of document .* does not exist")
def test_thumbnail_no_access(self): def test_thumbnail_no_access(self):
doc = self.make_test_data() doc = self.make_test_data()
os.chmod(doc.thumbnail_path, 0o000) os.chmod(doc.thumbnail_path, 0o000)
self.assertEqual(len(check_sanity()), 1) self.assertSanityError("Cannot read thumbnail file of document")
os.chmod(doc.thumbnail_path, 0o777) os.chmod(doc.thumbnail_path, 0o777)
def test_no_original(self): def test_no_original(self):
doc = self.make_test_data() doc = self.make_test_data()
os.remove(doc.source_path) os.remove(doc.source_path)
self.assertEqual(len(check_sanity()), 1) self.assertSanityError("Original of document .* does not exist.")
def test_original_no_access(self): def test_original_no_access(self):
doc = self.make_test_data() doc = self.make_test_data()
os.chmod(doc.source_path, 0o000) os.chmod(doc.source_path, 0o000)
self.assertEqual(len(check_sanity()), 1) self.assertSanityError("Cannot read original file of document")
os.chmod(doc.source_path, 0o777) os.chmod(doc.source_path, 0o777)
def test_original_checksum_mismatch(self): def test_original_checksum_mismatch(self):
doc = self.make_test_data() doc = self.make_test_data()
doc.checksum = "WOW" doc.checksum = "WOW"
doc.save() doc.save()
self.assertEqual(len(check_sanity()), 1) self.assertSanityError("Checksum mismatch of document")
def test_no_archive(self): def test_no_archive(self):
doc = self.make_test_data() doc = self.make_test_data()
os.remove(doc.archive_path) os.remove(doc.archive_path)
self.assertEqual(len(check_sanity()), 1) self.assertSanityError("Archived version of document .* does not exist.")
def test_archive_no_access(self): def test_archive_no_access(self):
doc = self.make_test_data() doc = self.make_test_data()
os.chmod(doc.archive_path, 0o000) os.chmod(doc.archive_path, 0o000)
self.assertEqual(len(check_sanity()), 1) self.assertSanityError("Cannot read archive file of document")
os.chmod(doc.archive_path, 0o777) os.chmod(doc.archive_path, 0o777)
def test_archive_checksum_mismatch(self): def test_archive_checksum_mismatch(self):
doc = self.make_test_data() doc = self.make_test_data()
doc.archive_checksum = "WOW" doc.archive_checksum = "WOW"
doc.save() doc.save()
self.assertEqual(len(check_sanity()), 1) self.assertSanityError("Checksum mismatch of archived document")
def test_empty_content(self): def test_empty_content(self):
doc = self.make_test_data() doc = self.make_test_data()
doc.content = "" doc.content = ""
doc.save() doc.save()
self.assertEqual(len(check_sanity()), 1) messages = check_sanity()
self.assertFalse(messages.has_error())
self.assertFalse(messages.has_warning())
self.assertEqual(len(messages), 1)
self.assertRegex(messages[0]['message'], "Document .* has no content.")
def test_orphaned_file(self): def test_orphaned_file(self):
doc = self.make_test_data() doc = self.make_test_data()
Path(self.dirs.originals_dir, "orphaned").touch() Path(self.dirs.originals_dir, "orphaned").touch()
self.assertEqual(len(check_sanity()), 1) messages = check_sanity()
self.assertFalse(messages.has_error())
def test_error_tostring(self): self.assertTrue(messages.has_warning())
Document.objects.create(title="test", checksum="dgfhj", archive_checksum="dfhg", content="", pk=1, filename="0000001.pdf", archive_filename="0000001.pdf") self.assertEqual(len(messages), 1)
string = str(SanityFailedError(check_sanity())) self.assertRegex(messages[0]['message'], "Orphaned file in media dir")
self.assertIsNotNone(string)
def test_archive_filename_no_checksum(self): def test_archive_filename_no_checksum(self):
doc = self.make_test_data() doc = self.make_test_data()
doc.archive_checksum = None doc.archive_checksum = None
doc.save() doc.save()
self.assertEqual(len(check_sanity()), 2) self.assertSanityError("has an archive file, but its checksum is missing.")
def test_archive_checksum_no_filename(self): def test_archive_checksum_no_filename(self):
doc = self.make_test_data() doc = self.make_test_data()
doc.archive_filename = None doc.archive_filename = None
doc.save() doc.save()
self.assertEqual(len(check_sanity()), 2) self.assertSanityError("has an archive file checksum, but no archive filename.")

View File

@@ -2,12 +2,12 @@ import os
from unittest import mock from unittest import mock
from django.conf import settings from django.conf import settings
from django.test import TestCase, override_settings from django.test import TestCase
from django.utils import timezone from django.utils import timezone
from documents import tasks from documents import tasks
from documents.models import Document, Tag, Correspondent, DocumentType from documents.models import Document, Tag, Correspondent, DocumentType
from documents.sanity_checker import SanityError, SanityFailedError from documents.sanity_checker import SanityCheckMessages, SanityCheckFailedException
from documents.tests.utils import DirectoriesMixin from documents.tests.utils import DirectoriesMixin
@@ -52,7 +52,6 @@ class TestTasks(DirectoriesMixin, TestCase):
load_classifier.assert_called_once() load_classifier.assert_called_once()
self.assertFalse(os.path.isfile(settings.MODEL_FILE)) self.assertFalse(os.path.isfile(settings.MODEL_FILE))
@override_settings(CACHES={'default': {'BACKEND': 'django.core.cache.backends.dummy.DummyCache'}})
def test_train_classifier(self): def test_train_classifier(self):
c = Correspondent.objects.create(matching_algorithm=Tag.MATCH_AUTO, name="test") c = Correspondent.objects.create(matching_algorithm=Tag.MATCH_AUTO, name="test")
doc = Document.objects.create(correspondent=c, content="test", title="test") doc = Document.objects.create(correspondent=c, content="test", title="test")
@@ -75,13 +74,33 @@ class TestTasks(DirectoriesMixin, TestCase):
self.assertNotEqual(mtime2, mtime3) self.assertNotEqual(mtime2, mtime3)
@mock.patch("documents.tasks.sanity_checker.check_sanity") @mock.patch("documents.tasks.sanity_checker.check_sanity")
def test_sanity_check(self, m): def test_sanity_check_success(self, m):
m.return_value = [] m.return_value = SanityCheckMessages()
tasks.sanity_check() self.assertEqual(tasks.sanity_check(), "No issues detected.")
m.assert_called_once() m.assert_called_once()
m.reset_mock()
m.return_value = [SanityError("")] @mock.patch("documents.tasks.sanity_checker.check_sanity")
self.assertRaises(SanityFailedError, tasks.sanity_check) def test_sanity_check_error(self, m):
messages = SanityCheckMessages()
messages.error("Some error")
m.return_value = messages
self.assertRaises(SanityCheckFailedException, tasks.sanity_check)
m.assert_called_once()
@mock.patch("documents.tasks.sanity_checker.check_sanity")
def test_sanity_check_warning(self, m):
messages = SanityCheckMessages()
messages.warning("Some warning")
m.return_value = messages
self.assertEqual(tasks.sanity_check(), "Sanity check exited with warnings. See log.")
m.assert_called_once()
@mock.patch("documents.tasks.sanity_checker.check_sanity")
def test_sanity_check_info(self, m):
messages = SanityCheckMessages()
messages.info("Some info")
m.return_value = messages
self.assertEqual(tasks.sanity_check(), "Sanity check exited with infos. See log.")
m.assert_called_once() m.assert_called_once()
def test_bulk_update_documents(self): def test_bulk_update_documents(self):

View File

@@ -225,6 +225,12 @@ class DocumentViewSet(RetrieveModelMixin,
else: else:
return [] return []
def get_filesize(self, filename):
if os.path.isfile(filename):
return os.stat(filename).st_size
else:
return None
@action(methods=['get'], detail=True) @action(methods=['get'], detail=True)
def metadata(self, request, pk=None): def metadata(self, request, pk=None):
try: try:
@@ -234,7 +240,7 @@ class DocumentViewSet(RetrieveModelMixin,
meta = { meta = {
"original_checksum": doc.checksum, "original_checksum": doc.checksum,
"original_size": os.stat(doc.source_path).st_size, "original_size": self.get_filesize(doc.source_path),
"original_mime_type": doc.mime_type, "original_mime_type": doc.mime_type,
"media_filename": doc.filename, "media_filename": doc.filename,
"has_archive_version": doc.has_archive_version, "has_archive_version": doc.has_archive_version,
@@ -245,7 +251,7 @@ class DocumentViewSet(RetrieveModelMixin,
} }
if doc.has_archive_version: if doc.has_archive_version:
meta['archive_size'] = os.stat(doc.archive_path).st_size, meta['archive_size'] = self.get_filesize(doc.archive_path)
meta['archive_metadata'] = self.get_metadata( meta['archive_metadata'] = self.get_metadata(
doc.archive_path, "application/pdf") doc.archive_path, "application/pdf")
else: else:

View File

@@ -0,0 +1,650 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
# This file is distributed under the same license as the PACKAGE package.
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#
# Translators:
# Štěpán Šebestian <mys.orangeorange0123@gmail.com>, 2021
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2021-01-28 22:02+0100\n"
"PO-Revision-Date: 2020-12-30 19:27+0000\n"
"Last-Translator: Štěpán Šebestian <mys.orangeorange0123@gmail.com>, 2021\n"
"Language-Team: Czech (https://www.transifex.com/paperless/teams/115905/cs/)\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Language: cs\n"
"Plural-Forms: nplurals=4; plural=(n == 1 && n % 1 == 0) ? 0 : (n >= 2 && n <= 4 && n % 1 == 0) ? 1: (n % 1 != 0 ) ? 2 : 3;\n"
#: documents/apps.py:10
msgid "Documents"
msgstr "Dokumenty"
#: documents/models.py:33
msgid "Any word"
msgstr "Jakékoliv slovo"
#: documents/models.py:34
msgid "All words"
msgstr "Všechna slova"
#: documents/models.py:35
msgid "Exact match"
msgstr "Přesná shoda"
#: documents/models.py:36
msgid "Regular expression"
msgstr "Regulární výraz"
#: documents/models.py:37
msgid "Fuzzy word"
msgstr "Fuzzy slovo"
#: documents/models.py:38
msgid "Automatic"
msgstr "Automatický"
#: documents/models.py:42 documents/models.py:352 paperless_mail/models.py:25
#: paperless_mail/models.py:109
msgid "name"
msgstr "název"
#: documents/models.py:46
msgid "match"
msgstr "shoda"
#: documents/models.py:50
msgid "matching algorithm"
msgstr "algoritmus pro shodu"
#: documents/models.py:56
msgid "is insensitive"
msgstr "je ignorováno"
#: documents/models.py:75 documents/models.py:135
msgid "correspondent"
msgstr "korespondent"
#: documents/models.py:76
msgid "correspondents"
msgstr "korespondenti"
#: documents/models.py:98
msgid "color"
msgstr "barva"
#: documents/models.py:102
msgid "is inbox tag"
msgstr "tag přichozí"
#: documents/models.py:104
msgid ""
"Marks this tag as an inbox tag: All newly consumed documents will be tagged "
"with inbox tags."
msgstr ""
"Označí tento tag jako tag pro příchozí: Všechny nově zkonzumované dokumenty "
"budou označeny tagem pro přichozí"
#: documents/models.py:109
msgid "tag"
msgstr "tag"
#: documents/models.py:110 documents/models.py:166
msgid "tags"
msgstr "tagy"
#: documents/models.py:116 documents/models.py:148
msgid "document type"
msgstr "typ dokumentu"
#: documents/models.py:117
msgid "document types"
msgstr "typy dokumentu"
#: documents/models.py:125
msgid "Unencrypted"
msgstr "Nešifrované"
#: documents/models.py:126
msgid "Encrypted with GNU Privacy Guard"
msgstr "Šifrované pomocí GNU Privacy Guard"
#: documents/models.py:139
msgid "title"
msgstr "titulek"
#: documents/models.py:152
msgid "content"
msgstr "obsah"
#: documents/models.py:154
msgid ""
"The raw, text-only data of the document. This field is primarily used for "
"searching."
msgstr ""
"Nezpracovaná, pouze textová data dokumentu. Toto pole je používáno především"
" pro vyhledávání."
#: documents/models.py:159
msgid "mime type"
msgstr "mime typ"
#: documents/models.py:170
msgid "checksum"
msgstr "kontrolní součet"
#: documents/models.py:174
msgid "The checksum of the original document."
msgstr "Kontrolní součet původního dokumentu"
#: documents/models.py:178
msgid "archive checksum"
msgstr "kontrolní součet archivu"
#: documents/models.py:183
msgid "The checksum of the archived document."
msgstr "Kontrolní součet archivovaného dokumentu."
#: documents/models.py:187 documents/models.py:330
msgid "created"
msgstr "vytvořeno"
#: documents/models.py:191
msgid "modified"
msgstr "upraveno"
#: documents/models.py:195
msgid "storage type"
msgstr "typ úložiště"
#: documents/models.py:203
msgid "added"
msgstr "přidáno"
#: documents/models.py:207
msgid "filename"
msgstr "název souboru"
#: documents/models.py:212
msgid "Current filename in storage"
msgstr "Aktuální název souboru v úložišti"
#: documents/models.py:216
msgid "archive serial number"
msgstr "sériové číslo archivu"
#: documents/models.py:221
msgid "The position of this document in your physical document archive."
msgstr "Pozice dokumentu ve vašem archivu fyzických dokumentů"
#: documents/models.py:227
msgid "document"
msgstr "dokument"
#: documents/models.py:228
msgid "documents"
msgstr "dokumenty"
#: documents/models.py:313
msgid "debug"
msgstr "debug"
#: documents/models.py:314
msgid "information"
msgstr "informace"
#: documents/models.py:315
msgid "warning"
msgstr "varování"
#: documents/models.py:316
msgid "error"
msgstr "chyba"
#: documents/models.py:317
msgid "critical"
msgstr "kritická"
#: documents/models.py:321
msgid "group"
msgstr "skupina"
#: documents/models.py:324
msgid "message"
msgstr "zpráva"
#: documents/models.py:327
msgid "level"
msgstr "úroveň"
#: documents/models.py:334
msgid "log"
msgstr "záznam"
#: documents/models.py:335
msgid "logs"
msgstr "záznamy"
#: documents/models.py:346 documents/models.py:396
msgid "saved view"
msgstr "uložený pohled"
#: documents/models.py:347
msgid "saved views"
msgstr "uložené pohledy"
#: documents/models.py:350
msgid "user"
msgstr "uživatel"
#: documents/models.py:356
msgid "show on dashboard"
msgstr "zobrazit v dashboardu"
#: documents/models.py:359
msgid "show in sidebar"
msgstr "zobrazit v postranním menu"
#: documents/models.py:363
msgid "sort field"
msgstr "pole na řazení"
#: documents/models.py:366
msgid "sort reverse"
msgstr "třídit opačně"
#: documents/models.py:372
msgid "title contains"
msgstr "titulek obsahuje"
#: documents/models.py:373
msgid "content contains"
msgstr "obsah obsahuje"
#: documents/models.py:374
msgid "ASN is"
msgstr "ASN je"
#: documents/models.py:375
msgid "correspondent is"
msgstr "korespondent je"
#: documents/models.py:376
msgid "document type is"
msgstr "typ dokumentu je"
#: documents/models.py:377
msgid "is in inbox"
msgstr "je v příchozích"
#: documents/models.py:378
msgid "has tag"
msgstr "má tag"
#: documents/models.py:379
msgid "has any tag"
msgstr "má jakýkoliv tag"
#: documents/models.py:380
msgid "created before"
msgstr "vytvořeno před"
#: documents/models.py:381
msgid "created after"
msgstr "vytvořeno po"
#: documents/models.py:382
msgid "created year is"
msgstr "rok vytvoření je"
#: documents/models.py:383
msgid "created month is"
msgstr "měsíc vytvoření je"
#: documents/models.py:384
msgid "created day is"
msgstr "den vytvoření je"
#: documents/models.py:385
msgid "added before"
msgstr "přidáno před"
#: documents/models.py:386
msgid "added after"
msgstr "přidáno po"
#: documents/models.py:387
msgid "modified before"
msgstr "upraveno před"
#: documents/models.py:388
msgid "modified after"
msgstr "upraveno po"
#: documents/models.py:389
msgid "does not have tag"
msgstr "nemá tag"
#: documents/models.py:400
msgid "rule type"
msgstr "typ pravidla"
#: documents/models.py:404
msgid "value"
msgstr "hodnota"
#: documents/models.py:410
msgid "filter rule"
msgstr "filtrovací pravidlo"
#: documents/models.py:411
msgid "filter rules"
msgstr "filtrovací pravidla"
#: documents/serialisers.py:383
#, python-format
msgid "File type %(type)s not supported"
msgstr "Typ souboru %(type)s není podporován"
#: documents/templates/index.html:20
msgid "Paperless-ng is loading..."
msgstr "Paperless-ng se načítá..."
#: documents/templates/registration/logged_out.html:13
msgid "Paperless-ng signed out"
msgstr "Odhlášeno od Paperless-ng"
#: documents/templates/registration/logged_out.html:41
msgid "You have been successfully logged out. Bye!"
msgstr "Byli jste úspěšně odhlášeni. Nashledanou!"
#: documents/templates/registration/logged_out.html:42
msgid "Sign in again"
msgstr "Přihlašte se znovu"
#: documents/templates/registration/login.html:13
msgid "Paperless-ng sign in"
msgstr "Paperless-ng přihlášení"
#: documents/templates/registration/login.html:42
msgid "Please sign in."
msgstr "Prosím přihlaste se."
#: documents/templates/registration/login.html:45
msgid "Your username and password didn't match. Please try again."
msgstr "Vaše uživatelské jméno a heslo se neshodují. Prosím, zkuste to znovu."
#: documents/templates/registration/login.html:48
msgid "Username"
msgstr "Uživatelské jméno"
#: documents/templates/registration/login.html:49
msgid "Password"
msgstr "Heslo"
#: documents/templates/registration/login.html:54
msgid "Sign in"
msgstr "Přihlásit se"
#: paperless/settings.py:286
msgid "English"
msgstr "Angličtina"
#: paperless/settings.py:287
msgid "German"
msgstr "Němčina"
#: paperless/settings.py:288
msgid "Dutch"
msgstr "Holandština"
#: paperless/settings.py:289
msgid "French"
msgstr "Francouzština"
#: paperless/urls.py:114
msgid "Paperless-ng administration"
msgstr "Správa Paperless-ng"
#: paperless_mail/admin.py:25
msgid "Filter"
msgstr "Filtr"
#: paperless_mail/admin.py:27
msgid ""
"Paperless will only process mails that match ALL of the filters given below."
msgstr ""
"Paperless zpracuje pouze emaily které odpovídají VŠEM níže zadaným filtrům."
#: paperless_mail/admin.py:37
msgid "Actions"
msgstr "Akce"
#: paperless_mail/admin.py:39
msgid ""
"The action applied to the mail. This action is only performed when documents"
" were consumed from the mail. Mails without attachments will remain entirely"
" untouched."
msgstr ""
"Akce provedena na emailu. Tato akce je provedena jen pokud byly dokumenty "
"zkonzumovány z emailu. Emaily bez příloh zůstanou nedotčeny."
#: paperless_mail/admin.py:46
msgid "Metadata"
msgstr "Metadata"
#: paperless_mail/admin.py:48
msgid ""
"Assign metadata to documents consumed from this rule automatically. If you "
"do not assign tags, types or correspondents here, paperless will still "
"process all matching rules that you have defined."
msgstr ""
"Automaticky přiřadit metadata dokumentům zkonzumovaných z tohoto pravidla. "
"Pokud zde nepřiřadíte tagy, typy nebo korespondenty, paperless stále "
"zpracuje všechna shodující-se pravidla které jste definovali."
#: paperless_mail/apps.py:9
msgid "Paperless mail"
msgstr "Paperless pošta"
#: paperless_mail/models.py:11
msgid "mail account"
msgstr "emailový účet"
#: paperless_mail/models.py:12
msgid "mail accounts"
msgstr "emailové účty"
#: paperless_mail/models.py:19
msgid "No encryption"
msgstr "Žádné šifrování"
#: paperless_mail/models.py:20
msgid "Use SSL"
msgstr "Používat SSL"
#: paperless_mail/models.py:21
msgid "Use STARTTLS"
msgstr "Používat STARTTLS"
#: paperless_mail/models.py:29
msgid "IMAP server"
msgstr "IMAP server"
#: paperless_mail/models.py:33
msgid "IMAP port"
msgstr "IMAP port"
#: paperless_mail/models.py:36
msgid ""
"This is usually 143 for unencrypted and STARTTLS connections, and 993 for "
"SSL connections."
msgstr ""
"Toto je většinou 143 pro nešifrovaná připojení/připojení používající "
"STARTTLS a 993 pro SSL připojení."
#: paperless_mail/models.py:40
msgid "IMAP security"
msgstr "IMAP bezpečnost"
#: paperless_mail/models.py:46
msgid "username"
msgstr "uživatelské jméno"
#: paperless_mail/models.py:50
msgid "password"
msgstr "heslo"
#: paperless_mail/models.py:60
msgid "mail rule"
msgstr "mailové pravidlo"
#: paperless_mail/models.py:61
msgid "mail rules"
msgstr "mailová pravidla"
#: paperless_mail/models.py:67
msgid "Only process attachments."
msgstr "Zpracovávat jen přílohy"
#: paperless_mail/models.py:68
msgid "Process all files, including 'inline' attachments."
msgstr "Zpracovat všechny soubory, včetně vložených příloh"
#: paperless_mail/models.py:78
msgid "Mark as read, don't process read mails"
msgstr "Označit jako přečtené, nezpracovávat přečtené emaily"
#: paperless_mail/models.py:79
msgid "Flag the mail, don't process flagged mails"
msgstr "Označit email, nezpracovávat označené emaily"
#: paperless_mail/models.py:80
msgid "Move to specified folder"
msgstr "Přesunout do specifikované složky"
#: paperless_mail/models.py:81
msgid "Delete"
msgstr "Odstranit"
#: paperless_mail/models.py:88
msgid "Use subject as title"
msgstr "Použít předmět jako titulek"
#: paperless_mail/models.py:89
msgid "Use attachment filename as title"
msgstr "Použít název souboru u přílohy jako titulek"
#: paperless_mail/models.py:99
msgid "Do not assign a correspondent"
msgstr "Nepřiřazovat korespondenta"
#: paperless_mail/models.py:101
msgid "Use mail address"
msgstr "Použít emailovou adresu"
#: paperless_mail/models.py:103
msgid "Use name (or mail address if not available)"
msgstr "Použít jméno (nebo emailovou adresu pokud jméno není dostupné)"
#: paperless_mail/models.py:105
msgid "Use correspondent selected below"
msgstr "Použít korespondenta vybraného níže"
#: paperless_mail/models.py:113
msgid "order"
msgstr "pořadí"
#: paperless_mail/models.py:120
msgid "account"
msgstr "účet"
#: paperless_mail/models.py:124
msgid "folder"
msgstr "složka"
#: paperless_mail/models.py:128
msgid "filter from"
msgstr "filtrovat z"
#: paperless_mail/models.py:131
msgid "filter subject"
msgstr "název filtru"
#: paperless_mail/models.py:134
msgid "filter body"
msgstr "tělo filtru"
#: paperless_mail/models.py:138
msgid "filter attachment filename"
msgstr "název souboru u přílohy filtru"
#: paperless_mail/models.py:140
msgid ""
"Only consume documents which entirely match this filename if specified. "
"Wildcards such as *.pdf or *invoice* are allowed. Case insensitive."
msgstr ""
"Konzumovat jen dokumenty které přesně odpovídají tomuto názvu souboru pokud "
"specifikováno. Zástupné znaky jako *.pdf nebo *invoice* jsou povoleny. "
"Nezáleží na velikosti písmen."
#: paperless_mail/models.py:146
msgid "maximum age"
msgstr "maximální stáří"
#: paperless_mail/models.py:148
msgid "Specified in days."
msgstr "Specifikováno ve dnech."
#: paperless_mail/models.py:151
msgid "attachment type"
msgstr "typ přílohy"
#: paperless_mail/models.py:154
msgid ""
"Inline attachments include embedded images, so it's best to combine this "
"option with a filename filter."
msgstr ""
"Vložené přílohy zahrnují vložené obrázky, takže je nejlepší tuto možnost "
"kombinovat s filtrem na název souboru"
#: paperless_mail/models.py:159
msgid "action"
msgstr "akce"
#: paperless_mail/models.py:165
msgid "action parameter"
msgstr "parametr akce"
#: paperless_mail/models.py:167
msgid ""
"Additional parameter for the action selected above, i.e., the target folder "
"of the move to folder action."
msgstr ""
"Další parametr pro výše vybranou akci, napříkad cílová složka akce přesunutí"
" do složky."
#: paperless_mail/models.py:173
msgid "assign title from"
msgstr "nastavit titulek z"
#: paperless_mail/models.py:183
msgid "assign this tag"
msgstr "přiřadit tento tag"
#: paperless_mail/models.py:191
msgid "assign this document type"
msgstr "přiřadit tento typ dokumentu"
#: paperless_mail/models.py:195
msgid "assign correspondent from"
msgstr "přiřadit korespondenta z"
#: paperless_mail/models.py:205
msgid "assign this correspondent"
msgstr "přiřadit tohoto korespondenta"

View File

@@ -102,10 +102,11 @@ INSTALLED_APPS = [
"django_q", "django_q",
"channels",
] + env_apps ] + env_apps
if DEBUG:
INSTALLED_APPS.append("channels")
REST_FRAMEWORK = { REST_FRAMEWORK = {
'DEFAULT_AUTHENTICATION_CLASSES': [ 'DEFAULT_AUTHENTICATION_CLASSES': [
'rest_framework.authentication.BasicAuthentication', 'rest_framework.authentication.BasicAuthentication',
@@ -169,16 +170,6 @@ CHANNEL_LAYERS = {
}, },
} }
CACHES = {
"default": {
"BACKEND": "django_redis.cache.RedisCache",
"LOCATION": os.getenv("PAPERLESS_REDIS", "redis://localhost:6379"),
"OPTIONS": {
"CLIENT_CLASS": "django_redis.client.DefaultClient",
}
}
}
############################################################################### ###############################################################################
# Security # # Security #
############################################################################### ###############################################################################
@@ -407,8 +398,9 @@ TASK_WORKERS = int(os.getenv("PAPERLESS_TASK_WORKERS", default_task_workers()))
Q_CLUSTER = { Q_CLUSTER = {
'name': 'paperless', 'name': 'paperless',
'catch_up': False, 'catch_up': False,
'recycle': 1,
'workers': TASK_WORKERS, 'workers': TASK_WORKERS,
'django_redis': 'default' 'redis': os.getenv("PAPERLESS_REDIS", "redis://localhost:6379")
} }

View File

@@ -1 +1 @@
__version__ = (1, 1, 1) __version__ = (1, 1, 2)