mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Merge remote-tracking branch 'upstream/master'
This commit is contained in:
commit
e72735c4f0
2
Pipfile
2
Pipfile
@ -36,3 +36,5 @@ pytest-xdist = "*"
|
|||||||
[dev-packages]
|
[dev-packages]
|
||||||
ipython = "*"
|
ipython = "*"
|
||||||
sphinx = "*"
|
sphinx = "*"
|
||||||
|
tox = "*"
|
||||||
|
|
||||||
|
83
Pipfile.lock
generated
83
Pipfile.lock
generated
@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"_meta": {
|
"_meta": {
|
||||||
"hash": {
|
"hash": {
|
||||||
"sha256": "e20c2294bcafd346ee57901df94a515a12976ed192dc37df848b39b56bdd1f4b"
|
"sha256": "6d8bad24aa5d0c102b13b5ae27acba04836cd5a07a4003cb2763de1e0a3406b7"
|
||||||
},
|
},
|
||||||
"pipfile-spec": 6,
|
"pipfile-spec": 6,
|
||||||
"requires": {},
|
"requires": {},
|
||||||
@ -19,7 +19,7 @@
|
|||||||
"sha256:37228cda29411948b422fae072f57e31d3396d2ee1c9783775980ee9c9990af6",
|
"sha256:37228cda29411948b422fae072f57e31d3396d2ee1c9783775980ee9c9990af6",
|
||||||
"sha256:58587dd4dc3daefad0487f6d9ae32b4542b185e1c36db6993290e7c41ca2b47c"
|
"sha256:58587dd4dc3daefad0487f6d9ae32b4542b185e1c36db6993290e7c41ca2b47c"
|
||||||
],
|
],
|
||||||
"markers": "python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.3.*' and python_version != '3.2.*' and python_version != '3.1.*'",
|
"markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'",
|
||||||
"version": "==1.5"
|
"version": "==1.5"
|
||||||
},
|
},
|
||||||
"atomicwrites": {
|
"atomicwrites": {
|
||||||
@ -27,7 +27,7 @@
|
|||||||
"sha256:0312ad34fcad8fac3704d441f7b317e50af620823353ec657a53e981f92920c0",
|
"sha256:0312ad34fcad8fac3704d441f7b317e50af620823353ec657a53e981f92920c0",
|
||||||
"sha256:ec9ae8adaae229e4f8446952d204a3e4b5fdd2d099f9be3aaf556120135fb3ee"
|
"sha256:ec9ae8adaae229e4f8446952d204a3e4b5fdd2d099f9be3aaf556120135fb3ee"
|
||||||
],
|
],
|
||||||
"markers": "python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.3.*' and python_version != '3.2.*' and python_version != '3.1.*'",
|
"markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'",
|
||||||
"version": "==1.2.1"
|
"version": "==1.2.1"
|
||||||
},
|
},
|
||||||
"attrs": {
|
"attrs": {
|
||||||
@ -85,7 +85,7 @@
|
|||||||
"sha256:e05cb4d9aad6233d67e0541caa7e511fa4047ed7750ec2510d466e806e0255d6",
|
"sha256:e05cb4d9aad6233d67e0541caa7e511fa4047ed7750ec2510d466e806e0255d6",
|
||||||
"sha256:f3f501f345f24383c0000395b26b726e46758b71393267aeae0bd36f8b3ade80"
|
"sha256:f3f501f345f24383c0000395b26b726e46758b71393267aeae0bd36f8b3ade80"
|
||||||
],
|
],
|
||||||
"markers": "python_version >= '2.6' and python_version != '3.0.*' and python_version != '3.2.*' and python_version < '4' and python_version != '3.1.*'",
|
"markers": "python_version >= '2.6' and python_version != '3.2.*' and python_version != '3.0.*' and python_version != '3.1.*' and python_version < '4'",
|
||||||
"version": "==4.5.1"
|
"version": "==4.5.1"
|
||||||
},
|
},
|
||||||
"coveralls": {
|
"coveralls": {
|
||||||
@ -163,7 +163,7 @@
|
|||||||
"sha256:a7a84d5fa07a089186a329528f127c9d73b9de57f1a1131b82bb5320ee651f6a",
|
"sha256:a7a84d5fa07a089186a329528f127c9d73b9de57f1a1131b82bb5320ee651f6a",
|
||||||
"sha256:fc155a6b553c66c838d1a22dba1dc9f5f505c43285a878c6f74a79c024750b83"
|
"sha256:fc155a6b553c66c838d1a22dba1dc9f5f505c43285a878c6f74a79c024750b83"
|
||||||
],
|
],
|
||||||
"markers": "python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.3.*' and python_version != '3.2.*' and python_version != '3.1.*'",
|
"markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'",
|
||||||
"version": "==1.5.0"
|
"version": "==1.5.0"
|
||||||
},
|
},
|
||||||
"factory-boy": {
|
"factory-boy": {
|
||||||
@ -179,6 +179,7 @@
|
|||||||
"sha256:ea7cfd3aeb1544732d08bd9cfba40c5b78e3a91e17b1a0698ab81bfc5554c628",
|
"sha256:ea7cfd3aeb1544732d08bd9cfba40c5b78e3a91e17b1a0698ab81bfc5554c628",
|
||||||
"sha256:f6d67f04abfb2b4bea7afc7fa6c18cf4c523a67956e455668be9ae42bccc21ad"
|
"sha256:f6d67f04abfb2b4bea7afc7fa6c18cf4c523a67956e455668be9ae42bccc21ad"
|
||||||
],
|
],
|
||||||
|
"markers": "python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.2.*' and python_version >= '2.7'",
|
||||||
"version": "==0.9.0"
|
"version": "==0.9.0"
|
||||||
},
|
},
|
||||||
"filemagic": {
|
"filemagic": {
|
||||||
@ -282,7 +283,7 @@
|
|||||||
"sha256:6e3836e39f4d36ae72840833db137f7b7d35105079aee6ec4a62d9f80d594dd1",
|
"sha256:6e3836e39f4d36ae72840833db137f7b7d35105079aee6ec4a62d9f80d594dd1",
|
||||||
"sha256:95eb8364a4708392bae89035f45341871286a333f749c3141c20573d2b3876e1"
|
"sha256:95eb8364a4708392bae89035f45341871286a333f749c3141c20573d2b3876e1"
|
||||||
],
|
],
|
||||||
"markers": "python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.3.*' and python_version != '3.2.*' and python_version != '3.1.*'",
|
"markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'",
|
||||||
"version": "==0.7.1"
|
"version": "==0.7.1"
|
||||||
},
|
},
|
||||||
"py": {
|
"py": {
|
||||||
@ -290,7 +291,7 @@
|
|||||||
"sha256:06a30435d058473046be836d3fc4f27167fd84c45b99704f2fb5509ef61f9af1",
|
"sha256:06a30435d058473046be836d3fc4f27167fd84c45b99704f2fb5509ef61f9af1",
|
||||||
"sha256:50402e9d1c9005d759426988a492e0edaadb7f4e68bcddfea586bc7432d009c6"
|
"sha256:50402e9d1c9005d759426988a492e0edaadb7f4e68bcddfea586bc7432d009c6"
|
||||||
],
|
],
|
||||||
"markers": "python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.3.*' and python_version != '3.2.*' and python_version != '3.1.*'",
|
"markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'",
|
||||||
"version": "==1.6.0"
|
"version": "==1.6.0"
|
||||||
},
|
},
|
||||||
"pycodestyle": {
|
"pycodestyle": {
|
||||||
@ -303,26 +304,26 @@
|
|||||||
},
|
},
|
||||||
"pyocr": {
|
"pyocr": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:bdc4d43bf9b63c2a9a4b2c9a1a623a0e63c8e6600eede5dbe866b31f3a5f2207"
|
"sha256:b6ba6263fd92da56627dff6d263d991a2246aacd117d1788f11b93f419ca395f"
|
||||||
],
|
],
|
||||||
"index": "pypi",
|
"index": "pypi",
|
||||||
"version": "==0.5.2"
|
"version": "==0.5.3"
|
||||||
},
|
},
|
||||||
"pytest": {
|
"pytest": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:2d7c49e931316cc7d1638a3e5f54f5d7b4e5225972b3c9838f3584788d27f349",
|
"sha256:453cbbbe5ce6db38717d282b758b917de84802af4288910c12442984bde7b823",
|
||||||
"sha256:ad0c7db7b5d4081631e0155f5c61b80ad76ce148551aaafe3a718d65a7508b18"
|
"sha256:a8a07f84e680482eb51e244370aaf2caa6301ef265f37c2bdefb3dd3b663f99d"
|
||||||
],
|
],
|
||||||
"index": "pypi",
|
"index": "pypi",
|
||||||
"version": "==3.7.4"
|
"version": "==3.8.0"
|
||||||
},
|
},
|
||||||
"pytest-cov": {
|
"pytest-cov": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:03aa752cf11db41d281ea1d807d954c4eda35cfa1b21d6971966cc041bbf6e2d",
|
"sha256:513c425e931a0344944f84ea47f3956be0e416d95acbd897a44970c8d926d5d7",
|
||||||
"sha256:890fe5565400902b0c78b5357004aab1c814115894f4f21370e2433256a3eeec"
|
"sha256:e360f048b7dae3f2f2a9a4d067b2dd6b6a015d384d1577c994a43f3f7cbad762"
|
||||||
],
|
],
|
||||||
"index": "pypi",
|
"index": "pypi",
|
||||||
"version": "==2.5.1"
|
"version": "==2.6.0"
|
||||||
},
|
},
|
||||||
"pytest-django": {
|
"pytest-django": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
@ -344,6 +345,7 @@
|
|||||||
"sha256:e4500cd0509ec4a26535f7d4112a8cc0f17d3a41c29ffd4eab479d2a55b30805",
|
"sha256:e4500cd0509ec4a26535f7d4112a8cc0f17d3a41c29ffd4eab479d2a55b30805",
|
||||||
"sha256:f275cb48a73fc61a6710726348e1da6d68a978f0ec0c54ece5a5fae5977e5a08"
|
"sha256:f275cb48a73fc61a6710726348e1da6d68a978f0ec0c54ece5a5fae5977e5a08"
|
||||||
],
|
],
|
||||||
|
"markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'",
|
||||||
"version": "==0.2"
|
"version": "==0.2"
|
||||||
},
|
},
|
||||||
"pytest-sugar": {
|
"pytest-sugar": {
|
||||||
@ -457,7 +459,7 @@
|
|||||||
"sha256:a68ac5e15e76e7e5dd2b8f94007233e01effe3e50e8daddf69acfd81cb686baf",
|
"sha256:a68ac5e15e76e7e5dd2b8f94007233e01effe3e50e8daddf69acfd81cb686baf",
|
||||||
"sha256:b5725a0bd4ba422ab0e66e89e030c806576753ea3ee08554382c14e685d117b5"
|
"sha256:b5725a0bd4ba422ab0e66e89e030c806576753ea3ee08554382c14e685d117b5"
|
||||||
],
|
],
|
||||||
"markers": "python_version >= '2.6' and python_version != '3.3.*' and python_version < '4' and python_version != '3.1.*' and python_version != '3.2.*' and python_version != '3.0.*'",
|
"markers": "python_version >= '2.6' and python_version != '3.2.*' and python_version != '3.0.*' and python_version != '3.1.*' and python_version < '4' and python_version != '3.3.*'",
|
||||||
"version": "==1.23"
|
"version": "==1.23"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@ -521,10 +523,11 @@
|
|||||||
},
|
},
|
||||||
"imagesize": {
|
"imagesize": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:3620cc0cadba3f7475f9940d22431fc4d407269f1be59ec9b8edcca26440cf18",
|
"sha256:3f349de3eb99145973fefb7dbe38554414e5c30abd0c8e4b970a7c9d09f3a1d8",
|
||||||
"sha256:5b326e4678b6925158ccc66a9fa3122b6106d7c876ee32d7de6ce59385b96315"
|
"sha256:f3832918bc3c66617f92e35f5d70729187676313caa60c187eb0f28b8fe5e3b5"
|
||||||
],
|
],
|
||||||
"version": "==1.0.0"
|
"markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'",
|
||||||
|
"version": "==1.1.0"
|
||||||
},
|
},
|
||||||
"ipython": {
|
"ipython": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
@ -590,6 +593,14 @@
|
|||||||
],
|
],
|
||||||
"version": "==0.7.4"
|
"version": "==0.7.4"
|
||||||
},
|
},
|
||||||
|
"pluggy": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:6e3836e39f4d36ae72840833db137f7b7d35105079aee6ec4a62d9f80d594dd1",
|
||||||
|
"sha256:95eb8364a4708392bae89035f45341871286a333f749c3141c20573d2b3876e1"
|
||||||
|
],
|
||||||
|
"markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'",
|
||||||
|
"version": "==0.7.1"
|
||||||
|
},
|
||||||
"prompt-toolkit": {
|
"prompt-toolkit": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:1df952620eccb399c53ebb359cc7d9a8d3a9538cb34c5a1344bdbeb29fbcc381",
|
"sha256:1df952620eccb399c53ebb359cc7d9a8d3a9538cb34c5a1344bdbeb29fbcc381",
|
||||||
@ -605,6 +616,14 @@
|
|||||||
],
|
],
|
||||||
"version": "==0.6.0"
|
"version": "==0.6.0"
|
||||||
},
|
},
|
||||||
|
"py": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:06a30435d058473046be836d3fc4f27167fd84c45b99704f2fb5509ef61f9af1",
|
||||||
|
"sha256:50402e9d1c9005d759426988a492e0edaadb7f4e68bcddfea586bc7432d009c6"
|
||||||
|
],
|
||||||
|
"markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'",
|
||||||
|
"version": "==1.6.0"
|
||||||
|
},
|
||||||
"pygments": {
|
"pygments": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:78f3f434bcc5d6ee09020f92ba487f95ba50f1e3ef83ae96b9d5ffa1bab25c5d",
|
"sha256:78f3f434bcc5d6ee09020f92ba487f95ba50f1e3ef83ae96b9d5ffa1bab25c5d",
|
||||||
@ -656,20 +675,28 @@
|
|||||||
},
|
},
|
||||||
"sphinx": {
|
"sphinx": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:a07050845cc9a2f4026a6035cc8ed795a5ce7be6528bbc82032385c10807dfe7",
|
"sha256:217a7705adcb573da5bbe1e0f5cab4fa0bd89fd9342c9159121746f593c2d5a4",
|
||||||
"sha256:d719de667218d763e8fd144b7fcfeefd8d434a6201f76bf9f0f0c1fa6f47fcdb"
|
"sha256:a602513f385f1d5785ff1ca420d9c7eb1a1b63381733b2f0ea8188a391314a86"
|
||||||
],
|
],
|
||||||
"index": "pypi",
|
"index": "pypi",
|
||||||
"version": "==1.7.8"
|
"version": "==1.7.9"
|
||||||
},
|
},
|
||||||
"sphinxcontrib-websupport": {
|
"sphinxcontrib-websupport": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:68ca7ff70785cbe1e7bccc71a48b5b6d965d79ca50629606c7861a21b206d9dd",
|
"sha256:68ca7ff70785cbe1e7bccc71a48b5b6d965d79ca50629606c7861a21b206d9dd",
|
||||||
"sha256:9de47f375baf1ea07cdb3436ff39d7a9c76042c10a769c52353ec46e4e8fc3b9"
|
"sha256:9de47f375baf1ea07cdb3436ff39d7a9c76042c10a769c52353ec46e4e8fc3b9"
|
||||||
],
|
],
|
||||||
"markers": "python_version != '3.3.*' and python_version >= '2.7' and python_version != '3.1.*' and python_version != '3.2.*' and python_version != '3.0.*'",
|
"markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'",
|
||||||
"version": "==1.1.0"
|
"version": "==1.1.0"
|
||||||
},
|
},
|
||||||
|
"tox": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:37cf240781b662fb790710c6998527e65ca6851eace84d1595ee71f7af4e85f7",
|
||||||
|
"sha256:eb61aa5bcce65325538686f09848f04ef679b5cd9b83cc491272099b28739600"
|
||||||
|
],
|
||||||
|
"index": "pypi",
|
||||||
|
"version": "==3.2.1"
|
||||||
|
},
|
||||||
"traitlets": {
|
"traitlets": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:9c4bd2d267b7153df9152698efb1050a5d84982d3384a37b2c1f7723ba3e7835",
|
"sha256:9c4bd2d267b7153df9152698efb1050a5d84982d3384a37b2c1f7723ba3e7835",
|
||||||
@ -682,9 +709,17 @@
|
|||||||
"sha256:a68ac5e15e76e7e5dd2b8f94007233e01effe3e50e8daddf69acfd81cb686baf",
|
"sha256:a68ac5e15e76e7e5dd2b8f94007233e01effe3e50e8daddf69acfd81cb686baf",
|
||||||
"sha256:b5725a0bd4ba422ab0e66e89e030c806576753ea3ee08554382c14e685d117b5"
|
"sha256:b5725a0bd4ba422ab0e66e89e030c806576753ea3ee08554382c14e685d117b5"
|
||||||
],
|
],
|
||||||
"markers": "python_version >= '2.6' and python_version != '3.3.*' and python_version < '4' and python_version != '3.1.*' and python_version != '3.2.*' and python_version != '3.0.*'",
|
"markers": "python_version >= '2.6' and python_version != '3.2.*' and python_version != '3.0.*' and python_version != '3.1.*' and python_version < '4' and python_version != '3.3.*'",
|
||||||
"version": "==1.23"
|
"version": "==1.23"
|
||||||
},
|
},
|
||||||
|
"virtualenv": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:2ce32cd126117ce2c539f0134eb89de91a8413a29baac49cbab3eb50e2026669",
|
||||||
|
"sha256:ca07b4c0b54e14a91af9f34d0919790b016923d157afda5efdde55c96718f752"
|
||||||
|
],
|
||||||
|
"markers": "python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.2.*' and python_version != '3.1.*'",
|
||||||
|
"version": "==16.0.0"
|
||||||
|
},
|
||||||
"wcwidth": {
|
"wcwidth": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:3df37372226d6e63e1b1e1eda15c594bca98a22d33a23832a90998faa96bc65e",
|
"sha256:3df37372226d6e63e1b1e1eda15c594bca98a22d33a23832a90998faa96bc65e",
|
||||||
|
@ -1,6 +1,23 @@
|
|||||||
Changelog
|
Changelog
|
||||||
#########
|
#########
|
||||||
|
|
||||||
|
2.3.0
|
||||||
|
=====
|
||||||
|
|
||||||
|
* Support for consuming plain text & markdown documents was added by
|
||||||
|
`Joshua Taillon`_! This was a long-requested feature, and its addition is
|
||||||
|
likely to be greatly appreciated by the community: `#395`_ Thanks also to
|
||||||
|
`David Martin`_ for his assistance on the issue.
|
||||||
|
* `dubit0`_ found & fixed a bug that prevented management commands from running
|
||||||
|
before we had an operational database: `#396`_
|
||||||
|
* Joshua also added a simple update to the thumbnail generation process to
|
||||||
|
improve performance: `#399`_
|
||||||
|
* As his last bit of effort on this release, Joshua also added some code to
|
||||||
|
allow you to view the documents inline rather than download them as an
|
||||||
|
attachment. `#400`_
|
||||||
|
* Finally, `ahyear`_ found a slip in the Docker documentation and patched it. `#401`_
|
||||||
|
|
||||||
|
|
||||||
2.2.1
|
2.2.1
|
||||||
=====
|
=====
|
||||||
|
|
||||||
@ -19,6 +36,10 @@ Changelog
|
|||||||
easier on those of us with lots of different tags: `#391`_.
|
easier on those of us with lots of different tags: `#391`_.
|
||||||
* `Kilian Koeltzsch`_ noticed a bug in how we capture & automatically create
|
* `Kilian Koeltzsch`_ noticed a bug in how we capture & automatically create
|
||||||
tags, so that's fixed now too: `#384`_.
|
tags, so that's fixed now too: `#384`_.
|
||||||
|
* `erikarvstedt`_ tweaked the behaviour of the test suite to be better behaved
|
||||||
|
for packaging environments: `#383`_.
|
||||||
|
* `Lukasz Soluch`_ added CORS support to make building a new Javascript-based front-end
|
||||||
|
cleaner & easier: `#387`_.
|
||||||
|
|
||||||
|
|
||||||
2.1.0
|
2.1.0
|
||||||
@ -476,6 +497,10 @@ bulk of the work on this big change.
|
|||||||
.. _Tim Brooks: https://github.com/brookst
|
.. _Tim Brooks: https://github.com/brookst
|
||||||
.. _Stéphane Brunner: https://github.com/sbrunner
|
.. _Stéphane Brunner: https://github.com/sbrunner
|
||||||
.. _Kilian Koeltzsch: https://github.com/kiliankoe
|
.. _Kilian Koeltzsch: https://github.com/kiliankoe
|
||||||
|
.. _Lukasz Soluch: https://github.com/LukaszSolo
|
||||||
|
.. _Joshua Taillon: https://github.com/jat255
|
||||||
|
.. _dubit0: https://github.com/dubit0
|
||||||
|
.. _ahyear: https://github.com/ahyear
|
||||||
|
|
||||||
.. _#20: https://github.com/danielquinn/paperless/issues/20
|
.. _#20: https://github.com/danielquinn/paperless/issues/20
|
||||||
.. _#44: https://github.com/danielquinn/paperless/issues/44
|
.. _#44: https://github.com/danielquinn/paperless/issues/44
|
||||||
@ -550,11 +575,18 @@ bulk of the work on this big change.
|
|||||||
.. _#374: https://github.com/danielquinn/paperless/pull/374
|
.. _#374: https://github.com/danielquinn/paperless/pull/374
|
||||||
.. _#375: https://github.com/danielquinn/paperless/pull/375
|
.. _#375: https://github.com/danielquinn/paperless/pull/375
|
||||||
.. _#376: https://github.com/danielquinn/paperless/pull/376
|
.. _#376: https://github.com/danielquinn/paperless/pull/376
|
||||||
|
.. _#383: https://github.com/danielquinn/paperless/pull/383
|
||||||
.. _#384: https://github.com/danielquinn/paperless/issues/384
|
.. _#384: https://github.com/danielquinn/paperless/issues/384
|
||||||
.. _#386: https://github.com/danielquinn/paperless/issues/386
|
.. _#386: https://github.com/danielquinn/paperless/issues/386
|
||||||
|
.. _#387: https://github.com/danielquinn/paperless/pull/387
|
||||||
.. _#391: https://github.com/danielquinn/paperless/pull/391
|
.. _#391: https://github.com/danielquinn/paperless/pull/391
|
||||||
.. _#390: https://github.com/danielquinn/paperless/pull/390
|
.. _#390: https://github.com/danielquinn/paperless/pull/390
|
||||||
.. _#392: https://github.com/danielquinn/paperless/issues/392
|
.. _#392: https://github.com/danielquinn/paperless/issues/392
|
||||||
|
.. _#395: https://github.com/danielquinn/paperless/pull/395
|
||||||
|
.. _#396: https://github.com/danielquinn/paperless/pull/396
|
||||||
|
.. _#399: https://github.com/danielquinn/paperless/pull/399
|
||||||
|
.. _#400: https://github.com/danielquinn/paperless/pull/400
|
||||||
|
.. _#401: https://github.com/danielquinn/paperless/pull/401
|
||||||
|
|
||||||
.. _pipenv: https://docs.pipenv.org/
|
.. _pipenv: https://docs.pipenv.org/
|
||||||
.. _a new home on Docker Hub: https://hub.docker.com/r/danielquinn/paperless/
|
.. _a new home on Docker Hub: https://hub.docker.com/r/danielquinn/paperless/
|
||||||
|
@ -101,6 +101,7 @@ is similar:
|
|||||||
$ cd /path/to/project
|
$ cd /path/to/project
|
||||||
$ git pull
|
$ git pull
|
||||||
$ docker build -t paperless .
|
$ docker build -t paperless .
|
||||||
|
$ docker-compose run --rm consumer migrate
|
||||||
$ docker-compose up -d
|
$ docker-compose up -d
|
||||||
|
|
||||||
If ``git pull`` doesn't report any changes, there is no need to continue with
|
If ``git pull`` doesn't report any changes, there is no need to continue with
|
||||||
|
0
docs/requirements.txt
Normal file
0
docs/requirements.txt
Normal file
@ -89,9 +89,10 @@ PAPERLESS_EMAIL_SECRET=""
|
|||||||
# as is "example.com,www.example.com", but NOT " example.com" or "example.com,"
|
# as is "example.com,www.example.com", but NOT " example.com" or "example.com,"
|
||||||
#PAPERLESS_ALLOWED_HOSTS="example.com,www.example.com"
|
#PAPERLESS_ALLOWED_HOSTS="example.com,www.example.com"
|
||||||
|
|
||||||
# If you decide to use Paperless APIs in an ajax calls, you need to add your
|
# If you decide to use the Paperless API in an ajax call, you need to add your
|
||||||
# servers to the allowed hosts that can do CORS calls. By default Paperless allows
|
# servers to the list of allowed hosts that can do CORS calls. By default
|
||||||
# calls from localhost:8080. The same rules as above how the list should look like.
|
# Paperless allows calls from localhost:8080, but if you'd like to change that,
|
||||||
|
# you can set this value to a comma-separated list.
|
||||||
#PAPERLESS_CORS_ALLOWED_HOSTS="localhost:8080,example.com,localhost:8000"
|
#PAPERLESS_CORS_ALLOWED_HOSTS="localhost:8080,example.com,localhost:8000"
|
||||||
|
|
||||||
# To host paperless under a subpath url like example.com/paperless you set
|
# To host paperless under a subpath url like example.com/paperless you set
|
||||||
@ -116,6 +117,10 @@ PAPERLESS_EMAIL_SECRET=""
|
|||||||
# http://paperless.readthedocs.org/en/latest/consumption.html#hooking-into-the-consumption-process
|
# http://paperless.readthedocs.org/en/latest/consumption.html#hooking-into-the-consumption-process
|
||||||
#PAPERLESS_POST_CONSUME_SCRIPT="/path/to/an/arbitrary/script.sh"
|
#PAPERLESS_POST_CONSUME_SCRIPT="/path/to/an/arbitrary/script.sh"
|
||||||
|
|
||||||
|
# By default, when clicking on a document within the web interface, the
|
||||||
|
# browser will prompt the user to save the document to disk. By setting this to
|
||||||
|
# "true", the document will instead be opened in the browser, if possible.
|
||||||
|
#PAPERLESS_INLINE_DOC="false"
|
||||||
|
|
||||||
#
|
#
|
||||||
# The following values use sensible defaults for modern systems, but if you're
|
# The following values use sensible defaults for modern systems, but if you're
|
||||||
|
@ -29,7 +29,7 @@ pillow==5.2.0
|
|||||||
pluggy==0.7.1; python_version != '3.1.*'
|
pluggy==0.7.1; python_version != '3.1.*'
|
||||||
py==1.6.0; python_version != '3.1.*'
|
py==1.6.0; python_version != '3.1.*'
|
||||||
pycodestyle==2.4.0
|
pycodestyle==2.4.0
|
||||||
pyocr==0.5.2
|
pyocr==0.5.3
|
||||||
pytest-cov==2.5.1
|
pytest-cov==2.5.1
|
||||||
pytest-django==3.4.2
|
pytest-django==3.4.2
|
||||||
pytest-env==0.6.2
|
pytest-env==0.6.2
|
||||||
|
@ -2,7 +2,7 @@ import textwrap
|
|||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from django.core.checks import Error, register
|
from django.core.checks import Error, register
|
||||||
from django.db.utils import OperationalError
|
from django.db.utils import OperationalError, ProgrammingError
|
||||||
|
|
||||||
|
|
||||||
@register()
|
@register()
|
||||||
@ -14,7 +14,7 @@ def changed_password_check(app_configs, **kwargs):
|
|||||||
try:
|
try:
|
||||||
encrypted_doc = Document.objects.filter(
|
encrypted_doc = Document.objects.filter(
|
||||||
storage_type=Document.STORAGE_TYPE_GPG).first()
|
storage_type=Document.STORAGE_TYPE_GPG).first()
|
||||||
except OperationalError:
|
except (OperationalError, ProgrammingError):
|
||||||
return [] # No documents table yet
|
return [] # No documents table yet
|
||||||
|
|
||||||
if encrypted_doc:
|
if encrypted_doc:
|
||||||
|
@ -1,24 +1,24 @@
|
|||||||
# coding=utf-8
|
# coding=utf-8
|
||||||
|
|
||||||
import dateutil.parser
|
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
from collections import OrderedDict
|
from collections import OrderedDict
|
||||||
|
|
||||||
|
import dateutil.parser
|
||||||
|
from django.conf import settings
|
||||||
|
from django.db import models
|
||||||
|
from django.template.defaultfilters import slugify
|
||||||
|
from django.utils import timezone
|
||||||
from fuzzywuzzy import fuzz
|
from fuzzywuzzy import fuzz
|
||||||
|
|
||||||
from django.conf import settings
|
from .managers import LogManager
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from django.core.urlresolvers import reverse
|
from django.core.urlresolvers import reverse
|
||||||
except ImportError:
|
except ImportError:
|
||||||
from django.urls import reverse
|
from django.urls import reverse
|
||||||
from django.db import models
|
|
||||||
from django.template.defaultfilters import slugify
|
|
||||||
from django.utils import timezone
|
|
||||||
|
|
||||||
from .managers import LogManager
|
|
||||||
|
|
||||||
|
|
||||||
class MatchingModel(models.Model):
|
class MatchingModel(models.Model):
|
||||||
@ -135,7 +135,7 @@ class MatchingModel(models.Model):
|
|||||||
Example:
|
Example:
|
||||||
' some random words "with quotes " and spaces'
|
' some random words "with quotes " and spaces'
|
||||||
==>
|
==>
|
||||||
["some", "random", "words", "with\s+quotes", "and", "spaces"]
|
["some", "random", "words", "with+quotes", "and", "spaces"]
|
||||||
"""
|
"""
|
||||||
findterms = re.compile(r'"([^"]+)"|(\S+)').findall
|
findterms = re.compile(r'"([^"]+)"|(\S+)').findall
|
||||||
normspace = re.compile(r"\s+").sub
|
normspace = re.compile(r"\s+").sub
|
||||||
@ -192,7 +192,11 @@ class Document(models.Model):
|
|||||||
TYPE_JPG = "jpg"
|
TYPE_JPG = "jpg"
|
||||||
TYPE_GIF = "gif"
|
TYPE_GIF = "gif"
|
||||||
TYPE_TIF = "tiff"
|
TYPE_TIF = "tiff"
|
||||||
TYPES = (TYPE_PDF, TYPE_PNG, TYPE_JPG, TYPE_GIF, TYPE_TIF,)
|
TYPE_TXT = "txt"
|
||||||
|
TYPE_CSV = "csv"
|
||||||
|
TYPE_MD = "md"
|
||||||
|
TYPES = (TYPE_PDF, TYPE_PNG, TYPE_JPG, TYPE_GIF, TYPE_TIF,
|
||||||
|
TYPE_TXT, TYPE_CSV, TYPE_MD)
|
||||||
|
|
||||||
STORAGE_TYPE_UNENCRYPTED = "unencrypted"
|
STORAGE_TYPE_UNENCRYPTED = "unencrypted"
|
||||||
STORAGE_TYPE_GPG = "gpg"
|
STORAGE_TYPE_GPG = "gpg"
|
||||||
@ -365,51 +369,52 @@ class FileInfo:
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
formats = "pdf|jpe?g|png|gif|tiff?|te?xt|md|csv"
|
||||||
REGEXES = OrderedDict([
|
REGEXES = OrderedDict([
|
||||||
("created-correspondent-title-tags", re.compile(
|
("created-correspondent-title-tags", re.compile(
|
||||||
r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
|
r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
|
||||||
r"(?P<correspondent>.*) - "
|
r"(?P<correspondent>.*) - "
|
||||||
r"(?P<title>.*) - "
|
r"(?P<title>.*) - "
|
||||||
r"(?P<tags>[a-z0-9\-,]*)"
|
r"(?P<tags>[a-z0-9\-,]*)"
|
||||||
r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$",
|
r"\.(?P<extension>{})$".format(formats),
|
||||||
flags=re.IGNORECASE
|
flags=re.IGNORECASE
|
||||||
)),
|
)),
|
||||||
("created-title-tags", re.compile(
|
("created-title-tags", re.compile(
|
||||||
r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
|
r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
|
||||||
r"(?P<title>.*) - "
|
r"(?P<title>.*) - "
|
||||||
r"(?P<tags>[a-z0-9\-,]*)"
|
r"(?P<tags>[a-z0-9\-,]*)"
|
||||||
r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$",
|
r"\.(?P<extension>{})$".format(formats),
|
||||||
flags=re.IGNORECASE
|
flags=re.IGNORECASE
|
||||||
)),
|
)),
|
||||||
("created-correspondent-title", re.compile(
|
("created-correspondent-title", re.compile(
|
||||||
r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
|
r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
|
||||||
r"(?P<correspondent>.*) - "
|
r"(?P<correspondent>.*) - "
|
||||||
r"(?P<title>.*)"
|
r"(?P<title>.*)"
|
||||||
r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$",
|
r"\.(?P<extension>{})$".format(formats),
|
||||||
flags=re.IGNORECASE
|
flags=re.IGNORECASE
|
||||||
)),
|
)),
|
||||||
("created-title", re.compile(
|
("created-title", re.compile(
|
||||||
r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
|
r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
|
||||||
r"(?P<title>.*)"
|
r"(?P<title>.*)"
|
||||||
r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$",
|
r"\.(?P<extension>{})$".format(formats),
|
||||||
flags=re.IGNORECASE
|
flags=re.IGNORECASE
|
||||||
)),
|
)),
|
||||||
("correspondent-title-tags", re.compile(
|
("correspondent-title-tags", re.compile(
|
||||||
r"(?P<correspondent>.*) - "
|
r"(?P<correspondent>.*) - "
|
||||||
r"(?P<title>.*) - "
|
r"(?P<title>.*) - "
|
||||||
r"(?P<tags>[a-z0-9\-,]*)"
|
r"(?P<tags>[a-z0-9\-,]*)"
|
||||||
r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$",
|
r"\.(?P<extension>{})$".format(formats),
|
||||||
flags=re.IGNORECASE
|
flags=re.IGNORECASE
|
||||||
)),
|
)),
|
||||||
("correspondent-title", re.compile(
|
("correspondent-title", re.compile(
|
||||||
r"(?P<correspondent>.*) - "
|
r"(?P<correspondent>.*) - "
|
||||||
r"(?P<title>.*)?"
|
r"(?P<title>.*)?"
|
||||||
r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$",
|
r"\.(?P<extension>{})$".format(formats),
|
||||||
flags=re.IGNORECASE
|
flags=re.IGNORECASE
|
||||||
)),
|
)),
|
||||||
("title", re.compile(
|
("title", re.compile(
|
||||||
r"(?P<title>.*)"
|
r"(?P<title>.*)"
|
||||||
r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$",
|
r"\.(?P<extension>{})$".format(formats),
|
||||||
flags=re.IGNORECASE
|
flags=re.IGNORECASE
|
||||||
))
|
))
|
||||||
])
|
])
|
||||||
|
@ -1,9 +1,25 @@
|
|||||||
import logging
|
import logging
|
||||||
import shutil
|
import shutil
|
||||||
import tempfile
|
import tempfile
|
||||||
|
import re
|
||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
|
|
||||||
|
# This regular expression will try to find dates in the document at
|
||||||
|
# hand and will match the following formats:
|
||||||
|
# - XX.YY.ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
|
||||||
|
# - XX/YY/ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
|
||||||
|
# - XX-YY-ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
|
||||||
|
# - XX. MONTH ZZZZ with XX being 1 or 2 and ZZZZ being 2 or 4 digits
|
||||||
|
# - MONTH ZZZZ, with ZZZZ being 4 digits
|
||||||
|
# - MONTH XX, ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits
|
||||||
|
DATE_REGEX = re.compile(
|
||||||
|
r'\b([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})\b|' +
|
||||||
|
r'\b([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))\b|' +
|
||||||
|
r'\b([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))\b|' +
|
||||||
|
r'\b([^\W\d_]{3,9} [0-9]{4})\b'
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class ParseError(Exception):
|
class ParseError(Exception):
|
||||||
pass
|
pass
|
||||||
|
@ -166,7 +166,7 @@ class TestMatching(TestCase):
|
|||||||
def test_match_regex(self):
|
def test_match_regex(self):
|
||||||
|
|
||||||
self._test_matching(
|
self._test_matching(
|
||||||
"alpha\w+gamma",
|
r"alpha\w+gamma",
|
||||||
"MATCH_REGEX",
|
"MATCH_REGEX",
|
||||||
(
|
(
|
||||||
"I have alpha_and_gamma in me",
|
"I have alpha_and_gamma in me",
|
||||||
|
@ -1,6 +1,8 @@
|
|||||||
from django.http import HttpResponse, HttpResponseBadRequest
|
from django.http import HttpResponse, HttpResponseBadRequest
|
||||||
from django.views.generic import DetailView, FormView, TemplateView
|
from django.views.generic import DetailView, FormView, TemplateView
|
||||||
from django_filters.rest_framework import DjangoFilterBackend
|
from django_filters.rest_framework import DjangoFilterBackend
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
from paperless.db import GnuPG
|
from paperless.db import GnuPG
|
||||||
from paperless.mixins import SessionOrBasicAuthMixin
|
from paperless.mixins import SessionOrBasicAuthMixin
|
||||||
from paperless.views import StandardPagination
|
from paperless.views import StandardPagination
|
||||||
@ -48,6 +50,9 @@ class FetchView(SessionOrBasicAuthMixin, DetailView):
|
|||||||
Document.TYPE_JPG: "image/jpeg",
|
Document.TYPE_JPG: "image/jpeg",
|
||||||
Document.TYPE_GIF: "image/gif",
|
Document.TYPE_GIF: "image/gif",
|
||||||
Document.TYPE_TIF: "image/tiff",
|
Document.TYPE_TIF: "image/tiff",
|
||||||
|
Document.TYPE_CSV: "text/csv",
|
||||||
|
Document.TYPE_MD: "text/markdown",
|
||||||
|
Document.TYPE_TXT: "text/plain"
|
||||||
}
|
}
|
||||||
|
|
||||||
if self.kwargs["kind"] == "thumb":
|
if self.kwargs["kind"] == "thumb":
|
||||||
@ -60,8 +65,11 @@ class FetchView(SessionOrBasicAuthMixin, DetailView):
|
|||||||
self._get_raw_data(self.object.source_file),
|
self._get_raw_data(self.object.source_file),
|
||||||
content_type=content_types[self.object.file_type]
|
content_type=content_types[self.object.file_type]
|
||||||
)
|
)
|
||||||
response["Content-Disposition"] = 'attachment; filename="{}"'.format(
|
|
||||||
self.object.file_name)
|
DISPOSITION = 'inline' if settings.INLINE_DOC else 'attachment'
|
||||||
|
|
||||||
|
response["Content-Disposition"] = '{}; filename="{}"'.format(
|
||||||
|
DISPOSITION, self.object.file_name)
|
||||||
|
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
@ -22,6 +22,14 @@ elif os.path.exists("/usr/local/etc/paperless.conf"):
|
|||||||
load_dotenv("/usr/local/etc/paperless.conf")
|
load_dotenv("/usr/local/etc/paperless.conf")
|
||||||
|
|
||||||
|
|
||||||
|
def __get_boolean(key, default="NO"):
    """
    Return a boolean value based on whatever the user has supplied in the
    environment based on whether the value "looks like" it's True or not.

    ``key`` is the name of the environment variable to read.

    ``default`` is the raw string used when the variable is not set at all.
    It goes through the same "looks like True" test as a user-supplied value,
    so a setting that should be enabled unless explicitly disabled can pass
    ``default="YES"``.  Omitting it keeps the previous behaviour, where an
    unset variable yields False.
    """
    truthy = ("yes", "y", "1", "t", "true")
    # The comparison is case-insensitive; anything not in ``truthy`` is False.
    return os.getenv(key, default).lower() in truthy
|
||||||
|
|
||||||
|
|
||||||
# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
|
# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
|
||||||
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
|
||||||
@ -67,6 +75,7 @@ INSTALLED_APPS = [
|
|||||||
"documents.apps.DocumentsConfig",
|
"documents.apps.DocumentsConfig",
|
||||||
"reminders.apps.RemindersConfig",
|
"reminders.apps.RemindersConfig",
|
||||||
"paperless_tesseract.apps.PaperlessTesseractConfig",
|
"paperless_tesseract.apps.PaperlessTesseractConfig",
|
||||||
|
"paperless_text.apps.PaperlessTextConfig",
|
||||||
|
|
||||||
"django.contrib.admin",
|
"django.contrib.admin",
|
||||||
|
|
||||||
@ -221,12 +230,12 @@ OCR_LANGUAGE = os.getenv("PAPERLESS_OCR_LANGUAGE", "eng")
|
|||||||
OCR_THREADS = os.getenv("PAPERLESS_OCR_THREADS")
|
OCR_THREADS = os.getenv("PAPERLESS_OCR_THREADS")
|
||||||
|
|
||||||
# OCR all documents?
|
# OCR all documents?
|
||||||
OCR_ALWAYS = bool(os.getenv("PAPERLESS_OCR_ALWAYS", "NO").lower() in ("yes", "y", "1", "t", "true")) # NOQA
|
OCR_ALWAYS = __get_boolean("PAPERLESS_OCR_ALWAYS")
|
||||||
|
|
||||||
# If this is true, any failed attempts to OCR a PDF will result in the PDF
|
# If this is true, any failed attempts to OCR a PDF will result in the PDF
|
||||||
# being indexed anyway, with whatever we could get. If it's False, the file
|
# being indexed anyway, with whatever we could get. If it's False, the file
|
||||||
# will simply be left in the CONSUMPTION_DIR.
|
# will simply be left in the CONSUMPTION_DIR.
|
||||||
FORGIVING_OCR = bool(os.getenv("PAPERLESS_FORGIVING_OCR", "YES").lower() in ("yes", "y", "1", "t", "true")) # NOQA
|
# Enabled unless explicitly switched off: the expression this setting used
# before the boolean helper was introduced fell back to "YES" when the
# variable was unset, so that fallback is kept here rather than silently
# flipping the default to off.
FORGIVING_OCR = os.getenv("PAPERLESS_FORGIVING_OCR", "YES").lower() in (
    "yes", "y", "1", "t", "true")
|
||||||
|
|
||||||
# GNUPG needs a home directory for some reason
|
# GNUPG needs a home directory for some reason
|
||||||
GNUPG_HOME = os.getenv("HOME", "/tmp")
|
GNUPG_HOME = os.getenv("HOME", "/tmp")
|
||||||
@ -270,6 +279,9 @@ PASSPHRASE = os.getenv("PAPERLESS_PASSPHRASE")
|
|||||||
PRE_CONSUME_SCRIPT = os.getenv("PAPERLESS_PRE_CONSUME_SCRIPT")
|
PRE_CONSUME_SCRIPT = os.getenv("PAPERLESS_PRE_CONSUME_SCRIPT")
|
||||||
POST_CONSUME_SCRIPT = os.getenv("PAPERLESS_POST_CONSUME_SCRIPT")
|
POST_CONSUME_SCRIPT = os.getenv("PAPERLESS_POST_CONSUME_SCRIPT")
|
||||||
|
|
||||||
|
# Whether to display a selected document inline, or download it as attachment:
|
||||||
|
INLINE_DOC = __get_boolean("PAPERLESS_INLINE_DOC")
|
||||||
|
|
||||||
# The number of items on each page in the web UI. This value must be a
|
# The number of items on each page in the web UI. This value must be a
|
||||||
# positive integer, but if you don't define one in paperless.conf, a default of
|
# positive integer, but if you don't define one in paperless.conf, a default of
|
||||||
# 100 will be used.
|
# 100 will be used.
|
||||||
|
@ -1 +1 @@
|
|||||||
__version__ = (2, 2, 1)
|
__version__ = (2, 3, 0)
|
||||||
|
@ -14,7 +14,7 @@ from pyocr.libtesseract.tesseract_raw import \
|
|||||||
from pyocr.tesseract import TesseractError
|
from pyocr.tesseract import TesseractError
|
||||||
|
|
||||||
import pdftotext
|
import pdftotext
|
||||||
from documents.parsers import DocumentParser, ParseError
|
from documents.parsers import DocumentParser, ParseError, DATE_REGEX
|
||||||
|
|
||||||
from .languages import ISO639
|
from .languages import ISO639
|
||||||
|
|
||||||
@ -50,10 +50,11 @@ class RasterisedDocumentParser(DocumentParser):
|
|||||||
self.CONVERT,
|
self.CONVERT,
|
||||||
"-scale", "500x5000",
|
"-scale", "500x5000",
|
||||||
"-alpha", "remove",
|
"-alpha", "remove",
|
||||||
self.document_path, os.path.join(self.tempdir, "convert-%04d.png")
|
"{}[0]".format(self.document_path),
|
||||||
|
os.path.join(self.tempdir, "convert.png")
|
||||||
)
|
)
|
||||||
|
|
||||||
return os.path.join(self.tempdir, "convert-0000.png")
|
return os.path.join(self.tempdir, "convert.png")
|
||||||
|
|
||||||
def _is_ocred(self):
|
def _is_ocred(self):
|
||||||
|
|
||||||
@ -210,22 +211,8 @@ class RasterisedDocumentParser(DocumentParser):
|
|||||||
except ParseError as e:
|
except ParseError as e:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# This regular expression will try to find dates in the document at
|
|
||||||
# hand and will match the following formats:
|
|
||||||
# - XX.YY.ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
|
|
||||||
# - XX/YY/ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
|
|
||||||
# - XX-YY-ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
|
|
||||||
# - XX. MONTH ZZZZ with XX being 1 or 2 and ZZZZ being 2 or 4 digits
|
|
||||||
# - MONTH ZZZZ, with ZZZZ being 4 digits
|
|
||||||
# - MONTH XX, ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits
|
|
||||||
pattern = re.compile(
|
|
||||||
r'\b([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})\b|' +
|
|
||||||
r'\b([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))\b|' +
|
|
||||||
r'\b([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))\b|' +
|
|
||||||
r'\b([^\W\d_]{3,9} [0-9]{4})\b')
|
|
||||||
|
|
||||||
# Iterate through all regex matches and try to parse the date
|
# Iterate through all regex matches and try to parse the date
|
||||||
for m in re.finditer(pattern, text):
|
for m in re.finditer(DATE_REGEX, text):
|
||||||
datestring = m.group(0)
|
datestring = m.group(0)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@ -272,8 +259,9 @@ def run_unpaper(args):
|
|||||||
def strip_excess_whitespace(text):
|
def strip_excess_whitespace(text):
|
||||||
collapsed_spaces = re.sub(r"([^\S\r\n]+)", " ", text)
|
collapsed_spaces = re.sub(r"([^\S\r\n]+)", " ", text)
|
||||||
no_leading_whitespace = re.sub(
|
no_leading_whitespace = re.sub(
|
||||||
"([\n\r]+)([^\S\n\r]+)", '\\1', collapsed_spaces)
|
r"([\n\r]+)([^\S\n\r]+)", '\\1', collapsed_spaces)
|
||||||
no_trailing_whitespace = re.sub("([^\S\n\r]+)$", '', no_leading_whitespace)
|
no_trailing_whitespace = re.sub(
|
||||||
|
r"([^\S\n\r]+)$", '', no_leading_whitespace)
|
||||||
return no_trailing_whitespace
|
return no_trailing_whitespace
|
||||||
|
|
||||||
|
|
||||||
|
@ -5,7 +5,7 @@ from .parsers import RasterisedDocumentParser
|
|||||||
|
|
||||||
class ConsumerDeclaration:
|
class ConsumerDeclaration:
|
||||||
|
|
||||||
MATCHING_FILES = re.compile("^.*\.(pdf|jpe?g|gif|png|tiff?|pnm|bmp)$")
|
MATCHING_FILES = re.compile(r"^.*\.(pdf|jpe?g|gif|png|tiff?|pnm|bmp)$")
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def handle(cls, sender, **kwargs):
|
def handle(cls, sender, **kwargs):
|
||||||
|
0
src/paperless_text/__init__.py
Normal file
0
src/paperless_text/__init__.py
Normal file
16
src/paperless_text/apps.py
Normal file
16
src/paperless_text/apps.py
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
from django.apps import AppConfig
|
||||||
|
|
||||||
|
|
||||||
|
class PaperlessTextConfig(AppConfig):
    """
    App configuration for the ``paperless_text`` application.
    """

    name = "paperless_text"

    def ready(self):
        """
        Connect this app's consumer declaration to the documents app's
        ``document_consumer_declaration`` signal, then run the base hook.
        """
        # Imported here rather than at module level, as in the original.
        from documents.signals import (
            document_consumer_declaration as declaration_signal
        )
        from .signals import ConsumerDeclaration

        declaration_signal.connect(ConsumerDeclaration.handle)

        super().ready()
|
131
src/paperless_text/parsers.py
Normal file
131
src/paperless_text/parsers.py
Normal file
@ -0,0 +1,131 @@
|
|||||||
|
import os
|
||||||
|
import re
|
||||||
|
import subprocess
|
||||||
|
|
||||||
|
import dateparser
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
|
from documents.parsers import DocumentParser, ParseError, DATE_REGEX
|
||||||
|
|
||||||
|
|
||||||
|
class TextDocumentParser(DocumentParser):
    """
    This parser directly parses a text document (.txt, .md, or .csv)
    """

    CONVERT = settings.CONVERT_BINARY
    THREADS = int(settings.OCR_THREADS) if settings.OCR_THREADS else None
    UNPAPER = settings.UNPAPER_BINARY
    DATE_ORDER = settings.DATE_ORDER
    DEFAULT_OCR_LANGUAGE = settings.OCR_LANGUAGE
    OCR_ALWAYS = settings.OCR_ALWAYS

    def __init__(self, path):
        super().__init__(path)
        # Filled in lazily by get_text() so the file is only read once.
        self._text = None

    def get_thumbnail(self):
        """
        The thumbnail of a txt is just a 500px wide image of the text
        rendered onto a letter-sized page.

        Returns the path of the generated PNG inside ``self.tempdir``.
        ``run_command`` raises ParseError if any convert invocation fails.
        """
        # The below is heavily cribbed from https://askubuntu.com/a/590951

        # Local imports: these modules are not imported at file level.
        import shlex
        from itertools import islice

        bg_color = "white"  # bg color
        text_color = "black"  # text color
        psize = [500, 647]  # icon size
        n_lines = 50  # number of lines to show
        output_file = os.path.join(self.tempdir, "convert-txt.png")

        temp_bg = os.path.join(self.tempdir, "bg.png")
        temp_txlayer = os.path.join(self.tempdir, "tx.png")
        picsize = "x".join([str(n) for n in psize])
        txsize = "x".join([str(n - 8) for n in psize])

        def create_bg():
            # Only constants go into the -draw expression, so its shell
            # fragments are left exactly as they were; the path is quoted.
            work_size = ",".join([str(n - 1) for n in psize])
            r = str(round(psize[0] / 10))
            rounded = ",".join([r, r])
            run_command(self.CONVERT, "-size ", picsize, ' xc:none -draw ',
                        '"fill ', bg_color, ' roundrectangle 0,0,',
                        work_size, ",", rounded, '" ', shlex.quote(temp_bg))

        def read_text():
            # Only the first n_lines lines are rendered, so stop reading
            # there instead of loading the whole document.
            with open(self.document_path, 'r') as src:
                head = [line.strip() for line in islice(src, n_lines)]
            return "\n".join(head)

        def create_txlayer():
            # run_command() hands a single string to the shell.  The document
            # text is untrusted, so it is passed as one quoted shell word:
            # otherwise "$(...)", backticks or backslashes in a consumed file
            # would be interpreted by the shell.  The spaces around the text
            # are kept from the original command line.
            caption = shlex.quote("caption: " + read_text() + " ")
            run_command(self.CONVERT,
                        "-background none",
                        "-fill",
                        text_color,
                        "-pointsize", "12",
                        "-border 4 -bordercolor none",
                        "-size", txsize,
                        caption,
                        shlex.quote(temp_txlayer))

        create_txlayer()
        create_bg()
        run_command(self.CONVERT,
                    shlex.quote(temp_bg), shlex.quote(temp_txlayer),
                    "-background None -layers merge ",
                    shlex.quote(output_file))

        return output_file

    def get_text(self):
        """
        Return the full content of the document, reading the file on the
        first call and returning the cached string afterwards.
        """

        if self._text is not None:
            return self._text

        with open(self.document_path, 'r') as f:
            self._text = f.read()

        return self._text

    def get_date(self):
        """
        Return the first date found in the document text, or None.

        Every DATE_REGEX match is handed to dateparser in document order;
        the first one that parses to a date wins.
        """
        date = None
        datestring = None

        try:
            text = self.get_text()
        except ParseError:
            return None

        # Iterate through all regex matches and try to parse the date
        for m in re.finditer(DATE_REGEX, text):
            datestring = m.group(0)

            try:
                date = dateparser.parse(
                    datestring,
                    settings={'DATE_ORDER': self.DATE_ORDER,
                              'PREFER_DAY_OF_MONTH': 'first',
                              'RETURN_AS_TIMEZONE_AWARE': True})
            except TypeError:
                # Skip all matches that do not parse to a proper date
                continue

            if date is not None:
                break

        if date is not None:
            self.log("info", "Detected document date " + date.isoformat() +
                             " based on string " + datestring)
        else:
            self.log("info", "Unable to detect date for document")

        return date
|
||||||
|
|
||||||
|
|
||||||
|
def run_command(*args):
    """
    Join ``args`` with single spaces and execute the result via the shell.

    The child process gets a copy of the current environment, with
    MAGICK_MEMORY_LIMIT and MAGICK_TMPDIR added when the corresponding
    settings are set.  Raises ParseError if the exit status is not 0.

    NOTE: the joined string is interpreted by the shell, so callers are
    responsible for quoting any argument that is not a trusted constant.
    """
    env = os.environ.copy()

    overrides = (
        ("MAGICK_MEMORY_LIMIT", settings.CONVERT_MEMORY_LIMIT),
        ("MAGICK_TMPDIR", settings.CONVERT_TMPDIR),
    )
    for variable, value in overrides:
        if value:
            env[variable] = value

    command_line = ' '.join(args)
    exit_status = subprocess.Popen(command_line, env=env, shell=True).wait()

    if exit_status != 0:
        raise ParseError("Convert failed at {}".format(args))
|
23
src/paperless_text/signals.py
Normal file
23
src/paperless_text/signals.py
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
import re
|
||||||
|
|
||||||
|
from .parsers import TextDocumentParser
|
||||||
|
|
||||||
|
|
||||||
|
class ConsumerDeclaration:
    """
    Declares TextDocumentParser as a candidate parser for plain-text files.
    """

    # Matched against the lower-cased name passed to test().
    MATCHING_FILES = re.compile(r"^.*\.(te?xt|md|csv)$")

    @classmethod
    def handle(cls, sender, **kwargs):
        """
        Signal receiver: return the callable used to test a document.
        """
        return cls.test

    @classmethod
    def test(cls, doc):
        """
        Return a parser declaration (parser class and weight) when ``doc``
        has a matching extension, otherwise None.
        """
        if cls.MATCHING_FILES.match(doc.lower()) is None:
            return None

        return {"parser": TextDocumentParser, "weight": 10}
|
Loading…
x
Reference in New Issue
Block a user