Merge branch 'dev' into feature-ocrmypdf

This commit is contained in:
jonaswinkler 2020-11-30 23:53:19 +01:00
commit 24b8c358cc
12 changed files with 182 additions and 345 deletions

View File

@ -37,7 +37,7 @@ scikit-learn="~=0.23.2"
whitenoise = "~=5.2.0"
watchdog = "*"
whoosh="~=2.7.4"
inotify-simple = "*"
inotifyrecursive = ">=0.3.4"
ocrmypdf = "*"
[dev-packages]

336
Pipfile.lock generated
View File

@ -1,7 +1,7 @@
{
"_meta": {
"hash": {
"sha256": "5462a6a5d2ade848e15116a2a101f4ad32106439cd71b72d95044831c74e0e27"
"sha256": "d266e1f67e3090ec68aa8ecba1e8373351daf89ad5a5ab46524d123bcaf29f62"
},
"pipfile-spec": 6,
"requires": {
@ -39,98 +39,10 @@
},
"blessed": {
"hashes": [
"sha256:7d4914079a6e8e14fbe080dcaf14dee596a088057cdc598561080e3266123b48",
"sha256:81125aa5b84cb9dfc09ff451886f64b4b923b75c5eaf51fde9d1c48a135eb797"
"sha256:0a74a8d3f0366db600d061273df77d44f0db07daade7bb7a4d49c8bc22ed9f74",
"sha256:580429e7e0c6f6a42ea81b0ae5a4993b6205c6ccbb635d034b4277af8175753e"
],
"version": "==1.17.11"
},
"cffi": {
"hashes": [
"sha256:00a1ba5e2e95684448de9b89888ccd02c98d512064b4cb987d48f4b40aa0421e",
"sha256:00e28066507bfc3fe865a31f325c8391a1ac2916219340f87dfad602c3e48e5d",
"sha256:045d792900a75e8b1e1b0ab6787dd733a8190ffcf80e8c8ceb2fb10a29ff238a",
"sha256:0638c3ae1a0edfb77c6765d487fee624d2b1ee1bdfeffc1f0b58c64d149e7eec",
"sha256:105abaf8a6075dc96c1fe5ae7aae073f4696f2905fde6aeada4c9d2926752362",
"sha256:155136b51fd733fa94e1c2ea5211dcd4c8879869008fc811648f16541bf99668",
"sha256:1a465cbe98a7fd391d47dce4b8f7e5b921e6cd805ef421d04f5f66ba8f06086c",
"sha256:1d2c4994f515e5b485fd6d3a73d05526aa0fcf248eb135996b088d25dfa1865b",
"sha256:23f318bf74b170c6e9adb390e8bd282457f6de46c19d03b52f3fd042b5e19654",
"sha256:2c24d61263f511551f740d1a065eb0212db1dbbbbd241db758f5244281590c06",
"sha256:51a8b381b16ddd370178a65360ebe15fbc1c71cf6f584613a7ea08bfad946698",
"sha256:594234691ac0e9b770aee9fcdb8fa02c22e43e5c619456efd0d6c2bf276f3eb2",
"sha256:5cf4be6c304ad0b6602f5c4e90e2f59b47653ac1ed9c662ed379fe48a8f26b0c",
"sha256:64081b3f8f6f3c3de6191ec89d7dc6c86a8a43911f7ecb422c60e90c70be41c7",
"sha256:6bc25fc545a6b3d57b5f8618e59fc13d3a3a68431e8ca5fd4c13241cd70d0009",
"sha256:798caa2a2384b1cbe8a2a139d80734c9db54f9cc155c99d7cc92441a23871c03",
"sha256:7c6b1dece89874d9541fc974917b631406233ea0440d0bdfbb8e03bf39a49b3b",
"sha256:840793c68105fe031f34d6a086eaea153a0cd5c491cde82a74b420edd0a2b909",
"sha256:8d6603078baf4e11edc4168a514c5ce5b3ba6e3e9c374298cb88437957960a53",
"sha256:9cc46bc107224ff5b6d04369e7c595acb700c3613ad7bcf2e2012f62ece80c35",
"sha256:9f7a31251289b2ab6d4012f6e83e58bc3b96bd151f5b5262467f4bb6b34a7c26",
"sha256:9ffb888f19d54a4d4dfd4b3f29bc2c16aa4972f1c2ab9c4ab09b8ab8685b9c2b",
"sha256:a7711edca4dcef1a75257b50a2fbfe92a65187c47dab5a0f1b9b332c5919a3fb",
"sha256:af5c59122a011049aad5dd87424b8e65a80e4a6477419c0c1015f73fb5ea0293",
"sha256:b18e0a9ef57d2b41f5c68beefa32317d286c3d6ac0484efd10d6e07491bb95dd",
"sha256:b4e248d1087abf9f4c10f3c398896c87ce82a9856494a7155823eb45a892395d",
"sha256:ba4e9e0ae13fc41c6b23299545e5ef73055213e466bd107953e4a013a5ddd7e3",
"sha256:be8661bcee1bc2fc4b033a6ab65bd1f87ce5008492601695d0b9a4e820c3bde5",
"sha256:c6332685306b6417a91b1ff9fae889b3ba65c2292d64bd9245c093b1b284809d",
"sha256:d9efd8b7a3ef378dd61a1e77367f1924375befc2eba06168b6ebfa903a5e59ca",
"sha256:df5169c4396adc04f9b0a05f13c074df878b6052430e03f50e68adf3a57aa28d",
"sha256:ebb253464a5d0482b191274f1c8bf00e33f7e0b9c66405fbffc61ed2c839c775",
"sha256:ec80dc47f54e6e9a78181ce05feb71a0353854cc26999db963695f950b5fb375",
"sha256:f032b34669220030f905152045dfa27741ce1a6db3324a5bc0b96b6c7420c87b",
"sha256:f60567825f791c6f8a592f3c6e3bd93dd2934e3f9dac189308426bd76b00ef3b",
"sha256:f803eaa94c2fcda012c047e62bc7a51b0bdabda1cad7a92a522694ea2d76e49f"
],
"version": "==1.14.4"
},
"chardet": {
"hashes": [
"sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae",
"sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691"
],
"markers": "python_version >= '3.1'",
"version": "==3.0.4"
},
"coloredlogs": {
"hashes": [
"sha256:346f58aad6afd48444c2468618623638dadab76e4e70d5e10822676f2d32226a",
"sha256:a1fab193d2053aa6c0a97608c4342d031f1f93a3d1218432c59322441d31a505",
"sha256:b0c2124367d4f72bd739f48e1f61491b4baf145d6bda33b606b4a53cb3f96a97"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
"version": "==14.0"
},
"cryptography": {
"hashes": [
"sha256:07ca431b788249af92764e3be9a488aa1d39a0bc3be313d826bbec690417e538",
"sha256:13b88a0bd044b4eae1ef40e265d006e34dbcde0c2f1e15eb9896501b2d8f6c6f",
"sha256:257dab4f368fae15f378ea9a4d2799bf3696668062de0e9fa0ebb7a738a6917d",
"sha256:32434673d8505b42c0de4de86da8c1620651abd24afe91ae0335597683ed1b77",
"sha256:3cd75a683b15576cfc822c7c5742b3276e50b21a06672dc3a800a2d5da4ecd1b",
"sha256:4e7268a0ca14536fecfdf2b00297d4e407da904718658c1ff1961c713f90fd33",
"sha256:545a8550782dda68f8cdc75a6e3bf252017aa8f75f19f5a9ca940772fc0cb56e",
"sha256:55d0b896631412b6f0c7de56e12eb3e261ac347fbaa5d5e705291a9016e5f8cb",
"sha256:5849d59358547bf789ee7e0d7a9036b2d29e9a4ddf1ce5e06bb45634f995c53e",
"sha256:59f7d4cfea9ef12eb9b14b83d79b432162a0a24a91ddc15c2c9bf76a68d96f2b",
"sha256:6dc59630ecce8c1f558277ceb212c751d6730bd12c80ea96b4ac65637c4f55e7",
"sha256:7117319b44ed1842c617d0a452383a5a052ec6aa726dfbaffa8b94c910444297",
"sha256:75e8e6684cf0034f6bf2a97095cb95f81537b12b36a8fedf06e73050bb171c2d",
"sha256:7b8d9d8d3a9bd240f453342981f765346c87ade811519f98664519696f8e6ab7",
"sha256:a035a10686532b0587d58a606004aa20ad895c60c4d029afa245802347fab57b",
"sha256:a4e27ed0b2504195f855b52052eadcc9795c59909c9d84314c5408687f933fc7",
"sha256:a733671100cd26d816eed39507e585c156e4498293a907029969234e5e634bc4",
"sha256:a75f306a16d9f9afebfbedc41c8c2351d8e61e818ba6b4c40815e2b5740bb6b8",
"sha256:bd717aa029217b8ef94a7d21632a3bb5a4e7218a4513d2521c2a2fd63011e98b",
"sha256:d25cecbac20713a7c3bc544372d42d8eafa89799f492a43b79e1dfd650484851",
"sha256:d26a2557d8f9122f9bf445fc7034242f4375bd4e95ecda007667540270965b13",
"sha256:d3545829ab42a66b84a9aaabf216a4dce7f16dbc76eb69be5c302ed6b8f4a29b",
"sha256:d3d5e10be0cf2a12214ddee45c6bd203dab435e3d83b4560c03066eda600bfe3",
"sha256:efe15aca4f64f3a7ea0c09c87826490e50ed166ce67368a68f315ea0807a20df"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
"version": "==3.2.1"
"version": "==1.17.12"
},
"dateparser": {
"hashes": [
@ -158,11 +70,11 @@
},
"django-extensions": {
"hashes": [
"sha256:6809c89ca952f0e08d4e0766bc0101dfaf508d7649aced1180c091d737046ea7",
"sha256:dc663652ac9460fd06580a973576820430c6d428720e874ae46b041fa63e0efa"
"sha256:7cd002495ff0a0e5eb6cdd6be759600905b4e4079232ea27618fc46bdd853651",
"sha256:c7f88625a53f631745d4f2bef9ec4dcb999ed59476393bdbbe99db8596778846"
],
"index": "pypi",
"version": "==3.0.9"
"version": "==3.1.0"
},
"django-filter": {
"hashes": [
@ -211,14 +123,6 @@
"index": "pypi",
"version": "==20.0.4"
},
"humanfriendly": {
"hashes": [
"sha256:bf52ec91244819c780341a3438d5d7b09f431d3f113a475147ac9b7b167a3d12",
"sha256:e78960b31198511f45fd455534ae7645a6207d33e512d2e842c766d15d9c8080"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
"version": "==8.2"
},
"imap-tools": {
"hashes": [
"sha256:96e9a4ff6483462635737730a1df28e739faa71967b12a84f4363fb386542246",
@ -227,29 +131,22 @@
"index": "pypi",
"version": "==0.32.0"
},
"img2pdf": {
"hashes": [
"sha256:57905015579b1026acf1605aa95859cd79b051fa1c35485573d165526fc9dbb5",
"sha256:eaee690ab8403dd1a9cb4db10afee41dd3e6c7ed63bdace02a0121f9feadb0c9"
],
"version": "==0.4.0"
},
"importlib-metadata": {
"hashes": [
"sha256:590690d61efdd716ff82c39ca9a9d4209252adfe288a4b5721181050acbd4175",
"sha256:d9b8a46a0885337627a6430db287176970fff18ad421becec1d64cfc763c2099"
],
"markers": "python_version < '3.8'",
"version": "==3.1.0"
},
"inotify-simple": {
"hashes": [
"sha256:8440ffe49c4ae81a8df57c1ae1eb4b6bfa7acb830099bfb3e305b383005cc128",
"sha256:854f9ac752cc1fcff6ca34e9d3d875c9a94c9b7d6eb377f63be2d481a566c6ee"
],
"index": "pypi",
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==1.3.5"
},
"inotifyrecursive": {
"hashes": [
"sha256:7e5f4a2e1dc2bef0efa3b5f6b339c41fb4599055a2b54909d020e9e932cc8d2f",
"sha256:a2c450b317693e4538416f90eb1d7858506dafe6b8b885037bd2dd9ae2dafa1e"
],
"index": "pypi",
"version": "==0.3.5"
},
"joblib": {
"hashes": [
"sha256:698c311779f347cf6b7e6b8a39bb682277b8ee4aba8cf9507bc0cf4cd4737b72",
@ -267,51 +164,6 @@
"index": "pypi",
"version": "==1.0.8"
},
"lxml": {
"hashes": [
"sha256:098fb713b31050463751dcc694878e1d39f316b86366fb9fe3fbbe5396ac9fab",
"sha256:0e89f5d422988c65e6936e4ec0fe54d6f73f3128c80eb7ecc3b87f595523607b",
"sha256:189ad47203e846a7a4951c17694d845b6ade7917c47c64b29b86526eefc3adf5",
"sha256:1d87936cb5801c557f3e981c9c193861264c01209cb3ad0964a16310ca1b3301",
"sha256:211b3bcf5da70c2d4b84d09232534ad1d78320762e2c59dedc73bf01cb1fc45b",
"sha256:2358809cc64394617f2719147a58ae26dac9e21bae772b45cfb80baa26bfca5d",
"sha256:23c83112b4dada0b75789d73f949dbb4e8f29a0a3511647024a398ebd023347b",
"sha256:24e811118aab6abe3ce23ff0d7d38932329c513f9cef849d3ee88b0f848f2aa9",
"sha256:288ddf94d9d0488187f578fdcc1868af2a6fe6714444c8259b68a83fa27b76d2",
"sha256:2d5896ddf5389560257bbe89317ca7bcb4e54a02b53a3e572e1ce4226512b51b",
"sha256:2d6571c48328be4304aee031d2d5046cbc8aed5740c654575613c5a4f5a11311",
"sha256:2e311a10f3e85250910a615fe194839a04a0f6bc4e8e5bb5cac221344e3a7891",
"sha256:302160eb6e9764168e01d8c9ec6becddeb87776e81d3fcb0d97954dd51d48e0a",
"sha256:3a7a380bfecc551cfd67d6e8ad9faa91289173bdf12e9cfafbd2bdec0d7b1ec1",
"sha256:3d9b2b72eb0dbbdb0e276403873ecfae870599c83ba22cadff2db58541e72856",
"sha256:475325e037fdf068e0c2140b818518cf6bc4aa72435c407a798b2db9f8e90810",
"sha256:4b7572145054330c8e324a72d808c8c8fbe12be33368db28c39a255ad5f7fb51",
"sha256:4e006fdb434609956a8f710ffffe650afab414dc43728786ebdbdca48e179b14",
"sha256:4fff34721b628cce9eb4538cf9a73d02e0f3da4f35a515773cce6f5fe413b360",
"sha256:56eff8c6fb7bc4bcca395fdff494c52712b7a57486e4fbde34c31bb9da4c6cc4",
"sha256:573b2f5496c7e9f4985de70b9bbb4719ffd293d5565513e04ac20e42e6e5583f",
"sha256:7ecaef52fd9b9535ae5f01a1dd2651f6608e4ec9dc136fc4dfe7ebe3c3ddb230",
"sha256:803a80d72d1f693aa448566be46ffd70882d1ad8fc689a2e22afe63035eb998a",
"sha256:8862d1c2c020cb7a03b421a9a7b4fe046a208db30994fc8ff68c627a7915987f",
"sha256:9b06690224258db5cd39a84e993882a6874676f5de582da57f3df3a82ead9174",
"sha256:a71400b90b3599eb7bf241f947932e18a066907bf84617d80817998cee81e4bf",
"sha256:bb252f802f91f59767dcc559744e91efa9df532240a502befd874b54571417bd",
"sha256:be1ebf9cc25ab5399501c9046a7dcdaa9e911802ed0e12b7d620cd4bbf0518b3",
"sha256:be7c65e34d1b50ab7093b90427cbc488260e4b3a38ef2435d65b62e9fa3d798a",
"sha256:c0dac835c1a22621ffa5e5f999d57359c790c52bbd1c687fe514ae6924f65ef5",
"sha256:c152b2e93b639d1f36ec5a8ca24cde4a8eefb2b6b83668fcd8e83a67badcb367",
"sha256:d182eada8ea0de61a45a526aa0ae4bcd222f9673424e65315c35820291ff299c",
"sha256:d18331ea905a41ae71596502bd4c9a2998902328bbabd29e3d0f5f8569fabad1",
"sha256:d20d32cbb31d731def4b1502294ca2ee99f9249b63bc80e03e67e8f8e126dea8",
"sha256:d4ad7fd3269281cb471ad6c7bafca372e69789540d16e3755dd717e9e5c9d82f",
"sha256:d6f8c23f65a4bfe4300b85f1f40f6c32569822d08901db3b6454ab785d9117cc",
"sha256:d84d741c6e35c9f3e7406cb7c4c2e08474c2a6441d59322a00dcae65aac6315d",
"sha256:e65c221b2115a91035b55a593b6eb94aa1206fa3ab374f47c6dc10d364583ff9",
"sha256:f98b6f256be6cec8dd308a8563976ddaff0bdc18b730720f6f4bee927ffe926f"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
"version": "==4.6.1"
},
"numpy": {
"hashes": [
"sha256:08308c38e44cc926bdfce99498b21eec1f848d24c302519e64203a8da99a97db",
@ -353,14 +205,6 @@
"markers": "python_version >= '3.6'",
"version": "==1.19.4"
},
"ocrmypdf": {
"hashes": [
"sha256:20722d89d2f0deeb5b3ffa8622ead59d54af46d44f21848ec0f15ef79ce1a4a3",
"sha256:c592e1bb37abafd24f067043bbf98d25405521cbe1e992de30d8b870dbe86928"
],
"index": "pypi",
"version": "==11.3.3"
},
"pathtools": {
"hashes": [
"sha256:7c35c5421a39bb82e58018febd90e3b6e5db34c5443aaaf742b3f33d4655f1c0",
@ -376,14 +220,6 @@
"index": "pypi",
"version": "==2.3.0"
},
"pdfminer.six": {
"hashes": [
"sha256:b9aac0ebeafb21c08bf65f2039f4b2c5f78a3449d0a41df711d72445649e952a",
"sha256:d78877ba8d8bf957f3bb636c4f73f4f6f30f56c461993877ac22c39c20837509"
],
"markers": "python_version >= '3.4'",
"version": "==20201018"
},
"pdftotext": {
"hashes": [
"sha256:98aeb8b07a4127e1a30223bd933ef080bbd29aa88f801717ca6c5618380b8aa6"
@ -391,33 +227,6 @@
"index": "pypi",
"version": "==2.1.5"
},
"pikepdf": {
"hashes": [
"sha256:0dd42f791f29e7e2ab120103605b9ddd65937c773a72d21341a56873a89e76c9",
"sha256:12a1d243143cf972ce11def50f0bd1f6e630f5e660cdeddb2c7c49db5adad40a",
"sha256:2e1713af11b71e95c2d218c10d68b6f8e813be19c8596c560f3c84617f6d5437",
"sha256:2f90acad26d9939193946eb6ca8363fd3cf44b46b5c1409468906618bccb8113",
"sha256:3c482fe30fd58ff385795605a9233f37f97fb83427c3e829b1a568a2a3b59f60",
"sha256:3ddabfc33a8a7cecba76c1685ce5125fdf239a38d0854d7c2a703490b5783773",
"sha256:61dd3f13b7416111d19bf493ce4e7281f63a1dd22c532200cbbcd65813ea43e4",
"sha256:6ce42b7780835fb52452ccaff3a3ac1b28ae1f9d80faab59c559045d9fcb211d",
"sha256:6dba75782f108ebbf3947fcb29ea0ba7da0482868e53f6602643adc36245201d",
"sha256:716427a5c0372f3cc7dc282c4b49d49d8d5182a3e937739a4c3632151e74d6a4",
"sha256:730ef4013099da7ea722a9b5659260097af6f47ddfa3c2abab4d4493de2591f3",
"sha256:73e14bba4135adfb89ae2f2163369bd788ecf23839acc8d062d832118f07e288",
"sha256:84df07acc8968051da33891af55a3ab1aa55453d83df4ce9b84d821eedc34583",
"sha256:8f739e9c660d71cd479f11f9aa110857cf0d0d9c2472f40bbcbaf02f980355a1",
"sha256:a20ca7adbb9d3da416cf5f6de0ebca53855f9a3b99acdd6ec864c61482894d71",
"sha256:bc58d9486c0959619a2584e558a54d36468c6d1165cd9fe0bfb1ecc3e6b33c6a",
"sha256:c0627930a17b3a5e1a7c9109099535259afc50fe006a05af9c3634de05abd318",
"sha256:de5f445eaaadd7dae56e1043ab8ca5eef49ece302a4e37e1fc6d21b7dcfcfb1b",
"sha256:de6aae7782db33f2cc71c9ba63b7e2ec0e0529843c065eac4e71fcbe043426e2",
"sha256:e2efd844c09f8ce3103a93bfbd54983542a0a63c88bdc0f0cdbb2997f99a147d",
"sha256:fdb481ad1219e8d667625afd2f01b26f98df079e4f66e7e49816ec20c8d8c401"
],
"markers": "python_version < '3.9'",
"version": "==2.1.2"
},
"pillow": {
"hashes": [
"sha256:006de60d7580d81f4a1a7e9f0173dc90a932e3905cc4d47ea909bc946302311a",
@ -453,14 +262,6 @@
"index": "pypi",
"version": "==8.0.1"
},
"pluggy": {
"hashes": [
"sha256:15b2acde666561e1298d71b523007ed7364de07029219b604cf808bfa1c765b0",
"sha256:966c145cd83c96502c3c3868f50408687b38434af77734af1e9ca461a4081d2d"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==0.13.1"
},
"psycopg2-binary": {
"hashes": [
"sha256:0deac2af1a587ae12836aa07970f5cb91964f05a7c6cdb69d8425ff4c15d4e2c",
@ -504,14 +305,6 @@
"index": "pypi",
"version": "==2.8.6"
},
"pycparser": {
"hashes": [
"sha256:2d475327684562c3a96cc71adf7dc8c4f0565175cf86b6d7a404ff4c771f15f0",
"sha256:7582ad22678f0fcd81102833f60ef8d0e57288b6b5fb00323d101be910e35705"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==2.20"
},
"pyocr": {
"hashes": [
"sha256:fa15adc7e1cf0d345a2990495fe125a947c6e09a60ddba0256a1c14b2e603179",
@ -626,53 +419,6 @@
],
"version": "==2020.11.13"
},
"reportlab": {
"hashes": [
"sha256:06be7f04a631f02cd0202f7dee0d3e61dc265223f4ff861525ed7784b5552540",
"sha256:0a788a537c48915eda083485b59ac40ac012fa7c43070069bde6eb5ea588313c",
"sha256:1a7a38810e79653d0ea8e61db4f0517ac2a0e76edd2497cf6d4969dd3be30030",
"sha256:22301773db730545b44d4c77d8f29baf5683ccabec9883d978e8b8eda6d2175f",
"sha256:2906321b3d2779faafe47e2c13f9c69e1fb4ddb907f5a49cab3f9b0ea95df1f5",
"sha256:2d65f9cc5c0d3f63b5d024e6cf92234f1ab1f267cc9e5a847ab5d3efe1c3cf3e",
"sha256:2e012f7b845ef9f1f5bd63461d5201fa624b019a65ff5a93d0002b4f915bbc89",
"sha256:31ccfdbf5bb5ec85f0397661085ce4c9e52537ca0d2bf4220259666a4dcc55c2",
"sha256:3e10bd20c8ada9f7e1113157aa73b8e0048f2624e74794b73799c3deb13d7a3f",
"sha256:440d5f86c2b822abdb7981d691a78bdcf56f4710174830283034235ab2af2969",
"sha256:4f307accda32c9f17015ed77c7424f904514e349dff063f78d2462d715963e53",
"sha256:59659ee8897950fd1acd41a9cc61f4afdfda52dc2bb69a1924ce68089491849d",
"sha256:6216b11313467989ac9d9578ea3756d0af46e97184ee4e11a6b7ef652458f70d",
"sha256:6268a9a3d75e714b22beeb7687270956b06b232ccfdf37b1c6462961eab04457",
"sha256:6b226830f80df066d5986a3fdb3eb4d1b6320048f3d9ade539a6c03a5bc8b3ec",
"sha256:6e10eba6a0e330096f4200b18824b3194c399329b7830e34baee1c04ea07f99f",
"sha256:6e224c16c3d6fafdb2fb67b33c4b84d984ec34869834b3a137809f2fe5b84778",
"sha256:7da162fa677b90bd14f19b20ff80fec18c24a31ac44e5342ba49e198b13c4f92",
"sha256:8406e960a974a65b765c9ff74b269aa64718b4af1e8c511ebdbd9a5b44b0c7e6",
"sha256:8999bb075102d1b8ca4aada6ca14653d52bf02e37fd064e477eb180741f75077",
"sha256:8ae21aa94e405bf5171718f11ebc702a0edf18c91d88b14c5c5724cabd664673",
"sha256:8f6163729612e815b89649aed2e237505362a78014199f819fd92f9e5c96769b",
"sha256:9699fa8f0911ad56b46cc60bbaebe1557fd1c9e8da98185a7a1c0c40193eba48",
"sha256:9a53d76eec33abda11617aad1c9f5f4a2d906dd2f92a03a3f1ea370efbb52c95",
"sha256:9ed4d761b726ff411565eddb10cb37a6bca0ec873d9a18a83cf078f4502a2d94",
"sha256:a020d308e7c2de284d5407e3c6c13e3977a62b314f7bfe19bcc69677931da589",
"sha256:a2e6c15aecbe631245aab639751a58671312cced7e17de1ed9c45fb37036f6c9",
"sha256:b10cb48606d97b70edb094576e3d493d40467395e4fc267655135a2c92defbe8",
"sha256:b8d6e9df5181ed07b7ae145258eb69e686133afc97930af51a3c0c9d784d834d",
"sha256:bbb297754f5cf25eb8fcb817752984252a7feb0ca83e383718e4eec2fb67ea32",
"sha256:be90599e5e78c1ddfcfee8c752108def58b4c672ebcc4d3d9aa7fe65e7d3f16b",
"sha256:bfdfad9b8ae00bd0752b77f954c7405327fd99b2cc6d5e4273e65be61429d56a",
"sha256:c1e5ef5089e16b249388f65d8c8f8b74989e72eb8332060dc580a2ecb967cfc2",
"sha256:c5ed342e29a5fd7eeb0f2ccf7e5b946b5f750f05633b2d6a94b1c02094a77967",
"sha256:c7087a26b26aa82a3ba27e13e66f507cc697f9ceb4c046c0f758876b55f040a5",
"sha256:cf589e980d92b0bf343fa512b9d3ae9ed0469cbffd99cb270b6c83da143cb437",
"sha256:e6fb762e524a4fb118be9f44dbd9456cf80e42253ee8f1bdb0ea5c1f882d4ba8",
"sha256:e961d3a84c65ca030963ca934a4faad2ac9fee75af36ba2f98733da7d3f7efab",
"sha256:f2fde5abb6f21c1eff5430f380cdbbee7fdeda6af935a83730ddce9f0c4e504e",
"sha256:f585b3bf7062c228306acd7f40b2ad915b32603228c19bb225952cc98fd2015a",
"sha256:f955a6366cf8e6729776c96e281bede468acd74f6eb49a5bbb048646adaa43d8",
"sha256:fe882fd348d8429debbdac4518d6a42888a7f4ad613dc596ce94788169caeb08"
],
"version": "==3.5.55"
},
"scikit-learn": {
"hashes": [
"sha256:090bbf144fd5823c1f2efa3e1a9bf180295b24294ca8f478e75b40ed54f8036e",
@ -736,13 +482,6 @@
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==1.15.0"
},
"sortedcontainers": {
"hashes": [
"sha256:37257a32add0a3ee490bb170b599e93095eed89a55da91fa9f48753ea12fd73f",
"sha256:59cc937650cf60d677c16775597c89a960658a09cf7c1a668f86e1e4464b10a1"
],
"version": "==2.3.0"
},
"sqlparse": {
"hashes": [
"sha256:017cde379adbd6a1f15a61873f43e8274179378e95ef3fede90b5aa64d304ed0",
@ -759,14 +498,6 @@
"markers": "python_version >= '3.5'",
"version": "==2.1.0"
},
"tqdm": {
"hashes": [
"sha256:3d3f1470d26642e88bd3f73353cb6ff4c51ef7d5d7efef763238f4bc1f7e4e81",
"sha256:5ff3f5232b19fa4c5531641e480b7fad4598819f708a32eb815e6ea41c5fa313"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==4.53.0"
},
"tzlocal": {
"hashes": [
"sha256:643c97c5294aedc737780a49d9df30889321cbe1204eac2c2ec6134035a92e44",
@ -776,11 +507,11 @@
},
"watchdog": {
"hashes": [
"sha256:034c85530b647486e8c8477410fe79476511282658f2ce496f97106d9e5acfb8",
"sha256:4214e1379d128b0588021880ccaf40317ee156d4603ac388b9adcf29165e0c04"
"sha256:3caefdcc8f06a57fdc5ef2d22aa7c0bfda4f55e71a0bee74cbf3176d97536ef3",
"sha256:e38bffc89b15bafe2a131f0e1c74924cf07dcec020c2e0a26cccd208831fcd43"
],
"index": "pypi",
"version": "==0.10.3"
"version": "==0.10.4"
},
"wcwidth": {
"hashes": [
@ -805,14 +536,6 @@
],
"index": "pypi",
"version": "==2.7.4"
},
"zipp": {
"hashes": [
"sha256:102c24ef8f171fd729d46599845e95c7ab894a4cf45f5de11a44cc7444fb1108",
"sha256:ed5eee1974372595f9e416cc7bbeeb12335201d8081ca8a0743c954d4446e5cb"
],
"markers": "python_version >= '3.6'",
"version": "==3.4.0"
}
},
"develop": {
@ -866,7 +589,6 @@
"sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae",
"sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691"
],
"markers": "python_version >= '3.1'",
"version": "==3.0.4"
},
"coverage": {
@ -959,11 +681,11 @@
},
"faker": {
"hashes": [
"sha256:5398268e1d751ffdb3ed36b8a790ed98659200599b368eec38a02eed15bce997",
"sha256:d4183b8f57316de3be27cd6c3b40e9f9343d27c95c96179f027316c58c2c239e"
"sha256:2ba20a4438429cb08d729175d7bb0435ef3c2c4cedc7b1ceb703ee6da8dad906",
"sha256:6279746aed175a693108238e6d1ab8d7e26d0ec7ff8474f61025b9fdaae15d65"
],
"markers": "python_version >= '3.5'",
"version": "==4.17.1"
"version": "==4.18.0"
},
"filelock": {
"hashes": [
@ -991,11 +713,11 @@
},
"importlib-metadata": {
"hashes": [
"sha256:590690d61efdd716ff82c39ca9a9d4209252adfe288a4b5721181050acbd4175",
"sha256:d9b8a46a0885337627a6430db287176970fff18ad421becec1d64cfc763c2099"
"sha256:030f3b1bdb823ecbe4a9659e14cc861ce5af403fe99863bae173ec5fe00ab132",
"sha256:caeee3603f5dcf567864d1be9b839b0bcfdf1383e3e7be33ce2dead8144ff19c"
],
"markers": "python_version < '3.8'",
"version": "==3.1.0"
"version": "==2.1.0"
},
"importlib-resources": {
"hashes": [
@ -1066,11 +788,11 @@
},
"packaging": {
"hashes": [
"sha256:4357f74f47b9c12db93624a82154e9b120fa8293699949152b22065d556079f8",
"sha256:998416ba6962ae7fbd6596850b80e17859a5753ba17c32284f67bfff33784181"
"sha256:05af3bb85d320377db281cf254ab050e1a7ebcbf5410685a9a407e18a1f81236",
"sha256:eb41423378682dadb7166144a4926e443093863024de508ca5c9737d6bc08376"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==20.4"
"version": "==20.7"
},
"pluggy": {
"hashes": [
@ -1322,7 +1044,7 @@
"sha256:102c24ef8f171fd729d46599845e95c7ab894a4cf45f5de11a44cc7444fb1108",
"sha256:ed5eee1974372595f9e416cc7bbeeb12335201d8081ca8a0743c954d4446e5cb"
],
"markers": "python_version >= '3.6'",
"markers": "python_version < '3.8'",
"version": "==3.4.0"
}
}

View File

@ -45,16 +45,16 @@ For a complete list of changes, check out the [changelog](https://paperless-ng.r
# Roadmap for 1.0
- Make the front end nice (except mobile).
- Test coverage at 90%.
- Store archived documents with an embedded OCR text layer, while keeping originals available. Making good progress in the `feature-ocrmypdf` branch.
- Fix whatever bugs I and you find
- Fix whatever bugs I and you find.
## Roadmap for versions beyond 1.0
- **More search.** The search backend is incredibly versatile and customizable. Searching is the most important feature of this project and thus, I want to implement things like:
- Group and limit search results by correspondent, show “more from this” links in the results.
- Ability to search for “Similar documents” in the search results
- Provide corrections for mispelled queries
- **An interactive consumer** that shows its progress for documents it processes on the web page.
- With live updates ans websockets. This already works on a dev branch, but requires a lot of new dependencies, which I'm not particular happy about.
- Notifications when a document was added with buttons to open the new document right away.

View File

@ -10,13 +10,12 @@ paperless-ng 0.9.4
* Searching:
* Paperless now supports searching by tags, types and dates. In order to have this applied to your
* Paperless now supports searching by tags, types and dates and correspondents. In order to have this applied to your
existing documents, you need to perform a ``document_index reindex`` management command
(see :ref:`administration-index`)
that adds the new data to the search index. You only need to do this once, so that paperless can find
your documents by tags,types and dates. Paperless keeps the index updated after that whenever
something changes.
* Paperless now has spelling corrections ("Did you mean") for misstyped queries.
that adds the data to the search index. You only need to do this once, since the schema of the search index changed.
Paperless keeps the index updated after that whenever something changes.
* Paperless now has spelling corrections ("Did you mean") for miss-typed queries.
* The documentation contains :ref:`information about the query syntax <basic-searching>`.
* Front end:

View File

@ -176,20 +176,20 @@ further.
Matching documents with logical expressions:
.. code:: none
.. code::
shopname AND (product1 OR product2)
Matching specific tags, correspondents or types:
.. code:: none
.. code::
type:invoice tag:unpaid
correspondent:university certificate
Matching dates:
.. code:: none
.. code::
created:[2005 to 2009]
added:yesterday
@ -197,7 +197,7 @@ Matching dates:
Matching inexact words:
.. code:: none
.. code::
produ*name

View File

@ -1,31 +1,60 @@
import logging
import os
from pathlib import Path
from time import sleep
from django.conf import settings
from django.core.management.base import BaseCommand, CommandError
from django.utils.text import slugify
from django_q.tasks import async_task
from watchdog.events import FileSystemEventHandler
from watchdog.observers.polling import PollingObserver
from documents.models import Tag
try:
from inotify_simple import INotify, flags
from inotifyrecursive import INotify, flags
except ImportError:
INotify = flags = None
logger = logging.getLogger(__name__)
def _consume(file):
try:
if os.path.isfile(file):
async_task("documents.tasks.consume_file",
file,
task_name=os.path.basename(file)[:100])
else:
logger.debug(
f"Not consuming file {file}: File has moved.")
def _tags_from_path(filepath):
"""Walk up the directory tree from filepath to CONSUMPTION_DIr
and get or create Tag IDs for every directory.
"""
tag_ids = set()
path_parts = Path(filepath).relative_to(
settings.CONSUMPTION_DIR).parent.parts
for part in path_parts:
tag_ids.add(Tag.objects.get_or_create(
slug=slugify(part),
defaults={"name": part},
)[0].pk)
return tag_ids
def _consume(filepath):
if not os.path.isfile(filepath):
logger.debug(
f"Not consuming file {filepath}: File has moved.")
return
tag_ids = None
try:
if settings.CONSUMER_SUBDIRS_AS_TAGS:
tag_ids = _tags_from_path(filepath)
except Exception as e:
logger.error(
"Error creating tags from path: {}".format(e))
try:
async_task("documents.tasks.consume_file",
filepath,
override_tag_ids=tag_ids if tag_ids else None,
task_name=os.path.basename(filepath)[:100])
except Exception as e:
# Catch all so that the consumer won't crash.
# This is also what the test case is listening for to check for
@ -94,6 +123,7 @@ class Command(BaseCommand):
def handle(self, *args, **options):
directory = options["directory"]
recursive = settings.CONSUMER_RECURSIVE
if not directory:
raise CommandError(
@ -104,24 +134,30 @@ class Command(BaseCommand):
raise CommandError(
f"Consumption directory {directory} does not exist")
for entry in os.scandir(directory):
_consume(entry.path)
if recursive:
for dirpath, _, filenames in os.walk(directory):
for filename in filenames:
filepath = os.path.join(dirpath, filename)
_consume(filepath)
else:
for entry in os.scandir(directory):
_consume(entry.path)
if options["oneshot"]:
return
if settings.CONSUMER_POLLING == 0 and INotify:
self.handle_inotify(directory)
self.handle_inotify(directory, recursive)
else:
self.handle_polling(directory)
self.handle_polling(directory, recursive)
logger.debug("Consumer exiting.")
def handle_polling(self, directory):
def handle_polling(self, directory, recursive):
logging.getLogger(__name__).info(
f"Polling directory for changes: {directory}")
self.observer = PollingObserver(timeout=settings.CONSUMER_POLLING)
self.observer.schedule(Handler(), directory, recursive=False)
self.observer.schedule(Handler(), directory, recursive=recursive)
self.observer.start()
try:
while self.observer.is_alive():
@ -132,18 +168,26 @@ class Command(BaseCommand):
self.observer.stop()
self.observer.join()
def handle_inotify(self, directory):
def handle_inotify(self, directory, recursive):
logging.getLogger(__name__).info(
f"Using inotify to watch directory for changes: {directory}")
inotify = INotify()
descriptor = inotify.add_watch(
directory, flags.CLOSE_WRITE | flags.MOVED_TO)
inotify_flags = flags.CLOSE_WRITE | flags.MOVED_TO
if recursive:
descriptor = inotify.add_watch_recursive(directory, inotify_flags)
else:
descriptor = inotify.add_watch(directory, inotify_flags)
try:
while not self.stop_flag:
for event in inotify.read(timeout=1000, read_delay=1000):
file = os.path.join(directory, event.name)
_consume(file)
if recursive:
path = inotify.get_path(event.wd)
else:
path = directory
filepath = os.path.join(path, event.name)
_consume(filepath)
except KeyboardInterrupt:
pass

View File

@ -12,10 +12,10 @@ from documents.models import Document, Correspondent, DocumentType, Tag
from documents.tests.utils import DirectoriesMixin
class DocumentApiTest(DirectoriesMixin, APITestCase):
class TestDocumentApi(DirectoriesMixin, APITestCase):
def setUp(self):
super(DocumentApiTest, self).setUp()
super(TestDocumentApi, self).setUp()
user = User.objects.create_superuser(username="temp_admin")
self.client.force_login(user=user)

View File

@ -445,6 +445,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
shutil.copy(src, dst)
return dst
@override_settings(PAPERLESS_FILENAME_FORMAT=None)
def testNormalOperation(self):
filename = self.get_test_file()

View File

@ -7,8 +7,9 @@ from unittest import mock
from django.conf import settings
from django.core.management import call_command, CommandError
from django.test import override_settings, TestCase
from django.test import override_settings, TransactionTestCase
from documents.models import Tag
from documents.consumer import ConsumerError
from documents.management.commands import document_consumer
from documents.tests.utils import DirectoriesMixin
@ -33,7 +34,7 @@ def chunked(size, source):
yield source[i:i+size]
class TestConsumer(DirectoriesMixin, TestCase):
class TestConsumer(DirectoriesMixin, TransactionTestCase):
sample_file = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
@ -126,6 +127,43 @@ class TestConsumer(DirectoriesMixin, TestCase):
def test_consume_existing_file_polling(self):
self.test_consume_existing_file()
@override_settings(CONSUMER_RECURSIVE=1)
@override_settings(CONSUMER_SUBDIRS_AS_TAGS=1)
def test_consume_file_with_path_tags(self):
tag_names = ("existingTag", "Space Tag")
# Create a Tag prior to consuming a file using it in path
tag_ids = [Tag.objects.create(name=tag_names[0]).pk,]
self.t_start()
path = os.path.join(self.dirs.consumption_dir, *tag_names)
os.makedirs(path, exist_ok=True)
f = os.path.join(path, "my_file.pdf")
# Wait at least inotify read_delay for recursive watchers
# to be created for the new directories
sleep(1)
shutil.copy(self.sample_file, f)
self.wait_for_task_mock_call()
self.task_mock.assert_called_once()
# Add the pk of the Tag created by _consume()
tag_ids.append(Tag.objects.get(name=tag_names[1]).pk)
args, kwargs = self.task_mock.call_args
self.assertEqual(args[1], f)
# assertCountEqual has a bad name, but test that the first
# sequence contains the same elements as second, regardless of
# their order.
self.assertCountEqual(kwargs["override_tag_ids"], tag_ids)
@override_settings(CONSUMER_POLLING=1)
def test_consume_file_with_path_tags_polling(self):
self.test_consume_file_with_path_tags()
@mock.patch("documents.management.commands.document_consumer.logger.error")
def test_slow_write_pdf(self, error_logger):

View File

@ -17,7 +17,8 @@ class TestDecryptDocuments(TestCase):
@override_settings(
ORIGINALS_DIR=os.path.join(os.path.dirname(__file__), "samples", "originals"),
THUMBNAIL_DIR=os.path.join(os.path.dirname(__file__), "samples", "thumb"),
PASSPHRASE="test"
PASSPHRASE="test",
PAPERLESS_FILENAME_FORMAT=None
)
@mock.patch("documents.management.commands.decrypt_documents.input")
def test_decrypt(self, m):

View File

@ -0,0 +1,23 @@
from datetime import datetime
from django.test import TestCase
from documents import tasks
from documents.models import Document
from documents.tests.utils import DirectoriesMixin
class TestTasks(DirectoriesMixin, TestCase):
def test_index_reindex(self):
Document.objects.create(title="test", content="my document", checksum="wow", added=datetime.now(), created=datetime.now(), modified=datetime.now())
tasks.index_reindex()
def test_index_optimize(self):
Document.objects.create(title="test", content="my document", checksum="wow", added=datetime.now(), created=datetime.now(), modified=datetime.now())
tasks.index_optimize()
def test_train_classifier(self):
tasks.train_classifier()

View File

@ -332,6 +332,15 @@ CONSUMER_POLLING = int(os.getenv("PAPERLESS_CONSUMER_POLLING", 0))
CONSUMER_DELETE_DUPLICATES = __get_boolean("PAPERLESS_CONSUMER_DELETE_DUPLICATES")
# Consume from subdirectories of CONSUMPTION_DIR as well
CONSUMER_RECURSIVE = __get_boolean("PAPERLESS_CONSUMER_RECURSIVE")
# Set the names of subdirectories as tags for consumed files.
# E.g. $CONSUMPTION_DIR/foo/bar/file.pdf will add the tags "foo" and "bar" to
# the consumed file.
# PAPERLESS_CONSUMER_RECURSIVE must be enabled for this to work.
CONSUMER_SUBDIRS_AS_TAGS = __get_boolean("PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS")
OPTIMIZE_THUMBNAILS = __get_boolean("PAPERLESS_OPTIMIZE_THUMBNAILS", "true")
OCR_PAGES = int(os.getenv('PAPERLESS_OCR_PAGES', 0))