some more tests.

This commit is contained in:
jonaswinkler 2020-12-01 14:15:43 +01:00
parent 24b8c358cc
commit fd3df1ec58
5 changed files with 441 additions and 55 deletions

298
Pipfile.lock generated
View File

@ -1,7 +1,7 @@
{
"_meta": {
"hash": {
"sha256": "d266e1f67e3090ec68aa8ecba1e8373351daf89ad5a5ab46524d123bcaf29f62"
"sha256": "55c9136777e78d6cd362628cd1fc0c5ff36b437699b92089ce504d598004371d"
},
"pipfile-spec": 6,
"requires": {
@ -44,6 +44,94 @@
],
"version": "==1.17.12"
},
"cffi": {
"hashes": [
"sha256:00a1ba5e2e95684448de9b89888ccd02c98d512064b4cb987d48f4b40aa0421e",
"sha256:00e28066507bfc3fe865a31f325c8391a1ac2916219340f87dfad602c3e48e5d",
"sha256:045d792900a75e8b1e1b0ab6787dd733a8190ffcf80e8c8ceb2fb10a29ff238a",
"sha256:0638c3ae1a0edfb77c6765d487fee624d2b1ee1bdfeffc1f0b58c64d149e7eec",
"sha256:105abaf8a6075dc96c1fe5ae7aae073f4696f2905fde6aeada4c9d2926752362",
"sha256:155136b51fd733fa94e1c2ea5211dcd4c8879869008fc811648f16541bf99668",
"sha256:1a465cbe98a7fd391d47dce4b8f7e5b921e6cd805ef421d04f5f66ba8f06086c",
"sha256:1d2c4994f515e5b485fd6d3a73d05526aa0fcf248eb135996b088d25dfa1865b",
"sha256:23f318bf74b170c6e9adb390e8bd282457f6de46c19d03b52f3fd042b5e19654",
"sha256:2c24d61263f511551f740d1a065eb0212db1dbbbbd241db758f5244281590c06",
"sha256:51a8b381b16ddd370178a65360ebe15fbc1c71cf6f584613a7ea08bfad946698",
"sha256:594234691ac0e9b770aee9fcdb8fa02c22e43e5c619456efd0d6c2bf276f3eb2",
"sha256:5cf4be6c304ad0b6602f5c4e90e2f59b47653ac1ed9c662ed379fe48a8f26b0c",
"sha256:64081b3f8f6f3c3de6191ec89d7dc6c86a8a43911f7ecb422c60e90c70be41c7",
"sha256:6bc25fc545a6b3d57b5f8618e59fc13d3a3a68431e8ca5fd4c13241cd70d0009",
"sha256:798caa2a2384b1cbe8a2a139d80734c9db54f9cc155c99d7cc92441a23871c03",
"sha256:7c6b1dece89874d9541fc974917b631406233ea0440d0bdfbb8e03bf39a49b3b",
"sha256:840793c68105fe031f34d6a086eaea153a0cd5c491cde82a74b420edd0a2b909",
"sha256:8d6603078baf4e11edc4168a514c5ce5b3ba6e3e9c374298cb88437957960a53",
"sha256:9cc46bc107224ff5b6d04369e7c595acb700c3613ad7bcf2e2012f62ece80c35",
"sha256:9f7a31251289b2ab6d4012f6e83e58bc3b96bd151f5b5262467f4bb6b34a7c26",
"sha256:9ffb888f19d54a4d4dfd4b3f29bc2c16aa4972f1c2ab9c4ab09b8ab8685b9c2b",
"sha256:a7711edca4dcef1a75257b50a2fbfe92a65187c47dab5a0f1b9b332c5919a3fb",
"sha256:af5c59122a011049aad5dd87424b8e65a80e4a6477419c0c1015f73fb5ea0293",
"sha256:b18e0a9ef57d2b41f5c68beefa32317d286c3d6ac0484efd10d6e07491bb95dd",
"sha256:b4e248d1087abf9f4c10f3c398896c87ce82a9856494a7155823eb45a892395d",
"sha256:ba4e9e0ae13fc41c6b23299545e5ef73055213e466bd107953e4a013a5ddd7e3",
"sha256:be8661bcee1bc2fc4b033a6ab65bd1f87ce5008492601695d0b9a4e820c3bde5",
"sha256:c6332685306b6417a91b1ff9fae889b3ba65c2292d64bd9245c093b1b284809d",
"sha256:d9efd8b7a3ef378dd61a1e77367f1924375befc2eba06168b6ebfa903a5e59ca",
"sha256:df5169c4396adc04f9b0a05f13c074df878b6052430e03f50e68adf3a57aa28d",
"sha256:ebb253464a5d0482b191274f1c8bf00e33f7e0b9c66405fbffc61ed2c839c775",
"sha256:ec80dc47f54e6e9a78181ce05feb71a0353854cc26999db963695f950b5fb375",
"sha256:f032b34669220030f905152045dfa27741ce1a6db3324a5bc0b96b6c7420c87b",
"sha256:f60567825f791c6f8a592f3c6e3bd93dd2934e3f9dac189308426bd76b00ef3b",
"sha256:f803eaa94c2fcda012c047e62bc7a51b0bdabda1cad7a92a522694ea2d76e49f"
],
"version": "==1.14.4"
},
"chardet": {
"hashes": [
"sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae",
"sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691"
],
"markers": "python_version >= '3.1'",
"version": "==3.0.4"
},
"coloredlogs": {
"hashes": [
"sha256:346f58aad6afd48444c2468618623638dadab76e4e70d5e10822676f2d32226a",
"sha256:a1fab193d2053aa6c0a97608c4342d031f1f93a3d1218432c59322441d31a505",
"sha256:b0c2124367d4f72bd739f48e1f61491b4baf145d6bda33b606b4a53cb3f96a97"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
"version": "==14.0"
},
"cryptography": {
"hashes": [
"sha256:07ca431b788249af92764e3be9a488aa1d39a0bc3be313d826bbec690417e538",
"sha256:13b88a0bd044b4eae1ef40e265d006e34dbcde0c2f1e15eb9896501b2d8f6c6f",
"sha256:257dab4f368fae15f378ea9a4d2799bf3696668062de0e9fa0ebb7a738a6917d",
"sha256:32434673d8505b42c0de4de86da8c1620651abd24afe91ae0335597683ed1b77",
"sha256:3cd75a683b15576cfc822c7c5742b3276e50b21a06672dc3a800a2d5da4ecd1b",
"sha256:4e7268a0ca14536fecfdf2b00297d4e407da904718658c1ff1961c713f90fd33",
"sha256:545a8550782dda68f8cdc75a6e3bf252017aa8f75f19f5a9ca940772fc0cb56e",
"sha256:55d0b896631412b6f0c7de56e12eb3e261ac347fbaa5d5e705291a9016e5f8cb",
"sha256:5849d59358547bf789ee7e0d7a9036b2d29e9a4ddf1ce5e06bb45634f995c53e",
"sha256:59f7d4cfea9ef12eb9b14b83d79b432162a0a24a91ddc15c2c9bf76a68d96f2b",
"sha256:6dc59630ecce8c1f558277ceb212c751d6730bd12c80ea96b4ac65637c4f55e7",
"sha256:7117319b44ed1842c617d0a452383a5a052ec6aa726dfbaffa8b94c910444297",
"sha256:75e8e6684cf0034f6bf2a97095cb95f81537b12b36a8fedf06e73050bb171c2d",
"sha256:7b8d9d8d3a9bd240f453342981f765346c87ade811519f98664519696f8e6ab7",
"sha256:a035a10686532b0587d58a606004aa20ad895c60c4d029afa245802347fab57b",
"sha256:a4e27ed0b2504195f855b52052eadcc9795c59909c9d84314c5408687f933fc7",
"sha256:a733671100cd26d816eed39507e585c156e4498293a907029969234e5e634bc4",
"sha256:a75f306a16d9f9afebfbedc41c8c2351d8e61e818ba6b4c40815e2b5740bb6b8",
"sha256:bd717aa029217b8ef94a7d21632a3bb5a4e7218a4513d2521c2a2fd63011e98b",
"sha256:d25cecbac20713a7c3bc544372d42d8eafa89799f492a43b79e1dfd650484851",
"sha256:d26a2557d8f9122f9bf445fc7034242f4375bd4e95ecda007667540270965b13",
"sha256:d3545829ab42a66b84a9aaabf216a4dce7f16dbc76eb69be5c302ed6b8f4a29b",
"sha256:d3d5e10be0cf2a12214ddee45c6bd203dab435e3d83b4560c03066eda600bfe3",
"sha256:efe15aca4f64f3a7ea0c09c87826490e50ed166ce67368a68f315ea0807a20df"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
"version": "==3.2.1"
},
"dateparser": {
"hashes": [
"sha256:7552c994f893b5cb8fcf103b4cd2ff7f57aab9bfd2619fdf0cf571c0740fd90b",
@ -123,6 +211,14 @@
"index": "pypi",
"version": "==20.0.4"
},
"humanfriendly": {
"hashes": [
"sha256:bf52ec91244819c780341a3438d5d7b09f431d3f113a475147ac9b7b167a3d12",
"sha256:e78960b31198511f45fd455534ae7645a6207d33e512d2e842c766d15d9c8080"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
"version": "==8.2"
},
"imap-tools": {
"hashes": [
"sha256:96e9a4ff6483462635737730a1df28e739faa71967b12a84f4363fb386542246",
@ -131,6 +227,13 @@
"index": "pypi",
"version": "==0.32.0"
},
"img2pdf": {
"hashes": [
"sha256:57905015579b1026acf1605aa95859cd79b051fa1c35485573d165526fc9dbb5",
"sha256:eaee690ab8403dd1a9cb4db10afee41dd3e6c7ed63bdace02a0121f9feadb0c9"
],
"version": "==0.4.0"
},
"inotify-simple": {
"hashes": [
"sha256:8440ffe49c4ae81a8df57c1ae1eb4b6bfa7acb830099bfb3e305b383005cc128",
@ -164,6 +267,51 @@
"index": "pypi",
"version": "==1.0.8"
},
"lxml": {
"hashes": [
"sha256:0448576c148c129594d890265b1a83b9cd76fd1f0a6a04620753d9a6bcfd0a4d",
"sha256:127f76864468d6630e1b453d3ffbbd04b024c674f55cf0a30dc2595137892d37",
"sha256:1471cee35eba321827d7d53d104e7b8c593ea3ad376aa2df89533ce8e1b24a01",
"sha256:2363c35637d2d9d6f26f60a208819e7eafc4305ce39dc1d5005eccc4593331c2",
"sha256:2e5cc908fe43fe1aa299e58046ad66981131a66aea3129aac7770c37f590a644",
"sha256:2e6fd1b8acd005bd71e6c94f30c055594bbd0aa02ef51a22bbfa961ab63b2d75",
"sha256:366cb750140f221523fa062d641393092813b81e15d0e25d9f7c6025f910ee80",
"sha256:42ebca24ba2a21065fb546f3e6bd0c58c3fe9ac298f3a320147029a4850f51a2",
"sha256:4e751e77006da34643ab782e4a5cc21ea7b755551db202bc4d3a423b307db780",
"sha256:4fb85c447e288df535b17ebdebf0ec1cf3a3f1a8eba7e79169f4f37af43c6b98",
"sha256:50c348995b47b5a4e330362cf39fc503b4a43b14a91c34c83b955e1805c8e308",
"sha256:535332fe9d00c3cd455bd3dd7d4bacab86e2d564bdf7606079160fa6251caacf",
"sha256:535f067002b0fd1a4e5296a8f1bf88193080ff992a195e66964ef2a6cfec5388",
"sha256:5be4a2e212bb6aa045e37f7d48e3e1e4b6fd259882ed5a00786f82e8c37ce77d",
"sha256:60a20bfc3bd234d54d49c388950195d23a5583d4108e1a1d47c9eef8d8c042b3",
"sha256:648914abafe67f11be7d93c1a546068f8eff3c5fa938e1f94509e4a5d682b2d8",
"sha256:681d75e1a38a69f1e64ab82fe4b1ed3fd758717bed735fb9aeaa124143f051af",
"sha256:68a5d77e440df94011214b7db907ec8f19e439507a70c958f750c18d88f995d2",
"sha256:69a63f83e88138ab7642d8f61418cf3180a4d8cd13995df87725cb8b893e950e",
"sha256:6e4183800f16f3679076dfa8abf2db3083919d7e30764a069fb66b2b9eff9939",
"sha256:6fd8d5903c2e53f49e99359b063df27fdf7acb89a52b6a12494208bf61345a03",
"sha256:791394449e98243839fa822a637177dd42a95f4883ad3dec2a0ce6ac99fb0a9d",
"sha256:7a7669ff50f41225ca5d6ee0a1ec8413f3a0d8aa2b109f86d540887b7ec0d72a",
"sha256:7e9eac1e526386df7c70ef253b792a0a12dd86d833b1d329e038c7a235dfceb5",
"sha256:7ee8af0b9f7de635c61cdd5b8534b76c52cd03536f29f51151b377f76e214a1a",
"sha256:8246f30ca34dc712ab07e51dc34fea883c00b7ccb0e614651e49da2c49a30711",
"sha256:8c88b599e226994ad4db29d93bc149aa1aff3dc3a4355dd5757569ba78632bdf",
"sha256:91d6dace31b07ab47eeadd3f4384ded2f77b94b30446410cb2c3e660e047f7a7",
"sha256:923963e989ffbceaa210ac37afc9b906acebe945d2723e9679b643513837b089",
"sha256:94d55bd03d8671686e3f012577d9caa5421a07286dd351dfef64791cf7c6c505",
"sha256:97db258793d193c7b62d4e2586c6ed98d51086e93f9a3af2b2034af01450a74b",
"sha256:a9d6bc8642e2c67db33f1247a77c53476f3a166e09067c0474facb045756087f",
"sha256:cd11c7e8d21af997ee8079037fff88f16fda188a9776eb4b81c7e4c9c0a7d7fc",
"sha256:d8d3d4713f0c28bdc6c806a278d998546e8efc3498949e3ace6e117462ac0a5e",
"sha256:e0bfe9bb028974a481410432dbe1b182e8191d5d40382e5b8ff39cdd2e5c5931",
"sha256:e1dbb88a937126ab14d219a000728224702e0ec0fc7ceb7131c53606b7a76772",
"sha256:f4822c0660c3754f1a41a655e37cb4dbbc9be3d35b125a37fab6f82d47674ebc",
"sha256:f83d281bb2a6217cd806f4cf0ddded436790e66f393e124dfe9731f6b3fb9afe",
"sha256:fc37870d6716b137e80d19241d0e2cff7a7643b925dfa49b4c8ebd1295eb506e"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
"version": "==4.6.2"
},
"numpy": {
"hashes": [
"sha256:08308c38e44cc926bdfce99498b21eec1f848d24c302519e64203a8da99a97db",
@ -205,6 +353,14 @@
"markers": "python_version >= '3.6'",
"version": "==1.19.4"
},
"ocrmypdf": {
"hashes": [
"sha256:20722d89d2f0deeb5b3ffa8622ead59d54af46d44f21848ec0f15ef79ce1a4a3",
"sha256:c592e1bb37abafd24f067043bbf98d25405521cbe1e992de30d8b870dbe86928"
],
"index": "pypi",
"version": "==11.3.3"
},
"pathtools": {
"hashes": [
"sha256:7c35c5421a39bb82e58018febd90e3b6e5db34c5443aaaf742b3f33d4655f1c0",
@ -220,6 +376,14 @@
"index": "pypi",
"version": "==2.3.0"
},
"pdfminer.six": {
"hashes": [
"sha256:b9aac0ebeafb21c08bf65f2039f4b2c5f78a3449d0a41df711d72445649e952a",
"sha256:d78877ba8d8bf957f3bb636c4f73f4f6f30f56c461993877ac22c39c20837509"
],
"markers": "python_version >= '3.4'",
"version": "==20201018"
},
"pdftotext": {
"hashes": [
"sha256:98aeb8b07a4127e1a30223bd933ef080bbd29aa88f801717ca6c5618380b8aa6"
@ -227,6 +391,33 @@
"index": "pypi",
"version": "==2.1.5"
},
"pikepdf": {
"hashes": [
"sha256:0829bd5dacd73bb4a37e7575bae523f49603479755563c92ddb55c206700cab1",
"sha256:0d2b631077cd6af6e4d1b396208020705842610a6f13fab489d5f9c47916baa2",
"sha256:21c98af08fae4ac9fbcad02b613b6768a4ca300fda4cba867f4a4b6f73c2d04b",
"sha256:2240372fed30124ddc35b0c15a613f2b687a426ea2f150091e0a0c58cca7a495",
"sha256:2a97f5f1403e058d217d7f6861cf51fca200c5687bce0d052f5f2fa89b5bfa22",
"sha256:3faaefca0ae80d19891acec8b0dd5e6235f59f2206d82375eb80d090285e9557",
"sha256:48ef45b64882901c0d69af3b85d16a19bd0f3e95b43e614fefb53521d8caf36c",
"sha256:5212fe41f2323fc7356ba67caa39737fe13080562cff37bcbb74a8094076c8d0",
"sha256:56859c32170663c57bd0658189ce44e180533eebe813853446cd6413810be9eb",
"sha256:5f8fd1cb3478c5534222018aca24fbbd2bc74460c899bda988ec76722c13caa9",
"sha256:74300a32c41b3d578772f6933f23a88b19f74484185e71e5225ce2f7ea5aea78",
"sha256:8cbc946bdd217148f4a9c029fcea62f4ae0f67d5346de4c865f4718cd0ddc37f",
"sha256:9ceefd30076f732530cf84a1be2ecb2fa9931af932706ded760a6d37c73b96ad",
"sha256:ad69c170fda41b07a4c6b668a3128e7a759f50d9aebcfcde0ccff1358abe0423",
"sha256:b715fe182189fb6870fab5b0383bb2fb278c88c46eade346b0f4c1ed8818c09d",
"sha256:bb01ecf95083ffcb9ad542dc5342ccc1059e46f1395fd966629d36d9cc766b4a",
"sha256:bd6328547219cf48cefb4e0a1bc54442910594de1c5a5feae847d9ff3c629031",
"sha256:edb128379bb1dea76b5bdbdacf5657a6e4754bacc2049640762725590d8ed905",
"sha256:f8e687900557fcd4c51b4e72b9e337fdae9e2c81049d1d80b624bb2e88b5769d",
"sha256:fe0ca120e3347c851c34a91041d574f3c588d832023906d8ae18d66d042e8a52",
"sha256:fe8e0152672f24d8bfdecc725f97e9013f2de1b41849150959526ca3562bd3ef"
],
"markers": "python_version < '3.9'",
"version": "==2.2.0"
},
"pillow": {
"hashes": [
"sha256:006de60d7580d81f4a1a7e9f0173dc90a932e3905cc4d47ea909bc946302311a",
@ -262,6 +453,14 @@
"index": "pypi",
"version": "==8.0.1"
},
"pluggy": {
"hashes": [
"sha256:15b2acde666561e1298d71b523007ed7364de07029219b604cf808bfa1c765b0",
"sha256:966c145cd83c96502c3c3868f50408687b38434af77734af1e9ca461a4081d2d"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==0.13.1"
},
"psycopg2-binary": {
"hashes": [
"sha256:0deac2af1a587ae12836aa07970f5cb91964f05a7c6cdb69d8425ff4c15d4e2c",
@ -305,13 +504,13 @@
"index": "pypi",
"version": "==2.8.6"
},
"pyocr": {
"pycparser": {
"hashes": [
"sha256:fa15adc7e1cf0d345a2990495fe125a947c6e09a60ddba0256a1c14b2e603179",
"sha256:fd602af17b6e21985669aadc058a95f343ff921e962ed4aa6520ded32e4d1301"
"sha256:2d475327684562c3a96cc71adf7dc8c4f0565175cf86b6d7a404ff4c771f15f0",
"sha256:7582ad22678f0fcd81102833f60ef8d0e57288b6b5fb00323d101be910e35705"
],
"index": "pypi",
"version": "==0.7.2"
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==2.20"
},
"python-dateutil": {
"hashes": [
@ -419,6 +618,53 @@
],
"version": "==2020.11.13"
},
"reportlab": {
"hashes": [
"sha256:06be7f04a631f02cd0202f7dee0d3e61dc265223f4ff861525ed7784b5552540",
"sha256:0a788a537c48915eda083485b59ac40ac012fa7c43070069bde6eb5ea588313c",
"sha256:1a7a38810e79653d0ea8e61db4f0517ac2a0e76edd2497cf6d4969dd3be30030",
"sha256:22301773db730545b44d4c77d8f29baf5683ccabec9883d978e8b8eda6d2175f",
"sha256:2906321b3d2779faafe47e2c13f9c69e1fb4ddb907f5a49cab3f9b0ea95df1f5",
"sha256:2d65f9cc5c0d3f63b5d024e6cf92234f1ab1f267cc9e5a847ab5d3efe1c3cf3e",
"sha256:2e012f7b845ef9f1f5bd63461d5201fa624b019a65ff5a93d0002b4f915bbc89",
"sha256:31ccfdbf5bb5ec85f0397661085ce4c9e52537ca0d2bf4220259666a4dcc55c2",
"sha256:3e10bd20c8ada9f7e1113157aa73b8e0048f2624e74794b73799c3deb13d7a3f",
"sha256:440d5f86c2b822abdb7981d691a78bdcf56f4710174830283034235ab2af2969",
"sha256:4f307accda32c9f17015ed77c7424f904514e349dff063f78d2462d715963e53",
"sha256:59659ee8897950fd1acd41a9cc61f4afdfda52dc2bb69a1924ce68089491849d",
"sha256:6216b11313467989ac9d9578ea3756d0af46e97184ee4e11a6b7ef652458f70d",
"sha256:6268a9a3d75e714b22beeb7687270956b06b232ccfdf37b1c6462961eab04457",
"sha256:6b226830f80df066d5986a3fdb3eb4d1b6320048f3d9ade539a6c03a5bc8b3ec",
"sha256:6e10eba6a0e330096f4200b18824b3194c399329b7830e34baee1c04ea07f99f",
"sha256:6e224c16c3d6fafdb2fb67b33c4b84d984ec34869834b3a137809f2fe5b84778",
"sha256:7da162fa677b90bd14f19b20ff80fec18c24a31ac44e5342ba49e198b13c4f92",
"sha256:8406e960a974a65b765c9ff74b269aa64718b4af1e8c511ebdbd9a5b44b0c7e6",
"sha256:8999bb075102d1b8ca4aada6ca14653d52bf02e37fd064e477eb180741f75077",
"sha256:8ae21aa94e405bf5171718f11ebc702a0edf18c91d88b14c5c5724cabd664673",
"sha256:8f6163729612e815b89649aed2e237505362a78014199f819fd92f9e5c96769b",
"sha256:9699fa8f0911ad56b46cc60bbaebe1557fd1c9e8da98185a7a1c0c40193eba48",
"sha256:9a53d76eec33abda11617aad1c9f5f4a2d906dd2f92a03a3f1ea370efbb52c95",
"sha256:9ed4d761b726ff411565eddb10cb37a6bca0ec873d9a18a83cf078f4502a2d94",
"sha256:a020d308e7c2de284d5407e3c6c13e3977a62b314f7bfe19bcc69677931da589",
"sha256:a2e6c15aecbe631245aab639751a58671312cced7e17de1ed9c45fb37036f6c9",
"sha256:b10cb48606d97b70edb094576e3d493d40467395e4fc267655135a2c92defbe8",
"sha256:b8d6e9df5181ed07b7ae145258eb69e686133afc97930af51a3c0c9d784d834d",
"sha256:bbb297754f5cf25eb8fcb817752984252a7feb0ca83e383718e4eec2fb67ea32",
"sha256:be90599e5e78c1ddfcfee8c752108def58b4c672ebcc4d3d9aa7fe65e7d3f16b",
"sha256:bfdfad9b8ae00bd0752b77f954c7405327fd99b2cc6d5e4273e65be61429d56a",
"sha256:c1e5ef5089e16b249388f65d8c8f8b74989e72eb8332060dc580a2ecb967cfc2",
"sha256:c5ed342e29a5fd7eeb0f2ccf7e5b946b5f750f05633b2d6a94b1c02094a77967",
"sha256:c7087a26b26aa82a3ba27e13e66f507cc697f9ceb4c046c0f758876b55f040a5",
"sha256:cf589e980d92b0bf343fa512b9d3ae9ed0469cbffd99cb270b6c83da143cb437",
"sha256:e6fb762e524a4fb118be9f44dbd9456cf80e42253ee8f1bdb0ea5c1f882d4ba8",
"sha256:e961d3a84c65ca030963ca934a4faad2ac9fee75af36ba2f98733da7d3f7efab",
"sha256:f2fde5abb6f21c1eff5430f380cdbbee7fdeda6af935a83730ddce9f0c4e504e",
"sha256:f585b3bf7062c228306acd7f40b2ad915b32603228c19bb225952cc98fd2015a",
"sha256:f955a6366cf8e6729776c96e281bede468acd74f6eb49a5bbb048646adaa43d8",
"sha256:fe882fd348d8429debbdac4518d6a42888a7f4ad613dc596ce94788169caeb08"
],
"version": "==3.5.55"
},
"scikit-learn": {
"hashes": [
"sha256:090bbf144fd5823c1f2efa3e1a9bf180295b24294ca8f478e75b40ed54f8036e",
@ -482,6 +728,13 @@
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==1.15.0"
},
"sortedcontainers": {
"hashes": [
"sha256:37257a32add0a3ee490bb170b599e93095eed89a55da91fa9f48753ea12fd73f",
"sha256:59cc937650cf60d677c16775597c89a960658a09cf7c1a668f86e1e4464b10a1"
],
"version": "==2.3.0"
},
"sqlparse": {
"hashes": [
"sha256:017cde379adbd6a1f15a61873f43e8274179378e95ef3fede90b5aa64d304ed0",
@ -498,6 +751,14 @@
"markers": "python_version >= '3.5'",
"version": "==2.1.0"
},
"tqdm": {
"hashes": [
"sha256:5c0d04e06ccc0da1bd3fa5ae4550effcce42fcad947b4a6cafa77bdc9b09ff22",
"sha256:9e7b8ab0ecbdbf0595adadd5f0ebbb9e69010e0bd48bbb0c15e550bf2a5292df"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==4.54.0"
},
"tzlocal": {
"hashes": [
"sha256:643c97c5294aedc737780a49d9df30889321cbe1204eac2c2ec6134035a92e44",
@ -589,6 +850,7 @@
"sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae",
"sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691"
],
"markers": "python_version >= '3.1'",
"version": "==3.0.4"
},
"coverage": {
@ -711,22 +973,6 @@
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==1.2.0"
},
"importlib-metadata": {
"hashes": [
"sha256:030f3b1bdb823ecbe4a9659e14cc861ce5af403fe99863bae173ec5fe00ab132",
"sha256:caeee3603f5dcf567864d1be9b839b0bcfdf1383e3e7be33ce2dead8144ff19c"
],
"markers": "python_version < '3.8'",
"version": "==2.1.0"
},
"importlib-resources": {
"hashes": [
"sha256:7b51f0106c8ec564b1bef3d9c588bc694ce2b92125bbb6278f4f2f5b54ec3592",
"sha256:a3d34a8464ce1d5d7c92b0ea4e921e696d86f2aa212e684451cb1482c8d84ed5"
],
"markers": "python_version < '3.7'",
"version": "==3.3.0"
},
"iniconfig": {
"hashes": [
"sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3",
@ -1038,14 +1284,6 @@
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==20.2.1"
},
"zipp": {
"hashes": [
"sha256:102c24ef8f171fd729d46599845e95c7ab894a4cf45f5de11a44cc7444fb1108",
"sha256:ed5eee1974372595f9e416cc7bbeeb12335201d8081ca8a0743c954d4446e5cb"
],
"markers": "python_version < '3.8'",
"version": "==3.4.0"
}
}
}

View File

@ -177,11 +177,13 @@ PAPERLESS_OCR_LANGUAGE=<lang>
Defaults to "eng".
PAPERLESS_OCR_MODE=<mode>
Tell paperless when and how to perform ocr on your documents. Three modes
Tell paperless when and how to perform ocr on your documents. Four modes
are available:
* ``skip``: Paperless skips all pages and will perform ocr only on pages
where no text is present. This is the safest and fastest option.
* ``skip_noarchive``: In addition to skip, paperless won't create an
archived version of your documents when it finds any text in them.
* ``redo``: Paperless will OCR all pages of your documents and attempt to
replace any existing text layers with new text. This will be useful for
documents from scanners that already performed OCR with insufficient

View File

@ -10,7 +10,6 @@ def create_source_path_directory(source_path):
os.makedirs(os.path.dirname(source_path), exist_ok=True)
# TODO: also make this work for archive dir
def delete_empty_directories(directory, root):
if not os.path.isdir(directory):
return

View File

@ -2,32 +2,17 @@ import os
import shutil
from pathlib import Path
from unittest import mock
from uuid import uuid4
from django.conf import settings
from django.db import DatabaseError
from django.test import TestCase, override_settings
from .utils import DirectoriesMixin
from ..file_handling import generate_filename, create_source_path_directory, delete_empty_directories
from ..models import Document, Correspondent
class TestDate(TestCase):
deletion_list = []
def add_to_deletion_list(self, dirname):
self.deletion_list.append(dirname)
def setUp(self):
folder = "/tmp/paperless-tests-{}".format(str(uuid4())[:8])
os.makedirs(folder + "/documents/originals")
override_settings(MEDIA_ROOT=folder).enable()
override_settings(ORIGINALS_DIR=folder + "/documents/originals").enable()
self.add_to_deletion_list(folder)
def tearDown(self):
for dirname in self.deletion_list:
shutil.rmtree(dirname, ignore_errors=True)
class TestFileHandling(DirectoriesMixin, TestCase):
@override_settings(PAPERLESS_FILENAME_FORMAT="")
def test_generate_source_filename(self):
@ -104,7 +89,7 @@ class TestDate(TestCase):
document.save()
# Check proper handling of files
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/originals/none/none-{:07d}.pdf".format(document.pk)), True)
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none-{:07d}.pdf".format(document.pk)), True)
self.assertEqual(document.filename, "none/none-{:07d}.pdf".format(document.pk))
os.chmod(settings.ORIGINALS_DIR + "/none", 0o777)
@ -140,7 +125,7 @@ class TestDate(TestCase):
# Check proper handling of files
self.assertTrue(os.path.isfile(document.source_path))
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/originals/none/none-{:07d}.pdf".format(document.pk)), True)
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none-{:07d}.pdf".format(document.pk)), True)
self.assertEqual(document.filename, "none/none-{:07d}.pdf".format(document.pk))
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
@ -196,8 +181,8 @@ class TestDate(TestCase):
document.save()
# Check proper handling of files
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + "/documents/originals/test"), True)
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + "/documents/originals/none"), True)
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/test"), True)
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), True)
self.assertTrue(os.path.isfile(important_file))
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
@ -315,7 +300,6 @@ class TestDate(TestCase):
# Create our working directory
tmp = os.path.join(settings.ORIGINALS_DIR, "test_delete_empty")
os.makedirs(tmp)
self.add_to_deletion_list(tmp)
os.makedirs(os.path.join(tmp, "notempty"))
Path(os.path.join(tmp, "notempty", "file")).touch()
@ -345,3 +329,159 @@ class TestDate(TestCase):
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
self.assertEqual(generate_filename(document), "0000001.pdf")
class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
@override_settings(PAPERLESS_FILENAME_FORMAT=None)
def test_create_no_format(self):
original = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
Path(original).touch()
Path(archive).touch()
doc = Document.objects.create(mime_type="application/pdf", filename="0000001.pdf", checksum="A", archive_checksum="B")
self.assertTrue(os.path.isfile(original))
self.assertTrue(os.path.isfile(archive))
self.assertTrue(os.path.isfile(doc.source_path))
self.assertTrue(os.path.isfile(doc.archive_path))
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
def test_create_with_format(self):
original = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
Path(original).touch()
Path(archive).touch()
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B")
self.assertFalse(os.path.isfile(original))
self.assertFalse(os.path.isfile(archive))
self.assertTrue(os.path.isfile(doc.source_path))
self.assertTrue(os.path.isfile(doc.archive_path))
self.assertEqual(doc.source_path, os.path.join(settings.ORIGINALS_DIR, "none", "my_doc-0000001.pdf"))
self.assertEqual(doc.archive_path, os.path.join(settings.ARCHIVE_DIR, "none", "my_doc-0000001.pdf"))
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
def test_move_archive_gone(self):
original = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
Path(original).touch()
#Path(archive).touch()
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B")
self.assertTrue(os.path.isfile(original))
self.assertFalse(os.path.isfile(archive))
self.assertTrue(os.path.isfile(doc.source_path))
self.assertFalse(os.path.isfile(doc.archive_path))
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
def test_move_archive_exists(self):
original = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
Path(original).touch()
Path(archive).touch()
os.makedirs(os.path.join(settings.ARCHIVE_DIR, "none"))
Path(os.path.join(settings.ARCHIVE_DIR, "none", "my_doc-0000001.pdf")).touch()
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B")
self.assertTrue(os.path.isfile(original))
self.assertTrue(os.path.isfile(archive))
self.assertTrue(os.path.isfile(doc.source_path))
self.assertTrue(os.path.isfile(doc.archive_path))
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
@mock.patch("documents.signals.handlers.os.rename")
def test_move_archive_error(self, m):
def fake_rename(src, dst):
if "archive" in src:
raise OSError()
else:
os.remove(src)
Path(dst).touch()
m.side_effect = fake_rename
original = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
Path(original).touch()
Path(archive).touch()
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B")
self.assertTrue(os.path.isfile(original))
self.assertTrue(os.path.isfile(archive))
self.assertTrue(os.path.isfile(doc.source_path))
self.assertTrue(os.path.isfile(doc.archive_path))
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
def test_move_file_gone(self):
original = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
#Path(original).touch()
Path(archive).touch()
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B")
self.assertFalse(os.path.isfile(original))
self.assertTrue(os.path.isfile(archive))
self.assertFalse(os.path.isfile(doc.source_path))
self.assertTrue(os.path.isfile(doc.archive_path))
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
@mock.patch("documents.signals.handlers.os.rename")
def test_move_file_error(self, m):
def fake_rename(src, dst):
if "original" in src:
raise OSError()
else:
os.remove(src)
Path(dst).touch()
m.side_effect = fake_rename
original = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
Path(original).touch()
Path(archive).touch()
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B")
self.assertTrue(os.path.isfile(original))
self.assertTrue(os.path.isfile(archive))
self.assertTrue(os.path.isfile(doc.source_path))
self.assertTrue(os.path.isfile(doc.archive_path))
def test_archive_deleted(self):
original = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
Path(original).touch()
Path(archive).touch()
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B")
self.assertTrue(os.path.isfile(original))
self.assertTrue(os.path.isfile(archive))
self.assertTrue(os.path.isfile(doc.source_path))
self.assertTrue(os.path.isfile(doc.archive_path))
doc.delete()
self.assertFalse(os.path.isfile(original))
self.assertFalse(os.path.isfile(archive))
self.assertFalse(os.path.isfile(doc.source_path))
self.assertFalse(os.path.isfile(doc.archive_path))
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
def test_database_error(self):
original = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
Path(original).touch()
Path(archive).touch()
doc = Document(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B")
with mock.patch("documents.signals.handlers.Document.objects.filter") as m:
m.side_effect = DatabaseError()
doc.save()
self.assertTrue(os.path.isfile(original))
self.assertTrue(os.path.isfile(archive))
self.assertTrue(os.path.isfile(doc.source_path))
self.assertTrue(os.path.isfile(doc.archive_path))

View File

@ -80,6 +80,12 @@ class RasterisedDocumentParser(DocumentParser):
return None
def parse(self, document_path, mime_type):
if settings.OCR_MODE == "skip_noarchive":
text = get_text_from_pdf(document_path)
if text and len(text) > 50:
self.text = text
return
archive_path = os.path.join(self.tempdir, "archive.pdf")
ocr_args = {
@ -96,7 +102,7 @@ class RasterisedDocumentParser(DocumentParser):
if settings.OCR_PAGES > 0:
ocr_args['pages'] = f"1-{settings.OCR_PAGES}"
if settings.OCR_MODE == 'skip':
if settings.OCR_MODE in ['skip', 'skip_noarchive']:
ocr_args['skip_text'] = True
elif settings.OCR_MODE == 'redo':
ocr_args['redo_ocr'] = True
@ -184,6 +190,7 @@ def get_text_from_pdf(pdf_file):
try:
pdf = pdftotext.PDF(f)
except pdftotext.Error:
# might not be a PDF file
return None
text = "\n".join(pdf)