mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-10-30 03:56:23 -05:00 
			
		
		
		
	Merge branch 'master' into dev
This commit is contained in:
		
							
								
								
									
										2
									
								
								Pipfile
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								Pipfile
									
									
									
									
									
								
							| @@ -36,3 +36,5 @@ pytest-xdist = "*" | |||||||
| [dev-packages] | [dev-packages] | ||||||
| ipython = "*" | ipython = "*" | ||||||
| sphinx = "*" | sphinx = "*" | ||||||
|  | tox = "*" | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										83
									
								
								Pipfile.lock
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										83
									
								
								Pipfile.lock
									
									
									
										generated
									
									
									
								
							| @@ -1,7 +1,7 @@ | |||||||
| { | { | ||||||
|     "_meta": { |     "_meta": { | ||||||
|         "hash": { |         "hash": { | ||||||
|             "sha256": "e20c2294bcafd346ee57901df94a515a12976ed192dc37df848b39b56bdd1f4b" |             "sha256": "6d8bad24aa5d0c102b13b5ae27acba04836cd5a07a4003cb2763de1e0a3406b7" | ||||||
|         }, |         }, | ||||||
|         "pipfile-spec": 6, |         "pipfile-spec": 6, | ||||||
|         "requires": {}, |         "requires": {}, | ||||||
| @@ -19,7 +19,7 @@ | |||||||
|                 "sha256:37228cda29411948b422fae072f57e31d3396d2ee1c9783775980ee9c9990af6", |                 "sha256:37228cda29411948b422fae072f57e31d3396d2ee1c9783775980ee9c9990af6", | ||||||
|                 "sha256:58587dd4dc3daefad0487f6d9ae32b4542b185e1c36db6993290e7c41ca2b47c" |                 "sha256:58587dd4dc3daefad0487f6d9ae32b4542b185e1c36db6993290e7c41ca2b47c" | ||||||
|             ], |             ], | ||||||
|             "markers": "python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.3.*' and python_version != '3.2.*' and python_version != '3.1.*'", |             "markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'", | ||||||
|             "version": "==1.5" |             "version": "==1.5" | ||||||
|         }, |         }, | ||||||
|         "atomicwrites": { |         "atomicwrites": { | ||||||
| @@ -27,7 +27,7 @@ | |||||||
|                 "sha256:0312ad34fcad8fac3704d441f7b317e50af620823353ec657a53e981f92920c0", |                 "sha256:0312ad34fcad8fac3704d441f7b317e50af620823353ec657a53e981f92920c0", | ||||||
|                 "sha256:ec9ae8adaae229e4f8446952d204a3e4b5fdd2d099f9be3aaf556120135fb3ee" |                 "sha256:ec9ae8adaae229e4f8446952d204a3e4b5fdd2d099f9be3aaf556120135fb3ee" | ||||||
|             ], |             ], | ||||||
|             "markers": "python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.3.*' and python_version != '3.2.*' and python_version != '3.1.*'", |             "markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'", | ||||||
|             "version": "==1.2.1" |             "version": "==1.2.1" | ||||||
|         }, |         }, | ||||||
|         "attrs": { |         "attrs": { | ||||||
| @@ -85,7 +85,7 @@ | |||||||
|                 "sha256:e05cb4d9aad6233d67e0541caa7e511fa4047ed7750ec2510d466e806e0255d6", |                 "sha256:e05cb4d9aad6233d67e0541caa7e511fa4047ed7750ec2510d466e806e0255d6", | ||||||
|                 "sha256:f3f501f345f24383c0000395b26b726e46758b71393267aeae0bd36f8b3ade80" |                 "sha256:f3f501f345f24383c0000395b26b726e46758b71393267aeae0bd36f8b3ade80" | ||||||
|             ], |             ], | ||||||
|             "markers": "python_version >= '2.6' and python_version != '3.0.*' and python_version != '3.2.*' and python_version < '4' and python_version != '3.1.*'", |             "markers": "python_version >= '2.6' and python_version != '3.2.*' and python_version != '3.0.*' and python_version != '3.1.*' and python_version < '4'", | ||||||
|             "version": "==4.5.1" |             "version": "==4.5.1" | ||||||
|         }, |         }, | ||||||
|         "coveralls": { |         "coveralls": { | ||||||
| @@ -163,7 +163,7 @@ | |||||||
|                 "sha256:a7a84d5fa07a089186a329528f127c9d73b9de57f1a1131b82bb5320ee651f6a", |                 "sha256:a7a84d5fa07a089186a329528f127c9d73b9de57f1a1131b82bb5320ee651f6a", | ||||||
|                 "sha256:fc155a6b553c66c838d1a22dba1dc9f5f505c43285a878c6f74a79c024750b83" |                 "sha256:fc155a6b553c66c838d1a22dba1dc9f5f505c43285a878c6f74a79c024750b83" | ||||||
|             ], |             ], | ||||||
|             "markers": "python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.3.*' and python_version != '3.2.*' and python_version != '3.1.*'", |             "markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'", | ||||||
|             "version": "==1.5.0" |             "version": "==1.5.0" | ||||||
|         }, |         }, | ||||||
|         "factory-boy": { |         "factory-boy": { | ||||||
| @@ -179,6 +179,7 @@ | |||||||
|                 "sha256:ea7cfd3aeb1544732d08bd9cfba40c5b78e3a91e17b1a0698ab81bfc5554c628", |                 "sha256:ea7cfd3aeb1544732d08bd9cfba40c5b78e3a91e17b1a0698ab81bfc5554c628", | ||||||
|                 "sha256:f6d67f04abfb2b4bea7afc7fa6c18cf4c523a67956e455668be9ae42bccc21ad" |                 "sha256:f6d67f04abfb2b4bea7afc7fa6c18cf4c523a67956e455668be9ae42bccc21ad" | ||||||
|             ], |             ], | ||||||
|  |             "markers": "python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.2.*' and python_version >= '2.7'", | ||||||
|             "version": "==0.9.0" |             "version": "==0.9.0" | ||||||
|         }, |         }, | ||||||
|         "filemagic": { |         "filemagic": { | ||||||
| @@ -282,7 +283,7 @@ | |||||||
|                 "sha256:6e3836e39f4d36ae72840833db137f7b7d35105079aee6ec4a62d9f80d594dd1", |                 "sha256:6e3836e39f4d36ae72840833db137f7b7d35105079aee6ec4a62d9f80d594dd1", | ||||||
|                 "sha256:95eb8364a4708392bae89035f45341871286a333f749c3141c20573d2b3876e1" |                 "sha256:95eb8364a4708392bae89035f45341871286a333f749c3141c20573d2b3876e1" | ||||||
|             ], |             ], | ||||||
|             "markers": "python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.3.*' and python_version != '3.2.*' and python_version != '3.1.*'", |             "markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'", | ||||||
|             "version": "==0.7.1" |             "version": "==0.7.1" | ||||||
|         }, |         }, | ||||||
|         "py": { |         "py": { | ||||||
| @@ -290,7 +291,7 @@ | |||||||
|                 "sha256:06a30435d058473046be836d3fc4f27167fd84c45b99704f2fb5509ef61f9af1", |                 "sha256:06a30435d058473046be836d3fc4f27167fd84c45b99704f2fb5509ef61f9af1", | ||||||
|                 "sha256:50402e9d1c9005d759426988a492e0edaadb7f4e68bcddfea586bc7432d009c6" |                 "sha256:50402e9d1c9005d759426988a492e0edaadb7f4e68bcddfea586bc7432d009c6" | ||||||
|             ], |             ], | ||||||
|             "markers": "python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.3.*' and python_version != '3.2.*' and python_version != '3.1.*'", |             "markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'", | ||||||
|             "version": "==1.6.0" |             "version": "==1.6.0" | ||||||
|         }, |         }, | ||||||
|         "pycodestyle": { |         "pycodestyle": { | ||||||
| @@ -303,26 +304,26 @@ | |||||||
|         }, |         }, | ||||||
|         "pyocr": { |         "pyocr": { | ||||||
|             "hashes": [ |             "hashes": [ | ||||||
|                 "sha256:bdc4d43bf9b63c2a9a4b2c9a1a623a0e63c8e6600eede5dbe866b31f3a5f2207" |                 "sha256:b6ba6263fd92da56627dff6d263d991a2246aacd117d1788f11b93f419ca395f" | ||||||
|             ], |             ], | ||||||
|             "index": "pypi", |             "index": "pypi", | ||||||
|             "version": "==0.5.2" |             "version": "==0.5.3" | ||||||
|         }, |         }, | ||||||
|         "pytest": { |         "pytest": { | ||||||
|             "hashes": [ |             "hashes": [ | ||||||
|                 "sha256:2d7c49e931316cc7d1638a3e5f54f5d7b4e5225972b3c9838f3584788d27f349", |                 "sha256:453cbbbe5ce6db38717d282b758b917de84802af4288910c12442984bde7b823", | ||||||
|                 "sha256:ad0c7db7b5d4081631e0155f5c61b80ad76ce148551aaafe3a718d65a7508b18" |                 "sha256:a8a07f84e680482eb51e244370aaf2caa6301ef265f37c2bdefb3dd3b663f99d" | ||||||
|             ], |             ], | ||||||
|             "index": "pypi", |             "index": "pypi", | ||||||
|             "version": "==3.7.4" |             "version": "==3.8.0" | ||||||
|         }, |         }, | ||||||
|         "pytest-cov": { |         "pytest-cov": { | ||||||
|             "hashes": [ |             "hashes": [ | ||||||
|                 "sha256:03aa752cf11db41d281ea1d807d954c4eda35cfa1b21d6971966cc041bbf6e2d", |                 "sha256:513c425e931a0344944f84ea47f3956be0e416d95acbd897a44970c8d926d5d7", | ||||||
|                 "sha256:890fe5565400902b0c78b5357004aab1c814115894f4f21370e2433256a3eeec" |                 "sha256:e360f048b7dae3f2f2a9a4d067b2dd6b6a015d384d1577c994a43f3f7cbad762" | ||||||
|             ], |             ], | ||||||
|             "index": "pypi", |             "index": "pypi", | ||||||
|             "version": "==2.5.1" |             "version": "==2.6.0" | ||||||
|         }, |         }, | ||||||
|         "pytest-django": { |         "pytest-django": { | ||||||
|             "hashes": [ |             "hashes": [ | ||||||
| @@ -344,6 +345,7 @@ | |||||||
|                 "sha256:e4500cd0509ec4a26535f7d4112a8cc0f17d3a41c29ffd4eab479d2a55b30805", |                 "sha256:e4500cd0509ec4a26535f7d4112a8cc0f17d3a41c29ffd4eab479d2a55b30805", | ||||||
|                 "sha256:f275cb48a73fc61a6710726348e1da6d68a978f0ec0c54ece5a5fae5977e5a08" |                 "sha256:f275cb48a73fc61a6710726348e1da6d68a978f0ec0c54ece5a5fae5977e5a08" | ||||||
|             ], |             ], | ||||||
|  |             "markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'", | ||||||
|             "version": "==0.2" |             "version": "==0.2" | ||||||
|         }, |         }, | ||||||
|         "pytest-sugar": { |         "pytest-sugar": { | ||||||
| @@ -457,7 +459,7 @@ | |||||||
|                 "sha256:a68ac5e15e76e7e5dd2b8f94007233e01effe3e50e8daddf69acfd81cb686baf", |                 "sha256:a68ac5e15e76e7e5dd2b8f94007233e01effe3e50e8daddf69acfd81cb686baf", | ||||||
|                 "sha256:b5725a0bd4ba422ab0e66e89e030c806576753ea3ee08554382c14e685d117b5" |                 "sha256:b5725a0bd4ba422ab0e66e89e030c806576753ea3ee08554382c14e685d117b5" | ||||||
|             ], |             ], | ||||||
|             "markers": "python_version >= '2.6' and python_version != '3.3.*' and python_version < '4' and python_version != '3.1.*' and python_version != '3.2.*' and python_version != '3.0.*'", |             "markers": "python_version >= '2.6' and python_version != '3.2.*' and python_version != '3.0.*' and python_version != '3.1.*' and python_version < '4' and python_version != '3.3.*'", | ||||||
|             "version": "==1.23" |             "version": "==1.23" | ||||||
|         } |         } | ||||||
|     }, |     }, | ||||||
| @@ -521,10 +523,11 @@ | |||||||
|         }, |         }, | ||||||
|         "imagesize": { |         "imagesize": { | ||||||
|             "hashes": [ |             "hashes": [ | ||||||
|                 "sha256:3620cc0cadba3f7475f9940d22431fc4d407269f1be59ec9b8edcca26440cf18", |                 "sha256:3f349de3eb99145973fefb7dbe38554414e5c30abd0c8e4b970a7c9d09f3a1d8", | ||||||
|                 "sha256:5b326e4678b6925158ccc66a9fa3122b6106d7c876ee32d7de6ce59385b96315" |                 "sha256:f3832918bc3c66617f92e35f5d70729187676313caa60c187eb0f28b8fe5e3b5" | ||||||
|             ], |             ], | ||||||
|             "version": "==1.0.0" |             "markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'", | ||||||
|  |             "version": "==1.1.0" | ||||||
|         }, |         }, | ||||||
|         "ipython": { |         "ipython": { | ||||||
|             "hashes": [ |             "hashes": [ | ||||||
| @@ -590,6 +593,14 @@ | |||||||
|             ], |             ], | ||||||
|             "version": "==0.7.4" |             "version": "==0.7.4" | ||||||
|         }, |         }, | ||||||
|  |         "pluggy": { | ||||||
|  |             "hashes": [ | ||||||
|  |                 "sha256:6e3836e39f4d36ae72840833db137f7b7d35105079aee6ec4a62d9f80d594dd1", | ||||||
|  |                 "sha256:95eb8364a4708392bae89035f45341871286a333f749c3141c20573d2b3876e1" | ||||||
|  |             ], | ||||||
|  |             "markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'", | ||||||
|  |             "version": "==0.7.1" | ||||||
|  |         }, | ||||||
|         "prompt-toolkit": { |         "prompt-toolkit": { | ||||||
|             "hashes": [ |             "hashes": [ | ||||||
|                 "sha256:1df952620eccb399c53ebb359cc7d9a8d3a9538cb34c5a1344bdbeb29fbcc381", |                 "sha256:1df952620eccb399c53ebb359cc7d9a8d3a9538cb34c5a1344bdbeb29fbcc381", | ||||||
| @@ -605,6 +616,14 @@ | |||||||
|             ], |             ], | ||||||
|             "version": "==0.6.0" |             "version": "==0.6.0" | ||||||
|         }, |         }, | ||||||
|  |         "py": { | ||||||
|  |             "hashes": [ | ||||||
|  |                 "sha256:06a30435d058473046be836d3fc4f27167fd84c45b99704f2fb5509ef61f9af1", | ||||||
|  |                 "sha256:50402e9d1c9005d759426988a492e0edaadb7f4e68bcddfea586bc7432d009c6" | ||||||
|  |             ], | ||||||
|  |             "markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'", | ||||||
|  |             "version": "==1.6.0" | ||||||
|  |         }, | ||||||
|         "pygments": { |         "pygments": { | ||||||
|             "hashes": [ |             "hashes": [ | ||||||
|                 "sha256:78f3f434bcc5d6ee09020f92ba487f95ba50f1e3ef83ae96b9d5ffa1bab25c5d", |                 "sha256:78f3f434bcc5d6ee09020f92ba487f95ba50f1e3ef83ae96b9d5ffa1bab25c5d", | ||||||
| @@ -656,20 +675,28 @@ | |||||||
|         }, |         }, | ||||||
|         "sphinx": { |         "sphinx": { | ||||||
|             "hashes": [ |             "hashes": [ | ||||||
|                 "sha256:a07050845cc9a2f4026a6035cc8ed795a5ce7be6528bbc82032385c10807dfe7", |                 "sha256:217a7705adcb573da5bbe1e0f5cab4fa0bd89fd9342c9159121746f593c2d5a4", | ||||||
|                 "sha256:d719de667218d763e8fd144b7fcfeefd8d434a6201f76bf9f0f0c1fa6f47fcdb" |                 "sha256:a602513f385f1d5785ff1ca420d9c7eb1a1b63381733b2f0ea8188a391314a86" | ||||||
|             ], |             ], | ||||||
|             "index": "pypi", |             "index": "pypi", | ||||||
|             "version": "==1.7.8" |             "version": "==1.7.9" | ||||||
|         }, |         }, | ||||||
|         "sphinxcontrib-websupport": { |         "sphinxcontrib-websupport": { | ||||||
|             "hashes": [ |             "hashes": [ | ||||||
|                 "sha256:68ca7ff70785cbe1e7bccc71a48b5b6d965d79ca50629606c7861a21b206d9dd", |                 "sha256:68ca7ff70785cbe1e7bccc71a48b5b6d965d79ca50629606c7861a21b206d9dd", | ||||||
|                 "sha256:9de47f375baf1ea07cdb3436ff39d7a9c76042c10a769c52353ec46e4e8fc3b9" |                 "sha256:9de47f375baf1ea07cdb3436ff39d7a9c76042c10a769c52353ec46e4e8fc3b9" | ||||||
|             ], |             ], | ||||||
|             "markers": "python_version != '3.3.*' and python_version >= '2.7' and python_version != '3.1.*' and python_version != '3.2.*' and python_version != '3.0.*'", |             "markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'", | ||||||
|             "version": "==1.1.0" |             "version": "==1.1.0" | ||||||
|         }, |         }, | ||||||
|  |         "tox": { | ||||||
|  |             "hashes": [ | ||||||
|  |                 "sha256:37cf240781b662fb790710c6998527e65ca6851eace84d1595ee71f7af4e85f7", | ||||||
|  |                 "sha256:eb61aa5bcce65325538686f09848f04ef679b5cd9b83cc491272099b28739600" | ||||||
|  |             ], | ||||||
|  |             "index": "pypi", | ||||||
|  |             "version": "==3.2.1" | ||||||
|  |         }, | ||||||
|         "traitlets": { |         "traitlets": { | ||||||
|             "hashes": [ |             "hashes": [ | ||||||
|                 "sha256:9c4bd2d267b7153df9152698efb1050a5d84982d3384a37b2c1f7723ba3e7835", |                 "sha256:9c4bd2d267b7153df9152698efb1050a5d84982d3384a37b2c1f7723ba3e7835", | ||||||
| @@ -682,9 +709,17 @@ | |||||||
|                 "sha256:a68ac5e15e76e7e5dd2b8f94007233e01effe3e50e8daddf69acfd81cb686baf", |                 "sha256:a68ac5e15e76e7e5dd2b8f94007233e01effe3e50e8daddf69acfd81cb686baf", | ||||||
|                 "sha256:b5725a0bd4ba422ab0e66e89e030c806576753ea3ee08554382c14e685d117b5" |                 "sha256:b5725a0bd4ba422ab0e66e89e030c806576753ea3ee08554382c14e685d117b5" | ||||||
|             ], |             ], | ||||||
|             "markers": "python_version >= '2.6' and python_version != '3.3.*' and python_version < '4' and python_version != '3.1.*' and python_version != '3.2.*' and python_version != '3.0.*'", |             "markers": "python_version >= '2.6' and python_version != '3.2.*' and python_version != '3.0.*' and python_version != '3.1.*' and python_version < '4' and python_version != '3.3.*'", | ||||||
|             "version": "==1.23" |             "version": "==1.23" | ||||||
|         }, |         }, | ||||||
|  |         "virtualenv": { | ||||||
|  |             "hashes": [ | ||||||
|  |                 "sha256:2ce32cd126117ce2c539f0134eb89de91a8413a29baac49cbab3eb50e2026669", | ||||||
|  |                 "sha256:ca07b4c0b54e14a91af9f34d0919790b016923d157afda5efdde55c96718f752" | ||||||
|  |             ], | ||||||
|  |             "markers": "python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.2.*' and python_version != '3.1.*'", | ||||||
|  |             "version": "==16.0.0" | ||||||
|  |         }, | ||||||
|         "wcwidth": { |         "wcwidth": { | ||||||
|             "hashes": [ |             "hashes": [ | ||||||
|                 "sha256:3df37372226d6e63e1b1e1eda15c594bca98a22d33a23832a90998faa96bc65e", |                 "sha256:3df37372226d6e63e1b1e1eda15c594bca98a22d33a23832a90998faa96bc65e", | ||||||
|   | |||||||
| @@ -1,6 +1,23 @@ | |||||||
| Changelog | Changelog | ||||||
| ######### | ######### | ||||||
|  |  | ||||||
|  | 2.3.0 | ||||||
|  | ===== | ||||||
|  |  | ||||||
|  | * Support for consuming plain text & markdown documents was added by | ||||||
|  |   `Joshua Taillon`_!  This was a long-requested feature, and its addition is | ||||||
|  |   likely to be greatly appreciated by the community: `#395`_  Thanks also to | ||||||
|  |   `David Martin`_ for his assistance on the issue. | ||||||
|  | * `dubit0`_ found & fixed a bug that prevented management commands from running | ||||||
|  |   before we had an operational database: `#396`_ | ||||||
|  | * Joshua also added a simple update to the thumbnail generation process to | ||||||
|  |   improve performance: `#399`_ | ||||||
|  | * As his last bit of effort on this release, Joshua also added some code to | ||||||
|  |   allow you to view the documents inline rather than download them as an | ||||||
|  |   attachment. `#400`_ | ||||||
|  | * Finally, `ahyear`_ found a slip in the Docker documentation and patched it. `#401`_ | ||||||
|  |  | ||||||
|  |  | ||||||
| 2.2.1 | 2.2.1 | ||||||
| ===== | ===== | ||||||
|  |  | ||||||
| @@ -19,6 +36,10 @@ Changelog | |||||||
|   easier on those of us with lots of different tags: `#391`_. |   easier on those of us with lots of different tags: `#391`_. | ||||||
| * `Kilian Koeltzsch`_ noticed a bug in how we capture & automatically create | * `Kilian Koeltzsch`_ noticed a bug in how we capture & automatically create | ||||||
|   tags, so that's fixed now too: `#384`_. |   tags, so that's fixed now too: `#384`_. | ||||||
|  | * `erikarvstedt`_ tweaked the behaviour of the test suite to be better behaved | ||||||
|  |   for packaging environments: `#383`_. | ||||||
|  | * `Lukasz Soluch`_ added CORS support to make building a new Javascript-based front-end | ||||||
|  |   cleaner & easier: `#387`_. | ||||||
|  |  | ||||||
|  |  | ||||||
| 2.1.0 | 2.1.0 | ||||||
| @@ -476,6 +497,10 @@ bulk of the work on this big change. | |||||||
| .. _Tim Brooks: https://github.com/brookst | .. _Tim Brooks: https://github.com/brookst | ||||||
| .. _Stéphane Brunner: https://github.com/sbrunner | .. _Stéphane Brunner: https://github.com/sbrunner | ||||||
| .. _Kilian Koeltzsch: https://github.com/kiliankoe | .. _Kilian Koeltzsch: https://github.com/kiliankoe | ||||||
|  | .. _Lukasz Soluch: https://github.com/LukaszSolo | ||||||
|  | .. _Joshua Taillon: https://github.com/jat255 | ||||||
|  | .. _dubit0:  https://github.com/dubit0 | ||||||
|  | .. _ahyear:  https://github.com/ahyear | ||||||
|  |  | ||||||
| .. _#20: https://github.com/danielquinn/paperless/issues/20 | .. _#20: https://github.com/danielquinn/paperless/issues/20 | ||||||
| .. _#44: https://github.com/danielquinn/paperless/issues/44 | .. _#44: https://github.com/danielquinn/paperless/issues/44 | ||||||
| @@ -550,11 +575,18 @@ bulk of the work on this big change. | |||||||
| .. _#374: https://github.com/danielquinn/paperless/pull/374 | .. _#374: https://github.com/danielquinn/paperless/pull/374 | ||||||
| .. _#375: https://github.com/danielquinn/paperless/pull/375 | .. _#375: https://github.com/danielquinn/paperless/pull/375 | ||||||
| .. _#376: https://github.com/danielquinn/paperless/pull/376 | .. _#376: https://github.com/danielquinn/paperless/pull/376 | ||||||
|  | .. _#383: https://github.com/danielquinn/paperless/pull/383 | ||||||
| .. _#384: https://github.com/danielquinn/paperless/issues/384 | .. _#384: https://github.com/danielquinn/paperless/issues/384 | ||||||
| .. _#386: https://github.com/danielquinn/paperless/issues/386 | .. _#386: https://github.com/danielquinn/paperless/issues/386 | ||||||
|  | .. _#387: https://github.com/danielquinn/paperless/pull/387 | ||||||
| .. _#391: https://github.com/danielquinn/paperless/pull/391 | .. _#391: https://github.com/danielquinn/paperless/pull/391 | ||||||
| .. _#390: https://github.com/danielquinn/paperless/pull/390 | .. _#390: https://github.com/danielquinn/paperless/pull/390 | ||||||
| .. _#392: https://github.com/danielquinn/paperless/issues/392 | .. _#392: https://github.com/danielquinn/paperless/issues/392 | ||||||
|  | .. _#395: https://github.com/danielquinn/paperless/pull/395 | ||||||
|  | .. _#396: https://github.com/danielquinn/paperless/pull/396 | ||||||
|  | .. _#399: https://github.com/danielquinn/paperless/pull/399 | ||||||
|  | .. _#400: https://github.com/danielquinn/paperless/pull/400 | ||||||
|  | .. _#401: https://github.com/danielquinn/paperless/pull/401 | ||||||
|  |  | ||||||
| .. _pipenv: https://docs.pipenv.org/ | .. _pipenv: https://docs.pipenv.org/ | ||||||
| .. _a new home on Docker Hub: https://hub.docker.com/r/danielquinn/paperless/ | .. _a new home on Docker Hub: https://hub.docker.com/r/danielquinn/paperless/ | ||||||
|   | |||||||
| @@ -101,6 +101,7 @@ is similar: | |||||||
|     $ cd /path/to/project |     $ cd /path/to/project | ||||||
|     $ git pull |     $ git pull | ||||||
|     $ docker build -t paperless . |     $ docker build -t paperless . | ||||||
|  |     $ docker-compose run --rm consumer migrate | ||||||
|     $ docker-compose up -d |     $ docker-compose up -d | ||||||
|  |  | ||||||
| If ``git pull`` doesn't report any changes, there is no need to continue with | If ``git pull`` doesn't report any changes, there is no need to continue with | ||||||
|   | |||||||
							
								
								
									
										0
									
								
								docs/requirements.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										0
									
								
								docs/requirements.txt
									
									
									
									
									
										Normal file
									
								
							| @@ -111,9 +111,10 @@ PAPERLESS_DEBUG="false" | |||||||
| # as is "example.com,www.example.com", but NOT " example.com" or "example.com," | # as is "example.com,www.example.com", but NOT " example.com" or "example.com," | ||||||
| #PAPERLESS_ALLOWED_HOSTS="example.com,www.example.com" | #PAPERLESS_ALLOWED_HOSTS="example.com,www.example.com" | ||||||
|  |  | ||||||
| # If you decide to use Paperless APIs in an ajax calls, you need to add your | # If you decide to use the Paperless API in an ajax call, you need to add your | ||||||
| # servers to the allowed hosts that can do CORS calls. By default Paperless allows  | # servers to the list of allowed hosts that can do CORS calls. By default | ||||||
| # calls from localhost:8080. The same rules as above how the list should look like. | # Paperless allows calls from localhost:8080, but if you'd like to change that, | ||||||
|  | # you can set this value to a comma-separated list. | ||||||
| #PAPERLESS_CORS_ALLOWED_HOSTS="localhost:8080,example.com,localhost:8000" | #PAPERLESS_CORS_ALLOWED_HOSTS="localhost:8080,example.com,localhost:8000" | ||||||
|  |  | ||||||
| # To host paperless under a subpath url like example.com/paperless you set | # To host paperless under a subpath url like example.com/paperless you set | ||||||
| @@ -138,6 +139,10 @@ PAPERLESS_DEBUG="false" | |||||||
| # http://paperless.readthedocs.org/en/latest/consumption.html#hooking-into-the-consumption-process | # http://paperless.readthedocs.org/en/latest/consumption.html#hooking-into-the-consumption-process | ||||||
| #PAPERLESS_POST_CONSUME_SCRIPT="/path/to/an/arbitrary/script.sh" | #PAPERLESS_POST_CONSUME_SCRIPT="/path/to/an/arbitrary/script.sh" | ||||||
|  |  | ||||||
|  | # By default, when clicking on a document within the web interface, the | ||||||
|  | # browser will prompt the user to save the document to disk. By setting this to | ||||||
|  | # "true", the document will instead be opened in the browser, if possible. | ||||||
|  | #PAPERLESS_INLINE_DOC="false" | ||||||
|  |  | ||||||
| # | # | ||||||
| # The following values use sensible defaults for modern systems, but if you're | # The following values use sensible defaults for modern systems, but if you're | ||||||
|   | |||||||
| @@ -29,7 +29,7 @@ pillow==5.2.0 | |||||||
| pluggy==0.7.1; python_version != '3.1.*' | pluggy==0.7.1; python_version != '3.1.*' | ||||||
| py==1.6.0; python_version != '3.1.*' | py==1.6.0; python_version != '3.1.*' | ||||||
| pycodestyle==2.4.0 | pycodestyle==2.4.0 | ||||||
| pyocr==0.5.2 | pyocr==0.5.3 | ||||||
| pytest-cov==2.5.1 | pytest-cov==2.5.1 | ||||||
| pytest-django==3.4.2 | pytest-django==3.4.2 | ||||||
| pytest-env==0.6.2 | pytest-env==0.6.2 | ||||||
|   | |||||||
| @@ -1,24 +1,24 @@ | |||||||
| # coding=utf-8 | # coding=utf-8 | ||||||
|  |  | ||||||
| import dateutil.parser |  | ||||||
| import logging | import logging | ||||||
| import os | import os | ||||||
| import re | import re | ||||||
| import uuid | import uuid | ||||||
|  |  | ||||||
| from collections import OrderedDict | from collections import OrderedDict | ||||||
|  |  | ||||||
|  | import dateutil.parser | ||||||
|  | from django.conf import settings | ||||||
|  | from django.db import models | ||||||
|  | from django.template.defaultfilters import slugify | ||||||
|  | from django.utils import timezone | ||||||
| from fuzzywuzzy import fuzz | from fuzzywuzzy import fuzz | ||||||
|  |  | ||||||
| from django.conf import settings | from .managers import LogManager | ||||||
|  |  | ||||||
| try: | try: | ||||||
|     from django.core.urlresolvers import reverse |     from django.core.urlresolvers import reverse | ||||||
| except ImportError: | except ImportError: | ||||||
|     from django.urls import reverse |     from django.urls import reverse | ||||||
| from django.db import models |  | ||||||
| from django.template.defaultfilters import slugify |  | ||||||
| from django.utils import timezone |  | ||||||
|  |  | ||||||
| from .managers import LogManager |  | ||||||
|  |  | ||||||
|  |  | ||||||
| class MatchingModel(models.Model): | class MatchingModel(models.Model): | ||||||
| @@ -94,7 +94,11 @@ class Document(models.Model): | |||||||
|     TYPE_JPG = "jpg" |     TYPE_JPG = "jpg" | ||||||
|     TYPE_GIF = "gif" |     TYPE_GIF = "gif" | ||||||
|     TYPE_TIF = "tiff" |     TYPE_TIF = "tiff" | ||||||
|     TYPES = (TYPE_PDF, TYPE_PNG, TYPE_JPG, TYPE_GIF, TYPE_TIF,) |     TYPE_TXT = "txt" | ||||||
|  |     TYPE_CSV = "csv" | ||||||
|  |     TYPE_MD = "md" | ||||||
|  |     TYPES = (TYPE_PDF, TYPE_PNG, TYPE_JPG, TYPE_GIF, TYPE_TIF, | ||||||
|  |              TYPE_TXT, TYPE_CSV, TYPE_MD) | ||||||
|  |  | ||||||
|     STORAGE_TYPE_UNENCRYPTED = "unencrypted" |     STORAGE_TYPE_UNENCRYPTED = "unencrypted" | ||||||
|     STORAGE_TYPE_GPG = "gpg" |     STORAGE_TYPE_GPG = "gpg" | ||||||
| @@ -282,51 +286,52 @@ class FileInfo: | |||||||
|         ) |         ) | ||||||
|     ) |     ) | ||||||
|  |  | ||||||
|  |     formats = "pdf|jpe?g|png|gif|tiff?|te?xt|md|csv" | ||||||
|     REGEXES = OrderedDict([ |     REGEXES = OrderedDict([ | ||||||
|         ("created-correspondent-title-tags", re.compile( |         ("created-correspondent-title-tags", re.compile( | ||||||
|             r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - " |             r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - " | ||||||
|             r"(?P<correspondent>.*) - " |             r"(?P<correspondent>.*) - " | ||||||
|             r"(?P<title>.*) - " |             r"(?P<title>.*) - " | ||||||
|             r"(?P<tags>[a-z0-9\-,]*)" |             r"(?P<tags>[a-z0-9\-,]*)" | ||||||
|             r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$", |             r"\.(?P<extension>{})$".format(formats), | ||||||
|             flags=re.IGNORECASE |             flags=re.IGNORECASE | ||||||
|         )), |         )), | ||||||
|         ("created-title-tags", re.compile( |         ("created-title-tags", re.compile( | ||||||
|             r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - " |             r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - " | ||||||
|             r"(?P<title>.*) - " |             r"(?P<title>.*) - " | ||||||
|             r"(?P<tags>[a-z0-9\-,]*)" |             r"(?P<tags>[a-z0-9\-,]*)" | ||||||
|             r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$", |             r"\.(?P<extension>{})$".format(formats), | ||||||
|             flags=re.IGNORECASE |             flags=re.IGNORECASE | ||||||
|         )), |         )), | ||||||
|         ("created-correspondent-title", re.compile( |         ("created-correspondent-title", re.compile( | ||||||
|             r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - " |             r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - " | ||||||
|             r"(?P<correspondent>.*) - " |             r"(?P<correspondent>.*) - " | ||||||
|             r"(?P<title>.*)" |             r"(?P<title>.*)" | ||||||
|             r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$", |             r"\.(?P<extension>{})$".format(formats), | ||||||
|             flags=re.IGNORECASE |             flags=re.IGNORECASE | ||||||
|         )), |         )), | ||||||
|         ("created-title", re.compile( |         ("created-title", re.compile( | ||||||
|             r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - " |             r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - " | ||||||
|             r"(?P<title>.*)" |             r"(?P<title>.*)" | ||||||
|             r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$", |             r"\.(?P<extension>{})$".format(formats), | ||||||
|             flags=re.IGNORECASE |             flags=re.IGNORECASE | ||||||
|         )), |         )), | ||||||
|         ("correspondent-title-tags", re.compile( |         ("correspondent-title-tags", re.compile( | ||||||
|             r"(?P<correspondent>.*) - " |             r"(?P<correspondent>.*) - " | ||||||
|             r"(?P<title>.*) - " |             r"(?P<title>.*) - " | ||||||
|             r"(?P<tags>[a-z0-9\-,]*)" |             r"(?P<tags>[a-z0-9\-,]*)" | ||||||
|             r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$", |             r"\.(?P<extension>{})$".format(formats), | ||||||
|             flags=re.IGNORECASE |             flags=re.IGNORECASE | ||||||
|         )), |         )), | ||||||
|         ("correspondent-title", re.compile( |         ("correspondent-title", re.compile( | ||||||
|             r"(?P<correspondent>.*) - " |             r"(?P<correspondent>.*) - " | ||||||
|             r"(?P<title>.*)?" |             r"(?P<title>.*)?" | ||||||
|             r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$", |             r"\.(?P<extension>{})$".format(formats), | ||||||
|             flags=re.IGNORECASE |             flags=re.IGNORECASE | ||||||
|         )), |         )), | ||||||
|         ("title", re.compile( |         ("title", re.compile( | ||||||
|             r"(?P<title>.*)" |             r"(?P<title>.*)" | ||||||
|             r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$", |             r"\.(?P<extension>{})$".format(formats), | ||||||
|             flags=re.IGNORECASE |             flags=re.IGNORECASE | ||||||
|         )) |         )) | ||||||
|     ]) |     ]) | ||||||
|   | |||||||
| @@ -1,9 +1,25 @@ | |||||||
| import logging | import logging | ||||||
| import shutil | import shutil | ||||||
| import tempfile | import tempfile | ||||||
|  | import re | ||||||
|  |  | ||||||
| from django.conf import settings | from django.conf import settings | ||||||
|  |  | ||||||
# Candidate-date matcher shared by the document parsers.  The pattern is the
# alternation of four shapes, tried in this order:
# 1. numeric: 1-2 digits, 1-2 digits and a 4- or 2-digit year, separated by
#    ".", "/" or "-"                                  (e.g. 12.03.2018, 1/2/18)
# 2. 1-2 digits, dots/spaces, a 3-9 character non-space token, then a 4- or
#    2-digit year                                     (e.g. 12. March 2018)
# 3. a 3-9 letter word, 1-2 digits, a comma and a 4-digit year
#                                                     (e.g. March 5, 2017)
# 4. a 3-9 letter word followed by a 4-digit year     (e.g. March 2017)
# Matches are only candidates; callers still have to parse them into dates.
DATE_REGEX = re.compile("|".join((
    r'\b([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})\b',
    r'\b([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))\b',
    r'\b([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))\b',
    r'\b([^\W\d_]{3,9} [0-9]{4})\b',
)))
|  |  | ||||||
|  |  | ||||||
| class ParseError(Exception): | class ParseError(Exception): | ||||||
|     pass |     pass | ||||||
|   | |||||||
| @@ -166,7 +166,7 @@ class TestMatching(TestCase): | |||||||
|     def test_match_regex(self): |     def test_match_regex(self): | ||||||
|  |  | ||||||
|         self._test_matching( |         self._test_matching( | ||||||
|             "alpha\w+gamma", |             r"alpha\w+gamma", | ||||||
|             "MATCH_REGEX", |             "MATCH_REGEX", | ||||||
|             ( |             ( | ||||||
|                 "I have alpha_and_gamma in me", |                 "I have alpha_and_gamma in me", | ||||||
|   | |||||||
| @@ -1,6 +1,8 @@ | |||||||
| from django.http import HttpResponse, HttpResponseBadRequest | from django.http import HttpResponse, HttpResponseBadRequest | ||||||
| from django.views.generic import DetailView, FormView, TemplateView | from django.views.generic import DetailView, FormView, TemplateView | ||||||
| from django_filters.rest_framework import DjangoFilterBackend | from django_filters.rest_framework import DjangoFilterBackend | ||||||
|  | from django.conf import settings | ||||||
|  |  | ||||||
| from paperless.db import GnuPG | from paperless.db import GnuPG | ||||||
| from paperless.mixins import SessionOrBasicAuthMixin | from paperless.mixins import SessionOrBasicAuthMixin | ||||||
| from paperless.views import StandardPagination | from paperless.views import StandardPagination | ||||||
| @@ -48,6 +50,9 @@ class FetchView(SessionOrBasicAuthMixin, DetailView): | |||||||
|             Document.TYPE_JPG: "image/jpeg", |             Document.TYPE_JPG: "image/jpeg", | ||||||
|             Document.TYPE_GIF: "image/gif", |             Document.TYPE_GIF: "image/gif", | ||||||
|             Document.TYPE_TIF: "image/tiff", |             Document.TYPE_TIF: "image/tiff", | ||||||
|  |             Document.TYPE_CSV: "text/csv", | ||||||
|  |             Document.TYPE_MD:  "text/markdown", | ||||||
|  |             Document.TYPE_TXT: "text/plain" | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         if self.kwargs["kind"] == "thumb": |         if self.kwargs["kind"] == "thumb": | ||||||
| @@ -60,8 +65,11 @@ class FetchView(SessionOrBasicAuthMixin, DetailView): | |||||||
|             self._get_raw_data(self.object.source_file), |             self._get_raw_data(self.object.source_file), | ||||||
|             content_type=content_types[self.object.file_type] |             content_type=content_types[self.object.file_type] | ||||||
|         ) |         ) | ||||||
|         response["Content-Disposition"] = 'attachment; filename="{}"'.format( |  | ||||||
|             self.object.file_name) |         DISPOSITION = 'inline' if settings.INLINE_DOC else 'attachment' | ||||||
|  |  | ||||||
|  |         response["Content-Disposition"] = '{}; filename="{}"'.format( | ||||||
|  |             DISPOSITION, self.object.file_name) | ||||||
|  |  | ||||||
|         return response |         return response | ||||||
|  |  | ||||||
|   | |||||||
| @@ -22,6 +22,14 @@ elif os.path.exists("/usr/local/etc/paperless.conf"): | |||||||
|     load_dotenv("/usr/local/etc/paperless.conf") |     load_dotenv("/usr/local/etc/paperless.conf") | ||||||
|  |  | ||||||
|  |  | ||||||
def __get_boolean(key, default="NO"):
    """
    Return a boolean value based on whatever the user has supplied in the
    environment based on whether the value "looks like" it's True or not.

    ``key`` is the name of the environment variable to read.  ``default`` is
    the string used when the variable is not set at all; it is interpreted
    by the same rules as a user-supplied value, so ``default="YES"`` makes
    the setting true unless it is explicitly switched off.

    The comparison is case-insensitive and only "yes", "y", "1", "t" and
    "true" count as true; anything else is false.
    """
    return bool(os.getenv(key, default).lower() in ("yes", "y", "1", "t", "true"))
|  |  | ||||||
|  |  | ||||||
| # Build paths inside the project like this: os.path.join(BASE_DIR, ...) | # Build paths inside the project like this: os.path.join(BASE_DIR, ...) | ||||||
| BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) | BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) | ||||||
|  |  | ||||||
| @@ -67,6 +75,7 @@ INSTALLED_APPS = [ | |||||||
|     "documents.apps.DocumentsConfig", |     "documents.apps.DocumentsConfig", | ||||||
|     "reminders.apps.RemindersConfig", |     "reminders.apps.RemindersConfig", | ||||||
|     "paperless_tesseract.apps.PaperlessTesseractConfig", |     "paperless_tesseract.apps.PaperlessTesseractConfig", | ||||||
|  |     "paperless_text.apps.PaperlessTextConfig", | ||||||
|  |  | ||||||
|     "django.contrib.admin", |     "django.contrib.admin", | ||||||
|  |  | ||||||
| @@ -226,12 +235,12 @@ OCR_LANGUAGE = os.getenv("PAPERLESS_OCR_LANGUAGE", "eng") | |||||||
| OCR_THREADS = os.getenv("PAPERLESS_OCR_THREADS") | OCR_THREADS = os.getenv("PAPERLESS_OCR_THREADS") | ||||||
|  |  | ||||||
| # OCR all documents? | # OCR all documents? | ||||||
| OCR_ALWAYS = bool(os.getenv("PAPERLESS_OCR_ALWAYS", "NO").lower() in ("yes", "y", "1", "t", "true"))  # NOQA | OCR_ALWAYS = __get_boolean("PAPERLESS_OCR_ALWAYS") | ||||||
|  |  | ||||||
| # If this is true, any failed attempts to OCR a PDF will result in the PDF | # If this is true, any failed attempts to OCR a PDF will result in the PDF | ||||||
| # being indexed anyway, with whatever we could get.  If it's False, the file | # being indexed anyway, with whatever we could get.  If it's False, the file | ||||||
| # will simply be left in the CONSUMPTION_DIR. | # will simply be left in the CONSUMPTION_DIR. | ||||||
# This setting used to default to "YES" when the variable was unset, so only
# consult the environment when the user actually supplied a value; otherwise
# keep forgiving failed OCR attempts.
FORGIVING_OCR = (
    __get_boolean("PAPERLESS_FORGIVING_OCR")
    if "PAPERLESS_FORGIVING_OCR" in os.environ else True
)
|  |  | ||||||
| # GNUPG needs a home directory for some reason | # GNUPG needs a home directory for some reason | ||||||
| GNUPG_HOME = os.getenv("HOME", "/tmp") | GNUPG_HOME = os.getenv("HOME", "/tmp") | ||||||
| @@ -275,6 +284,9 @@ PASSPHRASE = os.getenv("PAPERLESS_PASSPHRASE") | |||||||
| PRE_CONSUME_SCRIPT = os.getenv("PAPERLESS_PRE_CONSUME_SCRIPT") | PRE_CONSUME_SCRIPT = os.getenv("PAPERLESS_PRE_CONSUME_SCRIPT") | ||||||
| POST_CONSUME_SCRIPT = os.getenv("PAPERLESS_POST_CONSUME_SCRIPT") | POST_CONSUME_SCRIPT = os.getenv("PAPERLESS_POST_CONSUME_SCRIPT") | ||||||
|  |  | ||||||
|  | # Whether to display a selected document inline, or download it as attachment: | ||||||
|  | INLINE_DOC = __get_boolean("PAPERLESS_INLINE_DOC") | ||||||
|  |  | ||||||
| # The number of items on each page in the web UI.  This value must be a | # The number of items on each page in the web UI.  This value must be a | ||||||
| # positive integer, but if you don't define one in paperless.conf, a default of | # positive integer, but if you don't define one in paperless.conf, a default of | ||||||
| # 100 will be used. | # 100 will be used. | ||||||
|   | |||||||
| @@ -1 +1 @@ | |||||||
| __version__ = (2, 2, 1) | __version__ = (2, 3, 0) | ||||||
|   | |||||||
| @@ -14,7 +14,7 @@ from pyocr.libtesseract.tesseract_raw import \ | |||||||
| from pyocr.tesseract import TesseractError | from pyocr.tesseract import TesseractError | ||||||
|  |  | ||||||
| import pdftotext | import pdftotext | ||||||
| from documents.parsers import DocumentParser, ParseError | from documents.parsers import DocumentParser, ParseError, DATE_REGEX | ||||||
|  |  | ||||||
| from .languages import ISO639 | from .languages import ISO639 | ||||||
|  |  | ||||||
| @@ -50,10 +50,11 @@ class RasterisedDocumentParser(DocumentParser): | |||||||
|             self.CONVERT, |             self.CONVERT, | ||||||
|             "-scale", "500x5000", |             "-scale", "500x5000", | ||||||
|             "-alpha", "remove", |             "-alpha", "remove", | ||||||
|             self.document_path, os.path.join(self.tempdir, "convert-%04d.png") |             "{}[0]".format(self.document_path), | ||||||
|  |             os.path.join(self.tempdir, "convert.png") | ||||||
|         ) |         ) | ||||||
|  |  | ||||||
|         return os.path.join(self.tempdir, "convert-0000.png") |         return os.path.join(self.tempdir, "convert.png") | ||||||
|  |  | ||||||
|     def _is_ocred(self): |     def _is_ocred(self): | ||||||
|  |  | ||||||
| @@ -210,22 +211,8 @@ class RasterisedDocumentParser(DocumentParser): | |||||||
|         except ParseError as e: |         except ParseError as e: | ||||||
|             return None |             return None | ||||||
|  |  | ||||||
|         # This regular expression will try to find dates in the document at |  | ||||||
|         # hand and will match the following formats: |  | ||||||
|         # - XX.YY.ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits |  | ||||||
|         # - XX/YY/ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits |  | ||||||
|         # - XX-YY-ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits |  | ||||||
|         # - XX. MONTH ZZZZ with XX being 1 or 2 and ZZZZ being 2 or 4 digits |  | ||||||
|         # - MONTH ZZZZ, with ZZZZ being 4 digits |  | ||||||
|         # - MONTH XX, ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits |  | ||||||
|         pattern = re.compile( |  | ||||||
|             r'\b([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})\b|' + |  | ||||||
|             r'\b([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))\b|' + |  | ||||||
|             r'\b([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))\b|' + |  | ||||||
|             r'\b([^\W\d_]{3,9} [0-9]{4})\b') |  | ||||||
|  |  | ||||||
|         # Iterate through all regex matches and try to parse the date |         # Iterate through all regex matches and try to parse the date | ||||||
|         for m in re.finditer(pattern, text): |         for m in re.finditer(DATE_REGEX, text): | ||||||
|             datestring = m.group(0) |             datestring = m.group(0) | ||||||
|  |  | ||||||
|             try: |             try: | ||||||
| @@ -272,8 +259,9 @@ def run_unpaper(args): | |||||||
| def strip_excess_whitespace(text): | def strip_excess_whitespace(text): | ||||||
|     collapsed_spaces = re.sub(r"([^\S\r\n]+)", " ", text) |     collapsed_spaces = re.sub(r"([^\S\r\n]+)", " ", text) | ||||||
|     no_leading_whitespace = re.sub( |     no_leading_whitespace = re.sub( | ||||||
|         "([\n\r]+)([^\S\n\r]+)", '\\1', collapsed_spaces) |         r"([\n\r]+)([^\S\n\r]+)", '\\1', collapsed_spaces) | ||||||
|     no_trailing_whitespace = re.sub("([^\S\n\r]+)$", '', no_leading_whitespace) |     no_trailing_whitespace = re.sub( | ||||||
|  |         r"([^\S\n\r]+)$", '', no_leading_whitespace) | ||||||
|     return no_trailing_whitespace |     return no_trailing_whitespace | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
| @@ -5,7 +5,7 @@ from .parsers import RasterisedDocumentParser | |||||||
|  |  | ||||||
| class ConsumerDeclaration: | class ConsumerDeclaration: | ||||||
|  |  | ||||||
|     MATCHING_FILES = re.compile("^.*\.(pdf|jpe?g|gif|png|tiff?|pnm|bmp)$") |     MATCHING_FILES = re.compile(r"^.*\.(pdf|jpe?g|gif|png|tiff?|pnm|bmp)$") | ||||||
|  |  | ||||||
|     @classmethod |     @classmethod | ||||||
|     def handle(cls, sender, **kwargs): |     def handle(cls, sender, **kwargs): | ||||||
|   | |||||||
							
								
								
									
										0
									
								
								src/paperless_text/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										0
									
								
								src/paperless_text/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
								
								
									
										16
									
								
								src/paperless_text/apps.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										16
									
								
								src/paperless_text/apps.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,16 @@ | |||||||
|  | from django.apps import AppConfig | ||||||
|  |  | ||||||
|  |  | ||||||
class PaperlessTextConfig(AppConfig):
    """Application configuration for the ``paperless_text`` app."""

    name = "paperless_text"

    def ready(self):
        """
        Register this app's consumer declaration with the
        ``document_consumer_declaration`` signal, then run the base hook.
        """
        # Both imports are deliberately kept local to ready().
        from documents.signals import document_consumer_declaration as signal

        from .signals import ConsumerDeclaration as declaration

        signal.connect(declaration.handle)

        super().ready()
							
								
								
									
										131
									
								
								src/paperless_text/parsers.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										131
									
								
								src/paperless_text/parsers.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,131 @@ | |||||||
|  | import os | ||||||
|  | import re | ||||||
|  | import subprocess | ||||||
|  |  | ||||||
|  | import dateparser | ||||||
|  | from django.conf import settings | ||||||
|  |  | ||||||
|  | from documents.parsers import DocumentParser, ParseError, DATE_REGEX | ||||||
|  |  | ||||||
|  |  | ||||||
class TextDocumentParser(DocumentParser):
    """
    This parser directly parses a text document (.txt, .md, or .csv)
    """

    CONVERT = settings.CONVERT_BINARY
    # NOTE(review): THREADS, UNPAPER, DEFAULT_OCR_LANGUAGE and OCR_ALWAYS are
    # not read anywhere in this class; they are kept so the class attributes
    # stay available to any outside code that looks them up.
    THREADS = int(settings.OCR_THREADS) if settings.OCR_THREADS else None
    UNPAPER = settings.UNPAPER_BINARY
    DATE_ORDER = settings.DATE_ORDER
    DEFAULT_OCR_LANGUAGE = settings.OCR_LANGUAGE
    OCR_ALWAYS = settings.OCR_ALWAYS

    def __init__(self, path):
        super().__init__(path)
        # Cache for get_text() so the file is read at most once.
        self._text = None

    def get_thumbnail(self):
        """
        The thumbnail of a txt is just a 500px wide image of the text
        rendered onto a letter-sized page.

        Returns the path of the generated PNG inside ``self.tempdir``.
        Raises ParseError (via run_command) if any convert step fails.
        """
        # The below is heavily cribbed from https://askubuntu.com/a/590951

        bg_color = "white"  # bg color
        text_color = "black"  # text color
        psize = [500, 647]  # icon size
        n_lines = 50  # number of lines to show
        output_file = os.path.join(self.tempdir, "convert-txt.png")

        temp_bg = os.path.join(self.tempdir, "bg.png")
        temp_txlayer = os.path.join(self.tempdir, "tx.png")
        picsize = "x".join(str(n) for n in psize)
        # The text layer is 8px smaller to leave room for the 4px border.
        txsize = "x".join(str(n - 8) for n in psize)

        def create_bg():
            # Rounded rectangle covering the whole canvas; the corner radius
            # is a tenth of the width.
            work_size = ",".join(str(n - 1) for n in psize)
            r = str(round(psize[0] / 10))
            draw = "fill {} roundrectangle 0,0,{},{},{}".format(
                bg_color, work_size, r, r)
            run_command(self.CONVERT,
                        "-size", picsize,
                        "xc:none",
                        "-draw", draw,
                        temp_bg)

        def read_text():
            # Only the first n_lines lines are rendered, so stop reading
            # there instead of loading the whole file.
            with open(self.document_path, 'r') as src:
                lines = [line.strip() for _, line in zip(range(n_lines), src)]
            return "\n".join(lines)

        def create_txlayer():
            # Document text is untrusted, so it is handed to convert as a
            # single argv entry and never passes through a shell.  The space
            # after "caption:" mirrors what the previous shell command
            # produced; NOTE(review): it should also keep text beginning
            # with "@" from being read as a file reference -- confirm
            # against the ImageMagick caption: documentation.
            run_command(self.CONVERT,
                        "-background", "none",
                        "-fill", text_color,
                        "-pointsize", "12",
                        "-border", "4",
                        "-bordercolor", "none",
                        "-size", txsize,
                        "caption: " + read_text(),
                        temp_txlayer)

        create_txlayer()
        create_bg()
        run_command(self.CONVERT, temp_bg, temp_txlayer,
                    "-background", "None",
                    "-layers", "merge",
                    output_file)

        return output_file

    def get_text(self):
        """
        Return the full content of the document, reading the file on the
        first call and serving the cached string afterwards.
        """

        if self._text is not None:
            return self._text

        with open(self.document_path, 'r') as f:
            self._text = f.read()

        return self._text

    def get_date(self):
        """
        Return the first date found in the document text that dateparser
        can parse, or None if there is none.
        """
        date = None
        datestring = None

        try:
            text = self.get_text()
        except ParseError:
            return None

        # Iterate through all regex matches and try to parse the date
        for m in re.finditer(DATE_REGEX, text):
            datestring = m.group(0)

            try:
                date = dateparser.parse(
                           datestring,
                           settings={'DATE_ORDER': self.DATE_ORDER,
                                     'PREFER_DAY_OF_MONTH': 'first',
                                     'RETURN_AS_TIMEZONE_AWARE': True})
            except TypeError:
                # Skip all matches that do not parse to a proper date
                continue

            if date is not None:
                break

        if date is not None:
            self.log("info", "Detected document date " + date.isoformat() +
                             " based on string " + datestring)
        else:
            self.log("info", "Unable to detect date for document")

        return date


def run_command(*args):
    """
    Run an external command and raise ParseError if it does not succeed.

    Each positional argument is one argv entry (program first).  The command
    is executed directly, without a shell, so arguments containing spaces,
    quotes or shell metacharacters are passed through literally.

    MAGICK_MEMORY_LIMIT / MAGICK_TMPDIR are added to the child environment
    when the matching CONVERT_* settings are set.
    """
    environment = os.environ.copy()
    if settings.CONVERT_MEMORY_LIMIT:
        environment["MAGICK_MEMORY_LIMIT"] = settings.CONVERT_MEMORY_LIMIT
    if settings.CONVERT_TMPDIR:
        environment["MAGICK_TMPDIR"] = settings.CONVERT_TMPDIR

    try:
        returncode = subprocess.call(list(args), env=environment)
    except OSError as e:
        # A missing or non-executable binary used to surface as a non-zero
        # shell exit status; keep reporting it as a ParseError.
        raise ParseError("Convert failed at {}: {}".format(args, e)) from e

    if returncode != 0:
        raise ParseError("Convert failed at {}".format(args))
							
								
								
									
										23
									
								
								src/paperless_text/signals.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										23
									
								
								src/paperless_text/signals.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,23 @@ | |||||||
|  | import re | ||||||
|  |  | ||||||
|  | from .parsers import TextDocumentParser | ||||||
|  |  | ||||||
|  |  | ||||||
class ConsumerDeclaration:
    """Tells the consumer which files the text parser offers to handle."""

    MATCHING_FILES = re.compile(r"^.*\.(te?xt|md|csv)$")

    @classmethod
    def handle(cls, sender, **kwargs):
        """Signal receiver: hand back the predicate used to test a file."""
        return cls.test

    @classmethod
    def test(cls, doc):
        """
        Return a parser declaration (parser class and weight) if the
        lower-cased ``doc`` name has a supported extension, else None.
        """
        if cls.MATCHING_FILES.match(doc.lower()) is None:
            return None

        return {"parser": TextDocumentParser, "weight": 10}
		Reference in New Issue
	
	Block a user
	 Jonas Winkler
					Jonas Winkler