mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-10-28 03:46:06 -05:00 
			
		
		
		
	Merge branch 'dev'
This commit is contained in:
		
							
								
								
									
										82
									
								
								Dockerfile
									
									
									
									
									
								
							
							
						
						
									
										82
									
								
								Dockerfile
									
									
									
									
									
								
							| @@ -1,82 +0,0 @@ | |||||||
| ############################################################################### |  | ||||||
| ### Front end                                                               ### |  | ||||||
| ############################################################################### |  | ||||||
|  |  | ||||||
| FROM node:current AS frontend |  | ||||||
|  |  | ||||||
| WORKDIR /usr/src/paperless/src-ui/ |  | ||||||
|  |  | ||||||
| COPY src-ui/package* ./ |  | ||||||
| RUN npm install |  | ||||||
|  |  | ||||||
| COPY src-ui . |  | ||||||
| RUN node_modules/.bin/ng build --prod --output-hashing none --sourceMap=false --output-path dist/paperless-ui |  | ||||||
|  |  | ||||||
| ############################################################################### |  | ||||||
| ### Back end                                                                ### |  | ||||||
| ############################################################################### |  | ||||||
|  |  | ||||||
| FROM ubuntu:20.04 |  | ||||||
|  |  | ||||||
| WORKDIR /usr/src/paperless/ |  | ||||||
|  |  | ||||||
| COPY Pipfile* ./ |  | ||||||
|  |  | ||||||
| #Dependencies |  | ||||||
| RUN apt-get update \ |  | ||||||
|   && DEBIAN_FRONTEND="noninteractive" apt-get -y --no-install-recommends install \ |  | ||||||
| 		build-essential \ |  | ||||||
| 		curl \ |  | ||||||
| 		ghostscript \ |  | ||||||
| 		gnupg \ |  | ||||||
| 		imagemagick \ |  | ||||||
| 		libmagic-dev \ |  | ||||||
| 		libpoppler-cpp-dev \ |  | ||||||
| 		libpq-dev \ |  | ||||||
| 		optipng \ |  | ||||||
| 		python3 \ |  | ||||||
| 		python3-dev \ |  | ||||||
| 		python3-pip \ |  | ||||||
| 		sudo \ |  | ||||||
| 		tesseract-ocr \ |  | ||||||
| 		tesseract-ocr-eng \ |  | ||||||
| 		tesseract-ocr-deu \ |  | ||||||
| 		tesseract-ocr-fra \ |  | ||||||
| 		tesseract-ocr-ita \ |  | ||||||
| 		tesseract-ocr-spa \ |  | ||||||
| 		tzdata \ |  | ||||||
| 		unpaper \ |  | ||||||
| 	&& pip3 install --upgrade pipenv supervisor setuptools \ |  | ||||||
| 	&& pipenv install --system --deploy \ |  | ||||||
| 	&& pipenv --clear \ |  | ||||||
| 	&& apt-get -y purge build-essential python3-pip python3-dev \ |  | ||||||
| 	&& apt-get -y autoremove --purge \ |  | ||||||
| 	&& rm -rf /var/lib/apt/lists/* \ |  | ||||||
| 	&& mkdir /var/log/supervisord /var/run/supervisord |  | ||||||
|  |  | ||||||
| # copy scripts |  | ||||||
| # this fixes issues with imagemagick and PDF |  | ||||||
| COPY docker/imagemagick-policy.xml /etc/ImageMagick-6/policy.xml |  | ||||||
| COPY docker/gunicorn.conf.py ./ |  | ||||||
| COPY docker/supervisord.conf /etc/supervisord.conf |  | ||||||
| COPY docker/docker-entrypoint.sh /sbin/docker-entrypoint.sh |  | ||||||
|  |  | ||||||
| # copy app |  | ||||||
| COPY src/ ./src/ |  | ||||||
| COPY --from=frontend /usr/src/paperless/src-ui/dist/paperless-ui/ ./src/documents/static/frontend/ |  | ||||||
|  |  | ||||||
| # add users, setup scripts |  | ||||||
| RUN addgroup --gid 1000 paperless \ |  | ||||||
| 	&& useradd --uid 1000 --gid paperless --home-dir /usr/src/paperless paperless \ |  | ||||||
| 	&& chown -R paperless:paperless . \ |  | ||||||
| 	&& chmod 755 /sbin/docker-entrypoint.sh |  | ||||||
|  |  | ||||||
| WORKDIR /usr/src/paperless/src/ |  | ||||||
|  |  | ||||||
| RUN sudo -HEu paperless python3 manage.py collectstatic --clear --no-input |  | ||||||
|  |  | ||||||
| VOLUME ["/usr/src/paperless/data", "/usr/src/paperless/media", "/usr/src/paperless/consume", "/usr/src/paperless/export"] |  | ||||||
| ENTRYPOINT ["/sbin/docker-entrypoint.sh"] |  | ||||||
| CMD ["/usr/local/bin/supervisord", "-c", "/etc/supervisord.conf"] |  | ||||||
|  |  | ||||||
| LABEL maintainer="Jonas Winkler <dev@jpwinkler.de>" |  | ||||||
							
								
								
									
										5
									
								
								Pipfile
									
									
									
									
									
								
							
							
						
						
									
										5
									
								
								Pipfile
									
									
									
									
									
								
							| @@ -3,6 +3,11 @@ url = "https://pypi.python.org/simple" | |||||||
| verify_ssl = true | verify_ssl = true | ||||||
| name = "pypi" | name = "pypi" | ||||||
|  |  | ||||||
|  | [[source]] | ||||||
|  | url = "https://www.piwheels.org/simple" | ||||||
|  | verify_ssl = true | ||||||
|  | name = "piwheels" | ||||||
|  |  | ||||||
| [packages] | [packages] | ||||||
| django = "~=3.1" | django = "~=3.1" | ||||||
| pillow = "*" | pillow = "*" | ||||||
|   | |||||||
							
								
								
									
										51
									
								
								Pipfile.lock
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										51
									
								
								Pipfile.lock
									
									
									
										generated
									
									
									
								
							| @@ -1,7 +1,7 @@ | |||||||
| { | { | ||||||
|     "_meta": { |     "_meta": { | ||||||
|         "hash": { |         "hash": { | ||||||
|             "sha256": "d6416e6844126b09200b9839a3abdcf3c24ef5cf70052b8f134d8bc804552c17" |             "sha256": "abc7e5f5a8d075d4b013ceafd06ca07f57e597f053d670f73449ba210511b114" | ||||||
|         }, |         }, | ||||||
|         "pipfile-spec": 6, |         "pipfile-spec": 6, | ||||||
|         "requires": {}, |         "requires": {}, | ||||||
| @@ -10,6 +10,11 @@ | |||||||
|                 "name": "pypi", |                 "name": "pypi", | ||||||
|                 "url": "https://pypi.python.org/simple", |                 "url": "https://pypi.python.org/simple", | ||||||
|                 "verify_ssl": true |                 "verify_ssl": true | ||||||
|  |             }, | ||||||
|  |             { | ||||||
|  |                 "name": "piwheels", | ||||||
|  |                 "url": "https://www.piwheels.org/simple", | ||||||
|  |                 "verify_ssl": true | ||||||
|             } |             } | ||||||
|         ] |         ] | ||||||
|     }, |     }, | ||||||
| @@ -102,6 +107,7 @@ | |||||||
|         }, |         }, | ||||||
|         "filemagic": { |         "filemagic": { | ||||||
|             "hashes": [ |             "hashes": [ | ||||||
|  |                 "sha256:b2fd77411975510e28673220c4b8868ed81b5eb5906339b6f4c233b32122d7d3", | ||||||
|                 "sha256:e684359ef40820fe406f0ebc5bf8a78f89717bdb7fed688af68082d991d6dbf3" |                 "sha256:e684359ef40820fe406f0ebc5bf8a78f89717bdb7fed688af68082d991d6dbf3" | ||||||
|             ], |             ], | ||||||
|             "index": "pypi", |             "index": "pypi", | ||||||
| @@ -142,6 +148,7 @@ | |||||||
|         "langdetect": { |         "langdetect": { | ||||||
|             "hashes": [ |             "hashes": [ | ||||||
|                 "sha256:363795ea005f1243c958e953245dac5d814fabdc025c9afa91588c5fa6b2fa83", |                 "sha256:363795ea005f1243c958e953245dac5d814fabdc025c9afa91588c5fa6b2fa83", | ||||||
|  |                 "sha256:ae53a024643df713274c297c0795dbfb5a16b329902f8e543e7b2d7d45f699e4", | ||||||
|                 "sha256:f37495e63607865e47deed08d78f7f8e58172658216ff954b2f14671bcd87740" |                 "sha256:f37495e63607865e47deed08d78f7f8e58172658216ff954b2f14671bcd87740" | ||||||
|             ], |             ], | ||||||
|             "index": "pypi", |             "index": "pypi", | ||||||
| @@ -162,6 +169,7 @@ | |||||||
|                 "sha256:448ebb1b3bf64c0267d6b09a7cba26b5ae61b6d2dbabff7c91b660c7eccf2bdb", |                 "sha256:448ebb1b3bf64c0267d6b09a7cba26b5ae61b6d2dbabff7c91b660c7eccf2bdb", | ||||||
|                 "sha256:50e86c076611212ca62e5a59f518edafe0c0730f7d9195fec718da1a5c2bb1fc", |                 "sha256:50e86c076611212ca62e5a59f518edafe0c0730f7d9195fec718da1a5c2bb1fc", | ||||||
|                 "sha256:5734bdc0342aba9dfc6f04920988140fb41234db42381cf7ccba64169f9fe7ac", |                 "sha256:5734bdc0342aba9dfc6f04920988140fb41234db42381cf7ccba64169f9fe7ac", | ||||||
|  |                 "sha256:5ddd1dfa2be066595c1993165b4cae84b9866b12339d0c903db7f21a094324a3", | ||||||
|                 "sha256:64324f64f90a9e4ef732be0928be853eee378fd6a01be21a0a8469c4f2682c83", |                 "sha256:64324f64f90a9e4ef732be0928be853eee378fd6a01be21a0a8469c4f2682c83", | ||||||
|                 "sha256:6ae6c680f3ebf1cf7ad1d7748868b39d9f900836df774c453c11c5440bc15b36", |                 "sha256:6ae6c680f3ebf1cf7ad1d7748868b39d9f900836df774c453c11c5440bc15b36", | ||||||
|                 "sha256:6d7593a705d662be5bfe24111af14763016765f43cb6923ed86223f965f52387", |                 "sha256:6d7593a705d662be5bfe24111af14763016765f43cb6923ed86223f965f52387", | ||||||
| @@ -189,7 +197,8 @@ | |||||||
|         }, |         }, | ||||||
|         "pathtools": { |         "pathtools": { | ||||||
|             "hashes": [ |             "hashes": [ | ||||||
|                 "sha256:7c35c5421a39bb82e58018febd90e3b6e5db34c5443aaaf742b3f33d4655f1c0" |                 "sha256:7c35c5421a39bb82e58018febd90e3b6e5db34c5443aaaf742b3f33d4655f1c0", | ||||||
|  |                 "sha256:d77d982475e87f32b82157a43b09f0a5ef3e66c1d8f3c7eb8d2580e783cd8202" | ||||||
|             ], |             ], | ||||||
|             "version": "==0.1.2" |             "version": "==0.1.2" | ||||||
|         }, |         }, | ||||||
| @@ -217,6 +226,7 @@ | |||||||
|                 "sha256:2fb113757a369a6cdb189f8df3226e995acfed0a8919a72416626af1a0a71140", |                 "sha256:2fb113757a369a6cdb189f8df3226e995acfed0a8919a72416626af1a0a71140", | ||||||
|                 "sha256:4b0ef2470c4979e345e4e0cc1bbac65fda11d0d7b789dbac035e4c6ce3f98adb", |                 "sha256:4b0ef2470c4979e345e4e0cc1bbac65fda11d0d7b789dbac035e4c6ce3f98adb", | ||||||
|                 "sha256:59e903ca800c8cfd1ebe482349ec7c35687b95e98cefae213e271c8c7fffa021", |                 "sha256:59e903ca800c8cfd1ebe482349ec7c35687b95e98cefae213e271c8c7fffa021", | ||||||
|  |                 "sha256:5a3342d34289715928c914ee7f389351eb37fa4857caa9297fc7948f2ed3e53d", | ||||||
|                 "sha256:5abd653a23c35d980b332bc0431d39663b1709d64142e3652890df4c9b6970f6", |                 "sha256:5abd653a23c35d980b332bc0431d39663b1709d64142e3652890df4c9b6970f6", | ||||||
|                 "sha256:5f9403af9c790cc18411ea398a6950ee2def2a830ad0cfe6dc9122e6d528b302", |                 "sha256:5f9403af9c790cc18411ea398a6950ee2def2a830ad0cfe6dc9122e6d528b302", | ||||||
|                 "sha256:6b4a8fd632b4ebee28282a9fef4c341835a1aa8671e2770b6f89adc8e8c2703c", |                 "sha256:6b4a8fd632b4ebee28282a9fef4c341835a1aa8671e2770b6f89adc8e8c2703c", | ||||||
| @@ -274,8 +284,10 @@ | |||||||
|                 "sha256:d14b140a4439d816e3b1229a4a525df917d6ea22a0771a2a78332273fd9528a4", |                 "sha256:d14b140a4439d816e3b1229a4a525df917d6ea22a0771a2a78332273fd9528a4", | ||||||
|                 "sha256:d1b4ab59e02d9008efe10ceabd0b31e79519da6fb67f7d8e8977118832d0f449", |                 "sha256:d1b4ab59e02d9008efe10ceabd0b31e79519da6fb67f7d8e8977118832d0f449", | ||||||
|                 "sha256:d5227b229005a696cc67676e24c214740efd90b148de5733419ac9aaba3773da", |                 "sha256:d5227b229005a696cc67676e24c214740efd90b148de5733419ac9aaba3773da", | ||||||
|  |                 "sha256:d9f3a909b59ac4a3ca9beb77716f4bce627276edb039a71d4e9ec4b7548536a0", | ||||||
|                 "sha256:e1f57aa70d3f7cc6947fd88636a481638263ba04a742b4a37dd25c373e41491a", |                 "sha256:e1f57aa70d3f7cc6947fd88636a481638263ba04a742b4a37dd25c373e41491a", | ||||||
|                 "sha256:e74a55f6bad0e7d3968399deb50f61f4db1926acf4a6d83beaaa7df986f48b1c", |                 "sha256:e74a55f6bad0e7d3968399deb50f61f4db1926acf4a6d83beaaa7df986f48b1c", | ||||||
|  |                 "sha256:e7f5a465c6431c0ad8d4e69603ee3306e521a09d3c6af76a16bdb62946bdddf0", | ||||||
|                 "sha256:e82aba2188b9ba309fd8e271702bd0d0fc9148ae3150532bbb474f4590039ffb", |                 "sha256:e82aba2188b9ba309fd8e271702bd0d0fc9148ae3150532bbb474f4590039ffb", | ||||||
|                 "sha256:ee69dad2c7155756ad114c02db06002f4cded41132cc51378e57aad79cc8e4f4", |                 "sha256:ee69dad2c7155756ad114c02db06002f4cded41132cc51378e57aad79cc8e4f4", | ||||||
|                 "sha256:f5ab93a2cb2d8338b1674be43b442a7f544a0971da062a5da774ed40587f18f5" |                 "sha256:f5ab93a2cb2d8338b1674be43b442a7f544a0971da062a5da774ed40587f18f5" | ||||||
| @@ -285,7 +297,8 @@ | |||||||
|         }, |         }, | ||||||
|         "pyocr": { |         "pyocr": { | ||||||
|             "hashes": [ |             "hashes": [ | ||||||
|                 "sha256:fa15adc7e1cf0d345a2990495fe125a947c6e09a60ddba0256a1c14b2e603179" |                 "sha256:fa15adc7e1cf0d345a2990495fe125a947c6e09a60ddba0256a1c14b2e603179", | ||||||
|  |                 "sha256:fd602af17b6e21985669aadc058a95f343ff921e962ed4aa6520ded32e4d1301" | ||||||
|             ], |             ], | ||||||
|             "index": "pypi", |             "index": "pypi", | ||||||
|             "version": "==0.7.2" |             "version": "==0.7.2" | ||||||
| @@ -316,7 +329,10 @@ | |||||||
|         }, |         }, | ||||||
|         "python-levenshtein": { |         "python-levenshtein": { | ||||||
|             "hashes": [ |             "hashes": [ | ||||||
|                 "sha256:033a11de5e3d19ea25c9302d11224e1a1898fe5abd23c61c7c360c25195e3eb1" |                 "sha256:033a11de5e3d19ea25c9302d11224e1a1898fe5abd23c61c7c360c25195e3eb1", | ||||||
|  |                 "sha256:15e26882728c29ccdf74cfc6ac4b49fc22c08b44d152348cb0eb1ec4f3dbf9df", | ||||||
|  |                 "sha256:3df5e5eb144570ecf5ad38864a2393068798328c7f05e7b167a49391d36a2db1", | ||||||
|  |                 "sha256:7f049b3ddc4b525bd469febafb98bf5202f789b722e0e4ccbec2ffbe8c07d7b4" | ||||||
|             ], |             ], | ||||||
|             "index": "pypi", |             "index": "pypi", | ||||||
|             "version": "==0.12.0" |             "version": "==0.12.0" | ||||||
| @@ -331,6 +347,7 @@ | |||||||
|         "redis": { |         "redis": { | ||||||
|             "hashes": [ |             "hashes": [ | ||||||
|                 "sha256:0e7e0cfca8660dea8b7d5cd8c4f6c5e29e11f31158c0b0ae91a397f00e5a05a2", |                 "sha256:0e7e0cfca8660dea8b7d5cd8c4f6c5e29e11f31158c0b0ae91a397f00e5a05a2", | ||||||
|  |                 "sha256:3f1c7f166fa6c803613eec222224848a80f5e5b9c6af3aa82461506643034a7a", | ||||||
|                 "sha256:432b788c4530cfe16d8d943a09d40ca6c16149727e4afe8c2c9d5580c59d9f24" |                 "sha256:432b788c4530cfe16d8d943a09d40ca6c16149727e4afe8c2c9d5580c59d9f24" | ||||||
|             ], |             ], | ||||||
|             "index": "pypi", |             "index": "pypi", | ||||||
| @@ -360,7 +377,9 @@ | |||||||
|                 "sha256:749078d1eb89484db5f34b4012092ad14b327944ee7f1c4f74d6279a6e4d1884", |                 "sha256:749078d1eb89484db5f34b4012092ad14b327944ee7f1c4f74d6279a6e4d1884", | ||||||
|                 "sha256:7913bd25f4ab274ba37bc97ad0e21c31004224ccb02765ad984eef43e04acc6c", |                 "sha256:7913bd25f4ab274ba37bc97ad0e21c31004224ccb02765ad984eef43e04acc6c", | ||||||
|                 "sha256:7a25fcbeae08f96a754b45bdc050e1fb94b95cab046bf56b016c25e9ab127b3e", |                 "sha256:7a25fcbeae08f96a754b45bdc050e1fb94b95cab046bf56b016c25e9ab127b3e", | ||||||
|  |                 "sha256:80ef188c0e47a6c964eed71c55a73c245f8daf9f0a4a9d804e91275afb468ca4", | ||||||
|                 "sha256:83d6b356e116ca119db8e7c6fc2983289d87b27b3fac238cfe5dca529d884562", |                 "sha256:83d6b356e116ca119db8e7c6fc2983289d87b27b3fac238cfe5dca529d884562", | ||||||
|  |                 "sha256:842fb985b2b99a82a2b145b6bbd588c5f5cfd83693402920fcb985d515794666", | ||||||
|                 "sha256:8b882a78c320478b12ff024e81dc7d43c1462aa4a3341c754ee65d857a521f85", |                 "sha256:8b882a78c320478b12ff024e81dc7d43c1462aa4a3341c754ee65d857a521f85", | ||||||
|                 "sha256:8f6a2229e8ad946e36815f2a03386bb8353d4bde368fdf8ca5f0cb97264d3b5c", |                 "sha256:8f6a2229e8ad946e36815f2a03386bb8353d4bde368fdf8ca5f0cb97264d3b5c", | ||||||
|                 "sha256:9801c4c1d9ae6a70aeb2128e5b4b68c45d4f0af0d1535500884d644fa9b768c6", |                 "sha256:9801c4c1d9ae6a70aeb2128e5b4b68c45d4f0af0d1535500884d644fa9b768c6", | ||||||
| @@ -384,6 +403,7 @@ | |||||||
|         }, |         }, | ||||||
|         "scikit-learn": { |         "scikit-learn": { | ||||||
|             "hashes": [ |             "hashes": [ | ||||||
|  |                 "sha256:090bbf144fd5823c1f2efa3e1a9bf180295b24294ca8f478e75b40ed54f8036e", | ||||||
|                 "sha256:0a127cc70990d4c15b1019680bfedc7fec6c23d14d3719fdf9b64b22d37cdeca", |                 "sha256:0a127cc70990d4c15b1019680bfedc7fec6c23d14d3719fdf9b64b22d37cdeca", | ||||||
|                 "sha256:0d39748e7c9669ba648acf40fb3ce96b8a07b240db6888563a7cb76e05e0d9cc", |                 "sha256:0d39748e7c9669ba648acf40fb3ce96b8a07b240db6888563a7cb76e05e0d9cc", | ||||||
|                 "sha256:1b8a391de95f6285a2f9adffb7db0892718950954b7149a70c783dc848f104ea", |                 "sha256:1b8a391de95f6285a2f9adffb7db0892718950954b7149a70c783dc848f104ea", | ||||||
| @@ -423,6 +443,7 @@ | |||||||
|                 "sha256:9ad4fcddcbf5dc67619379782e6aeef41218a79e17979aaed01ed099876c0e62", |                 "sha256:9ad4fcddcbf5dc67619379782e6aeef41218a79e17979aaed01ed099876c0e62", | ||||||
|                 "sha256:a254b98dbcc744c723a838c03b74a8a34c0558c9ac5c86d5561703362231107d", |                 "sha256:a254b98dbcc744c723a838c03b74a8a34c0558c9ac5c86d5561703362231107d", | ||||||
|                 "sha256:b03c4338d6d3d299e8ca494194c0ae4f611548da59e3c038813f1a43976cb437", |                 "sha256:b03c4338d6d3d299e8ca494194c0ae4f611548da59e3c038813f1a43976cb437", | ||||||
|  |                 "sha256:b5e9d3e4474644915809d6aa1416ff20430a3ed9ae723a5d295da5ddb24985e2", | ||||||
|                 "sha256:cc1f78ebc982cd0602c9a7615d878396bec94908db67d4ecddca864d049112f2", |                 "sha256:cc1f78ebc982cd0602c9a7615d878396bec94908db67d4ecddca864d049112f2", | ||||||
|                 "sha256:d6d25c41a009e3c6b7e757338948d0076ee1dd1770d1c09ec131f11946883c54", |                 "sha256:d6d25c41a009e3c6b7e757338948d0076ee1dd1770d1c09ec131f11946883c54", | ||||||
|                 "sha256:d84cadd7d7998433334c99fa55bcba0d8b4aeff0edb123b2a1dfcface538e474", |                 "sha256:d84cadd7d7998433334c99fa55bcba0d8b4aeff0edb123b2a1dfcface538e474", | ||||||
| @@ -468,6 +489,7 @@ | |||||||
|         }, |         }, | ||||||
|         "watchdog": { |         "watchdog": { | ||||||
|             "hashes": [ |             "hashes": [ | ||||||
|  |                 "sha256:034c85530b647486e8c8477410fe79476511282658f2ce496f97106d9e5acfb8", | ||||||
|                 "sha256:4214e1379d128b0588021880ccaf40317ee156d4603ac388b9adcf29165e0c04" |                 "sha256:4214e1379d128b0588021880ccaf40317ee156d4603ac388b9adcf29165e0c04" | ||||||
|             ], |             ], | ||||||
|             "index": "pypi", |             "index": "pypi", | ||||||
| @@ -561,6 +583,7 @@ | |||||||
|                 "sha256:29a6272fec10623fcbe158fdf9abc7a5fa032048ac1d8631f14b50fbfc10d17f", |                 "sha256:29a6272fec10623fcbe158fdf9abc7a5fa032048ac1d8631f14b50fbfc10d17f", | ||||||
|                 "sha256:2b31f46bf7b31e6aa690d4c7a3d51bb262438c6dcb0d528adde446531d0d3bb7", |                 "sha256:2b31f46bf7b31e6aa690d4c7a3d51bb262438c6dcb0d528adde446531d0d3bb7", | ||||||
|                 "sha256:2d43af2be93ffbad25dd959899b5b809618a496926146ce98ee0b23683f8c51c", |                 "sha256:2d43af2be93ffbad25dd959899b5b809618a496926146ce98ee0b23683f8c51c", | ||||||
|  |                 "sha256:3188a7dfd96f734a7498f37cde6598b1e9c084f1ca68bc1aa04e88db31168ab6", | ||||||
|                 "sha256:381ead10b9b9af5f64646cd27107fb27b614ee7040bb1226f9c07ba96625cbb5", |                 "sha256:381ead10b9b9af5f64646cd27107fb27b614ee7040bb1226f9c07ba96625cbb5", | ||||||
|                 "sha256:47a11bdbd8ada9b7ee628596f9d97fbd3851bd9999d398e9436bd67376dbece7", |                 "sha256:47a11bdbd8ada9b7ee628596f9d97fbd3851bd9999d398e9436bd67376dbece7", | ||||||
|                 "sha256:4d6a42744139a7fa5b46a264874a781e8694bb32f1d76d8137b68138686f1729", |                 "sha256:4d6a42744139a7fa5b46a264874a781e8694bb32f1d76d8137b68138686f1729", | ||||||
| @@ -586,7 +609,8 @@ | |||||||
|                 "sha256:c851b35fc078389bc16b915a0a7c1d5923e12e2c5aeec58c52f4aa8085ac8237", |                 "sha256:c851b35fc078389bc16b915a0a7c1d5923e12e2c5aeec58c52f4aa8085ac8237", | ||||||
|                 "sha256:cb7df71de0af56000115eafd000b867d1261f786b5eebd88a0ca6360cccfaca7", |                 "sha256:cb7df71de0af56000115eafd000b867d1261f786b5eebd88a0ca6360cccfaca7", | ||||||
|                 "sha256:cedb2f9e1f990918ea061f28a0f0077a07702e3819602d3507e2ff98c8d20636", |                 "sha256:cedb2f9e1f990918ea061f28a0f0077a07702e3819602d3507e2ff98c8d20636", | ||||||
|                 "sha256:e8caf961e1b1a945db76f1b5fa9c91498d15f545ac0ababbe575cfab185d3bd8" |                 "sha256:e8caf961e1b1a945db76f1b5fa9c91498d15f545ac0ababbe575cfab185d3bd8", | ||||||
|  |                 "sha256:ef221855191457fffeb909d5787d1807800ab4d0111f089e6c93ee68f577634d" | ||||||
|             ], |             ], | ||||||
|             "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'", |             "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'", | ||||||
|             "version": "==5.3" |             "version": "==5.3" | ||||||
| @@ -608,6 +632,7 @@ | |||||||
|         }, |         }, | ||||||
|         "docopt": { |         "docopt": { | ||||||
|             "hashes": [ |             "hashes": [ | ||||||
|  |                 "sha256:15fde8252aa9f2804171014d50d069ffbf42c7a50b7d74bcbb82bfd5700fcfc2", | ||||||
|                 "sha256:49b3a825280bd66b3aa83585ef59c4a8c82f2c8a522dbe754a8bc8d08c85c491" |                 "sha256:49b3a825280bd66b3aa83585ef59c4a8c82f2c8a522dbe754a8bc8d08c85c491" | ||||||
|             ], |             ], | ||||||
|             "version": "==0.6.2" |             "version": "==0.6.2" | ||||||
| @@ -638,11 +663,11 @@ | |||||||
|         }, |         }, | ||||||
|         "faker": { |         "faker": { | ||||||
|             "hashes": [ |             "hashes": [ | ||||||
|                 "sha256:6afc461ab3f779c9c16e299fc731d775e39ea7e8e063b3053ee359ae198a15ca", |                 "sha256:4d038ba51ae5e0a956d79cadd684d856e5750bfd608b61dad1807f8f08b1da49", | ||||||
|                 "sha256:ce1c38823eb0f927567cde5bf2e7c8ca565c7a70316139342050ce2ca74b4026" |                 "sha256:f260f0375a44cd1e1a735c9b8c9b914304f607b5eef431d20e098c7c2f5b50a6" | ||||||
|             ], |             ], | ||||||
|             "markers": "python_version >= '3.5'", |             "markers": "python_version >= '3.5'", | ||||||
|             "version": "==4.14.2" |             "version": "==4.16.0" | ||||||
|         }, |         }, | ||||||
|         "filelock": { |         "filelock": { | ||||||
|             "hashes": [ |             "hashes": [ | ||||||
| @@ -653,6 +678,7 @@ | |||||||
|         }, |         }, | ||||||
|         "idna": { |         "idna": { | ||||||
|             "hashes": [ |             "hashes": [ | ||||||
|  |                 "sha256:4a57a6379512ade94fa99e2fa46d3cd0f2f553040548d0e2958c6ed90ee48226", | ||||||
|                 "sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6", |                 "sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6", | ||||||
|                 "sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0" |                 "sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0" | ||||||
|             ], |             ], | ||||||
| @@ -670,12 +696,14 @@ | |||||||
|         "iniconfig": { |         "iniconfig": { | ||||||
|             "hashes": [ |             "hashes": [ | ||||||
|                 "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3", |                 "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3", | ||||||
|  |                 "sha256:8647b85c03813b8680f4ae9c9db2fd7293f8591ea536a10d73d90f6eb4b10aac", | ||||||
|                 "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32" |                 "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32" | ||||||
|             ], |             ], | ||||||
|             "version": "==1.1.1" |             "version": "==1.1.1" | ||||||
|         }, |         }, | ||||||
|         "jinja2": { |         "jinja2": { | ||||||
|             "hashes": [ |             "hashes": [ | ||||||
|  |                 "sha256:3f172970d5670703bd3812e8ca6459a9a7e069fa8e51b40195f83c81db191ec4", | ||||||
|                 "sha256:89aab215427ef59c34ad58735269eb58b1a5808103067f7bb9d5836c651b3bb0", |                 "sha256:89aab215427ef59c34ad58735269eb58b1a5808103067f7bb9d5836c651b3bb0", | ||||||
|                 "sha256:f0a4641d3cf955324a89c04f3d94663aa4d638abe8f733ecd3582848e1c37035" |                 "sha256:f0a4641d3cf955324a89c04f3d94663aa4d638abe8f733ecd3582848e1c37035" | ||||||
|             ], |             ], | ||||||
| @@ -689,8 +717,10 @@ | |||||||
|                 "sha256:09c4b7f37d6c648cb13f9230d847adf22f8171b1ccc4d5682398e77f40309235", |                 "sha256:09c4b7f37d6c648cb13f9230d847adf22f8171b1ccc4d5682398e77f40309235", | ||||||
|                 "sha256:1027c282dad077d0bae18be6794e6b6b8c91d58ed8a8d89a89d59693b9131db5", |                 "sha256:1027c282dad077d0bae18be6794e6b6b8c91d58ed8a8d89a89d59693b9131db5", | ||||||
|                 "sha256:13d3144e1e340870b25e7b10b98d779608c02016d5184cfb9927a9f10c689f42", |                 "sha256:13d3144e1e340870b25e7b10b98d779608c02016d5184cfb9927a9f10c689f42", | ||||||
|  |                 "sha256:19536834abffb3fa155017053c607cb835b2ecc6a3a2554a88043d991dffb736", | ||||||
|                 "sha256:24982cc2533820871eba85ba648cd53d8623687ff11cbb805be4ff7b4c971aff", |                 "sha256:24982cc2533820871eba85ba648cd53d8623687ff11cbb805be4ff7b4c971aff", | ||||||
|                 "sha256:29872e92839765e546828bb7754a68c418d927cd064fd4708fab9fe9c8bb116b", |                 "sha256:29872e92839765e546828bb7754a68c418d927cd064fd4708fab9fe9c8bb116b", | ||||||
|  |                 "sha256:3d61f15e39611aacd91b7e71d903787da86d9e80896e683c0103fced9add7834", | ||||||
|                 "sha256:43a55c2930bbc139570ac2452adf3d70cdbb3cfe5912c71cdce1c2c6bbd9c5d1", |                 "sha256:43a55c2930bbc139570ac2452adf3d70cdbb3cfe5912c71cdce1c2c6bbd9c5d1", | ||||||
|                 "sha256:46c99d2de99945ec5cb54f23c8cd5689f6d7177305ebff350a58ce5f8de1669e", |                 "sha256:46c99d2de99945ec5cb54f23c8cd5689f6d7177305ebff350a58ce5f8de1669e", | ||||||
|                 "sha256:500d4957e52ddc3351cabf489e79c91c17f6e0899158447047588650b5e69183", |                 "sha256:500d4957e52ddc3351cabf489e79c91c17f6e0899158447047588650b5e69183", | ||||||
| @@ -700,6 +730,7 @@ | |||||||
|                 "sha256:6788b695d50a51edb699cb55e35487e430fa21f1ed838122d722e0ff0ac5ba15", |                 "sha256:6788b695d50a51edb699cb55e35487e430fa21f1ed838122d722e0ff0ac5ba15", | ||||||
|                 "sha256:6dd73240d2af64df90aa7c4e7481e23825ea70af4b4922f8ede5b9e35f78a3b1", |                 "sha256:6dd73240d2af64df90aa7c4e7481e23825ea70af4b4922f8ede5b9e35f78a3b1", | ||||||
|                 "sha256:717ba8fe3ae9cc0006d7c451f0bb265ee07739daf76355d06366154ee68d221e", |                 "sha256:717ba8fe3ae9cc0006d7c451f0bb265ee07739daf76355d06366154ee68d221e", | ||||||
|  |                 "sha256:7952deddf24b85c88dab48f6ec366ac6e39d2761b5280f2f9594911e03fcd064", | ||||||
|                 "sha256:79855e1c5b8da654cf486b830bd42c06e8780cea587384cf6545b7d9ac013a0b", |                 "sha256:79855e1c5b8da654cf486b830bd42c06e8780cea587384cf6545b7d9ac013a0b", | ||||||
|                 "sha256:7c1699dfe0cf8ff607dbdcc1e9b9af1755371f92a68f706051cc8c37d447c905", |                 "sha256:7c1699dfe0cf8ff607dbdcc1e9b9af1755371f92a68f706051cc8c37d447c905", | ||||||
|                 "sha256:88e5fcfb52ee7b911e8bb6d6aa2fd21fbecc674eadd44118a9cc3863f938e735", |                 "sha256:88e5fcfb52ee7b911e8bb6d6aa2fd21fbecc674eadd44118a9cc3863f938e735", | ||||||
| @@ -795,6 +826,7 @@ | |||||||
|         }, |         }, | ||||||
|         "pytest-env": { |         "pytest-env": { | ||||||
|             "hashes": [ |             "hashes": [ | ||||||
|  |                 "sha256:33b4030383a021924fe3f3ba5ca4311990d8b1d02ca77389c2be020c4500f96a", | ||||||
|                 "sha256:7e94956aef7f2764f3c147d216ce066bf6c42948bb9e293169b1b1c880a580c2" |                 "sha256:7e94956aef7f2764f3c147d216ce066bf6c42948bb9e293169b1b1c880a580c2" | ||||||
|             ], |             ], | ||||||
|             "index": "pypi", |             "index": "pypi", | ||||||
| @@ -802,6 +834,7 @@ | |||||||
|         }, |         }, | ||||||
|         "pytest-forked": { |         "pytest-forked": { | ||||||
|             "hashes": [ |             "hashes": [ | ||||||
|  |                 "sha256:2d1bfc93ab65a28324eb0a63503bfb500c2da6916efede7a24b43a04970fe63c", | ||||||
|                 "sha256:6aa9ac7e00ad1a539c41bec6d21011332de671e938c7637378ec9710204e37ca", |                 "sha256:6aa9ac7e00ad1a539c41bec6d21011332de671e938c7637378ec9710204e37ca", | ||||||
|                 "sha256:dc4147784048e70ef5d437951728825a131b81714b398d5d52f17c7c144d8815" |                 "sha256:dc4147784048e70ef5d437951728825a131b81714b398d5d52f17c7c144d8815" | ||||||
|             ], |             ], | ||||||
| @@ -810,6 +843,7 @@ | |||||||
|         }, |         }, | ||||||
|         "pytest-sugar": { |         "pytest-sugar": { | ||||||
|             "hashes": [ |             "hashes": [ | ||||||
|  |                 "sha256:67a55a83c7b2717ad607704d3fe9004bb6543b54017ef82f9c6590acc38c1aec", | ||||||
|                 "sha256:b1b2186b0a72aada6859bea2a5764145e3aaa2c1cfbb23c3a19b5f7b697563d3" |                 "sha256:b1b2186b0a72aada6859bea2a5764145e3aaa2c1cfbb23c3a19b5f7b697563d3" | ||||||
|             ], |             ], | ||||||
|             "index": "pypi", |             "index": "pypi", | ||||||
| @@ -927,6 +961,7 @@ | |||||||
|         }, |         }, | ||||||
|         "termcolor": { |         "termcolor": { | ||||||
|             "hashes": [ |             "hashes": [ | ||||||
|  |                 "sha256:19b1225d03bfb56571484caaa8521d8ec6e2473ae1640c9f48a48dda49417706", | ||||||
|                 "sha256:1d6d69ce66211143803fbc56652b41d73b4a400a2891d7bf7a1cdf4c02de613b" |                 "sha256:1d6d69ce66211143803fbc56652b41d73b4a400a2891d7bf7a1cdf4c02de613b" | ||||||
|             ], |             ], | ||||||
|             "version": "==1.1.0" |             "version": "==1.1.0" | ||||||
|   | |||||||
| @@ -2,27 +2,25 @@ | |||||||
| ### Back end                                                                ### | ### Back end                                                                ### | ||||||
| ############################################################################### | ############################################################################### | ||||||
|  |  | ||||||
| FROM ubuntu:20.04 | FROM python:3.7-slim | ||||||
|  |  | ||||||
| WORKDIR /usr/src/paperless/ | WORKDIR /usr/src/paperless/ | ||||||
|  |  | ||||||
| COPY Pipfile* ./ | COPY requirements.txt ./ | ||||||
|  |  | ||||||
| #Dependencies | #Dependencies | ||||||
| RUN apt-get update \ | RUN apt-get update \ | ||||||
|   && DEBIAN_FRONTEND="noninteractive" apt-get -y --no-install-recommends install \ |   && apt-get -y --no-install-recommends install \ | ||||||
| 		build-essential \ | 		build-essential \ | ||||||
| 		curl \ | 		curl \ | ||||||
| 		ghostscript \ | 		ghostscript \ | ||||||
| 		gnupg \ | 		gnupg \ | ||||||
| 		imagemagick \ | 		imagemagick \ | ||||||
|  | 		libatlas-base-dev \ | ||||||
| 		libmagic-dev \ | 		libmagic-dev \ | ||||||
| 		libpoppler-cpp-dev \ | 		libpoppler-cpp-dev \ | ||||||
| 		libpq-dev \ | 		libpq-dev \ | ||||||
| 		optipng \ | 		optipng \ | ||||||
| 		python3 \ |  | ||||||
| 		python3-dev \ |  | ||||||
| 		python3-pip \ |  | ||||||
| 		sudo \ | 		sudo \ | ||||||
| 		tesseract-ocr \ | 		tesseract-ocr \ | ||||||
| 		tesseract-ocr-eng \ | 		tesseract-ocr-eng \ | ||||||
| @@ -32,10 +30,9 @@ RUN apt-get update \ | |||||||
| 		tesseract-ocr-spa \ | 		tesseract-ocr-spa \ | ||||||
| 		tzdata \ | 		tzdata \ | ||||||
| 		unpaper \ | 		unpaper \ | ||||||
| 	&& pip3 install --upgrade pipenv supervisor setuptools \ | 	&& pip3 install --upgrade supervisor setuptools \ | ||||||
| 	&& pipenv install --system --deploy \ | 	&& pip install --no-cache-dir -r requirements.txt \ | ||||||
| 	&& pipenv --clear \ | 	&& apt-get -y purge build-essential \ | ||||||
| 	&& apt-get -y purge build-essential python3-pip python3-dev \ |  | ||||||
| 	&& apt-get -y autoremove --purge \ | 	&& apt-get -y autoremove --purge \ | ||||||
| 	&& rm -rf /var/lib/apt/lists/* \ | 	&& rm -rf /var/lib/apt/lists/* \ | ||||||
| 	&& mkdir /var/log/supervisord /var/run/supervisord | 	&& mkdir /var/log/supervisord /var/run/supervisord | ||||||
|   | |||||||
| @@ -8,16 +8,40 @@ Administration | |||||||
| Making backups | Making backups | ||||||
| ############## | ############## | ||||||
|  |  | ||||||
| .. warning:: | Multiple options exist for making backups of your paperless instance, | ||||||
|  | depending on how you installed paperless. | ||||||
|  |  | ||||||
|     This section is not updated to paperless-ng yet, the exporter is a valid tool | Before making backups, make sure that paperless is not running. | ||||||
|     for backups though. |  | ||||||
|  |  | ||||||
| So you're bored of this whole project, or you want to make a remote backup of | Options available to any installation of paperless: | ||||||
| your files for whatever reason.  This is easy to do, simply use the |  | ||||||
| :ref:`exporter <utilities-exporter>` to dump your documents and database out |  | ||||||
| into an arbitrary directory. |  | ||||||
|  |  | ||||||
|  | *   Use the :ref:`document exporter <utilities-exporter>`. | ||||||
|  |     The document exporter exports all your documents, thumbnails and | ||||||
|  |     metadata to a specific folder. You may import your documents into a | ||||||
|  |     fresh instance of paperless again or store your documents in another | ||||||
|  |     DMS with this export. | ||||||
|  |  | ||||||
|  | Options available to docker installations: | ||||||
|  |  | ||||||
|  | *   Back up the docker volumes. These usually reside within | ||||||
|  |     ``/var/lib/docker/volumes`` on the host and you need to be root in order | ||||||
|  |     to access them. | ||||||
|  |  | ||||||
|  |     Paperless uses 3 volumes: | ||||||
|  |  | ||||||
|  |     *   ``paperless_media``: This is where your documents are stored. | ||||||
|  |     *   ``paperless_data``: This is where auxiliary data is stored. This | ||||||
|  |         folder also contains the SQLite database, if you use it. | ||||||
|  |     *   ``paperless_pgdata``: Exists only if you use PostgreSQL and contains | ||||||
|  |         the database. | ||||||
|  |  | ||||||
|  | Options available to bare-metal and non-docker installations: | ||||||
|  |  | ||||||
|  | *   Back up the entire paperless folder. This ensures that if your paperless instance | ||||||
|  |     crashes at some point or your disk fails, you can simply copy the folder back | ||||||
|  |     into place and it works. | ||||||
|  |  | ||||||
|  |     When using PostgreSQL, you'll also have to back up the database. | ||||||
|  |  | ||||||
| .. _migrating-restoring: | .. _migrating-restoring: | ||||||
|  |  | ||||||
| @@ -25,6 +49,8 @@ Restoring | |||||||
| ========= | ========= | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| .. _administration-updating: | .. _administration-updating: | ||||||
|  |  | ||||||
| Updating paperless | Updating paperless | ||||||
|   | |||||||
| @@ -128,6 +128,8 @@ consumer.  Once complete, you should see the newly-created document, | |||||||
| automatically tagged with the appropriate data. | automatically tagged with the appropriate data. | ||||||
|  |  | ||||||
|  |  | ||||||
|  | .. _advanced-automatic_matching: | ||||||
|  |  | ||||||
| Automatic matching | Automatic matching | ||||||
| ================== | ================== | ||||||
|  |  | ||||||
| @@ -175,8 +177,6 @@ then put the path to that script in ``paperless.conf`` with the variable name | |||||||
| of either ``PAPERLESS_PRE_CONSUME_SCRIPT`` or | of either ``PAPERLESS_PRE_CONSUME_SCRIPT`` or | ||||||
| ``PAPERLESS_POST_CONSUME_SCRIPT``. | ``PAPERLESS_POST_CONSUME_SCRIPT``. | ||||||
|  |  | ||||||
| .. TODO HYPEREF TO CONFIG |  | ||||||
|  |  | ||||||
| .. important:: | .. important:: | ||||||
|  |  | ||||||
|     These scripts are executed in a **blocking** process, which means that if |     These scripts are executed in a **blocking** process, which means that if | ||||||
|   | |||||||
| @@ -96,6 +96,8 @@ paperless-ng 0.9.0 | |||||||
|     sqlite. |     sqlite. | ||||||
|   * ``PAPERLESS_OCR_THREADS`` is gone and replaced with ``PAPERLESS_TASK_WORKERS`` and |   * ``PAPERLESS_OCR_THREADS`` is gone and replaced with ``PAPERLESS_TASK_WORKERS`` and | ||||||
|     ``PAPERLESS_THREADS_PER_WORKER``. Refer to the config example for details. |     ``PAPERLESS_THREADS_PER_WORKER``. Refer to the config example for details. | ||||||
|  |   * ``PAPERLESS_OPTIMIZE_THUMBNAILS`` allows you to disable or enable thumbnail | ||||||
|  |     optimization. This is useful on less powerful devices. | ||||||
|  |  | ||||||
| * Many more small changes here and there. The usual stuff. | * Many more small changes here and there. The usual stuff. | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										34
									
								
								docs/faq.rst
									
									
									
									
									
								
							
							
						
						
									
										34
									
								
								docs/faq.rst
									
									
									
									
									
								
							| @@ -23,27 +23,35 @@ is | |||||||
|  |  | ||||||
| **Q:** *Will paperless-ng run on Raspberry Pi?* | **Q:** *Will paperless-ng run on Raspberry Pi?* | ||||||
|  |  | ||||||
| **A:** The short answer is yes. The long answer is that certain parts of | **A:** The short answer is yes. I've tested it on a Raspberry Pi 3 B. | ||||||
|  | The long answer is that certain parts of | ||||||
| Paperless will run very slowly, such as the tesseract OCR. On Raspberry Pi, | Paperless will run very slowly, such as the tesseract OCR. On Raspberry Pi, | ||||||
| try to OCR documents before feeding them into paperless so that paperless can | try to OCR documents before feeding them into paperless so that paperless can | ||||||
| reuse the text. The web interface should be a lot snappier, since it runs | reuse the text. The web interface should be a lot snappier, since it runs | ||||||
| in your browser and paperless has to do much less work to serve the data. | in your browser and paperless has to do much less work to serve the data. | ||||||
|  |  | ||||||
|  | .. note:: | ||||||
|  |      | ||||||
|  |     Consider setting ``PAPERLESS_OPTIMIZE_THUMBNAILS`` to false to speed up | ||||||
|  |     the consumption process. This takes quite a bit of time on Raspberry Pi. | ||||||
|  |  | ||||||
|  | .. note:: | ||||||
|  |      | ||||||
|  |     Updating the :ref:`automatic matching algorithm <advanced-automatic_matching>` | ||||||
|  |     takes quite a bit of time. However, the update mechanism checks if your | ||||||
|  |     data has changed before doing the heavy lifting. If you experience the  | ||||||
|  |     algorithm taking too much cpu time, consider changing the schedule in the | ||||||
|  |     admin interface to daily or weekly. You can also manually invoke the task | ||||||
|  |     by changing the date and time of the next run to today/now. | ||||||
|  |  | ||||||
|  |     The actual matching of the algorithm is fast and works on Raspberry Pi as  | ||||||
|  |     well as on any other device. | ||||||
|  |  | ||||||
|  |      | ||||||
|  |  | ||||||
| **Q:** *How do I install paperless-ng on Raspberry Pi?* | **Q:** *How do I install paperless-ng on Raspberry Pi?* | ||||||
|  |  | ||||||
| **A:** There is no docker image for ARM available. If you know how to build | **A:** There is no docker image for ARM available. If you know how to build | ||||||
| that automatically, I'm all ears. For now, you have to grab the latest release | that automatically, I'm all ears. For now, you have to grab the latest release | ||||||
| archive from the project page and build the image yourself. The release comes | archive from the project page and build the image yourself. The release comes | ||||||
| with the front end already compiled, so you don't have to do this on the Pi. | with the front end already compiled, so you don't have to do this on the Pi. | ||||||
|  |  | ||||||
| You may encounter some issues during the build: |  | ||||||
|  |  | ||||||
| .. code:: shell-session |  | ||||||
|  |  | ||||||
|     W: GPG error: http://ports.ubuntu.com/ubuntu-ports focal InRelease: At least one invalid signature was encountered. |  | ||||||
|     E: The repository 'http://ports.ubuntu.com/ubuntu-ports focal InRelease' is not signed. |  | ||||||
|     N: Updating from such a repository can't be done securely, and is therefore disabled by default. |  | ||||||
|     N: See apt-secure(8) manpage for repository creation and user configuration details. |  | ||||||
|  |  | ||||||
| If this happens, look at `this thread <https://askubuntu.com/questions/1263284/>`:_. |  | ||||||
| You will need to update docker to the latest version to fix this issue. |  | ||||||
|   | |||||||
| @@ -10,7 +10,7 @@ | |||||||
| # This is required for processing scheduled tasks such as email fetching, index | # This is required for processing scheduled tasks such as email fetching, index | ||||||
| # optimization and for training the automatic document matcher. | # optimization and for training the automatic document matcher. | ||||||
| # Defaults to localhost:6379. | # Defaults to localhost:6379. | ||||||
| #PAPERLESS_REDIS="redis://localhost:6379" | #PAPERLESS_REDIS=redis://localhost:6379 | ||||||
|  |  | ||||||
|  |  | ||||||
| ############################################################################### | ############################################################################### | ||||||
| @@ -22,15 +22,15 @@ | |||||||
| # configuration for this is already done inside the docker-compose.env file. | # configuration for this is already done inside the docker-compose.env file. | ||||||
|  |  | ||||||
| #Set PAPERLESS_DBHOST and postgresql will be used instead of sqlite. | #Set PAPERLESS_DBHOST and postgresql will be used instead of sqlite. | ||||||
| #PAPERLESS_DBHOST="localhost" | #PAPERLESS_DBHOST=localhost | ||||||
|  |  | ||||||
| #Adjust port if necessary | #Adjust port if necessary | ||||||
| #PAPERLESS_DBPORT= | #PAPERLESS_DBPORT= | ||||||
|  |  | ||||||
| #name, user and pass all default to "paperless" | #name, user and pass all default to "paperless" | ||||||
| #PAPERLESS_DBNAME="paperless" | #PAPERLESS_DBNAME=paperless | ||||||
| #PAPERLESS_DBUSER="paperless" | #PAPERLESS_DBUSER=paperless | ||||||
| #PAPERLESS_DBPASS="paperless" | #PAPERLESS_DBPASS=paperless | ||||||
|  |  | ||||||
|  |  | ||||||
| ############################################################################### | ############################################################################### | ||||||
| @@ -40,23 +40,23 @@ | |||||||
| # This is where your documents should go to be consumed.  Make sure that it exists | # This is where your documents should go to be consumed.  Make sure that it exists | ||||||
| # and that the user running the paperless service can read/write its contents | # and that the user running the paperless service can read/write its contents | ||||||
| # before you start Paperless. | # before you start Paperless. | ||||||
| PAPERLESS_CONSUMPTION_DIR="../consume" | PAPERLESS_CONSUMPTION_DIR=../consume | ||||||
|  |  | ||||||
| # This is where paperless stores all its data (search index, sqlite database, | # This is where paperless stores all its data (search index, sqlite database, | ||||||
| # classification model, etc). | # classification model, etc). | ||||||
| #PAPERLESS_DATA_DIR="../data" | #PAPERLESS_DATA_DIR=../data | ||||||
|  |  | ||||||
| # This is where your documents and thumbnails are stored. | # This is where your documents and thumbnails are stored. | ||||||
| #PAPERLESS_MEDIA_ROOT="../media" | #PAPERLESS_MEDIA_ROOT=../media | ||||||
|  |  | ||||||
| # Override the default STATIC_ROOT here.  This is where all static files | # Override the default STATIC_ROOT here.  This is where all static files | ||||||
| # created using "collectstatic" manager command are stored. | # created using "collectstatic" manager command are stored. | ||||||
| #PAPERLESS_STATICDIR="../static" | #PAPERLESS_STATICDIR=../static | ||||||
|  |  | ||||||
|  |  | ||||||
| # Override the STATIC_URL here.  Unless you're hosting Paperless off a | # Override the STATIC_URL here.  Unless you're hosting Paperless off a | ||||||
| # subdomain like /paperless/, you probably don't need to change this. | # subdomain like /paperless/, you probably don't need to change this. | ||||||
| #PAPERLESS_STATIC_URL="/static/" | #PAPERLESS_STATIC_URL=/static/ | ||||||
|  |  | ||||||
|  |  | ||||||
| # Specify a filename format for the document (directories are supported) | # Specify a filename format for the document (directories are supported) | ||||||
| @@ -69,7 +69,7 @@ PAPERLESS_CONSUMPTION_DIR="../consume" | |||||||
| # * {tags[INDEX]} If your tags are strings, select the tag by index | # * {tags[INDEX]} If your tags are strings, select the tag by index | ||||||
| # Uniqueness of filenames is ensured, as an incrementing counter is attached | # Uniqueness of filenames is ensured, as an incrementing counter is attached | ||||||
| # to each filename. | # to each filename. | ||||||
| #PAPERLESS_FILENAME_FORMAT="" | #PAPERLESS_FILENAME_FORMAT= | ||||||
|  |  | ||||||
| ############################################################################### | ############################################################################### | ||||||
| ####                              Security                                 #### | ####                              Security                                 #### | ||||||
| @@ -77,10 +77,12 @@ PAPERLESS_CONSUMPTION_DIR="../consume" | |||||||
|  |  | ||||||
| # Controls whether django's debug mode is enabled. Disable this on production | # Controls whether django's debug mode is enabled. Disable this on production | ||||||
| # systems. Debug mode is disabled by default. | # systems. Debug mode is disabled by default. | ||||||
| #PAPERLESS_DEBUG="false" | #PAPERLESS_DEBUG=false | ||||||
|  |  | ||||||
| # GnuPG encryption is deprecated and will be removed in future versions. | # GnuPG encryption is deprecated and will be removed in future versions. | ||||||
| # | # | ||||||
|  | # Don't use it. It does not provide any security at all. | ||||||
|  | # | ||||||
| # Paperless can be instructed to attempt to encrypt your PDF files with GPG | # Paperless can be instructed to attempt to encrypt your PDF files with GPG | ||||||
| # using the PAPERLESS_PASSPHRASE specified below.  If however you're not | # using the PAPERLESS_PASSPHRASE specified below.  If however you're not | ||||||
| # concerned about encrypting these files (for example if you have disk | # concerned about encrypting these files (for example if you have disk | ||||||
| @@ -93,13 +95,13 @@ PAPERLESS_CONSUMPTION_DIR="../consume" | |||||||
| # you've since changed it to a new one. | # you've since changed it to a new one. | ||||||
| # | # | ||||||
| # The default is to not use encryption at all. | # The default is to not use encryption at all. | ||||||
| #PAPERLESS_PASSPHRASE="secret" | #PAPERLESS_PASSPHRASE=secret | ||||||
|  |  | ||||||
|  |  | ||||||
| # The secret key has a default that should be fine so long as you're hosting | # The secret key has a default that should be fine so long as you're hosting | ||||||
| # Paperless on a closed network.  However, if you're putting this anywhere | # Paperless on a closed network.  However, if you're putting this anywhere | ||||||
| # public, you should change the key to something unique and verbose. | # public, you should change the key to something unique and verbose. | ||||||
| #PAPERLESS_SECRET_KEY="change-me" | #PAPERLESS_SECRET_KEY=change-me | ||||||
|  |  | ||||||
|  |  | ||||||
| # If you're planning on putting Paperless on the open internet, then you | # If you're planning on putting Paperless on the open internet, then you | ||||||
| @@ -109,19 +111,19 @@ PAPERLESS_CONSUMPTION_DIR="../consume" | |||||||
| # | # | ||||||
| # Just remember that this is a comma-separated list, so "example.com" is fine, | # Just remember that this is a comma-separated list, so "example.com" is fine, | ||||||
| # as is "example.com,www.example.com", but NOT " example.com" or "example.com," | # as is "example.com,www.example.com", but NOT " example.com" or "example.com," | ||||||
| #PAPERLESS_ALLOWED_HOSTS="example.com,www.example.com" | #PAPERLESS_ALLOWED_HOSTS=example.com,www.example.com | ||||||
|  |  | ||||||
| # If you decide to use the Paperless API in an ajax call, you need to add your | # If you decide to use the Paperless API in an ajax call, you need to add your | ||||||
| # servers to the list of allowed hosts that can do CORS calls. By default | # servers to the list of allowed hosts that can do CORS calls. By default | ||||||
| # Paperless allows calls from localhost:8080, but if you'd like to change that, | # Paperless allows calls from localhost:8080, but if you'd like to change that, | ||||||
| # you can set this value to a comma-separated list. | # you can set this value to a comma-separated list. | ||||||
| #PAPERLESS_CORS_ALLOWED_HOSTS="localhost:8080,example.com,localhost:8000" | #PAPERLESS_CORS_ALLOWED_HOSTS=localhost:8080,example.com,localhost:8000 | ||||||
|  |  | ||||||
| # To host paperless under a subpath url like example.com/paperless you set | # To host paperless under a subpath url like example.com/paperless you set | ||||||
| # this value to /paperless. No trailing slash! | # this value to /paperless. No trailing slash! | ||||||
| # | # | ||||||
| # https://docs.djangoproject.com/en/1.11/ref/settings/#force-script-name | # https://docs.djangoproject.com/en/1.11/ref/settings/#force-script-name | ||||||
| #PAPERLESS_FORCE_SCRIPT_NAME="" | #PAPERLESS_FORCE_SCRIPT_NAME= | ||||||
|  |  | ||||||
| ############################################################################### | ############################################################################### | ||||||
| ####                          Software Tweaks                              #### | ####                          Software Tweaks                              #### | ||||||
| @@ -158,14 +160,19 @@ PAPERLESS_CONSUMPTION_DIR="../consume" | |||||||
|  |  | ||||||
| # When the consumer detects a duplicate document, it will not touch the | # When the consumer detects a duplicate document, it will not touch the | ||||||
| # original document. This default behavior can be changed here. | # original document. This default behavior can be changed here. | ||||||
| #PAPERLESS_CONSUMER_DELETE_DUPLICATES="false" | #PAPERLESS_CONSUMER_DELETE_DUPLICATES=false | ||||||
|  |  | ||||||
|  | # Use optipng to optimize thumbnails. This usually reduces the size of | ||||||
|  | # thumbnails by about 20%, but uses considerable compute time during | ||||||
|  | # consumption. | ||||||
|  | #PAPERLESS_OPTIMIZE_THUMBNAILS=true | ||||||
|  |  | ||||||
| # After a document is consumed, Paperless can trigger an arbitrary script if | # After a document is consumed, Paperless can trigger an arbitrary script if | ||||||
| # you like.  This script will be passed a number of arguments for you to work | # you like.  This script will be passed a number of arguments for you to work | ||||||
| # with.  The default is blank, which means nothing will be executed.  For more | # with.  The default is blank, which means nothing will be executed.  For more | ||||||
| # information, take a look at the docs: | # information, take a look at the docs: | ||||||
| # http://paperless.readthedocs.org/en/latest/consumption.html#hooking-into-the-consumption-process | # http://paperless.readthedocs.org/en/latest/consumption.html#hooking-into-the-consumption-process | ||||||
| #PAPERLESS_POST_CONSUME_SCRIPT="/path/to/an/arbitrary/script.sh" | #PAPERLESS_POST_CONSUME_SCRIPT=/path/to/an/arbitrary/script.sh | ||||||
|  |  | ||||||
| # By default, paperless will check the document text for document date information. | # By default, paperless will check the document text for document date information. | ||||||
| # Uncomment the line below to enable checking the document filename for date | # Uncomment the line below to enable checking the document filename for date | ||||||
| @@ -173,7 +180,7 @@ PAPERLESS_CONSUMPTION_DIR="../consume" | |||||||
| # https://dateparser.readthedocs.io/en/latest/#settings. The filename will be | # https://dateparser.readthedocs.io/en/latest/#settings. The filename will be | ||||||
| # checked first, and if nothing is found, the document text will be checked | # checked first, and if nothing is found, the document text will be checked | ||||||
| # as normal. | # as normal. | ||||||
| #PAPERLESS_FILENAME_DATE_ORDER="YMD" | #PAPERLESS_FILENAME_DATE_ORDER=YMD | ||||||
|  |  | ||||||
| # Sometimes devices won't create filenames which can be parsed properly | # Sometimes devices won't create filenames which can be parsed properly | ||||||
| # by the filename parser (see | # by the filename parser (see | ||||||
| @@ -243,7 +250,7 @@ PAPERLESS_CONSUMPTION_DIR="../consume" | |||||||
|  |  | ||||||
| # By default Paperless does not OCR a document if the text can be retrieved from | # By default Paperless does not OCR a document if the text can be retrieved from | ||||||
| # the document directly. Set to true to always OCR documents. | # the document directly. Set to true to always OCR documents. | ||||||
| #PAPERLESS_OCR_ALWAYS="false" | #PAPERLESS_OCR_ALWAYS=false | ||||||
|  |  | ||||||
|  |  | ||||||
| ############################################################################### | ############################################################################### | ||||||
| @@ -271,7 +278,7 @@ PAPERLESS_CONSUMPTION_DIR="../consume" | |||||||
| #PAPERLESS_CONVERT_BINARY=/usr/bin/convert | #PAPERLESS_CONVERT_BINARY=/usr/bin/convert | ||||||
|  |  | ||||||
| # Ghostscript | # Ghostscript | ||||||
| #PAPERLESS_GS_BINARY = /usr/bin/gs | #PAPERLESS_GS_BINARY=/usr/bin/gs | ||||||
|  |  | ||||||
| # Unpaper | # Unpaper | ||||||
| #PAPERLESS_UNPAPER_BINARY=/usr/bin/unpaper | #PAPERLESS_UNPAPER_BINARY=/usr/bin/unpaper | ||||||
|   | |||||||
| @@ -24,12 +24,17 @@ then | |||||||
| 	rm "$PAPERLESS_DIST" -r | 	rm "$PAPERLESS_DIST" -r | ||||||
| fi | fi | ||||||
|  |  | ||||||
|  | mkdir "$PAPERLESS_DIST" | ||||||
|  | mkdir "$PAPERLESS_DIST_APP" | ||||||
|  | mkdir "$PAPERLESS_DIST_APP/docker" | ||||||
|  |  | ||||||
| # setup dependencies. | # setup dependencies. | ||||||
|  |  | ||||||
| cd "$PAPERLESS_ROOT" | cd "$PAPERLESS_ROOT" | ||||||
|  |  | ||||||
| pipenv clean | pipenv clean | ||||||
| pipenv install --dev | pipenv install --dev | ||||||
|  | pipenv lock --keep-outdated -r > "$PAPERLESS_DIST_APP/requirements.txt" | ||||||
|  |  | ||||||
| # test if the application works. | # test if the application works. | ||||||
|  |  | ||||||
| @@ -44,10 +49,6 @@ make clean html | |||||||
|  |  | ||||||
| # copy stuff into place | # copy stuff into place | ||||||
|  |  | ||||||
| mkdir "$PAPERLESS_DIST" |  | ||||||
| mkdir "$PAPERLESS_DIST_APP" |  | ||||||
| mkdir "$PAPERLESS_DIST_APP/docker" |  | ||||||
|  |  | ||||||
| # the application itself | # the application itself | ||||||
|  |  | ||||||
| cp "$PAPERLESS_ROOT/.env" \ | cp "$PAPERLESS_ROOT/.env" \ | ||||||
| @@ -92,8 +93,6 @@ cd "$PAPERLESS_DIST_APP" | |||||||
|  |  | ||||||
| docker build . -t "jonaswinkler/paperless-ng:$VERSION" | docker build . -t "jonaswinkler/paperless-ng:$VERSION" | ||||||
|  |  | ||||||
| docker push "jonaswinkler/paperless-ng:$VERSION" |  | ||||||
|  |  | ||||||
| # works. package the app! | # works. package the app! | ||||||
|  |  | ||||||
| cd "$PAPERLESS_DIST" | cd "$PAPERLESS_DIST" | ||||||
|   | |||||||
							
								
								
									
										23
									
								
								scripts/push-release.sh
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										23
									
								
								scripts/push-release.sh
									
									
									
									
									
										Executable file
									
								
							| @@ -0,0 +1,23 @@ | |||||||
|  | #!/bin/bash | ||||||
|  |  | ||||||
|  | set -e | ||||||
|  |  | ||||||
|  |  | ||||||
|  | VERSION=$1 | ||||||
|  |  | ||||||
|  | if [ -z "$VERSION" ] | ||||||
|  | then | ||||||
|  | 	echo "Need a version string." | ||||||
|  | 	exit 1 | ||||||
|  | fi | ||||||
|  |  | ||||||
|  | # source root directory of paperless | ||||||
|  | PAPERLESS_ROOT=$(git rev-parse --show-toplevel) | ||||||
|  |  | ||||||
|  | # output directory | ||||||
|  | PAPERLESS_DIST="$PAPERLESS_ROOT/dist" | ||||||
|  | PAPERLESS_DIST_APP="$PAPERLESS_DIST/paperless-ng" | ||||||
|  |  | ||||||
|  | cd "$PAPERLESS_DIST_APP" | ||||||
|  |  | ||||||
|  | docker push "jonaswinkler/paperless-ng:$VERSION" | ||||||
| @@ -132,6 +132,28 @@ | |||||||
|             </a> |             </a> | ||||||
|           </li> |           </li> | ||||||
|         </ul> |         </ul> | ||||||
|  |  | ||||||
|  |         <h6 class="sidebar-heading d-flex justify-content-between align-items-center px-3 mt-4 mb-1 text-muted"> | ||||||
|  |           <span>Misc</span> | ||||||
|  |         </h6> | ||||||
|  |         <ul class="nav flex-column mb-2"> | ||||||
|  |           <li class="nav-item"> | ||||||
|  |             <a class="nav-link" href="https://paperless-ng.readthedocs.io/en/latest/"> | ||||||
|  |               <svg class="sidebaricon" fill="currentColor"> | ||||||
|  |                 <use xlink:href="assets/bootstrap-icons.svg#question-circle"/> | ||||||
|  |               </svg> | ||||||
|  |               Documentation | ||||||
|  |             </a> | ||||||
|  |           </li> | ||||||
|  |           <li class="nav-item"> | ||||||
|  |             <a class="nav-link" href="https://github.com/jonaswinkler/paperless-ng"> | ||||||
|  |               <svg class="sidebaricon" fill="currentColor"> | ||||||
|  |                 <use xlink:href="assets/bootstrap-icons.svg#link"/> | ||||||
|  |               </svg> | ||||||
|  |               Github | ||||||
|  |             </a> | ||||||
|  |           </li> | ||||||
|  |         </ul> | ||||||
|       </div> |       </div> | ||||||
|     </nav> |     </nav> | ||||||
|  |  | ||||||
|   | |||||||
| @@ -1,6 +1,6 @@ | |||||||
| <div class="row pt-3 pb-2 mb-3 border-bottom align-items-center"> | <div class="row pt-3 pb-1 mb-3 border-bottom align-items-center" > | ||||||
|   <div class="col text-truncate"> |   <div class="col text-truncate"> | ||||||
|     <h1 class="h2 text-truncate">{{title}}</h1> |     <h1 class="h2 text-truncate" style="line-height: 1.4">{{title}}</h1> | ||||||
|   </div> |   </div> | ||||||
|   <div class="btn-toolbar col-auto"> |   <div class="btn-toolbar col-auto"> | ||||||
|     <ng-content></ng-content> |     <ng-content></ng-content> | ||||||
|   | |||||||
| @@ -1,3 +1,7 @@ | |||||||
|  | .log-entry-10 { | ||||||
|  |   color: lightslategray !important; | ||||||
|  | } | ||||||
|  |  | ||||||
| .log-entry-30 { | .log-entry-30 { | ||||||
|   color: yellow !important; |   color: yellow !important; | ||||||
| } | } | ||||||
|   | |||||||
| @@ -3,7 +3,6 @@ import hashlib | |||||||
| import logging | import logging | ||||||
| import os | import os | ||||||
| import re | import re | ||||||
| import uuid |  | ||||||
|  |  | ||||||
| from django.conf import settings | from django.conf import settings | ||||||
| from django.db import transaction | from django.db import transaction | ||||||
| @@ -12,6 +11,7 @@ from django.utils import timezone | |||||||
| from paperless.db import GnuPG | from paperless.db import GnuPG | ||||||
| from .classifier import DocumentClassifier, IncompatibleClassifierVersionError | from .classifier import DocumentClassifier, IncompatibleClassifierVersionError | ||||||
| from .file_handling import generate_filename, create_source_path_directory | from .file_handling import generate_filename, create_source_path_directory | ||||||
|  | from .loggers import LoggingMixin | ||||||
| from .models import Document, FileInfo, Correspondent, DocumentType, Tag | from .models import Document, FileInfo, Correspondent, DocumentType, Tag | ||||||
| from .parsers import ParseError, get_parser_class | from .parsers import ParseError, get_parser_class | ||||||
| from .signals import ( | from .signals import ( | ||||||
| @@ -24,12 +24,10 @@ class ConsumerError(Exception): | |||||||
|     pass |     pass | ||||||
|  |  | ||||||
|  |  | ||||||
| class Consumer: | class Consumer(LoggingMixin): | ||||||
|  |  | ||||||
|     def __init__(self): |     def __init__(self): | ||||||
|  |         super().__init__() | ||||||
|         self.logger = logging.getLogger(__name__) |  | ||||||
|         self.logging_group = None |  | ||||||
|         self.path = None |         self.path = None | ||||||
|         self.filename = None |         self.filename = None | ||||||
|         self.override_title = None |         self.override_title = None | ||||||
| @@ -74,11 +72,6 @@ class Consumer: | |||||||
|         os.makedirs(settings.THUMBNAIL_DIR, exist_ok=True) |         os.makedirs(settings.THUMBNAIL_DIR, exist_ok=True) | ||||||
|         os.makedirs(settings.ORIGINALS_DIR, exist_ok=True) |         os.makedirs(settings.ORIGINALS_DIR, exist_ok=True) | ||||||
|  |  | ||||||
|     def log(self, level, message): |  | ||||||
|         getattr(self.logger, level)(message, extra={ |  | ||||||
|             "group": self.logging_group |  | ||||||
|         }) |  | ||||||
|  |  | ||||||
|     def try_consume_file(self, |     def try_consume_file(self, | ||||||
|                          path, |                          path, | ||||||
|                          override_filename=None, |                          override_filename=None, | ||||||
| @@ -100,7 +93,7 @@ class Consumer: | |||||||
|         # this is for grouping logging entries for this particular file |         # this is for grouping logging entries for this particular file | ||||||
|         # together. |         # together. | ||||||
|  |  | ||||||
|         self.logging_group = uuid.uuid4() |         self.renew_logging_group() | ||||||
|  |  | ||||||
|         # Make sure that preconditions for consuming the file are met. |         # Make sure that preconditions for consuming the file are met. | ||||||
|  |  | ||||||
|   | |||||||
| @@ -86,7 +86,7 @@ def generate_filename(document): | |||||||
|                 added_day=document.added.day if document.added else "none", |                 added_day=document.added.day if document.added else "none", | ||||||
|                 tags=tags, |                 tags=tags, | ||||||
|             ) |             ) | ||||||
|     except (ValueError, KeyError, IndexError) as e: |     except (ValueError, KeyError, IndexError): | ||||||
|         logging.getLogger(__name__).warning("Invalid PAPERLESS_FILENAME_FORMAT: {}, falling back to default,".format(settings.PAPERLESS_FILENAME_FORMAT)) |         logging.getLogger(__name__).warning("Invalid PAPERLESS_FILENAME_FORMAT: {}, falling back to default,".format(settings.PAPERLESS_FILENAME_FORMAT)) | ||||||
|  |  | ||||||
|     # Always append the primary key to guarantee uniqueness of filename |     # Always append the primary key to guarantee uniqueness of filename | ||||||
|   | |||||||
| @@ -32,6 +32,9 @@ class UploadForm(forms.Form): | |||||||
|  |  | ||||||
|         t = int(mktime(datetime.now().timetuple())) |         t = int(mktime(datetime.now().timetuple())) | ||||||
|  |  | ||||||
|  |         os.makedirs(settings.SCRATCH_DIR, exist_ok=True) | ||||||
|  |  | ||||||
|  |         # TODO: don't just append pdf. This is here for that weird regex check at the start of the consumer. | ||||||
|         with tempfile.NamedTemporaryFile(prefix="paperless-upload-", suffix=".pdf", dir=settings.SCRATCH_DIR, delete=False) as f: |         with tempfile.NamedTemporaryFile(prefix="paperless-upload-", suffix=".pdf", dir=settings.SCRATCH_DIR, delete=False) as f: | ||||||
|  |  | ||||||
|             f.write(document) |             f.write(document) | ||||||
|   | |||||||
| @@ -1,4 +1,5 @@ | |||||||
| import logging | import logging | ||||||
|  | import uuid | ||||||
|  |  | ||||||
|  |  | ||||||
| class PaperlessHandler(logging.Handler): | class PaperlessHandler(logging.Handler): | ||||||
| @@ -13,3 +14,19 @@ class PaperlessHandler(logging.Handler): | |||||||
|             kwargs["group"] = record.group |             kwargs["group"] = record.group | ||||||
|  |  | ||||||
|         Log.objects.create(**kwargs) |         Log.objects.create(**kwargs) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class LoggingMixin: | ||||||
|  |  | ||||||
|  |     logging_group = None | ||||||
|  |  | ||||||
|  |     def renew_logging_group(self): | ||||||
|  |         self.logging_group = uuid.uuid4() | ||||||
|  |  | ||||||
|  |     def log(self, level, message): | ||||||
|  |         target = ".".join([self.__class__.__module__, self.__class__.__name__]) | ||||||
|  |         logger = logging.getLogger(target) | ||||||
|  |  | ||||||
|  |         getattr(logger, level)(message, extra={ | ||||||
|  |             "group": self.logging_group | ||||||
|  |         }) | ||||||
|   | |||||||
| @@ -1,7 +1,4 @@ | |||||||
| # Generated by Django 3.1.3 on 2020-11-07 12:35 | # Generated by Django 3.1.3 on 2020-11-07 12:35 | ||||||
| import os |  | ||||||
|  |  | ||||||
| from django.conf import settings |  | ||||||
| from django.db import migrations, models | from django.db import migrations, models | ||||||
| import django.db.models.deletion | import django.db.models.deletion | ||||||
|  |  | ||||||
|   | |||||||
| @@ -20,6 +20,7 @@ from django.utils import timezone | |||||||
| # - XX. MONTH ZZZZ with XX being 1 or 2 and ZZZZ being 2 or 4 digits | # - XX. MONTH ZZZZ with XX being 1 or 2 and ZZZZ being 2 or 4 digits | ||||||
| # - MONTH ZZZZ, with ZZZZ being 4 digits | # - MONTH ZZZZ, with ZZZZ being 4 digits | ||||||
| # - MONTH XX, ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits | # - MONTH XX, ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits | ||||||
|  | from documents.loggers import LoggingMixin | ||||||
| from documents.signals import document_consumer_declaration | from documents.signals import document_consumer_declaration | ||||||
|  |  | ||||||
| # TODO: isn't there a date parsing library for this? | # TODO: isn't there a date parsing library for this? | ||||||
| @@ -101,17 +102,17 @@ class ParseError(Exception): | |||||||
|     pass |     pass | ||||||
|  |  | ||||||
|  |  | ||||||
| class DocumentParser: | class DocumentParser(LoggingMixin): | ||||||
|     """ |     """ | ||||||
|     Subclass this to make your own parser.  Have a look at |     Subclass this to make your own parser.  Have a look at | ||||||
|     `paperless_tesseract.parsers` for inspiration. |     `paperless_tesseract.parsers` for inspiration. | ||||||
|     """ |     """ | ||||||
|  |  | ||||||
|     def __init__(self, path, logging_group): |     def __init__(self, path, logging_group): | ||||||
|  |         super().__init__() | ||||||
|  |         self.logging_group = logging_group | ||||||
|         self.document_path = path |         self.document_path = path | ||||||
|         self.tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR) |         self.tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR) | ||||||
|         self.logger = logging.getLogger(__name__) |  | ||||||
|         self.logging_group = logging_group |  | ||||||
|  |  | ||||||
|     def get_thumbnail(self): |     def get_thumbnail(self): | ||||||
|         """ |         """ | ||||||
| @@ -121,6 +122,7 @@ class DocumentParser: | |||||||
|  |  | ||||||
|     def optimise_thumbnail(self, in_path): |     def optimise_thumbnail(self, in_path): | ||||||
|  |  | ||||||
|  |         if settings.OPTIMIZE_THUMBNAILS: | ||||||
|             out_path = os.path.join(self.tempdir, "optipng.png") |             out_path = os.path.join(self.tempdir, "optipng.png") | ||||||
|  |  | ||||||
|             args = (settings.OPTIPNG_BINARY, "-silent", "-o5", in_path, "-out", out_path) |             args = (settings.OPTIPNG_BINARY, "-silent", "-o5", in_path, "-out", out_path) | ||||||
| @@ -131,6 +133,8 @@ class DocumentParser: | |||||||
|                 raise ParseError("Optipng failed at {}".format(args)) |                 raise ParseError("Optipng failed at {}".format(args)) | ||||||
|  |  | ||||||
|             return out_path |             return out_path | ||||||
|  |         else: | ||||||
|  |             return in_path | ||||||
|  |  | ||||||
|     def get_optimised_thumbnail(self): |     def get_optimised_thumbnail(self): | ||||||
|         return self.optimise_thumbnail(self.get_thumbnail()) |         return self.optimise_thumbnail(self.get_thumbnail()) | ||||||
| @@ -222,11 +226,6 @@ class DocumentParser: | |||||||
|  |  | ||||||
|         return date |         return date | ||||||
|  |  | ||||||
|     def log(self, level, message): |  | ||||||
|         getattr(self.logger, level)(message, extra={ |  | ||||||
|             "group": self.logging_group |  | ||||||
|         }) |  | ||||||
|  |  | ||||||
|     def cleanup(self): |     def cleanup(self): | ||||||
|         self.log("debug", "Deleting directory {}".format(self.tempdir)) |         self.log("debug", "Deleting directory {}".format(self.tempdir)) | ||||||
|         shutil.rmtree(self.tempdir) |         shutil.rmtree(self.tempdir) | ||||||
|   | |||||||
| @@ -2,11 +2,10 @@ import os | |||||||
| import shutil | import shutil | ||||||
| import tempfile | import tempfile | ||||||
| from unittest import mock | from unittest import mock | ||||||
| from unittest.mock import MagicMock |  | ||||||
|  |  | ||||||
| from django.contrib.auth.models import User | from django.contrib.auth.models import User | ||||||
| from django.test import override_settings | from django.test import override_settings | ||||||
| from rest_framework.test import APITestCase, APIClient | from rest_framework.test import APITestCase | ||||||
|  |  | ||||||
| from documents.models import Document, Correspondent, DocumentType, Tag | from documents.models import Document, Correspondent, DocumentType, Tag | ||||||
|  |  | ||||||
|   | |||||||
| @@ -80,6 +80,6 @@ class TestClassifier(TestCase): | |||||||
|  |  | ||||||
|         self.classifier.save_classifier() |         self.classifier.save_classifier() | ||||||
|  |  | ||||||
|         newClassifier = DocumentClassifier() |         new_classifier = DocumentClassifier() | ||||||
|         newClassifier.reload() |         new_classifier.reload() | ||||||
|         self.assertFalse(newClassifier.train()) |         self.assertFalse(new_classifier.train()) | ||||||
|   | |||||||
| @@ -5,8 +5,6 @@ import tempfile | |||||||
| from unittest import mock | from unittest import mock | ||||||
| from unittest.mock import MagicMock | from unittest.mock import MagicMock | ||||||
|  |  | ||||||
| from django.conf import settings |  | ||||||
| from django.db import DatabaseError |  | ||||||
| from django.test import TestCase, override_settings | from django.test import TestCase, override_settings | ||||||
|  |  | ||||||
| from ..consumer import Consumer, ConsumerError | from ..consumer import Consumer, ConsumerError | ||||||
| @@ -504,9 +502,9 @@ class TestConsumer(TestCase): | |||||||
|  |  | ||||||
|     def testOverrideFilename(self): |     def testOverrideFilename(self): | ||||||
|         filename = self.get_test_file() |         filename = self.get_test_file() | ||||||
|         overrideFilename = "My Bank - Statement for November.pdf" |         override_filename = "My Bank - Statement for November.pdf" | ||||||
|  |  | ||||||
|         document = self.consumer.try_consume_file(filename, override_filename=overrideFilename) |         document = self.consumer.try_consume_file(filename, override_filename=override_filename) | ||||||
|  |  | ||||||
|         self.assertEqual(document.correspondent.name, "My Bank") |         self.assertEqual(document.correspondent.name, "My Bank") | ||||||
|         self.assertEqual(document.title, "Statement for November") |         self.assertEqual(document.title, "Statement for November") | ||||||
|   | |||||||
| @@ -72,11 +72,11 @@ def binaries_check(app_configs, **kwargs): | |||||||
| @register() | @register() | ||||||
| def debug_mode_check(app_configs, **kwargs): | def debug_mode_check(app_configs, **kwargs): | ||||||
|     if settings.DEBUG: |     if settings.DEBUG: | ||||||
|         return [Warning("DEBUG mode is enabled. Disable Debug mode. " |         return [Warning( | ||||||
|                         "This is a serious security " |             "DEBUG mode is enabled. Disable Debug mode. This is a serious " | ||||||
|                         "issue, since it puts security overides in place which" |             "security issue, since it puts security overides in place which " | ||||||
|                         "are meant to be only used during development. This" |             "are meant to be only used during development. This " | ||||||
|                         "also means that paperless will tell anyone various" |             "also means that paperless will tell anyone various " | ||||||
|             "debugging information when something goes wrong.")] |             "debugging information when something goes wrong.")] | ||||||
|     else: |     else: | ||||||
|         return [] |         return [] | ||||||
|   | |||||||
| @@ -257,6 +257,14 @@ LOGGING = { | |||||||
|             "handlers": ["dbhandler", "streamhandler"], |             "handlers": ["dbhandler", "streamhandler"], | ||||||
|             "level": "DEBUG" |             "level": "DEBUG" | ||||||
|         }, |         }, | ||||||
|  |         "paperless_mail": { | ||||||
|  |             "handlers": ["dbhandler", "streamhandler"], | ||||||
|  |             "level": "DEBUG" | ||||||
|  |         }, | ||||||
|  |         "paperless_tesseract": { | ||||||
|  |             "handlers": ["dbhandler", "streamhandler"], | ||||||
|  |             "level": "DEBUG" | ||||||
|  |         }, | ||||||
|     }, |     }, | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -312,6 +320,8 @@ CONSUMER_POLLING = int(os.getenv("PAPERLESS_CONSUMER_POLLING", 0)) | |||||||
|  |  | ||||||
| CONSUMER_DELETE_DUPLICATES = __get_boolean("PAPERLESS_CONSUMER_DELETE_DUPLICATES") | CONSUMER_DELETE_DUPLICATES = __get_boolean("PAPERLESS_CONSUMER_DELETE_DUPLICATES") | ||||||
|  |  | ||||||
|  | OPTIMIZE_THUMBNAILS = __get_boolean("PAPERLESS_OPTIMIZE_THUMBNAILS", "true") | ||||||
|  |  | ||||||
| # The default language that tesseract will attempt to use when parsing | # The default language that tesseract will attempt to use when parsing | ||||||
| # documents.  It should be a 3-letter language code consistent with ISO 639. | # documents.  It should be a 3-letter language code consistent with ISO 639. | ||||||
| OCR_LANGUAGE = os.getenv("PAPERLESS_OCR_LANGUAGE", "eng") | OCR_LANGUAGE = os.getenv("PAPERLESS_OCR_LANGUAGE", "eng") | ||||||
|   | |||||||
| @@ -1,18 +1,7 @@ | |||||||
| from django.contrib import admin | from django.contrib import admin | ||||||
| from django import forms |  | ||||||
|  |  | ||||||
| from paperless_mail.models import MailAccount, MailRule | from paperless_mail.models import MailAccount, MailRule | ||||||
|  |  | ||||||
|  |  | ||||||
| class MailAccountForm(forms.ModelForm): |  | ||||||
|  |  | ||||||
|     password = forms.CharField(widget=forms.PasswordInput) |  | ||||||
|  |  | ||||||
|     class Meta: |  | ||||||
|         fields = '__all__' |  | ||||||
|         model = MailAccount |  | ||||||
|  |  | ||||||
|  |  | ||||||
| class MailAccountAdmin(admin.ModelAdmin): | class MailAccountAdmin(admin.ModelAdmin): | ||||||
|  |  | ||||||
|     list_display = ("name", "imap_server", "username") |     list_display = ("name", "imap_server", "username") | ||||||
| @@ -20,6 +9,8 @@ class MailAccountAdmin(admin.ModelAdmin): | |||||||
|  |  | ||||||
| class MailRuleAdmin(admin.ModelAdmin): | class MailRuleAdmin(admin.ModelAdmin): | ||||||
|  |  | ||||||
|  |     list_filter = ("account",) | ||||||
|  |  | ||||||
|     list_display = ("name", "account", "folder", "action") |     list_display = ("name", "account", "folder", "action") | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
| @@ -8,6 +8,7 @@ from django_q.tasks import async_task | |||||||
| from imap_tools import MailBox, MailBoxUnencrypted, AND, MailMessageFlags, \ | from imap_tools import MailBox, MailBoxUnencrypted, AND, MailMessageFlags, \ | ||||||
|     MailboxFolderSelectError |     MailboxFolderSelectError | ||||||
|  |  | ||||||
|  | from documents.loggers import LoggingMixin | ||||||
| from documents.models import Correspondent | from documents.models import Correspondent | ||||||
| from paperless_mail.models import MailAccount, MailRule | from paperless_mail.models import MailAccount, MailRule | ||||||
|  |  | ||||||
| @@ -83,72 +84,6 @@ def make_criterias(rule): | |||||||
|     return {**criterias, **get_rule_action(rule).get_criteria()} |     return {**criterias, **get_rule_action(rule).get_criteria()} | ||||||
|  |  | ||||||
|  |  | ||||||
| def handle_mail_account(account): |  | ||||||
|  |  | ||||||
|     if account.imap_security == MailAccount.IMAP_SECURITY_NONE: |  | ||||||
|         mailbox = MailBoxUnencrypted(account.imap_server, account.imap_port) |  | ||||||
|     elif account.imap_security == MailAccount.IMAP_SECURITY_STARTTLS: |  | ||||||
|         mailbox = MailBox(account.imap_server, account.imap_port, starttls=True) |  | ||||||
|     elif account.imap_security == MailAccount.IMAP_SECURITY_SSL: |  | ||||||
|         mailbox = MailBox(account.imap_server, account.imap_port) |  | ||||||
|     else: |  | ||||||
|         raise ValueError("Unknown IMAP security") |  | ||||||
|  |  | ||||||
|     total_processed_files = 0 |  | ||||||
|  |  | ||||||
|     with mailbox as M: |  | ||||||
|  |  | ||||||
|         try: |  | ||||||
|             M.login(account.username, account.password) |  | ||||||
|         except Exception: |  | ||||||
|             raise MailError( |  | ||||||
|                 f"Error while authenticating account {account.name}") |  | ||||||
|  |  | ||||||
|         for rule in account.rules.all(): |  | ||||||
|  |  | ||||||
|             try: |  | ||||||
|                 M.folder.set(rule.folder) |  | ||||||
|             except MailboxFolderSelectError: |  | ||||||
|                 raise MailError( |  | ||||||
|                     f"Rule {rule.name}: Folder {rule.folder} does not exist " |  | ||||||
|                     f"in account {account.name}") |  | ||||||
|  |  | ||||||
|             criterias = make_criterias(rule) |  | ||||||
|  |  | ||||||
|             try: |  | ||||||
|                 messages = M.fetch(criteria=AND(**criterias), mark_seen=False) |  | ||||||
|             except Exception: |  | ||||||
|                 raise MailError( |  | ||||||
|                     f"Rule {rule.name}: Error while fetching folder " |  | ||||||
|                     f"{rule.folder} of account {account.name}") |  | ||||||
|  |  | ||||||
|             post_consume_messages = [] |  | ||||||
|  |  | ||||||
|             for message in messages: |  | ||||||
|                 try: |  | ||||||
|                     processed_files = handle_message(message, rule) |  | ||||||
|                 except Exception: |  | ||||||
|                     raise MailError( |  | ||||||
|                         f"Rule {rule.name}: Error while processing mail " |  | ||||||
|                         f"{message.uid} of account {account.name}") |  | ||||||
|                 if processed_files > 0: |  | ||||||
|                     post_consume_messages.append(message.uid) |  | ||||||
|  |  | ||||||
|                 total_processed_files += processed_files |  | ||||||
|             try: |  | ||||||
|                 get_rule_action(rule).post_consume( |  | ||||||
|                     M, |  | ||||||
|                     post_consume_messages, |  | ||||||
|                     rule.action_parameter) |  | ||||||
|  |  | ||||||
|             except Exception: |  | ||||||
|                 raise MailError( |  | ||||||
|                     f"Rule {rule.name}: Error while processing post-consume " |  | ||||||
|                     f"actions for account {account.name}") |  | ||||||
|  |  | ||||||
|     return total_processed_files |  | ||||||
|  |  | ||||||
|  |  | ||||||
| def get_title(message, att, rule): | def get_title(message, att, rule): | ||||||
|     if rule.assign_title_from == MailRule.TITLE_FROM_SUBJECT: |     if rule.assign_title_from == MailRule.TITLE_FROM_SUBJECT: | ||||||
|         title = message.subject |         title = message.subject | ||||||
| @@ -189,10 +124,121 @@ def get_correspondent(message, rule): | |||||||
|     return correspondent |     return correspondent | ||||||
|  |  | ||||||
|  |  | ||||||
| def handle_message(message, rule): | def get_mailbox(server, port, security): | ||||||
|  |     if security == MailAccount.IMAP_SECURITY_NONE: | ||||||
|  |         mailbox = MailBoxUnencrypted(server, port) | ||||||
|  |     elif security == MailAccount.IMAP_SECURITY_STARTTLS: | ||||||
|  |         mailbox = MailBox(server, port, starttls=True) | ||||||
|  |     elif security == MailAccount.IMAP_SECURITY_SSL: | ||||||
|  |         mailbox = MailBox(server, port) | ||||||
|  |     else: | ||||||
|  |         raise ValueError("Unknown IMAP security") | ||||||
|  |     return mailbox | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class MailAccountHandler(LoggingMixin): | ||||||
|  |  | ||||||
|  |     def handle_mail_account(self, account): | ||||||
|  |  | ||||||
|  |         self.renew_logging_group() | ||||||
|  |  | ||||||
|  |         self.log('debug', f"Processing mail account {account}") | ||||||
|  |  | ||||||
|  |         total_processed_files = 0 | ||||||
|  |  | ||||||
|  |         with get_mailbox(account.imap_server, | ||||||
|  |                          account.imap_port, | ||||||
|  |                          account.imap_security) as M: | ||||||
|  |  | ||||||
|  |             try: | ||||||
|  |                 M.login(account.username, account.password) | ||||||
|  |             except Exception: | ||||||
|  |                 raise MailError( | ||||||
|  |                     f"Error while authenticating account {account.name}") | ||||||
|  |  | ||||||
|  |             self.log('debug', f"Account {account}: Processing " | ||||||
|  |                               f"{account.rules.count()} rule(s)") | ||||||
|  |  | ||||||
|  |             for rule in account.rules.all(): | ||||||
|  |                 self.log( | ||||||
|  |                     'debug', | ||||||
|  |                     f"Account {account}: Processing rule {rule.name}") | ||||||
|  |  | ||||||
|  |                 self.log( | ||||||
|  |                     'debug', | ||||||
|  |                     f"Rule {account}.{rule}: Selecting folder {rule.folder}") | ||||||
|  |  | ||||||
|  |                 try: | ||||||
|  |                     M.folder.set(rule.folder) | ||||||
|  |                 except MailboxFolderSelectError: | ||||||
|  |                     raise MailError( | ||||||
|  |                         f"Rule {rule.name}: Folder {rule.folder} does not exist " | ||||||
|  |                         f"in account {account.name}") | ||||||
|  |  | ||||||
|  |                 criterias = make_criterias(rule) | ||||||
|  |  | ||||||
|  |                 self.log( | ||||||
|  |                     'debug', | ||||||
|  |                     f"Rule {account}.{rule}: Searching folder with criteria " | ||||||
|  |                     f"{str(AND(**criterias))}") | ||||||
|  |  | ||||||
|  |                 try: | ||||||
|  |                     messages = M.fetch(criteria=AND(**criterias), mark_seen=False) | ||||||
|  |                 except Exception: | ||||||
|  |                     raise MailError( | ||||||
|  |                         f"Rule {rule.name}: Error while fetching folder " | ||||||
|  |                         f"{rule.folder} of account {account.name}") | ||||||
|  |  | ||||||
|  |                 post_consume_messages = [] | ||||||
|  |  | ||||||
|  |                 mails_processed = 0 | ||||||
|  |  | ||||||
|  |                 for message in messages: | ||||||
|  |                     try: | ||||||
|  |                         processed_files = self.handle_message(message, rule) | ||||||
|  |                     except Exception: | ||||||
|  |                         raise MailError( | ||||||
|  |                             f"Rule {rule.name}: Error while processing mail " | ||||||
|  |                             f"{message.uid} of account {account.name}") | ||||||
|  |                     if processed_files > 0: | ||||||
|  |                         post_consume_messages.append(message.uid) | ||||||
|  |  | ||||||
|  |                     total_processed_files += processed_files | ||||||
|  |                     mails_processed += 1 | ||||||
|  |  | ||||||
|  |                 self.log( | ||||||
|  |                     'debug', | ||||||
|  |                     f"Rule {account}.{rule}: Processed {mails_processed} " | ||||||
|  |                     f"matching mail(s)") | ||||||
|  |  | ||||||
|  |                 self.log( | ||||||
|  |                     'debug', | ||||||
|  |                     f"Rule {account}.{rule}: Running mail actions on " | ||||||
|  |                     f"{len(post_consume_messages)} mails") | ||||||
|  |  | ||||||
|  |                 try: | ||||||
|  |                     get_rule_action(rule).post_consume( | ||||||
|  |                         M, | ||||||
|  |                         post_consume_messages, | ||||||
|  |                         rule.action_parameter) | ||||||
|  |  | ||||||
|  |                 except Exception: | ||||||
|  |                     raise MailError( | ||||||
|  |                         f"Rule {rule.name}: Error while processing post-consume " | ||||||
|  |                         f"actions for account {account.name}") | ||||||
|  |  | ||||||
|  |         return total_processed_files | ||||||
|  |  | ||||||
|  |     def handle_message(self, message, rule): | ||||||
|         if not message.attachments: |         if not message.attachments: | ||||||
|             return 0 |             return 0 | ||||||
|  |  | ||||||
|  |         self.log( | ||||||
|  |             'debug', | ||||||
|  |             f"Rule {rule.account}.{rule}: " | ||||||
|  |             f"Processing mail {message.subject} from {message.from_} with " | ||||||
|  |             f"{len(message.attachments)} attachment(s)") | ||||||
|  |  | ||||||
|         correspondent = get_correspondent(message, rule) |         correspondent = get_correspondent(message, rule) | ||||||
|         tag = rule.assign_tag |         tag = rule.assign_tag | ||||||
|         doc_type = rule.assign_document_type |         doc_type = rule.assign_document_type | ||||||
| @@ -211,6 +257,12 @@ def handle_message(message, rule): | |||||||
|                 with open(temp_filename, 'wb') as f: |                 with open(temp_filename, 'wb') as f: | ||||||
|                     f.write(att.payload) |                     f.write(att.payload) | ||||||
|  |  | ||||||
|  |                 self.log( | ||||||
|  |                     'info', | ||||||
|  |                     f"Rule {rule.account}.{rule}: " | ||||||
|  |                     f"Consuming attachment {att.filename} from mail " | ||||||
|  |                     f"{message.subject} from {message.from_}") | ||||||
|  |  | ||||||
|                 async_task( |                 async_task( | ||||||
|                     "documents.tasks.consume_file", |                     "documents.tasks.consume_file", | ||||||
|                     path=temp_filename, |                     path=temp_filename, | ||||||
|   | |||||||
| @@ -1,6 +1,6 @@ | |||||||
| from django.core.management.base import BaseCommand | from django.core.management.base import BaseCommand | ||||||
|  |  | ||||||
| from paperless_mail import mail, tasks | from paperless_mail import tasks | ||||||
|  |  | ||||||
|  |  | ||||||
| class Command(BaseCommand): | class Command(BaseCommand): | ||||||
|   | |||||||
							
								
								
									
										23
									
								
								src/paperless_mail/migrations/0003_auto_20201118_1940.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										23
									
								
								src/paperless_mail/migrations/0003_auto_20201118_1940.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,23 @@ | |||||||
|  | # Generated by Django 3.1.3 on 2020-11-18 19:40 | ||||||
|  |  | ||||||
|  | from django.db import migrations, models | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class Migration(migrations.Migration): | ||||||
|  |  | ||||||
|  |     dependencies = [ | ||||||
|  |         ('paperless_mail', '0002_auto_20201117_1334'), | ||||||
|  |     ] | ||||||
|  |  | ||||||
|  |     operations = [ | ||||||
|  |         migrations.AlterField( | ||||||
|  |             model_name='mailaccount', | ||||||
|  |             name='imap_port', | ||||||
|  |             field=models.IntegerField(blank=True, help_text='This is usually 143 for unencrypted and STARTTLS connections, and 993 for SSL connections.', null=True), | ||||||
|  |         ), | ||||||
|  |         migrations.AlterField( | ||||||
|  |             model_name='mailrule', | ||||||
|  |             name='name', | ||||||
|  |             field=models.CharField(max_length=256, unique=True), | ||||||
|  |         ), | ||||||
|  |     ] | ||||||
| @@ -1,8 +1,5 @@ | |||||||
| from django.db import models | from django.db import models | ||||||
|  |  | ||||||
| # Create your models here. |  | ||||||
| from django.db import models |  | ||||||
|  |  | ||||||
| import documents.models as document_models | import documents.models as document_models | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -22,7 +19,11 @@ class MailAccount(models.Model): | |||||||
|  |  | ||||||
|     imap_server = models.CharField(max_length=256) |     imap_server = models.CharField(max_length=256) | ||||||
|  |  | ||||||
|     imap_port = models.IntegerField(blank=True, null=True) |     imap_port = models.IntegerField( | ||||||
|  |         blank=True, | ||||||
|  |         null=True, | ||||||
|  |         help_text="This is usually 143 for unencrypted and STARTTLS " | ||||||
|  |                   "connections, and 993 for SSL connections.") | ||||||
|  |  | ||||||
|     imap_security = models.PositiveIntegerField( |     imap_security = models.PositiveIntegerField( | ||||||
|         choices=IMAP_SECURITY_OPTIONS, |         choices=IMAP_SECURITY_OPTIONS, | ||||||
| @@ -71,7 +72,7 @@ class MailRule(models.Model): | |||||||
|         (CORRESPONDENT_FROM_CUSTOM, "Use correspondent selected below") |         (CORRESPONDENT_FROM_CUSTOM, "Use correspondent selected below") | ||||||
|     ) |     ) | ||||||
|  |  | ||||||
|     name = models.CharField(max_length=256) |     name = models.CharField(max_length=256, unique=True) | ||||||
|  |  | ||||||
|     account = models.ForeignKey( |     account = models.ForeignKey( | ||||||
|         MailAccount, |         MailAccount, | ||||||
|   | |||||||
| @@ -1,13 +1,13 @@ | |||||||
| import logging | import logging | ||||||
|  |  | ||||||
| from paperless_mail import mail | from paperless_mail.mail import MailAccountHandler | ||||||
| from paperless_mail.models import MailAccount | from paperless_mail.models import MailAccount | ||||||
|  |  | ||||||
|  |  | ||||||
| def process_mail_accounts(): | def process_mail_accounts(): | ||||||
|     total_new_documents = 0 |     total_new_documents = 0 | ||||||
|     for account in MailAccount.objects.all(): |     for account in MailAccount.objects.all(): | ||||||
|         total_new_documents += mail.handle_mail_account(account) |         total_new_documents += MailAccountHandler().handle_mail_account(account) | ||||||
|  |  | ||||||
|     if total_new_documents > 0: |     if total_new_documents > 0: | ||||||
|         return f"Added {total_new_documents} document(s)." |         return f"Added {total_new_documents} document(s)." | ||||||
| @@ -18,6 +18,6 @@ def process_mail_accounts(): | |||||||
| def process_mail_account(name): | def process_mail_account(name): | ||||||
|     account = MailAccount.objects.find(name=name) |     account = MailAccount.objects.find(name=name) | ||||||
|     if account: |     if account: | ||||||
|         mail.handle_mail_account(account) |         MailAccountHandler().handle_mail_account(account) | ||||||
|     else: |     else: | ||||||
|         logging.error("Unknown mail acccount: {}".format(name)) |         logging.error("Unknown mail acccount: {}".format(name)) | ||||||
|   | |||||||
| @@ -7,7 +7,7 @@ from django.test import TestCase | |||||||
| from imap_tools import MailMessageFlags, MailboxFolderSelectError | from imap_tools import MailMessageFlags, MailboxFolderSelectError | ||||||
|  |  | ||||||
| from documents.models import Correspondent | from documents.models import Correspondent | ||||||
| from paperless_mail.mail import get_correspondent, get_title, handle_message, handle_mail_account, MailError | from paperless_mail.mail import MailError, MailAccountHandler, get_correspondent, get_title | ||||||
| from paperless_mail.models import MailRule, MailAccount | from paperless_mail.models import MailRule, MailAccount | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -126,6 +126,8 @@ class TestMail(TestCase): | |||||||
|  |  | ||||||
|         self.reset_bogus_mailbox() |         self.reset_bogus_mailbox() | ||||||
|  |  | ||||||
|  |         self.mail_account_handler = MailAccountHandler() | ||||||
|  |  | ||||||
|     def reset_bogus_mailbox(self): |     def reset_bogus_mailbox(self): | ||||||
|         self.bogus_mailbox.messages = [] |         self.bogus_mailbox.messages = [] | ||||||
|         self.bogus_mailbox.messages_spam = [] |         self.bogus_mailbox.messages_spam = [] | ||||||
| @@ -145,10 +147,10 @@ class TestMail(TestCase): | |||||||
|         me_localhost = Correspondent.objects.create(name=message2.from_) |         me_localhost = Correspondent.objects.create(name=message2.from_) | ||||||
|         someone_else = Correspondent.objects.create(name="someone else") |         someone_else = Correspondent.objects.create(name="someone else") | ||||||
|  |  | ||||||
|         rule = MailRule(assign_correspondent_from=MailRule.CORRESPONDENT_FROM_NOTHING) |         rule = MailRule(name="a", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_NOTHING) | ||||||
|         self.assertIsNone(get_correspondent(message, rule)) |         self.assertIsNone(get_correspondent(message, rule)) | ||||||
|  |  | ||||||
|         rule = MailRule(assign_correspondent_from=MailRule.CORRESPONDENT_FROM_EMAIL) |         rule = MailRule(name="b", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_EMAIL) | ||||||
|         c = get_correspondent(message, rule) |         c = get_correspondent(message, rule) | ||||||
|         self.assertIsNotNone(c) |         self.assertIsNotNone(c) | ||||||
|         self.assertEqual(c.name, "someone@somewhere.com") |         self.assertEqual(c.name, "someone@somewhere.com") | ||||||
| @@ -157,7 +159,7 @@ class TestMail(TestCase): | |||||||
|         self.assertEqual(c.name, "me@localhost.com") |         self.assertEqual(c.name, "me@localhost.com") | ||||||
|         self.assertEqual(c.id, me_localhost.id) |         self.assertEqual(c.id, me_localhost.id) | ||||||
|  |  | ||||||
|         rule = MailRule(assign_correspondent_from=MailRule.CORRESPONDENT_FROM_NAME) |         rule = MailRule(name="c", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_NAME) | ||||||
|         c = get_correspondent(message, rule) |         c = get_correspondent(message, rule) | ||||||
|         self.assertIsNotNone(c) |         self.assertIsNotNone(c) | ||||||
|         self.assertEqual(c.name, "Someone!") |         self.assertEqual(c.name, "Someone!") | ||||||
| @@ -165,7 +167,7 @@ class TestMail(TestCase): | |||||||
|         self.assertIsNotNone(c) |         self.assertIsNotNone(c) | ||||||
|         self.assertEqual(c.id, me_localhost.id) |         self.assertEqual(c.id, me_localhost.id) | ||||||
|  |  | ||||||
|         rule = MailRule(assign_correspondent_from=MailRule.CORRESPONDENT_FROM_CUSTOM, assign_correspondent=someone_else) |         rule = MailRule(name="d", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_CUSTOM, assign_correspondent=someone_else) | ||||||
|         c = get_correspondent(message, rule) |         c = get_correspondent(message, rule) | ||||||
|         self.assertEqual(c, someone_else) |         self.assertEqual(c, someone_else) | ||||||
|  |  | ||||||
| @@ -174,14 +176,15 @@ class TestMail(TestCase): | |||||||
|         message.subject = "the message title" |         message.subject = "the message title" | ||||||
|         att = namedtuple('Attachment', []) |         att = namedtuple('Attachment', []) | ||||||
|         att.filename = "this_is_the_file.pdf" |         att.filename = "this_is_the_file.pdf" | ||||||
|         rule = MailRule(assign_title_from=MailRule.TITLE_FROM_FILENAME) |         rule = MailRule(name="a", assign_title_from=MailRule.TITLE_FROM_FILENAME) | ||||||
|         self.assertEqual(get_title(message, att, rule), "this_is_the_file") |         self.assertEqual(get_title(message, att, rule), "this_is_the_file") | ||||||
|         rule = MailRule(assign_title_from=MailRule.TITLE_FROM_SUBJECT) |         rule = MailRule(name="b", assign_title_from=MailRule.TITLE_FROM_SUBJECT) | ||||||
|         self.assertEqual(get_title(message, att, rule), "the message title") |         self.assertEqual(get_title(message, att, rule), "the message title") | ||||||
|  |  | ||||||
|     def test_handle_message(self): |     def test_handle_message(self): | ||||||
|         message = namedtuple('MailMessage', []) |         message = namedtuple('MailMessage', []) | ||||||
|         message.subject = "the message title" |         message.subject = "the message title" | ||||||
|  |         message.from_ = "Myself" | ||||||
|  |  | ||||||
|         att = namedtuple('Attachment', []) |         att = namedtuple('Attachment', []) | ||||||
|         att.filename = "test1.pdf" |         att.filename = "test1.pdf" | ||||||
| @@ -200,9 +203,10 @@ class TestMail(TestCase): | |||||||
|  |  | ||||||
|         message.attachments = [att, att2, att3] |         message.attachments = [att, att2, att3] | ||||||
|  |  | ||||||
|         rule = MailRule(assign_title_from=MailRule.TITLE_FROM_FILENAME) |         account = MailAccount() | ||||||
|  |         rule = MailRule(assign_title_from=MailRule.TITLE_FROM_FILENAME, account=account) | ||||||
|  |  | ||||||
|         result = handle_message(message, rule) |         result = self.mail_account_handler.handle_message(message, rule) | ||||||
|  |  | ||||||
|         self.assertEqual(result, 2) |         self.assertEqual(result, 2) | ||||||
|  |  | ||||||
| @@ -224,7 +228,7 @@ class TestMail(TestCase): | |||||||
|         message.attachments = [] |         message.attachments = [] | ||||||
|         rule = MailRule() |         rule = MailRule() | ||||||
|  |  | ||||||
|         result = handle_message(message, rule) |         result = self.mail_account_handler.handle_message(message, rule) | ||||||
|  |  | ||||||
|         self.assertFalse(m.called) |         self.assertFalse(m.called) | ||||||
|         self.assertEqual(result, 0) |         self.assertEqual(result, 0) | ||||||
| @@ -235,11 +239,13 @@ class TestMail(TestCase): | |||||||
|  |  | ||||||
|         rule = MailRule.objects.create(name="testrule", account=account, action=MailRule.ACTION_MARK_READ) |         rule = MailRule.objects.create(name="testrule", account=account, action=MailRule.ACTION_MARK_READ) | ||||||
|  |  | ||||||
|  |         self.assertEqual(len(self.bogus_mailbox.messages), 3) | ||||||
|         self.assertEqual(self.async_task.call_count, 0) |         self.assertEqual(self.async_task.call_count, 0) | ||||||
|         self.assertEqual(len(self.bogus_mailbox.fetch("UNSEEN", False)), 2) |         self.assertEqual(len(self.bogus_mailbox.fetch("UNSEEN", False)), 2) | ||||||
|         handle_mail_account(account) |         self.mail_account_handler.handle_mail_account(account) | ||||||
|         self.assertEqual(self.async_task.call_count, 2) |         self.assertEqual(self.async_task.call_count, 2) | ||||||
|         self.assertEqual(len(self.bogus_mailbox.fetch("UNSEEN", False)), 0) |         self.assertEqual(len(self.bogus_mailbox.fetch("UNSEEN", False)), 0) | ||||||
|  |         self.assertEqual(len(self.bogus_mailbox.messages), 3) | ||||||
|  |  | ||||||
|     def test_handle_mail_account_delete(self): |     def test_handle_mail_account_delete(self): | ||||||
|  |  | ||||||
| @@ -249,7 +255,7 @@ class TestMail(TestCase): | |||||||
|  |  | ||||||
|         self.assertEqual(self.async_task.call_count, 0) |         self.assertEqual(self.async_task.call_count, 0) | ||||||
|         self.assertEqual(len(self.bogus_mailbox.messages), 3) |         self.assertEqual(len(self.bogus_mailbox.messages), 3) | ||||||
|         handle_mail_account(account) |         self.mail_account_handler.handle_mail_account(account) | ||||||
|         self.assertEqual(self.async_task.call_count, 2) |         self.assertEqual(self.async_task.call_count, 2) | ||||||
|         self.assertEqual(len(self.bogus_mailbox.messages), 1) |         self.assertEqual(len(self.bogus_mailbox.messages), 1) | ||||||
|  |  | ||||||
| @@ -258,11 +264,13 @@ class TestMail(TestCase): | |||||||
|  |  | ||||||
|         rule = MailRule.objects.create(name="testrule", account=account, action=MailRule.ACTION_FLAG, filter_subject="Invoice") |         rule = MailRule.objects.create(name="testrule", account=account, action=MailRule.ACTION_FLAG, filter_subject="Invoice") | ||||||
|  |  | ||||||
|  |         self.assertEqual(len(self.bogus_mailbox.messages), 3) | ||||||
|         self.assertEqual(self.async_task.call_count, 0) |         self.assertEqual(self.async_task.call_count, 0) | ||||||
|         self.assertEqual(len(self.bogus_mailbox.fetch("UNFLAGGED", False)), 2) |         self.assertEqual(len(self.bogus_mailbox.fetch("UNFLAGGED", False)), 2) | ||||||
|         handle_mail_account(account) |         self.mail_account_handler.handle_mail_account(account) | ||||||
|         self.assertEqual(self.async_task.call_count, 1) |         self.assertEqual(self.async_task.call_count, 1) | ||||||
|         self.assertEqual(len(self.bogus_mailbox.fetch("UNFLAGGED", False)), 1) |         self.assertEqual(len(self.bogus_mailbox.fetch("UNFLAGGED", False)), 1) | ||||||
|  |         self.assertEqual(len(self.bogus_mailbox.messages), 3) | ||||||
|  |  | ||||||
|     def test_handle_mail_account_move(self): |     def test_handle_mail_account_move(self): | ||||||
|         account = MailAccount.objects.create(name="test", imap_server="", username="admin", password="secret") |         account = MailAccount.objects.create(name="test", imap_server="", username="admin", password="secret") | ||||||
| @@ -272,7 +280,7 @@ class TestMail(TestCase): | |||||||
|         self.assertEqual(self.async_task.call_count, 0) |         self.assertEqual(self.async_task.call_count, 0) | ||||||
|         self.assertEqual(len(self.bogus_mailbox.messages), 3) |         self.assertEqual(len(self.bogus_mailbox.messages), 3) | ||||||
|         self.assertEqual(len(self.bogus_mailbox.messages_spam), 0) |         self.assertEqual(len(self.bogus_mailbox.messages_spam), 0) | ||||||
|         handle_mail_account(account) |         self.mail_account_handler.handle_mail_account(account) | ||||||
|         self.assertEqual(self.async_task.call_count, 1) |         self.assertEqual(self.async_task.call_count, 1) | ||||||
|         self.assertEqual(len(self.bogus_mailbox.messages), 2) |         self.assertEqual(len(self.bogus_mailbox.messages), 2) | ||||||
|         self.assertEqual(len(self.bogus_mailbox.messages_spam), 1) |         self.assertEqual(len(self.bogus_mailbox.messages_spam), 1) | ||||||
| @@ -281,7 +289,7 @@ class TestMail(TestCase): | |||||||
|         account = MailAccount.objects.create(name="test", imap_server="", username="admin", password="wrong") |         account = MailAccount.objects.create(name="test", imap_server="", username="admin", password="wrong") | ||||||
|  |  | ||||||
|         try: |         try: | ||||||
|             handle_mail_account(account) |             self.mail_account_handler.handle_mail_account(account) | ||||||
|         except MailError as e: |         except MailError as e: | ||||||
|             self.assertTrue(str(e).startswith("Error while authenticating account")) |             self.assertTrue(str(e).startswith("Error while authenticating account")) | ||||||
|         else: |         else: | ||||||
| @@ -291,7 +299,7 @@ class TestMail(TestCase): | |||||||
|         rule = MailRule.objects.create(name="testrule", account=account, folder="uuuh") |         rule = MailRule.objects.create(name="testrule", account=account, folder="uuuh") | ||||||
|  |  | ||||||
|         try: |         try: | ||||||
|             handle_mail_account(account) |             self.mail_account_handler.handle_mail_account(account) | ||||||
|         except MailError as e: |         except MailError as e: | ||||||
|             self.assertTrue("uuuh does not exist" in str(e)) |             self.assertTrue("uuuh does not exist" in str(e)) | ||||||
|         else: |         else: | ||||||
| @@ -299,10 +307,10 @@ class TestMail(TestCase): | |||||||
|  |  | ||||||
|         account = MailAccount.objects.create(name="test3", imap_server="", username="admin", password="secret") |         account = MailAccount.objects.create(name="test3", imap_server="", username="admin", password="secret") | ||||||
|  |  | ||||||
|         rule = MailRule.objects.create(name="testrule", account=account, action=MailRule.ACTION_MOVE, action_parameter="doesnotexist", filter_subject="Claim") |         rule = MailRule.objects.create(name="testrule2", account=account, action=MailRule.ACTION_MOVE, action_parameter="doesnotexist", filter_subject="Claim") | ||||||
|  |  | ||||||
|         try: |         try: | ||||||
|             handle_mail_account(account) |             self.mail_account_handler.handle_mail_account(account) | ||||||
|         except MailError as e: |         except MailError as e: | ||||||
|             self.assertTrue("Error while processing post-consume actions" in str(e)) |             self.assertTrue("Error while processing post-consume actions" in str(e)) | ||||||
|         else: |         else: | ||||||
| @@ -311,12 +319,12 @@ class TestMail(TestCase): | |||||||
|     def test_filters(self): |     def test_filters(self): | ||||||
|  |  | ||||||
|         account = MailAccount.objects.create(name="test3", imap_server="", username="admin", password="secret") |         account = MailAccount.objects.create(name="test3", imap_server="", username="admin", password="secret") | ||||||
|         rule = MailRule.objects.create(name="testrule", account=account, action=MailRule.ACTION_DELETE, filter_subject="Claim") |         rule = MailRule.objects.create(name="testrule3", account=account, action=MailRule.ACTION_DELETE, filter_subject="Claim") | ||||||
|  |  | ||||||
|         self.assertEqual(self.async_task.call_count, 0) |         self.assertEqual(self.async_task.call_count, 0) | ||||||
|  |  | ||||||
|         self.assertEqual(len(self.bogus_mailbox.messages), 3) |         self.assertEqual(len(self.bogus_mailbox.messages), 3) | ||||||
|         handle_mail_account(account) |         self.mail_account_handler.handle_mail_account(account) | ||||||
|         self.assertEqual(len(self.bogus_mailbox.messages), 2) |         self.assertEqual(len(self.bogus_mailbox.messages), 2) | ||||||
|         self.assertEqual(self.async_task.call_count, 1) |         self.assertEqual(self.async_task.call_count, 1) | ||||||
|  |  | ||||||
| @@ -326,7 +334,7 @@ class TestMail(TestCase): | |||||||
|         rule.filter_body = "electronic" |         rule.filter_body = "electronic" | ||||||
|         rule.save() |         rule.save() | ||||||
|         self.assertEqual(len(self.bogus_mailbox.messages), 3) |         self.assertEqual(len(self.bogus_mailbox.messages), 3) | ||||||
|         handle_mail_account(account) |         self.mail_account_handler.handle_mail_account(account) | ||||||
|         self.assertEqual(len(self.bogus_mailbox.messages), 2) |         self.assertEqual(len(self.bogus_mailbox.messages), 2) | ||||||
|         self.assertEqual(self.async_task.call_count, 2) |         self.assertEqual(self.async_task.call_count, 2) | ||||||
|  |  | ||||||
| @@ -336,7 +344,7 @@ class TestMail(TestCase): | |||||||
|         rule.filter_body = None |         rule.filter_body = None | ||||||
|         rule.save() |         rule.save() | ||||||
|         self.assertEqual(len(self.bogus_mailbox.messages), 3) |         self.assertEqual(len(self.bogus_mailbox.messages), 3) | ||||||
|         handle_mail_account(account) |         self.mail_account_handler.handle_mail_account(account) | ||||||
|         self.assertEqual(len(self.bogus_mailbox.messages), 1) |         self.assertEqual(len(self.bogus_mailbox.messages), 1) | ||||||
|         self.assertEqual(self.async_task.call_count, 4) |         self.assertEqual(self.async_task.call_count, 4) | ||||||
|  |  | ||||||
| @@ -347,6 +355,6 @@ class TestMail(TestCase): | |||||||
|         rule.filter_subject = "Invoice" |         rule.filter_subject = "Invoice" | ||||||
|         rule.save() |         rule.save() | ||||||
|         self.assertEqual(len(self.bogus_mailbox.messages), 3) |         self.assertEqual(len(self.bogus_mailbox.messages), 3) | ||||||
|         handle_mail_account(account) |         self.mail_account_handler.handle_mail_account(account) | ||||||
|         self.assertEqual(len(self.bogus_mailbox.messages), 2) |         self.assertEqual(len(self.bogus_mailbox.messages), 2) | ||||||
|         self.assertEqual(self.async_task.call_count, 5) |         self.assertEqual(self.async_task.call_count, 5) | ||||||
|   | |||||||
| @@ -1,3 +0,0 @@ | |||||||
| from django.shortcuts import render |  | ||||||
|  |  | ||||||
| # Create your views here. |  | ||||||
| @@ -86,7 +86,7 @@ class RasterisedDocumentParser(DocumentParser): | |||||||
|             return self._text |             return self._text | ||||||
|  |  | ||||||
|         if not settings.OCR_ALWAYS and self._is_ocred(): |         if not settings.OCR_ALWAYS and self._is_ocred(): | ||||||
|             self.log("info", "Skipping OCR, using Text from PDF") |             self.log("debug", "Skipping OCR, using Text from PDF") | ||||||
|             self._text = get_text_from_pdf(self.document_path) |             self._text = get_text_from_pdf(self.document_path) | ||||||
|             return self._text |             return self._text | ||||||
|  |  | ||||||
| @@ -98,7 +98,7 @@ class RasterisedDocumentParser(DocumentParser): | |||||||
|         try: |         try: | ||||||
|  |  | ||||||
|             sample_page_index = int(len(images) / 2) |             sample_page_index = int(len(images) / 2) | ||||||
|             self.log("info", "Attempting language detection on page {} of {}...".format(sample_page_index + 1, len(images))) |             self.log("debug", "Attempting language detection on page {} of {}...".format(sample_page_index + 1, len(images))) | ||||||
|             sample_page_text = self._ocr([images[sample_page_index]], settings.OCR_LANGUAGE)[0] |             sample_page_text = self._ocr([images[sample_page_index]], settings.OCR_LANGUAGE)[0] | ||||||
|             guessed_language = self._guess_language(sample_page_text) |             guessed_language = self._guess_language(sample_page_text) | ||||||
|  |  | ||||||
| @@ -107,7 +107,7 @@ class RasterisedDocumentParser(DocumentParser): | |||||||
|                 ocr_pages = self._complete_ocr_default_language(images, sample_page_index, sample_page_text) |                 ocr_pages = self._complete_ocr_default_language(images, sample_page_index, sample_page_text) | ||||||
|  |  | ||||||
|             elif ISO639[guessed_language] == settings.OCR_LANGUAGE: |             elif ISO639[guessed_language] == settings.OCR_LANGUAGE: | ||||||
|                 self.log("info", "Detected language: {} (default language)".format(guessed_language)) |                 self.log("debug", "Detected language: {} (default language)".format(guessed_language)) | ||||||
|                 ocr_pages = self._complete_ocr_default_language(images, sample_page_index, sample_page_text) |                 ocr_pages = self._complete_ocr_default_language(images, sample_page_index, sample_page_text) | ||||||
|  |  | ||||||
|             elif not ISO639[guessed_language] in pyocr.get_available_tools()[0].get_available_languages(): |             elif not ISO639[guessed_language] in pyocr.get_available_tools()[0].get_available_languages(): | ||||||
| @@ -115,10 +115,10 @@ class RasterisedDocumentParser(DocumentParser): | |||||||
|                 ocr_pages = self._complete_ocr_default_language(images, sample_page_index, sample_page_text) |                 ocr_pages = self._complete_ocr_default_language(images, sample_page_index, sample_page_text) | ||||||
|  |  | ||||||
|             else: |             else: | ||||||
|                 self.log("info", "Detected language: {}".format(guessed_language)) |                 self.log("debug", "Detected language: {}".format(guessed_language)) | ||||||
|                 ocr_pages = self._ocr(images, ISO639[guessed_language]) |                 ocr_pages = self._ocr(images, ISO639[guessed_language]) | ||||||
|  |  | ||||||
|             self.log("info", "OCR completed.") |             self.log("debug", "OCR completed.") | ||||||
|             self._text = strip_excess_whitespace(" ".join(ocr_pages)) |             self._text = strip_excess_whitespace(" ".join(ocr_pages)) | ||||||
|             return self._text |             return self._text | ||||||
|  |  | ||||||
| @@ -130,7 +130,7 @@ class RasterisedDocumentParser(DocumentParser): | |||||||
|         Greyscale images are easier for Tesseract to OCR |         Greyscale images are easier for Tesseract to OCR | ||||||
|         """ |         """ | ||||||
|  |  | ||||||
|         self.log("info", "Converting document {} into greyscale images...".format(self.document_path)) |         self.log("debug", "Converting document {} into greyscale images...".format(self.document_path)) | ||||||
|  |  | ||||||
|         # Convert PDF to multiple PNMs |         # Convert PDF to multiple PNMs | ||||||
|         pnm = os.path.join(self.tempdir, "convert-%04d.pnm") |         pnm = os.path.join(self.tempdir, "convert-%04d.pnm") | ||||||
| @@ -148,7 +148,7 @@ class RasterisedDocumentParser(DocumentParser): | |||||||
|             if f.endswith(".pnm"): |             if f.endswith(".pnm"): | ||||||
|                 pnms.append(os.path.join(self.tempdir, f)) |                 pnms.append(os.path.join(self.tempdir, f)) | ||||||
|  |  | ||||||
|         self.log("info", "Running unpaper on {} pages...".format(len(pnms))) |         self.log("debug", "Running unpaper on {} pages...".format(len(pnms))) | ||||||
|  |  | ||||||
|         # Run unpaper in parallel on converted images |         # Run unpaper in parallel on converted images | ||||||
|         with ThreadPool(processes=settings.THREADS_PER_WORKER) as pool: |         with ThreadPool(processes=settings.THREADS_PER_WORKER) as pool: | ||||||
| @@ -161,11 +161,11 @@ class RasterisedDocumentParser(DocumentParser): | |||||||
|             guess = langdetect.detect(text) |             guess = langdetect.detect(text) | ||||||
|             return guess |             return guess | ||||||
|         except Exception as e: |         except Exception as e: | ||||||
|             self.log('debug', "Language detection failed with: {}".format(e)) |             self.log('warning', "Language detection failed with: {}".format(e)) | ||||||
|             return None |             return None | ||||||
|  |  | ||||||
|     def _ocr(self, imgs, lang): |     def _ocr(self, imgs, lang): | ||||||
|         self.log("info", "Performing OCR on {} page(s) with language {}".format(len(imgs), lang)) |         self.log("debug", "Performing OCR on {} page(s) with language {}".format(len(imgs), lang)) | ||||||
|         with ThreadPool(processes=settings.THREADS_PER_WORKER) as pool: |         with ThreadPool(processes=settings.THREADS_PER_WORKER) as pool: | ||||||
|             r = pool.map(image_to_string, itertools.product(imgs, [lang])) |             r = pool.map(image_to_string, itertools.product(imgs, [lang])) | ||||||
|             return r |             return r | ||||||
| @@ -180,7 +180,7 @@ class RasterisedDocumentParser(DocumentParser): | |||||||
|         images_copy = list(images) |         images_copy = list(images) | ||||||
|         del images_copy[sample_page_index] |         del images_copy[sample_page_index] | ||||||
|         if images_copy: |         if images_copy: | ||||||
|             self.log('info', 'Continuing ocr with default language.') |             self.log('debug', 'Continuing ocr with default language.') | ||||||
|             ocr_pages = self._ocr(images_copy, settings.OCR_LANGUAGE) |             ocr_pages = self._ocr(images_copy, settings.OCR_LANGUAGE) | ||||||
|             ocr_pages.insert(sample_page_index, sample_page) |             ocr_pages.insert(sample_page_index, sample_page) | ||||||
|             return ocr_pages |             return ocr_pages | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Jonas Winkler
					Jonas Winkler