Merge branch 'dev'

This commit is contained in:
Jonas Winkler 2020-11-18 23:02:48 +01:00
commit 8f5809d1fc
34 changed files with 480 additions and 346 deletions

View File

@ -1,82 +0,0 @@
###############################################################################
### Front end                                                               ###
###############################################################################
# Pinned major version instead of the moving "current" tag so builds are
# reproducible (node:current was 15.x when this image was written — confirm).
FROM node:15 AS frontend
WORKDIR /usr/src/paperless/src-ui/
# Copy only the package manifests first: the npm install layer stays cached
# until the dependency list actually changes.
COPY src-ui/package* ./
RUN npm install
COPY src-ui .
# Build the Angular front end; the dist output is copied into the backend
# stage below, so none of the node toolchain ends up in the final image.
RUN node_modules/.bin/ng build --prod --output-hashing none --sourceMap=false --output-path dist/paperless-ui

###############################################################################
### Back end                                                                ###
###############################################################################
FROM ubuntu:20.04
WORKDIR /usr/src/paperless/
# Copy Pipfile/Pipfile.lock before the source so dependency installation is
# cached independently of application code changes.
COPY Pipfile* ./

# Dependencies: system packages, python packages, and cleanup all in ONE
# layer so that the purged build tools and apt lists do not bloat the image.
# DEBIAN_FRONTEND is set inline (not via ENV) so it does not leak into the
# runtime environment.
RUN apt-get update \
	&& DEBIAN_FRONTEND="noninteractive" apt-get -y --no-install-recommends install \
		build-essential \
		curl \
		ghostscript \
		gnupg \
		imagemagick \
		libmagic-dev \
		libpoppler-cpp-dev \
		libpq-dev \
		optipng \
		python3 \
		python3-dev \
		python3-pip \
		sudo \
		tesseract-ocr \
		tesseract-ocr-eng \
		tesseract-ocr-deu \
		tesseract-ocr-fra \
		tesseract-ocr-ita \
		tesseract-ocr-spa \
		tzdata \
		unpaper \
	# --no-cache-dir keeps the pip download cache out of this layer
	&& pip3 install --no-cache-dir --upgrade pipenv supervisor setuptools \
	&& pipenv install --system --deploy \
	&& pipenv --clear \
	# build-time-only packages are removed in the same layer that used them
	&& apt-get -y purge build-essential python3-pip python3-dev \
	&& apt-get -y autoremove --purge \
	&& rm -rf /var/lib/apt/lists/* \
	&& mkdir /var/log/supervisord /var/run/supervisord

# copy scripts
# this fixes issues with imagemagick and PDF (relaxes the default PDF policy)
COPY docker/imagemagick-policy.xml /etc/ImageMagick-6/policy.xml
COPY docker/gunicorn.conf.py ./
COPY docker/supervisord.conf /etc/supervisord.conf
COPY docker/docker-entrypoint.sh /sbin/docker-entrypoint.sh

# copy app (backend source plus the compiled front end from the first stage)
COPY src/ ./src/
COPY --from=frontend /usr/src/paperless/src-ui/dist/paperless-ui/ ./src/documents/static/frontend/

# add users, setup scripts
# NOTE(review): the image still starts as root and drops privileges via
# sudo/the entrypoint; consider a USER directive once the entrypoint no
# longer needs root — verify against docker-entrypoint.sh.
RUN addgroup --gid 1000 paperless \
	&& useradd --uid 1000 --gid paperless --home-dir /usr/src/paperless paperless \
	&& chown -R paperless:paperless . \
	&& chmod 755 /sbin/docker-entrypoint.sh

WORKDIR /usr/src/paperless/src/
# Collect static files as the paperless user so the generated files are
# owned by it (sudo -HEu keeps the environment and sets HOME).
RUN sudo -HEu paperless python3 manage.py collectstatic --clear --no-input

VOLUME ["/usr/src/paperless/data", "/usr/src/paperless/media", "/usr/src/paperless/consume", "/usr/src/paperless/export"]

# Documentation only: gunicorn serves on 8000 by default — confirm against
# docker/gunicorn.conf.py.
EXPOSE 8000

ENTRYPOINT ["/sbin/docker-entrypoint.sh"]
CMD ["/usr/local/bin/supervisord", "-c", "/etc/supervisord.conf"]

LABEL maintainer="Jonas Winkler <dev@jpwinkler.de>"

View File

@ -3,6 +3,11 @@ url = "https://pypi.python.org/simple"
verify_ssl = true verify_ssl = true
name = "pypi" name = "pypi"
[[source]]
url = "https://www.piwheels.org/simple"
verify_ssl = true
name = "piwheels"
[packages] [packages]
django = "~=3.1" django = "~=3.1"
pillow = "*" pillow = "*"

51
Pipfile.lock generated
View File

@ -1,7 +1,7 @@
{ {
"_meta": { "_meta": {
"hash": { "hash": {
"sha256": "d6416e6844126b09200b9839a3abdcf3c24ef5cf70052b8f134d8bc804552c17" "sha256": "abc7e5f5a8d075d4b013ceafd06ca07f57e597f053d670f73449ba210511b114"
}, },
"pipfile-spec": 6, "pipfile-spec": 6,
"requires": {}, "requires": {},
@ -10,6 +10,11 @@
"name": "pypi", "name": "pypi",
"url": "https://pypi.python.org/simple", "url": "https://pypi.python.org/simple",
"verify_ssl": true "verify_ssl": true
},
{
"name": "piwheels",
"url": "https://www.piwheels.org/simple",
"verify_ssl": true
} }
] ]
}, },
@ -102,6 +107,7 @@
}, },
"filemagic": { "filemagic": {
"hashes": [ "hashes": [
"sha256:b2fd77411975510e28673220c4b8868ed81b5eb5906339b6f4c233b32122d7d3",
"sha256:e684359ef40820fe406f0ebc5bf8a78f89717bdb7fed688af68082d991d6dbf3" "sha256:e684359ef40820fe406f0ebc5bf8a78f89717bdb7fed688af68082d991d6dbf3"
], ],
"index": "pypi", "index": "pypi",
@ -142,6 +148,7 @@
"langdetect": { "langdetect": {
"hashes": [ "hashes": [
"sha256:363795ea005f1243c958e953245dac5d814fabdc025c9afa91588c5fa6b2fa83", "sha256:363795ea005f1243c958e953245dac5d814fabdc025c9afa91588c5fa6b2fa83",
"sha256:ae53a024643df713274c297c0795dbfb5a16b329902f8e543e7b2d7d45f699e4",
"sha256:f37495e63607865e47deed08d78f7f8e58172658216ff954b2f14671bcd87740" "sha256:f37495e63607865e47deed08d78f7f8e58172658216ff954b2f14671bcd87740"
], ],
"index": "pypi", "index": "pypi",
@ -162,6 +169,7 @@
"sha256:448ebb1b3bf64c0267d6b09a7cba26b5ae61b6d2dbabff7c91b660c7eccf2bdb", "sha256:448ebb1b3bf64c0267d6b09a7cba26b5ae61b6d2dbabff7c91b660c7eccf2bdb",
"sha256:50e86c076611212ca62e5a59f518edafe0c0730f7d9195fec718da1a5c2bb1fc", "sha256:50e86c076611212ca62e5a59f518edafe0c0730f7d9195fec718da1a5c2bb1fc",
"sha256:5734bdc0342aba9dfc6f04920988140fb41234db42381cf7ccba64169f9fe7ac", "sha256:5734bdc0342aba9dfc6f04920988140fb41234db42381cf7ccba64169f9fe7ac",
"sha256:5ddd1dfa2be066595c1993165b4cae84b9866b12339d0c903db7f21a094324a3",
"sha256:64324f64f90a9e4ef732be0928be853eee378fd6a01be21a0a8469c4f2682c83", "sha256:64324f64f90a9e4ef732be0928be853eee378fd6a01be21a0a8469c4f2682c83",
"sha256:6ae6c680f3ebf1cf7ad1d7748868b39d9f900836df774c453c11c5440bc15b36", "sha256:6ae6c680f3ebf1cf7ad1d7748868b39d9f900836df774c453c11c5440bc15b36",
"sha256:6d7593a705d662be5bfe24111af14763016765f43cb6923ed86223f965f52387", "sha256:6d7593a705d662be5bfe24111af14763016765f43cb6923ed86223f965f52387",
@ -189,7 +197,8 @@
}, },
"pathtools": { "pathtools": {
"hashes": [ "hashes": [
"sha256:7c35c5421a39bb82e58018febd90e3b6e5db34c5443aaaf742b3f33d4655f1c0" "sha256:7c35c5421a39bb82e58018febd90e3b6e5db34c5443aaaf742b3f33d4655f1c0",
"sha256:d77d982475e87f32b82157a43b09f0a5ef3e66c1d8f3c7eb8d2580e783cd8202"
], ],
"version": "==0.1.2" "version": "==0.1.2"
}, },
@ -217,6 +226,7 @@
"sha256:2fb113757a369a6cdb189f8df3226e995acfed0a8919a72416626af1a0a71140", "sha256:2fb113757a369a6cdb189f8df3226e995acfed0a8919a72416626af1a0a71140",
"sha256:4b0ef2470c4979e345e4e0cc1bbac65fda11d0d7b789dbac035e4c6ce3f98adb", "sha256:4b0ef2470c4979e345e4e0cc1bbac65fda11d0d7b789dbac035e4c6ce3f98adb",
"sha256:59e903ca800c8cfd1ebe482349ec7c35687b95e98cefae213e271c8c7fffa021", "sha256:59e903ca800c8cfd1ebe482349ec7c35687b95e98cefae213e271c8c7fffa021",
"sha256:5a3342d34289715928c914ee7f389351eb37fa4857caa9297fc7948f2ed3e53d",
"sha256:5abd653a23c35d980b332bc0431d39663b1709d64142e3652890df4c9b6970f6", "sha256:5abd653a23c35d980b332bc0431d39663b1709d64142e3652890df4c9b6970f6",
"sha256:5f9403af9c790cc18411ea398a6950ee2def2a830ad0cfe6dc9122e6d528b302", "sha256:5f9403af9c790cc18411ea398a6950ee2def2a830ad0cfe6dc9122e6d528b302",
"sha256:6b4a8fd632b4ebee28282a9fef4c341835a1aa8671e2770b6f89adc8e8c2703c", "sha256:6b4a8fd632b4ebee28282a9fef4c341835a1aa8671e2770b6f89adc8e8c2703c",
@ -274,8 +284,10 @@
"sha256:d14b140a4439d816e3b1229a4a525df917d6ea22a0771a2a78332273fd9528a4", "sha256:d14b140a4439d816e3b1229a4a525df917d6ea22a0771a2a78332273fd9528a4",
"sha256:d1b4ab59e02d9008efe10ceabd0b31e79519da6fb67f7d8e8977118832d0f449", "sha256:d1b4ab59e02d9008efe10ceabd0b31e79519da6fb67f7d8e8977118832d0f449",
"sha256:d5227b229005a696cc67676e24c214740efd90b148de5733419ac9aaba3773da", "sha256:d5227b229005a696cc67676e24c214740efd90b148de5733419ac9aaba3773da",
"sha256:d9f3a909b59ac4a3ca9beb77716f4bce627276edb039a71d4e9ec4b7548536a0",
"sha256:e1f57aa70d3f7cc6947fd88636a481638263ba04a742b4a37dd25c373e41491a", "sha256:e1f57aa70d3f7cc6947fd88636a481638263ba04a742b4a37dd25c373e41491a",
"sha256:e74a55f6bad0e7d3968399deb50f61f4db1926acf4a6d83beaaa7df986f48b1c", "sha256:e74a55f6bad0e7d3968399deb50f61f4db1926acf4a6d83beaaa7df986f48b1c",
"sha256:e7f5a465c6431c0ad8d4e69603ee3306e521a09d3c6af76a16bdb62946bdddf0",
"sha256:e82aba2188b9ba309fd8e271702bd0d0fc9148ae3150532bbb474f4590039ffb", "sha256:e82aba2188b9ba309fd8e271702bd0d0fc9148ae3150532bbb474f4590039ffb",
"sha256:ee69dad2c7155756ad114c02db06002f4cded41132cc51378e57aad79cc8e4f4", "sha256:ee69dad2c7155756ad114c02db06002f4cded41132cc51378e57aad79cc8e4f4",
"sha256:f5ab93a2cb2d8338b1674be43b442a7f544a0971da062a5da774ed40587f18f5" "sha256:f5ab93a2cb2d8338b1674be43b442a7f544a0971da062a5da774ed40587f18f5"
@ -285,7 +297,8 @@
}, },
"pyocr": { "pyocr": {
"hashes": [ "hashes": [
"sha256:fa15adc7e1cf0d345a2990495fe125a947c6e09a60ddba0256a1c14b2e603179" "sha256:fa15adc7e1cf0d345a2990495fe125a947c6e09a60ddba0256a1c14b2e603179",
"sha256:fd602af17b6e21985669aadc058a95f343ff921e962ed4aa6520ded32e4d1301"
], ],
"index": "pypi", "index": "pypi",
"version": "==0.7.2" "version": "==0.7.2"
@ -316,7 +329,10 @@
}, },
"python-levenshtein": { "python-levenshtein": {
"hashes": [ "hashes": [
"sha256:033a11de5e3d19ea25c9302d11224e1a1898fe5abd23c61c7c360c25195e3eb1" "sha256:033a11de5e3d19ea25c9302d11224e1a1898fe5abd23c61c7c360c25195e3eb1",
"sha256:15e26882728c29ccdf74cfc6ac4b49fc22c08b44d152348cb0eb1ec4f3dbf9df",
"sha256:3df5e5eb144570ecf5ad38864a2393068798328c7f05e7b167a49391d36a2db1",
"sha256:7f049b3ddc4b525bd469febafb98bf5202f789b722e0e4ccbec2ffbe8c07d7b4"
], ],
"index": "pypi", "index": "pypi",
"version": "==0.12.0" "version": "==0.12.0"
@ -331,6 +347,7 @@
"redis": { "redis": {
"hashes": [ "hashes": [
"sha256:0e7e0cfca8660dea8b7d5cd8c4f6c5e29e11f31158c0b0ae91a397f00e5a05a2", "sha256:0e7e0cfca8660dea8b7d5cd8c4f6c5e29e11f31158c0b0ae91a397f00e5a05a2",
"sha256:3f1c7f166fa6c803613eec222224848a80f5e5b9c6af3aa82461506643034a7a",
"sha256:432b788c4530cfe16d8d943a09d40ca6c16149727e4afe8c2c9d5580c59d9f24" "sha256:432b788c4530cfe16d8d943a09d40ca6c16149727e4afe8c2c9d5580c59d9f24"
], ],
"index": "pypi", "index": "pypi",
@ -360,7 +377,9 @@
"sha256:749078d1eb89484db5f34b4012092ad14b327944ee7f1c4f74d6279a6e4d1884", "sha256:749078d1eb89484db5f34b4012092ad14b327944ee7f1c4f74d6279a6e4d1884",
"sha256:7913bd25f4ab274ba37bc97ad0e21c31004224ccb02765ad984eef43e04acc6c", "sha256:7913bd25f4ab274ba37bc97ad0e21c31004224ccb02765ad984eef43e04acc6c",
"sha256:7a25fcbeae08f96a754b45bdc050e1fb94b95cab046bf56b016c25e9ab127b3e", "sha256:7a25fcbeae08f96a754b45bdc050e1fb94b95cab046bf56b016c25e9ab127b3e",
"sha256:80ef188c0e47a6c964eed71c55a73c245f8daf9f0a4a9d804e91275afb468ca4",
"sha256:83d6b356e116ca119db8e7c6fc2983289d87b27b3fac238cfe5dca529d884562", "sha256:83d6b356e116ca119db8e7c6fc2983289d87b27b3fac238cfe5dca529d884562",
"sha256:842fb985b2b99a82a2b145b6bbd588c5f5cfd83693402920fcb985d515794666",
"sha256:8b882a78c320478b12ff024e81dc7d43c1462aa4a3341c754ee65d857a521f85", "sha256:8b882a78c320478b12ff024e81dc7d43c1462aa4a3341c754ee65d857a521f85",
"sha256:8f6a2229e8ad946e36815f2a03386bb8353d4bde368fdf8ca5f0cb97264d3b5c", "sha256:8f6a2229e8ad946e36815f2a03386bb8353d4bde368fdf8ca5f0cb97264d3b5c",
"sha256:9801c4c1d9ae6a70aeb2128e5b4b68c45d4f0af0d1535500884d644fa9b768c6", "sha256:9801c4c1d9ae6a70aeb2128e5b4b68c45d4f0af0d1535500884d644fa9b768c6",
@ -384,6 +403,7 @@
}, },
"scikit-learn": { "scikit-learn": {
"hashes": [ "hashes": [
"sha256:090bbf144fd5823c1f2efa3e1a9bf180295b24294ca8f478e75b40ed54f8036e",
"sha256:0a127cc70990d4c15b1019680bfedc7fec6c23d14d3719fdf9b64b22d37cdeca", "sha256:0a127cc70990d4c15b1019680bfedc7fec6c23d14d3719fdf9b64b22d37cdeca",
"sha256:0d39748e7c9669ba648acf40fb3ce96b8a07b240db6888563a7cb76e05e0d9cc", "sha256:0d39748e7c9669ba648acf40fb3ce96b8a07b240db6888563a7cb76e05e0d9cc",
"sha256:1b8a391de95f6285a2f9adffb7db0892718950954b7149a70c783dc848f104ea", "sha256:1b8a391de95f6285a2f9adffb7db0892718950954b7149a70c783dc848f104ea",
@ -423,6 +443,7 @@
"sha256:9ad4fcddcbf5dc67619379782e6aeef41218a79e17979aaed01ed099876c0e62", "sha256:9ad4fcddcbf5dc67619379782e6aeef41218a79e17979aaed01ed099876c0e62",
"sha256:a254b98dbcc744c723a838c03b74a8a34c0558c9ac5c86d5561703362231107d", "sha256:a254b98dbcc744c723a838c03b74a8a34c0558c9ac5c86d5561703362231107d",
"sha256:b03c4338d6d3d299e8ca494194c0ae4f611548da59e3c038813f1a43976cb437", "sha256:b03c4338d6d3d299e8ca494194c0ae4f611548da59e3c038813f1a43976cb437",
"sha256:b5e9d3e4474644915809d6aa1416ff20430a3ed9ae723a5d295da5ddb24985e2",
"sha256:cc1f78ebc982cd0602c9a7615d878396bec94908db67d4ecddca864d049112f2", "sha256:cc1f78ebc982cd0602c9a7615d878396bec94908db67d4ecddca864d049112f2",
"sha256:d6d25c41a009e3c6b7e757338948d0076ee1dd1770d1c09ec131f11946883c54", "sha256:d6d25c41a009e3c6b7e757338948d0076ee1dd1770d1c09ec131f11946883c54",
"sha256:d84cadd7d7998433334c99fa55bcba0d8b4aeff0edb123b2a1dfcface538e474", "sha256:d84cadd7d7998433334c99fa55bcba0d8b4aeff0edb123b2a1dfcface538e474",
@ -468,6 +489,7 @@
}, },
"watchdog": { "watchdog": {
"hashes": [ "hashes": [
"sha256:034c85530b647486e8c8477410fe79476511282658f2ce496f97106d9e5acfb8",
"sha256:4214e1379d128b0588021880ccaf40317ee156d4603ac388b9adcf29165e0c04" "sha256:4214e1379d128b0588021880ccaf40317ee156d4603ac388b9adcf29165e0c04"
], ],
"index": "pypi", "index": "pypi",
@ -561,6 +583,7 @@
"sha256:29a6272fec10623fcbe158fdf9abc7a5fa032048ac1d8631f14b50fbfc10d17f", "sha256:29a6272fec10623fcbe158fdf9abc7a5fa032048ac1d8631f14b50fbfc10d17f",
"sha256:2b31f46bf7b31e6aa690d4c7a3d51bb262438c6dcb0d528adde446531d0d3bb7", "sha256:2b31f46bf7b31e6aa690d4c7a3d51bb262438c6dcb0d528adde446531d0d3bb7",
"sha256:2d43af2be93ffbad25dd959899b5b809618a496926146ce98ee0b23683f8c51c", "sha256:2d43af2be93ffbad25dd959899b5b809618a496926146ce98ee0b23683f8c51c",
"sha256:3188a7dfd96f734a7498f37cde6598b1e9c084f1ca68bc1aa04e88db31168ab6",
"sha256:381ead10b9b9af5f64646cd27107fb27b614ee7040bb1226f9c07ba96625cbb5", "sha256:381ead10b9b9af5f64646cd27107fb27b614ee7040bb1226f9c07ba96625cbb5",
"sha256:47a11bdbd8ada9b7ee628596f9d97fbd3851bd9999d398e9436bd67376dbece7", "sha256:47a11bdbd8ada9b7ee628596f9d97fbd3851bd9999d398e9436bd67376dbece7",
"sha256:4d6a42744139a7fa5b46a264874a781e8694bb32f1d76d8137b68138686f1729", "sha256:4d6a42744139a7fa5b46a264874a781e8694bb32f1d76d8137b68138686f1729",
@ -586,7 +609,8 @@
"sha256:c851b35fc078389bc16b915a0a7c1d5923e12e2c5aeec58c52f4aa8085ac8237", "sha256:c851b35fc078389bc16b915a0a7c1d5923e12e2c5aeec58c52f4aa8085ac8237",
"sha256:cb7df71de0af56000115eafd000b867d1261f786b5eebd88a0ca6360cccfaca7", "sha256:cb7df71de0af56000115eafd000b867d1261f786b5eebd88a0ca6360cccfaca7",
"sha256:cedb2f9e1f990918ea061f28a0f0077a07702e3819602d3507e2ff98c8d20636", "sha256:cedb2f9e1f990918ea061f28a0f0077a07702e3819602d3507e2ff98c8d20636",
"sha256:e8caf961e1b1a945db76f1b5fa9c91498d15f545ac0ababbe575cfab185d3bd8" "sha256:e8caf961e1b1a945db76f1b5fa9c91498d15f545ac0ababbe575cfab185d3bd8",
"sha256:ef221855191457fffeb909d5787d1807800ab4d0111f089e6c93ee68f577634d"
], ],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'", "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'",
"version": "==5.3" "version": "==5.3"
@ -608,6 +632,7 @@
}, },
"docopt": { "docopt": {
"hashes": [ "hashes": [
"sha256:15fde8252aa9f2804171014d50d069ffbf42c7a50b7d74bcbb82bfd5700fcfc2",
"sha256:49b3a825280bd66b3aa83585ef59c4a8c82f2c8a522dbe754a8bc8d08c85c491" "sha256:49b3a825280bd66b3aa83585ef59c4a8c82f2c8a522dbe754a8bc8d08c85c491"
], ],
"version": "==0.6.2" "version": "==0.6.2"
@ -638,11 +663,11 @@
}, },
"faker": { "faker": {
"hashes": [ "hashes": [
"sha256:6afc461ab3f779c9c16e299fc731d775e39ea7e8e063b3053ee359ae198a15ca", "sha256:4d038ba51ae5e0a956d79cadd684d856e5750bfd608b61dad1807f8f08b1da49",
"sha256:ce1c38823eb0f927567cde5bf2e7c8ca565c7a70316139342050ce2ca74b4026" "sha256:f260f0375a44cd1e1a735c9b8c9b914304f607b5eef431d20e098c7c2f5b50a6"
], ],
"markers": "python_version >= '3.5'", "markers": "python_version >= '3.5'",
"version": "==4.14.2" "version": "==4.16.0"
}, },
"filelock": { "filelock": {
"hashes": [ "hashes": [
@ -653,6 +678,7 @@
}, },
"idna": { "idna": {
"hashes": [ "hashes": [
"sha256:4a57a6379512ade94fa99e2fa46d3cd0f2f553040548d0e2958c6ed90ee48226",
"sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6", "sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6",
"sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0" "sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0"
], ],
@ -670,12 +696,14 @@
"iniconfig": { "iniconfig": {
"hashes": [ "hashes": [
"sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3", "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3",
"sha256:8647b85c03813b8680f4ae9c9db2fd7293f8591ea536a10d73d90f6eb4b10aac",
"sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32" "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"
], ],
"version": "==1.1.1" "version": "==1.1.1"
}, },
"jinja2": { "jinja2": {
"hashes": [ "hashes": [
"sha256:3f172970d5670703bd3812e8ca6459a9a7e069fa8e51b40195f83c81db191ec4",
"sha256:89aab215427ef59c34ad58735269eb58b1a5808103067f7bb9d5836c651b3bb0", "sha256:89aab215427ef59c34ad58735269eb58b1a5808103067f7bb9d5836c651b3bb0",
"sha256:f0a4641d3cf955324a89c04f3d94663aa4d638abe8f733ecd3582848e1c37035" "sha256:f0a4641d3cf955324a89c04f3d94663aa4d638abe8f733ecd3582848e1c37035"
], ],
@ -689,8 +717,10 @@
"sha256:09c4b7f37d6c648cb13f9230d847adf22f8171b1ccc4d5682398e77f40309235", "sha256:09c4b7f37d6c648cb13f9230d847adf22f8171b1ccc4d5682398e77f40309235",
"sha256:1027c282dad077d0bae18be6794e6b6b8c91d58ed8a8d89a89d59693b9131db5", "sha256:1027c282dad077d0bae18be6794e6b6b8c91d58ed8a8d89a89d59693b9131db5",
"sha256:13d3144e1e340870b25e7b10b98d779608c02016d5184cfb9927a9f10c689f42", "sha256:13d3144e1e340870b25e7b10b98d779608c02016d5184cfb9927a9f10c689f42",
"sha256:19536834abffb3fa155017053c607cb835b2ecc6a3a2554a88043d991dffb736",
"sha256:24982cc2533820871eba85ba648cd53d8623687ff11cbb805be4ff7b4c971aff", "sha256:24982cc2533820871eba85ba648cd53d8623687ff11cbb805be4ff7b4c971aff",
"sha256:29872e92839765e546828bb7754a68c418d927cd064fd4708fab9fe9c8bb116b", "sha256:29872e92839765e546828bb7754a68c418d927cd064fd4708fab9fe9c8bb116b",
"sha256:3d61f15e39611aacd91b7e71d903787da86d9e80896e683c0103fced9add7834",
"sha256:43a55c2930bbc139570ac2452adf3d70cdbb3cfe5912c71cdce1c2c6bbd9c5d1", "sha256:43a55c2930bbc139570ac2452adf3d70cdbb3cfe5912c71cdce1c2c6bbd9c5d1",
"sha256:46c99d2de99945ec5cb54f23c8cd5689f6d7177305ebff350a58ce5f8de1669e", "sha256:46c99d2de99945ec5cb54f23c8cd5689f6d7177305ebff350a58ce5f8de1669e",
"sha256:500d4957e52ddc3351cabf489e79c91c17f6e0899158447047588650b5e69183", "sha256:500d4957e52ddc3351cabf489e79c91c17f6e0899158447047588650b5e69183",
@ -700,6 +730,7 @@
"sha256:6788b695d50a51edb699cb55e35487e430fa21f1ed838122d722e0ff0ac5ba15", "sha256:6788b695d50a51edb699cb55e35487e430fa21f1ed838122d722e0ff0ac5ba15",
"sha256:6dd73240d2af64df90aa7c4e7481e23825ea70af4b4922f8ede5b9e35f78a3b1", "sha256:6dd73240d2af64df90aa7c4e7481e23825ea70af4b4922f8ede5b9e35f78a3b1",
"sha256:717ba8fe3ae9cc0006d7c451f0bb265ee07739daf76355d06366154ee68d221e", "sha256:717ba8fe3ae9cc0006d7c451f0bb265ee07739daf76355d06366154ee68d221e",
"sha256:7952deddf24b85c88dab48f6ec366ac6e39d2761b5280f2f9594911e03fcd064",
"sha256:79855e1c5b8da654cf486b830bd42c06e8780cea587384cf6545b7d9ac013a0b", "sha256:79855e1c5b8da654cf486b830bd42c06e8780cea587384cf6545b7d9ac013a0b",
"sha256:7c1699dfe0cf8ff607dbdcc1e9b9af1755371f92a68f706051cc8c37d447c905", "sha256:7c1699dfe0cf8ff607dbdcc1e9b9af1755371f92a68f706051cc8c37d447c905",
"sha256:88e5fcfb52ee7b911e8bb6d6aa2fd21fbecc674eadd44118a9cc3863f938e735", "sha256:88e5fcfb52ee7b911e8bb6d6aa2fd21fbecc674eadd44118a9cc3863f938e735",
@ -795,6 +826,7 @@
}, },
"pytest-env": { "pytest-env": {
"hashes": [ "hashes": [
"sha256:33b4030383a021924fe3f3ba5ca4311990d8b1d02ca77389c2be020c4500f96a",
"sha256:7e94956aef7f2764f3c147d216ce066bf6c42948bb9e293169b1b1c880a580c2" "sha256:7e94956aef7f2764f3c147d216ce066bf6c42948bb9e293169b1b1c880a580c2"
], ],
"index": "pypi", "index": "pypi",
@ -802,6 +834,7 @@
}, },
"pytest-forked": { "pytest-forked": {
"hashes": [ "hashes": [
"sha256:2d1bfc93ab65a28324eb0a63503bfb500c2da6916efede7a24b43a04970fe63c",
"sha256:6aa9ac7e00ad1a539c41bec6d21011332de671e938c7637378ec9710204e37ca", "sha256:6aa9ac7e00ad1a539c41bec6d21011332de671e938c7637378ec9710204e37ca",
"sha256:dc4147784048e70ef5d437951728825a131b81714b398d5d52f17c7c144d8815" "sha256:dc4147784048e70ef5d437951728825a131b81714b398d5d52f17c7c144d8815"
], ],
@ -810,6 +843,7 @@
}, },
"pytest-sugar": { "pytest-sugar": {
"hashes": [ "hashes": [
"sha256:67a55a83c7b2717ad607704d3fe9004bb6543b54017ef82f9c6590acc38c1aec",
"sha256:b1b2186b0a72aada6859bea2a5764145e3aaa2c1cfbb23c3a19b5f7b697563d3" "sha256:b1b2186b0a72aada6859bea2a5764145e3aaa2c1cfbb23c3a19b5f7b697563d3"
], ],
"index": "pypi", "index": "pypi",
@ -927,6 +961,7 @@
}, },
"termcolor": { "termcolor": {
"hashes": [ "hashes": [
"sha256:19b1225d03bfb56571484caaa8521d8ec6e2473ae1640c9f48a48dda49417706",
"sha256:1d6d69ce66211143803fbc56652b41d73b4a400a2891d7bf7a1cdf4c02de613b" "sha256:1d6d69ce66211143803fbc56652b41d73b4a400a2891d7bf7a1cdf4c02de613b"
], ],
"version": "==1.1.0" "version": "==1.1.0"

View File

@ -2,27 +2,25 @@
### Back end ### ### Back end ###
############################################################################### ###############################################################################
FROM ubuntu:20.04 FROM python:3.7-slim
WORKDIR /usr/src/paperless/ WORKDIR /usr/src/paperless/
COPY Pipfile* ./ COPY requirements.txt ./
#Dependencies #Dependencies
RUN apt-get update \ RUN apt-get update \
&& DEBIAN_FRONTEND="noninteractive" apt-get -y --no-install-recommends install \ && apt-get -y --no-install-recommends install \
build-essential \ build-essential \
curl \ curl \
ghostscript \ ghostscript \
gnupg \ gnupg \
imagemagick \ imagemagick \
libatlas-base-dev \
libmagic-dev \ libmagic-dev \
libpoppler-cpp-dev \ libpoppler-cpp-dev \
libpq-dev \ libpq-dev \
optipng \ optipng \
python3 \
python3-dev \
python3-pip \
sudo \ sudo \
tesseract-ocr \ tesseract-ocr \
tesseract-ocr-eng \ tesseract-ocr-eng \
@ -32,10 +30,9 @@ RUN apt-get update \
tesseract-ocr-spa \ tesseract-ocr-spa \
tzdata \ tzdata \
unpaper \ unpaper \
&& pip3 install --upgrade pipenv supervisor setuptools \ && pip3 install --upgrade supervisor setuptools \
&& pipenv install --system --deploy \ && pip install --no-cache-dir -r requirements.txt \
&& pipenv --clear \ && apt-get -y purge build-essential \
&& apt-get -y purge build-essential python3-pip python3-dev \
&& apt-get -y autoremove --purge \ && apt-get -y autoremove --purge \
&& rm -rf /var/lib/apt/lists/* \ && rm -rf /var/lib/apt/lists/* \
&& mkdir /var/log/supervisord /var/run/supervisord && mkdir /var/log/supervisord /var/run/supervisord

View File

@ -8,16 +8,40 @@ Administration
Making backups Making backups
############## ##############
.. warning:: Multiple options exist for making backups of your paperless instance,
depending on how you installed paperless.
This section is not updated to paperless-ng yet, the exporter is a valid tool Before making backups, make sure that paperless is not running.
for backups though.
So you're bored of this whole project, or you want to make a remote backup of Options available to any installation of paperless:
your files for whatever reason. This is easy to do, simply use the
:ref:`exporter <utilities-exporter>` to dump your documents and database out
into an arbitrary directory.
* Use the :ref:`document exporter <utilities-exporter>`.
The document exporter exports all your documents, thumbnails and
metadata to a specific folder. You may import your documents into a
fresh instance of paperless again or store your documents in another
DMS with this export.
Options available to docker installations:
* Backup the docker volumes. These usually reside within
``/var/lib/docker/volumes`` on the host and you need to be root in order
to access them.
Paperless uses 3 volumes:
* ``paperless_media``: This is where your documents are stored.
* ``paperless_data``: This is where auxiliary data is stored. This
folder also contains the SQLite database, if you use it.
* ``paperless_pgdata``: Exists only if you use PostgreSQL and contains
the database.
Options available to bare-metal and non-docker installations:
* Backup the entire paperless folder. This ensures that if your paperless instance
crashes at some point or your disk fails, you can simply copy the folder back
into place and it works.
When using PostgreSQL, you'll also have to backup the database.
.. _migrating-restoring: .. _migrating-restoring:
@ -25,6 +49,8 @@ Restoring
========= =========
.. _administration-updating: .. _administration-updating:
Updating paperless Updating paperless

View File

@ -128,6 +128,8 @@ consumer. Once complete, you should see the newly-created document,
automatically tagged with the appropriate data. automatically tagged with the appropriate data.
.. _advanced-automatic_matching:
Automatic matching Automatic matching
================== ==================
@ -175,8 +177,6 @@ then put the path to that script in ``paperless.conf`` with the variable name
of either ``PAPERLESS_PRE_CONSUME_SCRIPT`` or of either ``PAPERLESS_PRE_CONSUME_SCRIPT`` or
``PAPERLESS_POST_CONSUME_SCRIPT``. ``PAPERLESS_POST_CONSUME_SCRIPT``.
.. TODO HYPEREF TO CONFIG
.. important:: .. important::
These scripts are executed in a **blocking** process, which means that if These scripts are executed in a **blocking** process, which means that if
@ -319,6 +319,6 @@ for use in filenames.
.. code:: .. code::
PAPERLESS_FILENAME_FORMAT=../../my/custom/location/{title} PAPERLESS_FILENAME_FORMAT=../../my/custom/location/{title}
However, keep in mind that inside docker, if files get stored outside of the However, keep in mind that inside docker, if files get stored outside of the
predefined volumes, they will be lost after a restart of paperless. predefined volumes, they will be lost after a restart of paperless.

View File

@ -96,6 +96,8 @@ paperless-ng 0.9.0
sqlite. sqlite.
* ``PAPERLESS_OCR_THREADS`` is gone and replaced with ``PAPERLESS_TASK_WORKERS`` and * ``PAPERLESS_OCR_THREADS`` is gone and replaced with ``PAPERLESS_TASK_WORKERS`` and
``PAPERLESS_THREADS_PER_WORKER``. Refer to the config example for details. ``PAPERLESS_THREADS_PER_WORKER``. Refer to the config example for details.
* ``PAPERLESS_OPTIMIZE_THUMBNAILS`` allows you to disable or enable thumbnail
optimization. This is useful on less powerful devices.
* Many more small changes here and there. The usual stuff. * Many more small changes here and there. The usual stuff.

View File

@ -23,27 +23,35 @@ is
**Q:** *Will paperless-ng run on Raspberry Pi?* **Q:** *Will paperless-ng run on Raspberry Pi?*
**A:** The short answer is yes. The long answer is that certain parts of **A:** The short answer is yes. I've tested it on a Raspberry Pi 3 B.
The long answer is that certain parts of
Paperless will run very slow, such as the tesseract OCR. On Raspberry Pi, Paperless will run very slow, such as the tesseract OCR. On Raspberry Pi,
try to OCR documents before feeding them into paperless so that paperless can try to OCR documents before feeding them into paperless so that paperless can
reuse the text. The web interface should be a lot snappier, since it runs reuse the text. The web interface should be a lot snappier, since it runs
in your browser and paperless has to do much less work to serve the data. in your browser and paperless has to do much less work to serve the data.
.. note::
Consider setting ``PAPERLESS_OPTIMIZE_THUMBNAILS`` to false to speed up
the consumption process. This takes quite a bit of time on Raspberry Pi.
.. note::
Updating the :ref:`automatic matching algorithm <advanced-automatic_matching>`
takes quite a bit of time. However, the update mechanism checks if your
data has changed before doing the heavy lifting. If you experience the
algorithm taking too much cpu time, consider changing the schedule in the
admin interface to daily or weekly. You can also manually invoke the task
by changing the date and time of the next run to today/now.
The actual matching of the algorithm is fast and works on Raspberry Pi as
well as on any other device.
**Q:** *How do I install paperless-ng on Raspberry Pi?* **Q:** *How do I install paperless-ng on Raspberry Pi?*
**A:** There is no docker image for ARM available. If you know how to build **A:** There is no docker image for ARM available. If you know how to build
that automatically, I'm all ears. For now, you have to grab the latest release that automatically, I'm all ears. For now, you have to grab the latest release
archive from the project page and build the image yourself. The release comes archive from the project page and build the image yourself. The release comes
with the front end already compiled, so you don't have to do this on the Pi. with the front end already compiled, so you don't have to do this on the Pi.
You may encounter some issues during the build:
.. code:: shell-session
W: GPG error: http://ports.ubuntu.com/ubuntu-ports focal InRelease: At least one invalid signature was encountered.
E: The repository 'http://ports.ubuntu.com/ubuntu-ports focal InRelease' is not signed.
N: Updating from such a repository can't be done securely, and is therefore disabled by default.
N: See apt-secure(8) manpage for repository creation and user configuration details.
If this happens, look at `this thread <https://askubuntu.com/questions/1263284/>`_.
You will need to update docker to the latest version to fix this issue.

View File

@ -10,7 +10,7 @@
# This is required for processing scheduled tasks such as email fetching, index # This is required for processing scheduled tasks such as email fetching, index
# optimization and for training the automatic document matcher. # optimization and for training the automatic document matcher.
# Defaults to localhost:6379. # Defaults to localhost:6379.
#PAPERLESS_REDIS="redis://localhost:6379" #PAPERLESS_REDIS=redis://localhost:6379
############################################################################### ###############################################################################
@ -22,15 +22,15 @@
# configuration for this is already done inside the docker-compose.env file. # configuration for this is already done inside the docker-compose.env file.
#Set PAPERLESS_DBHOST and postgresql will be used instead of mysql. #Set PAPERLESS_DBHOST and postgresql will be used instead of mysql.
#PAPERLESS_DBHOST="localhost" #PAPERLESS_DBHOST=localhost
#Adjust port if necessary #Adjust port if necessary
#PAPERLESS_DBPORT= #PAPERLESS_DBPORT=
#name, user and pass all default to "paperless" #name, user and pass all default to "paperless"
#PAPERLESS_DBNAME="paperless" #PAPERLESS_DBNAME=paperless
#PAPERLESS_DBUSER="paperless" #PAPERLESS_DBUSER=paperless
#PAPERLESS_DBPASS="paperless" #PAPERLESS_DBPASS=paperless
############################################################################### ###############################################################################
@ -40,23 +40,23 @@
# This is where your documents should go to be consumed. Make sure that it exists # This is where your documents should go to be consumed. Make sure that it exists
# and that the user running the paperless service can read/write its contents # and that the user running the paperless service can read/write its contents
# before you start Paperless. # before you start Paperless.
PAPERLESS_CONSUMPTION_DIR="../consume" PAPERLESS_CONSUMPTION_DIR=../consume
# This is where paperless stores all its data (search index, sqlite database, # This is where paperless stores all its data (search index, sqlite database,
# classification model, etc). # classification model, etc).
#PAPERLESS_DATA_DIR="../data" #PAPERLESS_DATA_DIR=../data
# This is where your documents and thumbnails are stored. # This is where your documents and thumbnails are stored.
#PAPERLESS_MEDIA_ROOT="../media" #PAPERLESS_MEDIA_ROOT=../media
# Override the default STATIC_ROOT here. This is where all static files # Override the default STATIC_ROOT here. This is where all static files
# created using "collectstatic" manager command are stored. # created using "collectstatic" manager command are stored.
#PAPERLESS_STATICDIR="../static" #PAPERLESS_STATICDIR=../static
# Override the STATIC_URL here. Unless you're hosting Paperless off a # Override the STATIC_URL here. Unless you're hosting Paperless off a
# subdomain like /paperless/, you probably don't need to change this. # subdomain like /paperless/, you probably don't need to change this.
#PAPERLESS_STATIC_URL="/static/" #PAPERLESS_STATIC_URL=/static/
# Specify a filename format for the document (directories are supported) # Specify a filename format for the document (directories are supported)
@ -69,7 +69,7 @@ PAPERLESS_CONSUMPTION_DIR="../consume"
# * {tags[INDEX]} If your tags are strings, select the tag by index # * {tags[INDEX]} If your tags are strings, select the tag by index
# Uniqueness of filenames is ensured, as an incrementing counter is attached # Uniqueness of filenames is ensured, as an incrementing counter is attached
# to each filename. # to each filename.
#PAPERLESS_FILENAME_FORMAT="" #PAPERLESS_FILENAME_FORMAT=
############################################################################### ###############################################################################
#### Security #### #### Security ####
@ -77,10 +77,12 @@ PAPERLESS_CONSUMPTION_DIR="../consume"
# Controls whether django's debug mode is enabled. Disable this on production # Controls whether django's debug mode is enabled. Disable this on production
# systems. Debug mode is disabled by default. # systems. Debug mode is disabled by default.
#PAPERLESS_DEBUG="false" #PAPERLESS_DEBUG=false
# GnuPG encryption is deprecated and will be removed in future versions. # GnuPG encryption is deprecated and will be removed in future versions.
# #
# Dont use it. It does not provide any security at all.
#
# Paperless can be instructed to attempt to encrypt your PDF files with GPG # Paperless can be instructed to attempt to encrypt your PDF files with GPG
# using the PAPERLESS_PASSPHRASE specified below. If however you're not # using the PAPERLESS_PASSPHRASE specified below. If however you're not
# concerned about encrypting these files (for example if you have disk # concerned about encrypting these files (for example if you have disk
@ -93,13 +95,13 @@ PAPERLESS_CONSUMPTION_DIR="../consume"
# you've since changed it to a new one. # you've since changed it to a new one.
# #
# The default is to not use encryption at all. # The default is to not use encryption at all.
#PAPERLESS_PASSPHRASE="secret" #PAPERLESS_PASSPHRASE=secret
# The secret key has a default that should be fine so long as you're hosting # The secret key has a default that should be fine so long as you're hosting
# Paperless on a closed network. However, if you're putting this anywhere # Paperless on a closed network. However, if you're putting this anywhere
# public, you should change the key to something unique and verbose. # public, you should change the key to something unique and verbose.
#PAPERLESS_SECRET_KEY="change-me" #PAPERLESS_SECRET_KEY=change-me
# If you're planning on putting Paperless on the open internet, then you # If you're planning on putting Paperless on the open internet, then you
@ -109,19 +111,19 @@ PAPERLESS_CONSUMPTION_DIR="../consume"
# #
# Just remember that this is a comma-separated list, so "example.com" is fine, # Just remember that this is a comma-separated list, so "example.com" is fine,
# as is "example.com,www.example.com", but NOT " example.com" or "example.com," # as is "example.com,www.example.com", but NOT " example.com" or "example.com,"
#PAPERLESS_ALLOWED_HOSTS="example.com,www.example.com" #PAPERLESS_ALLOWED_HOSTS=example.com,www.example.com
# If you decide to use the Paperless API in an ajax call, you need to add your # If you decide to use the Paperless API in an ajax call, you need to add your
# servers to the list of allowed hosts that can do CORS calls. By default # servers to the list of allowed hosts that can do CORS calls. By default
# Paperless allows calls from localhost:8080, but you'd like to change that, # Paperless allows calls from localhost:8080, but you'd like to change that,
# you can set this value to a comma-separated list. # you can set this value to a comma-separated list.
#PAPERLESS_CORS_ALLOWED_HOSTS="localhost:8080,example.com,localhost:8000" #PAPERLESS_CORS_ALLOWED_HOSTS=localhost:8080,example.com,localhost:8000
# To host paperless under a subpath url like example.com/paperless you set # To host paperless under a subpath url like example.com/paperless you set
# this value to /paperless. No trailing slash! # this value to /paperless. No trailing slash!
# #
# https://docs.djangoproject.com/en/1.11/ref/settings/#force-script-name # https://docs.djangoproject.com/en/1.11/ref/settings/#force-script-name
#PAPERLESS_FORCE_SCRIPT_NAME="" #PAPERLESS_FORCE_SCRIPT_NAME=
############################################################################### ###############################################################################
#### Software Tweaks #### #### Software Tweaks ####
@ -158,14 +160,19 @@ PAPERLESS_CONSUMPTION_DIR="../consume"
# When the consumer detects a duplicate document, it will not touch the # When the consumer detects a duplicate document, it will not touch the
# original document. This default behavior can be changed here. # original document. This default behavior can be changed here.
#PAPERLESS_CONSUMER_DELETE_DUPLICATES="false" #PAPERLESS_CONSUMER_DELETE_DUPLICATES=false
# Use optipng to optimize thumbnails. This usually reduces the sice of
# thumbnails by about 20%, but uses considerable compute time during
# consumption.
#PAPERLESS_OPTIMIZE_THUMBNAILS=true
# After a document is consumed, Paperless can trigger an arbitrary script if # After a document is consumed, Paperless can trigger an arbitrary script if
# you like. This script will be passed a number of arguments for you to work # you like. This script will be passed a number of arguments for you to work
# with. The default is blank, which means nothing will be executed. For more # with. The default is blank, which means nothing will be executed. For more
# information, take a look at the docs: # information, take a look at the docs:
# http://paperless.readthedocs.org/en/latest/consumption.html#hooking-into-the-consumption-process # http://paperless.readthedocs.org/en/latest/consumption.html#hooking-into-the-consumption-process
#PAPERLESS_POST_CONSUME_SCRIPT="/path/to/an/arbitrary/script.sh" #PAPERLESS_POST_CONSUME_SCRIPT=/path/to/an/arbitrary/script.sh
# By default, paperless will check the document text for document date information. # By default, paperless will check the document text for document date information.
# Uncomment the line below to enable checking the document filename for date # Uncomment the line below to enable checking the document filename for date
@ -173,7 +180,7 @@ PAPERLESS_CONSUMPTION_DIR="../consume"
# https://dateparser.readthedocs.io/en/latest/#settings. The filename will be # https://dateparser.readthedocs.io/en/latest/#settings. The filename will be
# checked first, and if nothing is found, the document text will be checked # checked first, and if nothing is found, the document text will be checked
# as normal. # as normal.
#PAPERLESS_FILENAME_DATE_ORDER="YMD" #PAPERLESS_FILENAME_DATE_ORDER=YMD
# Sometimes devices won't create filenames which can be parsed properly # Sometimes devices won't create filenames which can be parsed properly
# by the filename parser (see # by the filename parser (see
@ -243,7 +250,7 @@ PAPERLESS_CONSUMPTION_DIR="../consume"
# By default Paperless does not OCR a document if the text can be retrieved from # By default Paperless does not OCR a document if the text can be retrieved from
# the document directly. Set to true to always OCR documents. # the document directly. Set to true to always OCR documents.
#PAPERLESS_OCR_ALWAYS="false" #PAPERLESS_OCR_ALWAYS=false
############################################################################### ###############################################################################
@ -271,7 +278,7 @@ PAPERLESS_CONSUMPTION_DIR="../consume"
#PAPERLESS_CONVERT_BINARY=/usr/bin/convert #PAPERLESS_CONVERT_BINARY=/usr/bin/convert
# Ghostscript # Ghostscript
#PAPERLESS_GS_BINARY = /usr/bin/gs #PAPERLESS_GS_BINARY=/usr/bin/gs
# Unpaper # Unpaper
#PAPERLESS_UNPAPER_BINARY=/usr/bin/unpaper #PAPERLESS_UNPAPER_BINARY=/usr/bin/unpaper

View File

@ -24,12 +24,17 @@ then
rm "$PAPERLESS_DIST" -r rm "$PAPERLESS_DIST" -r
fi fi
mkdir "$PAPERLESS_DIST"
mkdir "$PAPERLESS_DIST_APP"
mkdir "$PAPERLESS_DIST_APP/docker"
# setup dependencies. # setup dependencies.
cd "$PAPERLESS_ROOT" cd "$PAPERLESS_ROOT"
pipenv clean pipenv clean
pipenv install --dev pipenv install --dev
pipenv lock --keep-outdated -r > "$PAPERLESS_DIST_APP/requirements.txt"
# test if the application works. # test if the application works.
@ -44,10 +49,6 @@ make clean html
# copy stuff into place # copy stuff into place
mkdir "$PAPERLESS_DIST"
mkdir "$PAPERLESS_DIST_APP"
mkdir "$PAPERLESS_DIST_APP/docker"
# the application itself # the application itself
cp "$PAPERLESS_ROOT/.env" \ cp "$PAPERLESS_ROOT/.env" \
@ -92,8 +93,6 @@ cd "$PAPERLESS_DIST_APP"
docker build . -t "jonaswinkler/paperless-ng:$VERSION" docker build . -t "jonaswinkler/paperless-ng:$VERSION"
docker push "jonaswinkler/paperless-ng:$VERSION"
# works. package the app! # works. package the app!
cd "$PAPERLESS_DIST" cd "$PAPERLESS_DIST"

23
scripts/push-release.sh Executable file
View File

@ -0,0 +1,23 @@
#!/bin/bash
set -e
VERSION=$1
if [ -z "$VERSION" ]
then
echo "Need a version string."
exit 1
fi
# source root directory of paperless
PAPERLESS_ROOT=$(git rev-parse --show-toplevel)
# output directory
PAPERLESS_DIST="$PAPERLESS_ROOT/dist"
PAPERLESS_DIST_APP="$PAPERLESS_DIST/paperless-ng"
cd "$PAPERLESS_DIST_APP"
docker push "jonaswinkler/paperless-ng:$VERSION"

View File

@ -132,6 +132,28 @@
</a> </a>
</li> </li>
</ul> </ul>
<h6 class="sidebar-heading d-flex justify-content-between align-items-center px-3 mt-4 mb-1 text-muted">
<span>Misc</span>
</h6>
<ul class="nav flex-column mb-2">
<li class="nav-item">
<a class="nav-link" href="https://paperless-ng.readthedocs.io/en/latest/">
<svg class="sidebaricon" fill="currentColor">
<use xlink:href="assets/bootstrap-icons.svg#question-circle"/>
</svg>
Documentation
</a>
</li>
<li class="nav-item">
<a class="nav-link" href="https://github.com/jonaswinkler/paperless-ng">
<svg class="sidebaricon" fill="currentColor">
<use xlink:href="assets/bootstrap-icons.svg#link"/>
</svg>
Github
</a>
</li>
</ul>
</div> </div>
</nav> </nav>

View File

@ -1,6 +1,6 @@
<div class="row pt-3 pb-2 mb-3 border-bottom align-items-center"> <div class="row pt-3 pb-1 mb-3 border-bottom align-items-center" >
<div class="col text-truncate"> <div class="col text-truncate">
<h1 class="h2 text-truncate">{{title}}</h1> <h1 class="h2 text-truncate" style="line-height: 1.4">{{title}}</h1>
</div> </div>
<div class="btn-toolbar col-auto"> <div class="btn-toolbar col-auto">
<ng-content></ng-content> <ng-content></ng-content>

View File

@ -1,3 +1,7 @@
.log-entry-10 {
color: lightslategray !important;
}
.log-entry-30 { .log-entry-30 {
color: yellow !important; color: yellow !important;
} }

View File

@ -3,7 +3,6 @@ import hashlib
import logging import logging
import os import os
import re import re
import uuid
from django.conf import settings from django.conf import settings
from django.db import transaction from django.db import transaction
@ -12,6 +11,7 @@ from django.utils import timezone
from paperless.db import GnuPG from paperless.db import GnuPG
from .classifier import DocumentClassifier, IncompatibleClassifierVersionError from .classifier import DocumentClassifier, IncompatibleClassifierVersionError
from .file_handling import generate_filename, create_source_path_directory from .file_handling import generate_filename, create_source_path_directory
from .loggers import LoggingMixin
from .models import Document, FileInfo, Correspondent, DocumentType, Tag from .models import Document, FileInfo, Correspondent, DocumentType, Tag
from .parsers import ParseError, get_parser_class from .parsers import ParseError, get_parser_class
from .signals import ( from .signals import (
@ -24,12 +24,10 @@ class ConsumerError(Exception):
pass pass
class Consumer: class Consumer(LoggingMixin):
def __init__(self): def __init__(self):
super().__init__()
self.logger = logging.getLogger(__name__)
self.logging_group = None
self.path = None self.path = None
self.filename = None self.filename = None
self.override_title = None self.override_title = None
@ -74,11 +72,6 @@ class Consumer:
os.makedirs(settings.THUMBNAIL_DIR, exist_ok=True) os.makedirs(settings.THUMBNAIL_DIR, exist_ok=True)
os.makedirs(settings.ORIGINALS_DIR, exist_ok=True) os.makedirs(settings.ORIGINALS_DIR, exist_ok=True)
def log(self, level, message):
getattr(self.logger, level)(message, extra={
"group": self.logging_group
})
def try_consume_file(self, def try_consume_file(self,
path, path,
override_filename=None, override_filename=None,
@ -100,7 +93,7 @@ class Consumer:
# this is for grouping logging entries for this particular file # this is for grouping logging entries for this particular file
# together. # together.
self.logging_group = uuid.uuid4() self.renew_logging_group()
# Make sure that preconditions for consuming the file are met. # Make sure that preconditions for consuming the file are met.

View File

@ -86,7 +86,7 @@ def generate_filename(document):
added_day=document.added.day if document.added else "none", added_day=document.added.day if document.added else "none",
tags=tags, tags=tags,
) )
except (ValueError, KeyError, IndexError) as e: except (ValueError, KeyError, IndexError):
logging.getLogger(__name__).warning("Invalid PAPERLESS_FILENAME_FORMAT: {}, falling back to default,".format(settings.PAPERLESS_FILENAME_FORMAT)) logging.getLogger(__name__).warning("Invalid PAPERLESS_FILENAME_FORMAT: {}, falling back to default,".format(settings.PAPERLESS_FILENAME_FORMAT))
# Always append the primary key to guarantee uniqueness of filename # Always append the primary key to guarantee uniqueness of filename

View File

@ -32,6 +32,9 @@ class UploadForm(forms.Form):
t = int(mktime(datetime.now().timetuple())) t = int(mktime(datetime.now().timetuple()))
os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
# TODO: dont just append pdf. This is here for taht weird regex check at the start of the consumer.
with tempfile.NamedTemporaryFile(prefix="paperless-upload-", suffix=".pdf", dir=settings.SCRATCH_DIR, delete=False) as f: with tempfile.NamedTemporaryFile(prefix="paperless-upload-", suffix=".pdf", dir=settings.SCRATCH_DIR, delete=False) as f:
f.write(document) f.write(document)

View File

@ -1,4 +1,5 @@
import logging import logging
import uuid
class PaperlessHandler(logging.Handler): class PaperlessHandler(logging.Handler):
@ -13,3 +14,19 @@ class PaperlessHandler(logging.Handler):
kwargs["group"] = record.group kwargs["group"] = record.group
Log.objects.create(**kwargs) Log.objects.create(**kwargs)
class LoggingMixin:
logging_group = None
def renew_logging_group(self):
self.logging_group = uuid.uuid4()
def log(self, level, message):
target = ".".join([self.__class__.__module__, self.__class__.__name__])
logger = logging.getLogger(target)
getattr(logger, level)(message, extra={
"group": self.logging_group
})

View File

@ -1,7 +1,4 @@
# Generated by Django 3.1.3 on 2020-11-07 12:35 # Generated by Django 3.1.3 on 2020-11-07 12:35
import os
from django.conf import settings
from django.db import migrations, models from django.db import migrations, models
import django.db.models.deletion import django.db.models.deletion

View File

@ -20,6 +20,7 @@ from django.utils import timezone
# - XX. MONTH ZZZZ with XX being 1 or 2 and ZZZZ being 2 or 4 digits # - XX. MONTH ZZZZ with XX being 1 or 2 and ZZZZ being 2 or 4 digits
# - MONTH ZZZZ, with ZZZZ being 4 digits # - MONTH ZZZZ, with ZZZZ being 4 digits
# - MONTH XX, ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits # - MONTH XX, ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits
from documents.loggers import LoggingMixin
from documents.signals import document_consumer_declaration from documents.signals import document_consumer_declaration
# TODO: isnt there a date parsing library for this? # TODO: isnt there a date parsing library for this?
@ -101,17 +102,17 @@ class ParseError(Exception):
pass pass
class DocumentParser: class DocumentParser(LoggingMixin):
""" """
Subclass this to make your own parser. Have a look at Subclass this to make your own parser. Have a look at
`paperless_tesseract.parsers` for inspiration. `paperless_tesseract.parsers` for inspiration.
""" """
def __init__(self, path, logging_group): def __init__(self, path, logging_group):
super().__init__()
self.logging_group = logging_group
self.document_path = path self.document_path = path
self.tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR) self.tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR)
self.logger = logging.getLogger(__name__)
self.logging_group = logging_group
def get_thumbnail(self): def get_thumbnail(self):
""" """
@ -121,16 +122,19 @@ class DocumentParser:
def optimise_thumbnail(self, in_path): def optimise_thumbnail(self, in_path):
out_path = os.path.join(self.tempdir, "optipng.png") if settings.OPTIMIZE_THUMBNAILS:
out_path = os.path.join(self.tempdir, "optipng.png")
args = (settings.OPTIPNG_BINARY, "-silent", "-o5", in_path, "-out", out_path) args = (settings.OPTIPNG_BINARY, "-silent", "-o5", in_path, "-out", out_path)
self.log('debug', 'Execute: ' + " ".join(args)) self.log('debug', 'Execute: ' + " ".join(args))
if not subprocess.Popen(args).wait() == 0: if not subprocess.Popen(args).wait() == 0:
raise ParseError("Optipng failed at {}".format(args)) raise ParseError("Optipng failed at {}".format(args))
return out_path return out_path
else:
return in_path
def get_optimised_thumbnail(self): def get_optimised_thumbnail(self):
return self.optimise_thumbnail(self.get_thumbnail()) return self.optimise_thumbnail(self.get_thumbnail())
@ -222,11 +226,6 @@ class DocumentParser:
return date return date
def log(self, level, message):
getattr(self.logger, level)(message, extra={
"group": self.logging_group
})
def cleanup(self): def cleanup(self):
self.log("debug", "Deleting directory {}".format(self.tempdir)) self.log("debug", "Deleting directory {}".format(self.tempdir))
shutil.rmtree(self.tempdir) shutil.rmtree(self.tempdir)

View File

@ -2,11 +2,10 @@ import os
import shutil import shutil
import tempfile import tempfile
from unittest import mock from unittest import mock
from unittest.mock import MagicMock
from django.contrib.auth.models import User from django.contrib.auth.models import User
from django.test import override_settings from django.test import override_settings
from rest_framework.test import APITestCase, APIClient from rest_framework.test import APITestCase
from documents.models import Document, Correspondent, DocumentType, Tag from documents.models import Document, Correspondent, DocumentType, Tag

View File

@ -80,6 +80,6 @@ class TestClassifier(TestCase):
self.classifier.save_classifier() self.classifier.save_classifier()
newClassifier = DocumentClassifier() new_classifier = DocumentClassifier()
newClassifier.reload() new_classifier.reload()
self.assertFalse(newClassifier.train()) self.assertFalse(new_classifier.train())

View File

@ -5,8 +5,6 @@ import tempfile
from unittest import mock from unittest import mock
from unittest.mock import MagicMock from unittest.mock import MagicMock
from django.conf import settings
from django.db import DatabaseError
from django.test import TestCase, override_settings from django.test import TestCase, override_settings
from ..consumer import Consumer, ConsumerError from ..consumer import Consumer, ConsumerError
@ -504,9 +502,9 @@ class TestConsumer(TestCase):
def testOverrideFilename(self): def testOverrideFilename(self):
filename = self.get_test_file() filename = self.get_test_file()
overrideFilename = "My Bank - Statement for November.pdf" override_filename = "My Bank - Statement for November.pdf"
document = self.consumer.try_consume_file(filename, override_filename=overrideFilename) document = self.consumer.try_consume_file(filename, override_filename=override_filename)
self.assertEqual(document.correspondent.name, "My Bank") self.assertEqual(document.correspondent.name, "My Bank")
self.assertEqual(document.title, "Statement for November") self.assertEqual(document.title, "Statement for November")

View File

@ -72,11 +72,11 @@ def binaries_check(app_configs, **kwargs):
@register() @register()
def debug_mode_check(app_configs, **kwargs): def debug_mode_check(app_configs, **kwargs):
if settings.DEBUG: if settings.DEBUG:
return [Warning("DEBUG mode is enabled. Disable Debug mode. " return [Warning(
"This is a serious security " "DEBUG mode is enabled. Disable Debug mode. This is a serious "
"issue, since it puts security overides in place which" "security issue, since it puts security overides in place which "
"are meant to be only used during development. This" "are meant to be only used during development. This "
"also means that paperless will tell anyone various" "also means that paperless will tell anyone various "
"debugging information when something goes wrong.")] "debugging information when something goes wrong.")]
else: else:
return [] return []

View File

@ -257,6 +257,14 @@ LOGGING = {
"handlers": ["dbhandler", "streamhandler"], "handlers": ["dbhandler", "streamhandler"],
"level": "DEBUG" "level": "DEBUG"
}, },
"paperless_mail": {
"handlers": ["dbhandler", "streamhandler"],
"level": "DEBUG"
},
"paperless_tesseract": {
"handlers": ["dbhandler", "streamhandler"],
"level": "DEBUG"
},
}, },
} }
@ -312,6 +320,8 @@ CONSUMER_POLLING = int(os.getenv("PAPERLESS_CONSUMER_POLLING", 0))
CONSUMER_DELETE_DUPLICATES = __get_boolean("PAPERLESS_CONSUMER_DELETE_DUPLICATES") CONSUMER_DELETE_DUPLICATES = __get_boolean("PAPERLESS_CONSUMER_DELETE_DUPLICATES")
OPTIMIZE_THUMBNAILS = __get_boolean("PAPERLESS_OPTIMIZE_THUMBNAILS", "true")
# The default language that tesseract will attempt to use when parsing # The default language that tesseract will attempt to use when parsing
# documents. It should be a 3-letter language code consistent with ISO 639. # documents. It should be a 3-letter language code consistent with ISO 639.
OCR_LANGUAGE = os.getenv("PAPERLESS_OCR_LANGUAGE", "eng") OCR_LANGUAGE = os.getenv("PAPERLESS_OCR_LANGUAGE", "eng")

View File

@ -1,18 +1,7 @@
from django.contrib import admin from django.contrib import admin
from django import forms
from paperless_mail.models import MailAccount, MailRule from paperless_mail.models import MailAccount, MailRule
class MailAccountForm(forms.ModelForm):
password = forms.CharField(widget=forms.PasswordInput)
class Meta:
fields = '__all__'
model = MailAccount
class MailAccountAdmin(admin.ModelAdmin): class MailAccountAdmin(admin.ModelAdmin):
list_display = ("name", "imap_server", "username") list_display = ("name", "imap_server", "username")
@ -20,6 +9,8 @@ class MailAccountAdmin(admin.ModelAdmin):
class MailRuleAdmin(admin.ModelAdmin): class MailRuleAdmin(admin.ModelAdmin):
list_filter = ("account",)
list_display = ("name", "account", "folder", "action") list_display = ("name", "account", "folder", "action")

View File

@ -8,6 +8,7 @@ from django_q.tasks import async_task
from imap_tools import MailBox, MailBoxUnencrypted, AND, MailMessageFlags, \ from imap_tools import MailBox, MailBoxUnencrypted, AND, MailMessageFlags, \
MailboxFolderSelectError MailboxFolderSelectError
from documents.loggers import LoggingMixin
from documents.models import Correspondent from documents.models import Correspondent
from paperless_mail.models import MailAccount, MailRule from paperless_mail.models import MailAccount, MailRule
@ -83,72 +84,6 @@ def make_criterias(rule):
return {**criterias, **get_rule_action(rule).get_criteria()} return {**criterias, **get_rule_action(rule).get_criteria()}
def handle_mail_account(account):
if account.imap_security == MailAccount.IMAP_SECURITY_NONE:
mailbox = MailBoxUnencrypted(account.imap_server, account.imap_port)
elif account.imap_security == MailAccount.IMAP_SECURITY_STARTTLS:
mailbox = MailBox(account.imap_server, account.imap_port, starttls=True)
elif account.imap_security == MailAccount.IMAP_SECURITY_SSL:
mailbox = MailBox(account.imap_server, account.imap_port)
else:
raise ValueError("Unknown IMAP security")
total_processed_files = 0
with mailbox as M:
try:
M.login(account.username, account.password)
except Exception:
raise MailError(
f"Error while authenticating account {account.name}")
for rule in account.rules.all():
try:
M.folder.set(rule.folder)
except MailboxFolderSelectError:
raise MailError(
f"Rule {rule.name}: Folder {rule.folder} does not exist "
f"in account {account.name}")
criterias = make_criterias(rule)
try:
messages = M.fetch(criteria=AND(**criterias), mark_seen=False)
except Exception:
raise MailError(
f"Rule {rule.name}: Error while fetching folder "
f"{rule.folder} of account {account.name}")
post_consume_messages = []
for message in messages:
try:
processed_files = handle_message(message, rule)
except Exception:
raise MailError(
f"Rule {rule.name}: Error while processing mail "
f"{message.uid} of account {account.name}")
if processed_files > 0:
post_consume_messages.append(message.uid)
total_processed_files += processed_files
try:
get_rule_action(rule).post_consume(
M,
post_consume_messages,
rule.action_parameter)
except Exception:
raise MailError(
f"Rule {rule.name}: Error while processing post-consume "
f"actions for account {account.name}")
return total_processed_files
def get_title(message, att, rule): def get_title(message, att, rule):
if rule.assign_title_from == MailRule.TITLE_FROM_SUBJECT: if rule.assign_title_from == MailRule.TITLE_FROM_SUBJECT:
title = message.subject title = message.subject
@ -189,39 +124,156 @@ def get_correspondent(message, rule):
return correspondent return correspondent
def handle_message(message, rule): def get_mailbox(server, port, security):
if not message.attachments: if security == MailAccount.IMAP_SECURITY_NONE:
return 0 mailbox = MailBoxUnencrypted(server, port)
elif security == MailAccount.IMAP_SECURITY_STARTTLS:
mailbox = MailBox(server, port, starttls=True)
elif security == MailAccount.IMAP_SECURITY_SSL:
mailbox = MailBox(server, port)
else:
raise ValueError("Unknown IMAP security")
return mailbox
correspondent = get_correspondent(message, rule)
tag = rule.assign_tag
doc_type = rule.assign_document_type
processed_attachments = 0 class MailAccountHandler(LoggingMixin):
for att in message.attachments: def handle_mail_account(self, account):
title = get_title(message, att, rule) self.renew_logging_group()
# TODO: check with parsers what files types are supported self.log('debug', f"Processing mail account {account}")
if att.content_type == 'application/pdf':
os.makedirs(settings.SCRATCH_DIR, exist_ok=True) total_processed_files = 0
_, temp_filename = tempfile.mkstemp(prefix="paperless-mail-", dir=settings.SCRATCH_DIR)
with open(temp_filename, 'wb') as f:
f.write(att.payload)
async_task( with get_mailbox(account.imap_server,
"documents.tasks.consume_file", account.imap_port,
path=temp_filename, account.imap_security) as M:
override_filename=att.filename,
override_title=title,
override_correspondent_id=correspondent.id if correspondent else None,
override_document_type_id=doc_type.id if doc_type else None,
override_tag_ids=[tag.id] if tag else None,
task_name=f"Mail: {att.filename}"
)
processed_attachments += 1 try:
M.login(account.username, account.password)
except Exception:
raise MailError(
f"Error while authenticating account {account.name}")
return processed_attachments self.log('debug', f"Account {account}: Processing "
f"{account.rules.count()} rule(s)")
for rule in account.rules.all():
self.log(
'debug',
f"Account {account}: Processing rule {rule.name}")
self.log(
'debug',
f"Rule {account}.{rule}: Selecting folder {rule.folder}")
try:
M.folder.set(rule.folder)
except MailboxFolderSelectError:
raise MailError(
f"Rule {rule.name}: Folder {rule.folder} does not exist "
f"in account {account.name}")
criterias = make_criterias(rule)
self.log(
'debug',
f"Rule {account}.{rule}: Searching folder with criteria "
f"{str(AND(**criterias))}")
try:
messages = M.fetch(criteria=AND(**criterias), mark_seen=False)
except Exception:
raise MailError(
f"Rule {rule.name}: Error while fetching folder "
f"{rule.folder} of account {account.name}")
post_consume_messages = []
mails_processed = 0
for message in messages:
try:
processed_files = self.handle_message(message, rule)
except Exception:
raise MailError(
f"Rule {rule.name}: Error while processing mail "
f"{message.uid} of account {account.name}")
if processed_files > 0:
post_consume_messages.append(message.uid)
total_processed_files += processed_files
mails_processed += 1
self.log(
'debug',
f"Rule {account}.{rule}: Processed {mails_processed} "
f"matching mail(s)")
self.log(
'debug',
f"Rule {account}.{rule}: Running mail actions on "
f"{len(post_consume_messages)} mails")
try:
get_rule_action(rule).post_consume(
M,
post_consume_messages,
rule.action_parameter)
except Exception:
raise MailError(
f"Rule {rule.name}: Error while processing post-consume "
f"actions for account {account.name}")
return total_processed_files
def handle_message(self, message, rule):
if not message.attachments:
return 0
self.log(
'debug',
f"Rule {rule.account}.{rule}: "
f"Processing mail {message.subject} from {message.from_} with "
f"{len(message.attachments)} attachment(s)")
correspondent = get_correspondent(message, rule)
tag = rule.assign_tag
doc_type = rule.assign_document_type
processed_attachments = 0
for att in message.attachments:
title = get_title(message, att, rule)
# TODO: check with parsers what files types are supported
if att.content_type == 'application/pdf':
os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
_, temp_filename = tempfile.mkstemp(prefix="paperless-mail-", dir=settings.SCRATCH_DIR)
with open(temp_filename, 'wb') as f:
f.write(att.payload)
self.log(
'info',
f"Rule {rule.account}.{rule}: "
f"Consuming attachment {att.filename} from mail "
f"{message.subject} from {message.from_}")
async_task(
"documents.tasks.consume_file",
path=temp_filename,
override_filename=att.filename,
override_title=title,
override_correspondent_id=correspondent.id if correspondent else None,
override_document_type_id=doc_type.id if doc_type else None,
override_tag_ids=[tag.id] if tag else None,
task_name=f"Mail: {att.filename}"
)
processed_attachments += 1
return processed_attachments

View File

@ -1,6 +1,6 @@
from django.core.management.base import BaseCommand from django.core.management.base import BaseCommand
from paperless_mail import mail, tasks from paperless_mail import tasks
class Command(BaseCommand): class Command(BaseCommand):

View File

@ -0,0 +1,23 @@
# Generated by Django 3.1.3 on 2020-11-18 19:40
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('paperless_mail', '0002_auto_20201117_1334'),
]
operations = [
migrations.AlterField(
model_name='mailaccount',
name='imap_port',
field=models.IntegerField(blank=True, help_text='This is usually 143 for unencrypted and STARTTLS connections, and 993 for SSL connections.', null=True),
),
migrations.AlterField(
model_name='mailrule',
name='name',
field=models.CharField(max_length=256, unique=True),
),
]

View File

@ -1,8 +1,5 @@
from django.db import models from django.db import models
# Create your models here.
from django.db import models
import documents.models as document_models import documents.models as document_models
@ -22,7 +19,11 @@ class MailAccount(models.Model):
imap_server = models.CharField(max_length=256) imap_server = models.CharField(max_length=256)
imap_port = models.IntegerField(blank=True, null=True) imap_port = models.IntegerField(
blank=True,
null=True,
help_text="This is usually 143 for unencrypted and STARTTLS "
"connections, and 993 for SSL connections.")
imap_security = models.PositiveIntegerField( imap_security = models.PositiveIntegerField(
choices=IMAP_SECURITY_OPTIONS, choices=IMAP_SECURITY_OPTIONS,
@ -71,7 +72,7 @@ class MailRule(models.Model):
(CORRESPONDENT_FROM_CUSTOM, "Use correspondent selected below") (CORRESPONDENT_FROM_CUSTOM, "Use correspondent selected below")
) )
name = models.CharField(max_length=256) name = models.CharField(max_length=256, unique=True)
account = models.ForeignKey( account = models.ForeignKey(
MailAccount, MailAccount,

View File

@ -1,13 +1,13 @@
import logging import logging
from paperless_mail import mail from paperless_mail.mail import MailAccountHandler
from paperless_mail.models import MailAccount from paperless_mail.models import MailAccount
def process_mail_accounts(): def process_mail_accounts():
total_new_documents = 0 total_new_documents = 0
for account in MailAccount.objects.all(): for account in MailAccount.objects.all():
total_new_documents += mail.handle_mail_account(account) total_new_documents += MailAccountHandler().handle_mail_account(account)
if total_new_documents > 0: if total_new_documents > 0:
return f"Added {total_new_documents} document(s)." return f"Added {total_new_documents} document(s)."
@ -18,6 +18,6 @@ def process_mail_accounts():
def process_mail_account(name): def process_mail_account(name):
account = MailAccount.objects.find(name=name) account = MailAccount.objects.find(name=name)
if account: if account:
mail.handle_mail_account(account) MailAccountHandler().handle_mail_account(account)
else: else:
logging.error("Unknown mail acccount: {}".format(name)) logging.error("Unknown mail acccount: {}".format(name))

View File

@ -7,7 +7,7 @@ from django.test import TestCase
from imap_tools import MailMessageFlags, MailboxFolderSelectError from imap_tools import MailMessageFlags, MailboxFolderSelectError
from documents.models import Correspondent from documents.models import Correspondent
from paperless_mail.mail import get_correspondent, get_title, handle_message, handle_mail_account, MailError from paperless_mail.mail import MailError, MailAccountHandler, get_correspondent, get_title
from paperless_mail.models import MailRule, MailAccount from paperless_mail.models import MailRule, MailAccount
@ -126,6 +126,8 @@ class TestMail(TestCase):
self.reset_bogus_mailbox() self.reset_bogus_mailbox()
self.mail_account_handler = MailAccountHandler()
def reset_bogus_mailbox(self): def reset_bogus_mailbox(self):
self.bogus_mailbox.messages = [] self.bogus_mailbox.messages = []
self.bogus_mailbox.messages_spam = [] self.bogus_mailbox.messages_spam = []
@ -145,10 +147,10 @@ class TestMail(TestCase):
me_localhost = Correspondent.objects.create(name=message2.from_) me_localhost = Correspondent.objects.create(name=message2.from_)
someone_else = Correspondent.objects.create(name="someone else") someone_else = Correspondent.objects.create(name="someone else")
rule = MailRule(assign_correspondent_from=MailRule.CORRESPONDENT_FROM_NOTHING) rule = MailRule(name="a", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_NOTHING)
self.assertIsNone(get_correspondent(message, rule)) self.assertIsNone(get_correspondent(message, rule))
rule = MailRule(assign_correspondent_from=MailRule.CORRESPONDENT_FROM_EMAIL) rule = MailRule(name="b", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_EMAIL)
c = get_correspondent(message, rule) c = get_correspondent(message, rule)
self.assertIsNotNone(c) self.assertIsNotNone(c)
self.assertEqual(c.name, "someone@somewhere.com") self.assertEqual(c.name, "someone@somewhere.com")
@ -157,7 +159,7 @@ class TestMail(TestCase):
self.assertEqual(c.name, "me@localhost.com") self.assertEqual(c.name, "me@localhost.com")
self.assertEqual(c.id, me_localhost.id) self.assertEqual(c.id, me_localhost.id)
rule = MailRule(assign_correspondent_from=MailRule.CORRESPONDENT_FROM_NAME) rule = MailRule(name="c", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_NAME)
c = get_correspondent(message, rule) c = get_correspondent(message, rule)
self.assertIsNotNone(c) self.assertIsNotNone(c)
self.assertEqual(c.name, "Someone!") self.assertEqual(c.name, "Someone!")
@ -165,7 +167,7 @@ class TestMail(TestCase):
self.assertIsNotNone(c) self.assertIsNotNone(c)
self.assertEqual(c.id, me_localhost.id) self.assertEqual(c.id, me_localhost.id)
rule = MailRule(assign_correspondent_from=MailRule.CORRESPONDENT_FROM_CUSTOM, assign_correspondent=someone_else) rule = MailRule(name="d", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_CUSTOM, assign_correspondent=someone_else)
c = get_correspondent(message, rule) c = get_correspondent(message, rule)
self.assertEqual(c, someone_else) self.assertEqual(c, someone_else)
@ -174,14 +176,15 @@ class TestMail(TestCase):
message.subject = "the message title" message.subject = "the message title"
att = namedtuple('Attachment', []) att = namedtuple('Attachment', [])
att.filename = "this_is_the_file.pdf" att.filename = "this_is_the_file.pdf"
rule = MailRule(assign_title_from=MailRule.TITLE_FROM_FILENAME) rule = MailRule(name="a", assign_title_from=MailRule.TITLE_FROM_FILENAME)
self.assertEqual(get_title(message, att, rule), "this_is_the_file") self.assertEqual(get_title(message, att, rule), "this_is_the_file")
rule = MailRule(assign_title_from=MailRule.TITLE_FROM_SUBJECT) rule = MailRule(name="b", assign_title_from=MailRule.TITLE_FROM_SUBJECT)
self.assertEqual(get_title(message, att, rule), "the message title") self.assertEqual(get_title(message, att, rule), "the message title")
def test_handle_message(self): def test_handle_message(self):
message = namedtuple('MailMessage', []) message = namedtuple('MailMessage', [])
message.subject = "the message title" message.subject = "the message title"
message.from_ = "Myself"
att = namedtuple('Attachment', []) att = namedtuple('Attachment', [])
att.filename = "test1.pdf" att.filename = "test1.pdf"
@ -200,9 +203,10 @@ class TestMail(TestCase):
message.attachments = [att, att2, att3] message.attachments = [att, att2, att3]
rule = MailRule(assign_title_from=MailRule.TITLE_FROM_FILENAME) account = MailAccount()
rule = MailRule(assign_title_from=MailRule.TITLE_FROM_FILENAME, account=account)
result = handle_message(message, rule) result = self.mail_account_handler.handle_message(message, rule)
self.assertEqual(result, 2) self.assertEqual(result, 2)
@ -224,7 +228,7 @@ class TestMail(TestCase):
message.attachments = [] message.attachments = []
rule = MailRule() rule = MailRule()
result = handle_message(message, rule) result = self.mail_account_handler.handle_message(message, rule)
self.assertFalse(m.called) self.assertFalse(m.called)
self.assertEqual(result, 0) self.assertEqual(result, 0)
@ -235,11 +239,13 @@ class TestMail(TestCase):
rule = MailRule.objects.create(name="testrule", account=account, action=MailRule.ACTION_MARK_READ) rule = MailRule.objects.create(name="testrule", account=account, action=MailRule.ACTION_MARK_READ)
self.assertEqual(len(self.bogus_mailbox.messages), 3)
self.assertEqual(self.async_task.call_count, 0) self.assertEqual(self.async_task.call_count, 0)
self.assertEqual(len(self.bogus_mailbox.fetch("UNSEEN", False)), 2) self.assertEqual(len(self.bogus_mailbox.fetch("UNSEEN", False)), 2)
handle_mail_account(account) self.mail_account_handler.handle_mail_account(account)
self.assertEqual(self.async_task.call_count, 2) self.assertEqual(self.async_task.call_count, 2)
self.assertEqual(len(self.bogus_mailbox.fetch("UNSEEN", False)), 0) self.assertEqual(len(self.bogus_mailbox.fetch("UNSEEN", False)), 0)
self.assertEqual(len(self.bogus_mailbox.messages), 3)
def test_handle_mail_account_delete(self): def test_handle_mail_account_delete(self):
@ -249,7 +255,7 @@ class TestMail(TestCase):
self.assertEqual(self.async_task.call_count, 0) self.assertEqual(self.async_task.call_count, 0)
self.assertEqual(len(self.bogus_mailbox.messages), 3) self.assertEqual(len(self.bogus_mailbox.messages), 3)
handle_mail_account(account) self.mail_account_handler.handle_mail_account(account)
self.assertEqual(self.async_task.call_count, 2) self.assertEqual(self.async_task.call_count, 2)
self.assertEqual(len(self.bogus_mailbox.messages), 1) self.assertEqual(len(self.bogus_mailbox.messages), 1)
@ -258,11 +264,13 @@ class TestMail(TestCase):
rule = MailRule.objects.create(name="testrule", account=account, action=MailRule.ACTION_FLAG, filter_subject="Invoice") rule = MailRule.objects.create(name="testrule", account=account, action=MailRule.ACTION_FLAG, filter_subject="Invoice")
self.assertEqual(len(self.bogus_mailbox.messages), 3)
self.assertEqual(self.async_task.call_count, 0) self.assertEqual(self.async_task.call_count, 0)
self.assertEqual(len(self.bogus_mailbox.fetch("UNFLAGGED", False)), 2) self.assertEqual(len(self.bogus_mailbox.fetch("UNFLAGGED", False)), 2)
handle_mail_account(account) self.mail_account_handler.handle_mail_account(account)
self.assertEqual(self.async_task.call_count, 1) self.assertEqual(self.async_task.call_count, 1)
self.assertEqual(len(self.bogus_mailbox.fetch("UNFLAGGED", False)), 1) self.assertEqual(len(self.bogus_mailbox.fetch("UNFLAGGED", False)), 1)
self.assertEqual(len(self.bogus_mailbox.messages), 3)
def test_handle_mail_account_move(self): def test_handle_mail_account_move(self):
account = MailAccount.objects.create(name="test", imap_server="", username="admin", password="secret") account = MailAccount.objects.create(name="test", imap_server="", username="admin", password="secret")
@ -272,7 +280,7 @@ class TestMail(TestCase):
self.assertEqual(self.async_task.call_count, 0) self.assertEqual(self.async_task.call_count, 0)
self.assertEqual(len(self.bogus_mailbox.messages), 3) self.assertEqual(len(self.bogus_mailbox.messages), 3)
self.assertEqual(len(self.bogus_mailbox.messages_spam), 0) self.assertEqual(len(self.bogus_mailbox.messages_spam), 0)
handle_mail_account(account) self.mail_account_handler.handle_mail_account(account)
self.assertEqual(self.async_task.call_count, 1) self.assertEqual(self.async_task.call_count, 1)
self.assertEqual(len(self.bogus_mailbox.messages), 2) self.assertEqual(len(self.bogus_mailbox.messages), 2)
self.assertEqual(len(self.bogus_mailbox.messages_spam), 1) self.assertEqual(len(self.bogus_mailbox.messages_spam), 1)
@ -281,7 +289,7 @@ class TestMail(TestCase):
account = MailAccount.objects.create(name="test", imap_server="", username="admin", password="wrong") account = MailAccount.objects.create(name="test", imap_server="", username="admin", password="wrong")
try: try:
handle_mail_account(account) self.mail_account_handler.handle_mail_account(account)
except MailError as e: except MailError as e:
self.assertTrue(str(e).startswith("Error while authenticating account")) self.assertTrue(str(e).startswith("Error while authenticating account"))
else: else:
@ -291,7 +299,7 @@ class TestMail(TestCase):
rule = MailRule.objects.create(name="testrule", account=account, folder="uuuh") rule = MailRule.objects.create(name="testrule", account=account, folder="uuuh")
try: try:
handle_mail_account(account) self.mail_account_handler.handle_mail_account(account)
except MailError as e: except MailError as e:
self.assertTrue("uuuh does not exist" in str(e)) self.assertTrue("uuuh does not exist" in str(e))
else: else:
@ -299,10 +307,10 @@ class TestMail(TestCase):
account = MailAccount.objects.create(name="test3", imap_server="", username="admin", password="secret") account = MailAccount.objects.create(name="test3", imap_server="", username="admin", password="secret")
rule = MailRule.objects.create(name="testrule", account=account, action=MailRule.ACTION_MOVE, action_parameter="doesnotexist", filter_subject="Claim") rule = MailRule.objects.create(name="testrule2", account=account, action=MailRule.ACTION_MOVE, action_parameter="doesnotexist", filter_subject="Claim")
try: try:
handle_mail_account(account) self.mail_account_handler.handle_mail_account(account)
except MailError as e: except MailError as e:
self.assertTrue("Error while processing post-consume actions" in str(e)) self.assertTrue("Error while processing post-consume actions" in str(e))
else: else:
@ -311,12 +319,12 @@ class TestMail(TestCase):
def test_filters(self): def test_filters(self):
account = MailAccount.objects.create(name="test3", imap_server="", username="admin", password="secret") account = MailAccount.objects.create(name="test3", imap_server="", username="admin", password="secret")
rule = MailRule.objects.create(name="testrule", account=account, action=MailRule.ACTION_DELETE, filter_subject="Claim") rule = MailRule.objects.create(name="testrule3", account=account, action=MailRule.ACTION_DELETE, filter_subject="Claim")
self.assertEqual(self.async_task.call_count, 0) self.assertEqual(self.async_task.call_count, 0)
self.assertEqual(len(self.bogus_mailbox.messages), 3) self.assertEqual(len(self.bogus_mailbox.messages), 3)
handle_mail_account(account) self.mail_account_handler.handle_mail_account(account)
self.assertEqual(len(self.bogus_mailbox.messages), 2) self.assertEqual(len(self.bogus_mailbox.messages), 2)
self.assertEqual(self.async_task.call_count, 1) self.assertEqual(self.async_task.call_count, 1)
@ -326,7 +334,7 @@ class TestMail(TestCase):
rule.filter_body = "electronic" rule.filter_body = "electronic"
rule.save() rule.save()
self.assertEqual(len(self.bogus_mailbox.messages), 3) self.assertEqual(len(self.bogus_mailbox.messages), 3)
handle_mail_account(account) self.mail_account_handler.handle_mail_account(account)
self.assertEqual(len(self.bogus_mailbox.messages), 2) self.assertEqual(len(self.bogus_mailbox.messages), 2)
self.assertEqual(self.async_task.call_count, 2) self.assertEqual(self.async_task.call_count, 2)
@ -336,7 +344,7 @@ class TestMail(TestCase):
rule.filter_body = None rule.filter_body = None
rule.save() rule.save()
self.assertEqual(len(self.bogus_mailbox.messages), 3) self.assertEqual(len(self.bogus_mailbox.messages), 3)
handle_mail_account(account) self.mail_account_handler.handle_mail_account(account)
self.assertEqual(len(self.bogus_mailbox.messages), 1) self.assertEqual(len(self.bogus_mailbox.messages), 1)
self.assertEqual(self.async_task.call_count, 4) self.assertEqual(self.async_task.call_count, 4)
@ -347,6 +355,6 @@ class TestMail(TestCase):
rule.filter_subject = "Invoice" rule.filter_subject = "Invoice"
rule.save() rule.save()
self.assertEqual(len(self.bogus_mailbox.messages), 3) self.assertEqual(len(self.bogus_mailbox.messages), 3)
handle_mail_account(account) self.mail_account_handler.handle_mail_account(account)
self.assertEqual(len(self.bogus_mailbox.messages), 2) self.assertEqual(len(self.bogus_mailbox.messages), 2)
self.assertEqual(self.async_task.call_count, 5) self.assertEqual(self.async_task.call_count, 5)

View File

@ -1,3 +0,0 @@
from django.shortcuts import render
# Create your views here.

View File

@ -86,7 +86,7 @@ class RasterisedDocumentParser(DocumentParser):
return self._text return self._text
if not settings.OCR_ALWAYS and self._is_ocred(): if not settings.OCR_ALWAYS and self._is_ocred():
self.log("info", "Skipping OCR, using Text from PDF") self.log("debug", "Skipping OCR, using Text from PDF")
self._text = get_text_from_pdf(self.document_path) self._text = get_text_from_pdf(self.document_path)
return self._text return self._text
@ -98,7 +98,7 @@ class RasterisedDocumentParser(DocumentParser):
try: try:
sample_page_index = int(len(images) / 2) sample_page_index = int(len(images) / 2)
self.log("info", "Attempting language detection on page {} of {}...".format(sample_page_index + 1, len(images))) self.log("debug", "Attempting language detection on page {} of {}...".format(sample_page_index + 1, len(images)))
sample_page_text = self._ocr([images[sample_page_index]], settings.OCR_LANGUAGE)[0] sample_page_text = self._ocr([images[sample_page_index]], settings.OCR_LANGUAGE)[0]
guessed_language = self._guess_language(sample_page_text) guessed_language = self._guess_language(sample_page_text)
@ -107,7 +107,7 @@ class RasterisedDocumentParser(DocumentParser):
ocr_pages = self._complete_ocr_default_language(images, sample_page_index, sample_page_text) ocr_pages = self._complete_ocr_default_language(images, sample_page_index, sample_page_text)
elif ISO639[guessed_language] == settings.OCR_LANGUAGE: elif ISO639[guessed_language] == settings.OCR_LANGUAGE:
self.log("info", "Detected language: {} (default language)".format(guessed_language)) self.log("debug", "Detected language: {} (default language)".format(guessed_language))
ocr_pages = self._complete_ocr_default_language(images, sample_page_index, sample_page_text) ocr_pages = self._complete_ocr_default_language(images, sample_page_index, sample_page_text)
elif not ISO639[guessed_language] in pyocr.get_available_tools()[0].get_available_languages(): elif not ISO639[guessed_language] in pyocr.get_available_tools()[0].get_available_languages():
@ -115,10 +115,10 @@ class RasterisedDocumentParser(DocumentParser):
ocr_pages = self._complete_ocr_default_language(images, sample_page_index, sample_page_text) ocr_pages = self._complete_ocr_default_language(images, sample_page_index, sample_page_text)
else: else:
self.log("info", "Detected language: {}".format(guessed_language)) self.log("debug", "Detected language: {}".format(guessed_language))
ocr_pages = self._ocr(images, ISO639[guessed_language]) ocr_pages = self._ocr(images, ISO639[guessed_language])
self.log("info", "OCR completed.") self.log("debug", "OCR completed.")
self._text = strip_excess_whitespace(" ".join(ocr_pages)) self._text = strip_excess_whitespace(" ".join(ocr_pages))
return self._text return self._text
@ -130,7 +130,7 @@ class RasterisedDocumentParser(DocumentParser):
Greyscale images are easier for Tesseract to OCR Greyscale images are easier for Tesseract to OCR
""" """
self.log("info", "Converting document {} into greyscale images...".format(self.document_path)) self.log("debug", "Converting document {} into greyscale images...".format(self.document_path))
# Convert PDF to multiple PNMs # Convert PDF to multiple PNMs
pnm = os.path.join(self.tempdir, "convert-%04d.pnm") pnm = os.path.join(self.tempdir, "convert-%04d.pnm")
@ -148,7 +148,7 @@ class RasterisedDocumentParser(DocumentParser):
if f.endswith(".pnm"): if f.endswith(".pnm"):
pnms.append(os.path.join(self.tempdir, f)) pnms.append(os.path.join(self.tempdir, f))
self.log("info", "Running unpaper on {} pages...".format(len(pnms))) self.log("debug", "Running unpaper on {} pages...".format(len(pnms)))
# Run unpaper in parallel on converted images # Run unpaper in parallel on converted images
with ThreadPool(processes=settings.THREADS_PER_WORKER) as pool: with ThreadPool(processes=settings.THREADS_PER_WORKER) as pool:
@ -161,11 +161,11 @@ class RasterisedDocumentParser(DocumentParser):
guess = langdetect.detect(text) guess = langdetect.detect(text)
return guess return guess
except Exception as e: except Exception as e:
self.log('debug', "Language detection failed with: {}".format(e)) self.log('warning', "Language detection failed with: {}".format(e))
return None return None
def _ocr(self, imgs, lang): def _ocr(self, imgs, lang):
self.log("info", "Performing OCR on {} page(s) with language {}".format(len(imgs), lang)) self.log("debug", "Performing OCR on {} page(s) with language {}".format(len(imgs), lang))
with ThreadPool(processes=settings.THREADS_PER_WORKER) as pool: with ThreadPool(processes=settings.THREADS_PER_WORKER) as pool:
r = pool.map(image_to_string, itertools.product(imgs, [lang])) r = pool.map(image_to_string, itertools.product(imgs, [lang]))
return r return r
@ -180,7 +180,7 @@ class RasterisedDocumentParser(DocumentParser):
images_copy = list(images) images_copy = list(images)
del images_copy[sample_page_index] del images_copy[sample_page_index]
if images_copy: if images_copy:
self.log('info', 'Continuing ocr with default language.') self.log('debug', 'Continuing ocr with default language.')
ocr_pages = self._ocr(images_copy, settings.OCR_LANGUAGE) ocr_pages = self._ocr(images_copy, settings.OCR_LANGUAGE)
ocr_pages.insert(sample_page_index, sample_page) ocr_pages.insert(sample_page_index, sample_page)
return ocr_pages return ocr_pages