mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Merge branch 'dev'
This commit is contained in:
commit
8f5809d1fc
82
Dockerfile
82
Dockerfile
@ -1,82 +0,0 @@
|
||||
###############################################################################
|
||||
### Front end ###
|
||||
###############################################################################
|
||||
|
||||
FROM node:current AS frontend
|
||||
|
||||
WORKDIR /usr/src/paperless/src-ui/
|
||||
|
||||
COPY src-ui/package* ./
|
||||
RUN npm install
|
||||
|
||||
COPY src-ui .
|
||||
RUN node_modules/.bin/ng build --prod --output-hashing none --sourceMap=false --output-path dist/paperless-ui
|
||||
|
||||
###############################################################################
|
||||
### Back end ###
|
||||
###############################################################################
|
||||
|
||||
FROM ubuntu:20.04
|
||||
|
||||
WORKDIR /usr/src/paperless/
|
||||
|
||||
COPY Pipfile* ./
|
||||
|
||||
#Dependencies
|
||||
RUN apt-get update \
|
||||
&& DEBIAN_FRONTEND="noninteractive" apt-get -y --no-install-recommends install \
|
||||
build-essential \
|
||||
curl \
|
||||
ghostscript \
|
||||
gnupg \
|
||||
imagemagick \
|
||||
libmagic-dev \
|
||||
libpoppler-cpp-dev \
|
||||
libpq-dev \
|
||||
optipng \
|
||||
python3 \
|
||||
python3-dev \
|
||||
python3-pip \
|
||||
sudo \
|
||||
tesseract-ocr \
|
||||
tesseract-ocr-eng \
|
||||
tesseract-ocr-deu \
|
||||
tesseract-ocr-fra \
|
||||
tesseract-ocr-ita \
|
||||
tesseract-ocr-spa \
|
||||
tzdata \
|
||||
unpaper \
|
||||
&& pip3 install --upgrade pipenv supervisor setuptools \
|
||||
&& pipenv install --system --deploy \
|
||||
&& pipenv --clear \
|
||||
&& apt-get -y purge build-essential python3-pip python3-dev \
|
||||
&& apt-get -y autoremove --purge \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& mkdir /var/log/supervisord /var/run/supervisord
|
||||
|
||||
# copy scripts
|
||||
# this fixes issues with imagemagick and PDF
|
||||
COPY docker/imagemagick-policy.xml /etc/ImageMagick-6/policy.xml
|
||||
COPY docker/gunicorn.conf.py ./
|
||||
COPY docker/supervisord.conf /etc/supervisord.conf
|
||||
COPY docker/docker-entrypoint.sh /sbin/docker-entrypoint.sh
|
||||
|
||||
# copy app
|
||||
COPY src/ ./src/
|
||||
COPY --from=frontend /usr/src/paperless/src-ui/dist/paperless-ui/ ./src/documents/static/frontend/
|
||||
|
||||
# add users, setup scripts
|
||||
RUN addgroup --gid 1000 paperless \
|
||||
&& useradd --uid 1000 --gid paperless --home-dir /usr/src/paperless paperless \
|
||||
&& chown -R paperless:paperless . \
|
||||
&& chmod 755 /sbin/docker-entrypoint.sh
|
||||
|
||||
WORKDIR /usr/src/paperless/src/
|
||||
|
||||
RUN sudo -HEu paperless python3 manage.py collectstatic --clear --no-input
|
||||
|
||||
VOLUME ["/usr/src/paperless/data", "/usr/src/paperless/media", "/usr/src/paperless/consume", "/usr/src/paperless/export"]
|
||||
ENTRYPOINT ["/sbin/docker-entrypoint.sh"]
|
||||
CMD ["/usr/local/bin/supervisord", "-c", "/etc/supervisord.conf"]
|
||||
|
||||
LABEL maintainer="Jonas Winkler <dev@jpwinkler.de>"
|
5
Pipfile
5
Pipfile
@ -3,6 +3,11 @@ url = "https://pypi.python.org/simple"
|
||||
verify_ssl = true
|
||||
name = "pypi"
|
||||
|
||||
[[source]]
|
||||
url = "https://www.piwheels.org/simple"
|
||||
verify_ssl = true
|
||||
name = "piwheels"
|
||||
|
||||
[packages]
|
||||
django = "~=3.1"
|
||||
pillow = "*"
|
||||
|
51
Pipfile.lock
generated
51
Pipfile.lock
generated
@ -1,7 +1,7 @@
|
||||
{
|
||||
"_meta": {
|
||||
"hash": {
|
||||
"sha256": "d6416e6844126b09200b9839a3abdcf3c24ef5cf70052b8f134d8bc804552c17"
|
||||
"sha256": "abc7e5f5a8d075d4b013ceafd06ca07f57e597f053d670f73449ba210511b114"
|
||||
},
|
||||
"pipfile-spec": 6,
|
||||
"requires": {},
|
||||
@ -10,6 +10,11 @@
|
||||
"name": "pypi",
|
||||
"url": "https://pypi.python.org/simple",
|
||||
"verify_ssl": true
|
||||
},
|
||||
{
|
||||
"name": "piwheels",
|
||||
"url": "https://www.piwheels.org/simple",
|
||||
"verify_ssl": true
|
||||
}
|
||||
]
|
||||
},
|
||||
@ -102,6 +107,7 @@
|
||||
},
|
||||
"filemagic": {
|
||||
"hashes": [
|
||||
"sha256:b2fd77411975510e28673220c4b8868ed81b5eb5906339b6f4c233b32122d7d3",
|
||||
"sha256:e684359ef40820fe406f0ebc5bf8a78f89717bdb7fed688af68082d991d6dbf3"
|
||||
],
|
||||
"index": "pypi",
|
||||
@ -142,6 +148,7 @@
|
||||
"langdetect": {
|
||||
"hashes": [
|
||||
"sha256:363795ea005f1243c958e953245dac5d814fabdc025c9afa91588c5fa6b2fa83",
|
||||
"sha256:ae53a024643df713274c297c0795dbfb5a16b329902f8e543e7b2d7d45f699e4",
|
||||
"sha256:f37495e63607865e47deed08d78f7f8e58172658216ff954b2f14671bcd87740"
|
||||
],
|
||||
"index": "pypi",
|
||||
@ -162,6 +169,7 @@
|
||||
"sha256:448ebb1b3bf64c0267d6b09a7cba26b5ae61b6d2dbabff7c91b660c7eccf2bdb",
|
||||
"sha256:50e86c076611212ca62e5a59f518edafe0c0730f7d9195fec718da1a5c2bb1fc",
|
||||
"sha256:5734bdc0342aba9dfc6f04920988140fb41234db42381cf7ccba64169f9fe7ac",
|
||||
"sha256:5ddd1dfa2be066595c1993165b4cae84b9866b12339d0c903db7f21a094324a3",
|
||||
"sha256:64324f64f90a9e4ef732be0928be853eee378fd6a01be21a0a8469c4f2682c83",
|
||||
"sha256:6ae6c680f3ebf1cf7ad1d7748868b39d9f900836df774c453c11c5440bc15b36",
|
||||
"sha256:6d7593a705d662be5bfe24111af14763016765f43cb6923ed86223f965f52387",
|
||||
@ -189,7 +197,8 @@
|
||||
},
|
||||
"pathtools": {
|
||||
"hashes": [
|
||||
"sha256:7c35c5421a39bb82e58018febd90e3b6e5db34c5443aaaf742b3f33d4655f1c0"
|
||||
"sha256:7c35c5421a39bb82e58018febd90e3b6e5db34c5443aaaf742b3f33d4655f1c0",
|
||||
"sha256:d77d982475e87f32b82157a43b09f0a5ef3e66c1d8f3c7eb8d2580e783cd8202"
|
||||
],
|
||||
"version": "==0.1.2"
|
||||
},
|
||||
@ -217,6 +226,7 @@
|
||||
"sha256:2fb113757a369a6cdb189f8df3226e995acfed0a8919a72416626af1a0a71140",
|
||||
"sha256:4b0ef2470c4979e345e4e0cc1bbac65fda11d0d7b789dbac035e4c6ce3f98adb",
|
||||
"sha256:59e903ca800c8cfd1ebe482349ec7c35687b95e98cefae213e271c8c7fffa021",
|
||||
"sha256:5a3342d34289715928c914ee7f389351eb37fa4857caa9297fc7948f2ed3e53d",
|
||||
"sha256:5abd653a23c35d980b332bc0431d39663b1709d64142e3652890df4c9b6970f6",
|
||||
"sha256:5f9403af9c790cc18411ea398a6950ee2def2a830ad0cfe6dc9122e6d528b302",
|
||||
"sha256:6b4a8fd632b4ebee28282a9fef4c341835a1aa8671e2770b6f89adc8e8c2703c",
|
||||
@ -274,8 +284,10 @@
|
||||
"sha256:d14b140a4439d816e3b1229a4a525df917d6ea22a0771a2a78332273fd9528a4",
|
||||
"sha256:d1b4ab59e02d9008efe10ceabd0b31e79519da6fb67f7d8e8977118832d0f449",
|
||||
"sha256:d5227b229005a696cc67676e24c214740efd90b148de5733419ac9aaba3773da",
|
||||
"sha256:d9f3a909b59ac4a3ca9beb77716f4bce627276edb039a71d4e9ec4b7548536a0",
|
||||
"sha256:e1f57aa70d3f7cc6947fd88636a481638263ba04a742b4a37dd25c373e41491a",
|
||||
"sha256:e74a55f6bad0e7d3968399deb50f61f4db1926acf4a6d83beaaa7df986f48b1c",
|
||||
"sha256:e7f5a465c6431c0ad8d4e69603ee3306e521a09d3c6af76a16bdb62946bdddf0",
|
||||
"sha256:e82aba2188b9ba309fd8e271702bd0d0fc9148ae3150532bbb474f4590039ffb",
|
||||
"sha256:ee69dad2c7155756ad114c02db06002f4cded41132cc51378e57aad79cc8e4f4",
|
||||
"sha256:f5ab93a2cb2d8338b1674be43b442a7f544a0971da062a5da774ed40587f18f5"
|
||||
@ -285,7 +297,8 @@
|
||||
},
|
||||
"pyocr": {
|
||||
"hashes": [
|
||||
"sha256:fa15adc7e1cf0d345a2990495fe125a947c6e09a60ddba0256a1c14b2e603179"
|
||||
"sha256:fa15adc7e1cf0d345a2990495fe125a947c6e09a60ddba0256a1c14b2e603179",
|
||||
"sha256:fd602af17b6e21985669aadc058a95f343ff921e962ed4aa6520ded32e4d1301"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==0.7.2"
|
||||
@ -316,7 +329,10 @@
|
||||
},
|
||||
"python-levenshtein": {
|
||||
"hashes": [
|
||||
"sha256:033a11de5e3d19ea25c9302d11224e1a1898fe5abd23c61c7c360c25195e3eb1"
|
||||
"sha256:033a11de5e3d19ea25c9302d11224e1a1898fe5abd23c61c7c360c25195e3eb1",
|
||||
"sha256:15e26882728c29ccdf74cfc6ac4b49fc22c08b44d152348cb0eb1ec4f3dbf9df",
|
||||
"sha256:3df5e5eb144570ecf5ad38864a2393068798328c7f05e7b167a49391d36a2db1",
|
||||
"sha256:7f049b3ddc4b525bd469febafb98bf5202f789b722e0e4ccbec2ffbe8c07d7b4"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==0.12.0"
|
||||
@ -331,6 +347,7 @@
|
||||
"redis": {
|
||||
"hashes": [
|
||||
"sha256:0e7e0cfca8660dea8b7d5cd8c4f6c5e29e11f31158c0b0ae91a397f00e5a05a2",
|
||||
"sha256:3f1c7f166fa6c803613eec222224848a80f5e5b9c6af3aa82461506643034a7a",
|
||||
"sha256:432b788c4530cfe16d8d943a09d40ca6c16149727e4afe8c2c9d5580c59d9f24"
|
||||
],
|
||||
"index": "pypi",
|
||||
@ -360,7 +377,9 @@
|
||||
"sha256:749078d1eb89484db5f34b4012092ad14b327944ee7f1c4f74d6279a6e4d1884",
|
||||
"sha256:7913bd25f4ab274ba37bc97ad0e21c31004224ccb02765ad984eef43e04acc6c",
|
||||
"sha256:7a25fcbeae08f96a754b45bdc050e1fb94b95cab046bf56b016c25e9ab127b3e",
|
||||
"sha256:80ef188c0e47a6c964eed71c55a73c245f8daf9f0a4a9d804e91275afb468ca4",
|
||||
"sha256:83d6b356e116ca119db8e7c6fc2983289d87b27b3fac238cfe5dca529d884562",
|
||||
"sha256:842fb985b2b99a82a2b145b6bbd588c5f5cfd83693402920fcb985d515794666",
|
||||
"sha256:8b882a78c320478b12ff024e81dc7d43c1462aa4a3341c754ee65d857a521f85",
|
||||
"sha256:8f6a2229e8ad946e36815f2a03386bb8353d4bde368fdf8ca5f0cb97264d3b5c",
|
||||
"sha256:9801c4c1d9ae6a70aeb2128e5b4b68c45d4f0af0d1535500884d644fa9b768c6",
|
||||
@ -384,6 +403,7 @@
|
||||
},
|
||||
"scikit-learn": {
|
||||
"hashes": [
|
||||
"sha256:090bbf144fd5823c1f2efa3e1a9bf180295b24294ca8f478e75b40ed54f8036e",
|
||||
"sha256:0a127cc70990d4c15b1019680bfedc7fec6c23d14d3719fdf9b64b22d37cdeca",
|
||||
"sha256:0d39748e7c9669ba648acf40fb3ce96b8a07b240db6888563a7cb76e05e0d9cc",
|
||||
"sha256:1b8a391de95f6285a2f9adffb7db0892718950954b7149a70c783dc848f104ea",
|
||||
@ -423,6 +443,7 @@
|
||||
"sha256:9ad4fcddcbf5dc67619379782e6aeef41218a79e17979aaed01ed099876c0e62",
|
||||
"sha256:a254b98dbcc744c723a838c03b74a8a34c0558c9ac5c86d5561703362231107d",
|
||||
"sha256:b03c4338d6d3d299e8ca494194c0ae4f611548da59e3c038813f1a43976cb437",
|
||||
"sha256:b5e9d3e4474644915809d6aa1416ff20430a3ed9ae723a5d295da5ddb24985e2",
|
||||
"sha256:cc1f78ebc982cd0602c9a7615d878396bec94908db67d4ecddca864d049112f2",
|
||||
"sha256:d6d25c41a009e3c6b7e757338948d0076ee1dd1770d1c09ec131f11946883c54",
|
||||
"sha256:d84cadd7d7998433334c99fa55bcba0d8b4aeff0edb123b2a1dfcface538e474",
|
||||
@ -468,6 +489,7 @@
|
||||
},
|
||||
"watchdog": {
|
||||
"hashes": [
|
||||
"sha256:034c85530b647486e8c8477410fe79476511282658f2ce496f97106d9e5acfb8",
|
||||
"sha256:4214e1379d128b0588021880ccaf40317ee156d4603ac388b9adcf29165e0c04"
|
||||
],
|
||||
"index": "pypi",
|
||||
@ -561,6 +583,7 @@
|
||||
"sha256:29a6272fec10623fcbe158fdf9abc7a5fa032048ac1d8631f14b50fbfc10d17f",
|
||||
"sha256:2b31f46bf7b31e6aa690d4c7a3d51bb262438c6dcb0d528adde446531d0d3bb7",
|
||||
"sha256:2d43af2be93ffbad25dd959899b5b809618a496926146ce98ee0b23683f8c51c",
|
||||
"sha256:3188a7dfd96f734a7498f37cde6598b1e9c084f1ca68bc1aa04e88db31168ab6",
|
||||
"sha256:381ead10b9b9af5f64646cd27107fb27b614ee7040bb1226f9c07ba96625cbb5",
|
||||
"sha256:47a11bdbd8ada9b7ee628596f9d97fbd3851bd9999d398e9436bd67376dbece7",
|
||||
"sha256:4d6a42744139a7fa5b46a264874a781e8694bb32f1d76d8137b68138686f1729",
|
||||
@ -586,7 +609,8 @@
|
||||
"sha256:c851b35fc078389bc16b915a0a7c1d5923e12e2c5aeec58c52f4aa8085ac8237",
|
||||
"sha256:cb7df71de0af56000115eafd000b867d1261f786b5eebd88a0ca6360cccfaca7",
|
||||
"sha256:cedb2f9e1f990918ea061f28a0f0077a07702e3819602d3507e2ff98c8d20636",
|
||||
"sha256:e8caf961e1b1a945db76f1b5fa9c91498d15f545ac0ababbe575cfab185d3bd8"
|
||||
"sha256:e8caf961e1b1a945db76f1b5fa9c91498d15f545ac0ababbe575cfab185d3bd8",
|
||||
"sha256:ef221855191457fffeb909d5787d1807800ab4d0111f089e6c93ee68f577634d"
|
||||
],
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'",
|
||||
"version": "==5.3"
|
||||
@ -608,6 +632,7 @@
|
||||
},
|
||||
"docopt": {
|
||||
"hashes": [
|
||||
"sha256:15fde8252aa9f2804171014d50d069ffbf42c7a50b7d74bcbb82bfd5700fcfc2",
|
||||
"sha256:49b3a825280bd66b3aa83585ef59c4a8c82f2c8a522dbe754a8bc8d08c85c491"
|
||||
],
|
||||
"version": "==0.6.2"
|
||||
@ -638,11 +663,11 @@
|
||||
},
|
||||
"faker": {
|
||||
"hashes": [
|
||||
"sha256:6afc461ab3f779c9c16e299fc731d775e39ea7e8e063b3053ee359ae198a15ca",
|
||||
"sha256:ce1c38823eb0f927567cde5bf2e7c8ca565c7a70316139342050ce2ca74b4026"
|
||||
"sha256:4d038ba51ae5e0a956d79cadd684d856e5750bfd608b61dad1807f8f08b1da49",
|
||||
"sha256:f260f0375a44cd1e1a735c9b8c9b914304f607b5eef431d20e098c7c2f5b50a6"
|
||||
],
|
||||
"markers": "python_version >= '3.5'",
|
||||
"version": "==4.14.2"
|
||||
"version": "==4.16.0"
|
||||
},
|
||||
"filelock": {
|
||||
"hashes": [
|
||||
@ -653,6 +678,7 @@
|
||||
},
|
||||
"idna": {
|
||||
"hashes": [
|
||||
"sha256:4a57a6379512ade94fa99e2fa46d3cd0f2f553040548d0e2958c6ed90ee48226",
|
||||
"sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6",
|
||||
"sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0"
|
||||
],
|
||||
@ -670,12 +696,14 @@
|
||||
"iniconfig": {
|
||||
"hashes": [
|
||||
"sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3",
|
||||
"sha256:8647b85c03813b8680f4ae9c9db2fd7293f8591ea536a10d73d90f6eb4b10aac",
|
||||
"sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"
|
||||
],
|
||||
"version": "==1.1.1"
|
||||
},
|
||||
"jinja2": {
|
||||
"hashes": [
|
||||
"sha256:3f172970d5670703bd3812e8ca6459a9a7e069fa8e51b40195f83c81db191ec4",
|
||||
"sha256:89aab215427ef59c34ad58735269eb58b1a5808103067f7bb9d5836c651b3bb0",
|
||||
"sha256:f0a4641d3cf955324a89c04f3d94663aa4d638abe8f733ecd3582848e1c37035"
|
||||
],
|
||||
@ -689,8 +717,10 @@
|
||||
"sha256:09c4b7f37d6c648cb13f9230d847adf22f8171b1ccc4d5682398e77f40309235",
|
||||
"sha256:1027c282dad077d0bae18be6794e6b6b8c91d58ed8a8d89a89d59693b9131db5",
|
||||
"sha256:13d3144e1e340870b25e7b10b98d779608c02016d5184cfb9927a9f10c689f42",
|
||||
"sha256:19536834abffb3fa155017053c607cb835b2ecc6a3a2554a88043d991dffb736",
|
||||
"sha256:24982cc2533820871eba85ba648cd53d8623687ff11cbb805be4ff7b4c971aff",
|
||||
"sha256:29872e92839765e546828bb7754a68c418d927cd064fd4708fab9fe9c8bb116b",
|
||||
"sha256:3d61f15e39611aacd91b7e71d903787da86d9e80896e683c0103fced9add7834",
|
||||
"sha256:43a55c2930bbc139570ac2452adf3d70cdbb3cfe5912c71cdce1c2c6bbd9c5d1",
|
||||
"sha256:46c99d2de99945ec5cb54f23c8cd5689f6d7177305ebff350a58ce5f8de1669e",
|
||||
"sha256:500d4957e52ddc3351cabf489e79c91c17f6e0899158447047588650b5e69183",
|
||||
@ -700,6 +730,7 @@
|
||||
"sha256:6788b695d50a51edb699cb55e35487e430fa21f1ed838122d722e0ff0ac5ba15",
|
||||
"sha256:6dd73240d2af64df90aa7c4e7481e23825ea70af4b4922f8ede5b9e35f78a3b1",
|
||||
"sha256:717ba8fe3ae9cc0006d7c451f0bb265ee07739daf76355d06366154ee68d221e",
|
||||
"sha256:7952deddf24b85c88dab48f6ec366ac6e39d2761b5280f2f9594911e03fcd064",
|
||||
"sha256:79855e1c5b8da654cf486b830bd42c06e8780cea587384cf6545b7d9ac013a0b",
|
||||
"sha256:7c1699dfe0cf8ff607dbdcc1e9b9af1755371f92a68f706051cc8c37d447c905",
|
||||
"sha256:88e5fcfb52ee7b911e8bb6d6aa2fd21fbecc674eadd44118a9cc3863f938e735",
|
||||
@ -795,6 +826,7 @@
|
||||
},
|
||||
"pytest-env": {
|
||||
"hashes": [
|
||||
"sha256:33b4030383a021924fe3f3ba5ca4311990d8b1d02ca77389c2be020c4500f96a",
|
||||
"sha256:7e94956aef7f2764f3c147d216ce066bf6c42948bb9e293169b1b1c880a580c2"
|
||||
],
|
||||
"index": "pypi",
|
||||
@ -802,6 +834,7 @@
|
||||
},
|
||||
"pytest-forked": {
|
||||
"hashes": [
|
||||
"sha256:2d1bfc93ab65a28324eb0a63503bfb500c2da6916efede7a24b43a04970fe63c",
|
||||
"sha256:6aa9ac7e00ad1a539c41bec6d21011332de671e938c7637378ec9710204e37ca",
|
||||
"sha256:dc4147784048e70ef5d437951728825a131b81714b398d5d52f17c7c144d8815"
|
||||
],
|
||||
@ -810,6 +843,7 @@
|
||||
},
|
||||
"pytest-sugar": {
|
||||
"hashes": [
|
||||
"sha256:67a55a83c7b2717ad607704d3fe9004bb6543b54017ef82f9c6590acc38c1aec",
|
||||
"sha256:b1b2186b0a72aada6859bea2a5764145e3aaa2c1cfbb23c3a19b5f7b697563d3"
|
||||
],
|
||||
"index": "pypi",
|
||||
@ -927,6 +961,7 @@
|
||||
},
|
||||
"termcolor": {
|
||||
"hashes": [
|
||||
"sha256:19b1225d03bfb56571484caaa8521d8ec6e2473ae1640c9f48a48dda49417706",
|
||||
"sha256:1d6d69ce66211143803fbc56652b41d73b4a400a2891d7bf7a1cdf4c02de613b"
|
||||
],
|
||||
"version": "==1.1.0"
|
||||
|
@ -2,27 +2,25 @@
|
||||
### Back end ###
|
||||
###############################################################################
|
||||
|
||||
FROM ubuntu:20.04
|
||||
FROM python:3.7-slim
|
||||
|
||||
WORKDIR /usr/src/paperless/
|
||||
|
||||
COPY Pipfile* ./
|
||||
COPY requirements.txt ./
|
||||
|
||||
#Dependencies
|
||||
RUN apt-get update \
|
||||
&& DEBIAN_FRONTEND="noninteractive" apt-get -y --no-install-recommends install \
|
||||
&& apt-get -y --no-install-recommends install \
|
||||
build-essential \
|
||||
curl \
|
||||
ghostscript \
|
||||
gnupg \
|
||||
imagemagick \
|
||||
libatlas-base-dev \
|
||||
libmagic-dev \
|
||||
libpoppler-cpp-dev \
|
||||
libpq-dev \
|
||||
optipng \
|
||||
python3 \
|
||||
python3-dev \
|
||||
python3-pip \
|
||||
sudo \
|
||||
tesseract-ocr \
|
||||
tesseract-ocr-eng \
|
||||
@ -32,10 +30,9 @@ RUN apt-get update \
|
||||
tesseract-ocr-spa \
|
||||
tzdata \
|
||||
unpaper \
|
||||
&& pip3 install --upgrade pipenv supervisor setuptools \
|
||||
&& pipenv install --system --deploy \
|
||||
&& pipenv --clear \
|
||||
&& apt-get -y purge build-essential python3-pip python3-dev \
|
||||
&& pip3 install --upgrade supervisor setuptools \
|
||||
&& pip install --no-cache-dir -r requirements.txt \
|
||||
&& apt-get -y purge build-essential \
|
||||
&& apt-get -y autoremove --purge \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& mkdir /var/log/supervisord /var/run/supervisord
|
||||
|
@ -8,16 +8,40 @@ Administration
|
||||
Making backups
|
||||
##############
|
||||
|
||||
.. warning::
|
||||
Multiple options exist for making backups of your paperless instance,
|
||||
depending on how you installed paperless.
|
||||
|
||||
This section is not updated to paperless-ng yet, the exporter is a valid tool
|
||||
for backups though.
|
||||
Before making backups, make sure that paperless is not running.
|
||||
|
||||
So you're bored of this whole project, or you want to make a remote backup of
|
||||
your files for whatever reason. This is easy to do, simply use the
|
||||
:ref:`exporter <utilities-exporter>` to dump your documents and database out
|
||||
into an arbitrary directory.
|
||||
Options available to any installation of paperless:
|
||||
|
||||
* Use the :ref:`document exporter <utilities-exporter>`.
|
||||
The document exporter exports all your documents, thumbnails and
|
||||
metadata to a specific folder. You may import your documents into a
|
||||
fresh instance of paperless again or store your documents in another
|
||||
DMS with this export.
|
||||
|
||||
Options available to docker installations:
|
||||
|
||||
* Backup the docker volumes. These usually reside within
|
||||
``/var/lib/docker/volumes`` on the host and you need to be root in order
|
||||
to access them.
|
||||
|
||||
Paperless uses 3 volumes:
|
||||
|
||||
* ``paperless_media``: This is where your documents are stored.
|
||||
* ``paperless_data``: This is where auxilliary data is stored. This
|
||||
folder also contains the SQLite database, if you use it.
|
||||
* ``paperless_pgdata``: Exists only if you use PostgreSQL and contains
|
||||
the database.
|
||||
|
||||
Options available to bare-metal and non-docker installations:
|
||||
|
||||
* Backup the entire paperless folder. This ensures that if your paperless instance
|
||||
crashes at some point or your disk fails, you can simply copy the folder back
|
||||
into place and it works.
|
||||
|
||||
When using PostgreSQL, you'll also have to backup the database.
|
||||
|
||||
.. _migrating-restoring:
|
||||
|
||||
@ -25,6 +49,8 @@ Restoring
|
||||
=========
|
||||
|
||||
|
||||
|
||||
|
||||
.. _administration-updating:
|
||||
|
||||
Updating paperless
|
||||
|
@ -128,6 +128,8 @@ consumer. Once complete, you should see the newly-created document,
|
||||
automatically tagged with the appropriate data.
|
||||
|
||||
|
||||
.. _advanced-automatic_matching:
|
||||
|
||||
Automatic matching
|
||||
==================
|
||||
|
||||
@ -175,8 +177,6 @@ then put the path to that script in ``paperless.conf`` with the variable name
|
||||
of either ``PAPERLESS_PRE_CONSUME_SCRIPT`` or
|
||||
``PAPERLESS_POST_CONSUME_SCRIPT``.
|
||||
|
||||
.. TODO HYPEREF TO CONFIG
|
||||
|
||||
.. important::
|
||||
|
||||
These scripts are executed in a **blocking** process, which means that if
|
||||
@ -319,6 +319,6 @@ for use in filenames.
|
||||
.. code::
|
||||
|
||||
PAPERLESS_FILENAME_FORMAT=../../my/custom/location/{title}
|
||||
|
||||
|
||||
However, keep in mind that inside docker, if files get stored outside of the
|
||||
predefined volumes, they will be lost after a restart of paperless.
|
||||
|
@ -96,6 +96,8 @@ paperless-ng 0.9.0
|
||||
sqlite.
|
||||
* ``PAPERLESS_OCR_THREADS`` is gone and replaced with ``PAPERLESS_TASK_WORKERS`` and
|
||||
``PAPERLESS_THREADS_PER_WORKER``. Refer to the config example for details.
|
||||
* ``PAPERLESS_OPTIMIZE_THUMBNAILS`` allows you to disable or enable thumbnail
|
||||
optimization. This is useful on less powerful devices.
|
||||
|
||||
* Many more small changes here and there. The usual stuff.
|
||||
|
||||
|
34
docs/faq.rst
34
docs/faq.rst
@ -23,27 +23,35 @@ is
|
||||
|
||||
**Q:** *Will paperless-ng run on Raspberry Pi?*
|
||||
|
||||
**A:** The short answer is yes. The long answer is that certain parts of
|
||||
**A:** The short answer is yes. I've tested it on a Raspberry Pi 3 B.
|
||||
The long answer is that certain parts of
|
||||
Paperless will run very slow, such as the tesseract OCR. On Rasperry Pi,
|
||||
try to OCR documents before feeding them into paperless so that paperless can
|
||||
reuse the text. The web interface should be alot snappier, since it runs
|
||||
in your browser and paperless has to do much less work to serve the data.
|
||||
|
||||
.. note::
|
||||
|
||||
Consider setting ``PAPERLESS_OPTIMIZE_THUMBNAILS`` to false to speed up
|
||||
the consumption process. This takes quite a bit of time on Raspberry Pi.
|
||||
|
||||
.. note::
|
||||
|
||||
Updating the :ref:`automatic matching algorithm <advanced-automatic_matching>`
|
||||
takes quite a bit of time. However, the update mechanism checks if your
|
||||
data has changed before doing the heavy lifting. If you experience the
|
||||
algorithm taking too much cpu time, consider changing the schedule in the
|
||||
admin interface to daily or weekly. You can also manually invoke the task
|
||||
by changing the date and time of the next run to today/now.
|
||||
|
||||
The actual matching of the algorithm is fast and works on Raspberry Pi as
|
||||
well as on any other device.
|
||||
|
||||
|
||||
|
||||
**Q:** *How do I install paperless-ng on Raspberry Pi?*
|
||||
|
||||
**A:** There is not docker image for ARM available. If you know how to build
|
||||
that automatically, I'm all ears. For now, you have to grab the latest release
|
||||
archive from the project page and build the image yourself. The release comes
|
||||
with the front end already compiled, so you don't have to do this on the Pi.
|
||||
|
||||
You may encounter some issues during the build:
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
W: GPG error: http://ports.ubuntu.com/ubuntu-ports focal InRelease: At least one invalid signature was encountered.
|
||||
E: The repository 'http://ports.ubuntu.com/ubuntu-ports focal InRelease' is not signed.
|
||||
N: Updating from such a repository can't be done securely, and is therefore disabled by default.
|
||||
N: See apt-secure(8) manpage for repository creation and user configuration details.
|
||||
|
||||
If this happens, look at `this thread <https://askubuntu.com/questions/1263284/>`:_.
|
||||
You will need to update docker to the latest version to fix this issue.
|
||||
|
@ -10,7 +10,7 @@
|
||||
# This is required for processing scheduled tasks such as email fetching, index
|
||||
# optimization and for training the automatic document matcher.
|
||||
# Defaults to localhost:6379.
|
||||
#PAPERLESS_REDIS="redis://localhost:6379"
|
||||
#PAPERLESS_REDIS=redis://localhost:6379
|
||||
|
||||
|
||||
###############################################################################
|
||||
@ -22,15 +22,15 @@
|
||||
# configuration for this is already done inside the docker-compose.env file.
|
||||
|
||||
#Set PAPERLESS_DBHOST and postgresql will be used instead of mysql.
|
||||
#PAPERLESS_DBHOST="localhost"
|
||||
#PAPERLESS_DBHOST=localhost
|
||||
|
||||
#Adjust port if necessary
|
||||
#PAPERLESS_DBPORT=
|
||||
|
||||
#name, user and pass all default to "paperless"
|
||||
#PAPERLESS_DBNAME="paperless"
|
||||
#PAPERLESS_DBUSER="paperless"
|
||||
#PAPERLESS_DBPASS="paperless"
|
||||
#PAPERLESS_DBNAME=paperless
|
||||
#PAPERLESS_DBUSER=paperless
|
||||
#PAPERLESS_DBPASS=paperless
|
||||
|
||||
|
||||
###############################################################################
|
||||
@ -40,23 +40,23 @@
|
||||
# This where your documents should go to be consumed. Make sure that it exists
|
||||
# and that the user running the paperless service can read/write its contents
|
||||
# before you start Paperless.
|
||||
PAPERLESS_CONSUMPTION_DIR="../consume"
|
||||
PAPERLESS_CONSUMPTION_DIR=../consume
|
||||
|
||||
# This is where paperless stores all its data (search index, sqlite database,
|
||||
# classification model, etc).
|
||||
#PAPERLESS_DATA_DIR="../data"
|
||||
#PAPERLESS_DATA_DIR=../data
|
||||
|
||||
# This is where your documents and thumbnails are stored.
|
||||
#PAPERLESS_MEDIA_ROOT="../media"
|
||||
#PAPERLESS_MEDIA_ROOT=../media
|
||||
|
||||
# Override the default STATIC_ROOT here. This is where all static files
|
||||
# created using "collectstatic" manager command are stored.
|
||||
#PAPERLESS_STATICDIR="../static"
|
||||
#PAPERLESS_STATICDIR=../static
|
||||
|
||||
|
||||
# Override the STATIC_URL here. Unless you're hosting Paperless off a
|
||||
# subdomain like /paperless/, you probably don't need to change this.
|
||||
#PAPERLESS_STATIC_URL="/static/"
|
||||
#PAPERLESS_STATIC_URL=/static/
|
||||
|
||||
|
||||
# Specify a filename format for the document (directories are supported)
|
||||
@ -69,7 +69,7 @@ PAPERLESS_CONSUMPTION_DIR="../consume"
|
||||
# * {tags[INDEX]} If your tags are strings, select the tag by index
|
||||
# Uniqueness of filenames is ensured, as an incrementing counter is attached
|
||||
# to each filename.
|
||||
#PAPERLESS_FILENAME_FORMAT=""
|
||||
#PAPERLESS_FILENAME_FORMAT=
|
||||
|
||||
###############################################################################
|
||||
#### Security ####
|
||||
@ -77,10 +77,12 @@ PAPERLESS_CONSUMPTION_DIR="../consume"
|
||||
|
||||
# Controls whether django's debug mode is enabled. Disable this on production
|
||||
# systems. Debug mode is disabled by default.
|
||||
#PAPERLESS_DEBUG="false"
|
||||
#PAPERLESS_DEBUG=false
|
||||
|
||||
# GnuPG encryption is deprecated and will be removed in future versions.
|
||||
#
|
||||
# Dont use it. It does not provide any security at all.
|
||||
#
|
||||
# Paperless can be instructed to attempt to encrypt your PDF files with GPG
|
||||
# using the PAPERLESS_PASSPHRASE specified below. If however you're not
|
||||
# concerned about encrypting these files (for example if you have disk
|
||||
@ -93,13 +95,13 @@ PAPERLESS_CONSUMPTION_DIR="../consume"
|
||||
# you've since changed it to a new one.
|
||||
#
|
||||
# The default is to not use encryption at all.
|
||||
#PAPERLESS_PASSPHRASE="secret"
|
||||
#PAPERLESS_PASSPHRASE=secret
|
||||
|
||||
|
||||
# The secret key has a default that should be fine so long as you're hosting
|
||||
# Paperless on a closed network. However, if you're putting this anywhere
|
||||
# public, you should change the key to something unique and verbose.
|
||||
#PAPERLESS_SECRET_KEY="change-me"
|
||||
#PAPERLESS_SECRET_KEY=change-me
|
||||
|
||||
|
||||
# If you're planning on putting Paperless on the open internet, then you
|
||||
@ -109,19 +111,19 @@ PAPERLESS_CONSUMPTION_DIR="../consume"
|
||||
#
|
||||
# Just remember that this is a comma-separated list, so "example.com" is fine,
|
||||
# as is "example.com,www.example.com", but NOT " example.com" or "example.com,"
|
||||
#PAPERLESS_ALLOWED_HOSTS="example.com,www.example.com"
|
||||
#PAPERLESS_ALLOWED_HOSTS=example.com,www.example.com
|
||||
|
||||
# If you decide to use the Paperless API in an ajax call, you need to add your
|
||||
# servers to the list of allowed hosts that can do CORS calls. By default
|
||||
# Paperless allows calls from localhost:8080, but you'd like to change that,
|
||||
# you can set this value to a comma-separated list.
|
||||
#PAPERLESS_CORS_ALLOWED_HOSTS="localhost:8080,example.com,localhost:8000"
|
||||
#PAPERLESS_CORS_ALLOWED_HOSTS=localhost:8080,example.com,localhost:8000
|
||||
|
||||
# To host paperless under a subpath url like example.com/paperless you set
|
||||
# this value to /paperless. No trailing slash!
|
||||
#
|
||||
# https://docs.djangoproject.com/en/1.11/ref/settings/#force-script-name
|
||||
#PAPERLESS_FORCE_SCRIPT_NAME=""
|
||||
#PAPERLESS_FORCE_SCRIPT_NAME=
|
||||
|
||||
###############################################################################
|
||||
#### Software Tweaks ####
|
||||
@ -158,14 +160,19 @@ PAPERLESS_CONSUMPTION_DIR="../consume"
|
||||
|
||||
# When the consumer detects a duplicate document, it will not touch the
|
||||
# original document. This default behavior can be changed here.
|
||||
#PAPERLESS_CONSUMER_DELETE_DUPLICATES="false"
|
||||
#PAPERLESS_CONSUMER_DELETE_DUPLICATES=false
|
||||
|
||||
# Use optipng to optimize thumbnails. This usually reduces the sice of
|
||||
# thumbnails by about 20%, but uses considerable compute time during
|
||||
# consumption.
|
||||
#PAPERLESS_OPTIMIZE_THUMBNAILS=true
|
||||
|
||||
# After a document is consumed, Paperless can trigger an arbitrary script if
|
||||
# you like. This script will be passed a number of arguments for you to work
|
||||
# with. The default is blank, which means nothing will be executed. For more
|
||||
# information, take a look at the docs:
|
||||
# http://paperless.readthedocs.org/en/latest/consumption.html#hooking-into-the-consumption-process
|
||||
#PAPERLESS_POST_CONSUME_SCRIPT="/path/to/an/arbitrary/script.sh"
|
||||
#PAPERLESS_POST_CONSUME_SCRIPT=/path/to/an/arbitrary/script.sh
|
||||
|
||||
# By default, paperless will check the document text for document date information.
|
||||
# Uncomment the line below to enable checking the document filename for date
|
||||
@ -173,7 +180,7 @@ PAPERLESS_CONSUMPTION_DIR="../consume"
|
||||
# https://dateparser.readthedocs.io/en/latest/#settings. The filename will be
|
||||
# checked first, and if nothing is found, the document text will be checked
|
||||
# as normal.
|
||||
#PAPERLESS_FILENAME_DATE_ORDER="YMD"
|
||||
#PAPERLESS_FILENAME_DATE_ORDER=YMD
|
||||
|
||||
# Sometimes devices won't create filenames which can be parsed properly
|
||||
# by the filename parser (see
|
||||
@ -243,7 +250,7 @@ PAPERLESS_CONSUMPTION_DIR="../consume"
|
||||
|
||||
# By default Paperless does not OCR a document if the text can be retrieved from
|
||||
# the document directly. Set to true to always OCR documents.
|
||||
#PAPERLESS_OCR_ALWAYS="false"
|
||||
#PAPERLESS_OCR_ALWAYS=false
|
||||
|
||||
|
||||
###############################################################################
|
||||
@ -271,7 +278,7 @@ PAPERLESS_CONSUMPTION_DIR="../consume"
|
||||
#PAPERLESS_CONVERT_BINARY=/usr/bin/convert
|
||||
|
||||
# Ghostscript
|
||||
#PAPERLESS_GS_BINARY = /usr/bin/gs
|
||||
#PAPERLESS_GS_BINARY=/usr/bin/gs
|
||||
|
||||
# Unpaper
|
||||
#PAPERLESS_UNPAPER_BINARY=/usr/bin/unpaper
|
||||
|
@ -24,12 +24,17 @@ then
|
||||
rm "$PAPERLESS_DIST" -r
|
||||
fi
|
||||
|
||||
mkdir "$PAPERLESS_DIST"
|
||||
mkdir "$PAPERLESS_DIST_APP"
|
||||
mkdir "$PAPERLESS_DIST_APP/docker"
|
||||
|
||||
# setup dependencies.
|
||||
|
||||
cd "$PAPERLESS_ROOT"
|
||||
|
||||
pipenv clean
|
||||
pipenv install --dev
|
||||
pipenv lock --keep-outdated -r > "$PAPERLESS_DIST_APP/requirements.txt"
|
||||
|
||||
# test if the application works.
|
||||
|
||||
@ -44,10 +49,6 @@ make clean html
|
||||
|
||||
# copy stuff into place
|
||||
|
||||
mkdir "$PAPERLESS_DIST"
|
||||
mkdir "$PAPERLESS_DIST_APP"
|
||||
mkdir "$PAPERLESS_DIST_APP/docker"
|
||||
|
||||
# the application itself
|
||||
|
||||
cp "$PAPERLESS_ROOT/.env" \
|
||||
@ -92,8 +93,6 @@ cd "$PAPERLESS_DIST_APP"
|
||||
|
||||
docker build . -t "jonaswinkler/paperless-ng:$VERSION"
|
||||
|
||||
docker push "jonaswinkler/paperless-ng:$VERSION"
|
||||
|
||||
# works. package the app!
|
||||
|
||||
cd "$PAPERLESS_DIST"
|
||||
|
23
scripts/push-release.sh
Executable file
23
scripts/push-release.sh
Executable file
@ -0,0 +1,23 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -e
|
||||
|
||||
|
||||
VERSION=$1
|
||||
|
||||
if [ -z "$VERSION" ]
|
||||
then
|
||||
echo "Need a version string."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# source root directory of paperless
|
||||
PAPERLESS_ROOT=$(git rev-parse --show-toplevel)
|
||||
|
||||
# output directory
|
||||
PAPERLESS_DIST="$PAPERLESS_ROOT/dist"
|
||||
PAPERLESS_DIST_APP="$PAPERLESS_DIST/paperless-ng"
|
||||
|
||||
cd "$PAPERLESS_DIST_APP"
|
||||
|
||||
docker push "jonaswinkler/paperless-ng:$VERSION"
|
@ -132,6 +132,28 @@
|
||||
</a>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
<h6 class="sidebar-heading d-flex justify-content-between align-items-center px-3 mt-4 mb-1 text-muted">
|
||||
<span>Misc</span>
|
||||
</h6>
|
||||
<ul class="nav flex-column mb-2">
|
||||
<li class="nav-item">
|
||||
<a class="nav-link" href="https://paperless-ng.readthedocs.io/en/latest/">
|
||||
<svg class="sidebaricon" fill="currentColor">
|
||||
<use xlink:href="assets/bootstrap-icons.svg#question-circle"/>
|
||||
</svg>
|
||||
Documentation
|
||||
</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a class="nav-link" href="https://github.com/jonaswinkler/paperless-ng">
|
||||
<svg class="sidebaricon" fill="currentColor">
|
||||
<use xlink:href="assets/bootstrap-icons.svg#link"/>
|
||||
</svg>
|
||||
Github
|
||||
</a>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
<div class="row pt-3 pb-2 mb-3 border-bottom align-items-center">
|
||||
<div class="row pt-3 pb-1 mb-3 border-bottom align-items-center" >
|
||||
<div class="col text-truncate">
|
||||
<h1 class="h2 text-truncate">{{title}}</h1>
|
||||
<h1 class="h2 text-truncate" style="line-height: 1.4">{{title}}</h1>
|
||||
</div>
|
||||
<div class="btn-toolbar col-auto">
|
||||
<ng-content></ng-content>
|
||||
|
@ -1,3 +1,7 @@
|
||||
.log-entry-10 {
|
||||
color: lightslategray !important;
|
||||
}
|
||||
|
||||
.log-entry-30 {
|
||||
color: yellow !important;
|
||||
}
|
||||
|
@ -3,7 +3,6 @@ import hashlib
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import uuid
|
||||
|
||||
from django.conf import settings
|
||||
from django.db import transaction
|
||||
@ -12,6 +11,7 @@ from django.utils import timezone
|
||||
from paperless.db import GnuPG
|
||||
from .classifier import DocumentClassifier, IncompatibleClassifierVersionError
|
||||
from .file_handling import generate_filename, create_source_path_directory
|
||||
from .loggers import LoggingMixin
|
||||
from .models import Document, FileInfo, Correspondent, DocumentType, Tag
|
||||
from .parsers import ParseError, get_parser_class
|
||||
from .signals import (
|
||||
@ -24,12 +24,10 @@ class ConsumerError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class Consumer:
|
||||
class Consumer(LoggingMixin):
|
||||
|
||||
def __init__(self):
|
||||
|
||||
self.logger = logging.getLogger(__name__)
|
||||
self.logging_group = None
|
||||
super().__init__()
|
||||
self.path = None
|
||||
self.filename = None
|
||||
self.override_title = None
|
||||
@ -74,11 +72,6 @@ class Consumer:
|
||||
os.makedirs(settings.THUMBNAIL_DIR, exist_ok=True)
|
||||
os.makedirs(settings.ORIGINALS_DIR, exist_ok=True)
|
||||
|
||||
def log(self, level, message):
|
||||
getattr(self.logger, level)(message, extra={
|
||||
"group": self.logging_group
|
||||
})
|
||||
|
||||
def try_consume_file(self,
|
||||
path,
|
||||
override_filename=None,
|
||||
@ -100,7 +93,7 @@ class Consumer:
|
||||
# this is for grouping logging entries for this particular file
|
||||
# together.
|
||||
|
||||
self.logging_group = uuid.uuid4()
|
||||
self.renew_logging_group()
|
||||
|
||||
# Make sure that preconditions for consuming the file are met.
|
||||
|
||||
|
@ -86,7 +86,7 @@ def generate_filename(document):
|
||||
added_day=document.added.day if document.added else "none",
|
||||
tags=tags,
|
||||
)
|
||||
except (ValueError, KeyError, IndexError) as e:
|
||||
except (ValueError, KeyError, IndexError):
|
||||
logging.getLogger(__name__).warning("Invalid PAPERLESS_FILENAME_FORMAT: {}, falling back to default,".format(settings.PAPERLESS_FILENAME_FORMAT))
|
||||
|
||||
# Always append the primary key to guarantee uniqueness of filename
|
||||
|
@ -32,6 +32,9 @@ class UploadForm(forms.Form):
|
||||
|
||||
t = int(mktime(datetime.now().timetuple()))
|
||||
|
||||
os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
|
||||
|
||||
# TODO: dont just append pdf. This is here for taht weird regex check at the start of the consumer.
|
||||
with tempfile.NamedTemporaryFile(prefix="paperless-upload-", suffix=".pdf", dir=settings.SCRATCH_DIR, delete=False) as f:
|
||||
|
||||
f.write(document)
|
||||
|
@ -1,4 +1,5 @@
|
||||
import logging
|
||||
import uuid
|
||||
|
||||
|
||||
class PaperlessHandler(logging.Handler):
|
||||
@ -13,3 +14,19 @@ class PaperlessHandler(logging.Handler):
|
||||
kwargs["group"] = record.group
|
||||
|
||||
Log.objects.create(**kwargs)
|
||||
|
||||
|
||||
class LoggingMixin:
|
||||
|
||||
logging_group = None
|
||||
|
||||
def renew_logging_group(self):
|
||||
self.logging_group = uuid.uuid4()
|
||||
|
||||
def log(self, level, message):
|
||||
target = ".".join([self.__class__.__module__, self.__class__.__name__])
|
||||
logger = logging.getLogger(target)
|
||||
|
||||
getattr(logger, level)(message, extra={
|
||||
"group": self.logging_group
|
||||
})
|
||||
|
@ -1,7 +1,4 @@
|
||||
# Generated by Django 3.1.3 on 2020-11-07 12:35
|
||||
import os
|
||||
|
||||
from django.conf import settings
|
||||
from django.db import migrations, models
|
||||
import django.db.models.deletion
|
||||
|
||||
|
@ -20,6 +20,7 @@ from django.utils import timezone
|
||||
# - XX. MONTH ZZZZ with XX being 1 or 2 and ZZZZ being 2 or 4 digits
|
||||
# - MONTH ZZZZ, with ZZZZ being 4 digits
|
||||
# - MONTH XX, ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits
|
||||
from documents.loggers import LoggingMixin
|
||||
from documents.signals import document_consumer_declaration
|
||||
|
||||
# TODO: isnt there a date parsing library for this?
|
||||
@ -101,17 +102,17 @@ class ParseError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class DocumentParser:
|
||||
class DocumentParser(LoggingMixin):
|
||||
"""
|
||||
Subclass this to make your own parser. Have a look at
|
||||
`paperless_tesseract.parsers` for inspiration.
|
||||
"""
|
||||
|
||||
def __init__(self, path, logging_group):
|
||||
super().__init__()
|
||||
self.logging_group = logging_group
|
||||
self.document_path = path
|
||||
self.tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR)
|
||||
self.logger = logging.getLogger(__name__)
|
||||
self.logging_group = logging_group
|
||||
|
||||
def get_thumbnail(self):
|
||||
"""
|
||||
@ -121,16 +122,19 @@ class DocumentParser:
|
||||
|
||||
def optimise_thumbnail(self, in_path):
|
||||
|
||||
out_path = os.path.join(self.tempdir, "optipng.png")
|
||||
if settings.OPTIMIZE_THUMBNAILS:
|
||||
out_path = os.path.join(self.tempdir, "optipng.png")
|
||||
|
||||
args = (settings.OPTIPNG_BINARY, "-silent", "-o5", in_path, "-out", out_path)
|
||||
args = (settings.OPTIPNG_BINARY, "-silent", "-o5", in_path, "-out", out_path)
|
||||
|
||||
self.log('debug', 'Execute: ' + " ".join(args))
|
||||
self.log('debug', 'Execute: ' + " ".join(args))
|
||||
|
||||
if not subprocess.Popen(args).wait() == 0:
|
||||
raise ParseError("Optipng failed at {}".format(args))
|
||||
if not subprocess.Popen(args).wait() == 0:
|
||||
raise ParseError("Optipng failed at {}".format(args))
|
||||
|
||||
return out_path
|
||||
return out_path
|
||||
else:
|
||||
return in_path
|
||||
|
||||
def get_optimised_thumbnail(self):
|
||||
return self.optimise_thumbnail(self.get_thumbnail())
|
||||
@ -222,11 +226,6 @@ class DocumentParser:
|
||||
|
||||
return date
|
||||
|
||||
def log(self, level, message):
|
||||
getattr(self.logger, level)(message, extra={
|
||||
"group": self.logging_group
|
||||
})
|
||||
|
||||
def cleanup(self):
|
||||
self.log("debug", "Deleting directory {}".format(self.tempdir))
|
||||
shutil.rmtree(self.tempdir)
|
||||
|
@ -2,11 +2,10 @@ import os
|
||||
import shutil
|
||||
import tempfile
|
||||
from unittest import mock
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
from django.contrib.auth.models import User
|
||||
from django.test import override_settings
|
||||
from rest_framework.test import APITestCase, APIClient
|
||||
from rest_framework.test import APITestCase
|
||||
|
||||
from documents.models import Document, Correspondent, DocumentType, Tag
|
||||
|
||||
|
@ -80,6 +80,6 @@ class TestClassifier(TestCase):
|
||||
|
||||
self.classifier.save_classifier()
|
||||
|
||||
newClassifier = DocumentClassifier()
|
||||
newClassifier.reload()
|
||||
self.assertFalse(newClassifier.train())
|
||||
new_classifier = DocumentClassifier()
|
||||
new_classifier.reload()
|
||||
self.assertFalse(new_classifier.train())
|
||||
|
@ -5,8 +5,6 @@ import tempfile
|
||||
from unittest import mock
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
from django.conf import settings
|
||||
from django.db import DatabaseError
|
||||
from django.test import TestCase, override_settings
|
||||
|
||||
from ..consumer import Consumer, ConsumerError
|
||||
@ -504,9 +502,9 @@ class TestConsumer(TestCase):
|
||||
|
||||
def testOverrideFilename(self):
|
||||
filename = self.get_test_file()
|
||||
overrideFilename = "My Bank - Statement for November.pdf"
|
||||
override_filename = "My Bank - Statement for November.pdf"
|
||||
|
||||
document = self.consumer.try_consume_file(filename, override_filename=overrideFilename)
|
||||
document = self.consumer.try_consume_file(filename, override_filename=override_filename)
|
||||
|
||||
self.assertEqual(document.correspondent.name, "My Bank")
|
||||
self.assertEqual(document.title, "Statement for November")
|
||||
|
@ -72,11 +72,11 @@ def binaries_check(app_configs, **kwargs):
|
||||
@register()
|
||||
def debug_mode_check(app_configs, **kwargs):
|
||||
if settings.DEBUG:
|
||||
return [Warning("DEBUG mode is enabled. Disable Debug mode. "
|
||||
"This is a serious security "
|
||||
"issue, since it puts security overides in place which"
|
||||
"are meant to be only used during development. This"
|
||||
"also means that paperless will tell anyone various"
|
||||
"debugging information when something goes wrong.")]
|
||||
return [Warning(
|
||||
"DEBUG mode is enabled. Disable Debug mode. This is a serious "
|
||||
"security issue, since it puts security overides in place which "
|
||||
"are meant to be only used during development. This "
|
||||
"also means that paperless will tell anyone various "
|
||||
"debugging information when something goes wrong.")]
|
||||
else:
|
||||
return []
|
||||
|
@ -257,6 +257,14 @@ LOGGING = {
|
||||
"handlers": ["dbhandler", "streamhandler"],
|
||||
"level": "DEBUG"
|
||||
},
|
||||
"paperless_mail": {
|
||||
"handlers": ["dbhandler", "streamhandler"],
|
||||
"level": "DEBUG"
|
||||
},
|
||||
"paperless_tesseract": {
|
||||
"handlers": ["dbhandler", "streamhandler"],
|
||||
"level": "DEBUG"
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
@ -312,6 +320,8 @@ CONSUMER_POLLING = int(os.getenv("PAPERLESS_CONSUMER_POLLING", 0))
|
||||
|
||||
CONSUMER_DELETE_DUPLICATES = __get_boolean("PAPERLESS_CONSUMER_DELETE_DUPLICATES")
|
||||
|
||||
OPTIMIZE_THUMBNAILS = __get_boolean("PAPERLESS_OPTIMIZE_THUMBNAILS", "true")
|
||||
|
||||
# The default language that tesseract will attempt to use when parsing
|
||||
# documents. It should be a 3-letter language code consistent with ISO 639.
|
||||
OCR_LANGUAGE = os.getenv("PAPERLESS_OCR_LANGUAGE", "eng")
|
||||
|
@ -1,18 +1,7 @@
|
||||
from django.contrib import admin
|
||||
from django import forms
|
||||
|
||||
from paperless_mail.models import MailAccount, MailRule
|
||||
|
||||
|
||||
class MailAccountForm(forms.ModelForm):
|
||||
|
||||
password = forms.CharField(widget=forms.PasswordInput)
|
||||
|
||||
class Meta:
|
||||
fields = '__all__'
|
||||
model = MailAccount
|
||||
|
||||
|
||||
class MailAccountAdmin(admin.ModelAdmin):
|
||||
|
||||
list_display = ("name", "imap_server", "username")
|
||||
@ -20,6 +9,8 @@ class MailAccountAdmin(admin.ModelAdmin):
|
||||
|
||||
class MailRuleAdmin(admin.ModelAdmin):
|
||||
|
||||
list_filter = ("account",)
|
||||
|
||||
list_display = ("name", "account", "folder", "action")
|
||||
|
||||
|
||||
|
@ -8,6 +8,7 @@ from django_q.tasks import async_task
|
||||
from imap_tools import MailBox, MailBoxUnencrypted, AND, MailMessageFlags, \
|
||||
MailboxFolderSelectError
|
||||
|
||||
from documents.loggers import LoggingMixin
|
||||
from documents.models import Correspondent
|
||||
from paperless_mail.models import MailAccount, MailRule
|
||||
|
||||
@ -83,72 +84,6 @@ def make_criterias(rule):
|
||||
return {**criterias, **get_rule_action(rule).get_criteria()}
|
||||
|
||||
|
||||
def handle_mail_account(account):
|
||||
|
||||
if account.imap_security == MailAccount.IMAP_SECURITY_NONE:
|
||||
mailbox = MailBoxUnencrypted(account.imap_server, account.imap_port)
|
||||
elif account.imap_security == MailAccount.IMAP_SECURITY_STARTTLS:
|
||||
mailbox = MailBox(account.imap_server, account.imap_port, starttls=True)
|
||||
elif account.imap_security == MailAccount.IMAP_SECURITY_SSL:
|
||||
mailbox = MailBox(account.imap_server, account.imap_port)
|
||||
else:
|
||||
raise ValueError("Unknown IMAP security")
|
||||
|
||||
total_processed_files = 0
|
||||
|
||||
with mailbox as M:
|
||||
|
||||
try:
|
||||
M.login(account.username, account.password)
|
||||
except Exception:
|
||||
raise MailError(
|
||||
f"Error while authenticating account {account.name}")
|
||||
|
||||
for rule in account.rules.all():
|
||||
|
||||
try:
|
||||
M.folder.set(rule.folder)
|
||||
except MailboxFolderSelectError:
|
||||
raise MailError(
|
||||
f"Rule {rule.name}: Folder {rule.folder} does not exist "
|
||||
f"in account {account.name}")
|
||||
|
||||
criterias = make_criterias(rule)
|
||||
|
||||
try:
|
||||
messages = M.fetch(criteria=AND(**criterias), mark_seen=False)
|
||||
except Exception:
|
||||
raise MailError(
|
||||
f"Rule {rule.name}: Error while fetching folder "
|
||||
f"{rule.folder} of account {account.name}")
|
||||
|
||||
post_consume_messages = []
|
||||
|
||||
for message in messages:
|
||||
try:
|
||||
processed_files = handle_message(message, rule)
|
||||
except Exception:
|
||||
raise MailError(
|
||||
f"Rule {rule.name}: Error while processing mail "
|
||||
f"{message.uid} of account {account.name}")
|
||||
if processed_files > 0:
|
||||
post_consume_messages.append(message.uid)
|
||||
|
||||
total_processed_files += processed_files
|
||||
try:
|
||||
get_rule_action(rule).post_consume(
|
||||
M,
|
||||
post_consume_messages,
|
||||
rule.action_parameter)
|
||||
|
||||
except Exception:
|
||||
raise MailError(
|
||||
f"Rule {rule.name}: Error while processing post-consume "
|
||||
f"actions for account {account.name}")
|
||||
|
||||
return total_processed_files
|
||||
|
||||
|
||||
def get_title(message, att, rule):
|
||||
if rule.assign_title_from == MailRule.TITLE_FROM_SUBJECT:
|
||||
title = message.subject
|
||||
@ -189,39 +124,156 @@ def get_correspondent(message, rule):
|
||||
return correspondent
|
||||
|
||||
|
||||
def handle_message(message, rule):
|
||||
if not message.attachments:
|
||||
return 0
|
||||
def get_mailbox(server, port, security):
|
||||
if security == MailAccount.IMAP_SECURITY_NONE:
|
||||
mailbox = MailBoxUnencrypted(server, port)
|
||||
elif security == MailAccount.IMAP_SECURITY_STARTTLS:
|
||||
mailbox = MailBox(server, port, starttls=True)
|
||||
elif security == MailAccount.IMAP_SECURITY_SSL:
|
||||
mailbox = MailBox(server, port)
|
||||
else:
|
||||
raise ValueError("Unknown IMAP security")
|
||||
return mailbox
|
||||
|
||||
correspondent = get_correspondent(message, rule)
|
||||
tag = rule.assign_tag
|
||||
doc_type = rule.assign_document_type
|
||||
|
||||
processed_attachments = 0
|
||||
class MailAccountHandler(LoggingMixin):
|
||||
|
||||
for att in message.attachments:
|
||||
def handle_mail_account(self, account):
|
||||
|
||||
title = get_title(message, att, rule)
|
||||
self.renew_logging_group()
|
||||
|
||||
# TODO: check with parsers what files types are supported
|
||||
if att.content_type == 'application/pdf':
|
||||
self.log('debug', f"Processing mail account {account}")
|
||||
|
||||
os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
|
||||
_, temp_filename = tempfile.mkstemp(prefix="paperless-mail-", dir=settings.SCRATCH_DIR)
|
||||
with open(temp_filename, 'wb') as f:
|
||||
f.write(att.payload)
|
||||
total_processed_files = 0
|
||||
|
||||
async_task(
|
||||
"documents.tasks.consume_file",
|
||||
path=temp_filename,
|
||||
override_filename=att.filename,
|
||||
override_title=title,
|
||||
override_correspondent_id=correspondent.id if correspondent else None,
|
||||
override_document_type_id=doc_type.id if doc_type else None,
|
||||
override_tag_ids=[tag.id] if tag else None,
|
||||
task_name=f"Mail: {att.filename}"
|
||||
)
|
||||
with get_mailbox(account.imap_server,
|
||||
account.imap_port,
|
||||
account.imap_security) as M:
|
||||
|
||||
processed_attachments += 1
|
||||
try:
|
||||
M.login(account.username, account.password)
|
||||
except Exception:
|
||||
raise MailError(
|
||||
f"Error while authenticating account {account.name}")
|
||||
|
||||
return processed_attachments
|
||||
self.log('debug', f"Account {account}: Processing "
|
||||
f"{account.rules.count()} rule(s)")
|
||||
|
||||
for rule in account.rules.all():
|
||||
self.log(
|
||||
'debug',
|
||||
f"Account {account}: Processing rule {rule.name}")
|
||||
|
||||
self.log(
|
||||
'debug',
|
||||
f"Rule {account}.{rule}: Selecting folder {rule.folder}")
|
||||
|
||||
try:
|
||||
M.folder.set(rule.folder)
|
||||
except MailboxFolderSelectError:
|
||||
raise MailError(
|
||||
f"Rule {rule.name}: Folder {rule.folder} does not exist "
|
||||
f"in account {account.name}")
|
||||
|
||||
criterias = make_criterias(rule)
|
||||
|
||||
self.log(
|
||||
'debug',
|
||||
f"Rule {account}.{rule}: Searching folder with criteria "
|
||||
f"{str(AND(**criterias))}")
|
||||
|
||||
try:
|
||||
messages = M.fetch(criteria=AND(**criterias), mark_seen=False)
|
||||
except Exception:
|
||||
raise MailError(
|
||||
f"Rule {rule.name}: Error while fetching folder "
|
||||
f"{rule.folder} of account {account.name}")
|
||||
|
||||
post_consume_messages = []
|
||||
|
||||
mails_processed = 0
|
||||
|
||||
for message in messages:
|
||||
try:
|
||||
processed_files = self.handle_message(message, rule)
|
||||
except Exception:
|
||||
raise MailError(
|
||||
f"Rule {rule.name}: Error while processing mail "
|
||||
f"{message.uid} of account {account.name}")
|
||||
if processed_files > 0:
|
||||
post_consume_messages.append(message.uid)
|
||||
|
||||
total_processed_files += processed_files
|
||||
mails_processed += 1
|
||||
|
||||
self.log(
|
||||
'debug',
|
||||
f"Rule {account}.{rule}: Processed {mails_processed} "
|
||||
f"matching mail(s)")
|
||||
|
||||
self.log(
|
||||
'debug',
|
||||
f"Rule {account}.{rule}: Running mail actions on "
|
||||
f"{len(post_consume_messages)} mails")
|
||||
|
||||
try:
|
||||
get_rule_action(rule).post_consume(
|
||||
M,
|
||||
post_consume_messages,
|
||||
rule.action_parameter)
|
||||
|
||||
except Exception:
|
||||
raise MailError(
|
||||
f"Rule {rule.name}: Error while processing post-consume "
|
||||
f"actions for account {account.name}")
|
||||
|
||||
return total_processed_files
|
||||
|
||||
def handle_message(self, message, rule):
|
||||
if not message.attachments:
|
||||
return 0
|
||||
|
||||
self.log(
|
||||
'debug',
|
||||
f"Rule {rule.account}.{rule}: "
|
||||
f"Processing mail {message.subject} from {message.from_} with "
|
||||
f"{len(message.attachments)} attachment(s)")
|
||||
|
||||
correspondent = get_correspondent(message, rule)
|
||||
tag = rule.assign_tag
|
||||
doc_type = rule.assign_document_type
|
||||
|
||||
processed_attachments = 0
|
||||
|
||||
for att in message.attachments:
|
||||
|
||||
title = get_title(message, att, rule)
|
||||
|
||||
# TODO: check with parsers what files types are supported
|
||||
if att.content_type == 'application/pdf':
|
||||
|
||||
os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
|
||||
_, temp_filename = tempfile.mkstemp(prefix="paperless-mail-", dir=settings.SCRATCH_DIR)
|
||||
with open(temp_filename, 'wb') as f:
|
||||
f.write(att.payload)
|
||||
|
||||
self.log(
|
||||
'info',
|
||||
f"Rule {rule.account}.{rule}: "
|
||||
f"Consuming attachment {att.filename} from mail "
|
||||
f"{message.subject} from {message.from_}")
|
||||
|
||||
async_task(
|
||||
"documents.tasks.consume_file",
|
||||
path=temp_filename,
|
||||
override_filename=att.filename,
|
||||
override_title=title,
|
||||
override_correspondent_id=correspondent.id if correspondent else None,
|
||||
override_document_type_id=doc_type.id if doc_type else None,
|
||||
override_tag_ids=[tag.id] if tag else None,
|
||||
task_name=f"Mail: {att.filename}"
|
||||
)
|
||||
|
||||
processed_attachments += 1
|
||||
|
||||
return processed_attachments
|
||||
|
@ -1,6 +1,6 @@
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
from paperless_mail import mail, tasks
|
||||
from paperless_mail import tasks
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
|
23
src/paperless_mail/migrations/0003_auto_20201118_1940.py
Normal file
23
src/paperless_mail/migrations/0003_auto_20201118_1940.py
Normal file
@ -0,0 +1,23 @@
|
||||
# Generated by Django 3.1.3 on 2020-11-18 19:40
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('paperless_mail', '0002_auto_20201117_1334'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterField(
|
||||
model_name='mailaccount',
|
||||
name='imap_port',
|
||||
field=models.IntegerField(blank=True, help_text='This is usually 143 for unencrypted and STARTTLS connections, and 993 for SSL connections.', null=True),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='mailrule',
|
||||
name='name',
|
||||
field=models.CharField(max_length=256, unique=True),
|
||||
),
|
||||
]
|
@ -1,8 +1,5 @@
|
||||
from django.db import models
|
||||
|
||||
# Create your models here.
|
||||
from django.db import models
|
||||
|
||||
import documents.models as document_models
|
||||
|
||||
|
||||
@ -22,7 +19,11 @@ class MailAccount(models.Model):
|
||||
|
||||
imap_server = models.CharField(max_length=256)
|
||||
|
||||
imap_port = models.IntegerField(blank=True, null=True)
|
||||
imap_port = models.IntegerField(
|
||||
blank=True,
|
||||
null=True,
|
||||
help_text="This is usually 143 for unencrypted and STARTTLS "
|
||||
"connections, and 993 for SSL connections.")
|
||||
|
||||
imap_security = models.PositiveIntegerField(
|
||||
choices=IMAP_SECURITY_OPTIONS,
|
||||
@ -71,7 +72,7 @@ class MailRule(models.Model):
|
||||
(CORRESPONDENT_FROM_CUSTOM, "Use correspondent selected below")
|
||||
)
|
||||
|
||||
name = models.CharField(max_length=256)
|
||||
name = models.CharField(max_length=256, unique=True)
|
||||
|
||||
account = models.ForeignKey(
|
||||
MailAccount,
|
||||
|
@ -1,13 +1,13 @@
|
||||
import logging
|
||||
|
||||
from paperless_mail import mail
|
||||
from paperless_mail.mail import MailAccountHandler
|
||||
from paperless_mail.models import MailAccount
|
||||
|
||||
|
||||
def process_mail_accounts():
|
||||
total_new_documents = 0
|
||||
for account in MailAccount.objects.all():
|
||||
total_new_documents += mail.handle_mail_account(account)
|
||||
total_new_documents += MailAccountHandler().handle_mail_account(account)
|
||||
|
||||
if total_new_documents > 0:
|
||||
return f"Added {total_new_documents} document(s)."
|
||||
@ -18,6 +18,6 @@ def process_mail_accounts():
|
||||
def process_mail_account(name):
|
||||
account = MailAccount.objects.find(name=name)
|
||||
if account:
|
||||
mail.handle_mail_account(account)
|
||||
MailAccountHandler().handle_mail_account(account)
|
||||
else:
|
||||
logging.error("Unknown mail acccount: {}".format(name))
|
||||
|
@ -7,7 +7,7 @@ from django.test import TestCase
|
||||
from imap_tools import MailMessageFlags, MailboxFolderSelectError
|
||||
|
||||
from documents.models import Correspondent
|
||||
from paperless_mail.mail import get_correspondent, get_title, handle_message, handle_mail_account, MailError
|
||||
from paperless_mail.mail import MailError, MailAccountHandler, get_correspondent, get_title
|
||||
from paperless_mail.models import MailRule, MailAccount
|
||||
|
||||
|
||||
@ -126,6 +126,8 @@ class TestMail(TestCase):
|
||||
|
||||
self.reset_bogus_mailbox()
|
||||
|
||||
self.mail_account_handler = MailAccountHandler()
|
||||
|
||||
def reset_bogus_mailbox(self):
|
||||
self.bogus_mailbox.messages = []
|
||||
self.bogus_mailbox.messages_spam = []
|
||||
@ -145,10 +147,10 @@ class TestMail(TestCase):
|
||||
me_localhost = Correspondent.objects.create(name=message2.from_)
|
||||
someone_else = Correspondent.objects.create(name="someone else")
|
||||
|
||||
rule = MailRule(assign_correspondent_from=MailRule.CORRESPONDENT_FROM_NOTHING)
|
||||
rule = MailRule(name="a", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_NOTHING)
|
||||
self.assertIsNone(get_correspondent(message, rule))
|
||||
|
||||
rule = MailRule(assign_correspondent_from=MailRule.CORRESPONDENT_FROM_EMAIL)
|
||||
rule = MailRule(name="b", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_EMAIL)
|
||||
c = get_correspondent(message, rule)
|
||||
self.assertIsNotNone(c)
|
||||
self.assertEqual(c.name, "someone@somewhere.com")
|
||||
@ -157,7 +159,7 @@ class TestMail(TestCase):
|
||||
self.assertEqual(c.name, "me@localhost.com")
|
||||
self.assertEqual(c.id, me_localhost.id)
|
||||
|
||||
rule = MailRule(assign_correspondent_from=MailRule.CORRESPONDENT_FROM_NAME)
|
||||
rule = MailRule(name="c", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_NAME)
|
||||
c = get_correspondent(message, rule)
|
||||
self.assertIsNotNone(c)
|
||||
self.assertEqual(c.name, "Someone!")
|
||||
@ -165,7 +167,7 @@ class TestMail(TestCase):
|
||||
self.assertIsNotNone(c)
|
||||
self.assertEqual(c.id, me_localhost.id)
|
||||
|
||||
rule = MailRule(assign_correspondent_from=MailRule.CORRESPONDENT_FROM_CUSTOM, assign_correspondent=someone_else)
|
||||
rule = MailRule(name="d", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_CUSTOM, assign_correspondent=someone_else)
|
||||
c = get_correspondent(message, rule)
|
||||
self.assertEqual(c, someone_else)
|
||||
|
||||
@ -174,14 +176,15 @@ class TestMail(TestCase):
|
||||
message.subject = "the message title"
|
||||
att = namedtuple('Attachment', [])
|
||||
att.filename = "this_is_the_file.pdf"
|
||||
rule = MailRule(assign_title_from=MailRule.TITLE_FROM_FILENAME)
|
||||
rule = MailRule(name="a", assign_title_from=MailRule.TITLE_FROM_FILENAME)
|
||||
self.assertEqual(get_title(message, att, rule), "this_is_the_file")
|
||||
rule = MailRule(assign_title_from=MailRule.TITLE_FROM_SUBJECT)
|
||||
rule = MailRule(name="b", assign_title_from=MailRule.TITLE_FROM_SUBJECT)
|
||||
self.assertEqual(get_title(message, att, rule), "the message title")
|
||||
|
||||
def test_handle_message(self):
|
||||
message = namedtuple('MailMessage', [])
|
||||
message.subject = "the message title"
|
||||
message.from_ = "Myself"
|
||||
|
||||
att = namedtuple('Attachment', [])
|
||||
att.filename = "test1.pdf"
|
||||
@ -200,9 +203,10 @@ class TestMail(TestCase):
|
||||
|
||||
message.attachments = [att, att2, att3]
|
||||
|
||||
rule = MailRule(assign_title_from=MailRule.TITLE_FROM_FILENAME)
|
||||
account = MailAccount()
|
||||
rule = MailRule(assign_title_from=MailRule.TITLE_FROM_FILENAME, account=account)
|
||||
|
||||
result = handle_message(message, rule)
|
||||
result = self.mail_account_handler.handle_message(message, rule)
|
||||
|
||||
self.assertEqual(result, 2)
|
||||
|
||||
@ -224,7 +228,7 @@ class TestMail(TestCase):
|
||||
message.attachments = []
|
||||
rule = MailRule()
|
||||
|
||||
result = handle_message(message, rule)
|
||||
result = self.mail_account_handler.handle_message(message, rule)
|
||||
|
||||
self.assertFalse(m.called)
|
||||
self.assertEqual(result, 0)
|
||||
@ -235,11 +239,13 @@ class TestMail(TestCase):
|
||||
|
||||
rule = MailRule.objects.create(name="testrule", account=account, action=MailRule.ACTION_MARK_READ)
|
||||
|
||||
self.assertEqual(len(self.bogus_mailbox.messages), 3)
|
||||
self.assertEqual(self.async_task.call_count, 0)
|
||||
self.assertEqual(len(self.bogus_mailbox.fetch("UNSEEN", False)), 2)
|
||||
handle_mail_account(account)
|
||||
self.mail_account_handler.handle_mail_account(account)
|
||||
self.assertEqual(self.async_task.call_count, 2)
|
||||
self.assertEqual(len(self.bogus_mailbox.fetch("UNSEEN", False)), 0)
|
||||
self.assertEqual(len(self.bogus_mailbox.messages), 3)
|
||||
|
||||
def test_handle_mail_account_delete(self):
|
||||
|
||||
@ -249,7 +255,7 @@ class TestMail(TestCase):
|
||||
|
||||
self.assertEqual(self.async_task.call_count, 0)
|
||||
self.assertEqual(len(self.bogus_mailbox.messages), 3)
|
||||
handle_mail_account(account)
|
||||
self.mail_account_handler.handle_mail_account(account)
|
||||
self.assertEqual(self.async_task.call_count, 2)
|
||||
self.assertEqual(len(self.bogus_mailbox.messages), 1)
|
||||
|
||||
@ -258,11 +264,13 @@ class TestMail(TestCase):
|
||||
|
||||
rule = MailRule.objects.create(name="testrule", account=account, action=MailRule.ACTION_FLAG, filter_subject="Invoice")
|
||||
|
||||
self.assertEqual(len(self.bogus_mailbox.messages), 3)
|
||||
self.assertEqual(self.async_task.call_count, 0)
|
||||
self.assertEqual(len(self.bogus_mailbox.fetch("UNFLAGGED", False)), 2)
|
||||
handle_mail_account(account)
|
||||
self.mail_account_handler.handle_mail_account(account)
|
||||
self.assertEqual(self.async_task.call_count, 1)
|
||||
self.assertEqual(len(self.bogus_mailbox.fetch("UNFLAGGED", False)), 1)
|
||||
self.assertEqual(len(self.bogus_mailbox.messages), 3)
|
||||
|
||||
def test_handle_mail_account_move(self):
|
||||
account = MailAccount.objects.create(name="test", imap_server="", username="admin", password="secret")
|
||||
@ -272,7 +280,7 @@ class TestMail(TestCase):
|
||||
self.assertEqual(self.async_task.call_count, 0)
|
||||
self.assertEqual(len(self.bogus_mailbox.messages), 3)
|
||||
self.assertEqual(len(self.bogus_mailbox.messages_spam), 0)
|
||||
handle_mail_account(account)
|
||||
self.mail_account_handler.handle_mail_account(account)
|
||||
self.assertEqual(self.async_task.call_count, 1)
|
||||
self.assertEqual(len(self.bogus_mailbox.messages), 2)
|
||||
self.assertEqual(len(self.bogus_mailbox.messages_spam), 1)
|
||||
@ -281,7 +289,7 @@ class TestMail(TestCase):
|
||||
account = MailAccount.objects.create(name="test", imap_server="", username="admin", password="wrong")
|
||||
|
||||
try:
|
||||
handle_mail_account(account)
|
||||
self.mail_account_handler.handle_mail_account(account)
|
||||
except MailError as e:
|
||||
self.assertTrue(str(e).startswith("Error while authenticating account"))
|
||||
else:
|
||||
@ -291,7 +299,7 @@ class TestMail(TestCase):
|
||||
rule = MailRule.objects.create(name="testrule", account=account, folder="uuuh")
|
||||
|
||||
try:
|
||||
handle_mail_account(account)
|
||||
self.mail_account_handler.handle_mail_account(account)
|
||||
except MailError as e:
|
||||
self.assertTrue("uuuh does not exist" in str(e))
|
||||
else:
|
||||
@ -299,10 +307,10 @@ class TestMail(TestCase):
|
||||
|
||||
account = MailAccount.objects.create(name="test3", imap_server="", username="admin", password="secret")
|
||||
|
||||
rule = MailRule.objects.create(name="testrule", account=account, action=MailRule.ACTION_MOVE, action_parameter="doesnotexist", filter_subject="Claim")
|
||||
rule = MailRule.objects.create(name="testrule2", account=account, action=MailRule.ACTION_MOVE, action_parameter="doesnotexist", filter_subject="Claim")
|
||||
|
||||
try:
|
||||
handle_mail_account(account)
|
||||
self.mail_account_handler.handle_mail_account(account)
|
||||
except MailError as e:
|
||||
self.assertTrue("Error while processing post-consume actions" in str(e))
|
||||
else:
|
||||
@ -311,12 +319,12 @@ class TestMail(TestCase):
|
||||
def test_filters(self):
|
||||
|
||||
account = MailAccount.objects.create(name="test3", imap_server="", username="admin", password="secret")
|
||||
rule = MailRule.objects.create(name="testrule", account=account, action=MailRule.ACTION_DELETE, filter_subject="Claim")
|
||||
rule = MailRule.objects.create(name="testrule3", account=account, action=MailRule.ACTION_DELETE, filter_subject="Claim")
|
||||
|
||||
self.assertEqual(self.async_task.call_count, 0)
|
||||
|
||||
self.assertEqual(len(self.bogus_mailbox.messages), 3)
|
||||
handle_mail_account(account)
|
||||
self.mail_account_handler.handle_mail_account(account)
|
||||
self.assertEqual(len(self.bogus_mailbox.messages), 2)
|
||||
self.assertEqual(self.async_task.call_count, 1)
|
||||
|
||||
@ -326,7 +334,7 @@ class TestMail(TestCase):
|
||||
rule.filter_body = "electronic"
|
||||
rule.save()
|
||||
self.assertEqual(len(self.bogus_mailbox.messages), 3)
|
||||
handle_mail_account(account)
|
||||
self.mail_account_handler.handle_mail_account(account)
|
||||
self.assertEqual(len(self.bogus_mailbox.messages), 2)
|
||||
self.assertEqual(self.async_task.call_count, 2)
|
||||
|
||||
@ -336,7 +344,7 @@ class TestMail(TestCase):
|
||||
rule.filter_body = None
|
||||
rule.save()
|
||||
self.assertEqual(len(self.bogus_mailbox.messages), 3)
|
||||
handle_mail_account(account)
|
||||
self.mail_account_handler.handle_mail_account(account)
|
||||
self.assertEqual(len(self.bogus_mailbox.messages), 1)
|
||||
self.assertEqual(self.async_task.call_count, 4)
|
||||
|
||||
@ -347,6 +355,6 @@ class TestMail(TestCase):
|
||||
rule.filter_subject = "Invoice"
|
||||
rule.save()
|
||||
self.assertEqual(len(self.bogus_mailbox.messages), 3)
|
||||
handle_mail_account(account)
|
||||
self.mail_account_handler.handle_mail_account(account)
|
||||
self.assertEqual(len(self.bogus_mailbox.messages), 2)
|
||||
self.assertEqual(self.async_task.call_count, 5)
|
||||
|
@ -1,3 +0,0 @@
|
||||
from django.shortcuts import render
|
||||
|
||||
# Create your views here.
|
@ -86,7 +86,7 @@ class RasterisedDocumentParser(DocumentParser):
|
||||
return self._text
|
||||
|
||||
if not settings.OCR_ALWAYS and self._is_ocred():
|
||||
self.log("info", "Skipping OCR, using Text from PDF")
|
||||
self.log("debug", "Skipping OCR, using Text from PDF")
|
||||
self._text = get_text_from_pdf(self.document_path)
|
||||
return self._text
|
||||
|
||||
@ -98,7 +98,7 @@ class RasterisedDocumentParser(DocumentParser):
|
||||
try:
|
||||
|
||||
sample_page_index = int(len(images) / 2)
|
||||
self.log("info", "Attempting language detection on page {} of {}...".format(sample_page_index + 1, len(images)))
|
||||
self.log("debug", "Attempting language detection on page {} of {}...".format(sample_page_index + 1, len(images)))
|
||||
sample_page_text = self._ocr([images[sample_page_index]], settings.OCR_LANGUAGE)[0]
|
||||
guessed_language = self._guess_language(sample_page_text)
|
||||
|
||||
@ -107,7 +107,7 @@ class RasterisedDocumentParser(DocumentParser):
|
||||
ocr_pages = self._complete_ocr_default_language(images, sample_page_index, sample_page_text)
|
||||
|
||||
elif ISO639[guessed_language] == settings.OCR_LANGUAGE:
|
||||
self.log("info", "Detected language: {} (default language)".format(guessed_language))
|
||||
self.log("debug", "Detected language: {} (default language)".format(guessed_language))
|
||||
ocr_pages = self._complete_ocr_default_language(images, sample_page_index, sample_page_text)
|
||||
|
||||
elif not ISO639[guessed_language] in pyocr.get_available_tools()[0].get_available_languages():
|
||||
@ -115,10 +115,10 @@ class RasterisedDocumentParser(DocumentParser):
|
||||
ocr_pages = self._complete_ocr_default_language(images, sample_page_index, sample_page_text)
|
||||
|
||||
else:
|
||||
self.log("info", "Detected language: {}".format(guessed_language))
|
||||
self.log("debug", "Detected language: {}".format(guessed_language))
|
||||
ocr_pages = self._ocr(images, ISO639[guessed_language])
|
||||
|
||||
self.log("info", "OCR completed.")
|
||||
self.log("debug", "OCR completed.")
|
||||
self._text = strip_excess_whitespace(" ".join(ocr_pages))
|
||||
return self._text
|
||||
|
||||
@ -130,7 +130,7 @@ class RasterisedDocumentParser(DocumentParser):
|
||||
Greyscale images are easier for Tesseract to OCR
|
||||
"""
|
||||
|
||||
self.log("info", "Converting document {} into greyscale images...".format(self.document_path))
|
||||
self.log("debug", "Converting document {} into greyscale images...".format(self.document_path))
|
||||
|
||||
# Convert PDF to multiple PNMs
|
||||
pnm = os.path.join(self.tempdir, "convert-%04d.pnm")
|
||||
@ -148,7 +148,7 @@ class RasterisedDocumentParser(DocumentParser):
|
||||
if f.endswith(".pnm"):
|
||||
pnms.append(os.path.join(self.tempdir, f))
|
||||
|
||||
self.log("info", "Running unpaper on {} pages...".format(len(pnms)))
|
||||
self.log("debug", "Running unpaper on {} pages...".format(len(pnms)))
|
||||
|
||||
# Run unpaper in parallel on converted images
|
||||
with ThreadPool(processes=settings.THREADS_PER_WORKER) as pool:
|
||||
@ -161,11 +161,11 @@ class RasterisedDocumentParser(DocumentParser):
|
||||
guess = langdetect.detect(text)
|
||||
return guess
|
||||
except Exception as e:
|
||||
self.log('debug', "Language detection failed with: {}".format(e))
|
||||
self.log('warning', "Language detection failed with: {}".format(e))
|
||||
return None
|
||||
|
||||
def _ocr(self, imgs, lang):
|
||||
self.log("info", "Performing OCR on {} page(s) with language {}".format(len(imgs), lang))
|
||||
self.log("debug", "Performing OCR on {} page(s) with language {}".format(len(imgs), lang))
|
||||
with ThreadPool(processes=settings.THREADS_PER_WORKER) as pool:
|
||||
r = pool.map(image_to_string, itertools.product(imgs, [lang]))
|
||||
return r
|
||||
@ -180,7 +180,7 @@ class RasterisedDocumentParser(DocumentParser):
|
||||
images_copy = list(images)
|
||||
del images_copy[sample_page_index]
|
||||
if images_copy:
|
||||
self.log('info', 'Continuing ocr with default language.')
|
||||
self.log('debug', 'Continuing ocr with default language.')
|
||||
ocr_pages = self._ocr(images_copy, settings.OCR_LANGUAGE)
|
||||
ocr_pages.insert(sample_page_index, sample_page)
|
||||
return ocr_pages
|
||||
|
Loading…
x
Reference in New Issue
Block a user