mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Merge pull request #848 from p-h-a-i-l/feature-consume-eml
Feature ability to consume mails and eml files
This commit is contained in:
commit
4d4d545343
4
.github/workflows/ci.yml
vendored
4
.github/workflows/ci.yml
vendored
@ -106,6 +106,10 @@ jobs:
|
||||
PAPERLESS_MAIL_TEST_HOST: ${{ secrets.TEST_MAIL_HOST }}
|
||||
PAPERLESS_MAIL_TEST_USER: ${{ secrets.TEST_MAIL_USER }}
|
||||
PAPERLESS_MAIL_TEST_PASSWD: ${{ secrets.TEST_MAIL_PASSWD }}
|
||||
# Skip Tests which require convert
|
||||
PAPERLESS_TEST_SKIP_CONVERT: 1
|
||||
# Enable Gotenberg end to end testing
|
||||
GOTENBERG_LIVE: 1
|
||||
steps:
|
||||
-
|
||||
name: Checkout
|
||||
|
4
Pipfile
4
Pipfile
@ -60,6 +60,9 @@ setproctitle = "*"
|
||||
nltk = "*"
|
||||
pdf2image = "*"
|
||||
flower = "*"
|
||||
bleach = "*"
|
||||
# https://www.piwheels.org/project/cryptography/ last built version
|
||||
cryptography = "==38.0.1"
|
||||
|
||||
[dev-packages]
|
||||
coveralls = "*"
|
||||
@ -76,4 +79,5 @@ black = "*"
|
||||
pre-commit = "*"
|
||||
sphinx-autobuild = "*"
|
||||
myst-parser = "*"
|
||||
imagehash = "*"
|
||||
mkdocs-material = "*"
|
||||
|
202
Pipfile.lock
generated
202
Pipfile.lock
generated
@ -1,7 +1,7 @@
|
||||
{
|
||||
"_meta": {
|
||||
"hash": {
|
||||
"sha256": "0242e3e296e09b30fb69e0d7a2f2e8feb4c6a23d3c7ec99500f2883a032a8c84"
|
||||
"sha256": "cbfe9920231de6e7f993962efb3cc371abdb6b08975232d4cf64d1bad1b53d7a"
|
||||
},
|
||||
"pipfile-spec": 6,
|
||||
"requires": {},
|
||||
@ -110,6 +110,14 @@
|
||||
],
|
||||
"version": "==3.6.4.0"
|
||||
},
|
||||
"bleach": {
|
||||
"hashes": [
|
||||
"sha256:085f7f33c15bd408dd9b17a4ad77c577db66d76203e5984b1bd59baeee948b2a",
|
||||
"sha256:0d03255c47eb9bd2f26aa9bb7f2107732e7e8fe195ca2f64709fcf3b0a4a085c"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==5.0.1"
|
||||
},
|
||||
"celery": {
|
||||
"extras": [
|
||||
"redis"
|
||||
@ -219,7 +227,7 @@
|
||||
"sha256:5a3d016c7c547f69d6f81fb0db9449ce888b418b5b9952cc5e6e66843e9dd845",
|
||||
"sha256:83e9a75d1911279afd89352c68b45348559d1fc0506b054b346651b5e7fee29f"
|
||||
],
|
||||
"markers": "python_version >= '3.6'",
|
||||
"markers": "python_full_version >= '3.6.0'",
|
||||
"version": "==2.1.1"
|
||||
},
|
||||
"click": {
|
||||
@ -235,7 +243,7 @@
|
||||
"sha256:a0713dc7a1de3f06bc0df5a9567ad19ead2d3d5689b434768a6145bff77c0667",
|
||||
"sha256:f184f0d851d96b6d29297354ed981b7dd71df7ff500d82fa6d11f0856bee8035"
|
||||
],
|
||||
"markers": "python_version < '4' and python_full_version >= '3.6.2'",
|
||||
"markers": "python_full_version >= '3.6.2' and python_full_version < '4.0.0'",
|
||||
"version": "==0.3.0"
|
||||
},
|
||||
"click-plugins": {
|
||||
@ -1625,7 +1633,7 @@
|
||||
"sha256:1511434bb92bf8dd198c12b1cc812e800d4181cfcb867674e0f8279cc93087aa",
|
||||
"sha256:16fa4864408f655d35ec496218b85f79b3437c829e93320c7c9215ccfd92489e"
|
||||
],
|
||||
"markers": "python_version < '3.10'",
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==4.4.0"
|
||||
},
|
||||
"tzdata": {
|
||||
@ -1767,6 +1775,13 @@
|
||||
],
|
||||
"version": "==0.2.5"
|
||||
},
|
||||
"webencodings": {
|
||||
"hashes": [
|
||||
"sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78",
|
||||
"sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923"
|
||||
],
|
||||
"version": "==0.5.1"
|
||||
},
|
||||
"websockets": {
|
||||
"hashes": [
|
||||
"sha256:00213676a2e46b6ebf6045bc11d0f529d9120baa6f58d122b4021ad92adabd41",
|
||||
@ -2055,7 +2070,7 @@
|
||||
"sha256:5a3d016c7c547f69d6f81fb0db9449ce888b418b5b9952cc5e6e66843e9dd845",
|
||||
"sha256:83e9a75d1911279afd89352c68b45348559d1fc0506b054b346651b5e7fee29f"
|
||||
],
|
||||
"markers": "python_version >= '3.6'",
|
||||
"markers": "python_full_version >= '3.6.0'",
|
||||
"version": "==2.1.1"
|
||||
},
|
||||
"click": {
|
||||
@ -2075,9 +2090,7 @@
|
||||
"version": "==0.4.6"
|
||||
},
|
||||
"coverage": {
|
||||
"extras": [
|
||||
"toml"
|
||||
],
|
||||
"extras": [],
|
||||
"hashes": [
|
||||
"sha256:027018943386e7b942fa832372ebc120155fd970837489896099f5cfa2890f79",
|
||||
"sha256:11b990d520ea75e7ee8dcab5bc908072aaada194a794db9f6d7d5cfd19661e5a",
|
||||
@ -2225,6 +2238,14 @@
|
||||
"markers": "python_version >= '3.5'",
|
||||
"version": "==3.4"
|
||||
},
|
||||
"imagehash": {
|
||||
"hashes": [
|
||||
"sha256:5ad9a5cde14fe255745a8245677293ac0d67f09c330986a351f34b614ba62fb5",
|
||||
"sha256:7038d1b7f9e0585beb3dd8c0a956f02b95a346c0b5f24a9e8cc03ebadaf0aa70"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==4.3.1"
|
||||
},
|
||||
"imagesize": {
|
||||
"hashes": [
|
||||
"sha256:0d8d18d08f840c19d0ee7ca1fd82490fdc3729b7ac93f49870406ddde8ef8d8b",
|
||||
@ -2395,6 +2416,40 @@
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6'",
|
||||
"version": "==1.7.0"
|
||||
},
|
||||
"numpy": {
|
||||
"hashes": [
|
||||
"sha256:0fe563fc8ed9dc4474cbf70742673fc4391d70f4363f917599a7fa99f042d5a8",
|
||||
"sha256:12ac457b63ec8ded85d85c1e17d85efd3c2b0967ca39560b307a35a6703a4735",
|
||||
"sha256:2341f4ab6dba0834b685cce16dad5f9b6606ea8a00e6da154f5dbded70fdc4dd",
|
||||
"sha256:296d17aed51161dbad3c67ed6d164e51fcd18dbcd5dd4f9d0a9c6055dce30810",
|
||||
"sha256:488a66cb667359534bc70028d653ba1cf307bae88eab5929cd707c761ff037db",
|
||||
"sha256:4d52914c88b4930dafb6c48ba5115a96cbab40f45740239d9f4159c4ba779962",
|
||||
"sha256:5e13030f8793e9ee42f9c7d5777465a560eb78fa7e11b1c053427f2ccab90c79",
|
||||
"sha256:61be02e3bf810b60ab74e81d6d0d36246dbfb644a462458bb53b595791251911",
|
||||
"sha256:7607b598217745cc40f751da38ffd03512d33ec06f3523fb0b5f82e09f6f676d",
|
||||
"sha256:7a70a7d3ce4c0e9284e92285cba91a4a3f5214d87ee0e95928f3614a256a1488",
|
||||
"sha256:7ab46e4e7ec63c8a5e6dbf5c1b9e1c92ba23a7ebecc86c336cb7bf3bd2fb10e5",
|
||||
"sha256:8981d9b5619569899666170c7c9748920f4a5005bf79c72c07d08c8a035757b0",
|
||||
"sha256:8c053d7557a8f022ec823196d242464b6955a7e7e5015b719e76003f63f82d0f",
|
||||
"sha256:926db372bc4ac1edf81cfb6c59e2a881606b409ddc0d0920b988174b2e2a767f",
|
||||
"sha256:95d79ada05005f6f4f337d3bb9de8a7774f259341c70bc88047a1f7b96a4bcb2",
|
||||
"sha256:95de7dc7dc47a312f6feddd3da2500826defdccbc41608d0031276a24181a2c0",
|
||||
"sha256:a0882323e0ca4245eb0a3d0a74f88ce581cc33aedcfa396e415e5bba7bf05f68",
|
||||
"sha256:a8365b942f9c1a7d0f0dc974747d99dd0a0cdfc5949a33119caf05cb314682d3",
|
||||
"sha256:a8aae2fb3180940011b4862b2dd3756616841c53db9734b27bb93813cd79fce6",
|
||||
"sha256:c237129f0e732885c9a6076a537e974160482eab8f10db6292e92154d4c67d71",
|
||||
"sha256:c67b833dbccefe97cdd3f52798d430b9d3430396af7cdb2a0c32954c3ef73894",
|
||||
"sha256:ce03305dd694c4873b9429274fd41fc7eb4e0e4dea07e0af97a933b079a5814f",
|
||||
"sha256:d331afac87c92373826af83d2b2b435f57b17a5c74e6268b79355b970626e329",
|
||||
"sha256:dada341ebb79619fe00a291185bba370c9803b1e1d7051610e01ed809ef3a4ba",
|
||||
"sha256:ed2cc92af0efad20198638c69bb0fc2870a58dabfba6eb722c933b48556c686c",
|
||||
"sha256:f260da502d7441a45695199b4e7fd8ca87db659ba1c78f2bbf31f934fe76ae0e",
|
||||
"sha256:f2f390aa4da44454db40a1f0201401f9036e8d578a25f01a6e237cea238337ef",
|
||||
"sha256:f76025acc8e2114bb664294a07ede0727aa75d63a06d2fae96bf29a81747e4a7"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==1.23.4"
|
||||
},
|
||||
"packaging": {
|
||||
"hashes": [
|
||||
"sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb",
|
||||
@ -2411,6 +2466,73 @@
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==0.10.1"
|
||||
},
|
||||
"pillow": {
|
||||
"hashes": [
|
||||
"sha256:03150abd92771742d4a8cd6f2fa6246d847dcd2e332a18d0c15cc75bf6703040",
|
||||
"sha256:073adb2ae23431d3b9bcbcff3fe698b62ed47211d0716b067385538a1b0f28b8",
|
||||
"sha256:0b07fffc13f474264c336298d1b4ce01d9c5a011415b79d4ee5527bb69ae6f65",
|
||||
"sha256:0b7257127d646ff8676ec8a15520013a698d1fdc48bc2a79ba4e53df792526f2",
|
||||
"sha256:12ce4932caf2ddf3e41d17fc9c02d67126935a44b86df6a206cf0d7161548627",
|
||||
"sha256:15c42fb9dea42465dfd902fb0ecf584b8848ceb28b41ee2b58f866411be33f07",
|
||||
"sha256:18498994b29e1cf86d505edcb7edbe814d133d2232d256db8c7a8ceb34d18cef",
|
||||
"sha256:1c7c8ae3864846fc95f4611c78129301e203aaa2af813b703c55d10cc1628535",
|
||||
"sha256:22b012ea2d065fd163ca096f4e37e47cd8b59cf4b0fd47bfca6abb93df70b34c",
|
||||
"sha256:276a5ca930c913f714e372b2591a22c4bd3b81a418c0f6635ba832daec1cbcfc",
|
||||
"sha256:2e0918e03aa0c72ea56edbb00d4d664294815aa11291a11504a377ea018330d3",
|
||||
"sha256:3033fbe1feb1b59394615a1cafaee85e49d01b51d54de0cbf6aa8e64182518a1",
|
||||
"sha256:3168434d303babf495d4ba58fc22d6604f6e2afb97adc6a423e917dab828939c",
|
||||
"sha256:32a44128c4bdca7f31de5be641187367fe2a450ad83b833ef78910397db491aa",
|
||||
"sha256:3dd6caf940756101205dffc5367babf288a30043d35f80936f9bfb37f8355b32",
|
||||
"sha256:40e1ce476a7804b0fb74bcfa80b0a2206ea6a882938eaba917f7a0f004b42502",
|
||||
"sha256:41e0051336807468be450d52b8edd12ac60bebaa97fe10c8b660f116e50b30e4",
|
||||
"sha256:4390e9ce199fc1951fcfa65795f239a8a4944117b5935a9317fb320e7767b40f",
|
||||
"sha256:502526a2cbfa431d9fc2a079bdd9061a2397b842bb6bc4239bb176da00993812",
|
||||
"sha256:51e0e543a33ed92db9f5ef69a0356e0b1a7a6b6a71b80df99f1d181ae5875636",
|
||||
"sha256:57751894f6618fd4308ed8e0c36c333e2f5469744c34729a27532b3db106ee20",
|
||||
"sha256:5d77adcd56a42d00cc1be30843d3426aa4e660cab4a61021dc84467123f7a00c",
|
||||
"sha256:655a83b0058ba47c7c52e4e2df5ecf484c1b0b0349805896dd350cbc416bdd91",
|
||||
"sha256:68943d632f1f9e3dce98908e873b3a090f6cba1cbb1b892a9e8d97c938871fbe",
|
||||
"sha256:6c738585d7a9961d8c2821a1eb3dcb978d14e238be3d70f0a706f7fa9316946b",
|
||||
"sha256:73bd195e43f3fadecfc50c682f5055ec32ee2c933243cafbfdec69ab1aa87cad",
|
||||
"sha256:772a91fc0e03eaf922c63badeca75e91baa80fe2f5f87bdaed4280662aad25c9",
|
||||
"sha256:77ec3e7be99629898c9a6d24a09de089fa5356ee408cdffffe62d67bb75fdd72",
|
||||
"sha256:7db8b751ad307d7cf238f02101e8e36a128a6cb199326e867d1398067381bff4",
|
||||
"sha256:801ec82e4188e935c7f5e22e006d01611d6b41661bba9fe45b60e7ac1a8f84de",
|
||||
"sha256:82409ffe29d70fd733ff3c1025a602abb3e67405d41b9403b00b01debc4c9a29",
|
||||
"sha256:828989c45c245518065a110434246c44a56a8b2b2f6347d1409c787e6e4651ee",
|
||||
"sha256:829f97c8e258593b9daa80638aee3789b7df9da5cf1336035016d76f03b8860c",
|
||||
"sha256:871b72c3643e516db4ecf20efe735deb27fe30ca17800e661d769faab45a18d7",
|
||||
"sha256:89dca0ce00a2b49024df6325925555d406b14aa3efc2f752dbb5940c52c56b11",
|
||||
"sha256:90fb88843d3902fe7c9586d439d1e8c05258f41da473952aa8b328d8b907498c",
|
||||
"sha256:97aabc5c50312afa5e0a2b07c17d4ac5e865b250986f8afe2b02d772567a380c",
|
||||
"sha256:9aaa107275d8527e9d6e7670b64aabaaa36e5b6bd71a1015ddd21da0d4e06448",
|
||||
"sha256:9f47eabcd2ded7698106b05c2c338672d16a6f2a485e74481f524e2a23c2794b",
|
||||
"sha256:a0a06a052c5f37b4ed81c613a455a81f9a3a69429b4fd7bb913c3fa98abefc20",
|
||||
"sha256:ab388aaa3f6ce52ac1cb8e122c4bd46657c15905904b3120a6248b5b8b0bc228",
|
||||
"sha256:ad58d27a5b0262c0c19b47d54c5802db9b34d38bbf886665b626aff83c74bacd",
|
||||
"sha256:ae5331c23ce118c53b172fa64a4c037eb83c9165aba3a7ba9ddd3ec9fa64a699",
|
||||
"sha256:af0372acb5d3598f36ec0914deed2a63f6bcdb7b606da04dc19a88d31bf0c05b",
|
||||
"sha256:afa4107d1b306cdf8953edde0534562607fe8811b6c4d9a486298ad31de733b2",
|
||||
"sha256:b03ae6f1a1878233ac620c98f3459f79fd77c7e3c2b20d460284e1fb370557d4",
|
||||
"sha256:b0915e734b33a474d76c28e07292f196cdf2a590a0d25bcc06e64e545f2d146c",
|
||||
"sha256:b4012d06c846dc2b80651b120e2cdd787b013deb39c09f407727ba90015c684f",
|
||||
"sha256:b472b5ea442148d1c3e2209f20f1e0bb0eb556538690fa70b5e1f79fa0ba8dc2",
|
||||
"sha256:b59430236b8e58840a0dfb4099a0e8717ffb779c952426a69ae435ca1f57210c",
|
||||
"sha256:b90f7616ea170e92820775ed47e136208e04c967271c9ef615b6fbd08d9af0e3",
|
||||
"sha256:b9a65733d103311331875c1dca05cb4606997fd33d6acfed695b1232ba1df193",
|
||||
"sha256:bac18ab8d2d1e6b4ce25e3424f709aceef668347db8637c2296bcf41acb7cf48",
|
||||
"sha256:bca31dd6014cb8b0b2db1e46081b0ca7d936f856da3b39744aef499db5d84d02",
|
||||
"sha256:be55f8457cd1eac957af0c3f5ece7bc3f033f89b114ef30f710882717670b2a8",
|
||||
"sha256:c7025dce65566eb6e89f56c9509d4f628fddcedb131d9465cacd3d8bac337e7e",
|
||||
"sha256:c935a22a557a560108d780f9a0fc426dd7459940dc54faa49d83249c8d3e760f",
|
||||
"sha256:dbb8e7f2abee51cef77673be97760abff1674ed32847ce04b4af90f610144c7b",
|
||||
"sha256:e6ea6b856a74d560d9326c0f5895ef8050126acfdc7ca08ad703eb0081e82b74",
|
||||
"sha256:ebf2029c1f464c59b8bdbe5143c79fa2045a581ac53679733d3a91d400ff9efb",
|
||||
"sha256:f1ff2ee69f10f13a9596480335f406dd1f70c3650349e2be67ca3139280cade0"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==9.3.0"
|
||||
},
|
||||
"platformdirs": {
|
||||
"hashes": [
|
||||
"sha256:0cb405749187a194f444c25c82ef7225232f11564721eabffc6ec70df83b11cb",
|
||||
@ -2538,6 +2660,37 @@
|
||||
],
|
||||
"version": "==2022.6"
|
||||
},
|
||||
"pywavelets": {
|
||||
"hashes": [
|
||||
"sha256:030670a213ee8fefa56f6387b0c8e7d970c7f7ad6850dc048bd7c89364771b9b",
|
||||
"sha256:058b46434eac4c04dd89aeef6fa39e4b6496a951d78c500b6641fd5b2cc2f9f4",
|
||||
"sha256:231b0e0b1cdc1112f4af3c24eea7bf181c418d37922a67670e9bf6cfa2d544d4",
|
||||
"sha256:23bafd60350b2b868076d976bdd92f950b3944f119b4754b1d7ff22b7acbf6c6",
|
||||
"sha256:3f19327f2129fb7977bc59b966b4974dfd72879c093e44a7287500a7032695de",
|
||||
"sha256:47cac4fa25bed76a45bc781a293c26ac63e8eaae9eb8f9be961758d22b58649c",
|
||||
"sha256:578af438a02a86b70f1975b546f68aaaf38f28fb082a61ceb799816049ed18aa",
|
||||
"sha256:6437af3ddf083118c26d8f97ab43b0724b956c9f958e9ea788659f6a2834ba93",
|
||||
"sha256:64c6bac6204327321db30b775060fbe8e8642316e6bff17f06b9f34936f88875",
|
||||
"sha256:67a0d28a08909f21400cb09ff62ba94c064882ffd9e3a6b27880a111211d59bd",
|
||||
"sha256:71ab30f51ee4470741bb55fc6b197b4a2b612232e30f6ac069106f0156342356",
|
||||
"sha256:7231461d7a8eb3bdc7aa2d97d9f67ea5a9f8902522818e7e2ead9c2b3408eeb1",
|
||||
"sha256:754fa5085768227c4f4a26c1e0c78bc509a266d9ebd0eb69a278be7e3ece943c",
|
||||
"sha256:7ab8d9db0fe549ab2ee0bea61f614e658dd2df419d5b75fba47baa761e95f8f2",
|
||||
"sha256:875d4d620eee655346e3589a16a73790cf9f8917abba062234439b594e706784",
|
||||
"sha256:88aa5449e109d8f5e7f0adef85f7f73b1ab086102865be64421a3a3d02d277f4",
|
||||
"sha256:91d3d393cffa634f0e550d88c0e3f217c96cfb9e32781f2960876f1808d9b45b",
|
||||
"sha256:9cb5ca8d11d3f98e89e65796a2125be98424d22e5ada360a0dbabff659fca0fc",
|
||||
"sha256:ab7da0a17822cd2f6545626946d3b82d1a8e106afc4b50e3387719ba01c7b966",
|
||||
"sha256:ad987748f60418d5f4138db89d82ba0cb49b086e0cbb8fd5c3ed4a814cfb705e",
|
||||
"sha256:d0e56cd7a53aed3cceca91a04d62feb3a0aca6725b1912d29546c26f6ea90426",
|
||||
"sha256:d854411eb5ee9cb4bc5d0e66e3634aeb8f594210f6a1bed96dbed57ec70f181c",
|
||||
"sha256:da7b9c006171be1f9ddb12cc6e0d3d703b95f7f43cb5e2c6f5f15d3233fcf202",
|
||||
"sha256:daf0aa79842b571308d7c31a9c43bc99a30b6328e6aea3f50388cd8f69ba7dbc",
|
||||
"sha256:de7cd61a88a982edfec01ea755b0740e94766e00a1ceceeafef3ed4c85c605cd"
|
||||
],
|
||||
"markers": "python_version >= '3.8'",
|
||||
"version": "==1.4.1"
|
||||
},
|
||||
"pyyaml": {
|
||||
"hashes": [
|
||||
"sha256:01b45c0191e6d66c470b6cf1b9531a771a83c1c4208272ead47a3ae4f2f603bf",
|
||||
@ -2599,6 +2752,35 @@
|
||||
"markers": "python_version >= '3.7' and python_version < '4'",
|
||||
"version": "==2.28.1"
|
||||
},
|
||||
"scipy": {
|
||||
"hashes": [
|
||||
"sha256:02b567e722d62bddd4ac253dafb01ce7ed8742cf8031aea030a41414b86c1125",
|
||||
"sha256:1166514aa3bbf04cb5941027c6e294a000bba0cf00f5cdac6c77f2dad479b434",
|
||||
"sha256:1da52b45ce1a24a4a22db6c157c38b39885a990a566748fc904ec9f03ed8c6ba",
|
||||
"sha256:23b22fbeef3807966ea42d8163322366dd89da9bebdc075da7034cee3a1441ca",
|
||||
"sha256:28d2cab0c6ac5aa131cc5071a3a1d8e1366dad82288d9ec2ca44df78fb50e649",
|
||||
"sha256:2ef0fbc8bcf102c1998c1f16f15befe7cffba90895d6e84861cd6c6a33fb54f6",
|
||||
"sha256:3b69b90c9419884efeffaac2c38376d6ef566e6e730a231e15722b0ab58f0328",
|
||||
"sha256:4b93ec6f4c3c4d041b26b5f179a6aab8f5045423117ae7a45ba9710301d7e462",
|
||||
"sha256:4e53a55f6a4f22de01ffe1d2f016e30adedb67a699a310cdcac312806807ca81",
|
||||
"sha256:6311e3ae9cc75f77c33076cb2794fb0606f14c8f1b1c9ff8ce6005ba2c283621",
|
||||
"sha256:65b77f20202599c51eb2771d11a6b899b97989159b7975e9b5259594f1d35ef4",
|
||||
"sha256:6cc6b33139eb63f30725d5f7fa175763dc2df6a8f38ddf8df971f7c345b652dc",
|
||||
"sha256:70de2f11bf64ca9921fda018864c78af7147025e467ce9f4a11bc877266900a6",
|
||||
"sha256:70ebc84134cf0c504ce6a5f12d6db92cb2a8a53a49437a6bb4edca0bc101f11c",
|
||||
"sha256:83606129247e7610b58d0e1e93d2c5133959e9cf93555d3c27e536892f1ba1f2",
|
||||
"sha256:93d07494a8900d55492401917a119948ed330b8c3f1d700e0b904a578f10ead4",
|
||||
"sha256:9c4e3ae8a716c8b3151e16c05edb1daf4cb4d866caa385e861556aff41300c14",
|
||||
"sha256:9dd4012ac599a1e7eb63c114d1eee1bcfc6dc75a29b589ff0ad0bb3d9412034f",
|
||||
"sha256:9e3fb1b0e896f14a85aa9a28d5f755daaeeb54c897b746df7a55ccb02b340f33",
|
||||
"sha256:a0aa8220b89b2e3748a2836fbfa116194378910f1a6e78e4675a095bcd2c762d",
|
||||
"sha256:d3b3c8924252caaffc54d4a99f1360aeec001e61267595561089f8b5900821bb",
|
||||
"sha256:e013aed00ed776d790be4cb32826adb72799c61e318676172495383ba4570aa4",
|
||||
"sha256:f3e7a8867f307e3359cc0ed2c63b61a1e33a19080f92fe377bc7d49f646f2ec1"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==1.8.1"
|
||||
},
|
||||
"setuptools": {
|
||||
"hashes": [
|
||||
"sha256:d0b9a8433464d5800cbe05094acf5c6d52a91bfac9b52bcfc4d41382be5d5d31",
|
||||
@ -2714,7 +2896,7 @@
|
||||
"sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc",
|
||||
"sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"
|
||||
],
|
||||
"markers": "python_version < '3.11' and python_version >= '3.7'",
|
||||
"markers": "python_full_version < '3.11.0a7'",
|
||||
"version": "==2.0.1"
|
||||
},
|
||||
"tornado": {
|
||||
@ -2747,7 +2929,7 @@
|
||||
"sha256:1511434bb92bf8dd198c12b1cc812e800d4181cfcb867674e0f8279cc93087aa",
|
||||
"sha256:16fa4864408f655d35ec496218b85f79b3437c829e93320c7c9215ccfd92489e"
|
||||
],
|
||||
"markers": "python_version < '3.10'",
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==4.4.0"
|
||||
},
|
||||
"urllib3": {
|
||||
|
@ -11,9 +11,12 @@ services:
|
||||
container_name: gotenberg
|
||||
network_mode: host
|
||||
restart: unless-stopped
|
||||
# The gotenberg chromium route is used to convert .eml files. We do not
|
||||
# want to allow external content like tracking pixels or even javascript.
|
||||
command:
|
||||
- "gotenberg"
|
||||
- "--chromium-disable-routes=true"
|
||||
- "--chromium-disable-javascript=true"
|
||||
- "--chromium-allow-list=file:///tmp/.*"
|
||||
tika:
|
||||
image: ghcr.io/paperless-ngx/tika:latest
|
||||
hostname: tika
|
||||
|
@ -87,9 +87,12 @@ services:
|
||||
gotenberg:
|
||||
image: docker.io/gotenberg/gotenberg:7.6
|
||||
restart: unless-stopped
|
||||
# The gotenberg chromium route is used to convert .eml files. We do not
|
||||
# want to allow external content like tracking pixels or even javascript.
|
||||
command:
|
||||
- "gotenberg"
|
||||
- "--chromium-disable-routes=true"
|
||||
- "--chromium-disable-javascript=true"
|
||||
- "--chromium-allow-list=file:///tmp/.*"
|
||||
|
||||
tika:
|
||||
image: ghcr.io/paperless-ngx/tika:latest
|
||||
|
@ -79,9 +79,13 @@ services:
|
||||
gotenberg:
|
||||
image: docker.io/gotenberg/gotenberg:7.6
|
||||
restart: unless-stopped
|
||||
|
||||
# The gotenberg chromium route is used to convert .eml files. We do not
|
||||
# want to allow external content like tracking pixels or even javascript.
|
||||
command:
|
||||
- "gotenberg"
|
||||
- "--chromium-disable-routes=true"
|
||||
- "--chromium-disable-javascript=true"
|
||||
- "--chromium-allow-list=file:///tmp/.*"
|
||||
|
||||
tika:
|
||||
image: ghcr.io/paperless-ngx/tika:latest
|
||||
|
@ -67,9 +67,13 @@ services:
|
||||
gotenberg:
|
||||
image: docker.io/gotenberg/gotenberg:7.6
|
||||
restart: unless-stopped
|
||||
|
||||
# The gotenberg chromium route is used to convert .eml files. We do not
|
||||
# want to allow external content like tracking pixels or even javascript.
|
||||
command:
|
||||
- "gotenberg"
|
||||
- "--chromium-disable-routes=true"
|
||||
- "--chromium-disable-javascript=true"
|
||||
- "--chromium-allow-list=file:///tmp/.*"
|
||||
|
||||
tika:
|
||||
image: ghcr.io/paperless-ngx/tika:latest
|
||||
|
@ -565,8 +565,10 @@ they use underscores instead of dashes.
|
||||
|
||||
Paperless can make use of [Tika](https://tika.apache.org/) and
|
||||
[Gotenberg](https://gotenberg.dev/) for parsing and converting
|
||||
"Office" documents (such as ".doc", ".xlsx" and ".odt"). If you
|
||||
wish to use this, you must provide a Tika server and a Gotenberg server,
|
||||
"Office" documents (such as ".doc", ".xlsx" and ".odt").
|
||||
Tika and Gotenberg are also needed to allow parsing of E-Mails (.eml).
|
||||
|
||||
If you wish to use this, you must provide a Tika server and a Gotenberg server,
|
||||
configure their endpoints, and enable the feature.
|
||||
|
||||
`PAPERLESS_TIKA_ENABLED=<bool>`
|
||||
@ -610,9 +612,12 @@ services:
|
||||
gotenberg:
|
||||
image: gotenberg/gotenberg:7.6
|
||||
restart: unless-stopped
|
||||
# The gotenberg chromium route is used to convert .eml files. We do not
|
||||
# want to allow external content like tracking pixels or even javascript.
|
||||
command:
|
||||
- 'gotenberg'
|
||||
- '--chromium-disable-routes=true'
|
||||
- '--chromium-disable-javascript=true'
|
||||
- '--chromium-allow-list=file:///tmp/.*'
|
||||
|
||||
tika:
|
||||
image: ghcr.io/paperless-ngx/tika:latest
|
||||
|
@ -125,12 +125,12 @@ using docker-compose, this is achieved by the following configuration
|
||||
change in the `docker-compose.yml` file:
|
||||
|
||||
```yaml
|
||||
gotenberg:
|
||||
image: gotenberg/gotenberg:7.6
|
||||
restart: unless-stopped
|
||||
command:
|
||||
# The gotenberg chromium route is used to convert .eml files. We do not
|
||||
# want to allow external content like tracking pixels or even javascript.
|
||||
command:
|
||||
- 'gotenberg'
|
||||
- '--chromium-disable-routes=true'
|
||||
- '--chromium-disable-javascript=true'
|
||||
- '--chromium-allow-list=file:///tmp/.*'
|
||||
- '--api-timeout=60'
|
||||
```
|
||||
|
||||
|
@ -2,5 +2,5 @@
|
||||
|
||||
docker run -p 5432:5432 -e POSTGRES_PASSWORD=password -v paperless_pgdata:/var/lib/postgresql/data -d postgres:13
|
||||
docker run -d -p 6379:6379 redis:latest
|
||||
docker run -p 3000:3000 -d gotenberg/gotenberg:7.6
|
||||
docker run -p 3000:3000 -d gotenberg/gotenberg:7.6 gotenberg --chromium-disable-javascript=true --chromium-allow-list="file:///tmp/.*"
|
||||
docker run -p 9998:9998 -d ghcr.io/paperless-ngx/tika:latest
|
||||
|
@ -56,6 +56,7 @@ class MailRuleAdmin(admin.ModelAdmin):
|
||||
"filter_body",
|
||||
"filter_attachment_filename",
|
||||
"maximum_age",
|
||||
"consumption_scope",
|
||||
"attachment_type",
|
||||
),
|
||||
},
|
||||
@ -65,8 +66,8 @@ class MailRuleAdmin(admin.ModelAdmin):
|
||||
{
|
||||
"description": _(
|
||||
"The action applied to the mail. This action is only "
|
||||
"performed when documents were consumed from the mail. "
|
||||
"Mails without attachments will remain entirely untouched.",
|
||||
"performed when the mail body or attachments were "
|
||||
"consumed from the mail.",
|
||||
),
|
||||
"fields": ("action", "action_parameter"),
|
||||
},
|
||||
|
@ -1,8 +1,17 @@
|
||||
from django.apps import AppConfig
|
||||
from django.conf import settings
|
||||
from django.utils.translation import gettext_lazy as _
|
||||
from paperless_mail.signals import mail_consumer_declaration
|
||||
|
||||
|
||||
class PaperlessMailConfig(AppConfig):
|
||||
name = "paperless_mail"
|
||||
|
||||
verbose_name = _("Paperless mail")
|
||||
|
||||
def ready(self):
|
||||
from documents.signals import document_consumer_declaration
|
||||
|
||||
if settings.TIKA_ENABLED:
|
||||
document_consumer_declaration.connect(mail_consumer_declaration)
|
||||
AppConfig.ready(self)
|
||||
|
@ -350,9 +350,16 @@ class MailAccountHandler(LoggingMixin):
|
||||
|
||||
return total_processed_files
|
||||
|
||||
def handle_message(self, message, rule) -> int:
|
||||
if not message.attachments:
|
||||
return 0
|
||||
def handle_message(self, message, rule: MailRule) -> int:
|
||||
processed_elements = 0
|
||||
|
||||
# Skip Message handling when only attachments are to be processed but
|
||||
# message doesn't have any.
|
||||
if (
|
||||
not message.attachments
|
||||
and rule.consumption_scope == MailRule.ConsumptionScope.ATTACHMENTS_ONLY
|
||||
):
|
||||
return processed_elements
|
||||
|
||||
self.log(
|
||||
"debug",
|
||||
@ -365,8 +372,41 @@ class MailAccountHandler(LoggingMixin):
|
||||
tag_ids = [tag.id for tag in rule.assign_tags.all()]
|
||||
doc_type = rule.assign_document_type
|
||||
|
||||
processed_attachments = 0
|
||||
if (
|
||||
rule.consumption_scope == MailRule.ConsumptionScope.EML_ONLY
|
||||
or rule.consumption_scope == MailRule.ConsumptionScope.EVERYTHING
|
||||
):
|
||||
processed_elements += self.process_eml(
|
||||
message,
|
||||
rule,
|
||||
correspondent,
|
||||
tag_ids,
|
||||
doc_type,
|
||||
)
|
||||
|
||||
if (
|
||||
rule.consumption_scope == MailRule.ConsumptionScope.ATTACHMENTS_ONLY
|
||||
or rule.consumption_scope == MailRule.ConsumptionScope.EVERYTHING
|
||||
):
|
||||
processed_elements += self.process_attachments(
|
||||
message,
|
||||
rule,
|
||||
correspondent,
|
||||
tag_ids,
|
||||
doc_type,
|
||||
)
|
||||
|
||||
return processed_elements
|
||||
|
||||
def process_attachments(
|
||||
self,
|
||||
message: MailMessage,
|
||||
rule: MailRule,
|
||||
correspondent,
|
||||
tag_ids,
|
||||
doc_type,
|
||||
):
|
||||
processed_attachments = 0
|
||||
for att in message.attachments:
|
||||
|
||||
if (
|
||||
@ -436,5 +476,59 @@ class MailAccountHandler(LoggingMixin):
|
||||
f"since guessed mime type {mime_type} is not supported "
|
||||
f"by paperless",
|
||||
)
|
||||
|
||||
return processed_attachments
|
||||
|
||||
def process_eml(
|
||||
self,
|
||||
message: MailMessage,
|
||||
rule: MailRule,
|
||||
correspondent,
|
||||
tag_ids,
|
||||
doc_type,
|
||||
):
|
||||
os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
|
||||
_, temp_filename = tempfile.mkstemp(
|
||||
prefix="paperless-mail-",
|
||||
dir=settings.SCRATCH_DIR,
|
||||
suffix=".eml",
|
||||
)
|
||||
with open(temp_filename, "wb") as f:
|
||||
# Move "From"-header to beginning of file
|
||||
# TODO: This ugly workaround is needed because the parser is
|
||||
# chosen only by the mime_type detected via magic
|
||||
# (see documents/consumer.py "mime_type = magic.from_file")
|
||||
# Unfortunately magic sometimes fails to detect the mime
|
||||
# type of .eml files correctly as message/rfc822 and instead
|
||||
# detects text/plain.
|
||||
# This also effects direct file consumption of .eml files
|
||||
# which are not treated with this workaround.
|
||||
from_element = None
|
||||
for i, header in enumerate(message.obj._headers):
|
||||
if header[0] == "From":
|
||||
from_element = i
|
||||
if from_element:
|
||||
new_headers = [message.obj._headers.pop(from_element)]
|
||||
new_headers += message.obj._headers
|
||||
message.obj._headers = new_headers
|
||||
|
||||
f.write(message.obj.as_bytes())
|
||||
|
||||
self.log(
|
||||
"info",
|
||||
f"Rule {rule}: "
|
||||
f"Consuming eml from mail "
|
||||
f"{message.subject} from {message.from_}",
|
||||
)
|
||||
|
||||
consume_file.delay(
|
||||
path=temp_filename,
|
||||
override_filename=pathvalidate.sanitize_filename(
|
||||
message.subject + ".eml",
|
||||
),
|
||||
override_title=message.subject,
|
||||
override_correspondent_id=correspondent.id if correspondent else None,
|
||||
override_document_type_id=doc_type.id if doc_type else None,
|
||||
override_tag_ids=tag_ids,
|
||||
)
|
||||
processed_elements = 1
|
||||
return processed_elements
|
||||
|
@ -0,0 +1,32 @@
|
||||
# Generated by Django 4.0.4 on 2022-07-11 22:02
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
("paperless_mail", "0015_alter_mailrule_action"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AddField(
|
||||
model_name="mailrule",
|
||||
name="consumption_scope",
|
||||
field=models.PositiveIntegerField(
|
||||
choices=[
|
||||
(1, "Only process attachments."),
|
||||
(
|
||||
2,
|
||||
"Process full Mail (with embedded attachments in file) as .eml",
|
||||
),
|
||||
(
|
||||
3,
|
||||
"Process full Mail (with embedded attachments in file) as .eml + process attachments as separate documents",
|
||||
),
|
||||
],
|
||||
default=1,
|
||||
verbose_name="consumption scope",
|
||||
),
|
||||
),
|
||||
]
|
@ -56,6 +56,14 @@ class MailRule(models.Model):
|
||||
verbose_name = _("mail rule")
|
||||
verbose_name_plural = _("mail rules")
|
||||
|
||||
class ConsumptionScope(models.IntegerChoices):
|
||||
ATTACHMENTS_ONLY = 1, _("Only process attachments.")
|
||||
EML_ONLY = 2, _("Process full Mail (with embedded attachments in file) as .eml")
|
||||
EVERYTHING = 3, _(
|
||||
"Process full Mail (with embedded attachments in file) as .eml "
|
||||
"+ process attachments as separate documents",
|
||||
)
|
||||
|
||||
class AttachmentProcessing(models.IntegerChoices):
|
||||
ATTACHMENTS_ONLY = 1, _("Only process attachments.")
|
||||
EVERYTHING = 2, _("Process all files, including 'inline' " "attachments.")
|
||||
@ -145,6 +153,12 @@ class MailRule(models.Model):
|
||||
),
|
||||
)
|
||||
|
||||
consumption_scope = models.PositiveIntegerField(
|
||||
_("consumption scope"),
|
||||
choices=ConsumptionScope.choices,
|
||||
default=ConsumptionScope.ATTACHMENTS_ONLY,
|
||||
)
|
||||
|
||||
action = models.PositiveIntegerField(
|
||||
_("action"),
|
||||
choices=MailAction.choices,
|
||||
|
333
src/paperless_mail/parsers.py
Normal file
333
src/paperless_mail/parsers.py
Normal file
@ -0,0 +1,333 @@
|
||||
import os
|
||||
import re
|
||||
from html import escape
|
||||
from io import BytesIO
|
||||
from io import StringIO
|
||||
|
||||
import requests
|
||||
from bleach import clean
|
||||
from bleach import linkify
|
||||
from django.conf import settings
|
||||
from documents.parsers import DocumentParser
|
||||
from documents.parsers import make_thumbnail_from_pdf
|
||||
from documents.parsers import ParseError
|
||||
from humanfriendly import format_size
|
||||
from imap_tools import MailMessage
|
||||
from tika import parser
|
||||
|
||||
|
||||
class MailDocumentParser(DocumentParser):
|
||||
"""
|
||||
This parser uses imap_tools to parse .eml files, generates pdf using
|
||||
gotenbergs and sends the html part to a local tika server for text extraction.
|
||||
"""
|
||||
|
||||
gotenberg_server = settings.TIKA_GOTENBERG_ENDPOINT
|
||||
tika_server = settings.TIKA_ENDPOINT
|
||||
|
||||
logging_name = "paperless.parsing.mail"
|
||||
_parsed = None
|
||||
|
||||
def get_parsed(self, document_path) -> MailMessage:
|
||||
if not self._parsed:
|
||||
try:
|
||||
with open(document_path, "rb") as eml:
|
||||
self._parsed = MailMessage.from_bytes(eml.read())
|
||||
except Exception as err:
|
||||
raise ParseError(
|
||||
f"Could not parse {document_path}: {err}",
|
||||
)
|
||||
if not self._parsed.from_values:
|
||||
self._parsed = None
|
||||
raise ParseError(
|
||||
f"Could not parse {document_path}: Missing 'from'",
|
||||
)
|
||||
|
||||
return self._parsed
|
||||
|
||||
def get_thumbnail(self, document_path, mime_type, file_name=None):
|
||||
if not self.archive_path:
|
||||
self.archive_path = self.generate_pdf(document_path)
|
||||
|
||||
return make_thumbnail_from_pdf(
|
||||
self.archive_path,
|
||||
self.tempdir,
|
||||
self.logging_group,
|
||||
)
|
||||
|
||||
def extract_metadata(self, document_path, mime_type):
|
||||
result = []
|
||||
|
||||
try:
|
||||
mail = self.get_parsed(document_path)
|
||||
except ParseError as e:
|
||||
self.log(
|
||||
"warning",
|
||||
f"Error while fetching document metadata for " f"{document_path}: {e}",
|
||||
)
|
||||
return result
|
||||
|
||||
for key, value in mail.headers.items():
|
||||
value = ", ".join(i for i in value)
|
||||
|
||||
result.append(
|
||||
{
|
||||
"namespace": "",
|
||||
"prefix": "header",
|
||||
"key": key,
|
||||
"value": value,
|
||||
},
|
||||
)
|
||||
|
||||
result.append(
|
||||
{
|
||||
"namespace": "",
|
||||
"prefix": "",
|
||||
"key": "attachments",
|
||||
"value": ", ".join(
|
||||
f"{attachment.filename}"
|
||||
f"({format_size(attachment.size, binary=True)})"
|
||||
for attachment in mail.attachments
|
||||
),
|
||||
},
|
||||
)
|
||||
|
||||
result.append(
|
||||
{
|
||||
"namespace": "",
|
||||
"prefix": "",
|
||||
"key": "date",
|
||||
"value": mail.date.strftime("%Y-%m-%d %H:%M:%S %Z"),
|
||||
},
|
||||
)
|
||||
|
||||
result.sort(key=lambda item: (item["prefix"], item["key"]))
|
||||
return result
|
||||
|
||||
def parse(self, document_path, mime_type, file_name=None):
|
||||
def strip_text(text: str):
|
||||
text = re.sub(r"\s+", " ", text)
|
||||
text = re.sub(r"(\n *)+", "\n", text)
|
||||
return text.strip()
|
||||
|
||||
mail = self.get_parsed(document_path)
|
||||
|
||||
self.text = f"Subject: {mail.subject}\n\n"
|
||||
self.text += f"From: {mail.from_values.full}\n\n"
|
||||
self.text += f"To: {', '.join(address.full for address in mail.to_values)}\n\n"
|
||||
if len(mail.cc_values) >= 1:
|
||||
self.text += (
|
||||
f"CC: {', '.join(address.full for address in mail.cc_values)}\n\n"
|
||||
)
|
||||
if len(mail.bcc_values) >= 1:
|
||||
self.text += (
|
||||
f"BCC: {', '.join(address.full for address in mail.bcc_values)}\n\n"
|
||||
)
|
||||
if len(mail.attachments) >= 1:
|
||||
att = []
|
||||
for a in mail.attachments:
|
||||
att.append(f"{a.filename} ({format_size(a.size, binary=True)})")
|
||||
|
||||
self.text += f"Attachments: {', '.join(att)}\n\n"
|
||||
|
||||
if mail.html != "":
|
||||
self.text += "HTML content: " + strip_text(self.tika_parse(mail.html))
|
||||
|
||||
self.text += f"\n\n{strip_text(mail.text)}"
|
||||
|
||||
self.date = mail.date
|
||||
self.archive_path = self.generate_pdf(document_path)
|
||||
|
||||
def tika_parse(self, html: str):
|
||||
self.log("info", "Sending content to Tika server")
|
||||
|
||||
try:
|
||||
parsed = parser.from_buffer(html, self.tika_server)
|
||||
except Exception as err:
|
||||
raise ParseError(
|
||||
f"Could not parse content with tika server at "
|
||||
f"{self.tika_server}: {err}",
|
||||
)
|
||||
if parsed["content"]:
|
||||
return parsed["content"]
|
||||
else:
|
||||
return ""
|
||||
|
||||
def generate_pdf(self, document_path):
|
||||
pdf_collection = []
|
||||
url_merge = self.gotenberg_server + "/forms/pdfengines/merge"
|
||||
pdf_path = os.path.join(self.tempdir, "merged.pdf")
|
||||
mail = self.get_parsed(document_path)
|
||||
|
||||
pdf_collection.append(("1_mail.pdf", self.generate_pdf_from_mail(mail)))
|
||||
|
||||
if mail.html == "":
|
||||
with open(pdf_path, "wb") as file:
|
||||
file.write(pdf_collection[0][1])
|
||||
file.close()
|
||||
return pdf_path
|
||||
else:
|
||||
pdf_collection.append(
|
||||
(
|
||||
"2_html.pdf",
|
||||
self.generate_pdf_from_html(mail.html, mail.attachments),
|
||||
),
|
||||
)
|
||||
|
||||
files = {}
|
||||
for name, content in pdf_collection:
|
||||
files[name] = (name, BytesIO(content))
|
||||
headers = {}
|
||||
try:
|
||||
response = requests.post(url_merge, files=files, headers=headers)
|
||||
response.raise_for_status() # ensure we notice bad responses
|
||||
except Exception as err:
|
||||
raise ParseError(f"Error while converting document to PDF: {err}")
|
||||
|
||||
with open(pdf_path, "wb") as file:
|
||||
file.write(response.content)
|
||||
file.close()
|
||||
|
||||
return pdf_path
|
||||
|
||||
@staticmethod
|
||||
def mail_to_html(mail: MailMessage) -> StringIO:
|
||||
data = {}
|
||||
|
||||
def clean_html(text: str):
|
||||
if isinstance(text, list):
|
||||
text = "\n".join([str(e) for e in text])
|
||||
if type(text) != str:
|
||||
text = str(text)
|
||||
text = escape(text)
|
||||
text = clean(text)
|
||||
text = linkify(text, parse_email=True)
|
||||
text = text.replace("\n", "<br>")
|
||||
return text
|
||||
|
||||
data["subject"] = clean_html(mail.subject)
|
||||
if data["subject"] != "":
|
||||
data["subject_label"] = "Subject"
|
||||
data["from"] = clean_html(mail.from_values.full)
|
||||
if data["from"] != "":
|
||||
data["from_label"] = "From"
|
||||
data["to"] = clean_html(", ".join(address.full for address in mail.to_values))
|
||||
if data["to"] != "":
|
||||
data["to_label"] = "To"
|
||||
data["cc"] = clean_html(", ".join(address.full for address in mail.cc_values))
|
||||
if data["cc"] != "":
|
||||
data["cc_label"] = "CC"
|
||||
data["bcc"] = clean_html(", ".join(address.full for address in mail.bcc_values))
|
||||
if data["bcc"] != "":
|
||||
data["bcc_label"] = "BCC"
|
||||
|
||||
att = []
|
||||
for a in mail.attachments:
|
||||
att.append(f"{a.filename} ({format_size(a.size, binary=True)})")
|
||||
data["attachments"] = clean_html(", ".join(att))
|
||||
if data["attachments"] != "":
|
||||
data["attachments_label"] = "Attachments"
|
||||
|
||||
data["date"] = clean_html(mail.date.astimezone().strftime("%Y-%m-%d %H:%M"))
|
||||
data["content"] = clean_html(mail.text.strip())
|
||||
|
||||
html = StringIO()
|
||||
|
||||
from django.template.loader import render_to_string
|
||||
|
||||
rendered = render_to_string("email_msg_template.html", context=data)
|
||||
|
||||
html.write(rendered)
|
||||
html.seek(0)
|
||||
|
||||
return html
|
||||
|
||||
def generate_pdf_from_mail(self, mail):
|
||||
|
||||
url = self.gotenberg_server + "/forms/chromium/convert/html"
|
||||
self.log("info", "Converting mail to PDF")
|
||||
|
||||
css_file = os.path.join(os.path.dirname(__file__), "templates/output.css")
|
||||
|
||||
with open(css_file, "rb") as css_handle:
|
||||
|
||||
files = {
|
||||
"html": ("index.html", self.mail_to_html(mail)),
|
||||
"css": ("output.css", css_handle),
|
||||
}
|
||||
headers = {}
|
||||
data = {
|
||||
"marginTop": "0.1",
|
||||
"marginBottom": "0.1",
|
||||
"marginLeft": "0.1",
|
||||
"marginRight": "0.1",
|
||||
"paperWidth": "8.27",
|
||||
"paperHeight": "11.7",
|
||||
"scale": "1.0",
|
||||
}
|
||||
try:
|
||||
response = requests.post(
|
||||
url,
|
||||
files=files,
|
||||
headers=headers,
|
||||
data=data,
|
||||
)
|
||||
response.raise_for_status() # ensure we notice bad responses
|
||||
except Exception as err:
|
||||
raise ParseError(f"Error while converting document to PDF: {err}")
|
||||
|
||||
return response.content
|
||||
|
||||
@staticmethod
|
||||
def transform_inline_html(html, attachments):
|
||||
def clean_html_script(text: str):
|
||||
compiled_open = re.compile(re.escape("<script"), re.IGNORECASE)
|
||||
text = compiled_open.sub("<div hidden ", text)
|
||||
|
||||
compiled_close = re.compile(re.escape("</script"), re.IGNORECASE)
|
||||
text = compiled_close.sub("</div", text)
|
||||
return text
|
||||
|
||||
html_clean = clean_html_script(html)
|
||||
files = []
|
||||
|
||||
for a in attachments:
|
||||
name_cid = "cid:" + a.content_id
|
||||
name_clean = "".join(e for e in name_cid if e.isalnum())
|
||||
files.append((name_clean, BytesIO(a.payload)))
|
||||
html_clean = html_clean.replace(name_cid, name_clean)
|
||||
|
||||
files.append(("index.html", StringIO(html_clean)))
|
||||
|
||||
return files
|
||||
|
||||
def generate_pdf_from_html(self, orig_html, attachments):
|
||||
url = self.gotenberg_server + "/forms/chromium/convert/html"
|
||||
self.log("info", "Converting html to PDF")
|
||||
|
||||
files = {}
|
||||
for name, file in self.transform_inline_html(orig_html, attachments):
|
||||
files[name] = (name, file)
|
||||
|
||||
headers = {}
|
||||
data = {
|
||||
"marginTop": "0.1",
|
||||
"marginBottom": "0.1",
|
||||
"marginLeft": "0.1",
|
||||
"marginRight": "0.1",
|
||||
"paperWidth": "8.27",
|
||||
"paperHeight": "11.7",
|
||||
"scale": "1.0",
|
||||
}
|
||||
try:
|
||||
response = requests.post(
|
||||
url,
|
||||
files=files,
|
||||
headers=headers,
|
||||
data=data,
|
||||
)
|
||||
response.raise_for_status() # ensure we notice bad responses
|
||||
except Exception as err:
|
||||
raise ParseError(f"Error while converting document to PDF: {err}")
|
||||
|
||||
return response.content
|
14
src/paperless_mail/signals.py
Normal file
14
src/paperless_mail/signals.py
Normal file
@ -0,0 +1,14 @@
|
||||
def get_parser(*args, **kwargs):
|
||||
from .parsers import MailDocumentParser
|
||||
|
||||
return MailDocumentParser(*args, **kwargs)
|
||||
|
||||
|
||||
def mail_consumer_declaration(sender, **kwargs):
|
||||
return {
|
||||
"parser": get_parser,
|
||||
"weight": 20,
|
||||
"mime_types": {
|
||||
"message/rfc822": ".eml",
|
||||
},
|
||||
}
|
48
src/paperless_mail/templates/email_msg_template.html
Normal file
48
src/paperless_mail/templates/email_msg_template.html
Normal file
@ -0,0 +1,48 @@
|
||||
{% autoescape off %}
|
||||
<!doctype html>
|
||||
<html>
|
||||
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<link href="output.css" rel="stylesheet">
|
||||
</head>
|
||||
|
||||
<body class="bg-white w-screen flex flex-col items-center">
|
||||
<div class="container max-w-4xl">
|
||||
<!-- Header -->
|
||||
<div class="grid gap-x-2 bg-slate-200 p-4">
|
||||
|
||||
<div class="col-start-9 col-span-4 row-start-1 text-right">{{ date }}</div>
|
||||
|
||||
<div class="col-start-1 row-start-1 text-slate-400 text-right">{{ from_label }}</div>
|
||||
<div class="col-start-2 col-span-7 row-start-1">{{ from }}</div>
|
||||
|
||||
<div class="col-start-1 row-start-2 text-slate-400 text-right">{{ subject_label }}</div>
|
||||
<div class=" col-start-2 col-span-10 row-start-2 font-bold">{{ subject }}</div>
|
||||
|
||||
<div class="col-start-1 row-start-3 text-slate-400 text-right">{{ to_label }}</div>
|
||||
<div class="col-start-2 col-span-10 row-start-3 text-sm my-0.5">{{ to }}</div>
|
||||
|
||||
<div class="col-start-1 row-start-4 text-slate-400 text-right">{{ cc_label }}</div>
|
||||
<div class="col-start-2 col-span-10 row-start-4 text-sm my-0.5">{{ cc }}</div>
|
||||
|
||||
<div class="col-start-1 row-start-5 text-slate-400 text-right">{{ bcc_label }}</div>
|
||||
<div class="col-start-2 col-span-10 row-start-5" text-sm my-0.5>{{ bcc }}</div>
|
||||
|
||||
<div class="col-start-1 row-start-6 text-slate-400 text-right">{{ attachments_label }}</div>
|
||||
<div class="col-start-2 col-span-10 row-start-6">{{ attachments }}</div>
|
||||
</div>
|
||||
|
||||
<!-- Separator-->
|
||||
<div class="border-t border-solid border-b w-full h-[1px] box-content border-black mb-5 bg-slate-200"></div>
|
||||
|
||||
<!-- Content-->
|
||||
<div class="w-full break-words">{{ content }}</div>
|
||||
</div>
|
||||
|
||||
</body>
|
||||
|
||||
</html>
|
||||
|
||||
{% endautoescape %}
|
3
src/paperless_mail/templates/input.css
Normal file
3
src/paperless_mail/templates/input.css
Normal file
@ -0,0 +1,3 @@
|
||||
@tailwind base;
|
||||
@tailwind components;
|
||||
@tailwind utilities;
|
706
src/paperless_mail/templates/output.css
Normal file
706
src/paperless_mail/templates/output.css
Normal file
@ -0,0 +1,706 @@
|
||||
/*
|
||||
! tailwindcss v3.0.24 | MIT License | https://tailwindcss.com
|
||||
*/
|
||||
|
||||
/*
|
||||
1. Prevent padding and border from affecting element width. (https://github.com/mozdevs/cssremedy/issues/4)
|
||||
2. Allow adding a border to an element by just adding a border-width. (https://github.com/tailwindcss/tailwindcss/pull/116)
|
||||
*/
|
||||
|
||||
*,
|
||||
::before,
|
||||
::after {
|
||||
box-sizing: border-box;
|
||||
/* 1 */
|
||||
border-width: 0;
|
||||
/* 2 */
|
||||
border-style: solid;
|
||||
/* 2 */
|
||||
border-color: #e5e7eb;
|
||||
/* 2 */
|
||||
}
|
||||
|
||||
::before,
|
||||
::after {
|
||||
--tw-content: '';
|
||||
}
|
||||
|
||||
/*
|
||||
1. Use a consistent sensible line-height in all browsers.
|
||||
2. Prevent adjustments of font size after orientation changes in iOS.
|
||||
3. Use a more readable tab size.
|
||||
4. Use the user's configured `sans` font-family by default.
|
||||
*/
|
||||
|
||||
html {
|
||||
line-height: 1.5;
|
||||
/* 1 */
|
||||
-webkit-text-size-adjust: 100%;
|
||||
/* 2 */
|
||||
-moz-tab-size: 4;
|
||||
/* 3 */
|
||||
-o-tab-size: 4;
|
||||
tab-size: 4;
|
||||
/* 3 */
|
||||
font-family: ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, "Noto Sans", sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol", "Noto Color Emoji";
|
||||
/* 4 */
|
||||
}
|
||||
|
||||
/*
|
||||
1. Remove the margin in all browsers.
|
||||
2. Inherit line-height from `html` so users can set them as a class directly on the `html` element.
|
||||
*/
|
||||
|
||||
body {
|
||||
margin: 0;
|
||||
/* 1 */
|
||||
line-height: inherit;
|
||||
/* 2 */
|
||||
}
|
||||
|
||||
/*
|
||||
1. Add the correct height in Firefox.
|
||||
2. Correct the inheritance of border color in Firefox. (https://bugzilla.mozilla.org/show_bug.cgi?id=190655)
|
||||
3. Ensure horizontal rules are visible by default.
|
||||
*/
|
||||
|
||||
hr {
|
||||
height: 0;
|
||||
/* 1 */
|
||||
color: inherit;
|
||||
/* 2 */
|
||||
border-top-width: 1px;
|
||||
/* 3 */
|
||||
}
|
||||
|
||||
/*
|
||||
Add the correct text decoration in Chrome, Edge, and Safari.
|
||||
*/
|
||||
|
||||
abbr:where([title]) {
|
||||
-webkit-text-decoration: underline dotted;
|
||||
text-decoration: underline dotted;
|
||||
}
|
||||
|
||||
/*
|
||||
Remove the default font size and weight for headings.
|
||||
*/
|
||||
|
||||
h1,
|
||||
h2,
|
||||
h3,
|
||||
h4,
|
||||
h5,
|
||||
h6 {
|
||||
font-size: inherit;
|
||||
font-weight: inherit;
|
||||
}
|
||||
|
||||
/*
|
||||
Reset links to optimize for opt-in styling instead of opt-out.
|
||||
*/
|
||||
|
||||
a {
|
||||
color: inherit;
|
||||
text-decoration: inherit;
|
||||
}
|
||||
|
||||
/*
|
||||
Add the correct font weight in Edge and Safari.
|
||||
*/
|
||||
|
||||
b,
|
||||
strong {
|
||||
font-weight: bolder;
|
||||
}
|
||||
|
||||
/*
|
||||
1. Use the user's configured `mono` font family by default.
|
||||
2. Correct the odd `em` font sizing in all browsers.
|
||||
*/
|
||||
|
||||
code,
|
||||
kbd,
|
||||
samp,
|
||||
pre {
|
||||
font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
|
||||
/* 1 */
|
||||
font-size: 1em;
|
||||
/* 2 */
|
||||
}
|
||||
|
||||
/*
|
||||
Add the correct font size in all browsers.
|
||||
*/
|
||||
|
||||
small {
|
||||
font-size: 80%;
|
||||
}
|
||||
|
||||
/*
|
||||
Prevent `sub` and `sup` elements from affecting the line height in all browsers.
|
||||
*/
|
||||
|
||||
sub,
|
||||
sup {
|
||||
font-size: 75%;
|
||||
line-height: 0;
|
||||
position: relative;
|
||||
vertical-align: baseline;
|
||||
}
|
||||
|
||||
sub {
|
||||
bottom: -0.25em;
|
||||
}
|
||||
|
||||
sup {
|
||||
top: -0.5em;
|
||||
}
|
||||
|
||||
/*
|
||||
1. Remove text indentation from table contents in Chrome and Safari. (https://bugs.chromium.org/p/chromium/issues/detail?id=999088, https://bugs.webkit.org/show_bug.cgi?id=201297)
|
||||
2. Correct table border color inheritance in all Chrome and Safari. (https://bugs.chromium.org/p/chromium/issues/detail?id=935729, https://bugs.webkit.org/show_bug.cgi?id=195016)
|
||||
3. Remove gaps between table borders by default.
|
||||
*/
|
||||
|
||||
table {
|
||||
text-indent: 0;
|
||||
/* 1 */
|
||||
border-color: inherit;
|
||||
/* 2 */
|
||||
border-collapse: collapse;
|
||||
/* 3 */
|
||||
}
|
||||
|
||||
/*
|
||||
1. Change the font styles in all browsers.
|
||||
2. Remove the margin in Firefox and Safari.
|
||||
3. Remove default padding in all browsers.
|
||||
*/
|
||||
|
||||
button,
|
||||
input,
|
||||
optgroup,
|
||||
select,
|
||||
textarea {
|
||||
font-family: inherit;
|
||||
/* 1 */
|
||||
font-size: 100%;
|
||||
/* 1 */
|
||||
line-height: inherit;
|
||||
/* 1 */
|
||||
color: inherit;
|
||||
/* 1 */
|
||||
margin: 0;
|
||||
/* 2 */
|
||||
padding: 0;
|
||||
/* 3 */
|
||||
}
|
||||
|
||||
/*
|
||||
Remove the inheritance of text transform in Edge and Firefox.
|
||||
*/
|
||||
|
||||
button,
|
||||
select {
|
||||
text-transform: none;
|
||||
}
|
||||
|
||||
/*
|
||||
1. Correct the inability to style clickable types in iOS and Safari.
|
||||
2. Remove default button styles.
|
||||
*/
|
||||
|
||||
button,
|
||||
[type='button'],
|
||||
[type='reset'],
|
||||
[type='submit'] {
|
||||
-webkit-appearance: button;
|
||||
/* 1 */
|
||||
background-color: transparent;
|
||||
/* 2 */
|
||||
background-image: none;
|
||||
/* 2 */
|
||||
}
|
||||
|
||||
/*
|
||||
Use the modern Firefox focus style for all focusable elements.
|
||||
*/
|
||||
|
||||
:-moz-focusring {
|
||||
outline: auto;
|
||||
}
|
||||
|
||||
/*
|
||||
Remove the additional `:invalid` styles in Firefox. (https://github.com/mozilla/gecko-dev/blob/2f9eacd9d3d995c937b4251a5557d95d494c9be1/layout/style/res/forms.css#L728-L737)
|
||||
*/
|
||||
|
||||
:-moz-ui-invalid {
|
||||
box-shadow: none;
|
||||
}
|
||||
|
||||
/*
|
||||
Add the correct vertical alignment in Chrome and Firefox.
|
||||
*/
|
||||
|
||||
progress {
|
||||
vertical-align: baseline;
|
||||
}
|
||||
|
||||
/*
|
||||
Correct the cursor style of increment and decrement buttons in Safari.
|
||||
*/
|
||||
|
||||
::-webkit-inner-spin-button,
|
||||
::-webkit-outer-spin-button {
|
||||
height: auto;
|
||||
}
|
||||
|
||||
/*
|
||||
1. Correct the odd appearance in Chrome and Safari.
|
||||
2. Correct the outline style in Safari.
|
||||
*/
|
||||
|
||||
[type='search'] {
|
||||
-webkit-appearance: textfield;
|
||||
/* 1 */
|
||||
outline-offset: -2px;
|
||||
/* 2 */
|
||||
}
|
||||
|
||||
/*
|
||||
Remove the inner padding in Chrome and Safari on macOS.
|
||||
*/
|
||||
|
||||
::-webkit-search-decoration {
|
||||
-webkit-appearance: none;
|
||||
}
|
||||
|
||||
/*
|
||||
1. Correct the inability to style clickable types in iOS and Safari.
|
||||
2. Change font properties to `inherit` in Safari.
|
||||
*/
|
||||
|
||||
::-webkit-file-upload-button {
|
||||
-webkit-appearance: button;
|
||||
/* 1 */
|
||||
font: inherit;
|
||||
/* 2 */
|
||||
}
|
||||
|
||||
/*
|
||||
Add the correct display in Chrome and Safari.
|
||||
*/
|
||||
|
||||
summary {
|
||||
display: list-item;
|
||||
}
|
||||
|
||||
/*
|
||||
Removes the default spacing and border for appropriate elements.
|
||||
*/
|
||||
|
||||
blockquote,
|
||||
dl,
|
||||
dd,
|
||||
h1,
|
||||
h2,
|
||||
h3,
|
||||
h4,
|
||||
h5,
|
||||
h6,
|
||||
hr,
|
||||
figure,
|
||||
p,
|
||||
pre {
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
fieldset {
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
legend {
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
ol,
|
||||
ul,
|
||||
menu {
|
||||
list-style: none;
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
/*
|
||||
Prevent resizing textareas horizontally by default.
|
||||
*/
|
||||
|
||||
textarea {
|
||||
resize: vertical;
|
||||
}
|
||||
|
||||
/*
|
||||
1. Reset the default placeholder opacity in Firefox. (https://github.com/tailwindlabs/tailwindcss/issues/3300)
|
||||
2. Set the default placeholder color to the user's configured gray 400 color.
|
||||
*/
|
||||
|
||||
input::-moz-placeholder, textarea::-moz-placeholder {
|
||||
opacity: 1;
|
||||
/* 1 */
|
||||
color: #9ca3af;
|
||||
/* 2 */
|
||||
}
|
||||
|
||||
input:-ms-input-placeholder, textarea:-ms-input-placeholder {
|
||||
opacity: 1;
|
||||
/* 1 */
|
||||
color: #9ca3af;
|
||||
/* 2 */
|
||||
}
|
||||
|
||||
input::placeholder,
|
||||
textarea::placeholder {
|
||||
opacity: 1;
|
||||
/* 1 */
|
||||
color: #9ca3af;
|
||||
/* 2 */
|
||||
}
|
||||
|
||||
/*
|
||||
Set the default cursor for buttons.
|
||||
*/
|
||||
|
||||
button,
|
||||
[role="button"] {
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
/*
|
||||
Make sure disabled buttons don't get the pointer cursor.
|
||||
*/
|
||||
|
||||
:disabled {
|
||||
cursor: default;
|
||||
}
|
||||
|
||||
/*
|
||||
1. Make replaced elements `display: block` by default. (https://github.com/mozdevs/cssremedy/issues/14)
|
||||
2. Add `vertical-align: middle` to align replaced elements more sensibly by default. (https://github.com/jensimmons/cssremedy/issues/14#issuecomment-634934210)
|
||||
This can trigger a poorly considered lint error in some tools but is included by design.
|
||||
*/
|
||||
|
||||
img,
|
||||
svg,
|
||||
video,
|
||||
canvas,
|
||||
audio,
|
||||
iframe,
|
||||
embed,
|
||||
object {
|
||||
display: block;
|
||||
/* 1 */
|
||||
vertical-align: middle;
|
||||
/* 2 */
|
||||
}
|
||||
|
||||
/*
|
||||
Constrain images and videos to the parent width and preserve their intrinsic aspect ratio. (https://github.com/mozdevs/cssremedy/issues/14)
|
||||
*/
|
||||
|
||||
img,
|
||||
video {
|
||||
max-width: 100%;
|
||||
height: auto;
|
||||
}
|
||||
|
||||
/*
|
||||
Ensure the default browser behavior of the `hidden` attribute.
|
||||
*/
|
||||
|
||||
[hidden] {
|
||||
display: none;
|
||||
}
|
||||
|
||||
*, ::before, ::after {
|
||||
--tw-translate-x: 0;
|
||||
--tw-translate-y: 0;
|
||||
--tw-rotate: 0;
|
||||
--tw-skew-x: 0;
|
||||
--tw-skew-y: 0;
|
||||
--tw-scale-x: 1;
|
||||
--tw-scale-y: 1;
|
||||
--tw-pan-x: ;
|
||||
--tw-pan-y: ;
|
||||
--tw-pinch-zoom: ;
|
||||
--tw-scroll-snap-strictness: proximity;
|
||||
--tw-ordinal: ;
|
||||
--tw-slashed-zero: ;
|
||||
--tw-numeric-figure: ;
|
||||
--tw-numeric-spacing: ;
|
||||
--tw-numeric-fraction: ;
|
||||
--tw-ring-inset: ;
|
||||
--tw-ring-offset-width: 0px;
|
||||
--tw-ring-offset-color: #fff;
|
||||
--tw-ring-color: rgb(59 130 246 / 0.5);
|
||||
--tw-ring-offset-shadow: 0 0 #0000;
|
||||
--tw-ring-shadow: 0 0 #0000;
|
||||
--tw-shadow: 0 0 #0000;
|
||||
--tw-shadow-colored: 0 0 #0000;
|
||||
--tw-blur: ;
|
||||
--tw-brightness: ;
|
||||
--tw-contrast: ;
|
||||
--tw-grayscale: ;
|
||||
--tw-hue-rotate: ;
|
||||
--tw-invert: ;
|
||||
--tw-saturate: ;
|
||||
--tw-sepia: ;
|
||||
--tw-drop-shadow: ;
|
||||
--tw-backdrop-blur: ;
|
||||
--tw-backdrop-brightness: ;
|
||||
--tw-backdrop-contrast: ;
|
||||
--tw-backdrop-grayscale: ;
|
||||
--tw-backdrop-hue-rotate: ;
|
||||
--tw-backdrop-invert: ;
|
||||
--tw-backdrop-opacity: ;
|
||||
--tw-backdrop-saturate: ;
|
||||
--tw-backdrop-sepia: ;
|
||||
}
|
||||
|
||||
.container {
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
@media (min-width: 640px) {
|
||||
.container {
|
||||
max-width: 640px;
|
||||
}
|
||||
}
|
||||
|
||||
@media (min-width: 768px) {
|
||||
.container {
|
||||
max-width: 768px;
|
||||
}
|
||||
}
|
||||
|
||||
@media (min-width: 1024px) {
|
||||
.container {
|
||||
max-width: 1024px;
|
||||
}
|
||||
}
|
||||
|
||||
@media (min-width: 1280px) {
|
||||
.container {
|
||||
max-width: 1280px;
|
||||
}
|
||||
}
|
||||
|
||||
@media (min-width: 1536px) {
|
||||
.container {
|
||||
max-width: 1536px;
|
||||
}
|
||||
}
|
||||
|
||||
.col-span-2 {
|
||||
grid-column: span 2 / span 2;
|
||||
}
|
||||
|
||||
.col-span-8 {
|
||||
grid-column: span 8 / span 8;
|
||||
}
|
||||
|
||||
.col-span-10 {
|
||||
grid-column: span 10 / span 10;
|
||||
}
|
||||
|
||||
.col-span-3 {
|
||||
grid-column: span 3 / span 3;
|
||||
}
|
||||
|
||||
.col-span-4 {
|
||||
grid-column: span 4 / span 4;
|
||||
}
|
||||
|
||||
.col-span-7 {
|
||||
grid-column: span 7 / span 7;
|
||||
}
|
||||
|
||||
.col-start-11 {
|
||||
grid-column-start: 11;
|
||||
}
|
||||
|
||||
.col-start-1 {
|
||||
grid-column-start: 1;
|
||||
}
|
||||
|
||||
.col-start-2 {
|
||||
grid-column-start: 2;
|
||||
}
|
||||
|
||||
.col-start-10 {
|
||||
grid-column-start: 10;
|
||||
}
|
||||
|
||||
.col-start-9 {
|
||||
grid-column-start: 9;
|
||||
}
|
||||
|
||||
.row-start-1 {
|
||||
grid-row-start: 1;
|
||||
}
|
||||
|
||||
.row-start-2 {
|
||||
grid-row-start: 2;
|
||||
}
|
||||
|
||||
.row-start-3 {
|
||||
grid-row-start: 3;
|
||||
}
|
||||
|
||||
.row-start-4 {
|
||||
grid-row-start: 4;
|
||||
}
|
||||
|
||||
.row-start-5 {
|
||||
grid-row-start: 5;
|
||||
}
|
||||
|
||||
.row-start-6 {
|
||||
grid-row-start: 6;
|
||||
}
|
||||
|
||||
.my-1 {
|
||||
margin-top: 0.25rem;
|
||||
margin-bottom: 0.25rem;
|
||||
}
|
||||
|
||||
.my-0\.5 {
|
||||
margin-top: 0.125rem;
|
||||
margin-bottom: 0.125rem;
|
||||
}
|
||||
|
||||
.my-0 {
|
||||
margin-top: 0px;
|
||||
margin-bottom: 0px;
|
||||
}
|
||||
|
||||
.mb-5 {
|
||||
margin-bottom: 1.25rem;
|
||||
}
|
||||
|
||||
.box-content {
|
||||
box-sizing: content-box;
|
||||
}
|
||||
|
||||
.flex {
|
||||
display: flex;
|
||||
}
|
||||
|
||||
.grid {
|
||||
display: grid;
|
||||
}
|
||||
|
||||
.h-\[1px\] {
|
||||
height: 1px;
|
||||
}
|
||||
|
||||
.w-screen {
|
||||
width: 100vw;
|
||||
}
|
||||
|
||||
.w-full {
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
.max-w-4xl {
|
||||
max-width: 56rem;
|
||||
}
|
||||
|
||||
.grid-cols-12 {
|
||||
grid-template-columns: repeat(12, minmax(0, 1fr));
|
||||
}
|
||||
|
||||
.grid-rows-5 {
|
||||
grid-template-rows: repeat(5, minmax(0, 1fr));
|
||||
}
|
||||
|
||||
.flex-col {
|
||||
flex-direction: column;
|
||||
}
|
||||
|
||||
.items-center {
|
||||
align-items: center;
|
||||
}
|
||||
|
||||
.gap-x-2 {
|
||||
-moz-column-gap: 0.5rem;
|
||||
column-gap: 0.5rem;
|
||||
}
|
||||
|
||||
.whitespace-pre-line {
|
||||
white-space: pre-line;
|
||||
}
|
||||
|
||||
.break-words {
|
||||
overflow-wrap: break-word;
|
||||
}
|
||||
|
||||
.border-t {
|
||||
border-top-width: 1px;
|
||||
}
|
||||
|
||||
.border-b {
|
||||
border-bottom-width: 1px;
|
||||
}
|
||||
|
||||
.border-solid {
|
||||
border-style: solid;
|
||||
}
|
||||
|
||||
.border-black {
|
||||
--tw-border-opacity: 1;
|
||||
border-color: rgb(0 0 0 / var(--tw-border-opacity));
|
||||
}
|
||||
|
||||
.bg-white {
|
||||
--tw-bg-opacity: 1;
|
||||
background-color: rgb(255 255 255 / var(--tw-bg-opacity));
|
||||
}
|
||||
|
||||
.bg-slate-200 {
|
||||
--tw-bg-opacity: 1;
|
||||
background-color: rgb(226 232 240 / var(--tw-bg-opacity));
|
||||
}
|
||||
|
||||
.p-4 {
|
||||
padding: 1rem;
|
||||
}
|
||||
|
||||
.text-right {
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
.text-sm {
|
||||
font-size: 0.875rem;
|
||||
line-height: 1.25rem;
|
||||
}
|
||||
|
||||
.font-bold {
|
||||
font-weight: 700;
|
||||
}
|
||||
|
||||
.text-slate-400 {
|
||||
--tw-text-opacity: 1;
|
||||
color: rgb(148 163 184 / var(--tw-text-opacity));
|
||||
}
|
||||
|
||||
.text-blue-600 {
|
||||
--tw-text-opacity: 1;
|
||||
color: rgb(37 99 235 / var(--tw-text-opacity));
|
||||
}
|
||||
|
||||
.underline {
|
||||
-webkit-text-decoration-line: underline;
|
||||
text-decoration-line: underline;
|
||||
}
|
1260
src/paperless_mail/templates/package-lock.json
generated
Normal file
1260
src/paperless_mail/templates/package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
5
src/paperless_mail/templates/package.json
Normal file
5
src/paperless_mail/templates/package.json
Normal file
@ -0,0 +1,5 @@
|
||||
{
|
||||
"devDependencies": {
|
||||
"tailwindcss": "^3.0.24"
|
||||
}
|
||||
}
|
7
src/paperless_mail/templates/tailwind.config.js
Normal file
7
src/paperless_mail/templates/tailwind.config.js
Normal file
@ -0,0 +1,7 @@
|
||||
module.exports = {
|
||||
content: ['./*.html'],
|
||||
theme: {
|
||||
extend: {},
|
||||
},
|
||||
plugins: [],
|
||||
}
|
1
src/paperless_mail/tests/samples/broken.eml
Normal file
1
src/paperless_mail/tests/samples/broken.eml
Normal file
@ -0,0 +1 @@
|
||||
This is not a valid eml.
|
BIN
src/paperless_mail/tests/samples/first.pdf
Normal file
BIN
src/paperless_mail/tests/samples/first.pdf
Normal file
Binary file not shown.
11599
src/paperless_mail/tests/samples/html.eml
Normal file
11599
src/paperless_mail/tests/samples/html.eml
Normal file
File diff suppressed because it is too large
Load Diff
45
src/paperless_mail/tests/samples/html.eml.html
Normal file
45
src/paperless_mail/tests/samples/html.eml.html
Normal file
@ -0,0 +1,45 @@
|
||||
<!doctype html>
|
||||
<html>
|
||||
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<link href="output.css" rel="stylesheet">
|
||||
</head>
|
||||
|
||||
<body class="bg-white w-screen flex flex-col items-center">
|
||||
<div class="container max-w-4xl">
|
||||
<!-- Header -->
|
||||
<div class="grid gap-x-2 bg-slate-200 p-4">
|
||||
|
||||
<div class="col-start-9 col-span-4 row-start-1 text-right">2022-10-15 09:23</div>
|
||||
|
||||
<div class="col-start-1 row-start-1 text-slate-400 text-right">From</div>
|
||||
<div class="col-start-2 col-span-7 row-start-1">Name <<a href="mailto:someone@example.de">someone@example.de</a>></div>
|
||||
|
||||
<div class="col-start-1 row-start-2 text-slate-400 text-right">Subject</div>
|
||||
<div class=" col-start-2 col-span-10 row-start-2 font-bold">HTML Message</div>
|
||||
|
||||
<div class="col-start-1 row-start-3 text-slate-400 text-right">To</div>
|
||||
<div class="col-start-2 col-span-10 row-start-3 text-sm my-0.5"><a href="mailto:someone@example.de">someone@example.de</a></div>
|
||||
|
||||
<div class="col-start-1 row-start-4 text-slate-400 text-right"></div>
|
||||
<div class="col-start-2 col-span-10 row-start-4 text-sm my-0.5"></div>
|
||||
|
||||
<div class="col-start-1 row-start-5 text-slate-400 text-right"></div>
|
||||
<div class="col-start-2 col-span-10 row-start-5" text-sm my-0.5></div>
|
||||
|
||||
<div class="col-start-1 row-start-6 text-slate-400 text-right">Attachments</div>
|
||||
<div class="col-start-2 col-span-10 row-start-6">IntM6gnXFm00FEV5.png (6.89 KiB), 600+kbfile.txt (600.24 KiB)</div>
|
||||
</div>
|
||||
|
||||
<!-- Separator-->
|
||||
<div class="border-t border-solid border-b w-full h-[1px] box-content border-black mb-5 bg-slate-200"></div>
|
||||
|
||||
<!-- Content-->
|
||||
<div class="w-full break-words">Some Text<br><br>and an embedded image.</div>
|
||||
</div>
|
||||
|
||||
</body>
|
||||
|
||||
</html>
|
BIN
src/paperless_mail/tests/samples/html.eml.pdf
Normal file
BIN
src/paperless_mail/tests/samples/html.eml.pdf
Normal file
Binary file not shown.
BIN
src/paperless_mail/tests/samples/html.eml.pdf.webp
Normal file
BIN
src/paperless_mail/tests/samples/html.eml.pdf.webp
Normal file
Binary file not shown.
After Width: | Height: | Size: 6.0 KiB |
19
src/paperless_mail/tests/samples/sample.html
Normal file
19
src/paperless_mail/tests/samples/sample.html
Normal file
@ -0,0 +1,19 @@
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
|
||||
</head>
|
||||
<body>
|
||||
<p>Some Text</p>
|
||||
<p>
|
||||
<img src="cid:part1.pNdUSz0s.D3NqVtPg@example.de" alt="Has to be rewritten to work..">
|
||||
<img src="https://upload.wikimedia.org/wikipedia/en/f/f7/RickRoll.png" alt="This image should not be shown.">
|
||||
</p>
|
||||
|
||||
<p>and an embedded image.<br>
|
||||
</p>
|
||||
<p id="changeme">Paragraph unchanged.</p>
|
||||
<scRipt>
|
||||
document.getElementById("changeme").innerHTML = "Paragraph changed via Java Script.";
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
BIN
src/paperless_mail/tests/samples/sample.html.pdf
Normal file
BIN
src/paperless_mail/tests/samples/sample.html.pdf
Normal file
Binary file not shown.
BIN
src/paperless_mail/tests/samples/sample.html.pdf.webp
Normal file
BIN
src/paperless_mail/tests/samples/sample.html.pdf.webp
Normal file
Binary file not shown.
After Width: | Height: | Size: 2.8 KiB |
BIN
src/paperless_mail/tests/samples/sample.png
Normal file
BIN
src/paperless_mail/tests/samples/sample.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 6.9 KiB |
BIN
src/paperless_mail/tests/samples/second.pdf
Normal file
BIN
src/paperless_mail/tests/samples/second.pdf
Normal file
Binary file not shown.
25
src/paperless_mail/tests/samples/simple_text.eml
Normal file
25
src/paperless_mail/tests/samples/simple_text.eml
Normal file
@ -0,0 +1,25 @@
|
||||
Return-Path: <mail@someserver.de>
|
||||
Delivered-To: mail@someserver.de
|
||||
Received: from mail.someserver.org ([::1])
|
||||
by e1acdba3bd07 with LMTP
|
||||
id KBKZGD2YR2NTCgQAjubtDA
|
||||
(envelope-from <mail@someserver.de>)
|
||||
for <mail@someserver.de>; Wed, 10 Oct 2022 11:40:46 +0200
|
||||
Received: from [127.0.0.1] (localhost [127.0.0.1]) by localhost (Mailerdaemon) with ESMTPSA id 2BC9064C1616
|
||||
for <some@one.de>; Wed, 12 Oct 2022 21:40:46 +0200 (CEST)
|
||||
Message-ID: <6e99e34d-e20a-80c4-ea61-d8234b612be9@someserver.de>
|
||||
Date: Wed, 12 Oct 2022 21:40:43 +0200
|
||||
MIME-Version: 1.0
|
||||
User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Gecko/20100101
|
||||
Thunderbird/102.3.1
|
||||
Content-Language: en-US
|
||||
To: some@one.de
|
||||
Cc: asdasd@æsdasd.de, asdadasdasdasda.asdasd@æsdasd.de
|
||||
Bcc: fdf@fvf.de
|
||||
From: Some One <mail@someserver.de>
|
||||
Content-Type: text/plain; charset=UTF-8; format=flowed
|
||||
Content-Transfer-Encoding: 7bit
|
||||
X-Last-TLS-Session-Version: TLSv1.3
|
||||
Subject: Simple Text Mail
|
||||
|
||||
This is just a simple Text Mail.
|
BIN
src/paperless_mail/tests/samples/simple_text.eml.pdf
Normal file
BIN
src/paperless_mail/tests/samples/simple_text.eml.pdf
Normal file
Binary file not shown.
BIN
src/paperless_mail/tests/samples/simple_text.eml.pdf.webp
Normal file
BIN
src/paperless_mail/tests/samples/simple_text.eml.pdf.webp
Normal file
Binary file not shown.
After Width: | Height: | Size: 5.2 KiB |
688
src/paperless_mail/tests/test_parsers.py
Normal file
688
src/paperless_mail/tests/test_parsers.py
Normal file
@ -0,0 +1,688 @@
|
||||
import datetime
|
||||
import os
|
||||
from unittest import mock
|
||||
|
||||
from django.test import TestCase
|
||||
from documents.parsers import ParseError
|
||||
from paperless_mail.parsers import MailDocumentParser
|
||||
|
||||
|
||||
class TestParser(TestCase):
|
||||
SAMPLE_FILES = os.path.join(os.path.dirname(__file__), "samples")
|
||||
|
||||
def setUp(self) -> None:
|
||||
self.parser = MailDocumentParser(logging_group=None)
|
||||
|
||||
def tearDown(self) -> None:
|
||||
self.parser.cleanup()
|
||||
|
||||
def test_get_parsed_missing_file(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Fresh parser
|
||||
WHEN:
|
||||
- A nonexistent file should be parsed
|
||||
THEN:
|
||||
- An Exception is thrown
|
||||
"""
|
||||
# Check if exception is raised when parsing fails.
|
||||
self.assertRaises(
|
||||
ParseError,
|
||||
self.parser.get_parsed,
|
||||
os.path.join(self.SAMPLE_FILES, "na"),
|
||||
)
|
||||
|
||||
def test_get_parsed_broken_file(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Fresh parser
|
||||
WHEN:
|
||||
- A faulty file should be parsed
|
||||
THEN:
|
||||
- An Exception is thrown
|
||||
"""
|
||||
# Check if exception is raised when the mail is faulty.
|
||||
self.assertRaises(
|
||||
ParseError,
|
||||
self.parser.get_parsed,
|
||||
os.path.join(self.SAMPLE_FILES, "broken.eml"),
|
||||
)
|
||||
|
||||
def test_get_parsed_simple_text_mail(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Fresh parser
|
||||
WHEN:
|
||||
- A .eml file should be parsed
|
||||
THEN:
|
||||
- The content of the mail should be available in the parse result.
|
||||
"""
|
||||
# Parse Test file and check relevant content
|
||||
parsed1 = self.parser.get_parsed(
|
||||
os.path.join(self.SAMPLE_FILES, "simple_text.eml"),
|
||||
)
|
||||
|
||||
self.assertEqual(parsed1.date.year, 2022)
|
||||
self.assertEqual(parsed1.date.month, 10)
|
||||
self.assertEqual(parsed1.date.day, 12)
|
||||
self.assertEqual(parsed1.date.hour, 21)
|
||||
self.assertEqual(parsed1.date.minute, 40)
|
||||
self.assertEqual(parsed1.date.second, 43)
|
||||
self.assertEqual(parsed1.date.tzname(), "UTC+02:00")
|
||||
self.assertEqual(parsed1.from_, "mail@someserver.de")
|
||||
self.assertEqual(parsed1.subject, "Simple Text Mail")
|
||||
self.assertEqual(parsed1.text, "This is just a simple Text Mail.\n")
|
||||
self.assertEqual(parsed1.to, ("some@one.de",))
|
||||
|
||||
def test_get_parsed_reparse(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- An E-Mail was parsed
|
||||
WHEN:
|
||||
- Another .eml file should be parsed
|
||||
THEN:
|
||||
- The parser should not retry to parse and return the old results
|
||||
"""
|
||||
# Parse Test file and check relevant content
|
||||
parsed1 = self.parser.get_parsed(
|
||||
os.path.join(self.SAMPLE_FILES, "simple_text.eml"),
|
||||
)
|
||||
# Check if same parsed object as before is returned, even if another file is given.
|
||||
parsed2 = self.parser.get_parsed(
|
||||
os.path.join(os.path.join(self.SAMPLE_FILES, "html.eml")),
|
||||
)
|
||||
self.assertEqual(parsed1, parsed2)
|
||||
|
||||
@mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf")
|
||||
@mock.patch("paperless_mail.parsers.make_thumbnail_from_pdf")
|
||||
def test_get_thumbnail(
|
||||
self,
|
||||
mock_make_thumbnail_from_pdf: mock.MagicMock,
|
||||
mock_generate_pdf: mock.MagicMock,
|
||||
):
|
||||
"""
|
||||
GIVEN:
|
||||
- An E-Mail was parsed
|
||||
WHEN:
|
||||
- The Thumbnail is requested
|
||||
THEN:
|
||||
- The parser should call the functions which generate the thumbnail
|
||||
"""
|
||||
mocked_return = "Passing the return value through.."
|
||||
mock_make_thumbnail_from_pdf.return_value = mocked_return
|
||||
|
||||
mock_generate_pdf.return_value = "Mocked return value.."
|
||||
|
||||
thumb = self.parser.get_thumbnail(
|
||||
os.path.join(self.SAMPLE_FILES, "simple_text.eml"),
|
||||
"message/rfc822",
|
||||
)
|
||||
self.assertEqual(
|
||||
self.parser.archive_path,
|
||||
mock_make_thumbnail_from_pdf.call_args_list[0].args[0],
|
||||
)
|
||||
self.assertEqual(
|
||||
self.parser.tempdir,
|
||||
mock_make_thumbnail_from_pdf.call_args_list[0].args[1],
|
||||
)
|
||||
self.assertEqual(mocked_return, thumb)
|
||||
|
||||
@mock.patch("documents.loggers.LoggingMixin.log")
|
||||
def test_extract_metadata_fail(self, m: mock.MagicMock):
|
||||
"""
|
||||
GIVEN:
|
||||
- Fresh start
|
||||
WHEN:
|
||||
- Metadata extraction is triggered for nonexistent file
|
||||
THEN:
|
||||
- A log warning should be generated
|
||||
"""
|
||||
# Validate if warning is logged when parsing fails
|
||||
self.assertEqual([], self.parser.extract_metadata("na", "message/rfc822"))
|
||||
self.assertEqual("warning", m.call_args[0][0])
|
||||
|
||||
def test_extract_metadata(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Fresh start
|
||||
WHEN:
|
||||
- Metadata extraction is triggered
|
||||
THEN:
|
||||
- metadata is returned
|
||||
"""
|
||||
# Validate Metadata parsing returns the expected results
|
||||
metadata = self.parser.extract_metadata(
|
||||
os.path.join(self.SAMPLE_FILES, "simple_text.eml"),
|
||||
"message/rfc822",
|
||||
)
|
||||
|
||||
self.assertIn(
|
||||
{"namespace": "", "prefix": "", "key": "attachments", "value": ""},
|
||||
metadata,
|
||||
)
|
||||
self.assertIn(
|
||||
{
|
||||
"namespace": "",
|
||||
"prefix": "",
|
||||
"key": "date",
|
||||
"value": "2022-10-12 21:40:43 UTC+02:00",
|
||||
},
|
||||
metadata,
|
||||
)
|
||||
self.assertIn(
|
||||
{
|
||||
"namespace": "",
|
||||
"prefix": "header",
|
||||
"key": "content-language",
|
||||
"value": "en-US",
|
||||
},
|
||||
metadata,
|
||||
)
|
||||
self.assertIn(
|
||||
{
|
||||
"namespace": "",
|
||||
"prefix": "header",
|
||||
"key": "content-type",
|
||||
"value": "text/plain; charset=UTF-8; format=flowed",
|
||||
},
|
||||
metadata,
|
||||
)
|
||||
self.assertIn(
|
||||
{
|
||||
"namespace": "",
|
||||
"prefix": "header",
|
||||
"key": "date",
|
||||
"value": "Wed, 12 Oct 2022 21:40:43 +0200",
|
||||
},
|
||||
metadata,
|
||||
)
|
||||
self.assertIn(
|
||||
{
|
||||
"namespace": "",
|
||||
"prefix": "header",
|
||||
"key": "delivered-to",
|
||||
"value": "mail@someserver.de",
|
||||
},
|
||||
metadata,
|
||||
)
|
||||
self.assertIn(
|
||||
{
|
||||
"namespace": "",
|
||||
"prefix": "header",
|
||||
"key": "from",
|
||||
"value": "Some One <mail@someserver.de>",
|
||||
},
|
||||
metadata,
|
||||
)
|
||||
self.assertIn(
|
||||
{
|
||||
"namespace": "",
|
||||
"prefix": "header",
|
||||
"key": "message-id",
|
||||
"value": "<6e99e34d-e20a-80c4-ea61-d8234b612be9@someserver.de>",
|
||||
},
|
||||
metadata,
|
||||
)
|
||||
self.assertIn(
|
||||
{
|
||||
"namespace": "",
|
||||
"prefix": "header",
|
||||
"key": "mime-version",
|
||||
"value": "1.0",
|
||||
},
|
||||
metadata,
|
||||
)
|
||||
self.assertIn(
|
||||
{
|
||||
"namespace": "",
|
||||
"prefix": "header",
|
||||
"key": "received",
|
||||
"value": "from mail.someserver.org ([::1])\n\tby e1acdba3bd07 with LMTP\n\tid KBKZGD2YR2NTCgQAjubtDA\n\t(envelope-from <mail@someserver.de>)\n\tfor <mail@someserver.de>; Wed, 10 Oct 2022 11:40:46 +0200, from [127.0.0.1] (localhost [127.0.0.1]) by localhost (Mailerdaemon) with ESMTPSA id 2BC9064C1616\n\tfor <some@one.de>; Wed, 12 Oct 2022 21:40:46 +0200 (CEST)",
|
||||
},
|
||||
metadata,
|
||||
)
|
||||
self.assertIn(
|
||||
{
|
||||
"namespace": "",
|
||||
"prefix": "header",
|
||||
"key": "return-path",
|
||||
"value": "<mail@someserver.de>",
|
||||
},
|
||||
metadata,
|
||||
)
|
||||
self.assertIn(
|
||||
{
|
||||
"namespace": "",
|
||||
"prefix": "header",
|
||||
"key": "subject",
|
||||
"value": "Simple Text Mail",
|
||||
},
|
||||
metadata,
|
||||
)
|
||||
self.assertIn(
|
||||
{"namespace": "", "prefix": "header", "key": "to", "value": "some@one.de"},
|
||||
metadata,
|
||||
)
|
||||
self.assertIn(
|
||||
{
|
||||
"namespace": "",
|
||||
"prefix": "header",
|
||||
"key": "user-agent",
|
||||
"value": "Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Gecko/20100101\n Thunderbird/102.3.1",
|
||||
},
|
||||
metadata,
|
||||
)
|
||||
self.assertIn(
|
||||
{
|
||||
"namespace": "",
|
||||
"prefix": "header",
|
||||
"key": "x-last-tls-session-version",
|
||||
"value": "TLSv1.3",
|
||||
},
|
||||
metadata,
|
||||
)
|
||||
|
||||
def test_parse_na(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Fresh start
|
||||
WHEN:
|
||||
- parsing is attempted with nonexistent file
|
||||
THEN:
|
||||
- Exception is thrown
|
||||
"""
|
||||
# Check if exception is raised when parsing fails.
|
||||
self.assertRaises(
|
||||
ParseError,
|
||||
self.parser.parse,
|
||||
os.path.join(self.SAMPLE_FILES, "na"),
|
||||
"message/rfc822",
|
||||
)
|
||||
|
||||
@mock.patch("paperless_mail.parsers.MailDocumentParser.tika_parse")
|
||||
@mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf")
|
||||
def test_parse_html_eml(self, n, mock_tika_parse: mock.MagicMock):
|
||||
"""
|
||||
GIVEN:
|
||||
- Fresh start
|
||||
WHEN:
|
||||
- parsing is done with html mail
|
||||
THEN:
|
||||
- Tika is called, parsed information from non html parts is available
|
||||
"""
|
||||
# Validate parsing returns the expected results
|
||||
text_expected = "Subject: HTML Message\n\nFrom: Name <someone@example.de>\n\nTo: someone@example.de\n\nAttachments: IntM6gnXFm00FEV5.png (6.89 KiB), 600+kbfile.txt (600.24 KiB)\n\nHTML content: tika return\n\nSome Text and an embedded image."
|
||||
mock_tika_parse.return_value = "tika return"
|
||||
|
||||
self.parser.parse(os.path.join(self.SAMPLE_FILES, "html.eml"), "message/rfc822")
|
||||
|
||||
self.assertEqual(text_expected, self.parser.text)
|
||||
self.assertEqual(
|
||||
datetime.datetime(
|
||||
2022,
|
||||
10,
|
||||
15,
|
||||
11,
|
||||
23,
|
||||
19,
|
||||
tzinfo=datetime.timezone(datetime.timedelta(seconds=7200)),
|
||||
),
|
||||
self.parser.date,
|
||||
)
|
||||
|
||||
@mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf")
|
||||
def test_parse_simple_eml(self, n):
|
||||
"""
|
||||
GIVEN:
|
||||
- Fresh start
|
||||
WHEN:
|
||||
- parsing is done with non html mail
|
||||
THEN:
|
||||
- parsed information is available
|
||||
"""
|
||||
# Validate parsing returns the expected results
|
||||
|
||||
self.parser.parse(
|
||||
os.path.join(self.SAMPLE_FILES, "simple_text.eml"),
|
||||
"message/rfc822",
|
||||
)
|
||||
text_expected = "Subject: Simple Text Mail\n\nFrom: Some One <mail@someserver.de>\n\nTo: some@one.de\n\nCC: asdasd@æsdasd.de, asdadasdasdasda.asdasd@æsdasd.de\n\nBCC: fdf@fvf.de\n\n\n\nThis is just a simple Text Mail."
|
||||
self.assertEqual(text_expected, self.parser.text)
|
||||
self.assertEqual(
|
||||
datetime.datetime(
|
||||
2022,
|
||||
10,
|
||||
12,
|
||||
21,
|
||||
40,
|
||||
43,
|
||||
tzinfo=datetime.timezone(datetime.timedelta(seconds=7200)),
|
||||
),
|
||||
self.parser.date,
|
||||
)
|
||||
|
||||
# Just check if file exists, the unittest for generate_pdf() goes deeper.
|
||||
self.assertTrue(os.path.isfile(self.parser.archive_path))
|
||||
|
||||
@mock.patch("paperless_mail.parsers.parser.from_buffer")
|
||||
def test_tika_parse_unsuccessful(self, mock_from_buffer: mock.MagicMock):
|
||||
"""
|
||||
GIVEN:
|
||||
- Fresh start
|
||||
WHEN:
|
||||
- tika parsing fails
|
||||
THEN:
|
||||
- the parser should return an empty string
|
||||
"""
|
||||
# Check unsuccessful parsing
|
||||
mock_from_buffer.return_value = {"content": None}
|
||||
parsed = self.parser.tika_parse(None)
|
||||
self.assertEqual("", parsed)
|
||||
|
||||
@mock.patch("paperless_mail.parsers.parser.from_buffer")
|
||||
def test_tika_parse(self, mock_from_buffer: mock.MagicMock):
|
||||
"""
|
||||
GIVEN:
|
||||
- Fresh start
|
||||
WHEN:
|
||||
- tika parsing is called
|
||||
THEN:
|
||||
- a web request to tika shall be done and the reply es returned
|
||||
"""
|
||||
html = '<html><head><meta http-equiv="content-type" content="text/html; charset=UTF-8"></head><body><p>Some Text</p></body></html>'
|
||||
expected_text = "Some Text"
|
||||
|
||||
# Check successful parsing
|
||||
mock_from_buffer.return_value = {"content": expected_text}
|
||||
parsed = self.parser.tika_parse(html)
|
||||
self.assertEqual(expected_text, parsed.strip())
|
||||
mock_from_buffer.assert_called_with(html, self.parser.tika_server)
|
||||
|
||||
@mock.patch("paperless_mail.parsers.parser.from_buffer")
|
||||
def test_tika_parse_exception(self, mock_from_buffer: mock.MagicMock):
|
||||
"""
|
||||
GIVEN:
|
||||
- Fresh start
|
||||
WHEN:
|
||||
- tika parsing is called and an exception is thrown on the request
|
||||
THEN:
|
||||
- a ParseError Exception is thrown
|
||||
"""
|
||||
html = '<html><head><meta http-equiv="content-type" content="text/html; charset=UTF-8"></head><body><p>Some Text</p></body></html>'
|
||||
|
||||
# Check ParseError
|
||||
def my_side_effect():
|
||||
raise Exception("Test")
|
||||
|
||||
mock_from_buffer.side_effect = my_side_effect
|
||||
self.assertRaises(ParseError, self.parser.tika_parse, html)
|
||||
|
||||
def test_tika_parse_unreachable(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Fresh start
|
||||
WHEN:
|
||||
- tika parsing is called but tika is not available
|
||||
THEN:
|
||||
- a ParseError Exception is thrown
|
||||
"""
|
||||
html = '<html><head><meta http-equiv="content-type" content="text/html; charset=UTF-8"></head><body><p>Some Text</p></body></html>'
|
||||
|
||||
# Check if exception is raised when Tika cannot be reached.
|
||||
self.parser.tika_server = ""
|
||||
self.assertRaises(ParseError, self.parser.tika_parse, html)
|
||||
|
||||
@mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf_from_mail")
|
||||
@mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf_from_html")
|
||||
def test_generate_pdf_parse_error(self, m: mock.MagicMock, n: mock.MagicMock):
|
||||
"""
|
||||
GIVEN:
|
||||
- Fresh start
|
||||
WHEN:
|
||||
- pdf generation is requested but gotenberg can not be reached
|
||||
THEN:
|
||||
- a ParseError Exception is thrown
|
||||
"""
|
||||
m.return_value = b""
|
||||
n.return_value = b""
|
||||
|
||||
# Check if exception is raised when the pdf can not be created.
|
||||
self.parser.gotenberg_server = ""
|
||||
self.assertRaises(
|
||||
ParseError,
|
||||
self.parser.generate_pdf,
|
||||
os.path.join(self.SAMPLE_FILES, "html.eml"),
|
||||
)
|
||||
|
||||
def test_generate_pdf_exception(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Fresh start
|
||||
WHEN:
|
||||
- pdf generation is requested but parsing throws an exception
|
||||
THEN:
|
||||
- a ParseError Exception is thrown
|
||||
"""
|
||||
# Check if exception is raised when the mail can not be parsed.
|
||||
self.assertRaises(
|
||||
ParseError,
|
||||
self.parser.generate_pdf,
|
||||
os.path.join(self.SAMPLE_FILES, "broken.eml"),
|
||||
)
|
||||
|
||||
@mock.patch("paperless_mail.parsers.requests.post")
|
||||
@mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf_from_mail")
|
||||
@mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf_from_html")
|
||||
def test_generate_pdf(
|
||||
self,
|
||||
mock_generate_pdf_from_html: mock.MagicMock,
|
||||
mock_generate_pdf_from_mail: mock.MagicMock,
|
||||
mock_post: mock.MagicMock,
|
||||
):
|
||||
"""
|
||||
GIVEN:
|
||||
- Fresh start
|
||||
WHEN:
|
||||
- pdf generation is requested
|
||||
THEN:
|
||||
- gotenberg is called and the resulting file is returned
|
||||
"""
|
||||
mock_generate_pdf_from_mail.return_value = b"Mail Return"
|
||||
mock_generate_pdf_from_html.return_value = b"HTML Return"
|
||||
|
||||
mock_response = mock.MagicMock()
|
||||
mock_response.content = b"Content"
|
||||
mock_post.return_value = mock_response
|
||||
pdf_path = self.parser.generate_pdf(os.path.join(self.SAMPLE_FILES, "html.eml"))
|
||||
self.assertTrue(os.path.isfile(pdf_path))
|
||||
|
||||
mock_generate_pdf_from_mail.assert_called_once_with(
|
||||
self.parser.get_parsed(None),
|
||||
)
|
||||
mock_generate_pdf_from_html.assert_called_once_with(
|
||||
self.parser.get_parsed(None).html,
|
||||
self.parser.get_parsed(None).attachments,
|
||||
)
|
||||
self.assertEqual(
|
||||
self.parser.gotenberg_server + "/forms/pdfengines/merge",
|
||||
mock_post.call_args.args[0],
|
||||
)
|
||||
self.assertEqual({}, mock_post.call_args.kwargs["headers"])
|
||||
self.assertEqual(
|
||||
b"Mail Return",
|
||||
mock_post.call_args.kwargs["files"]["1_mail.pdf"][1].read(),
|
||||
)
|
||||
self.assertEqual(
|
||||
b"HTML Return",
|
||||
mock_post.call_args.kwargs["files"]["2_html.pdf"][1].read(),
|
||||
)
|
||||
|
||||
mock_response.raise_for_status.assert_called_once()
|
||||
|
||||
with open(pdf_path, "rb") as file:
|
||||
self.assertEqual(b"Content", file.read())
|
||||
|
||||
def test_mail_to_html(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Fresh start
|
||||
WHEN:
|
||||
- conversion from eml to html is requested
|
||||
THEN:
|
||||
- html should be returned
|
||||
"""
|
||||
mail = self.parser.get_parsed(os.path.join(self.SAMPLE_FILES, "html.eml"))
|
||||
html_handle = self.parser.mail_to_html(mail)
|
||||
html_received = html_handle.read()
|
||||
|
||||
with open(
|
||||
os.path.join(self.SAMPLE_FILES, "html.eml.html"),
|
||||
) as html_expected_handle:
|
||||
html_expected = html_expected_handle.read()
|
||||
|
||||
self.assertHTMLEqual(html_expected, html_received)
|
||||
|
||||
@mock.patch("paperless_mail.parsers.requests.post")
|
||||
@mock.patch("paperless_mail.parsers.MailDocumentParser.mail_to_html")
|
||||
def test_generate_pdf_from_mail(
|
||||
self,
|
||||
mock_mail_to_html: mock.MagicMock,
|
||||
mock_post: mock.MagicMock,
|
||||
):
|
||||
"""
|
||||
GIVEN:
|
||||
- Fresh start
|
||||
WHEN:
|
||||
- conversion of PDF from .eml is requested
|
||||
THEN:
|
||||
- gotenberg should be called with valid intermediary html files, the resulting pdf is returned
|
||||
"""
|
||||
mock_response = mock.MagicMock()
|
||||
mock_response.content = b"Content"
|
||||
mock_post.return_value = mock_response
|
||||
|
||||
mock_mail_to_html.return_value = "Testresponse"
|
||||
|
||||
mail = self.parser.get_parsed(os.path.join(self.SAMPLE_FILES, "html.eml"))
|
||||
|
||||
retval = self.parser.generate_pdf_from_mail(mail)
|
||||
self.assertEqual(b"Content", retval)
|
||||
|
||||
mock_mail_to_html.assert_called_once_with(mail)
|
||||
self.assertEqual(
|
||||
self.parser.gotenberg_server + "/forms/chromium/convert/html",
|
||||
mock_post.call_args.args[0],
|
||||
)
|
||||
self.assertEqual({}, mock_post.call_args.kwargs["headers"])
|
||||
self.assertEqual(
|
||||
{
|
||||
"marginTop": "0.1",
|
||||
"marginBottom": "0.1",
|
||||
"marginLeft": "0.1",
|
||||
"marginRight": "0.1",
|
||||
"paperWidth": "8.27",
|
||||
"paperHeight": "11.7",
|
||||
"scale": "1.0",
|
||||
},
|
||||
mock_post.call_args.kwargs["data"],
|
||||
)
|
||||
self.assertEqual(
|
||||
"Testresponse",
|
||||
mock_post.call_args.kwargs["files"]["html"][1],
|
||||
)
|
||||
self.assertEqual(
|
||||
"output.css",
|
||||
mock_post.call_args.kwargs["files"]["css"][0],
|
||||
)
|
||||
|
||||
mock_response.raise_for_status.assert_called_once()
|
||||
|
||||
def test_transform_inline_html(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Fresh start
|
||||
WHEN:
|
||||
- transforming of html content from an email with an inline image attachment is requested
|
||||
THEN:
|
||||
- html is returned and sanitized
|
||||
"""
|
||||
|
||||
class MailAttachmentMock:
|
||||
def __init__(self, payload, content_id):
|
||||
self.payload = payload
|
||||
self.content_id = content_id
|
||||
|
||||
result = None
|
||||
|
||||
with open(os.path.join(self.SAMPLE_FILES, "sample.html")) as html_file:
|
||||
with open(os.path.join(self.SAMPLE_FILES, "sample.png"), "rb") as png_file:
|
||||
html = html_file.read()
|
||||
png = png_file.read()
|
||||
attachments = [
|
||||
MailAttachmentMock(png, "part1.pNdUSz0s.D3NqVtPg@example.de"),
|
||||
]
|
||||
result = self.parser.transform_inline_html(html, attachments)
|
||||
|
||||
resulting_html = result[-1][1].read()
|
||||
self.assertTrue(result[-1][0] == "index.html")
|
||||
self.assertIn(result[0][0], resulting_html)
|
||||
self.assertNotIn("<script", resulting_html.lower())
|
||||
|
||||
@mock.patch("paperless_mail.parsers.requests.post")
|
||||
def test_generate_pdf_from_html(self, mock_post: mock.MagicMock):
|
||||
"""
|
||||
GIVEN:
|
||||
- Fresh start
|
||||
WHEN:
|
||||
- generating pdf from html with inline attachments is attempted
|
||||
THEN:
|
||||
- gotenberg is called with the correct parameters and the resulting pdf is returned
|
||||
"""
|
||||
|
||||
class MailAttachmentMock:
|
||||
def __init__(self, payload, content_id):
|
||||
self.payload = payload
|
||||
self.content_id = content_id
|
||||
|
||||
mock_response = mock.MagicMock()
|
||||
mock_response.content = b"Content"
|
||||
mock_post.return_value = mock_response
|
||||
|
||||
result = None
|
||||
|
||||
with open(os.path.join(self.SAMPLE_FILES, "sample.html")) as html_file:
|
||||
with open(os.path.join(self.SAMPLE_FILES, "sample.png"), "rb") as png_file:
|
||||
html = html_file.read()
|
||||
png = png_file.read()
|
||||
attachments = [
|
||||
MailAttachmentMock(png, "part1.pNdUSz0s.D3NqVtPg@example.de"),
|
||||
]
|
||||
result = self.parser.generate_pdf_from_html(html, attachments)
|
||||
|
||||
self.assertEqual(
|
||||
self.parser.gotenberg_server + "/forms/chromium/convert/html",
|
||||
mock_post.call_args.args[0],
|
||||
)
|
||||
self.assertEqual({}, mock_post.call_args.kwargs["headers"])
|
||||
self.assertEqual(
|
||||
{
|
||||
"marginTop": "0.1",
|
||||
"marginBottom": "0.1",
|
||||
"marginLeft": "0.1",
|
||||
"marginRight": "0.1",
|
||||
"paperWidth": "8.27",
|
||||
"paperHeight": "11.7",
|
||||
"scale": "1.0",
|
||||
},
|
||||
mock_post.call_args.kwargs["data"],
|
||||
)
|
||||
|
||||
# read to assert it is a file like object.
|
||||
mock_post.call_args.kwargs["files"]["cidpart1pNdUSz0sD3NqVtPgexamplede"][
|
||||
1
|
||||
].read()
|
||||
mock_post.call_args.kwargs["files"]["index.html"][1].read()
|
||||
|
||||
mock_response.raise_for_status.assert_called_once()
|
||||
|
||||
self.assertEqual(b"Content", result)
|
361
src/paperless_mail/tests/test_parsers_live.py
Normal file
361
src/paperless_mail/tests/test_parsers_live.py
Normal file
@ -0,0 +1,361 @@
|
||||
import os
|
||||
from unittest import mock
|
||||
from urllib.error import HTTPError
|
||||
from urllib.request import urlopen
|
||||
|
||||
import pytest
|
||||
from django.test import TestCase
|
||||
from documents.parsers import ParseError
|
||||
from documents.parsers import run_convert
|
||||
from imagehash import average_hash
|
||||
from paperless_mail.parsers import MailDocumentParser
|
||||
from pdfminer.high_level import extract_text
|
||||
from PIL import Image
|
||||
|
||||
|
||||
class TestParserLive(TestCase):
|
||||
SAMPLE_FILES = os.path.join(os.path.dirname(__file__), "samples")
|
||||
|
||||
def setUp(self) -> None:
|
||||
self.parser = MailDocumentParser(logging_group=None)
|
||||
|
||||
def tearDown(self) -> None:
|
||||
self.parser.cleanup()
|
||||
|
||||
@staticmethod
|
||||
def imagehash(file, hash_size=18):
|
||||
return f"{average_hash(Image.open(file), hash_size)}"
|
||||
|
||||
# Only run if convert is available
|
||||
@pytest.mark.skipif(
|
||||
"PAPERLESS_TEST_SKIP_CONVERT" in os.environ,
|
||||
reason="PAPERLESS_TEST_SKIP_CONVERT set, skipping Test",
|
||||
)
|
||||
@mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf")
|
||||
def test_get_thumbnail(self, mock_generate_pdf: mock.MagicMock):
|
||||
"""
|
||||
GIVEN:
|
||||
- Fresh start
|
||||
WHEN:
|
||||
- The Thumbnail is requested
|
||||
THEN:
|
||||
- The returned thumbnail image file is as expected
|
||||
"""
|
||||
mock_generate_pdf.return_value = os.path.join(
|
||||
self.SAMPLE_FILES,
|
||||
"simple_text.eml.pdf",
|
||||
)
|
||||
thumb = self.parser.get_thumbnail(
|
||||
os.path.join(self.SAMPLE_FILES, "simple_text.eml"),
|
||||
"message/rfc822",
|
||||
)
|
||||
self.assertTrue(os.path.isfile(thumb))
|
||||
|
||||
expected = os.path.join(self.SAMPLE_FILES, "simple_text.eml.pdf.webp")
|
||||
|
||||
self.assertEqual(
|
||||
self.imagehash(thumb),
|
||||
self.imagehash(expected),
|
||||
f"Created Thumbnail {thumb} differs from expected file {expected}",
|
||||
)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
"TIKA_LIVE" not in os.environ,
|
||||
reason="No tika server",
|
||||
)
|
||||
def test_tika_parse_successful(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Fresh start
|
||||
WHEN:
|
||||
- tika parsing is called
|
||||
THEN:
|
||||
- a web request to tika shall be done and the reply es returned
|
||||
"""
|
||||
html = '<html><head><meta http-equiv="content-type" content="text/html; charset=UTF-8"></head><body><p>Some Text</p></body></html>'
|
||||
expected_text = "Some Text"
|
||||
|
||||
# Check successful parsing
|
||||
parsed = self.parser.tika_parse(html)
|
||||
self.assertEqual(expected_text, parsed.strip())
|
||||
|
||||
@pytest.mark.skipif(
|
||||
"TIKA_LIVE" not in os.environ,
|
||||
reason="No tika server",
|
||||
)
|
||||
def test_tika_parse_unsuccessful(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Fresh start
|
||||
WHEN:
|
||||
- tika parsing fails
|
||||
THEN:
|
||||
- the parser should return an empty string
|
||||
"""
|
||||
# Check unsuccessful parsing
|
||||
parsed = self.parser.tika_parse(None)
|
||||
self.assertEqual("", parsed)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
"GOTENBERG_LIVE" not in os.environ,
|
||||
reason="No gotenberg server",
|
||||
)
|
||||
@mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf_from_mail")
|
||||
@mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf_from_html")
|
||||
def test_generate_pdf_gotenberg_merging(
|
||||
self,
|
||||
mock_generate_pdf_from_html: mock.MagicMock,
|
||||
mock_generate_pdf_from_mail: mock.MagicMock,
|
||||
):
|
||||
"""
|
||||
GIVEN:
|
||||
- Intermediary pdfs to be merged
|
||||
WHEN:
|
||||
- pdf generation is requested with html file requiring merging of pdfs
|
||||
THEN:
|
||||
- gotenberg is called to merge files and the resulting file is returned
|
||||
"""
|
||||
with open(os.path.join(self.SAMPLE_FILES, "first.pdf"), "rb") as first:
|
||||
mock_generate_pdf_from_mail.return_value = first.read()
|
||||
|
||||
with open(os.path.join(self.SAMPLE_FILES, "second.pdf"), "rb") as second:
|
||||
mock_generate_pdf_from_html.return_value = second.read()
|
||||
|
||||
pdf_path = self.parser.generate_pdf(os.path.join(self.SAMPLE_FILES, "html.eml"))
|
||||
self.assertTrue(os.path.isfile(pdf_path))
|
||||
|
||||
extracted = extract_text(pdf_path)
|
||||
expected = (
|
||||
"first\tPDF\tto\tbe\tmerged.\n\n\x0csecond\tPDF\tto\tbe\tmerged.\n\n\x0c"
|
||||
)
|
||||
self.assertEqual(expected, extracted)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
"GOTENBERG_LIVE" not in os.environ,
|
||||
reason="No gotenberg server",
|
||||
)
|
||||
def test_generate_pdf_from_mail_no_convert(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Fresh start
|
||||
WHEN:
|
||||
- pdf generation from simple eml file is requested
|
||||
THEN:
|
||||
- gotenberg is called and the resulting file is returned and contains the expected text.
|
||||
"""
|
||||
mail = self.parser.get_parsed(os.path.join(self.SAMPLE_FILES, "html.eml"))
|
||||
|
||||
pdf_path = os.path.join(self.parser.tempdir, "html.eml.pdf")
|
||||
|
||||
with open(pdf_path, "wb") as file:
|
||||
file.write(self.parser.generate_pdf_from_mail(mail))
|
||||
|
||||
extracted = extract_text(pdf_path)
|
||||
expected = extract_text(os.path.join(self.SAMPLE_FILES, "html.eml.pdf"))
|
||||
self.assertEqual(expected, extracted)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
"GOTENBERG_LIVE" not in os.environ,
|
||||
reason="No gotenberg server",
|
||||
)
|
||||
# Only run if convert is available
|
||||
@pytest.mark.skipif(
|
||||
"PAPERLESS_TEST_SKIP_CONVERT" in os.environ,
|
||||
reason="PAPERLESS_TEST_SKIP_CONVERT set, skipping Test",
|
||||
)
|
||||
def test_generate_pdf_from_mail(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Fresh start
|
||||
WHEN:
|
||||
- pdf generation from simple eml file is requested
|
||||
THEN:
|
||||
- gotenberg is called and the resulting file is returned and look as expected.
|
||||
"""
|
||||
mail = self.parser.get_parsed(os.path.join(self.SAMPLE_FILES, "html.eml"))
|
||||
|
||||
pdf_path = os.path.join(self.parser.tempdir, "html.eml.pdf")
|
||||
|
||||
with open(pdf_path, "wb") as file:
|
||||
file.write(self.parser.generate_pdf_from_mail(mail))
|
||||
|
||||
converted = os.path.join(
|
||||
self.parser.tempdir,
|
||||
"html.eml.pdf.webp",
|
||||
)
|
||||
run_convert(
|
||||
density=300,
|
||||
scale="500x5000>",
|
||||
alpha="remove",
|
||||
strip=True,
|
||||
trim=False,
|
||||
auto_orient=True,
|
||||
input_file=f"{pdf_path}", # Do net define an index to convert all pages.
|
||||
output_file=converted,
|
||||
logging_group=None,
|
||||
)
|
||||
self.assertTrue(os.path.isfile(converted))
|
||||
thumb_hash = self.imagehash(converted)
|
||||
|
||||
# The created pdf is not reproducible. But the converted image should always look the same.
|
||||
expected_hash = self.imagehash(
|
||||
os.path.join(self.SAMPLE_FILES, "html.eml.pdf.webp"),
|
||||
)
|
||||
self.assertEqual(
|
||||
thumb_hash,
|
||||
expected_hash,
|
||||
f"PDF looks different. Check if {converted} looks weird.",
|
||||
)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
"GOTENBERG_LIVE" not in os.environ,
|
||||
reason="No gotenberg server",
|
||||
)
|
||||
def test_generate_pdf_from_html_no_convert(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Fresh start
|
||||
WHEN:
|
||||
- pdf generation from html eml file is requested
|
||||
THEN:
|
||||
- gotenberg is called and the resulting file is returned and contains the expected text.
|
||||
"""
|
||||
|
||||
class MailAttachmentMock:
|
||||
def __init__(self, payload, content_id):
|
||||
self.payload = payload
|
||||
self.content_id = content_id
|
||||
|
||||
result = None
|
||||
|
||||
with open(os.path.join(self.SAMPLE_FILES, "sample.html")) as html_file:
|
||||
with open(os.path.join(self.SAMPLE_FILES, "sample.png"), "rb") as png_file:
|
||||
html = html_file.read()
|
||||
png = png_file.read()
|
||||
attachments = [
|
||||
MailAttachmentMock(png, "part1.pNdUSz0s.D3NqVtPg@example.de"),
|
||||
]
|
||||
result = self.parser.generate_pdf_from_html(html, attachments)
|
||||
|
||||
pdf_path = os.path.join(self.parser.tempdir, "sample.html.pdf")
|
||||
|
||||
with open(pdf_path, "wb") as file:
|
||||
file.write(result)
|
||||
|
||||
extracted = extract_text(pdf_path)
|
||||
expected = extract_text(os.path.join(self.SAMPLE_FILES, "sample.html.pdf"))
|
||||
self.assertEqual(expected, extracted)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
"GOTENBERG_LIVE" not in os.environ,
|
||||
reason="No gotenberg server",
|
||||
)
|
||||
# Only run if convert is available
|
||||
@pytest.mark.skipif(
|
||||
"PAPERLESS_TEST_SKIP_CONVERT" in os.environ,
|
||||
reason="PAPERLESS_TEST_SKIP_CONVERT set, skipping Test",
|
||||
)
|
||||
def test_generate_pdf_from_html(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Fresh start
|
||||
WHEN:
|
||||
- pdf generation from html eml file is requested
|
||||
THEN:
|
||||
- gotenberg is called and the resulting file is returned and look as expected.
|
||||
"""
|
||||
|
||||
class MailAttachmentMock:
|
||||
def __init__(self, payload, content_id):
|
||||
self.payload = payload
|
||||
self.content_id = content_id
|
||||
|
||||
result = None
|
||||
|
||||
with open(os.path.join(self.SAMPLE_FILES, "sample.html")) as html_file:
|
||||
with open(os.path.join(self.SAMPLE_FILES, "sample.png"), "rb") as png_file:
|
||||
html = html_file.read()
|
||||
png = png_file.read()
|
||||
attachments = [
|
||||
MailAttachmentMock(png, "part1.pNdUSz0s.D3NqVtPg@example.de"),
|
||||
]
|
||||
result = self.parser.generate_pdf_from_html(html, attachments)
|
||||
|
||||
pdf_path = os.path.join(self.parser.tempdir, "sample.html.pdf")
|
||||
|
||||
with open(pdf_path, "wb") as file:
|
||||
file.write(result)
|
||||
|
||||
converted = os.path.join(self.parser.tempdir, "sample.html.pdf.webp")
|
||||
run_convert(
|
||||
density=300,
|
||||
scale="500x5000>",
|
||||
alpha="remove",
|
||||
strip=True,
|
||||
trim=False,
|
||||
auto_orient=True,
|
||||
input_file=f"{pdf_path}", # Do net define an index to convert all pages.
|
||||
output_file=converted,
|
||||
logging_group=None,
|
||||
)
|
||||
self.assertTrue(os.path.isfile(converted))
|
||||
thumb_hash = self.imagehash(converted)
|
||||
|
||||
# The created pdf is not reproducible. But the converted image should always look the same.
|
||||
expected_hash = self.imagehash(
|
||||
os.path.join(self.SAMPLE_FILES, "sample.html.pdf.webp"),
|
||||
)
|
||||
|
||||
self.assertEqual(
|
||||
thumb_hash,
|
||||
expected_hash,
|
||||
f"PDF looks different. Check if {converted} looks weird. "
|
||||
f"If Rick Astley is shown, Gotenberg loads from web which is bad for Mail content.",
|
||||
)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
"GOTENBERG_LIVE" not in os.environ,
|
||||
reason="No gotenberg server",
|
||||
)
|
||||
def test_online_image_exception_on_not_available(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Fresh start
|
||||
WHEN:
|
||||
- nonexistent image is requested
|
||||
THEN:
|
||||
- An exception shall be thrown
|
||||
"""
|
||||
"""
|
||||
A public image is used in the html sample file. We have no control
|
||||
whether this image stays online forever, so here we check if we can detect if is not
|
||||
available anymore.
|
||||
"""
|
||||
|
||||
# Start by Testing if nonexistent URL really throws an Exception
|
||||
self.assertRaises(
|
||||
HTTPError,
|
||||
urlopen,
|
||||
"https://upload.wikimedia.org/wikipedia/en/f/f7/nonexistent.png",
|
||||
)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
"GOTENBERG_LIVE" not in os.environ,
|
||||
reason="No gotenberg server",
|
||||
)
|
||||
def test_is_online_image_still_available(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Fresh start
|
||||
WHEN:
|
||||
- A public image used in the html sample file is requested
|
||||
THEN:
|
||||
- No exception shall be thrown
|
||||
"""
|
||||
"""
|
||||
A public image is used in the html sample file. We have no control
|
||||
whether this image stays online forever, so here we check if it is still there
|
||||
"""
|
||||
|
||||
# Now check the URL used in samples/sample.html
|
||||
urlopen("https://upload.wikimedia.org/wikipedia/en/f/f7/RickRoll.png")
|
Loading…
x
Reference in New Issue
Block a user