Merge remote-tracking branch 'upstream/master'

This commit is contained in:
Jonas Winkler 2018-12-11 12:06:15 +01:00
commit 766109ae4e
46 changed files with 1171 additions and 695 deletions

25
.editorconfig Normal file
View File

@ -0,0 +1,25 @@
# EditorConfig: http://EditorConfig.org
root = true
[*]
indent_style = tab
indent_size = 2
insert_final_newline = true
trim_trailing_whitespace = true
end_of_line = lf
charset = utf-8
max_line_length = 79
[{*.html,*.css,*.js}]
max_line_length = off
[*.py]
indent_size = 4
indent_style = space
# Tests don't get a line width restriction. It's still a good idea to follow
# the 79 character rule, but in the interests of clarity, tests often need to
# violate it.
[**/test_*.py]
max_line_length = off

1
.gitignore vendored
View File

@ -73,7 +73,6 @@ db.sqlite3
# Other stuff that doesn't belong # Other stuff that doesn't belong
.virtualenv .virtualenv
virtualenv virtualenv
.vagrant
docker-compose.yml docker-compose.yml
docker-compose.env docker-compose.env

View File

@ -2,7 +2,7 @@ language: python
before_install: before_install:
- sudo apt-get update -qq - sudo apt-get update -qq
- sudo apt-get install -qq libpoppler-cpp-dev unpaper tesseract-ocr tesseract-ocr-eng - sudo apt-get install -qq libpoppler-cpp-dev unpaper tesseract-ocr tesseract-ocr-eng tesseract-ocr-cat
sudo: false sudo: false

View File

@ -1,4 +1,4 @@
FROM alpine:3.7 FROM alpine:3.8
LABEL maintainer="The Paperless Project https://github.com/danielquinn/paperless" \ LABEL maintainer="The Paperless Project https://github.com/danielquinn/paperless" \
contributors="Guy Addadi <addadi@gmail.com>, Pit Kleyersburg <pitkley@googlemail.com>, \ contributors="Guy Addadi <addadi@gmail.com>, Pit Kleyersburg <pitkley@googlemail.com>, \
@ -12,12 +12,11 @@ COPY scripts/docker-entrypoint.sh /sbin/docker-entrypoint.sh
ENV PAPERLESS_EXPORT_DIR=/export \ ENV PAPERLESS_EXPORT_DIR=/export \
PAPERLESS_CONSUMPTION_DIR=/consume PAPERLESS_CONSUMPTION_DIR=/consume
# Install dependencies
RUN apk --no-cache --update add \ RUN apk update --no-cache && apk add python3 gnupg libmagic libpq bash shadow curl \
python3 gnupg libmagic bash shadow curl \ sudo poppler tesseract-ocr imagemagick ghostscript unpaper optipng && \
sudo poppler tesseract-ocr imagemagick ghostscript unpaper && \ apk add --virtual .build-dependencies \
apk --no-cache add --virtual .build-dependencies \ python3-dev poppler-dev postgresql-dev gcc g++ musl-dev zlib-dev jpeg-dev && \
python3-dev poppler-dev gcc g++ musl-dev zlib-dev jpeg-dev && \
# Install python dependencies # Install python dependencies
python3 -m ensurepip && \ python3 -m ensurepip && \
rm -r /usr/lib/python*/ensurepip && \ rm -r /usr/lib/python*/ensurepip && \

View File

@ -25,6 +25,8 @@ python-dateutil = "*"
python-dotenv = "*" python-dotenv = "*"
python-gnupg = "*" python-gnupg = "*"
pytz = "*" pytz = "*"
sphinx = "*"
tox = "*"
pycodestyle = "*" pycodestyle = "*"
pytest = "*" pytest = "*"
pytest-cov = "*" pytest-cov = "*"
@ -35,6 +37,3 @@ pytest-xdist = "*"
[dev-packages] [dev-packages]
ipython = "*" ipython = "*"
sphinx = "*"
tox = "*"

623
Pipfile.lock generated
View File

@ -1,7 +1,7 @@
{ {
"_meta": { "_meta": {
"hash": { "hash": {
"sha256": "6d8bad24aa5d0c102b13b5ae27acba04836cd5a07a4003cb2763de1e0a3406b7" "sha256": "3782f7e6b5461c39c8fd0d0048a4622418f247439113bd3cdc91712fd47036f6"
}, },
"pipfile-spec": 6, "pipfile-spec": 6,
"requires": {}, "requires": {},
@ -14,12 +14,18 @@
] ]
}, },
"default": { "default": {
"alabaster": {
"hashes": [
"sha256:446438bdcca0e05bd45ea2de1668c1d9b032e1a9154c2c259092d77031ddd359",
"sha256:a661d72d58e6ea8a57f7a86e37d86716863ee5e92788398526d58b26a4e4dc02"
],
"version": "==0.7.12"
},
"apipkg": { "apipkg": {
"hashes": [ "hashes": [
"sha256:37228cda29411948b422fae072f57e31d3396d2ee1c9783775980ee9c9990af6", "sha256:37228cda29411948b422fae072f57e31d3396d2ee1c9783775980ee9c9990af6",
"sha256:58587dd4dc3daefad0487f6d9ae32b4542b185e1c36db6993290e7c41ca2b47c" "sha256:58587dd4dc3daefad0487f6d9ae32b4542b185e1c36db6993290e7c41ca2b47c"
], ],
"markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'",
"version": "==1.5" "version": "==1.5"
}, },
"atomicwrites": { "atomicwrites": {
@ -27,7 +33,6 @@
"sha256:0312ad34fcad8fac3704d441f7b317e50af620823353ec657a53e981f92920c0", "sha256:0312ad34fcad8fac3704d441f7b317e50af620823353ec657a53e981f92920c0",
"sha256:ec9ae8adaae229e4f8446952d204a3e4b5fdd2d099f9be3aaf556120135fb3ee" "sha256:ec9ae8adaae229e4f8446952d204a3e4b5fdd2d099f9be3aaf556120135fb3ee"
], ],
"markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'",
"version": "==1.2.1" "version": "==1.2.1"
}, },
"attrs": { "attrs": {
@ -37,12 +42,26 @@
], ],
"version": "==18.2.0" "version": "==18.2.0"
}, },
"babel": {
"hashes": [
"sha256:6778d85147d5d85345c14a26aada5e478ab04e39b078b0745ee6870c2b5cf669",
"sha256:8cba50f48c529ca3fa18cf81fa9403be176d374ac4d60738b839122dfaaa3d23"
],
"version": "==2.6.0"
},
"backcall": {
"hashes": [
"sha256:38ecd85be2c1e78f77fd91700c76e14667dc21e2713b63876c0eb901196e01e4",
"sha256:bbbf4b1e5cd2bdb08f915895b51081c041bac22394fdfcfdfbe9f14b77c08bf2"
],
"version": "==0.1.0"
},
"certifi": { "certifi": {
"hashes": [ "hashes": [
"sha256:376690d6f16d32f9d1fe8932551d80b23e9d393a8578c5633a2ed39a64861638", "sha256:339dc09518b07e2fa7eda5450740925974815557727d6bd35d319c1524a04a4c",
"sha256:456048c7e371c089d0a77a5212fb37a2c2dce1e24146e3b7e0261736aaeaa22a" "sha256:6d58c986d22b038c8c0df30d639f23a3e6d172a05c3583e766f4c0b785c0986a"
], ],
"version": "==2018.8.24" "version": "==2018.10.15"
}, },
"chardet": { "chardet": {
"hashes": [ "hashes": [
@ -55,6 +74,7 @@
"hashes": [ "hashes": [
"sha256:03481e81d558d30d230bc12999e3edffe392d244349a90f4ef9b88425fac74ba", "sha256:03481e81d558d30d230bc12999e3edffe392d244349a90f4ef9b88425fac74ba",
"sha256:0b136648de27201056c1869a6c0d4e23f464750fd9a9ba9750b8336a244429ed", "sha256:0b136648de27201056c1869a6c0d4e23f464750fd9a9ba9750b8336a244429ed",
"sha256:0bf8cbbd71adfff0ef1f3a1531e6402d13b7b01ac50a79c97ca15f030dba6306",
"sha256:10a46017fef60e16694a30627319f38a2b9b52e90182dddb6e37dcdab0f4bf95", "sha256:10a46017fef60e16694a30627319f38a2b9b52e90182dddb6e37dcdab0f4bf95",
"sha256:198626739a79b09fa0a2f06e083ffd12eb55449b5f8bfdbeed1df4910b2ca640", "sha256:198626739a79b09fa0a2f06e083ffd12eb55449b5f8bfdbeed1df4910b2ca640",
"sha256:23d341cdd4a0371820eb2b0bd6b88f5003a7438bbedb33688cd33b8eae59affd", "sha256:23d341cdd4a0371820eb2b0bd6b88f5003a7438bbedb33688cd33b8eae59affd",
@ -83,18 +103,18 @@
"sha256:c1bb572fab8208c400adaf06a8133ac0712179a334c09224fb11393e920abcdd", "sha256:c1bb572fab8208c400adaf06a8133ac0712179a334c09224fb11393e920abcdd",
"sha256:de4418dadaa1c01d497e539210cb6baa015965526ff5afc078c57ca69160108d", "sha256:de4418dadaa1c01d497e539210cb6baa015965526ff5afc078c57ca69160108d",
"sha256:e05cb4d9aad6233d67e0541caa7e511fa4047ed7750ec2510d466e806e0255d6", "sha256:e05cb4d9aad6233d67e0541caa7e511fa4047ed7750ec2510d466e806e0255d6",
"sha256:f05a636b4564104120111800021a92e43397bc12a5c72fed7036be8556e0029e",
"sha256:f3f501f345f24383c0000395b26b726e46758b71393267aeae0bd36f8b3ade80" "sha256:f3f501f345f24383c0000395b26b726e46758b71393267aeae0bd36f8b3ade80"
], ],
"markers": "python_version >= '2.6' and python_version != '3.2.*' and python_version != '3.0.*' and python_version != '3.1.*' and python_version < '4'",
"version": "==4.5.1" "version": "==4.5.1"
}, },
"coveralls": { "coveralls": {
"hashes": [ "hashes": [
"sha256:9dee67e78ec17b36c52b778247762851c8e19a893c9a14e921a2fc37f05fac22", "sha256:ab638e88d38916a6cedbf80a9cd8992d5fa55c77ab755e262e00b36792b7cd6d",
"sha256:aec5a1f5e34224b9089664a1b62217732381c7de361b6ed1b3c394d7187b352a" "sha256:b2388747e2529fa4c669fb1e3e2756e4e07b6ee56c7d9fce05f35ccccc913aa0"
], ],
"index": "pypi", "index": "pypi",
"version": "==1.5.0" "version": "==1.5.1"
}, },
"dateparser": { "dateparser": {
"hashes": [ "hashes": [
@ -104,13 +124,20 @@
"index": "pypi", "index": "pypi",
"version": "==0.7.0" "version": "==0.7.0"
}, },
"decorator": {
"hashes": [
"sha256:2c51dff8ef3c447388fe5e4453d24a2bf128d3a4c32af3fabef1f01c6851ab82",
"sha256:c39efa13fbdeb4506c476c9b3babf6a718da943dab7811c206005a4a956c080c"
],
"version": "==4.3.0"
},
"django": { "django": {
"hashes": [ "hashes": [
"sha256:0c5b65847d00845ee404bbc0b4a85686f15eb3001ffddda3db4e9baa265bf136", "sha256:25df265e1fdb74f7e7305a1de620a84681bcc9c05e84a3ed97e4a1a63024f18d",
"sha256:68aeea369a8130259354b6ba1fa9babe0c5ee6bced505dea4afcd00f765ae38b" "sha256:d6d94554abc82ca37e447c3d28958f5ac39bd7d4adaa285543ae97fb1129fd69"
], ],
"index": "pypi", "index": "pypi",
"version": "==2.0.8" "version": "==2.0.9"
}, },
"django-cors-headers": { "django-cors-headers": {
"hashes": [ "hashes": [
@ -130,11 +157,11 @@
}, },
"django-extensions": { "django-extensions": {
"hashes": [ "hashes": [
"sha256:1f626353a11479014bfe0d77e76d8f866ebca1bb5d595cb57b776230b9e0eb92", "sha256:30cb6a8c7d6f75a55edf0c0c4491bd98f8264ae1616ce105f9cecac4387edd07",
"sha256:f21b898598a1628cb73017fb9672e2c5e624133be9764f0eb138e0abf8a62b62" "sha256:4ad86a7a5e84f1c77db030761ae87a600647250c652030a2b71a16e87f3a3d62"
], ],
"index": "pypi", "index": "pypi",
"version": "==2.1.2" "version": "==2.1.3"
}, },
"django-filter": { "django-filter": {
"hashes": [ "hashes": [
@ -146,11 +173,11 @@
}, },
"djangorestframework": { "djangorestframework": {
"hashes": [ "hashes": [
"sha256:b6714c3e4b0f8d524f193c91ecf5f5450092c2145439ac2769711f7eba89a9d9", "sha256:607865b0bb1598b153793892101d881466bd5a991de12bd6229abb18b1c86136",
"sha256:c375e4f95a3a64fccac412e36fb42ba36881e52313ec021ef410b40f67cddca4" "sha256:63f76cbe1e7d12b94c357d7e54401103b2e52aef0f7c1650d6c820ad708776e5"
], ],
"index": "pypi", "index": "pypi",
"version": "==3.8.2" "version": "==3.9.0"
}, },
"docopt": { "docopt": {
"hashes": [ "hashes": [
@ -158,12 +185,19 @@
], ],
"version": "==0.6.2" "version": "==0.6.2"
}, },
"docutils": {
"hashes": [
"sha256:02aec4bd92ab067f6ff27a38a38a41173bf01bed8f89157768c1573f53e474a6",
"sha256:51e64ef2ebfb29cae1faa133b3710143496eca21c530f3f71424d77687764274",
"sha256:7a4bd47eaf6596e1295ecb11361139febe29b084a87bf005bf899f9a42edc3c6"
],
"version": "==0.14"
},
"execnet": { "execnet": {
"hashes": [ "hashes": [
"sha256:a7a84d5fa07a089186a329528f127c9d73b9de57f1a1131b82bb5320ee651f6a", "sha256:a7a84d5fa07a089186a329528f127c9d73b9de57f1a1131b82bb5320ee651f6a",
"sha256:fc155a6b553c66c838d1a22dba1dc9f5f505c43285a878c6f74a79c024750b83" "sha256:fc155a6b553c66c838d1a22dba1dc9f5f505c43285a878c6f74a79c024750b83"
], ],
"markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'",
"version": "==1.5.0" "version": "==1.5.0"
}, },
"factory-boy": { "factory-boy": {
@ -176,11 +210,17 @@
}, },
"faker": { "faker": {
"hashes": [ "hashes": [
"sha256:ea7cfd3aeb1544732d08bd9cfba40c5b78e3a91e17b1a0698ab81bfc5554c628", "sha256:2621643b80a10b91999925cfd20f64d2b36f20bf22136bbdc749bb57d6ffe124",
"sha256:f6d67f04abfb2b4bea7afc7fa6c18cf4c523a67956e455668be9ae42bccc21ad" "sha256:5ed822d31bd2d6edf10944d176d30dc9c886afdd381eefb7ba8b7aad86171646"
], ],
"markers": "python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.2.*' and python_version >= '2.7'", "version": "==0.9.2"
"version": "==0.9.0" },
"filelock": {
"hashes": [
"sha256:b8d5ca5ca1c815e1574aee746650ea7301de63d87935b3463d26368b76e31633",
"sha256:d610c1bb404daf85976d7a82eb2ada120f04671007266b708606565dd03b5be6"
],
"version": "==3.0.10"
}, },
"filemagic": { "filemagic": {
"hashes": [ "hashes": [
@ -190,12 +230,14 @@
"version": "==1.6" "version": "==1.6"
}, },
"fuzzywuzzy": { "fuzzywuzzy": {
"extras": [
"speedup"
],
"hashes": [ "hashes": [
"sha256:3759bc6859daa0eecef8c82b45404bdac20c23f23136cf4c18b46b426bbc418f", "sha256:3759bc6859daa0eecef8c82b45404bdac20c23f23136cf4c18b46b426bbc418f",
"sha256:5b36957ccf836e700f4468324fa80ba208990385392e217be077d5cd738ae602" "sha256:5b36957ccf836e700f4468324fa80ba208990385392e217be077d5cd738ae602"
], ],
"index": "pypi", "index": "pypi",
"markers": null,
"version": "==0.15.0" "version": "==0.15.0"
}, },
"gunicorn": { "gunicorn": {
@ -213,6 +255,13 @@
], ],
"version": "==2.7" "version": "==2.7"
}, },
"imagesize": {
"hashes": [
"sha256:3f349de3eb99145973fefb7dbe38554414e5c30abd0c8e4b970a7c9d09f3a1d8",
"sha256:f3832918bc3c66617f92e35f5d70729187676313caa60c187eb0f28b8fe5e3b5"
],
"version": "==1.1.0"
},
"inotify-simple": { "inotify-simple": {
"hashes": [ "hashes": [
"sha256:fc2c10dd73278a1027d0663f2db51240af5946390f363a154361406ebdddd8dd" "sha256:fc2c10dd73278a1027d0663f2db51240af5946390f363a154361406ebdddd8dd"
@ -220,6 +269,35 @@
"index": "pypi", "index": "pypi",
"version": "==1.1.8" "version": "==1.1.8"
}, },
"ipython": {
"hashes": [
"sha256:a5781d6934a3341a1f9acb4ea5acdc7ea0a0855e689dbe755d070ca51e995435",
"sha256:b10a7ddd03657c761fc503495bc36471c8158e3fc948573fb9fe82a7029d8efd"
],
"index": "pypi",
"version": "==7.1.1"
},
"ipython-genutils": {
"hashes": [
"sha256:72dd37233799e619666c9f639a9da83c34013a73e8bbc79a7a6348d93c61fab8",
"sha256:eb2e116e75ecef9d4d228fdc66af54269afa26ab4463042e33785b887c628ba8"
],
"version": "==0.2.0"
},
"jedi": {
"hashes": [
"sha256:0191c447165f798e6a730285f2eee783fff81b0d3df261945ecb80983b5c3ca7",
"sha256:b7493f73a2febe0dc33d51c99b474547f7f6c0b2c8fb2b21f453eef204c12148"
],
"version": "==0.13.1"
},
"jinja2": {
"hashes": [
"sha256:74c935a1b8bb9a3947c50a54766a969d4846290e1e788ea44c1392163723c3bd",
"sha256:f84be1bb0040caca4cea721fcbbbbd61f9be9464ca236387158b0feea01914a4"
],
"version": "==2.10"
},
"langdetect": { "langdetect": {
"hashes": [ "hashes": [
"sha256:91a170d5f0ade380db809b3ba67f08e95fe6c6c8641f96d67a51ff7e98a9bf30" "sha256:91a170d5f0ade380db809b3ba67f08e95fe6c6c8641f96d67a51ff7e98a9bf30"
@ -227,6 +305,12 @@
"index": "pypi", "index": "pypi",
"version": "==1.0.7" "version": "==1.0.7"
}, },
"markupsafe": {
"hashes": [
"sha256:a6be69091dac236ea9c6bc7d012beab42010fa914c459791d627dad4910eb665"
],
"version": "==1.0"
},
"more-itertools": { "more-itertools": {
"hashes": [ "hashes": [
"sha256:c187a73da93e7a8acc0001572aebc7e3c69daf7bf6881a2cea10650bd4420092", "sha256:c187a73da93e7a8acc0001572aebc7e3c69daf7bf6881a2cea10650bd4420092",
@ -235,64 +319,106 @@
], ],
"version": "==4.3.0" "version": "==4.3.0"
}, },
"packaging": {
"hashes": [
"sha256:0886227f54515e592aaa2e5a553332c73962917f2831f1b0f9b9f4380a4b9807",
"sha256:f95a1e147590f204328170981833854229bb2912ac3d5f89e2a8ccd2834800c9"
],
"version": "==18.0"
},
"parso": {
"hashes": [
"sha256:35704a43a3c113cce4de228ddb39aab374b8004f4f2407d070b6a2ca784ce8a2",
"sha256:895c63e93b94ac1e1690f5fdd40b65f07c8171e3e53cbd7793b5b96c0e0a7f24"
],
"version": "==0.3.1"
},
"pdftotext": { "pdftotext": {
"hashes": [ "hashes": [
"sha256:b7312302007e19fc784263a321b41682f01a582af84e14200cef53b3f4e69a50" "sha256:e3ad11efe0aa22cbfc46aa1296b2ea5a52ad208b778288311f2801adef178ccb"
], ],
"index": "pypi", "index": "pypi",
"version": "==2.1.0" "version": "==2.1.1"
},
"pexpect": {
"hashes": [
"sha256:2a8e88259839571d1251d278476f3eec5db26deb73a70be5ed5dc5435e418aba",
"sha256:3fbd41d4caf27fa4a377bfd16fef87271099463e6fa73e92a52f92dfee5d425b"
],
"markers": "sys_platform != 'win32'",
"version": "==4.6.0"
},
"pickleshare": {
"hashes": [
"sha256:87683d47965c1da65cdacaf31c8441d12b8044cdec9aca500cd78fc2c683afca",
"sha256:9649af414d74d4df115d5d718f82acb59c9d418196b7b4290ed47a12ce62df56"
],
"version": "==0.7.5"
}, },
"pillow": { "pillow": {
"hashes": [ "hashes": [
"sha256:00def5b638994f888d1058e4d17c86dec8e1113c3741a0a8a659039aec59a83a", "sha256:00203f406818c3f45d47bb8fe7e67d3feddb8dcbbd45a289a1de7dd789226360",
"sha256:026449b64e559226cdb8e6d8c931b5965d8fc90ec18ebbb0baa04c5b36503c72", "sha256:0616f800f348664e694dddb0b0c88d26761dd5e9f34e1ed7b7a7d2da14b40cb7",
"sha256:03dbb224ee196ef30ed2156d41b579143e1efeb422974719a5392fc035e4f574", "sha256:1f7908aab90c92ad85af9d2fec5fc79456a89b3adcc26314d2cde0e238bd789e",
"sha256:03eb0e04f929c102ae24bc436bf1c0c60a4e63b07ebd388e84d8b219df3e6acd", "sha256:2ea3517cd5779843de8a759c2349a3cd8d3893e03ab47053b66d5ec6f8bc4f93",
"sha256:1be66b9a89e367e7d20d6cae419794997921fe105090fafd86ef39e20a3baab2", "sha256:48a9f0538c91fc136b3a576bee0e7cd174773dc9920b310c21dcb5519722e82c",
"sha256:1e977a3ed998a599bda5021fb2c2889060617627d3ae228297a529a082a3cd5c", "sha256:5280ebc42641a1283b7b1f2c20e5b936692198b9dd9995527c18b794850be1a8",
"sha256:22cf3406d135cfcc13ec6228ade774c8461e125c940e80455f500638429be273", "sha256:5e34e4b5764af65551647f5cc67cf5198c1d05621781d5173b342e5e55bf023b",
"sha256:24adccf1e834f82718c7fc8e3ec1093738da95144b8b1e44c99d5fc7d3e9c554", "sha256:63b120421ab85cad909792583f83b6ca3584610c2fe70751e23f606a3c2e87f0",
"sha256:2a3e362c97a5e6a259ee9cd66553292a1f8928a5bdfa3622fdb1501570834612", "sha256:696b5e0109fe368d0057f484e2e91717b49a03f1e310f857f133a4acec9f91dd",
"sha256:3832e26ecbc9d8a500821e3a1d3765bda99d04ae29ffbb2efba49f5f788dc934", "sha256:870ed021a42b1b02b5fe4a739ea735f671a84128c0a666c705db2cb9abd528eb",
"sha256:4fd1f0c2dc02aaec729d91c92cd85a2df0289d88e9f68d1e8faba750bb9c4786", "sha256:916da1c19e4012d06a372127d7140dae894806fad67ef44330e5600d77833581",
"sha256:4fda62030f2c515b6e2e673c57caa55cb04026a81968f3128aae10fc28e5cc27", "sha256:9303a289fa0811e1c6abd9ddebfc770556d7c3311cb2b32eff72164ddc49bc64",
"sha256:5044d75a68b49ce36a813c82d8201384207112d5d81643937fc758c05302f05b", "sha256:9577888ecc0ad7d06c3746afaba339c94d62b59da16f7a5d1cff9e491f23dace",
"sha256:522184556921512ec484cb93bd84e0bab915d0ac5a372d49571c241a7f73db62", "sha256:987e1c94a33c93d9b209315bfda9faa54b8edfce6438a1e93ae866ba20de5956",
"sha256:5914cff11f3e920626da48e564be6818831713a3087586302444b9c70e8552d9", "sha256:99a3bbdbb844f4fb5d6dd59fac836a40749781c1fa63c563bc216c27aef63f60",
"sha256:6661a7908d68c4a133e03dac8178287aa20a99f841ea90beeb98a233ae3fd710", "sha256:99db8dc3097ceafbcff9cb2bff384b974795edeb11d167d391a02c7bfeeb6e16",
"sha256:79258a8df3e309a54c7ef2ef4a59bb8e28f7e4a8992a3ad17c24b1889ced44f3", "sha256:a5a96cf49eb580756a44ecf12949e52f211e20bffbf5a95760ac14b1e499cd37",
"sha256:7d74c20b8f1c3e99d3f781d3b8ff5abfefdd7363d61e23bdeba9992ff32cc4b4", "sha256:aa6ca3eb56704cdc0d876fc6047ffd5ee960caad52452fbee0f99908a141a0ae",
"sha256:81918afeafc16ba5d9d0d4e9445905f21aac969a4ebb6f2bff4b9886da100f4b", "sha256:aade5e66795c94e4a2b2624affeea8979648d1b0ae3fcee17e74e2c647fc4a8a",
"sha256:8194d913ca1f459377c8a4ed8f9b7ad750068b8e0e3f3f9c6963fcc87a84515f", "sha256:b78905860336c1d292409e3df6ad39cc1f1c7f0964e66844bbc2ebfca434d073",
"sha256:84d5d31200b11b3c76fab853b89ac898bf2d05c8b3da07c1fcc23feb06359d6e", "sha256:b92f521cdc4e4a3041cc343625b699f20b0b5f976793fb45681aac1efda565f8",
"sha256:989981db57abffb52026b114c9a1f114c7142860a6d30a352d28f8cbf186500b", "sha256:bfde84bbd6ae5f782206d454b67b7ee8f7f818c29b99fd02bf022fd33bab14cb",
"sha256:a3d7511d3fad1618a82299aab71a5fceee5c015653a77ffea75ced9ef917e71a", "sha256:c2b62d3df80e694c0e4a0ed47754c9480521e25642251b3ab1dff050a4e60409",
"sha256:b3ef168d4d6fd4fa6685aef7c91400f59f7ab1c0da734541f7031699741fb23f", "sha256:c5e2be6c263b64f6f7656e23e18a4a9980cffc671442795682e8c4e4f815dd9f",
"sha256:c1c5792b6e74bbf2af0f8e892272c2a6c48efa895903211f11b8342e03129fea", "sha256:c99aa3c63104e0818ec566f8ff3942fb7c7a8f35f9912cb63fd8e12318b214b2",
"sha256:c5dcb5a56aebb8a8f2585042b2f5c496d7624f0bcfe248f0cc33ceb2fd8d39e7", "sha256:dae06620d3978da346375ebf88b9e2dd7d151335ba668c995aea9ed07af7add4",
"sha256:e2bed4a04e2ca1050bb5f00865cf2f83c0b92fd62454d9244f690fcd842e27a4", "sha256:db5499d0710823fa4fb88206050d46544e8f0e0136a9a5f5570b026584c8fd74",
"sha256:e87a527c06319428007e8c30511e1f0ce035cb7f14bb4793b003ed532c3b9333", "sha256:f36baafd82119c4a114b9518202f2a983819101dcc14b26e43fc12cbefdce00e",
"sha256:f63e420180cbe22ff6e32558b612e75f50616fc111c5e095a4631946c782e109", "sha256:f52b79c8796d81391ab295b04e520bda6feed54d54931708872e8f9ae9db0ea1",
"sha256:f8b3d413c5a8f84b12cd4c5df1d8e211777c9852c6be3ee9c094b626644d3eab" "sha256:ff8cff01582fa1a7e533cb97f628531c4014af4b5f38e33cdcfe5eec29b6d888"
], ],
"index": "pypi", "index": "pypi",
"version": "==5.2.0" "version": "==5.3.0"
}, },
"pluggy": { "pluggy": {
"hashes": [ "hashes": [
"sha256:6e3836e39f4d36ae72840833db137f7b7d35105079aee6ec4a62d9f80d594dd1", "sha256:447ba94990e8014ee25ec853339faf7b0fc8050cdc3289d4d71f7f410fb90095",
"sha256:95eb8364a4708392bae89035f45341871286a333f749c3141c20573d2b3876e1" "sha256:bde19360a8ec4dfd8a20dcb811780a30998101f078fc7ded6162f0076f50508f"
], ],
"markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'", "version": "==0.8.0"
"version": "==0.7.1" },
"prompt-toolkit": {
"hashes": [
"sha256:c1d6aff5252ab2ef391c2fe498ed8c088066f66bc64a8d5c095bbf795d9fec34",
"sha256:d4c47f79b635a0e70b84fdb97ebd9a274203706b1ee5ed44c10da62755cf3ec9",
"sha256:fd17048d8335c1e6d5ee403c3569953ba3eb8555d710bfc548faf0712666ea39"
],
"version": "==2.0.7"
},
"ptyprocess": {
"hashes": [
"sha256:923f299cc5ad920c68f2bc0bc98b75b9f838b93b599941a6b63ddbc2476394c0",
"sha256:d7cc528d76e76342423ca640335bd3633420dc1366f258cb31d05e865ef5ca1f"
],
"version": "==0.6.0"
}, },
"py": { "py": {
"hashes": [ "hashes": [
"sha256:06a30435d058473046be836d3fc4f27167fd84c45b99704f2fb5509ef61f9af1", "sha256:bf92637198836372b520efcba9e020c330123be8ce527e535d185ed4b6f45694",
"sha256:50402e9d1c9005d759426988a492e0edaadb7f4e68bcddfea586bc7432d009c6" "sha256:e76826342cefe3c3d5f7e8ee4316b80d1dd8a300781612ddbc765c17ba25a6c6"
], ],
"markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'", "version": "==1.7.0"
"version": "==1.6.0"
}, },
"pycodestyle": { "pycodestyle": {
"hashes": [ "hashes": [
@ -302,6 +428,13 @@
"index": "pypi", "index": "pypi",
"version": "==2.4.0" "version": "==2.4.0"
}, },
"pygments": {
"hashes": [
"sha256:78f3f434bcc5d6ee09020f92ba487f95ba50f1e3ef83ae96b9d5ffa1bab25c5d",
"sha256:dbae1046def0efb574852fab9e90209b23f556367b5a320c0bcb871c77c3e8cc"
],
"version": "==2.2.0"
},
"pyocr": { "pyocr": {
"hashes": [ "hashes": [
"sha256:b6ba6263fd92da56627dff6d263d991a2246aacd117d1788f11b93f419ca395f" "sha256:b6ba6263fd92da56627dff6d263d991a2246aacd117d1788f11b93f419ca395f"
@ -309,13 +442,20 @@
"index": "pypi", "index": "pypi",
"version": "==0.5.3" "version": "==0.5.3"
}, },
"pyparsing": {
"hashes": [
"sha256:40856e74d4987de5d01761a22d1621ae1c7f8774585acae358aa5c5936c6c90b",
"sha256:f353aab21fd474459d97b709e527b5571314ee5f067441dc9f88e33eecd96592"
],
"version": "==2.3.0"
},
"pytest": { "pytest": {
"hashes": [ "hashes": [
"sha256:453cbbbe5ce6db38717d282b758b917de84802af4288910c12442984bde7b823", "sha256:a9e5e8d7ab9d5b0747f37740276eb362e6a76275d76cebbb52c6049d93b475db",
"sha256:a8a07f84e680482eb51e244370aaf2caa6301ef265f37c2bdefb3dd3b663f99d" "sha256:bf47e8ed20d03764f963f0070ff1c8fda6e2671fc5dd562a4d3b7148ad60f5ca"
], ],
"index": "pypi", "index": "pypi",
"version": "==3.8.0" "version": "==3.9.3"
}, },
"pytest-cov": { "pytest-cov": {
"hashes": [ "hashes": [
@ -327,11 +467,11 @@
}, },
"pytest-django": { "pytest-django": {
"hashes": [ "hashes": [
"sha256:2d2e0a618d91c280d463e90bcbea9b4e417609157f611a79685b1c561c4c0836", "sha256:49e9ffc856bc6a1bec1c26c5c7b7213dff7cc8bc6b64d624c4d143d04aff0bcf",
"sha256:59683def396923b78d7e191a7086a48193f8d5db869ace79acb38f906522bc7b" "sha256:b379282feaf89069cb790775ab6bbbd2bd2038a68c7ef9b84a41898e0b551081"
], ],
"index": "pypi", "index": "pypi",
"version": "==3.4.2" "version": "==3.4.3"
}, },
"pytest-env": { "pytest-env": {
"hashes": [ "hashes": [
@ -345,7 +485,6 @@
"sha256:e4500cd0509ec4a26535f7d4112a8cc0f17d3a41c29ffd4eab479d2a55b30805", "sha256:e4500cd0509ec4a26535f7d4112a8cc0f17d3a41c29ffd4eab479d2a55b30805",
"sha256:f275cb48a73fc61a6710726348e1da6d68a978f0ec0c54ece5a5fae5977e5a08" "sha256:f275cb48a73fc61a6710726348e1da6d68a978f0ec0c54ece5a5fae5977e5a08"
], ],
"markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'",
"version": "==0.2" "version": "==0.2"
}, },
"pytest-sugar": { "pytest-sugar": {
@ -357,19 +496,19 @@
}, },
"pytest-xdist": { "pytest-xdist": {
"hashes": [ "hashes": [
"sha256:0875deac20f6d96597036bdf63970887a6f36d28289c2f6682faf652dfea687b", "sha256:3bc9dcb6ff47e607d3c710727cd9996fd7ac1466d405c3b40bb495da99b6b669",
"sha256:28e25e79698b2662b648319d3971c0f9ae0e6500f88258ccb9b153c31110ba9b" "sha256:8e188d13ce6614c7a678179a76f46231199ffdfe6163de031c17e62ffa256917"
], ],
"index": "pypi", "index": "pypi",
"version": "==1.23.0" "version": "==1.24.0"
}, },
"python-dateutil": { "python-dateutil": {
"hashes": [ "hashes": [
"sha256:1adb80e7a782c12e52ef9a8182bebeb73f1d7e24e374397af06fb4956c8dc5c0", "sha256:063df5763652e21de43de7d9e00ccf239f953a832941e37be541614732cdfc93",
"sha256:e27001de32f627c22380a688bcc43ce83504a7bc5da472209b4c70f02829f0b8" "sha256:88f9287c0174266bb0d8cedd395cfba9c58e87e5ad86b2ce58859bc11be3cf02"
], ],
"index": "pypi", "index": "pypi",
"version": "==2.7.3" "version": "==2.7.5"
}, },
"python-dotenv": { "python-dotenv": {
"hashes": [ "hashes": [
@ -391,273 +530,37 @@
"hashes": [ "hashes": [
"sha256:033a11de5e3d19ea25c9302d11224e1a1898fe5abd23c61c7c360c25195e3eb1" "sha256:033a11de5e3d19ea25c9302d11224e1a1898fe5abd23c61c7c360c25195e3eb1"
], ],
"markers": "extra == 'speedup'",
"version": "==0.12.0" "version": "==0.12.0"
}, },
"pytz": { "pytz": {
"hashes": [ "hashes": [
"sha256:a061aa0a9e06881eb8b3b2b43f05b9439d6583c206d0a6c340ff72a7b6669053", "sha256:31cb35c89bd7d333cd32c5f278fca91b523b0834369e757f4c5641ea252236ca",
"sha256:ffb9ef1de172603304d9d2819af6f5ece76f2e85ec10692a524dd876e72bf277" "sha256:8e0f8568c118d3077b46be7d654cc8167fa916092e28320cde048e54bfc9f1e6"
], ],
"index": "pypi", "index": "pypi",
"version": "==2018.5" "version": "==2018.7"
}, },
"regex": { "regex": {
"hashes": [ "hashes": [
"sha256:22d7ef8c2df344328a8a3c61edade2ee714e5de9360911d22a9213931c769faa", "sha256:0ef96690c3d2294155b7d44187ca4a151e45c931cb768e106ba464a9fa64c5da",
"sha256:3a699780c6b712c67dc23207b129ccc6a7e1270233f7aadead3ea3f83c893702", "sha256:251683e01a3bcacd9188acf0d4caf7b29a3b963c843159311825613ae144cddb",
"sha256:42f460d349baebd5faec02a0c920988fb0300b24baf898d9c139886565b66b6c", "sha256:3fe15a75fe00f04d1ec16713d55cf1e206077c450267a10b33318756fb8b3f99",
"sha256:43bf3d79940cbdf19adda838d8b26b28b47bec793cda46590b5b25703742f440", "sha256:53a962f9dc28cdf403978a142cb1e054479759ad64d312a999f9f042c25b5c9a",
"sha256:47d6c7f0588ef33464e00023067c4e7cce68e0d6a686a73c7ee15abfdad503d4", "sha256:8bd1da6a93d32336a5e5432886dd8543004f0591c39b83dbfa60705cccdf414d",
"sha256:5b879f59f25ed9b91bc8693a9a994014b431f224f492519ad0255ce6b54b83e5", "sha256:b5423061918f602e9342b54d746ac31c598d328ecaf4ef0618763e960c926fd4",
"sha256:8ba0093c412900f636b0f826c597a0c3ea0e395344bc99894ddefe88b76c9c7e", "sha256:d80ebc65b1f7d0403117f59309c16eac24be6a0bc730b593a79f703462858d94",
"sha256:a4789254a1a0bd7a637036cce0b7ed72d8cc864e93f2e9cfd10ac00ae27bb7b0", "sha256:fd8419979639b7de7fb964a13bce3ac47e6fe33043b83de0398c3067986e5659",
"sha256:b73cea07117dca888b0c3671770b501bef19aac9c45c8ffdb5bea2cca2377b0a", "sha256:ff2f15b2b0b4b58ba8a1de651780a0d3fd54f96ad6b77dceb77695220e5d7b7a"
"sha256:d3eb59fa3e5b5438438ec97acd9dc86f077428e020b015b43987e35bea68ef4c",
"sha256:d51d232b4e2f106deaf286001f563947fee255bc5bd209a696f027e15cf0a1e7",
"sha256:d59b03131a8e35061b47a8f186324a95eaf30d5f6ee9cc0637e7b87d29c7c9b5",
"sha256:dd705df1b47470388fc4630e4df3cbbe7677e2ab80092a1c660cae630a307b2d",
"sha256:e87fffa437a4b00afb17af785da9b01618425d6cd984c677639deb937037d8f2",
"sha256:ed40e0474ab5ab228a8d133759d451b31d3ccdebaff698646e54aff82c3de4f8"
], ],
"version": "==2018.8.29" "version": "==2018.11.2"
}, },
"requests": { "requests": {
"hashes": [ "hashes": [
"sha256:63b52e3c866428a224f97cab011de738c36aec0185aa91cfacd418b5d58911d1", "sha256:99dcfdaaeb17caf6e526f32b6a7b780461512ab3f1d992187801694cba42770c",
"sha256:ec22d826a36ed72a7358ff3fe56cbd4ba69dd7a6718ffd450ff0e9df7a47ce6a" "sha256:a84b8c9ab6239b578f22d1c21d51b696dcfe004032bb80ea832398d6909d7279"
], ],
"version": "==2.19.1" "version": "==2.20.0"
},
"six": {
"hashes": [
"sha256:70e8a77beed4562e7f14fe23a786b54f6296e34344c23bc42f07b15018ff98e9",
"sha256:832dc0e10feb1aa2c68dcc57dbb658f1c7e65b9b61af69048abc87a2db00a0eb"
],
"version": "==1.11.0"
},
"termcolor": {
"hashes": [
"sha256:1d6d69ce66211143803fbc56652b41d73b4a400a2891d7bf7a1cdf4c02de613b"
],
"version": "==1.1.0"
},
"text-unidecode": {
"hashes": [
"sha256:5a1375bb2ba7968740508ae38d92e1f889a0832913cb1c447d5e2046061a396d",
"sha256:801e38bd550b943563660a91de8d4b6fa5df60a542be9093f7abf819f86050cc"
],
"version": "==1.2"
},
"tzlocal": {
"hashes": [
"sha256:4ebeb848845ac898da6519b9b31879cf13b6626f7184c496037b818e238f2c4e"
],
"version": "==1.5.1"
},
"urllib3": {
"hashes": [
"sha256:a68ac5e15e76e7e5dd2b8f94007233e01effe3e50e8daddf69acfd81cb686baf",
"sha256:b5725a0bd4ba422ab0e66e89e030c806576753ea3ee08554382c14e685d117b5"
],
"markers": "python_version >= '2.6' and python_version != '3.2.*' and python_version != '3.0.*' and python_version != '3.1.*' and python_version < '4' and python_version != '3.3.*'",
"version": "==1.23"
}
},
"develop": {
"alabaster": {
"hashes": [
"sha256:674bb3bab080f598371f4443c5008cbfeb1a5e622dd312395d2d82af2c54c456",
"sha256:b63b1f4dc77c074d386752ec4a8a7517600f6c0db8cd42980cae17ab7b3275d7"
],
"version": "==0.7.11"
},
"babel": {
"hashes": [
"sha256:6778d85147d5d85345c14a26aada5e478ab04e39b078b0745ee6870c2b5cf669",
"sha256:8cba50f48c529ca3fa18cf81fa9403be176d374ac4d60738b839122dfaaa3d23"
],
"version": "==2.6.0"
},
"backcall": {
"hashes": [
"sha256:38ecd85be2c1e78f77fd91700c76e14667dc21e2713b63876c0eb901196e01e4",
"sha256:bbbf4b1e5cd2bdb08f915895b51081c041bac22394fdfcfdfbe9f14b77c08bf2"
],
"version": "==0.1.0"
},
"certifi": {
"hashes": [
"sha256:376690d6f16d32f9d1fe8932551d80b23e9d393a8578c5633a2ed39a64861638",
"sha256:456048c7e371c089d0a77a5212fb37a2c2dce1e24146e3b7e0261736aaeaa22a"
],
"version": "==2018.8.24"
},
"chardet": {
"hashes": [
"sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae",
"sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691"
],
"version": "==3.0.4"
},
"decorator": {
"hashes": [
"sha256:2c51dff8ef3c447388fe5e4453d24a2bf128d3a4c32af3fabef1f01c6851ab82",
"sha256:c39efa13fbdeb4506c476c9b3babf6a718da943dab7811c206005a4a956c080c"
],
"version": "==4.3.0"
},
"docutils": {
"hashes": [
"sha256:02aec4bd92ab067f6ff27a38a38a41173bf01bed8f89157768c1573f53e474a6",
"sha256:51e64ef2ebfb29cae1faa133b3710143496eca21c530f3f71424d77687764274",
"sha256:7a4bd47eaf6596e1295ecb11361139febe29b084a87bf005bf899f9a42edc3c6"
],
"version": "==0.14"
},
"idna": {
"hashes": [
"sha256:156a6814fb5ac1fc6850fb002e0852d56c0c8d2531923a51032d1b70760e186e",
"sha256:684a38a6f903c1d71d6d5fac066b58d7768af4de2b832e426ec79c30daa94a16"
],
"version": "==2.7"
},
"imagesize": {
"hashes": [
"sha256:3f349de3eb99145973fefb7dbe38554414e5c30abd0c8e4b970a7c9d09f3a1d8",
"sha256:f3832918bc3c66617f92e35f5d70729187676313caa60c187eb0f28b8fe5e3b5"
],
"markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'",
"version": "==1.1.0"
},
"ipython": {
"hashes": [
"sha256:007dcd929c14631f83daff35df0147ea51d1af420da303fd078343878bd5fb62",
"sha256:b0f2ef9eada4a68ef63ee10b6dde4f35c840035c50fd24265f8052c98947d5a4"
],
"index": "pypi",
"version": "==6.5.0"
},
"ipython-genutils": {
"hashes": [
"sha256:72dd37233799e619666c9f639a9da83c34013a73e8bbc79a7a6348d93c61fab8",
"sha256:eb2e116e75ecef9d4d228fdc66af54269afa26ab4463042e33785b887c628ba8"
],
"version": "==0.2.0"
},
"jedi": {
"hashes": [
"sha256:b409ed0f6913a701ed474a614a3bb46e6953639033e31f769ca7581da5bd1ec1",
"sha256:c254b135fb39ad76e78d4d8f92765ebc9bf92cbc76f49e97ade1d5f5121e1f6f"
],
"version": "==0.12.1"
},
"jinja2": {
"hashes": [
"sha256:74c935a1b8bb9a3947c50a54766a969d4846290e1e788ea44c1392163723c3bd",
"sha256:f84be1bb0040caca4cea721fcbbbbd61f9be9464ca236387158b0feea01914a4"
],
"version": "==2.10"
},
"markupsafe": {
"hashes": [
"sha256:a6be69091dac236ea9c6bc7d012beab42010fa914c459791d627dad4910eb665"
],
"version": "==1.0"
},
"packaging": {
"hashes": [
"sha256:e9215d2d2535d3ae866c3d6efc77d5b24a0192cce0ff20e42896cc0664f889c0",
"sha256:f019b770dd64e585a99714f1fd5e01c7a8f11b45635aa953fd41c689a657375b"
],
"version": "==17.1"
},
"parso": {
"hashes": [
"sha256:35704a43a3c113cce4de228ddb39aab374b8004f4f2407d070b6a2ca784ce8a2",
"sha256:895c63e93b94ac1e1690f5fdd40b65f07c8171e3e53cbd7793b5b96c0e0a7f24"
],
"version": "==0.3.1"
},
"pexpect": {
"hashes": [
"sha256:2a8e88259839571d1251d278476f3eec5db26deb73a70be5ed5dc5435e418aba",
"sha256:3fbd41d4caf27fa4a377bfd16fef87271099463e6fa73e92a52f92dfee5d425b"
],
"markers": "sys_platform != 'win32'",
"version": "==4.6.0"
},
"pickleshare": {
"hashes": [
"sha256:84a9257227dfdd6fe1b4be1319096c20eb85ff1e82c7932f36efccfe1b09737b",
"sha256:c9a2541f25aeabc070f12f452e1f2a8eae2abd51e1cd19e8430402bdf4c1d8b5"
],
"version": "==0.7.4"
},
"pluggy": {
"hashes": [
"sha256:6e3836e39f4d36ae72840833db137f7b7d35105079aee6ec4a62d9f80d594dd1",
"sha256:95eb8364a4708392bae89035f45341871286a333f749c3141c20573d2b3876e1"
],
"markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'",
"version": "==0.7.1"
},
"prompt-toolkit": {
"hashes": [
"sha256:1df952620eccb399c53ebb359cc7d9a8d3a9538cb34c5a1344bdbeb29fbcc381",
"sha256:3f473ae040ddaa52b52f97f6b4a493cfa9f5920c255a12dc56a7d34397a398a4",
"sha256:858588f1983ca497f1cf4ffde01d978a3ea02b01c8a26a8bbc5cd2e66d816917"
],
"version": "==1.0.15"
},
"ptyprocess": {
"hashes": [
"sha256:923f299cc5ad920c68f2bc0bc98b75b9f838b93b599941a6b63ddbc2476394c0",
"sha256:d7cc528d76e76342423ca640335bd3633420dc1366f258cb31d05e865ef5ca1f"
],
"version": "==0.6.0"
},
"py": {
"hashes": [
"sha256:06a30435d058473046be836d3fc4f27167fd84c45b99704f2fb5509ef61f9af1",
"sha256:50402e9d1c9005d759426988a492e0edaadb7f4e68bcddfea586bc7432d009c6"
],
"markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'",
"version": "==1.6.0"
},
"pygments": {
"hashes": [
"sha256:78f3f434bcc5d6ee09020f92ba487f95ba50f1e3ef83ae96b9d5ffa1bab25c5d",
"sha256:dbae1046def0efb574852fab9e90209b23f556367b5a320c0bcb871c77c3e8cc"
],
"version": "==2.2.0"
},
"pyparsing": {
"hashes": [
"sha256:0832bcf47acd283788593e7a0f542407bd9550a55a8a8435214a1960e04bcb04",
"sha256:fee43f17a9c4087e7ed1605bd6df994c6173c1e977d7ade7b651292fab2bd010"
],
"version": "==2.2.0"
},
"pytz": {
"hashes": [
"sha256:a061aa0a9e06881eb8b3b2b43f05b9439d6583c206d0a6c340ff72a7b6669053",
"sha256:ffb9ef1de172603304d9d2819af6f5ece76f2e85ec10692a524dd876e72bf277"
],
"index": "pypi",
"version": "==2018.5"
},
"requests": {
"hashes": [
"sha256:63b52e3c866428a224f97cab011de738c36aec0185aa91cfacd418b5d58911d1",
"sha256:ec22d826a36ed72a7358ff3fe56cbd4ba69dd7a6718ffd450ff0e9df7a47ce6a"
],
"version": "==2.19.1"
},
"simplegeneric": {
"hashes": [
"sha256:dc972e06094b9af5b855b3df4a646395e43d1c9d0d39ed345b7393560d0b9173"
],
"version": "==0.8.1"
}, },
"six": { "six": {
"hashes": [ "hashes": [
@ -675,27 +578,46 @@
}, },
"sphinx": { "sphinx": {
"hashes": [ "hashes": [
"sha256:217a7705adcb573da5bbe1e0f5cab4fa0bd89fd9342c9159121746f593c2d5a4", "sha256:652eb8c566f18823a022bb4b6dbc868d366df332a11a0226b5bc3a798a479f17",
"sha256:a602513f385f1d5785ff1ca420d9c7eb1a1b63381733b2f0ea8188a391314a86" "sha256:d222626d8356de702431e813a05c68a35967e3d66c6cd1c2c89539bb179a7464"
], ],
"index": "pypi", "index": "pypi",
"version": "==1.7.9" "version": "==1.8.1"
}, },
"sphinxcontrib-websupport": { "sphinxcontrib-websupport": {
"hashes": [ "hashes": [
"sha256:68ca7ff70785cbe1e7bccc71a48b5b6d965d79ca50629606c7861a21b206d9dd", "sha256:68ca7ff70785cbe1e7bccc71a48b5b6d965d79ca50629606c7861a21b206d9dd",
"sha256:9de47f375baf1ea07cdb3436ff39d7a9c76042c10a769c52353ec46e4e8fc3b9" "sha256:9de47f375baf1ea07cdb3436ff39d7a9c76042c10a769c52353ec46e4e8fc3b9"
], ],
"markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'",
"version": "==1.1.0" "version": "==1.1.0"
}, },
"termcolor": {
"hashes": [
"sha256:1d6d69ce66211143803fbc56652b41d73b4a400a2891d7bf7a1cdf4c02de613b"
],
"version": "==1.1.0"
},
"text-unidecode": {
"hashes": [
"sha256:5a1375bb2ba7968740508ae38d92e1f889a0832913cb1c447d5e2046061a396d",
"sha256:801e38bd550b943563660a91de8d4b6fa5df60a542be9093f7abf819f86050cc"
],
"version": "==1.2"
},
"toml": {
"hashes": [
"sha256:229f81c57791a41d65e399fc06bf0848bab550a9dfd5ed66df18ce5f05e73d5c",
"sha256:235682dd292d5899d361a811df37e04a8828a5b1da3115886b73cf81ebc9100e"
],
"version": "==0.10.0"
},
"tox": { "tox": {
"hashes": [ "hashes": [
"sha256:37cf240781b662fb790710c6998527e65ca6851eace84d1595ee71f7af4e85f7", "sha256:513e32fdf2f9e2d583c2f248f47ba9886428c949f068ac54a0469cac55df5862",
"sha256:eb61aa5bcce65325538686f09848f04ef679b5cd9b83cc491272099b28739600" "sha256:75fa30e8329b41b664585f5fb837e23ce1d7e6fa1f7811f2be571c990f9d911b"
], ],
"index": "pypi", "index": "pypi",
"version": "==3.2.1" "version": "==3.5.3"
}, },
"traitlets": { "traitlets": {
"hashes": [ "hashes": [
@ -704,21 +626,25 @@
], ],
"version": "==4.3.2" "version": "==4.3.2"
}, },
"tzlocal": {
"hashes": [
"sha256:4ebeb848845ac898da6519b9b31879cf13b6626f7184c496037b818e238f2c4e"
],
"version": "==1.5.1"
},
"urllib3": { "urllib3": {
"hashes": [ "hashes": [
"sha256:a68ac5e15e76e7e5dd2b8f94007233e01effe3e50e8daddf69acfd81cb686baf", "sha256:61bf29cada3fc2fbefad4fdf059ea4bd1b4a86d2b6d15e1c7c0b582b9752fe39",
"sha256:b5725a0bd4ba422ab0e66e89e030c806576753ea3ee08554382c14e685d117b5" "sha256:de9529817c93f27c8ccbfead6985011db27bd0ddfcdb2d86f3f663385c6a9c22"
], ],
"markers": "python_version >= '2.6' and python_version != '3.2.*' and python_version != '3.0.*' and python_version != '3.1.*' and python_version < '4' and python_version != '3.3.*'", "version": "==1.24.1"
"version": "==1.23"
}, },
"virtualenv": { "virtualenv": {
"hashes": [ "hashes": [
"sha256:2ce32cd126117ce2c539f0134eb89de91a8413a29baac49cbab3eb50e2026669", "sha256:686176c23a538ecc56d27ed9d5217abd34644823d6391cbeb232f42bf722baad",
"sha256:ca07b4c0b54e14a91af9f34d0919790b016923d157afda5efdde55c96718f752" "sha256:f899fafcd92e1150f40c8215328be38ff24b519cd95357fa6e78e006c7638208"
], ],
"markers": "python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.2.*' and python_version != '3.1.*'", "version": "==16.1.0"
"version": "==16.0.0"
}, },
"wcwidth": { "wcwidth": {
"hashes": [ "hashes": [
@ -727,5 +653,6 @@
], ],
"version": "==0.1.7" "version": "==0.1.7"
} }
} },
"develop": {}
} }

20
Vagrantfile vendored
View File

@ -1,20 +0,0 @@
# -*- mode: ruby -*-
# vi: set ft=ruby :
VAGRANT_API_VERSION = "2"
Vagrant.configure(VAGRANT_API_VERSION) do |config|
config.vm.box = "ubuntu/trusty64"
# Provision using shell
config.vm.host_name = "dev.paperless"
config.vm.synced_folder ".", "/opt/paperless"
config.vm.provision "shell", path: "scripts/vagrant-provision"
# Networking details
config.vm.network "private_network", ip: "172.28.128.4"
config.vm.provider "virtualbox" do |vb|
# Customize the amount of memory on the VM:
vb.memory = "1024"
end
end

View File

@ -1,6 +1,71 @@
Changelog Changelog
######### #########
2.6.0
=====
* Allow an infinite number of logs to be deleted. Thanks to `Ulli`_ for noting
the problem in `#433`_.
* Fix the ``RecentCorrespondentFilter`` filter that was added
in 2.4 to play nice with the defaults. Thanks to `tsia`_ and `Sblop`_ who
pointed this out. `#423`_.
* Updated dependencies to include (among other things) a security patch to
requests.
* Fix text in sample data for tests so that the language guesser stops thinking
that everything is in Catalan because we had *Lorem ipsum* in there.
* Tweaked the gunicorn sample command to use filesystem paths instead of Python
paths. `#441`_
* Added pretty colour boxes next to the hex values in the Tags section, thanks
to a pull request from `Joshua Taillon`_ `#442`_.
* Added a ``.editorconfig`` file to better specify coding style.
* `Joshua Taillon`_ also added some logic to tie Paperless' date guessing logic
into how it parses file names on import. `#440`_
2.5.0
=====
* **New dependency**: Paperless now optimises thumbnail generation with
`optipng`_, so you'll need to install that somewhere in your PATH or declare
its location in ``PAPERLESS_OPTIPNG_BINARY``. The Docker image has already
been updated on the Docker Hub, so you just need to pull the latest one from
there if you're a Docker user.
* "Login free" instances of Paperless were breaking whenever you tried to edit
objects in the admin: adding/deleting tags or correspondents, or even fixing
spelling. This was due to the "user hack" we were applying to sessions that
weren't using a login, as that hack user didn't have a valid id. The fix was
to attribute the first user id in the system to this hack user. `#394`_
* A problem in how we handle slug values on Tags and Correspondents required a
few changes to how we handle this field `#393`_:
1. Slugs are no longer editable. They're derived from the name of the tag or
correspondent at save time, so if you wanna change the slug, you have to
change the name, and even then you're restricted to the rules of the
``slugify()`` function. The slug value is still visible in the admin
though.
2. I've added a migration to go over all existing tags & correspondents and
rewrite the ``.slug`` values to ones conforming to the ``slugify()``
rules.
3. The consumption process now uses the same rules as ``.save()`` in
determining a slug and using that to check for an existing
tag/correspondent.
* An annoying bug in the date capture code was causing some bogus dates to be
attached to documents, which in turn busted the UI. Thanks to `Andrew Peng`_
for reporting this. `#414`_.
* A bug in the Dockerfile meant that Tesseract language files weren't being
installed correctly. `euri10`_ was quick to provide a fix: `#406`_, `#413`_.
* Document consumption is now wrapped in a transaction as per an old ticket
`#262`_.
* The ``get_date()`` functionality of the parsers has been consolidated onto
the ``DocumentParser`` class since much of that code was redundant anyway.
2.4.0 2.4.0
===== =====
@ -12,13 +77,13 @@ Changelog
It's now in the import step that we decide the storage type. This allows you It's now in the import step that we decide the storage type. This allows you
to export from an encrypted system and import into an unencrypted one, or to export from an encrypted system and import into an unencrypted one, or
vice-versa. vice-versa.
* The migration history has been slightly modified to accomodate PostgreSQL * The migration history has been slightly modified to accommodate PostgreSQL
users. Additionally, you can now tell paperless to use PostgreSQL simply by users. Additionally, you can now tell paperless to use PostgreSQL simply by
declaring ``PAPERLESS_DBUSER`` in your environment. This will attempt to declaring ``PAPERLESS_DBUSER`` in your environment. This will attempt to
connect to your Postgres database without a password unless you also set connect to your Postgres database without a password unless you also set
``PAPERLESS_DBPASS``. ``PAPERLESS_DBPASS``.
* A bug was found in the REST API filter system that was the result of an * A bug was found in the REST API filter system that was the result of an
update of django-filter some time ago. This has now been patched `#412`_. update of django-filter some time ago. This has now been patched in `#412`_.
Thanks to `thepill`_ for spotting it! Thanks to `thepill`_ for spotting it!
@ -525,6 +590,11 @@ bulk of the work on this big change.
.. _ahyear: https://github.com/ahyear .. _ahyear: https://github.com/ahyear
.. _jonaswinkler: https://github.com/jonaswinkler .. _jonaswinkler: https://github.com/jonaswinkler
.. _thepill: https://github.com/thepill .. _thepill: https://github.com/thepill
.. _Andrew Peng: https://github.com/pengc99
.. _euri10: https://github.com/euri10
.. _Ulli: https://github.com/Ulli2k
.. _tsia: https://github.com/tsia
.. _Sblop: https://github.com/Sblop
.. _#20: https://github.com/danielquinn/paperless/issues/20 .. _#20: https://github.com/danielquinn/paperless/issues/20
.. _#44: https://github.com/danielquinn/paperless/issues/44 .. _#44: https://github.com/danielquinn/paperless/issues/44
@ -590,6 +660,7 @@ bulk of the work on this big change.
.. _#322: https://github.com/danielquinn/paperless/pull/322 .. _#322: https://github.com/danielquinn/paperless/pull/322
.. _#328: https://github.com/danielquinn/paperless/pull/328 .. _#328: https://github.com/danielquinn/paperless/pull/328
.. _#253: https://github.com/danielquinn/paperless/issues/253 .. _#253: https://github.com/danielquinn/paperless/issues/253
.. _#262: https://github.com/danielquinn/paperless/issues/262
.. _#323: https://github.com/danielquinn/paperless/issues/323 .. _#323: https://github.com/danielquinn/paperless/issues/323
.. _#344: https://github.com/danielquinn/paperless/pull/344 .. _#344: https://github.com/danielquinn/paperless/pull/344
.. _#351: https://github.com/danielquinn/paperless/pull/351 .. _#351: https://github.com/danielquinn/paperless/pull/351
@ -606,13 +677,24 @@ bulk of the work on this big change.
.. _#391: https://github.com/danielquinn/paperless/pull/391 .. _#391: https://github.com/danielquinn/paperless/pull/391
.. _#390: https://github.com/danielquinn/paperless/pull/390 .. _#390: https://github.com/danielquinn/paperless/pull/390
.. _#392: https://github.com/danielquinn/paperless/issues/392 .. _#392: https://github.com/danielquinn/paperless/issues/392
.. _#393: https://github.com/danielquinn/paperless/issues/393
.. _#395: https://github.com/danielquinn/paperless/pull/395 .. _#395: https://github.com/danielquinn/paperless/pull/395
.. _#394: https://github.com/danielquinn/paperless/issues/394
.. _#396: https://github.com/danielquinn/paperless/pull/396 .. _#396: https://github.com/danielquinn/paperless/pull/396
.. _#399: https://github.com/danielquinn/paperless/pull/399 .. _#399: https://github.com/danielquinn/paperless/pull/399
.. _#400: https://github.com/danielquinn/paperless/pull/400 .. _#400: https://github.com/danielquinn/paperless/pull/400
.. _#401: https://github.com/danielquinn/paperless/pull/401 .. _#401: https://github.com/danielquinn/paperless/pull/401
.. _#405: https://github.com/danielquinn/paperless/pull/405 .. _#405: https://github.com/danielquinn/paperless/pull/405
.. _#406: https://github.com/danielquinn/paperless/issues/406
.. _#412: https://github.com/danielquinn/paperless/issues/412 .. _#412: https://github.com/danielquinn/paperless/issues/412
.. _#413: https://github.com/danielquinn/paperless/pull/413
.. _#414: https://github.com/danielquinn/paperless/issues/414
.. _#423: https://github.com/danielquinn/paperless/issues/423
.. _#433: https://github.com/danielquinn/paperless/issues/433
.. _#440: https://github.com/danielquinn/paperless/pull/440
.. _#441: https://github.com/danielquinn/paperless/pull/441
.. _#442: https://github.com/danielquinn/paperless/pull/442
.. _pipenv: https://docs.pipenv.org/ .. _pipenv: https://docs.pipenv.org/
.. _a new home on Docker Hub: https://hub.docker.com/r/danielquinn/paperless/ .. _a new home on Docker Hub: https://hub.docker.com/r/danielquinn/paperless/
.. _optipng: http://optipng.sourceforge.net/

View File

@ -43,6 +43,16 @@ These however wouldn't work:
* ``Some Company Name, Invoice 2016-01-01, money, invoices.pdf`` * ``Some Company Name, Invoice 2016-01-01, money, invoices.pdf``
* ``Another Company- Letter of Reference.jpg`` * ``Another Company- Letter of Reference.jpg``
Do I have to be so strict about naming?
---------------------------------------
Rather than using the strict document naming rules, one can also set the option
``PAPERLESS_FILENAME_DATE_ORDER`` in ``paperless.conf`` to any date order
that is accepted by dateparser_. Doing so will cause ``paperless`` to check the
file name for a date first, and to fall back to a date pulled from the
document's text only if none is found there, without requiring the strict
formatting of the document filename as described above.
.. _dateparser: https://github.com/scrapinghub/dateparser/blob/v0.7.0/docs/usage.rst#settings
.. _guesswork-content: .. _guesswork-content:
@ -82,11 +92,11 @@ text and matching algorithm. From the help info there:
uses a regex to match the PDF. If you don't know what a regex is, you uses a regex to match the PDF. If you don't know what a regex is, you
probably don't want this option. probably don't want this option.
When using the "any" or "all" matching algorithms, you can search for terms that When using the "any" or "all" matching algorithms, you can search for terms
consist of multiple words by enclosing them in double quotes. For example, defining that consist of multiple words by enclosing them in double quotes. For example,
a match text of ``"Bank of America" BofA`` using the "any" algorithm, will match defining a match text of ``"Bank of America" BofA`` using the "any" algorithm,
documents that contain either "Bank of America" or "BofA", but will not match will match documents that contain either "Bank of America" or "BofA", but will
documents containing "Bank of South America". not match documents containing "Bank of South America".
Then just save your tag/correspondent and run another document through the Then just save your tag/correspondent and run another document through the
consumer. Once complete, you should see the newly-created document, consumer. Once complete, you should see the newly-created document,

View File

@ -82,6 +82,7 @@ rolled in as part of the update:
$ cd /path/to/project $ cd /path/to/project
$ git pull $ git pull
$ pip install -r requirements.txt
$ cd src $ cd src
$ ./manage.py migrate $ ./manage.py migrate

View File

@ -33,7 +33,7 @@ In addition to the above, there are a number of Python requirements, all of
which are listed in a file called ``requirements.txt`` in the project root which are listed in a file called ``requirements.txt`` in the project root
directory. directory.
If you're not working on a virtual environment (like Vagrant or Docker), you If you're not working on a virtual environment (like Docker), you
should probably be using a virtualenv, but that's your call. The reasons why should probably be using a virtualenv, but that's your call. The reasons why
you might choose a virtualenv or not aren't really within the scope of this you might choose a virtualenv or not aren't really within the scope of this
document. Needless to say if you don't know what a virtualenv is, you should document. Needless to say if you don't know what a virtualenv is, you should

View File

@ -42,18 +42,14 @@ Installation & Configuration
You can go multiple routes with setting up and running Paperless: You can go multiple routes with setting up and running Paperless:
* The `bare metal route`_ * The `bare metal route`_
* The `vagrant route`_
* The `docker route`_ * The `docker route`_
The `Vagrant route`_ is quick & easy, but means you're running a VM which comes The `docker route`_ is quick & easy.
with memory consumption, cpu overhead etc. The `docker route`_ offers the same
simplicity as Vagrant with lower resource consumption.
The `bare metal route`_ is a bit more complicated to set up but makes it easier The `bare metal route`_ is a bit more complicated to set up but makes it easier
should you want to contribute some code back. should you want to contribute some code back.
.. _Vagrant route: setup-installation-vagrant_
.. _docker route: setup-installation-docker_ .. _docker route: setup-installation-docker_
.. _bare metal route: setup-installation-bare-metal_ .. _bare metal route: setup-installation-bare-metal_
.. _Docker Machine: https://docs.docker.com/machine/ .. _Docker Machine: https://docs.docker.com/machine/
@ -267,54 +263,6 @@ Docker Method
newer ``docker-compose.yml.example`` file newer ``docker-compose.yml.example`` file
.. _setup-installation-vagrant:
Vagrant Method
++++++++++++++
1. Install `Vagrant`_. How you do that is really between you and your OS.
2. Run ``vagrant up``. An instance will start up for you. When it's ready and
provisioned...
3. Run ``vagrant ssh`` and once inside your new vagrant box, edit
``/etc/paperless.conf`` and set the values for:
* ``PAPERLESS_CONSUMPTION_DIR``: This is where your documents will be
dumped to be consumed by Paperless.
* ``PAPERLESS_PASSPHRASE``: This is the passphrase Paperless uses to
encrypt/decrypt the original document. It's only required if you want
your original files to be encrypted, otherwise, just leave it unset.
* ``PAPERLESS_EMAIL_SECRET``: this is the "magic word" used when consuming
documents from mail or via the API. If you don't use either, leaving it
blank is just fine.
4. Exit the vagrant box and re-enter it with ``vagrant ssh`` again. This
updates the environment to make use of the changes you made to the config
file.
5. Initialise the database with ``/opt/paperless/src/manage.py migrate``.
6. Still inside your vagrant box, create a user for your Paperless instance
with ``/opt/paperless/src/manage.py createsuperuser``. Follow the prompts to
create your user.
7. Start the webserver with
``/opt/paperless/src/manage.py runserver 0.0.0.0:8000``. You should now be
able to visit your (empty) `Paperless webserver`_ at ``172.28.128.4:8000``.
You can login with the user/pass you created in #6.
8. In a separate window, run ``vagrant ssh`` again, but this time once inside
your vagrant instance, you should start the consumer script with
``/opt/paperless/src/manage.py document_consumer``.
9. Scan something. Put it in the ``CONSUMPTION_DIR``.
10. Wait a few minutes
11. Visit the document list on your webserver, and it should be there, indexed
and downloadable.
.. caution::
This installation is not secure. Once everything is working head up to
`Making things more permanent`_
.. _Vagrant: https://vagrantup.com/
.. _Paperless server: http://172.28.128.4:8000
.. _setup-permanent: .. _setup-permanent:
Making Things a Little more Permanent Making Things a Little more Permanent
@ -398,7 +346,7 @@ instance listening on localhost port 8000.
location /static { location /static {
autoindex on; autoindex on;
alias <path-to-paperless-static-directory> alias <path-to-paperless-static-directory>;
} }
@ -409,7 +357,7 @@ instance listening on localhost port 8000.
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme; proxy_set_header X-Forwarded-Proto $scheme;
proxy_pass http://127.0.0.1:8000 proxy_pass http://127.0.0.1:8000;
} }
} }
@ -418,7 +366,7 @@ The gunicorn server can be started with the command:
.. code-block:: shell .. code-block:: shell
$ <path-to-paperless-virtual-environment>/bin/gunicorn <path-to-paperless>/src/paperless.wsgi -w 2 $ <path-to-paperless-virtual-environment>/bin/gunicorn --pythonpath=<path-to-paperless>/src paperless.wsgi -w 2
.. _setup-permanent-standard-systemd: .. _setup-permanent-standard-systemd:
@ -475,7 +423,7 @@ after restarting your system:
respawn limit 10 5 respawn limit 10 5
script script
exec <path to paperless virtual environment>/bin/gunicorn <path to paperless>/src/paperless.wsgi -w 2 exec <path to paperless virtual environment>/bin/gunicorn --pythonpath=<path to paperless>/src paperless.wsgi -w 2
end script end script
Note that you'll need to replace ``/srv/paperless/src/manage.py`` with the Note that you'll need to replace ``/srv/paperless/src/manage.py`` with the
@ -513,13 +461,6 @@ second period.
.. _Upstart: http://upstart.ubuntu.com/ .. _Upstart: http://upstart.ubuntu.com/
Vagrant
~~~~~~~
You may use the Ubuntu explanation above. Replace
``(local-filesystems and net-device-up IFACE=eth0)`` with ``vagrant-mounted``.
.. _setup-permanent-docker: .. _setup-permanent-docker:
Docker Docker

View File

@ -14,9 +14,8 @@ FORGIVING_OCR is enabled``, then you might need to install the
`Tesseract language files <http://packages.ubuntu.com/search?keywords=tesseract-ocr>`_ `Tesseract language files <http://packages.ubuntu.com/search?keywords=tesseract-ocr>`_
matching your document's languages. matching your document's languages.
As an example, if you are running Paperless from the Vagrant setup provided As an example, if you are running Paperless from any Ubuntu or Debian
(or from any Ubuntu or Debian box), and your documents are written in Spanish box, and your documents are written in Spanish you may need to run::
you may need to run::
apt-get install -y tesseract-ocr-spa apt-get install -y tesseract-ocr-spa

11
overrides/README.md Normal file
View File

@ -0,0 +1,11 @@
# Customizing Paperless
*See customization
[documentation](https://paperless.readthedocs.io/en/latest/customising.html)
for more detail!*
The example `.css` and `.js` snippets in this folder can be placed into
one of two files in your ``PAPERLESS_MEDIADIR`` folder: `overrides.js` or
`overrides.css`. Please feel free to submit pull requests to the main
repository with other examples of customizations that you think others may
find useful.

View File

@ -144,6 +144,14 @@ PAPERLESS_DEBUG="false"
# "true", the document will instead be opened in the browser, if possible. # "true", the document will instead be opened in the browser, if possible.
#PAPERLESS_INLINE_DOC="false" #PAPERLESS_INLINE_DOC="false"
# By default, paperless will check the document text for document date information.
# Uncomment the line below to enable checking the document filename for date
# information. The date order can be set to any option as specified in
# https://dateparser.readthedocs.io/en/latest/#settings. The filename will be
# checked first, and if nothing is found, the document text will be checked
# as normal.
#PAPERLESS_FILENAME_DATE_ORDER="YMD"
# #
# The following values use sensible defaults for modern systems, but if you're # The following values use sensible defaults for modern systems, but if you're
# running Paperless on a low-resource device (like a Raspberry Pi), modifying # running Paperless on a low-resource device (like a Raspberry Pi), modifying
@ -205,6 +213,12 @@ PAPERLESS_DEBUG="false"
#PAPERLESS_CONSUMER_LOOP_TIME=10 #PAPERLESS_CONSUMER_LOOP_TIME=10
# By default Paperless stops consuming a document if no language can be
# detected. Set to true to consume documents even if the language detection
# fails.
#PAPERLESS_FORGIVING_OCR="false"
############################################################################### ###############################################################################
#### Interface #### #### Interface ####
############################################################################### ###############################################################################
@ -230,3 +244,23 @@ PAPERLESS_DEBUG="false"
# The number of years for which a correspondent will be included in the recent # The number of years for which a correspondent will be included in the recent
# correspondents filter. # correspondents filter.
#PAPERLESS_RECENT_CORRESPONDENT_YEARS=1 #PAPERLESS_RECENT_CORRESPONDENT_YEARS=1
###############################################################################
#### Third-Party Binaries ####
###############################################################################
# There are a few external software packages that Paperless expects to find on
# your system when it starts up. Unless you've done something creative with
# their installation, you probably won't need to edit any of these. However,
# if you've installed these programs somewhere where simply typing the name of
# the program doesn't automatically execute it (i.e. the program isn't in your
# $PATH), then you'll need to specify the literal path for that program here.
# Convert (part of the ImageMagick suite)
#PAPERLESS_CONVERT_BINARY=/usr/bin/convert
# Unpaper
#PAPERLESS_UNPAPER_BINARY=/usr/bin/unpaper
# Optipng (for optimising thumbnail sizes)
#PAPERLESS_OPTIPNG_BINARY=/usr/bin/optipng

View File

@ -1,54 +1,83 @@
-i https://pypi.python.org/simple -i https://pypi.python.org/simple
apipkg==1.5; python_version != '3.3.*' alabaster==0.7.12
atomicwrites==1.2.1; python_version != '3.3.*' apipkg==1.5
atomicwrites==1.2.1
attrs==18.2.0 attrs==18.2.0
certifi==2018.8.24 babel==2.6.0
backcall==0.1.0
certifi==2018.10.15
chardet==3.0.4 chardet==3.0.4
coverage==4.5.1; python_version < '4' coverage==4.5.1
coveralls==1.5.0 coveralls==1.5.1
dateparser==0.7.0 dateparser==0.7.0
decorator==4.3.0
django-cors-headers==2.4.0 django-cors-headers==2.4.0
django-crispy-forms==1.7.2 django-crispy-forms==1.7.2
django-extensions==2.1.2 django-extensions==2.1.3
django-filter==2.0.0 django-filter==2.0.0
django==2.0.8 django==2.0.9
djangorestframework==3.8.2 djangorestframework==3.9.0
docopt==0.6.2 docopt==0.6.2
execnet==1.5.0; python_version != '3.3.*' docutils==0.14
execnet==1.5.0
factory-boy==2.11.1 factory-boy==2.11.1
faker==0.9.0; python_version >= '2.7' faker==0.9.2
filelock==3.0.10
filemagic==1.6 filemagic==1.6
fuzzywuzzy==0.15.0 fuzzywuzzy[speedup]==0.15.0
gunicorn==19.9.0 gunicorn==19.9.0
idna==2.7 idna==2.7
imagesize==1.1.0
inotify-simple==1.1.8 inotify-simple==1.1.8
ipython-genutils==0.2.0
ipython==7.1.1
jedi==0.13.1
jinja2==2.10
langdetect==1.0.7 langdetect==1.0.7
markupsafe==1.0
more-itertools==4.3.0 more-itertools==4.3.0
numpy==1.15.1 numpy==1.15.1
pdftotext==2.1.0 packaging==18.0
pillow==5.2.0 parso==0.3.1
pluggy==0.7.1; python_version != '3.3.*' pdftotext==2.1.1
py==1.6.0; python_version != '3.3.*' pexpect==4.6.0
pickleshare==0.7.5
pillow==5.3.0
pluggy==0.8.0
psycopg2==2.7.6.1
prompt-toolkit==2.0.7
ptyprocess==0.6.0
py==1.7.0
pycodestyle==2.4.0 pycodestyle==2.4.0
pygments==2.2.0
pyocr==0.5.3 pyocr==0.5.3
pyparsing==2.3.0
pytest-cov==2.6.0 pytest-cov==2.6.0
pytest-django==3.4.2 pytest-django==3.4.3
pytest-env==0.6.2 pytest-env==0.6.2
pytest-forked==0.2; python_version != '3.3.*' pytest-forked==0.2
pytest-sugar==0.9.1 pytest-sugar==0.9.1
pytest-xdist==1.23.0 pytest-xdist==1.24.0
pytest==3.8.0 pytest==3.9.3
python-dateutil==2.7.3 python-dateutil==2.7.5
python-dotenv==0.9.1 python-dotenv==0.9.1
python-gnupg==0.4.3 python-gnupg==0.4.3
python-levenshtein==0.12.0 python-levenshtein==0.12.0 ; extra == 'speedup'
pytz==2018.5 pytz==2018.7
regex==2018.8.29 regex==2018.11.2
requests==2.19.1 requests==2.20.0
six==1.11.0 six==1.11.0
scikit-learn==0.19.2 scikit-learn==0.19.2
scipy==1.1.0 scipy==1.1.0
snowballstemmer==1.2.1
sphinx==1.8.1
sphinxcontrib-websupport==1.1.0
termcolor==1.1.0 termcolor==1.1.0
text-unidecode==1.2 text-unidecode==1.2
toml==0.10.0
tox==3.5.3
traitlets==4.3.2
tzlocal==1.5.1 tzlocal==1.5.1
urllib3==1.23; python_version != '3.3.*' urllib3==1.24.1
virtualenv==16.1.0
wcwidth==0.1.7

View File

@ -4,7 +4,7 @@ Description=Paperless webserver
[Service] [Service]
User=paperless User=paperless
Group=paperless Group=paperless
ExecStart=/home/paperless/project/virtualenv/bin/gunicorn /home/paperless/project/src/paperless.wsgi -w 2 ExecStart=/home/paperless/project/virtualenv/bin/gunicorn --pythonpath=/home/paperless/project/src paperless.wsgi -w 2
[Install] [Install]
WantedBy=multi-user.target WantedBy=multi-user.target

View File

@ -1,31 +0,0 @@
#!/bin/bash
# Install packages
apt-get update
apt-get build-dep -y python-imaging
apt-get install -y libjpeg8 libjpeg62-dev libfreetype6 libfreetype6-dev
apt-get install -y build-essential python3-dev python3-pip sqlite3 libsqlite3-dev git
apt-get install -y tesseract-ocr tesseract-ocr-eng imagemagick unpaper
# Python dependencies
pip3 install -r /opt/paperless/requirements.txt
# Create the environment file
cat /opt/paperless/paperless.conf.example | sed -e 's#CONSUMPTION_DIR=""#CONSUMPTION_DIR="/home/vagrant/consumption"#' > /etc/paperless.conf
chmod 0640 /etc/paperless.conf
chown root:vagrant /etc/paperless.conf
# Create the consumption directory
mkdir /home/vagrant/consumption
chown vagrant:vagrant /home/vagrant/consumption
echo "
Now follow the remaining steps in the Vagrant section of the setup
documentation to complete the process:
http://paperless.readthedocs.org/en/latest/setup.html#setup-installation-vagrant
"

View File

@ -64,12 +64,12 @@ class FinancialYearFilter(admin.SimpleListFilter):
# To keep it simple we use the same string for both # To keep it simple we use the same string for both
# query parameter and the display. # query parameter and the display.
return (query, query) return query, query
else: else:
query = "{0}-{0}".format(date.year) query = "{0}-{0}".format(date.year)
display = "{}".format(date.year) display = "{}".format(date.year)
return (query, display) return query, display
def lookups(self, request, model_admin): def lookups(self, request, model_admin):
if not settings.FY_START or not settings.FY_END: if not settings.FY_START or not settings.FY_END:
@ -91,25 +91,24 @@ class FinancialYearFilter(admin.SimpleListFilter):
class RecentCorrespondentFilter(admin.RelatedFieldListFilter): class RecentCorrespondentFilter(admin.RelatedFieldListFilter):
"""
def __init__(self, *args, **kwargs): If PAPERLESS_RECENT_CORRESPONDENT_YEARS is set, we limit the available
super().__init__(*args, **kwargs) correspondents to documents sent our way over the past ``n`` years.
self.title = "correspondent (recent)" """
def field_choices(self, field, request, model_admin): def field_choices(self, field, request, model_admin):
years = settings.PAPERLESS_RECENT_CORRESPONDENT_YEARS years = settings.PAPERLESS_RECENT_CORRESPONDENT_YEARS
days = 365 * years correspondents = Correspondent.objects.all()
lookups = []
if years and years > 0: if years and years > 0:
correspondents = Correspondent.objects.filter( self.title = "Correspondent (Recent)"
days = 365 * years
correspondents = correspondents.filter(
documents__created__gte=datetime.now() - timedelta(days=days) documents__created__gte=datetime.now() - timedelta(days=days)
).distinct() ).distinct()
for c in correspondents:
lookups.append((c.id, c.name))
return lookups return [(c.id, c.name) for c in correspondents]
class CommonAdmin(admin.ModelAdmin): class CommonAdmin(admin.ModelAdmin):
@ -124,7 +123,9 @@ class CorrespondentAdmin(CommonAdmin):
"document_count", "document_count",
"last_correspondence" "last_correspondence"
) )
list_editable = ("automatic_classification") list_editable = ("automatic_classification",)
readonly_fields = ("slug",)
def get_queryset(self, request): def get_queryset(self, request):
qs = super(CorrespondentAdmin, self).get_queryset(request) qs = super(CorrespondentAdmin, self).get_queryset(request)
@ -149,6 +150,11 @@ class TagAdmin(CommonAdmin):
list_filter = ("colour",) list_filter = ("colour",)
list_editable = ("colour", "automatic_classification") list_editable = ("colour", "automatic_classification")
readonly_fields = ("slug",)
class Media:
js = ("js/colours.js",)
def get_queryset(self, request): def get_queryset(self, request):
qs = super(TagAdmin, self).get_queryset(request) qs = super(TagAdmin, self).get_queryset(request)
qs = qs.annotate(document_count=models.Count("documents")) qs = qs.annotate(document_count=models.Count("documents"))
@ -164,6 +170,8 @@ class DocumentTypeAdmin(CommonAdmin):
list_display = ("name", "automatic_classification", "document_count") list_display = ("name", "automatic_classification", "document_count")
list_editable = ("automatic_classification",) list_editable = ("automatic_classification",)
readonly_fields = ("slug",)
def get_queryset(self, request): def get_queryset(self, request):
qs = super(DocumentTypeAdmin, self).get_queryset(request) qs = super(DocumentTypeAdmin, self).get_queryset(request)
qs = qs.annotate(document_count=models.Count("documents")) qs = qs.annotate(document_count=models.Count("documents"))
@ -182,14 +190,13 @@ class DocumentAdmin(CommonAdmin):
} }
search_fields = ("correspondent__name", "title", "content", "tags__name") search_fields = ("correspondent__name", "title", "content", "tags__name")
readonly_fields = ("added",) readonly_fields = ("added", "file_type", "storage_type",)
list_display = ("title", "created", "added", "thumbnail", "correspondent", list_display = ("title", "created", "added", "thumbnail", "correspondent",
"tags_", "archive_serial_number", "document_type") "tags_", "archive_serial_number", "document_type")
list_filter = ( list_filter = (
"document_type", "document_type",
"tags", "tags",
("correspondent", RecentCorrespondentFilter), ("correspondent", RecentCorrespondentFilter),
"correspondent",
FinancialYearFilter FinancialYearFilter
) )

View File

@ -1,3 +1,4 @@
from django.db import transaction
import datetime import datetime
import hashlib import hashlib
import logging import logging
@ -111,8 +112,11 @@ class Consumer:
if not self.try_consume_file(file): if not self.try_consume_file(file):
self._ignore.append((file, mtime)) self._ignore.append((file, mtime))
@transaction.atomic
def try_consume_file(self, file): def try_consume_file(self, file):
"Return True if file was consumed" """
Return True if file was consumed
"""
if not re.match(FileInfo.REGEXES["title"], file): if not re.match(FileInfo.REGEXES["title"], file):
return False return False
@ -145,7 +149,7 @@ class Consumer:
parsed_document = parser_class(doc) parsed_document = parser_class(doc)
try: try:
thumbnail = parsed_document.get_thumbnail() thumbnail = parsed_document.get_optimised_thumbnail()
date = parsed_document.get_date() date = parsed_document.get_date()
document = self._store( document = self._store(
parsed_document.get_text(), parsed_document.get_text(),

View File

@ -1,4 +1,4 @@
from django_filters.rest_framework import CharFilter, FilterSet, BooleanFilter, ModelChoiceFilter from django_filters.rest_framework import BooleanFilter, FilterSet
from .models import Correspondent, Document, Tag, DocumentType from .models import Correspondent, Document, Tag, DocumentType

View File

@ -0,0 +1,52 @@
# Generated by Django 2.0.8 on 2018-10-07 14:20
from django.db import migrations, models
from django.utils.text import slugify
def re_slug_all_the_things(apps, schema_editor):
    """
    Normalise the stored slug of every Tag and Correspondent row.

    Each row's current ``slug`` value is passed through ``slugify`` and
    written back, so that the column only holds real slugs before the field
    is marked as non-editable.

    :param apps: historical app registry supplied by the migration runner;
        used to look up the ``documents`` models.
    :param schema_editor: supplied by the migration runner; not used here.
    """

    # Resolve the historical models up front, in a fixed order.
    targets = [
        apps.get_model("documents", model_name)
        for model_name in ("Tag", "Correspondent")
    ]

    for model in targets:
        for row in model.objects.all():
            cleaned = slugify(row.slug)
            # Write through a queryset update rather than row.save().
            model.objects.filter(pk=row.pk).update(slug=cleaned)
class Migration(migrations.Migration):
    """
    Make the Tag/Correspondent slugs and Document.file_type non-editable,
    order tags by name, then rewrite the existing slug values.
    """

    dependencies = [('documents', '0021_document_storage_type')]

    operations = [
        migrations.AlterModelOptions(
            name='tag',
            options={'ordering': ('name',)},
        ),
        migrations.AlterField(
            model_name='correspondent',
            name='slug',
            field=models.SlugField(blank=True, editable=False),
        ),
        migrations.AlterField(
            model_name='document',
            name='file_type',
            field=models.CharField(
                choices=[
                    ('pdf', 'PDF'),
                    ('png', 'PNG'),
                    ('jpg', 'JPG'),
                    ('gif', 'GIF'),
                    ('tiff', 'TIFF'),
                    ('txt', 'TXT'),
                    ('csv', 'CSV'),
                    ('md', 'MD'),
                ],
                editable=False,
                max_length=4,
            ),
        ),
        migrations.AlterField(
            model_name='tag',
            name='slug',
            field=models.SlugField(blank=True, editable=False),
        ),
        # Data step: re-slug existing rows; reversing it is a no-op.
        migrations.RunPython(
            re_slug_all_the_things,
            migrations.RunPython.noop,
        ),
    ]

View File

@ -6,7 +6,7 @@ from django.db import migrations, models
class Migration(migrations.Migration): class Migration(migrations.Migration):
dependencies = [ dependencies = [
('documents', '0021_document_storage_type'), ('documents', '0022_auto_20181007_1420'),
] ]
operations = [ operations = [

View File

@ -7,7 +7,7 @@ import django.db.models.deletion
class Migration(migrations.Migration): class Migration(migrations.Migration):
dependencies = [ dependencies = [
('documents', '0022_workflow_improvements'), ('documents', '1001_workflow_improvements'),
] ]
operations = [ operations = [

View File

@ -18,7 +18,7 @@ def reverse_automatic_classification(apps, schema_editor):
class Migration(migrations.Migration): class Migration(migrations.Migration):
dependencies = [ dependencies = [
('documents', '0023_auto_20180823_1155'), ('documents', '1002_auto_20180823_1155'),
] ]
operations = [ operations = [

View File

@ -0,0 +1,36 @@
# Generated by Django 2.0.8 on 2018-10-07 14:20
from django.db import migrations, models
from django.utils.text import slugify
def re_slug_all_the_things(apps, schema_editor):
    """
    Normalise the stored slug of every DocumentType row.

    Each row's current ``slug`` value is passed through ``slugify`` and
    written back, so that the column only holds real slugs before the field
    is marked as non-editable.

    :param apps: historical app registry supplied by the migration runner;
        used to look up the ``documents.DocumentType`` model.
    :param schema_editor: supplied by the migration runner; not used here.
    """

    model = apps.get_model("documents", "DocumentType")

    for row in model.objects.all():
        cleaned = slugify(row.slug)
        # Write through a queryset update rather than row.save().
        model.objects.filter(pk=row.pk).update(slug=cleaned)
class Migration(migrations.Migration):
    """
    Make the DocumentType slug non-editable, then rewrite the existing slug
    values.
    """

    dependencies = [('documents', '1003_auto_20180904_1425')]

    operations = [
        migrations.AlterField(
            model_name='documenttype',
            name='slug',
            field=models.SlugField(blank=True, editable=False),
        ),
        # Data step: re-slug existing rows; reversing it is a no-op.
        migrations.RunPython(
            re_slug_all_the_things,
            migrations.RunPython.noop,
        ),
    ]

View File

@ -11,6 +11,7 @@ from django.conf import settings
from django.db import models from django.db import models
from django.template.defaultfilters import slugify from django.template.defaultfilters import slugify
from django.utils import timezone from django.utils import timezone
from django.utils.text import slugify
from fuzzywuzzy import fuzz from fuzzywuzzy import fuzz
from .managers import LogManager from .managers import LogManager
@ -24,7 +25,7 @@ except ImportError:
class MatchingModel(models.Model): class MatchingModel(models.Model):
name = models.CharField(max_length=128, unique=True) name = models.CharField(max_length=128, unique=True)
slug = models.SlugField(blank=True) slug = models.SlugField(blank=True, editable=False)
automatic_classification = models.BooleanField(default=False, help_text='Automatically assign to newly added documents based on current usage in your document collection.') automatic_classification = models.BooleanField(default=False, help_text='Automatically assign to newly added documents based on current usage in your document collection.')
@ -37,8 +38,7 @@ class MatchingModel(models.Model):
def save(self, *args, **kwargs): def save(self, *args, **kwargs):
if not self.slug: self.slug = slugify(self.name)
self.slug = slugify(self.name)
models.Model.save(self, *args, **kwargs) models.Model.save(self, *args, **kwargs)
@ -369,7 +369,7 @@ class FileInfo:
r = [] r = []
for t in tags.split(","): for t in tags.split(","):
r.append(Tag.objects.get_or_create( r.append(Tag.objects.get_or_create(
slug=t.lower(), slug=slugify(t),
defaults={"name": t} defaults={"name": t}
)[0]) )[0])
return tuple(r) return tuple(r)

View File

@ -1,23 +1,31 @@
import logging import logging
import shutil import os
import tempfile
import re import re
import shutil
import subprocess
import tempfile
import dateparser
from django.conf import settings from django.conf import settings
from django.utils import timezone
# This regular expression will try to find dates in the document at # This regular expression will try to find dates in the document at
# hand and will match the following formats: # hand and will match the following formats:
# - XX.YY.ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits # - XX.YY.ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
# - XX/YY/ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits # - XX/YY/ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
# - XX-YY-ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits # - XX-YY-ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
# - ZZZZ.XX.YY with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
# - ZZZZ/XX/YY with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
# - ZZZZ-XX-YY with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
# - XX. MONTH ZZZZ with XX being 1 or 2 and ZZZZ being 2 or 4 digits # - XX. MONTH ZZZZ with XX being 1 or 2 and ZZZZ being 2 or 4 digits
# - MONTH ZZZZ, with ZZZZ being 4 digits # - MONTH ZZZZ, with ZZZZ being 4 digits
# - MONTH XX, ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits # - MONTH XX, ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits
DATE_REGEX = re.compile( DATE_REGEX = re.compile(
r'\b([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})\b|' + r'(\b|(?!=([_-])))([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})(\b|(?=([_-])))|' + # NOQA: E501
r'\b([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))\b|' + r'(\b|(?!=([_-])))([0-9]{4}|[0-9]{2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{1,2})(\b|(?=([_-])))|' + # NOQA: E501
r'\b([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))\b|' + r'(\b|(?!=([_-])))([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))(\b|(?=([_-])))|' + # NOQA: E501
r'\b([^\W\d_]{3,9} [0-9]{4})\b' r'(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))(\b|(?=([_-])))|' +
r'(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{4})(\b|(?=([_-])))'
) )
@ -32,6 +40,9 @@ class DocumentParser:
""" """
SCRATCH = settings.SCRATCH_DIR SCRATCH = settings.SCRATCH_DIR
DATE_ORDER = settings.DATE_ORDER
FILENAME_DATE_ORDER = settings.FILENAME_DATE_ORDER
OPTIPNG = settings.OPTIPNG_BINARY
def __init__(self, path): def __init__(self, path):
self.document_path = path self.document_path = path
@ -45,6 +56,19 @@ class DocumentParser:
""" """
raise NotImplementedError() raise NotImplementedError()
def optimise_thumbnail(self, in_path):
    """
    Run the configured optipng binary over ``in_path``.

    The result is written to ``optipng.png`` inside ``self.tempdir`` (which
    is set outside this method).

    :param in_path: path of the image handed to optipng as input.
    :returns: path of the file optipng was told to write.
    :raises ParseError: if the optipng process exits with a non-zero status.
    """

    out_path = os.path.join(self.tempdir, "optipng.png")

    # "-o5" is presumably optipng's optimisation level -- confirm against
    # the optipng manual before changing it.
    args = (self.OPTIPNG, "-o5", in_path, "-out", out_path)

    exit_status = subprocess.Popen(args).wait()
    if exit_status != 0:
        raise ParseError("Optipng failed at {}".format(args))

    return out_path
def get_optimised_thumbnail(self):
    """
    Produce the thumbnail via ``get_thumbnail`` and pass it through
    ``optimise_thumbnail``.

    :returns: whatever ``optimise_thumbnail`` returns for that thumbnail.
    """
    thumbnail_path = self.get_thumbnail()
    return self.optimise_thumbnail(thumbnail_path)
def get_text(self): def get_text(self):
""" """
Returns the text from the document and only the text. Returns the text from the document and only the text.
@ -55,7 +79,82 @@ class DocumentParser:
""" """
Returns the date of the document. Returns the date of the document.
""" """
raise NotImplementedError()
def __parser(ds, date_order):
"""
Call dateparser.parse with a particular date ordering
"""
return dateparser.parse(
ds,
settings={
"DATE_ORDER": date_order,
"PREFER_DAY_OF_MONTH": "first",
"RETURN_AS_TIMEZONE_AWARE":
True
}
)
date = None
date_string = None
next_year = timezone.now().year + 5 # Arbitrary 5 year future limit
title = os.path.basename(self.document_path)
# if filename date parsing is enabled, search there first:
if self.FILENAME_DATE_ORDER:
self.log("info", "Checking document title for date")
for m in re.finditer(DATE_REGEX, title):
date_string = m.group(0)
try:
date = __parser(date_string, self.FILENAME_DATE_ORDER)
except TypeError:
# Skip all matches that do not parse to a proper date
continue
if date is not None and next_year > date.year > 1900:
self.log(
"info",
"Detected document date {} based on string {} "
"from document title"
"".format(date.isoformat(), date_string)
)
return date
try:
# getting text after checking filename will save time if only
# looking at the filename instead of the whole text
text = self.get_text()
except ParseError:
return None
# Iterate through all regex matches in text and try to parse the date
for m in re.finditer(DATE_REGEX, text):
date_string = m.group(0)
try:
date = __parser(date_string, self.DATE_ORDER)
except TypeError:
# Skip all matches that do not parse to a proper date
continue
if date is not None and next_year > date.year > 1900:
break
else:
date = None
if date is not None:
self.log(
"info",
"Detected document date {} based on string {}".format(
date.isoformat(),
date_string
)
)
else:
self.log("info", "Unable to detect date for document")
return date
def log(self, level, message): def log(self, level, message):
getattr(self.logger, level)(message, extra={ getattr(self.logger, level)(message, extra={

View File

@ -0,0 +1,66 @@
// The following jQuery snippet will add a small square next to the selection
// drop-down on the `Add tag` page that will update to show the selected tag
// color as the drop-down value is changed.
django.jQuery(document).ready(function () {
    const $ = django.jQuery;

    // Inline styling shared by every colour preview square. Applying it per
    // element avoids appending a duplicate <style> tag to <head> for each
    // select on the change-list page.
    const SQUARE_CSS = {
        'float': 'left',
        'width': '20px',
        'height': '20px',
        'margin': '5px',
        'border': '1px solid rgba(0, 0, 0, .2)'
    };

    /**
     * Return the text of the currently selected option of `select`, or an
     * empty string when nothing is selected (or the element has no options).
     *
     * The option text is used as the CSS colour. Reading it through
     * `selectedIndex` avoids assuming that option values are consecutive
     * integers starting at 1.
     */
    function selectedColour(select) {
        const options = select.options;
        if (!options) {
            return '';
        }
        const option = options[select.selectedIndex];
        return option ? option.text : '';
    }

    /**
     * Insert a preview square directly after `select` and keep its
     * background in sync with the selected option.
     *
     * `squareId` is optional; when given it becomes the square's id.
     */
    function attachSquare(select, squareId) {
        const $square = $('<div class="colour_square"></div>').css(SQUARE_CSS);
        if (squareId) {
            $square.attr('id', squareId);
        }
        $(select).after($square);

        function refresh() {
            // An empty string clears the background instead of throwing.
            $square.css({'background': selectedColour(select)});
        }

        refresh();
        $(select).change(refresh);
    }

    const $single = $('#id_colour');
    if ($single.length) {
        // Add/change form: a single colour drop-down.
        attachSquare($single[0]);
    } else {
        // Change list: one drop-down per row, ids contain "-colour".
        $('select[id*="-colour"]').each(function (index, element) {
            attachSquare(element, 'colour_square_' + index);
        });
    }
});

View File

@ -76,7 +76,12 @@ def binaries_check(app_configs, **kwargs):
error = "Paperless can't find {}. Without it, consumption is impossible." error = "Paperless can't find {}. Without it, consumption is impossible."
hint = "Either it's not in your ${PATH} or it's not installed." hint = "Either it's not in your ${PATH} or it's not installed."
binaries = (settings.CONVERT_BINARY, settings.UNPAPER_BINARY, "tesseract") binaries = (
settings.CONVERT_BINARY,
settings.OPTIPNG_BINARY,
settings.UNPAPER_BINARY,
"tesseract"
)
check_messages = [] check_messages = []
for binary in binaries: for binary in binaries:

View File

@ -1,15 +1,20 @@
from django.contrib.auth.models import User as DjangoUser
class User: class User:
""" """
This is a dummy django User used with our middleware to disable This is a dummy django User used with our middleware to disable
login authentication if that is configured in paperless.conf login authentication if that is configured in paperless.conf
""" """
is_superuser = True is_superuser = True
is_active = True is_active = True
is_staff = True is_staff = True
is_authenticated = True is_authenticated = True
# Must be -1 to avoid colliding with real user ID's (which start at 1) @property
id = -1 def id(self):
return DjangoUser.objects.order_by("pk").first().pk
@property @property
def pk(self): def pk(self):
@ -17,9 +22,9 @@ class User:
""" """
NOTE: These are here as a hack instead of being in the User definition NOTE: These are here as a hack instead of being in the User definition
above due to the way pycodestyle handles lambdas. NOTE: above due to the way pycodestyle handles lambdas.
See https://github.com/PyCQA/pycodestyle/issues/379 for more. NOTE: See https://github.com/PyCQA/pycodestyle/issues/379 for more.
""" """
User.has_module_perms = lambda *_: True User.has_module_perms = lambda *_: True

View File

@ -152,6 +152,10 @@ if os.getenv("PAPERLESS_DBENGINE"):
} }
if os.getenv("PAPERLESS_DBPASS"): if os.getenv("PAPERLESS_DBPASS"):
DATABASES["default"]["PASSWORD"] = os.getenv("PAPERLESS_DBPASS") DATABASES["default"]["PASSWORD"] = os.getenv("PAPERLESS_DBPASS")
if os.getenv("PAPERLESS_DBHOST"):
DATABASES["default"]["HOST"] = os.getenv("PAPERLESS_DBHOST")
if os.getenv("PAPERLESS_DBPORT"):
DATABASES["default"]["PORT"] = os.getenv("PAPERLESS_DBPORT")
# Password validation # Password validation
@ -199,6 +203,16 @@ STATIC_URL = os.getenv("PAPERLESS_STATIC_URL", "/static/")
MEDIA_URL = os.getenv("PAPERLESS_MEDIA_URL", "/media/") MEDIA_URL = os.getenv("PAPERLESS_MEDIA_URL", "/media/")
# Other
# Disable Django's artificial limit on the number of form fields to submit at
# once. This is a protection against overloading the server, but since this is
# a self-hosted sort of gig, the benefits of being able to mass-delete a tonne
# of log entries outweigh the benefits of such a safeguard.
DATA_UPLOAD_MAX_NUMBER_FIELDS = None
# Document classification models location # Document classification models location
MODEL_FILE = os.getenv( MODEL_FILE = os.getenv(
"PAPERLESS_MODEL_FILE", os.path.join(BASE_DIR, "..", "models", "model.pickle")) "PAPERLESS_MODEL_FILE", os.path.join(BASE_DIR, "..", "models", "model.pickle"))
@ -252,6 +266,9 @@ CONVERT_TMPDIR = os.getenv("PAPERLESS_CONVERT_TMPDIR")
CONVERT_MEMORY_LIMIT = os.getenv("PAPERLESS_CONVERT_MEMORY_LIMIT") CONVERT_MEMORY_LIMIT = os.getenv("PAPERLESS_CONVERT_MEMORY_LIMIT")
CONVERT_DENSITY = os.getenv("PAPERLESS_CONVERT_DENSITY") CONVERT_DENSITY = os.getenv("PAPERLESS_CONVERT_DENSITY")
# OptiPNG
OPTIPNG_BINARY = os.getenv("PAPERLESS_OPTIPNG_BINARY", "optipng")
# Unpaper # Unpaper
UNPAPER_BINARY = os.getenv("PAPERLESS_UNPAPER_BINARY", "unpaper") UNPAPER_BINARY = os.getenv("PAPERLESS_UNPAPER_BINARY", "unpaper")
@ -298,6 +315,7 @@ FY_END = os.getenv("PAPERLESS_FINANCIAL_YEAR_END")
# Specify the default date order (for autodetected dates) # Specify the default date order (for autodetected dates)
DATE_ORDER = os.getenv("PAPERLESS_DATE_ORDER", "DMY") DATE_ORDER = os.getenv("PAPERLESS_DATE_ORDER", "DMY")
FILENAME_DATE_ORDER = os.getenv("PAPERLESS_FILENAME_DATE_ORDER")
# Specify for how many years a correspondent is considered recent. Recent # Specify for how many years a correspondent is considered recent. Recent
# correspondents will be shown in a separate "Recent correspondents" filter as # correspondents will be shown in a separate "Recent correspondents" filter as

View File

@ -1 +1 @@
__version__ = (2, 3, 0) __version__ = (2, 6, 0)

View File

@ -4,7 +4,6 @@ import re
import subprocess import subprocess
from multiprocessing.pool import Pool from multiprocessing.pool import Pool
import dateparser
import langdetect import langdetect
import pyocr import pyocr
from django.conf import settings from django.conf import settings
@ -14,7 +13,7 @@ from pyocr.libtesseract.tesseract_raw import \
from pyocr.tesseract import TesseractError from pyocr.tesseract import TesseractError
import pdftotext import pdftotext
from documents.parsers import DocumentParser, ParseError, DATE_REGEX from documents.parsers import DocumentParser, ParseError
from .languages import ISO639 from .languages import ISO639
@ -33,7 +32,6 @@ class RasterisedDocumentParser(DocumentParser):
DENSITY = settings.CONVERT_DENSITY if settings.CONVERT_DENSITY else 300 DENSITY = settings.CONVERT_DENSITY if settings.CONVERT_DENSITY else 300
THREADS = int(settings.OCR_THREADS) if settings.OCR_THREADS else None THREADS = int(settings.OCR_THREADS) if settings.OCR_THREADS else None
UNPAPER = settings.UNPAPER_BINARY UNPAPER = settings.UNPAPER_BINARY
DATE_ORDER = settings.DATE_ORDER
DEFAULT_OCR_LANGUAGE = settings.OCR_LANGUAGE DEFAULT_OCR_LANGUAGE = settings.OCR_LANGUAGE
OCR_ALWAYS = settings.OCR_ALWAYS OCR_ALWAYS = settings.OCR_ALWAYS
@ -46,15 +44,18 @@ class RasterisedDocumentParser(DocumentParser):
The thumbnail of a PDF is just a 500px wide image of the first page. The thumbnail of a PDF is just a 500px wide image of the first page.
""" """
out_path = os.path.join(self.tempdir, "convert.png")
# Run convert to get a decent thumbnail
run_convert( run_convert(
self.CONVERT, self.CONVERT,
"-scale", "500x5000", "-scale", "500x5000",
"-alpha", "remove", "-alpha", "remove",
"{}[0]".format(self.document_path), "{}[0]".format(self.document_path),
os.path.join(self.tempdir, "convert.png") out_path
) )
return os.path.join(self.tempdir, "convert.png") return out_path
def _is_ocred(self): def _is_ocred(self):
@ -152,7 +153,10 @@ class RasterisedDocumentParser(DocumentParser):
) )
raw_text = self._assemble_ocr_sections(imgs, middle, raw_text) raw_text = self._assemble_ocr_sections(imgs, middle, raw_text)
return raw_text return raw_text
raise OCRError("Language detection failed") error_msg = ("Language detection failed. Set "
"PAPERLESS_FORGIVING_OCR in config file to continue "
"anyway.")
raise OCRError(error_msg)
if ISO639[guessed_language] == self.DEFAULT_OCR_LANGUAGE: if ISO639[guessed_language] == self.DEFAULT_OCR_LANGUAGE:
raw_text = self._assemble_ocr_sections(imgs, middle, raw_text) raw_text = self._assemble_ocr_sections(imgs, middle, raw_text)
@ -202,40 +206,6 @@ class RasterisedDocumentParser(DocumentParser):
text += self._ocr(imgs[middle + 1:], self.DEFAULT_OCR_LANGUAGE) text += self._ocr(imgs[middle + 1:], self.DEFAULT_OCR_LANGUAGE)
return text return text
def get_date(self):
date = None
datestring = None
try:
text = self.get_text()
except ParseError as e:
return None
# Iterate through all regex matches and try to parse the date
for m in re.finditer(DATE_REGEX, text):
datestring = m.group(0)
try:
date = dateparser.parse(
datestring,
settings={'DATE_ORDER': self.DATE_ORDER,
'PREFER_DAY_OF_MONTH': 'first',
'RETURN_AS_TIMEZONE_AWARE': True})
except TypeError:
# Skip all matches that do not parse to a proper date
continue
if date is not None:
break
if date is not None:
self.log("info", "Detected document date " + date.isoformat() +
" based on string " + datestring)
else:
self.log("info", "Unable to detect date for document")
return date
def run_convert(*args): def run_convert(*args):
@ -251,7 +221,8 @@ def run_convert(*args):
def run_unpaper(args): def run_unpaper(args):
unpaper, pnm = args unpaper, pnm = args
command_args = unpaper, pnm, pnm.replace(".pnm", ".unpaper.pnm") command_args = (unpaper, "--overwrite", pnm,
pnm.replace(".pnm", ".unpaper.pnm"))
if not subprocess.Popen(command_args).wait() == 0: if not subprocess.Popen(command_args).wait() == 0:
raise ParseError("Unpaper failed at {}".format(command_args)) raise ParseError("Unpaper failed at {}".format(command_args))

Binary file not shown.

After

Width:  |  Height:  |  Size: 136 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 138 KiB

After

Width:  |  Height:  |  Size: 55 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 138 KiB

After

Width:  |  Height:  |  Size: 53 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 136 KiB

View File

@ -5,9 +5,10 @@ from unittest import mock
from uuid import uuid4 from uuid import uuid4
from dateutil import tz from dateutil import tz
from django.test import TestCase from django.test import TestCase, override_settings
from ..parsers import RasterisedDocumentParser from ..parsers import RasterisedDocumentParser
from django.conf import settings
class TestDate(TestCase): class TestDate(TestCase):
@ -59,9 +60,13 @@ class TestDate(TestCase):
input_file = os.path.join(self.SAMPLE_FILES, "") input_file = os.path.join(self.SAMPLE_FILES, "")
document = RasterisedDocumentParser(input_file) document = RasterisedDocumentParser(input_file)
document._text = "lorem ipsum 13.02.2018 lorem ipsum" document._text = "lorem ipsum 13.02.2018 lorem ipsum"
date = document.get_date()
self.assertEqual( self.assertEqual(
document.get_date(), date,
datetime.datetime(2018, 2, 13, 0, 0, tzinfo=tz.tzutc()) datetime.datetime(
2018, 2, 13, 0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE)
)
) )
@mock.patch( @mock.patch(
@ -72,10 +77,16 @@ class TestDate(TestCase):
input_file = os.path.join(self.SAMPLE_FILES, "") input_file = os.path.join(self.SAMPLE_FILES, "")
document = RasterisedDocumentParser(input_file) document = RasterisedDocumentParser(input_file)
document._text = ( document._text = (
"lorem ipsum 130218, 2018, 20180213 and 13.02.2018 lorem ipsum") "lorem ipsum 130218, 2018, 20180213 and lorem 13.02.2018 lorem "
"ipsum"
)
date = document.get_date()
self.assertEqual( self.assertEqual(
document.get_date(), date,
datetime.datetime(2018, 2, 13, 0, 0, tzinfo=tz.tzutc()) datetime.datetime(
2018, 2, 13, 0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE)
)
) )
@mock.patch( @mock.patch(
@ -110,9 +121,13 @@ class TestDate(TestCase):
"März 2019\n" "März 2019\n"
"lorem ipsum" "lorem ipsum"
) )
date = document.get_date()
self.assertEqual( self.assertEqual(
document.get_date(), date,
datetime.datetime(2019, 3, 1, 0, 0, tzinfo=tz.tzutc()) datetime.datetime(
2019, 3, 1, 0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE)
)
) )
@mock.patch( @mock.patch(
@ -122,19 +137,25 @@ class TestDate(TestCase):
def test_date_format_8(self): def test_date_format_8(self):
input_file = os.path.join(self.SAMPLE_FILES, "") input_file = os.path.join(self.SAMPLE_FILES, "")
document = RasterisedDocumentParser(input_file) document = RasterisedDocumentParser(input_file)
document._text = ("lorem ipsum\n" document._text = (
"Wohnort\n" "lorem ipsum\n"
"3100\n" "Wohnort\n"
"IBAN\n" "3100\n"
"AT87 4534\n" "IBAN\n"
"1234\n" "AT87 4534\n"
"1234 5678\n" "1234\n"
"BIC\n" "1234 5678\n"
"lorem ipsum\n" "BIC\n"
"März 2020") "lorem ipsum\n"
self.assertEqual(document.get_date(), "März 2020"
datetime.datetime(2020, 3, 1, 0, 0, )
tzinfo=tz.tzutc())) self.assertEqual(
document.get_date(),
datetime.datetime(
2020, 3, 1, 0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE)
)
)
@mock.patch( @mock.patch(
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH", "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
@ -143,13 +164,19 @@ class TestDate(TestCase):
def test_date_format_9(self): def test_date_format_9(self):
input_file = os.path.join(self.SAMPLE_FILES, "") input_file = os.path.join(self.SAMPLE_FILES, "")
document = RasterisedDocumentParser(input_file) document = RasterisedDocumentParser(input_file)
document._text = ("lorem ipsum\n" document._text = (
"27. Nullmonth 2020\n" "lorem ipsum\n"
"März 2020\n" "27. Nullmonth 2020\n"
"lorem ipsum") "März 2020\n"
self.assertEqual(document.get_date(), "lorem ipsum"
datetime.datetime(2020, 3, 1, 0, 0, )
tzinfo=tz.tzutc())) self.assertEqual(
document.get_date(),
datetime.datetime(
2020, 3, 1, 0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE)
)
)
@mock.patch( @mock.patch(
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH", "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
@ -158,11 +185,16 @@ class TestDate(TestCase):
def test_get_text_1_pdf(self): def test_get_text_1_pdf(self):
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_1.pdf") input_file = os.path.join(self.SAMPLE_FILES, "tests_date_1.pdf")
document = RasterisedDocumentParser(input_file) document = RasterisedDocumentParser(input_file)
document.DATE_ORDER = 'DMY'
document.get_text() document.get_text()
date = document.get_date()
self.assertEqual(document._is_ocred(), True) self.assertEqual(document._is_ocred(), True)
self.assertEqual( self.assertEqual(
document.get_date(), date,
datetime.datetime(2018, 4, 1, 0, 0, tzinfo=tz.tzutc()) datetime.datetime(
2018, 4, 1, 0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE)
)
) )
@mock.patch( @mock.patch(
@ -172,11 +204,15 @@ class TestDate(TestCase):
def test_get_text_1_png(self): def test_get_text_1_png(self):
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_1.png") input_file = os.path.join(self.SAMPLE_FILES, "tests_date_1.png")
document = RasterisedDocumentParser(input_file) document = RasterisedDocumentParser(input_file)
document.DATE_ORDER = 'DMY'
document.get_text() document.get_text()
self.assertEqual(document._is_ocred(), False) self.assertEqual(document._is_ocred(), False)
self.assertEqual( self.assertEqual(
document.get_date(), document.get_date(),
datetime.datetime(2018, 4, 1, 0, 0, tzinfo=tz.tzutc()) datetime.datetime(
2018, 4, 1, 0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE)
)
) )
@mock.patch( @mock.patch(
@ -186,11 +222,15 @@ class TestDate(TestCase):
def test_get_text_2_pdf(self): def test_get_text_2_pdf(self):
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_2.pdf") input_file = os.path.join(self.SAMPLE_FILES, "tests_date_2.pdf")
document = RasterisedDocumentParser(input_file) document = RasterisedDocumentParser(input_file)
document.DATE_ORDER = 'DMY'
document.get_text() document.get_text()
self.assertEqual(document._is_ocred(), True) self.assertEqual(document._is_ocred(), True)
self.assertEqual( self.assertEqual(
document.get_date(), document.get_date(),
datetime.datetime(2013, 2, 1, 0, 0, tzinfo=tz.tzutc()) datetime.datetime(
2013, 2, 1, 0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE)
)
) )
@mock.patch( @mock.patch(
@ -200,67 +240,91 @@ class TestDate(TestCase):
def test_get_text_2_png(self): def test_get_text_2_png(self):
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_2.png") input_file = os.path.join(self.SAMPLE_FILES, "tests_date_2.png")
document = RasterisedDocumentParser(input_file) document = RasterisedDocumentParser(input_file)
document.DATE_ORDER = 'DMY'
document.get_text() document.get_text()
self.assertEqual(document._is_ocred(), False) self.assertEqual(document._is_ocred(), False)
self.assertEqual( self.assertEqual(
document.get_date(), document.get_date(),
datetime.datetime(2013, 2, 1, 0, 0, tzinfo=tz.tzutc()) datetime.datetime(
2013, 2, 1, 0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE)
)
) )
@mock.patch( @mock.patch(
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH", "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
SCRATCH SCRATCH
) )
@override_settings(OCR_LANGUAGE="deu")
def test_get_text_3_pdf(self): def test_get_text_3_pdf(self):
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_3.pdf") input_file = os.path.join(self.SAMPLE_FILES, "tests_date_3.pdf")
document = RasterisedDocumentParser(input_file) document = RasterisedDocumentParser(input_file)
document.DATE_ORDER = 'DMY'
document.get_text() document.get_text()
self.assertEqual(document._is_ocred(), True) self.assertEqual(document._is_ocred(), True)
self.assertEqual( self.assertEqual(
document.get_date(), document.get_date(),
datetime.datetime(2018, 10, 5, 0, 0, tzinfo=tz.tzutc()) datetime.datetime(
2018, 10, 5, 0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE)
)
) )
@mock.patch( @mock.patch(
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH", "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
SCRATCH SCRATCH
) )
@override_settings(OCR_LANGUAGE="deu")
def test_get_text_3_png(self): def test_get_text_3_png(self):
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_3.png") input_file = os.path.join(self.SAMPLE_FILES, "tests_date_3.png")
document = RasterisedDocumentParser(input_file) document = RasterisedDocumentParser(input_file)
document.DATE_ORDER = 'DMY'
document.get_text() document.get_text()
self.assertEqual(document._is_ocred(), False) self.assertEqual(document._is_ocred(), False)
self.assertEqual( self.assertEqual(
document.get_date(), document.get_date(),
datetime.datetime(2018, 10, 5, 0, 0, tzinfo=tz.tzutc()) datetime.datetime(
2018, 10, 5, 0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE)
)
) )
@mock.patch( @mock.patch(
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH", "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
SCRATCH SCRATCH
) )
@override_settings(OCR_LANGUAGE="eng")
def test_get_text_4_pdf(self): def test_get_text_4_pdf(self):
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_4.pdf") input_file = os.path.join(self.SAMPLE_FILES, "tests_date_4.pdf")
document = RasterisedDocumentParser(input_file) document = RasterisedDocumentParser(input_file)
document.DATE_ORDER = 'DMY'
document.get_text() document.get_text()
self.assertEqual(document._is_ocred(), True) self.assertEqual(document._is_ocred(), True)
self.assertEqual( self.assertEqual(
document.get_date(), document.get_date(),
datetime.datetime(2018, 10, 5, 0, 0, tzinfo=tz.tzutc()) datetime.datetime(
2018, 10, 5, 0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE)
)
) )
@mock.patch( @mock.patch(
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH", "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
SCRATCH SCRATCH
) )
@override_settings(OCR_LANGUAGE="eng")
def test_get_text_4_png(self): def test_get_text_4_png(self):
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_4.png") input_file = os.path.join(self.SAMPLE_FILES, "tests_date_4.png")
document = RasterisedDocumentParser(input_file) document = RasterisedDocumentParser(input_file)
document.DATE_ORDER = 'DMY'
document.get_text() document.get_text()
self.assertEqual(document._is_ocred(), False) self.assertEqual(document._is_ocred(), False)
self.assertEqual( self.assertEqual(
document.get_date(), document.get_date(),
datetime.datetime(2018, 10, 5, 0, 0, tzinfo=tz.tzutc()) datetime.datetime(
2018, 10, 5, 0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE)
)
) )
@mock.patch( @mock.patch(
@ -270,11 +334,15 @@ class TestDate(TestCase):
def test_get_text_5_pdf(self): def test_get_text_5_pdf(self):
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_5.pdf") input_file = os.path.join(self.SAMPLE_FILES, "tests_date_5.pdf")
document = RasterisedDocumentParser(input_file) document = RasterisedDocumentParser(input_file)
document.DATE_ORDER = 'DMY'
document.get_text() document.get_text()
self.assertEqual(document._is_ocred(), True) self.assertEqual(document._is_ocred(), True)
self.assertEqual( self.assertEqual(
document.get_date(), document.get_date(),
datetime.datetime(2018, 12, 17, 0, 0, tzinfo=tz.tzutc()) datetime.datetime(
2018, 12, 17, 0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE)
)
) )
@mock.patch( @mock.patch(
@ -284,11 +352,15 @@ class TestDate(TestCase):
def test_get_text_5_png(self): def test_get_text_5_png(self):
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_5.png") input_file = os.path.join(self.SAMPLE_FILES, "tests_date_5.png")
document = RasterisedDocumentParser(input_file) document = RasterisedDocumentParser(input_file)
document.DATE_ORDER = 'DMY'
document.get_text() document.get_text()
self.assertEqual(document._is_ocred(), False) self.assertEqual(document._is_ocred(), False)
self.assertEqual( self.assertEqual(
document.get_date(), document.get_date(),
datetime.datetime(2018, 12, 17, 0, 0, tzinfo=tz.tzutc()) datetime.datetime(
2018, 12, 17, 0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE)
)
) )
@mock.patch( @mock.patch(
@ -303,7 +375,10 @@ class TestDate(TestCase):
self.assertEqual(document._is_ocred(), True) self.assertEqual(document._is_ocred(), True)
self.assertEqual( self.assertEqual(
document.get_date(), document.get_date(),
datetime.datetime(2018, 12, 17, 0, 0, tzinfo=tz.tzutc()) datetime.datetime(
2018, 12, 17, 0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE)
)
) )
@mock.patch( @mock.patch(
@ -318,7 +393,10 @@ class TestDate(TestCase):
self.assertEqual(document._is_ocred(), False) self.assertEqual(document._is_ocred(), False)
self.assertEqual( self.assertEqual(
document.get_date(), document.get_date(),
datetime.datetime(2018, 12, 17, 0, 0, tzinfo=tz.tzutc()) datetime.datetime(
2018, 12, 17, 0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE)
)
) )
@mock.patch( @mock.patch(
@ -328,6 +406,7 @@ class TestDate(TestCase):
def test_get_text_6_pdf_eu(self): def test_get_text_6_pdf_eu(self):
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_6.pdf") input_file = os.path.join(self.SAMPLE_FILES, "tests_date_6.pdf")
document = RasterisedDocumentParser(input_file) document = RasterisedDocumentParser(input_file)
document.DATE_ORDER = 'DMY'
document.get_text() document.get_text()
self.assertEqual(document._is_ocred(), True) self.assertEqual(document._is_ocred(), True)
self.assertEqual(document.get_date(), None) self.assertEqual(document.get_date(), None)
@ -339,6 +418,7 @@ class TestDate(TestCase):
def test_get_text_6_png_eu(self): def test_get_text_6_png_eu(self):
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_6.png") input_file = os.path.join(self.SAMPLE_FILES, "tests_date_6.png")
document = RasterisedDocumentParser(input_file) document = RasterisedDocumentParser(input_file)
document.DATE_ORDER = 'DMY'
document.get_text() document.get_text()
self.assertEqual(document._is_ocred(), False) self.assertEqual(document._is_ocred(), False)
self.assertEqual(document.get_date(), None) self.assertEqual(document.get_date(), None)
@ -350,11 +430,15 @@ class TestDate(TestCase):
def test_get_text_7_pdf(self): def test_get_text_7_pdf(self):
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_7.pdf") input_file = os.path.join(self.SAMPLE_FILES, "tests_date_7.pdf")
document = RasterisedDocumentParser(input_file) document = RasterisedDocumentParser(input_file)
document.DATE_ORDER = 'DMY'
document.get_text() document.get_text()
self.assertEqual(document._is_ocred(), True) self.assertEqual(document._is_ocred(), True)
self.assertEqual( self.assertEqual(
document.get_date(), document.get_date(),
datetime.datetime(2018, 4, 1, 0, 0, tzinfo=tz.tzutc()) datetime.datetime(
2018, 4, 1, 0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE)
)
) )
@mock.patch( @mock.patch(
@ -364,11 +448,15 @@ class TestDate(TestCase):
def test_get_text_8_pdf(self): def test_get_text_8_pdf(self):
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_8.pdf") input_file = os.path.join(self.SAMPLE_FILES, "tests_date_8.pdf")
document = RasterisedDocumentParser(input_file) document = RasterisedDocumentParser(input_file)
document.DATE_ORDER = 'DMY'
document.get_text() document.get_text()
self.assertEqual(document._is_ocred(), True) self.assertEqual(document._is_ocred(), True)
self.assertEqual( self.assertEqual(
document.get_date(), document.get_date(),
datetime.datetime(2017, 12, 31, 0, 0, tzinfo=tz.tzutc()) datetime.datetime(
2017, 12, 31, 0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE)
)
) )
@mock.patch( @mock.patch(
@ -378,9 +466,137 @@ class TestDate(TestCase):
def test_get_text_9_pdf(self): def test_get_text_9_pdf(self):
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_9.pdf") input_file = os.path.join(self.SAMPLE_FILES, "tests_date_9.pdf")
document = RasterisedDocumentParser(input_file) document = RasterisedDocumentParser(input_file)
document.DATE_ORDER = 'DMY'
document.get_text() document.get_text()
self.assertEqual(document._is_ocred(), True) self.assertEqual(document._is_ocred(), True)
self.assertEqual( self.assertEqual(
document.get_date(), document.get_date(),
datetime.datetime(2017, 12, 31, 0, 0, tzinfo=tz.tzutc()) datetime.datetime(
2017, 12, 31, 0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE)
)
) )
@mock.patch(
    "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
    SCRATCH
)
def test_filename_date_1_pdf(self):
    """
    A PDF sample whose file name ends in ``2018-03-20_1`` must yield
    2018-03-20 00:00 in ``settings.TIME_ZONE`` when FILENAME_DATE_ORDER is
    'YMD', and the parser must report the file as already OCRed.
    """
    sample_name = "tests_date_in_filename_2018-03-20_1.pdf"
    parser = RasterisedDocumentParser(
        os.path.join(self.SAMPLE_FILES, sample_name)
    )
    parser.FILENAME_DATE_ORDER = 'YMD'

    # Text is extracted first, then the date is requested.
    parser.get_text()
    detected = parser.get_date()

    self.assertEqual(parser._is_ocred(), True)

    local_zone = tz.gettz(settings.TIME_ZONE)
    self.assertEqual(
        detected,
        datetime.datetime(2018, 3, 20, 0, 0, tzinfo=local_zone)
    )
@mock.patch(
    "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
    SCRATCH
)
def test_filename_date_1_png(self):
    """
    A PNG sample whose file name ends in ``2018-03-20_1`` must yield
    2018-03-20 00:00 in ``settings.TIME_ZONE`` when FILENAME_DATE_ORDER is
    'YMD'; the parser must report the image as not OCRed.
    """
    sample_name = "tests_date_in_filename_2018-03-20_1.png"
    parser = RasterisedDocumentParser(
        os.path.join(self.SAMPLE_FILES, sample_name)
    )
    parser.FILENAME_DATE_ORDER = 'YMD'

    # get_text() is not called explicitly in this test.
    detected = parser.get_date()

    self.assertEqual(parser._is_ocred(), False)

    local_zone = tz.gettz(settings.TIME_ZONE)
    self.assertEqual(
        detected,
        datetime.datetime(2018, 3, 20, 0, 0, tzinfo=local_zone)
    )
@mock.patch(
    "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
    SCRATCH
)
def test_filename_date_2_pdf(self):
    """
    A PDF sample whose file name starts with ``2013-12-11`` must yield
    2013-12-11 00:00 in ``settings.TIME_ZONE`` when FILENAME_DATE_ORDER is
    'YMD', and the parser must report the file as already OCRed.
    """
    sample_name = "2013-12-11_tests_date_in_filename_2.pdf"
    parser = RasterisedDocumentParser(
        os.path.join(self.SAMPLE_FILES, sample_name)
    )
    parser.FILENAME_DATE_ORDER = 'YMD'

    # get_text() is not called explicitly in this test.
    detected = parser.get_date()

    self.assertEqual(parser._is_ocred(), True)

    local_zone = tz.gettz(settings.TIME_ZONE)
    self.assertEqual(
        detected,
        datetime.datetime(2013, 12, 11, 0, 0, tzinfo=local_zone)
    )
@mock.patch(
    "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
    SCRATCH
)
def test_filename_date_2_png(self):
    """
    A PNG sample whose file name starts with ``2013-12-11`` must yield
    2013-12-11 00:00 in ``settings.TIME_ZONE`` when FILENAME_DATE_ORDER is
    'YMD'; the parser must report the image as not OCRed.
    """
    sample_name = "2013-12-11_tests_date_in_filename_2.png"
    parser = RasterisedDocumentParser(
        os.path.join(self.SAMPLE_FILES, sample_name)
    )
    parser.FILENAME_DATE_ORDER = 'YMD'

    # get_text() is not called explicitly in this test.
    detected = parser.get_date()

    self.assertEqual(parser._is_ocred(), False)

    local_zone = tz.gettz(settings.TIME_ZONE)
    self.assertEqual(
        detected,
        datetime.datetime(2013, 12, 11, 0, 0, tzinfo=local_zone)
    )
@mock.patch(
    "paperless_tesseract.parsers.RasterisedDocumentParser.get_text",
    return_value="01-07-0590 00:00:00"
)
@mock.patch(
    "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
    SCRATCH
)
def test_crazy_date_past(self, *args):
    """
    With get_text() mocked to return only a year-0590 timestamp,
    get_date() must return None.
    """
    # *args absorbs the mock object injected by the get_text patch.
    parser = RasterisedDocumentParser("/dev/null")
    parser.get_text()

    detected = parser.get_date()
    self.assertIsNone(detected)
@mock.patch(
    "paperless_tesseract.parsers.RasterisedDocumentParser.get_text",
    return_value="01-07-2350 00:00:00"
)
@mock.patch(
    "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
    SCRATCH
)
def test_crazy_date_future(self, *args):
    """
    With get_text() mocked to return only a year-2350 timestamp,
    get_date() must return None.
    """
    # *args absorbs the mock object injected by the get_text patch.
    parser = RasterisedDocumentParser("/dev/null")
    parser.get_text()

    detected = parser.get_date()
    self.assertIsNone(detected)
@mock.patch(
    "paperless_tesseract.parsers.RasterisedDocumentParser.get_text",
    return_value="01-07-0590 00:00:00"
)
@mock.patch(
    "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
    SCRATCH
)
def test_crazy_date_past_repeated(self, *args):
    """
    With get_text() mocked to return only a year-0590 timestamp,
    get_date() must return None.

    This method used to be a second ``test_crazy_date_past`` with the same
    decorators and body as the earlier definition of that name. A repeated
    ``def`` in one class body rebinds the name, so only one of the two was
    ever collected. It now has its own name so it no longer shadows the
    earlier test.
    NOTE(review): the scenario is identical to test_crazy_date_past;
    consider deleting this method or giving it a distinct input.
    """
    # *args absorbs the mock object injected by the get_text patch.
    document = RasterisedDocumentParser("/dev/null")
    document.get_text()
    self.assertIsNone(document.get_date())

View File

@ -1,11 +1,9 @@
import os import os
import re
import subprocess import subprocess
import dateparser
from django.conf import settings from django.conf import settings
from documents.parsers import DocumentParser, ParseError, DATE_REGEX from documents.parsers import DocumentParser, ParseError
class TextDocumentParser(DocumentParser): class TextDocumentParser(DocumentParser):
@ -16,7 +14,6 @@ class TextDocumentParser(DocumentParser):
CONVERT = settings.CONVERT_BINARY CONVERT = settings.CONVERT_BINARY
THREADS = int(settings.OCR_THREADS) if settings.OCR_THREADS else None THREADS = int(settings.OCR_THREADS) if settings.OCR_THREADS else None
UNPAPER = settings.UNPAPER_BINARY UNPAPER = settings.UNPAPER_BINARY
DATE_ORDER = settings.DATE_ORDER
DEFAULT_OCR_LANGUAGE = settings.OCR_LANGUAGE DEFAULT_OCR_LANGUAGE = settings.OCR_LANGUAGE
OCR_ALWAYS = settings.OCR_ALWAYS OCR_ALWAYS = settings.OCR_ALWAYS
@ -26,7 +23,7 @@ class TextDocumentParser(DocumentParser):
def get_thumbnail(self): def get_thumbnail(self):
""" """
The thumbnail of a txt is just a 500px wide image of the text The thumbnail of a text file is just a 500px wide image of the text
rendered onto a letter-sized page. rendered onto a letter-sized page.
""" """
# The below is heavily cribbed from https://askubuntu.com/a/590951 # The below is heavily cribbed from https://askubuntu.com/a/590951
@ -35,7 +32,7 @@ class TextDocumentParser(DocumentParser):
text_color = "black" # text color text_color = "black" # text color
psize = [500, 647] # icon size psize = [500, 647] # icon size
n_lines = 50 # number of lines to show n_lines = 50 # number of lines to show
output_file = os.path.join(self.tempdir, "convert-txt.png") out_path = os.path.join(self.tempdir, "convert.png")
temp_bg = os.path.join(self.tempdir, "bg.png") temp_bg = os.path.join(self.tempdir, "bg.png")
temp_txlayer = os.path.join(self.tempdir, "tx.png") temp_txlayer = os.path.join(self.tempdir, "tx.png")
@ -46,9 +43,13 @@ class TextDocumentParser(DocumentParser):
work_size = ",".join([str(n - 1) for n in psize]) work_size = ",".join([str(n - 1) for n in psize])
r = str(round(psize[0] / 10)) r = str(round(psize[0] / 10))
rounded = ",".join([r, r]) rounded = ",".join([r, r])
run_command(self.CONVERT, "-size ", picsize, ' xc:none -draw ', run_command(
'"fill ', bg_color, ' roundrectangle 0,0,', self.CONVERT,
work_size, ",", rounded, '" ', temp_bg) "-size ", picsize,
' xc:none -draw ',
'"fill ', bg_color, ' roundrectangle 0,0,', work_size, ",", rounded, '" ', # NOQA: E501
temp_bg
)
def read_text(): def read_text():
with open(self.document_path, 'r') as src: with open(self.document_path, 'r') as src:
@ -57,22 +58,29 @@ class TextDocumentParser(DocumentParser):
return text.replace('"', "'") return text.replace('"', "'")
def create_txlayer(): def create_txlayer():
run_command(self.CONVERT, run_command(
"-background none", self.CONVERT,
"-fill", "-background none",
text_color, "-fill",
"-pointsize", "12", text_color,
"-border 4 -bordercolor none", "-pointsize", "12",
"-size ", txsize, "-border 4 -bordercolor none",
' caption:"', read_text(), '" ', "-size ", txsize,
temp_txlayer) ' caption:"', read_text(), '" ',
temp_txlayer
)
create_txlayer() create_txlayer()
create_bg() create_bg()
run_command(self.CONVERT, temp_bg, temp_txlayer, run_command(
"-background None -layers merge ", output_file) self.CONVERT,
temp_bg,
temp_txlayer,
"-background None -layers merge ",
out_path
)
return output_file return out_path
def get_text(self): def get_text(self):
@ -84,40 +92,6 @@ class TextDocumentParser(DocumentParser):
return self._text return self._text
def get_date(self):
    """
    Return the first date found in the document text, or None.

    Every DATE_REGEX match in the text is handed to dateparser in order,
    using this parser's DATE_ORDER, preferring the first day of the month
    and asking for a timezone-aware result. The first match that parses to
    a date is returned. None is returned when get_text() raises ParseError
    or when no match parses. The outcome is logged at "info" level.
    """
    try:
        content = self.get_text()
    except ParseError:
        return None

    found = None
    source_string = None

    for candidate in re.finditer(DATE_REGEX, content):
        source_string = candidate.group(0)
        try:
            found = dateparser.parse(
                source_string,
                settings={
                    'DATE_ORDER': self.DATE_ORDER,
                    'PREFER_DAY_OF_MONTH': 'first',
                    'RETURN_AS_TIMEZONE_AWARE': True
                }
            )
        except TypeError:
            # This match did not parse to a proper date; try the next one.
            continue
        if found is not None:
            break

    if found is None:
        self.log("info", "Unable to detect date for document")
        return None

    self.log(
        "info",
        "Detected document date " + found.isoformat() +
        " based on string " + source_string
    )
    return found
def run_command(*args): def run_command(*args):
environment = os.environ.copy() environment = os.environ.copy()

View File

@ -0,0 +1,19 @@
# Generated by Django 2.0.8 on 2018-10-07 14:20
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
    """
    Alter ``reminder.document`` to a ForeignKey to ``documents.Document``
    with ``on_delete=PROTECT``. Depends on the initial reminders migration.
    """

    dependencies = [
        ('reminders', '0001_initial'),
    ]

    operations = [
        migrations.AlterField(
            model_name='reminder',
            name='document',
            field=models.ForeignKey(
                on_delete=django.db.models.deletion.PROTECT,
                to='documents.Document',
            ),
        ),
    ]

View File

@ -4,7 +4,6 @@ from django.db import models
class Reminder(models.Model): class Reminder(models.Model):
document = models.ForeignKey( document = models.ForeignKey(
"documents.Document", on_delete=models.PROTECT "documents.Document", on_delete=models.PROTECT)
)
date = models.DateTimeField() date = models.DateTimeField()
note = models.TextField(blank=True) note = models.TextField(blank=True)