Compare commits

..

92 Commits

Author SHA1 Message Date
Jonas Winkler
c6a51a1cdc Version bump 2018-12-12 13:25:28 +01:00
Jonas Winkler
4b20d5d4b9 Fixed migration order 2018-12-12 13:13:21 +01:00
Jonas Winkler
cccd183c31 Fixed migration order 2018-12-12 13:11:30 +01:00
Jonas Winkler
1baa203ef2 Merge branch 'release-1.0.0' into dev 2018-12-11 22:58:14 +01:00
Jonas Winkler
c3ce05e1cd Merge branch 'master' into dev 2018-12-11 22:36:26 +01:00
Jonas Winkler
7659dde16c Merge remote-tracking branch 'origin/patch-1' into dev 2018-12-11 22:26:20 +01:00
Jonas Winkler
872d657361 Version bumb 2018-12-11 14:32:30 +01:00
Jonas Winkler
ea58c66fd4 Merge branch 'master' into dev 2018-12-11 12:38:15 +01:00
Jonas Winkler
bcd9220021 minor changes 2018-12-11 12:26:44 +01:00
Jonas Winkler
766109ae4e Merge remote-tracking branch 'upstream/master' 2018-12-11 12:06:15 +01:00
Jonas Winkler
b347e3347d Restored tagging functionality 2018-09-27 20:41:16 +02:00
Jonas Winkler
7257cece30 Code style changes 2018-09-26 10:51:42 +02:00
Jonas Winkler
5b9f38d398 Removed the archive tag, as it wasnt really used anyway. 2018-09-25 21:51:38 +02:00
Jonas Winkler
b31d4779bf Code style changes 2018-09-25 21:12:47 +02:00
Jonas Winkler
60618381f8 Code style adjustments 2018-09-25 16:09:33 +02:00
Jonas Winkler
779ea6a015 Merge branch 'master' into dev 2018-09-25 14:53:21 +02:00
Jonas Winkler
94ede7389d Merge remote-tracking branch 'upstream/master' 2018-09-25 14:47:12 +02:00
Jonas Winkler
03beca7838 Fixed api issue (some parameter name got renamed) 2018-09-16 13:29:56 +02:00
Jonas Winkler
fb1dcb6e08 Merge branch 'fix-document-viewer' into dev 2018-09-14 16:48:37 +02:00
Jonas Winkler
a298cbd4ce Merge branch 'fix-document-viewer' 2018-09-14 16:48:27 +02:00
Jonas Winkler
f1a1e7f1a4 fixed document viewer 2018-09-14 16:48:08 +02:00
Jonas Winkler
8371c2399f Merge branch 'dev' 2018-09-13 14:15:33 +02:00
Jonas Winkler
909586bf25 Code style changed 2018-09-13 14:15:16 +02:00
Jonas Winkler
8d003a6a85 Save and edit next button appears on documents without viewer as well.
Made the new recent correspondents filter optional. Disabled by default.
2018-09-13 13:10:05 +02:00
Jonas Winkler
0209b71404 Merge branch 'dev' 2018-09-13 10:29:10 +02:00
Jonas Winkler
0dc3644cc1 Added missing dependencies 2018-09-12 17:43:13 +02:00
Jonas Winkler
fb1a2ee577 Merge branch 'dev' 2018-09-12 17:20:12 +02:00
Jonas Winkler
7c589f71a4 Fixed a few minor issues. 2018-09-12 16:25:23 +02:00
Jonas Winkler
25a6aa909b removed duplicate code 2018-09-12 13:43:28 +02:00
Jonas Winkler
ef0d37985b Merge branch 'master' into dev 2018-09-12 11:47:35 +02:00
Jonas Winkler
898931cc03 bugfix 2018-09-11 20:45:36 +02:00
Jonas Winkler
17803e7936 fixed settings 2018-09-11 17:30:46 +02:00
Jonas Winkler
e72735c4f0 Merge remote-tracking branch 'upstream/master' 2018-09-11 14:43:59 +02:00
Jonas Winkler
46a5bc00d7 Merge branch 'machine-learning' into dev 2018-09-11 14:36:21 +02:00
Jonas Winkler
d46ee11143 The classifier works with ids now, not names. Minor changes. 2018-09-11 14:30:18 +02:00
Jonas Winkler
d2534a73e5 changed classifier 2018-09-11 00:33:07 +02:00
Jonas Winkler
11adc94e5e mode change 2018-09-06 12:00:01 +02:00
Jonas Winkler
04bf5fc094 fixed merge error 2018-09-06 10:15:15 +02:00
Jonas Winkler
d26f940a91 Merge branch 'dev' into machine-learning 2018-09-06 00:29:41 +02:00
Jonas Winkler
13725ef8ee Merge branch 'master' into dev 2018-09-06 00:28:58 +02:00
Jonas Winkler
6f0ca432c4 Added scikit-learn to requirements 2018-09-06 00:20:44 +02:00
Jonas Winkler
dd8746bac7 fixed the api 2018-09-05 15:29:05 +02:00
Jonas Winkler
8eeded95c4 Merge branch 'dev' into machine-learning 2018-09-05 15:26:39 +02:00
Jonas Winkler
131e1c9dd8 fixed the api 2018-09-05 15:25:14 +02:00
Jonas Winkler
a6b4fc7e81 fixed api 2018-09-05 14:57:37 +02:00
Jonas Winkler
cea880f245 implemented automatic classification field functionality 2018-09-05 14:31:02 +02:00
Jonas Winkler
82bc0e3368 Fixed a few things 2018-09-05 12:43:11 +02:00
Jonas Winkler
70bd05450a removed matching model fields, automatic classifier reloading, added autmatic_classification field to matching model 2018-09-04 18:40:26 +02:00
Jonas Winkler
c765ef5eeb Merge remote-tracking branch 'upstream/master' 2018-09-04 16:02:48 +02:00
Jonas Winkler
30134034e2 Fixed documents not being saved after modification 2018-09-04 15:33:51 +02:00
Jonas Winkler
8a1a736340 Merge branch 'document-type' into dev 2018-09-04 14:55:59 +02:00
Jonas Winkler
68652c8c37 Document Type exporting 2018-09-04 14:55:29 +02:00
Jonas Winkler
c091eba26e Implemented the classifier model, including automatic tagging of new documents 2018-09-04 14:39:55 +02:00
Jonas Winkler
ca315ba76c Added code that trains models based on data from the databasae 2018-09-03 15:55:41 +02:00
Jonas Winkler
350da81081 Added command to create datasets 2018-09-02 12:47:19 +02:00
Jonas Winkler
4129002086 Added static to ignore 2018-09-02 11:46:45 +02:00
Jonas Winkler
781a1dae71 - added recent correspondents filter
- sortable document_count fields
- added last correspondence field to CorrespondentAdmin
2018-08-28 15:42:39 +02:00
Jonas Winkler
01fed4f49d Removed WebDAV from dev, since it is kind of broken. 2018-08-28 12:12:29 +02:00
Jonas Winkler
d7ab69fed9 Added document type 2018-08-24 13:45:15 +02:00
Jonas Winkler
dfa5ea423f Merge branch 'ui-improvements' into dev 2018-07-16 20:56:49 +02:00
Jonas Winkler
a698a1b66b Different way to get the changelist. 2018-07-16 18:35:01 +02:00
Jonas Winkler
a5129018d2 Merge branch 'ui-improvements' into dev 2018-07-16 18:19:05 +02:00
Jonas Winkler
e3974c68ba bugfix 2018-07-16 18:01:27 +02:00
Jonas Winkler
d72604eb86 Merge branch 'ui-improvements' into dev 2018-07-16 16:09:41 +02:00
Jonas Winkler
f0c94cc65f Added 'save and edit next' functionality 2018-07-16 16:08:51 +02:00
Jonas Winkler
f21debe95d css stuff 2018-07-16 14:39:09 +02:00
Jonas Winkler
033ab72475 Merge branch 'workflow-improvements' into dev 2018-07-15 13:42:00 +02:00
Jonas Winkler
b059602050 Merge branch 'db-config' into dev 2018-07-15 13:41:54 +02:00
Jonas Winkler
2775dfb735 Merge branch 'ui-improvements' into dev 2018-07-15 13:41:49 +02:00
Jonas Winkler
04384c7037 Merge branch 'master' into dev 2018-07-15 13:41:43 +02:00
Jonas Winkler
75beb91791 added options to change database backend 2018-07-15 13:40:38 +02:00
Jonas Winkler
b138f4b52b fixed image width 2018-07-15 13:07:00 +02:00
Jonas Winkler
d108a69f1b added document viewers on document change form for easier editing of metadata, supports pdf, png, jpg 2018-07-14 23:05:28 +02:00
Jonas Winkler
bdaea3915e Merge branch 'master' into ui-improvements 2018-07-13 11:24:19 +02:00
Jonas Winkler
9e71b70d4b fixed the api 2018-07-13 11:20:45 +02:00
Jonas Winkler
960340a5db updated migrations 2018-07-12 11:54:03 +02:00
Jonas Winkler
b3709663f1 Merge branch 'ui-improvements' into dev 2018-07-11 15:07:30 +02:00
Jonas Winkler
9f20175cd3 Merge branch 'workflow-improvements' into dev 2018-07-11 15:05:56 +02:00
Jonas Winkler
adf57b2669 Merge branch 'master' into webdav 2018-07-11 15:02:50 +02:00
Jonas Winkler
f2c32d840e Added setting to enable webdav (default: disabled), cleaned up the code somewhat. 2018-07-11 14:59:47 +02:00
Jonas Winkler
ba9d7c8892 Moved actions to separate file 2018-07-11 13:02:18 +02:00
Jonas Winkler
270b0487ec Merge branch 'master' into workflow-improvements 2018-07-10 15:53:38 +02:00
Jonas Winkler
a63880ed19 Merge remote-tracking branch 'upstream/master' 2018-07-10 15:46:46 +02:00
Jonas Winkler
a40737bd0e Added actions to modify tags and correspondents on multiple documents 2018-07-10 15:39:24 +02:00
Jonas Winkler
c5b315f518 Show document serial number on change list 2018-07-06 18:04:31 +02:00
Jonas Winkler
e143a20f50 automatically update documents whenever a tag or correspondent is changed (this should make the document_retagger and document_correspondent managers somewhat obsolete (?) 2018-07-06 13:51:50 +02:00
Jonas Winkler
c3a144f2ca inbox tags, archive tags, archive serial number for documents 2018-07-06 13:25:02 +02:00
Jonas Winkler
38bb1f9672 Some minor changes 2018-07-06 11:53:08 +02:00
Jonas Winkler
22da848be4 Updated WebDAV filtering. Filters resulting in empty results are not available anymore. 2018-07-05 17:21:13 +02:00
Jonas Winkler
a53e30e0a5 Initial support for WebDAV. Lots of stuff is not there yet and most of the stuff which is there is not really tested. But it kind of already works. 2018-07-05 16:18:20 +02:00
Jonas Winkler
7a2bd58ef8 Updated date filter to use the drilldown feature of django 2018-07-04 17:10:56 +02:00
Jonas Winkler
8f6231bd34 Updated to Django 2 2018-07-04 17:03:59 +02:00
98 changed files with 57256 additions and 5340 deletions

View File

@@ -18,9 +18,6 @@ max_line_length = off
indent_size = 4
indent_style = space
[*.yml]
indent_style = space
# Tests don't get a line width restriction. It's still a good idea to follow
# the 79 character rule, but in the interests of clarity, tests often need to
# violate it.

6
.gitignore vendored
View File

@@ -66,7 +66,6 @@ media/overrides.js
# Sqlite database
db.sqlite3
db.sqlite3-journal
# PyCharm
.idea
@@ -82,4 +81,7 @@ scripts/import-for-development
scripts/nuke
# Static files collected by the collectstatic command
./static/
static/
# Classification Models
models/

View File

@@ -2,22 +2,19 @@ language: python
before_install:
- sudo apt-get update -qq
- sudo apt-get install -qq libpoppler-cpp-dev unpaper tesseract-ocr
- sudo apt-get install -qq libpoppler-cpp-dev unpaper tesseract-ocr tesseract-ocr-eng tesseract-ocr-cat tesseract-ocr-deu
sudo: false
matrix:
include:
- python: "3.4"
- python: "3.5"
- python: "3.6"
- python: "3.7-dev"
- python: 3.4
- python: 3.5
- python: 3.6
install:
- pip install --upgrade pip pipenv sphinx
- pipenv lock -r > requirements.txt
- pip install -r requirements.txt
- pip install --requirement requirements.txt
- pip install sphinx
script:
- cd src/
- pytest --cov
@@ -25,4 +22,4 @@ script:
- sphinx-build -b html ../docs ../docs/_build -W
after_success:
- coveralls
- coveralls

View File

@@ -4,8 +4,8 @@ LABEL maintainer="The Paperless Project https://github.com/danielquinn/paperless
contributors="Guy Addadi <addadi@gmail.com>, Pit Kleyersburg <pitkley@googlemail.com>, \
Sven Fischer <git-dev@linux4tw.de>"
# Copy Pipfiles file and init script
COPY Pipfile* /usr/src/paperless/
# Copy requirements file and init script
COPY requirements.txt /usr/src/paperless/
COPY scripts/docker-entrypoint.sh /sbin/docker-entrypoint.sh
# Set export and consumption directories
@@ -21,8 +21,7 @@ RUN apk update --no-cache && apk add python3 gnupg libmagic libpq bash shadow cu
python3 -m ensurepip && \
rm -r /usr/lib/python*/ensurepip && \
cd /usr/src/paperless && \
pip3 install --upgrade pip pipenv && \
pipenv install --system --deploy && \
pip3 install --no-cache-dir -r requirements.txt && \
# Remove build dependencies
apk del .build-dependencies && \
# Create the consumption directory

View File

@@ -34,8 +34,6 @@ pytest-django = "*"
pytest-sugar = "*"
pytest-env = "*"
pytest-xdist = "*"
psycopg2 = "*"
djangoql = "*"
[dev-packages]
ipython = "*"

556
Pipfile.lock generated
View File

@@ -1,7 +1,7 @@
{
"_meta": {
"hash": {
"sha256": "971e0c975821925652865e34eed1c668bc6f52bb8217b776f35e87a66c936e1b"
"sha256": "3782f7e6b5461c39c8fd0d0048a4622418f247439113bd3cdc91712fd47036f6"
},
"pipfile-spec": 6,
"requires": {},
@@ -49,12 +49,19 @@
],
"version": "==2.6.0"
},
"backcall": {
"hashes": [
"sha256:38ecd85be2c1e78f77fd91700c76e14667dc21e2713b63876c0eb901196e01e4",
"sha256:bbbf4b1e5cd2bdb08f915895b51081c041bac22394fdfcfdfbe9f14b77c08bf2"
],
"version": "==0.1.0"
},
"certifi": {
"hashes": [
"sha256:47f9c83ef4c0c621eaef743f133f09fa8a74a9b75f037e8624f83bd1b6626cb7",
"sha256:993f830721089fef441cdfeb4b2c8c9df86f0c63239f06bd025a76a7daddb033"
"sha256:339dc09518b07e2fa7eda5450740925974815557727d6bd35d319c1524a04a4c",
"sha256:6d58c986d22b038c8c0df30d639f23a3e6d172a05c3583e766f4c0b785c0986a"
],
"version": "==2018.11.29"
"version": "==2018.10.15"
},
"chardet": {
"hashes": [
@@ -65,39 +72,41 @@
},
"coverage": {
"hashes": [
"sha256:09e47c529ff77bf042ecfe858fb55c3e3eb97aac2c87f0349ab5a7efd6b3939f",
"sha256:0a1f9b0eb3aa15c990c328535655847b3420231af299386cfe5efc98f9c250fe",
"sha256:0cc941b37b8c2ececfed341444a456912e740ecf515d560de58b9a76562d966d",
"sha256:10e8af18d1315de936d67775d3a814cc81d0747a1a0312d84e27ae5610e313b0",
"sha256:1b4276550b86caa60606bd3572b52769860a81a70754a54acc8ba789ce74d607",
"sha256:1e8a2627c48266c7b813975335cfdea58c706fe36f607c97d9392e61502dc79d",
"sha256:2b224052bfd801beb7478b03e8a66f3f25ea56ea488922e98903914ac9ac930b",
"sha256:447c450a093766744ab53bf1e7063ec82866f27bcb4f4c907da25ad293bba7e3",
"sha256:46101fc20c6f6568561cdd15a54018bb42980954b79aa46da8ae6f008066a30e",
"sha256:4710dc676bb4b779c4361b54eb308bc84d64a2fa3d78e5f7228921eccce5d815",
"sha256:510986f9a280cd05189b42eee2b69fecdf5bf9651d4cd315ea21d24a964a3c36",
"sha256:5535dda5739257effef56e49a1c51c71f1d37a6e5607bb25a5eee507c59580d1",
"sha256:5a7524042014642b39b1fcae85fb37556c200e64ec90824ae9ecf7b667ccfc14",
"sha256:5f55028169ef85e1fa8e4b8b1b91c0b3b0fa3297c4fb22990d46ff01d22c2d6c",
"sha256:6694d5573e7790a0e8d3d177d7a416ca5f5c150742ee703f3c18df76260de794",
"sha256:6831e1ac20ac52634da606b658b0b2712d26984999c9d93f0c6e59fe62ca741b",
"sha256:77f0d9fa5e10d03aa4528436e33423bfa3718b86c646615f04616294c935f840",
"sha256:828ad813c7cdc2e71dcf141912c685bfe4b548c0e6d9540db6418b807c345ddd",
"sha256:85a06c61598b14b015d4df233d249cd5abfa61084ef5b9f64a48e997fd829a82",
"sha256:8cb4febad0f0b26c6f62e1628f2053954ad2c555d67660f28dfb1b0496711952",
"sha256:a5c58664b23b248b16b96253880b2868fb34358911400a7ba39d7f6399935389",
"sha256:aaa0f296e503cda4bc07566f592cd7a28779d433f3a23c48082af425d6d5a78f",
"sha256:ab235d9fe64833f12d1334d29b558aacedfbca2356dfb9691f2d0d38a8a7bfb4",
"sha256:b3b0c8f660fae65eac74fbf003f3103769b90012ae7a460863010539bb7a80da",
"sha256:bab8e6d510d2ea0f1d14f12642e3f35cefa47a9b2e4c7cea1852b52bc9c49647",
"sha256:c45297bbdbc8bb79b02cf41417d63352b70bcb76f1bbb1ee7d47b3e89e42f95d",
"sha256:d19bca47c8a01b92640c614a9147b081a1974f69168ecd494687c827109e8f42",
"sha256:d64b4340a0c488a9e79b66ec9f9d77d02b99b772c8b8afd46c1294c1d39ca478",
"sha256:da969da069a82bbb5300b59161d8d7c8d423bc4ccd3b410a9b4d8932aeefc14b",
"sha256:ed02c7539705696ecb7dc9d476d861f3904a8d2b7e894bd418994920935d36bb",
"sha256:ee5b8abc35b549012e03a7b1e86c09491457dba6c94112a2482b18589cc2bdb9"
"sha256:03481e81d558d30d230bc12999e3edffe392d244349a90f4ef9b88425fac74ba",
"sha256:0b136648de27201056c1869a6c0d4e23f464750fd9a9ba9750b8336a244429ed",
"sha256:0bf8cbbd71adfff0ef1f3a1531e6402d13b7b01ac50a79c97ca15f030dba6306",
"sha256:10a46017fef60e16694a30627319f38a2b9b52e90182dddb6e37dcdab0f4bf95",
"sha256:198626739a79b09fa0a2f06e083ffd12eb55449b5f8bfdbeed1df4910b2ca640",
"sha256:23d341cdd4a0371820eb2b0bd6b88f5003a7438bbedb33688cd33b8eae59affd",
"sha256:28b2191e7283f4f3568962e373b47ef7f0392993bb6660d079c62bd50fe9d162",
"sha256:2a5b73210bad5279ddb558d9a2bfedc7f4bf6ad7f3c988641d83c40293deaec1",
"sha256:2eb564bbf7816a9d68dd3369a510be3327f1c618d2357fa6b1216994c2e3d508",
"sha256:337ded681dd2ef9ca04ef5d93cfc87e52e09db2594c296b4a0a3662cb1b41249",
"sha256:3a2184c6d797a125dca8367878d3b9a178b6fdd05fdc2d35d758c3006a1cd694",
"sha256:3c79a6f7b95751cdebcd9037e4d06f8d5a9b60e4ed0cd231342aa8ad7124882a",
"sha256:3d72c20bd105022d29b14a7d628462ebdc61de2f303322c0212a054352f3b287",
"sha256:3eb42bf89a6be7deb64116dd1cc4b08171734d721e7a7e57ad64cc4ef29ed2f1",
"sha256:4635a184d0bbe537aa185a34193898eee409332a8ccb27eea36f262566585000",
"sha256:56e448f051a201c5ebbaa86a5efd0ca90d327204d8b059ab25ad0f35fbfd79f1",
"sha256:5a13ea7911ff5e1796b6d5e4fbbf6952381a611209b736d48e675c2756f3f74e",
"sha256:69bf008a06b76619d3c3f3b1983f5145c75a305a0fea513aca094cae5c40a8f5",
"sha256:6bc583dc18d5979dc0f6cec26a8603129de0304d5ae1f17e57a12834e7235062",
"sha256:701cd6093d63e6b8ad7009d8a92425428bc4d6e7ab8d75efbb665c806c1d79ba",
"sha256:7608a3dd5d73cb06c531b8925e0ef8d3de31fed2544a7de6c63960a1e73ea4bc",
"sha256:76ecd006d1d8f739430ec50cc872889af1f9c1b6b8f48e29941814b09b0fd3cc",
"sha256:7aa36d2b844a3e4a4b356708d79fd2c260281a7390d678a10b91ca595ddc9e99",
"sha256:7d3f553904b0c5c016d1dad058a7554c7ac4c91a789fca496e7d8347ad040653",
"sha256:7e1fe19bd6dce69d9fd159d8e4a80a8f52101380d5d3a4d374b6d3eae0e5de9c",
"sha256:8c3cb8c35ec4d9506979b4cf90ee9918bc2e49f84189d9bf5c36c0c1119c6558",
"sha256:9d6dd10d49e01571bf6e147d3b505141ffc093a06756c60b053a859cb2128b1f",
"sha256:be6cfcd8053d13f5f5eeb284aa8a814220c3da1b0078fa859011c7fffd86dab9",
"sha256:c1bb572fab8208c400adaf06a8133ac0712179a334c09224fb11393e920abcdd",
"sha256:de4418dadaa1c01d497e539210cb6baa015965526ff5afc078c57ca69160108d",
"sha256:e05cb4d9aad6233d67e0541caa7e511fa4047ed7750ec2510d466e806e0255d6",
"sha256:f05a636b4564104120111800021a92e43397bc12a5c72fed7036be8556e0029e",
"sha256:f3f501f345f24383c0000395b26b726e46758b71393267aeae0bd36f8b3ade80"
],
"version": "==4.5.2"
"version": "==4.5.1"
},
"coveralls": {
"hashes": [
@@ -115,13 +124,20 @@
"index": "pypi",
"version": "==0.7.0"
},
"decorator": {
"hashes": [
"sha256:2c51dff8ef3c447388fe5e4453d24a2bf128d3a4c32af3fabef1f01c6851ab82",
"sha256:c39efa13fbdeb4506c476c9b3babf6a718da943dab7811c206005a4a956c080c"
],
"version": "==4.3.0"
},
"django": {
"hashes": [
"sha256:0292a7ad7d8ffc9cfc6a77f043d2e81f5bbc360c0c4a1686e130ef3432437d23",
"sha256:e89f613e3c1f7ff245ffee3560472f9fa9c07060b11f65e1de3cb763f8dcd4b9"
"sha256:25df265e1fdb74f7e7305a1de620a84681bcc9c05e84a3ed97e4a1a63024f18d",
"sha256:d6d94554abc82ca37e447c3d28958f5ac39bd7d4adaa285543ae97fb1129fd69"
],
"index": "pypi",
"version": "==2.0.10"
"version": "==2.0.9"
},
"django-cors-headers": {
"hashes": [
@@ -141,34 +157,27 @@
},
"django-extensions": {
"hashes": [
"sha256:8317a3fe479b1ba3e3a04ecf33fb8d6ccf09bb18f30eab64e34c40a593741d26",
"sha256:a76a61566f1c8d96acc7bcf765080b8e91367a25a2c6f8c5bddd574493839180"
"sha256:30cb6a8c7d6f75a55edf0c0c4491bd98f8264ae1616ce105f9cecac4387edd07",
"sha256:4ad86a7a5e84f1c77db030761ae87a600647250c652030a2b71a16e87f3a3d62"
],
"index": "pypi",
"version": "==2.1.4"
"version": "==2.1.3"
},
"django-filter": {
"hashes": [
"sha256:3dafb7d2810790498895c22a1f31b2375795910680ac9c1432821cbedb1e176d",
"sha256:a3014de317bef0cd43075a0f08dfa1d319a7ccc5733c3901fb860da70b0dda68"
"sha256:6f4e4bc1a11151178520567b50320e5c32f8edb552139d93ea3e30613b886f56",
"sha256:86c3925020c27d072cdae7b828aaa5d165c2032a629abbe3c3a1be1edae61c58"
],
"index": "pypi",
"version": "==2.1.0"
},
"djangoql": {
"hashes": [
"sha256:7c488ec4e3362e5389ba3c1169d1ff9a27c4222601f32c6dbf6130ce04330d76"
],
"index": "pypi",
"version": "==0.12.3"
"version": "==2.0.0"
},
"djangorestframework": {
"hashes": [
"sha256:79c6efbb2514bc50cf25906d7c0a5cfead714c7af667ff4bd110312cd380ae66",
"sha256:a4138613b67e3a223be6c97f53b13d759c5b90d2b433bad670b8ebf95402075f"
"sha256:607865b0bb1598b153793892101d881466bd5a991de12bd6229abb18b1c86136",
"sha256:63f76cbe1e7d12b94c357d7e54401103b2e52aef0f7c1650d6c820ad708776e5"
],
"index": "pypi",
"version": "==3.9.1"
"version": "==3.9.0"
},
"docopt": {
"hashes": [
@@ -201,10 +210,10 @@
},
"faker": {
"hashes": [
"sha256:16342dca4d92bfc83bab6a7daf6650e0ab087605a66bc38f17523fdb01757910",
"sha256:d871ea315b2dcba9138b8344f2c131a76ac62d6227ca39f69b0c889fec97376c"
"sha256:2621643b80a10b91999925cfd20f64d2b36f20bf22136bbdc749bb57d6ffe124",
"sha256:5ed822d31bd2d6edf10944d176d30dc9c886afdd381eefb7ba8b7aad86171646"
],
"version": "==1.0.2"
"version": "==0.9.2"
},
"filelock": {
"hashes": [
@@ -241,10 +250,10 @@
},
"idna": {
"hashes": [
"sha256:c357b3f628cf53ae2c4c05627ecc484553142ca23264e593d327bcde5e9c3407",
"sha256:ea8b7f6188e6fa117537c3df7da9fc686d485087abf6ac197f9c46432f7e4a3c"
"sha256:156a6814fb5ac1fc6850fb002e0852d56c0c8d2531923a51032d1b70760e186e",
"sha256:684a38a6f903c1d71d6d5fac066b58d7768af4de2b832e426ec79c30daa94a16"
],
"version": "==2.8"
"version": "==2.7"
},
"imagesize": {
"hashes": [
@@ -260,6 +269,28 @@
"index": "pypi",
"version": "==1.1.8"
},
"ipython": {
"hashes": [
"sha256:a5781d6934a3341a1f9acb4ea5acdc7ea0a0855e689dbe755d070ca51e995435",
"sha256:b10a7ddd03657c761fc503495bc36471c8158e3fc948573fb9fe82a7029d8efd"
],
"index": "pypi",
"version": "==7.1.1"
},
"ipython-genutils": {
"hashes": [
"sha256:72dd37233799e619666c9f639a9da83c34013a73e8bbc79a7a6348d93c61fab8",
"sha256:eb2e116e75ecef9d4d228fdc66af54269afa26ab4463042e33785b887c628ba8"
],
"version": "==0.2.0"
},
"jedi": {
"hashes": [
"sha256:0191c447165f798e6a730285f2eee783fff81b0d3df261945ecb80983b5c3ca7",
"sha256:b7493f73a2febe0dc33d51c99b474547f7f6c0b2c8fb2b21f453eef204c12148"
],
"version": "==0.13.1"
},
"jinja2": {
"hashes": [
"sha256:74c935a1b8bb9a3947c50a54766a969d4846290e1e788ea44c1392163723c3bd",
@@ -276,51 +307,31 @@
},
"markupsafe": {
"hashes": [
"sha256:048ef924c1623740e70204aa7143ec592504045ae4429b59c30054cb31e3c432",
"sha256:130f844e7f5bdd8e9f3f42e7102ef1d49b2e6fdf0d7526df3f87281a532d8c8b",
"sha256:19f637c2ac5ae9da8bfd98cef74d64b7e1bb8a63038a3505cd182c3fac5eb4d9",
"sha256:1b8a7a87ad1b92bd887568ce54b23565f3fd7018c4180136e1cf412b405a47af",
"sha256:1c25694ca680b6919de53a4bb3bdd0602beafc63ff001fea2f2fc16ec3a11834",
"sha256:1f19ef5d3908110e1e891deefb5586aae1b49a7440db952454b4e281b41620cd",
"sha256:1fa6058938190ebe8290e5cae6c351e14e7bb44505c4a7624555ce57fbbeba0d",
"sha256:31cbb1359e8c25f9f48e156e59e2eaad51cd5242c05ed18a8de6dbe85184e4b7",
"sha256:3e835d8841ae7863f64e40e19477f7eb398674da6a47f09871673742531e6f4b",
"sha256:4e97332c9ce444b0c2c38dd22ddc61c743eb208d916e4265a2a3b575bdccb1d3",
"sha256:525396ee324ee2da82919f2ee9c9e73b012f23e7640131dd1b53a90206a0f09c",
"sha256:52b07fbc32032c21ad4ab060fec137b76eb804c4b9a1c7c7dc562549306afad2",
"sha256:52ccb45e77a1085ec5461cde794e1aa037df79f473cbc69b974e73940655c8d7",
"sha256:5c3fbebd7de20ce93103cb3183b47671f2885307df4a17a0ad56a1dd51273d36",
"sha256:5e5851969aea17660e55f6a3be00037a25b96a9b44d2083651812c99d53b14d1",
"sha256:5edfa27b2d3eefa2210fb2f5d539fbed81722b49f083b2c6566455eb7422fd7e",
"sha256:7d263e5770efddf465a9e31b78362d84d015cc894ca2c131901a4445eaa61ee1",
"sha256:83381342bfc22b3c8c06f2dd93a505413888694302de25add756254beee8449c",
"sha256:857eebb2c1dc60e4219ec8e98dfa19553dae33608237e107db9c6078b1167856",
"sha256:98e439297f78fca3a6169fd330fbe88d78b3bb72f967ad9961bcac0d7fdd1550",
"sha256:bf54103892a83c64db58125b3f2a43df6d2cb2d28889f14c78519394feb41492",
"sha256:d9ac82be533394d341b41d78aca7ed0e0f4ba5a2231602e2f05aa87f25c51672",
"sha256:e982fe07ede9fada6ff6705af70514a52beb1b2c3d25d4e873e82114cf3c5401",
"sha256:edce2ea7f3dfc981c4ddc97add8a61381d9642dc3273737e756517cc03e84dd6",
"sha256:efdc45ef1afc238db84cb4963aa689c0408912a0239b0721cb172b4016eb31d6",
"sha256:f137c02498f8b935892d5c0172560d7ab54bc45039de8805075e19079c639a9c",
"sha256:f82e347a72f955b7017a39708a3667f106e6ad4d10b25f237396a7115d8ed5fd",
"sha256:fb7c206e01ad85ce57feeaaa0bf784b97fa3cad0d4a5737bc5295785f5c613a1"
"sha256:a6be69091dac236ea9c6bc7d012beab42010fa914c459791d627dad4910eb665"
],
"version": "==1.1.0"
"version": "==1.0"
},
"more-itertools": {
"hashes": [
"sha256:38a936c0a6d98a38bcc2d03fdaaedaba9f412879461dd2ceff8d37564d6522e4",
"sha256:c0a5785b1109a6bd7fac76d6837fd1feca158e54e521ccd2ae8bfe393cc9d4fc",
"sha256:fe7a7cae1ccb57d33952113ff4fa1bc5f879963600ed74918f1236e212ee50b9"
"sha256:c187a73da93e7a8acc0001572aebc7e3c69daf7bf6881a2cea10650bd4420092",
"sha256:c476b5d3a34e12d40130bc2f935028b5f636df8f372dc2c1c01dc19681b2039e",
"sha256:fcbfeaea0be121980e15bc97b3817b5202ca73d0eae185b4550cbfce2a3ebb3d"
],
"version": "==5.0.0"
"version": "==4.3.0"
},
"packaging": {
"hashes": [
"sha256:0c98a5d0be38ed775798ece1b9727178c4469d9c3b4ada66e8e6b7849f8732af",
"sha256:9e1cbf8c12b1f1ce0bb5344b8d7ecf66a6f8a6e91bcb0c84593ed6d3ab5c4ab3"
"sha256:0886227f54515e592aaa2e5a553332c73962917f2831f1b0f9b9f4380a4b9807",
"sha256:f95a1e147590f204328170981833854229bb2912ac3d5f89e2a8ccd2834800c9"
],
"version": "==19.0"
"version": "==18.0"
},
"parso": {
"hashes": [
"sha256:35704a43a3c113cce4de228ddb39aab374b8004f4f2407d070b6a2ca784ce8a2",
"sha256:895c63e93b94ac1e1690f5fdd40b65f07c8171e3e53cbd7793b5b96c0e0a7f24"
],
"version": "==0.3.1"
},
"pdftotext": {
"hashes": [
@@ -329,91 +340,78 @@
"index": "pypi",
"version": "==2.1.1"
},
"pexpect": {
"hashes": [
"sha256:2a8e88259839571d1251d278476f3eec5db26deb73a70be5ed5dc5435e418aba",
"sha256:3fbd41d4caf27fa4a377bfd16fef87271099463e6fa73e92a52f92dfee5d425b"
],
"markers": "sys_platform != 'win32'",
"version": "==4.6.0"
},
"pickleshare": {
"hashes": [
"sha256:87683d47965c1da65cdacaf31c8441d12b8044cdec9aca500cd78fc2c683afca",
"sha256:9649af414d74d4df115d5d718f82acb59c9d418196b7b4290ed47a12ce62df56"
],
"version": "==0.7.5"
},
"pillow": {
"hashes": [
"sha256:051de330a06c99d6f84bcf582960487835bcae3fc99365185dc2d4f65a390c0e",
"sha256:0ae5289948c5e0a16574750021bd8be921c27d4e3527800dc9c2c1d2abc81bf7",
"sha256:0b1efce03619cdbf8bcc61cfae81fcda59249a469f31c6735ea59badd4a6f58a",
"sha256:163136e09bd1d6c6c6026b0a662976e86c58b932b964f255ff384ecc8c3cefa3",
"sha256:18e912a6ccddf28defa196bd2021fe33600cbe5da1aa2f2e2c6df15f720b73d1",
"sha256:24ec3dea52339a610d34401d2d53d0fb3c7fd08e34b20c95d2ad3973193591f1",
"sha256:267f8e4c0a1d7e36e97c6a604f5b03ef58e2b81c1becb4fccecddcb37e063cc7",
"sha256:3273a28734175feebbe4d0a4cde04d4ed20f620b9b506d26f44379d3c72304e1",
"sha256:4c678e23006798fc8b6f4cef2eaad267d53ff4c1779bd1af8725cc11b72a63f3",
"sha256:4d4bc2e6bb6861103ea4655d6b6f67af8e5336e7216e20fff3e18ffa95d7a055",
"sha256:505738076350a337c1740a31646e1de09a164c62c07db3b996abdc0f9d2e50cf",
"sha256:5233664eadfa342c639b9b9977190d64ad7aca4edc51a966394d7e08e7f38a9f",
"sha256:5d95cb9f6cced2628f3e4de7e795e98b2659dfcc7176ab4a01a8b48c2c2f488f",
"sha256:7eda4c737637af74bac4b23aa82ea6fbb19002552be85f0b89bc27e3a762d239",
"sha256:801ddaa69659b36abf4694fed5aa9f61d1ecf2daaa6c92541bbbbb775d97b9fe",
"sha256:825aa6d222ce2c2b90d34a0ea31914e141a85edefc07e17342f1d2fdf121c07c",
"sha256:9c215442ff8249d41ff58700e91ef61d74f47dfd431a50253e1a1ca9436b0697",
"sha256:a3d90022f2202bbb14da991f26ca7a30b7e4c62bf0f8bf9825603b22d7e87494",
"sha256:a631fd36a9823638fe700d9225f9698fb59d049c942d322d4c09544dc2115356",
"sha256:a6523a23a205be0fe664b6b8747a5c86d55da960d9586db039eec9f5c269c0e6",
"sha256:a756ecf9f4b9b3ed49a680a649af45a8767ad038de39e6c030919c2f443eb000",
"sha256:b117287a5bdc81f1bac891187275ec7e829e961b8032c9e5ff38b70fd036c78f",
"sha256:ba04f57d1715ca5ff74bb7f8a818bf929a204b3b3c2c2826d1e1cc3b1c13398c",
"sha256:cd878195166723f30865e05d87cbaf9421614501a4bd48792c5ed28f90fd36ca",
"sha256:cee815cc62d136e96cf76771b9d3eb58e0777ec18ea50de5cfcede8a7c429aa8",
"sha256:d1722b7aa4b40cf93ac3c80d3edd48bf93b9208241d166a14ad8e7a20ee1d4f3",
"sha256:d7c1c06246b05529f9984435fc4fa5a545ea26606e7f450bdbe00c153f5aeaad",
"sha256:e9c8066249c040efdda84793a2a669076f92a301ceabe69202446abb4c5c5ef9",
"sha256:f227d7e574d050ff3996049e086e1f18c7bd2d067ef24131e50a1d3fe5831fbc",
"sha256:fc9a12aad714af36cf3ad0275a96a733526571e52710319855628f476dcb144e"
"sha256:00203f406818c3f45d47bb8fe7e67d3feddb8dcbbd45a289a1de7dd789226360",
"sha256:0616f800f348664e694dddb0b0c88d26761dd5e9f34e1ed7b7a7d2da14b40cb7",
"sha256:1f7908aab90c92ad85af9d2fec5fc79456a89b3adcc26314d2cde0e238bd789e",
"sha256:2ea3517cd5779843de8a759c2349a3cd8d3893e03ab47053b66d5ec6f8bc4f93",
"sha256:48a9f0538c91fc136b3a576bee0e7cd174773dc9920b310c21dcb5519722e82c",
"sha256:5280ebc42641a1283b7b1f2c20e5b936692198b9dd9995527c18b794850be1a8",
"sha256:5e34e4b5764af65551647f5cc67cf5198c1d05621781d5173b342e5e55bf023b",
"sha256:63b120421ab85cad909792583f83b6ca3584610c2fe70751e23f606a3c2e87f0",
"sha256:696b5e0109fe368d0057f484e2e91717b49a03f1e310f857f133a4acec9f91dd",
"sha256:870ed021a42b1b02b5fe4a739ea735f671a84128c0a666c705db2cb9abd528eb",
"sha256:916da1c19e4012d06a372127d7140dae894806fad67ef44330e5600d77833581",
"sha256:9303a289fa0811e1c6abd9ddebfc770556d7c3311cb2b32eff72164ddc49bc64",
"sha256:9577888ecc0ad7d06c3746afaba339c94d62b59da16f7a5d1cff9e491f23dace",
"sha256:987e1c94a33c93d9b209315bfda9faa54b8edfce6438a1e93ae866ba20de5956",
"sha256:99a3bbdbb844f4fb5d6dd59fac836a40749781c1fa63c563bc216c27aef63f60",
"sha256:99db8dc3097ceafbcff9cb2bff384b974795edeb11d167d391a02c7bfeeb6e16",
"sha256:a5a96cf49eb580756a44ecf12949e52f211e20bffbf5a95760ac14b1e499cd37",
"sha256:aa6ca3eb56704cdc0d876fc6047ffd5ee960caad52452fbee0f99908a141a0ae",
"sha256:aade5e66795c94e4a2b2624affeea8979648d1b0ae3fcee17e74e2c647fc4a8a",
"sha256:b78905860336c1d292409e3df6ad39cc1f1c7f0964e66844bbc2ebfca434d073",
"sha256:b92f521cdc4e4a3041cc343625b699f20b0b5f976793fb45681aac1efda565f8",
"sha256:bfde84bbd6ae5f782206d454b67b7ee8f7f818c29b99fd02bf022fd33bab14cb",
"sha256:c2b62d3df80e694c0e4a0ed47754c9480521e25642251b3ab1dff050a4e60409",
"sha256:c5e2be6c263b64f6f7656e23e18a4a9980cffc671442795682e8c4e4f815dd9f",
"sha256:c99aa3c63104e0818ec566f8ff3942fb7c7a8f35f9912cb63fd8e12318b214b2",
"sha256:dae06620d3978da346375ebf88b9e2dd7d151335ba668c995aea9ed07af7add4",
"sha256:db5499d0710823fa4fb88206050d46544e8f0e0136a9a5f5570b026584c8fd74",
"sha256:f36baafd82119c4a114b9518202f2a983819101dcc14b26e43fc12cbefdce00e",
"sha256:f52b79c8796d81391ab295b04e520bda6feed54d54931708872e8f9ae9db0ea1",
"sha256:ff8cff01582fa1a7e533cb97f628531c4014af4b5f38e33cdcfe5eec29b6d888"
],
"index": "pypi",
"version": "==5.4.1"
"version": "==5.3.0"
},
"pluggy": {
"hashes": [
"sha256:8ddc32f03971bfdf900a81961a48ccf2fb677cf7715108f85295c67405798616",
"sha256:980710797ff6a041e9a73a5787804f848996ecaa6f8a1b1e08224a5894f2074a"
"sha256:447ba94990e8014ee25ec853339faf7b0fc8050cdc3289d4d71f7f410fb90095",
"sha256:bde19360a8ec4dfd8a20dcb811780a30998101f078fc7ded6162f0076f50508f"
],
"version": "==0.8.1"
"version": "==0.8.0"
},
"ply": {
"prompt-toolkit": {
"hashes": [
"sha256:00c7c1aaa88358b9c765b6d3000c6eec0ba42abca5351b095321aef446081da3",
"sha256:096f9b8350b65ebd2fd1346b12452efe5b9607f7482813ffca50c22722a807ce"
"sha256:c1d6aff5252ab2ef391c2fe498ed8c088066f66bc64a8d5c095bbf795d9fec34",
"sha256:d4c47f79b635a0e70b84fdb97ebd9a274203706b1ee5ed44c10da62755cf3ec9",
"sha256:fd17048d8335c1e6d5ee403c3569953ba3eb8555d710bfc548faf0712666ea39"
],
"version": "==3.11"
"version": "==2.0.7"
},
"psycopg2": {
"ptyprocess": {
"hashes": [
"sha256:02445ebbb3a11a3fe8202c413d5e6faf38bb75b4e336203ee144ca2c46529f94",
"sha256:0e9873e60f98f0c52339abf8f0339d1e22bfe5aae0bcf7aabd40c055175035ec",
"sha256:1148a5eb29073280bf9057c7fc45468592c1bb75a28f6df1591adb93c8cb63d0",
"sha256:259a8324e109d4922b0fcd046e223e289830e2568d6f4132a3702439e5fd532b",
"sha256:28dffa9ed4595429e61bacac41d3f9671bb613d1442ff43bcbec63d4f73ed5e8",
"sha256:314a74302d4737a3865d40ea50e430ce1543c921ba10f39d562e807cfe2edf2a",
"sha256:36b60201b6d215d7658a71493fdf6bd5e60ad9a0cffed39906627ff9f4f3afd3",
"sha256:3f9d532bce54c4234161176ff3b8688ff337575ca441ea27597e112dfcd0ee0c",
"sha256:5d222983847b40af989ad96c07fc3f07e47925e463baa5de716be8f805b41d9b",
"sha256:6757a6d2fc58f7d8f5d471ad180a0bd7b4dd3c7d681f051504fbea7ae29c8d6f",
"sha256:6a0e0f1e74edb0ab57d89680e59e7bfefad2bfbdf7c80eb38304d897d43674bb",
"sha256:6ca703ccdf734e886a1cf53eb702261110f6a8b0ed74bcad15f1399f74d3f189",
"sha256:8513b953d8f443c446aa79a4cc8a898bd415fc5e29349054f03a7d696d495542",
"sha256:9262a5ce2038570cb81b4d6413720484cb1bc52c064b2f36228d735b1f98b794",
"sha256:97441f851d862a0c844d981cbee7ee62566c322ebb3d68f86d66aa99d483985b",
"sha256:a07feade155eb8e69b54dd6774cf6acf2d936660c61d8123b8b6b1f9247b67d6",
"sha256:a9b9c02c91b1e3ec1f1886b2d0a90a0ea07cc529cb7e6e472b556bc20ce658f3",
"sha256:ae88216f94728d691b945983140bf40d51a1ff6c7fe57def93949bf9339ed54a",
"sha256:b360ffd17659491f1a6ad7c928350e229c7b7bd83a2b922b6ee541245c7a776f",
"sha256:b4221957ceccf14b2abdabef42d806e791350be10e21b260d7c9ce49012cc19e",
"sha256:b90758e49d5e6b152a460d10b92f8a6ccf318fcc0ee814dcf53f3a6fc5328789",
"sha256:c669ea986190ed05fb289d0c100cc88064351f2b85177cbfd3564c4f4847d18c",
"sha256:d1b61999d15c79cf7f4f7cc9021477aef35277fc52452cf50fd13b713c84424d",
"sha256:de7bb043d1adaaf46e38d47e7a5f703bb3dab01376111e522b07d25e1a79c1e1",
"sha256:e393568e288d884b94d263f2669215197840d097c7e5b0acd1a51c1ea7d1aba8",
"sha256:ed7e0849337bd37d89f2c2b0216a0de863399ee5d363d31b1e5330a99044737b",
"sha256:f153f71c3164665d269a5d03c7fa76ba675c7a8de9dc09a4e2c2cdc9936a7b41",
"sha256:f1fb5a8427af099beb7f65093cbdb52e021b8e6dbdfaf020402a623f4181baf5",
"sha256:f36b333e9f86a2fba960c72b90c34be6ca71819e300f7b1fc3d2b0f0b2c546cd",
"sha256:f4526d078aedd5187d0508aa5f9a01eae6a48a470ed678406da94b4cd6524b7e"
"sha256:923f299cc5ad920c68f2bc0bc98b75b9f838b93b599941a6b63ddbc2476394c0",
"sha256:d7cc528d76e76342423ca640335bd3633420dc1366f258cb31d05e865ef5ca1f"
],
"index": "pypi",
"version": "==2.7.7"
"version": "==0.6.0"
},
"py": {
"hashes": [
@@ -432,10 +430,10 @@
},
"pygments": {
"hashes": [
"sha256:5ffada19f6203563680669ee7f53b64dabbeb100eb51b61996085e99c03b284a",
"sha256:e8218dd399a61674745138520d0d4cf2621d7e032439341bc3f647bff125818d"
"sha256:78f3f434bcc5d6ee09020f92ba487f95ba50f1e3ef83ae96b9d5ffa1bab25c5d",
"sha256:dbae1046def0efb574852fab9e90209b23f556367b5a320c0bcb871c77c3e8cc"
],
"version": "==2.3.1"
"version": "==2.2.0"
},
"pyocr": {
"hashes": [
@@ -446,34 +444,34 @@
},
"pyparsing": {
"hashes": [
"sha256:66c9268862641abcac4a96ba74506e594c884e3f57690a696d21ad8210ed667a",
"sha256:f6c5ef0d7480ad048c054c37632c67fca55299990fff127850181659eea33fc3"
"sha256:40856e74d4987de5d01761a22d1621ae1c7f8774585acae358aa5c5936c6c90b",
"sha256:f353aab21fd474459d97b709e527b5571314ee5f067441dc9f88e33eecd96592"
],
"version": "==2.3.1"
"version": "==2.3.0"
},
"pytest": {
"hashes": [
"sha256:41568ea7ecb4a68d7f63837cf65b92ce8d0105e43196ff2b26622995bb3dc4b2",
"sha256:c3c573a29d7c9547fb90217ece8a8843aa0c1328a797e200290dc3d0b4b823be"
"sha256:a9e5e8d7ab9d5b0747f37740276eb362e6a76275d76cebbb52c6049d93b475db",
"sha256:bf47e8ed20d03764f963f0070ff1c8fda6e2671fc5dd562a4d3b7148ad60f5ca"
],
"index": "pypi",
"version": "==4.1.1"
"version": "==3.9.3"
},
"pytest-cov": {
"hashes": [
"sha256:0ab664b25c6aa9716cbf203b17ddb301932383046082c081b9848a0edf5add33",
"sha256:230ef817450ab0699c6cc3c9c8f7a829c34674456f2ed8df1fe1d39780f7c87f"
"sha256:513c425e931a0344944f84ea47f3956be0e416d95acbd897a44970c8d926d5d7",
"sha256:e360f048b7dae3f2f2a9a4d067b2dd6b6a015d384d1577c994a43f3f7cbad762"
],
"index": "pypi",
"version": "==2.6.1"
"version": "==2.6.0"
},
"pytest-django": {
"hashes": [
"sha256:1a5d33be930e3172fa238643a380414dc369fe8fa4b3c3de25e59ed142950736",
"sha256:e88e471d3d0f9acfb6293bb03d0ee8a33ed978734e92ea6b5312163a6c9e87cc"
"sha256:49e9ffc856bc6a1bec1c26c5c7b7213dff7cc8bc6b64d624c4d143d04aff0bcf",
"sha256:b379282feaf89069cb790775ab6bbbd2bd2038a68c7ef9b84a41898e0b551081"
],
"index": "pypi",
"version": "==3.4.5"
"version": "==3.4.3"
},
"pytest-env": {
"hashes": [
@@ -484,26 +482,25 @@
},
"pytest-forked": {
"hashes": [
"sha256:260d03fbd38d5ce41a657759e8d19bc7c8cfa6d0dcfa36c0bc9742d33bc30742",
"sha256:8d05c2e6f33cd4422571b2b1bb309720c398b0549cff499e3e4cde661875ab54"
"sha256:e4500cd0509ec4a26535f7d4112a8cc0f17d3a41c29ffd4eab479d2a55b30805",
"sha256:f275cb48a73fc61a6710726348e1da6d68a978f0ec0c54ece5a5fae5977e5a08"
],
"version": "==1.0.1"
"version": "==0.2"
},
"pytest-sugar": {
"hashes": [
"sha256:26cf8289fe10880cbbc130bd77398c4e6a8b936d8393b116a5c16121d95ab283",
"sha256:fcd87a74b2bce5386d244b49ad60549bfbc4602527797fac167da147983f58ab"
"sha256:ab8cc42faf121344a4e9b13f39a51257f26f410e416c52ea11078cdd00d98a2c"
],
"index": "pypi",
"version": "==0.9.2"
"version": "==0.9.1"
},
"pytest-xdist": {
"hashes": [
"sha256:107e9db0ee30ead02ca93e7d6d4846675f1b2142234f0eb1cd4d76739cd9ae6f",
"sha256:5795f665e112520fa5beab736ad957e7f36ce7d44210f4004be9d99f86529d97"
"sha256:3bc9dcb6ff47e607d3c710727cd9996fd7ac1466d405c3b40bb495da99b6b669",
"sha256:8e188d13ce6614c7a678179a76f46231199ffdfe6163de031c17e62ffa256917"
],
"index": "pypi",
"version": "==1.26.0"
"version": "==1.24.0"
},
"python-dateutil": {
"hashes": [
@@ -515,61 +512,62 @@
},
"python-dotenv": {
"hashes": [
"sha256:a84569d0e00d178bc5b957f7ff208bf49287cbf61857c31c258c4a91f571527b",
"sha256:c9b1ddd3cdbe75c7d462cb84674d87130f4b948f090f02c7d7144779afb99ae0"
"sha256:122290a38ece9fe4f162dc7c95cae3357b983505830a154d3c98ef7f6c6cea77",
"sha256:4a205787bc829233de2a823aa328e44fd9996fedb954989a21f1fc67c13d7a77"
],
"index": "pypi",
"version": "==0.10.1"
"version": "==0.9.1"
},
"python-gnupg": {
"hashes": [
"sha256:45daf020b370bda13a1429c859fcdff0b766c0576844211446f9266cae97fb0e",
"sha256:85c231850a0275c9722f06e34b45a22510b83a6a6e88f93b5ae32ba04c95056c"
"sha256:2d158dfc6b54927752b945ebe57e6a0c45da27747fa3b9ae66eccc0d2147ac0d",
"sha256:faa69bab58ed0936f0ccf96c99b92369b7a1819305d37dfe5c927d21a437a09d"
],
"index": "pypi",
"version": "==0.4.4"
"version": "==0.4.3"
},
"python-levenshtein": {
"hashes": [
"sha256:033a11de5e3d19ea25c9302d11224e1a1898fe5abd23c61c7c360c25195e3eb1"
],
"markers": "extra == 'speedup'",
"version": "==0.12.0"
},
"pytz": {
"hashes": [
"sha256:32b0891edff07e28efe91284ed9c31e123d84bea3fd98e1f72be2508f43ef8d9",
"sha256:d5f05e487007e29e03409f9398d074e158d920d36eb82eaf66fb1136b0c5374c"
"sha256:31cb35c89bd7d333cd32c5f278fca91b523b0834369e757f4c5641ea252236ca",
"sha256:8e0f8568c118d3077b46be7d654cc8167fa916092e28320cde048e54bfc9f1e6"
],
"index": "pypi",
"version": "==2018.9"
"version": "==2018.7"
},
"regex": {
"hashes": [
"sha256:0bcd8ab8c812278981df3161db3f94f0ec72f1fa07020173c96f20e74bd7c16a",
"sha256:20b1601b887e1073805adda2f8a09bb4c86dc7629c46c0d7bf28444dcb32920d",
"sha256:3c4327dd686d2e05b1b3d60a256fbf1c93c53001614ed8acd65453e09d40e10f",
"sha256:5e8c6cef2cd964888d5fdf16f3db3dbaaa18c5c5c648161c52c8df065cc26ac5",
"sha256:666b03b1c33ef8105f28ecf0fd26f4480931a91a6e30e29e304c1d9eddce2209",
"sha256:8afb6ecd80154464b1ad31a93228b63832526f9b0291a82a55287ae15c01de79",
"sha256:9326e1c5b9de6e74150bdd424789aecec41c7ecaf9e78bf4c3acfe6868ce1365",
"sha256:e06eac198d9c76bcbe52b987b74ead0d353cfaa9f3d8cd907ec984f4bb20ea1e",
"sha256:eee007ee39a02cb631b74f89f992766b9d7e952a750166f3e5a5baf8c328c070"
"sha256:0ef96690c3d2294155b7d44187ca4a151e45c931cb768e106ba464a9fa64c5da",
"sha256:251683e01a3bcacd9188acf0d4caf7b29a3b963c843159311825613ae144cddb",
"sha256:3fe15a75fe00f04d1ec16713d55cf1e206077c450267a10b33318756fb8b3f99",
"sha256:53a962f9dc28cdf403978a142cb1e054479759ad64d312a999f9f042c25b5c9a",
"sha256:8bd1da6a93d32336a5e5432886dd8543004f0591c39b83dbfa60705cccdf414d",
"sha256:b5423061918f602e9342b54d746ac31c598d328ecaf4ef0618763e960c926fd4",
"sha256:d80ebc65b1f7d0403117f59309c16eac24be6a0bc730b593a79f703462858d94",
"sha256:fd8419979639b7de7fb964a13bce3ac47e6fe33043b83de0398c3067986e5659",
"sha256:ff2f15b2b0b4b58ba8a1de651780a0d3fd54f96ad6b77dceb77695220e5d7b7a"
],
"version": "==2019.1.24"
"version": "==2018.11.2"
},
"requests": {
"hashes": [
"sha256:502a824f31acdacb3a35b6690b5fbf0bc41d63a24a45c4004352b0242707598e",
"sha256:7bf2a778576d825600030a110f3c0e3e8edc51dfaafe1c146e39a2027784957b"
"sha256:99dcfdaaeb17caf6e526f32b6a7b780461512ab3f1d992187801694cba42770c",
"sha256:a84b8c9ab6239b578f22d1c21d51b696dcfe004032bb80ea832398d6909d7279"
],
"version": "==2.21.0"
"version": "==2.20.0"
},
"six": {
"hashes": [
"sha256:3350809f0555b11f552448330d0b52d5f24c91a322ea4a15ef22629740f3761c",
"sha256:d16a0141ec1a18405cd4ce8b4613101da75da0e9a7aec5bdd4fa804d0e0eba73"
"sha256:70e8a77beed4562e7f14fe23a786b54f6296e34344c23bc42f07b15018ff98e9",
"sha256:832dc0e10feb1aa2c68dcc57dbb658f1c7e65b9b61af69048abc87a2db00a0eb"
],
"version": "==1.12.0"
"version": "==1.11.0"
},
"snowballstemmer": {
"hashes": [
@@ -580,11 +578,11 @@
},
"sphinx": {
"hashes": [
"sha256:429e3172466df289f0f742471d7e30ba3ee11f3b5aecd9a840480d03f14bcfe5",
"sha256:c4cb17ba44acffae3d3209646b6baec1e215cad3065e852c68cc569d4df1b9f8"
"sha256:652eb8c566f18823a022bb4b6dbc868d366df332a11a0226b5bc3a798a479f17",
"sha256:d222626d8356de702431e813a05c68a35967e3d66c6cd1c2c89539bb179a7464"
],
"index": "pypi",
"version": "==1.8.3"
"version": "==1.8.1"
},
"sphinxcontrib-websupport": {
"hashes": [
@@ -615,11 +613,18 @@
},
"tox": {
"hashes": [
"sha256:04f8f1aa05de8e76d7a266ccd14e0d665d429977cd42123bc38efa9b59964e9e",
"sha256:25ef928babe88c71e3ed3af0c464d1160b01fca2dd1870a5bb26c2dea61a17fc"
"sha256:513e32fdf2f9e2d583c2f248f47ba9886428c949f068ac54a0469cac55df5862",
"sha256:75fa30e8329b41b664585f5fb837e23ce1d7e6fa1f7811f2be571c990f9d911b"
],
"index": "pypi",
"version": "==3.7.0"
"version": "==3.5.3"
},
"traitlets": {
"hashes": [
"sha256:9c4bd2d267b7153df9152698efb1050a5d84982d3384a37b2c1f7723ba3e7835",
"sha256:c6cb5e6f57c5a9bdaa40fa71ce7b4af30298fbab9ece9815b5d995ab6217c7d9"
],
"version": "==4.3.2"
},
"tzlocal": {
"hashes": [
@@ -636,106 +641,10 @@
},
"virtualenv": {
"hashes": [
"sha256:58c359370401e0af817fb0070911e599c5fdc836166306b04fd0f278151ed125",
"sha256:729f0bcab430e4ef137646805b5b1d8efbb43fe53d4a0f33328624a84a5121f7"
"sha256:686176c23a538ecc56d27ed9d5217abd34644823d6391cbeb232f42bf722baad",
"sha256:f899fafcd92e1150f40c8215328be38ff24b519cd95357fa6e78e006c7638208"
],
"version": "==16.3.0"
}
},
"develop": {
"backcall": {
"hashes": [
"sha256:38ecd85be2c1e78f77fd91700c76e14667dc21e2713b63876c0eb901196e01e4",
"sha256:bbbf4b1e5cd2bdb08f915895b51081c041bac22394fdfcfdfbe9f14b77c08bf2"
],
"version": "==0.1.0"
},
"decorator": {
"hashes": [
"sha256:33cd704aea07b4c28b3eb2c97d288a06918275dac0ecebdaf1bc8a48d98adb9e",
"sha256:cabb249f4710888a2fc0e13e9a16c343d932033718ff62e1e9bc93a9d3a9122b"
],
"version": "==4.3.2"
},
"ipython": {
"hashes": [
"sha256:6a9496209b76463f1dec126ab928919aaf1f55b38beb9219af3fe202f6bbdd12",
"sha256:f69932b1e806b38a7818d9a1e918e5821b685715040b48e59c657b3c7961b742"
],
"index": "pypi",
"version": "==7.2.0"
},
"ipython-genutils": {
"hashes": [
"sha256:72dd37233799e619666c9f639a9da83c34013a73e8bbc79a7a6348d93c61fab8",
"sha256:eb2e116e75ecef9d4d228fdc66af54269afa26ab4463042e33785b887c628ba8"
],
"version": "==0.2.0"
},
"jedi": {
"hashes": [
"sha256:571702b5bd167911fe9036e5039ba67f820d6502832285cde8c881ab2b2149fd",
"sha256:c8481b5e59d34a5c7c42e98f6625e633f6ef59353abea6437472c7ec2093f191"
],
"version": "==0.13.2"
},
"parso": {
"hashes": [
"sha256:4b8f9ed80c3a4a3191aa3261505d868aa552dd25649cb13a7d73b6b7315edf2d",
"sha256:5a120be2e8863993b597f1c0437efca799e90e0793c98ae5d4e34ebd00140e31"
],
"version": "==0.3.2"
},
"pexpect": {
"hashes": [
"sha256:2a8e88259839571d1251d278476f3eec5db26deb73a70be5ed5dc5435e418aba",
"sha256:3fbd41d4caf27fa4a377bfd16fef87271099463e6fa73e92a52f92dfee5d425b"
],
"markers": "sys_platform != 'win32'",
"version": "==4.6.0"
},
"pickleshare": {
"hashes": [
"sha256:87683d47965c1da65cdacaf31c8441d12b8044cdec9aca500cd78fc2c683afca",
"sha256:9649af414d74d4df115d5d718f82acb59c9d418196b7b4290ed47a12ce62df56"
],
"version": "==0.7.5"
},
"prompt-toolkit": {
"hashes": [
"sha256:c1d6aff5252ab2ef391c2fe498ed8c088066f66bc64a8d5c095bbf795d9fec34",
"sha256:d4c47f79b635a0e70b84fdb97ebd9a274203706b1ee5ed44c10da62755cf3ec9",
"sha256:fd17048d8335c1e6d5ee403c3569953ba3eb8555d710bfc548faf0712666ea39"
],
"version": "==2.0.7"
},
"ptyprocess": {
"hashes": [
"sha256:923f299cc5ad920c68f2bc0bc98b75b9f838b93b599941a6b63ddbc2476394c0",
"sha256:d7cc528d76e76342423ca640335bd3633420dc1366f258cb31d05e865ef5ca1f"
],
"version": "==0.6.0"
},
"pygments": {
"hashes": [
"sha256:5ffada19f6203563680669ee7f53b64dabbeb100eb51b61996085e99c03b284a",
"sha256:e8218dd399a61674745138520d0d4cf2621d7e032439341bc3f647bff125818d"
],
"version": "==2.3.1"
},
"six": {
"hashes": [
"sha256:3350809f0555b11f552448330d0b52d5f24c91a322ea4a15ef22629740f3761c",
"sha256:d16a0141ec1a18405cd4ce8b4613101da75da0e9a7aec5bdd4fa804d0e0eba73"
],
"version": "==1.12.0"
},
"traitlets": {
"hashes": [
"sha256:9c4bd2d267b7153df9152698efb1050a5d84982d3384a37b2c1f7723ba3e7835",
"sha256:c6cb5e6f57c5a9bdaa40fa71ce7b4af30298fbab9ece9815b5d995ab6217c7d9"
],
"version": "==4.3.2"
"version": "==16.1.0"
},
"wcwidth": {
"hashes": [
@@ -744,5 +653,6 @@
],
"version": "==0.1.7"
}
}
},
"develop": {}
}

View File

@@ -1,6 +1,7 @@
[ [en](README.md) | de | [el](README-el.md) ]
*[English](README.md)*<br/>
*[Greek](README-el.md)*
![Paperless](https://raw.githubusercontent.com/danielquinn/paperless/master/src/paperless/static/paperless/img/logo-dark.png)
# Paperless
[![Dokumentation](https://readthedocs.org/projects/paperless/badge/?version=latest)](https://paperless.readthedocs.org/) [![Chat](https://badges.gitter.im/danielquinn/paperless.svg)](https://gitter.im/danielquinn/paperless) [![Travis](https://travis-ci.org/danielquinn/paperless.svg?branch=master)](https://travis-ci.org/danielquinn/paperless) [![Coverage Status](https://coveralls.io/repos/github/danielquinn/paperless/badge.svg?branch=master)](https://coveralls.io/github/danielquinn/paperless?branch=master) [![Danke](https://img.shields.io/badge/THANKS-md-ff69b4.svg)](https://github.com/danielquinn/paperless/blob/master/THANKS.md)

View File

@@ -1,6 +1,7 @@
[ [en](README.md) | [de](README-de.md) | el ]
*[English](README.md)*<br/>
*[German](README-de.md)*
![Paperless](https://raw.githubusercontent.com/danielquinn/paperless/master/src/paperless/static/paperless/img/logo-dark.png)
# Paperless
[![Documentation](https://readthedocs.org/projects/paperless/badge/?version=latest)](https://paperless.readthedocs.org/) [![Chat](https://badges.gitter.im/danielquinn/paperless.svg)](https://gitter.im/danielquinn/paperless) [![Travis](https://travis-ci.org/danielquinn/paperless.svg?branch=master)](https://travis-ci.org/danielquinn/paperless) [![Coverage Status](https://coveralls.io/repos/github/danielquinn/paperless/badge.svg?branch=master)](https://coveralls.io/github/danielquinn/paperless?branch=master) [![Thanks](https://img.shields.io/badge/THANKS-md-ff69b4.svg)](https://github.com/danielquinn/paperless/blob/master/THANKS.md)

View File

@@ -1,6 +1,7 @@
[ en | [de](README-de.md) | [el](README-el.md) ]
*[German](README-de.md)*<br/>
*[Greek](README-el.md)*
![Paperless](https://raw.githubusercontent.com/danielquinn/paperless/master/src/paperless/static/paperless/img/logo-dark.png)
# Paperless
[![Documentation](https://readthedocs.org/projects/paperless/badge/?version=latest)](https://paperless.readthedocs.org/) [![Chat](https://badges.gitter.im/danielquinn/paperless.svg)](https://gitter.im/danielquinn/paperless) [![Travis](https://travis-ci.org/danielquinn/paperless.svg?branch=master)](https://travis-ci.org/danielquinn/paperless) [![Coverage Status](https://coveralls.io/repos/github/danielquinn/paperless/badge.svg?branch=master)](https://coveralls.io/github/danielquinn/paperless?branch=master) [![Thanks](https://img.shields.io/badge/THANKS-md-ff69b4.svg)](https://github.com/danielquinn/paperless/blob/master/THANKS.md)

View File

@@ -17,9 +17,6 @@ services:
volumes:
- data:/usr/src/paperless/data
- media:/usr/src/paperless/media
# You have to adapt the local path you want the consumption
# directory to mount to by modifying the part before the ':'.
- ./consume:/consume
env_file: docker-compose.env
# The reason the line is here is so that the webserver that doesn't do
# any text recognition and doesn't have to install unnecessary
@@ -39,8 +36,8 @@ services:
volumes:
- data:/usr/src/paperless/data
- media:/usr/src/paperless/media
# This should be set to the same value as the consume directory
# in the webserver service above.
# You have to adapt the local path you want the consumption
# directory to mount to by modifying the part before the ':'.
- ./consume:/consume
# Likewise, you can add a local path to mount a directory for
# exporting. This is not strictly needed for paperless to

View File

@@ -1,35 +1,6 @@
Changelog
#########
2.7.0
=====
* `syntonym`_ submitted a pull request to catch IMAP connection errors `#475`_.
* `Stéphane Brunner`_ added ``psycopg2`` to the Pipfile `#489`_. He also fixed
a syntax error in ``docker-compose.yml.example`` `#488`_ and added `DjangoQL <https://github.com/ivelum/djangoql>`_,
which allows a litany of handy search functionality `#492`_.
* `CkuT`_ and `JOKer`_ hacked out a simple, but super-helpful optimisation to
how the thumbnails are served up, improving performance considerably `#481`_.
* `tsia`_ added a few fields to the tags REST API. `#483`_.
* `Brian Cribbs`_ improved the documentation to help people using Paperless
over NFS `#484`_.
* `Brendan M. Sleight`_ updated the documentation to include a note for setting the
``DEBUG`` value. The ``paperless.conf.example`` file was also updated to
mirror the project defaults.
2.6.1
=====
* We now have a logo, complete with a favicon :-)
* Removed some problematic tests.
* Fix the docker-compose example config to include a shared consume volume so
that using the push API will work for users of the Docker install. Thanks to
`Colin Frei`_ for fixing this in `#466`_.
* `khrise`_ submitted a pull request to include the ``added`` property to the
REST API `#471`_.
2.6.0
=====
@@ -623,13 +594,7 @@ bulk of the work on this big change.
.. _euri10: https://github.com/euri10
.. _Ulli: https://github.com/Ulli2k
.. _tsia: https://github.com/tsia
.. _Sblop: https://github.com/Sblop
.. _Colin Frei: https://github.com/colinfrei
.. _khrise: https://github.com/khrise
.. _syntonym: https://github.com/syntonym
.. _JOKer: https://github.com/JOKer
.. _Brian Cribbs: https://github.com/cribbstechnolog
.. _Brendan M. Sleight: https://github.com/bmsleight
.. _Sblop: https://github.com/Sblop
.. _#20: https://github.com/danielquinn/paperless/issues/20
.. _#44: https://github.com/danielquinn/paperless/issues/44
@@ -729,15 +694,6 @@ bulk of the work on this big change.
.. _#440: https://github.com/danielquinn/paperless/pull/440
.. _#441: https://github.com/danielquinn/paperless/pull/441
.. _#442: https://github.com/danielquinn/paperless/pull/442
.. _#466: https://github.com/danielquinn/paperless/pull/466
.. _#471: https://github.com/danielquinn/paperless/pull/471
.. _#475: https://github.com/danielquinn/paperless/pull/475
.. _#481: https://github.com/danielquinn/paperless/pull/481
.. _#483: https://github.com/danielquinn/paperless/pull/483
.. _#484: https://github.com/danielquinn/paperless/pull/484
.. _#488: https://github.com/danielquinn/paperless/pull/488
.. _#489: https://github.com/danielquinn/paperless/pull/489
.. _#492: https://github.com/danielquinn/paperless/pull/492
.. _pipenv: https://docs.pipenv.org/
.. _a new home on Docker Hub: https://hub.docker.com/r/danielquinn/paperless/

View File

@@ -0,0 +1,20 @@
Changelog (jonaswinkler)
########################
1.0.0
=====
* First release based on paperless 2.6.0
* Added: Automatic document classification using neural networks (replaces
regex-based tagging)
* Added: Document types
* Added: Archive serial number allows easy referencing of physical document
copies
* Added: Inbox tags (added automatically to newly consumed documents)
* Added: Document viewer on document edit page
* Database backend is now configurable
1.0.1
=====
* Fixed migration order

View File

@@ -46,3 +46,4 @@ Contents
contributing
scanners
changelog
changelog_jonaswinkler

View File

@@ -102,7 +102,7 @@ is similar:
$ cd /path/to/project
$ git pull
$ docker build -t paperless .
$ docker-compose run --rm consumer migrate
$ docker-compose run --rm consumer migrate
$ docker-compose up -d
If ``git pull`` doesn't report any changes, there is no need to continue with

View File

@@ -12,7 +12,6 @@ should work) that has the following software installed:
* `Imagemagick`_ version 6.7.5 or higher
* `unpaper`_
* `libpoppler-cpp-dev`_ PDF rendering library
* `optipng`_
.. _Python3: https://python.org/
.. _GNU Privacy Guard: https://gnupg.org
@@ -20,7 +19,6 @@ should work) that has the following software installed:
.. _Imagemagick: http://imagemagick.org/
.. _unpaper: https://www.flameeyes.eu/projects/unpaper
.. _libpoppler-cpp-dev: https://poppler.freedesktop.org/
.. _optipng: http://optipng.sourceforge.net/
Notably, you should confirm how you access your Python3 installation. Many
Linux distributions will install Python3 in parallel to Python2, using the

View File

@@ -77,16 +77,12 @@ Standard (Bare Metal)
encrypt/decrypt the original documents. Don't worry about defining this
if you don't want to use encryption (the default).
Note also that if you're using the ``runserver`` as mentioned below, you
should make sure that PAPERLESS_DEBUG="true" or is just commented out as
this is the default.
4. Initialise the SQLite database with ``./manage.py migrate``.
5. Create a user for your Paperless instance with
``./manage.py createsuperuser``. Follow the prompts to create your user.
6. Start the webserver with ``./manage.py runserver <IP>:<PORT>``.
If no specific IP or port is given, the default is ``127.0.0.1:8000`` also
known as http://localhost:8000/.
If no specific IP or port are given, the default is ``127.0.0.1:8000``
also known as http://localhost:8000/.
You should now be able to visit your (empty) installation at
`Paperless webserver`_ or whatever you chose before. You can login with the
user/pass you created in #5.
@@ -147,15 +143,6 @@ Docker Method
instructions in comments in the file. The only change that is a hard
requirement is to specify where the consumption directory should
mount.[#dockercomposeyml]_
.. caution::
If you are using NFS mounts for the consume directory you also need to
change the command to turn off inotify as it doesn't work with NFS
`command: ["document_consumer", "--no-inotify"]`
5. Modify ``docker-compose.env`` and adapt the following environment variables:
``PAPERLESS_PASSPHRASE``

View File

@@ -214,5 +214,5 @@ This too is done via the ``manage.py`` script:
That's it. It'll loop over all of the documents in your database and attempt
to match all of your tags to them. If one matches, it'll be applied. And
don't worry, you can run this as often as you like, it won't double-tag
don't worry, you can run this as often as you like, it won't double-tag
a document.

0
models/.keep Normal file
View File

View File

@@ -3,6 +3,16 @@
# As this file contains passwords it should only be readable by the user
# running paperless.
###############################################################################
#### Database Settings ####
###############################################################################
# By default, sqlite is used as the database backend. This can be changed here.
#PAPERLESS_DBENGINE="django.db.backends.postgresql_psycopg2"
#PAPERLESS_DBNAME="paperless"
#PAPERLESS_DBUSER="paperless"
#PAPERLESS_DBPASS="paperless"
###############################################################################
#### Paths & Folders ####
@@ -38,6 +48,13 @@ PAPERLESS_CONSUMPTION_DIR=""
#PAPERLESS_STATIC_URL="/static/"
# You can specify where the document classification model file should be
# stored. Make sure that this file is writeable by the user executing the
# management command "document_create_classifier" and that the path exists.
# The default location is /models/model.pickle within the install folder.
#PAPERLESS_MODEL_FILE=/path/to/model/file
# These values are required if you want paperless to check a particular email
# box every 10 minutes and attempt to consume documents from there. If you
# don't define a HOST, mail checking will just be disabled.
@@ -61,7 +78,7 @@ PAPERLESS_EMAIL_SECRET=""
# Controls whether django's debug mode is enabled. Disable this on production
# systems. Debug mode is enabled by default.
#PAPERLESS_DEBUG="true"
PAPERLESS_DEBUG="false"
# Paperless can be instructed to attempt to encrypt your PDF files with GPG
@@ -202,11 +219,6 @@ PAPERLESS_EMAIL_SECRET=""
#PAPERLESS_FORGIVING_OCR="false"
# By default Paperless does not OCR a document if the text can be retrieved from
# the document directly. Set to true to always OCR documents.
#PAPERLESS_OCR_ALWAYS="false"
###############################################################################
#### Interface ####
###############################################################################

View File

@@ -4,67 +4,80 @@ apipkg==1.5
atomicwrites==1.2.1
attrs==18.2.0
babel==2.6.0
certifi==2018.11.29
backcall==0.1.0
certifi==2018.10.15
chardet==3.0.4
coverage==4.5.2
coverage==4.5.1
coveralls==1.5.1
dateparser==0.7.0
decorator==4.3.0
django-cors-headers==2.4.0
django-crispy-forms==1.7.2
django-extensions==2.1.4
django-filter==2.1.0
django==2.0.10
djangoql==0.12.3
djangorestframework==3.9.1
django-extensions==2.1.3
django-filter==2.0.0
django==2.0.9
djangorestframework==3.9.0
docopt==0.6.2
docutils==0.14
execnet==1.5.0
factory-boy==2.11.1
faker==1.0.2
faker==0.9.2
filelock==3.0.10
filemagic==1.6
fuzzywuzzy[speedup]==0.15.0
gunicorn==19.9.0
idna==2.8
idna==2.7
imagesize==1.1.0
inotify-simple==1.1.8
ipython-genutils==0.2.0
ipython==7.1.1
jedi==0.13.1
jinja2==2.10
langdetect==1.0.7
markupsafe==1.1.0
more-itertools==5.0.0
packaging==19.0
markupsafe==1.0
more-itertools==4.3.0
numpy==1.15.1
packaging==18.0
parso==0.3.1
pdftotext==2.1.1
pillow==5.4.1
pluggy==0.8.1
ply==3.11
psycopg2==2.7.7
pexpect==4.6.0
pickleshare==0.7.5
pillow==5.3.0
pluggy==0.8.0
psycopg2==2.7.6.1
prompt-toolkit==2.0.7
ptyprocess==0.6.0
py==1.7.0
pycodestyle==2.4.0
pygments==2.3.1
pygments==2.2.0
pyocr==0.5.3
pyparsing==2.3.1
pytest-cov==2.6.1
pytest-django==3.4.5
pyparsing==2.3.0
pytest-cov==2.6.0
pytest-django==3.4.3
pytest-env==0.6.2
pytest-forked==1.0.1
pytest-sugar==0.9.2
pytest-xdist==1.26.0
pytest==4.1.1
pytest-forked==0.2
pytest-sugar==0.9.1
pytest-xdist==1.24.0
pytest==3.9.3
python-dateutil==2.7.5
python-dotenv==0.10.1
python-gnupg==0.4.4
python-levenshtein==0.12.0
pytz==2018.9
regex==2019.1.24
requests==2.21.0
six==1.12.0
python-dotenv==0.9.1
python-gnupg==0.4.3
python-levenshtein==0.12.0 ; extra == 'speedup'
pytz==2018.7
regex==2018.11.2
requests==2.20.0
six==1.11.0
scikit-learn==0.19.2
scipy==1.1.0
snowballstemmer==1.2.1
sphinx==1.8.3
sphinx==1.8.1
sphinxcontrib-websupport==1.1.0
termcolor==1.1.0
text-unidecode==1.2
toml==0.10.0
tox==3.7.0
tox==3.5.3
traitlets==4.3.2
tzlocal==1.5.1
urllib3==1.24.1
virtualenv==16.3.0
virtualenv==16.1.0
wcwidth==0.1.7

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

Binary file not shown.

Before

Width:  |  Height:  |  Size: 91 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 111 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 116 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 94 KiB

File diff suppressed because one or more lines are too long

Before

Width:  |  Height:  |  Size: 7.4 KiB

File diff suppressed because one or more lines are too long

Before

Width:  |  Height:  |  Size: 7.5 KiB

File diff suppressed because one or more lines are too long

Before

Width:  |  Height:  |  Size: 7.5 KiB

File diff suppressed because one or more lines are too long

Before

Width:  |  Height:  |  Size: 7.4 KiB

View File

@@ -1,82 +0,0 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<svg
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:cc="http://creativecommons.org/ns#"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:svg="http://www.w3.org/2000/svg"
xmlns="http://www.w3.org/2000/svg"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
version="1.1"
width="900"
height="900"
id="svg3923"
sodipodi:docname="square.svg"
inkscape:export-filename="/tmp/test.png"
inkscape:export-xdpi="96"
inkscape:export-ydpi="96"
inkscape:version="0.92.2 2405546, 2018-03-11">
<metadata
id="metadata3929">
<rdf:RDF>
<cc:Work
rdf:about="">
<dc:format>image/svg+xml</dc:format>
<dc:type
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
<dc:title></dc:title>
</cc:Work>
</rdf:RDF>
</metadata>
<defs
id="defs3927" />
<sodipodi:namedview
pagecolor="#ffffff"
bordercolor="#666666"
borderopacity="1"
objecttolerance="10"
gridtolerance="10"
guidetolerance="10"
inkscape:pageopacity="0"
inkscape:pageshadow="2"
inkscape:window-width="3840"
inkscape:window-height="2096"
id="namedview3925"
showgrid="false"
inkscape:zoom="1.1360927"
inkscape:cx="635.07139"
inkscape:cy="606.383"
inkscape:window-x="0"
inkscape:window-y="27"
inkscape:window-maximized="1"
inkscape:current-layer="g3921" />
<g
transform="matrix(10.638298,0,0,10.638298,106.38298,-206.38301)"
id="g3921">
<defs
id="SvgjsDefs1018" />
<g
id="SvgjsG1019"
featureKey="root"
style="fill:#ffffff" />
<g
id="SvgjsG1020"
featureKey="symbol1"
transform="matrix(0.10341565,0,0,0.10341565,-11.43874,18.048418)"
inkscape:export-filename="/tmp/test.png"
inkscape:export-xdpi="116.02285"
inkscape:export-ydpi="116.02285"
style="fill:#17541f">
<defs
id="defs3911" />
<g
id="g3915">
<path
d="M 231,798 C 227,779 219,741 218,741 49,640 69,465 125,365 c 12,126 235,213 105,367 -1,2 6,26 12,48 26,-44 65,-97 63,-102 C 145,288 645,258 749,16 c 47,234 -24,596 -426,688 -2,1 -73,126 -76,127 0,-2 -30,-1 -26,-11 2,-6 6,-14 10,-22 z M 330,625 C 267,476 452,312 544,271 356,439 324,564 330,625 Z m -104,79 c 51,-59 -9,-160 -45,-193 61,105 57,166 45,193 z"
style="fill:#17541f"
id="path3913"
inkscape:connector-curvature="0" />
</g>
</g>
</g>
</svg>

Before

Width:  |  Height:  |  Size: 2.6 KiB

View File

@@ -75,7 +75,7 @@ install_languages() {
pkg="tesseract-ocr-data-$lang"
# English is installed by default
if [[ "$lang" == "eng" ]]; then
if [ "$lang" == "eng" ]; then
continue
fi
@@ -95,7 +95,7 @@ if [[ "$1" != "/"* ]]; then
initialize
# Install additional languages if specified
if [[ ! -z "$PAPERLESS_OCR_LANGUAGES" ]]; then
if [ ! -z "$PAPERLESS_OCR_LANGUAGES" ]; then
install_languages "$PAPERLESS_OCR_LANGUAGES"
fi

64
src/documents/actions.py Normal file → Executable file
View File

@@ -4,7 +4,8 @@ from django.contrib.admin.utils import model_ngettext
from django.core.exceptions import PermissionDenied
from django.template.response import TemplateResponse
from documents.models import Correspondent, Tag
from documents.classifier import DocumentClassifier
from documents.models import Correspondent, DocumentType, Tag
def select_action(
@@ -17,9 +18,9 @@ def select_action(
if not modeladmin.has_change_permission(request):
raise PermissionDenied
if request.POST.get('post'):
if request.POST.get("post"):
n = queryset.count()
selected_object = modelclass.objects.get(id=request.POST.get('obj_id'))
selected_object = modelclass.objects.get(id=request.POST.get("obj_id"))
if n:
for document in queryset:
if document_action:
@@ -137,6 +138,57 @@ def remove_correspondent_from_selected(modeladmin, request, queryset):
)
def set_document_type_on_selected(modeladmin, request, queryset):
    """
    Admin action that sets a chosen document type on the selected documents.

    The work is delegated to ``select_action`` with ``DocumentType`` as the
    model class; the change itself is a single ``update()`` on the queryset.
    """

    def assign_document_type(qs, document_type):
        # One bulk UPDATE instead of saving each document individually.
        return qs.update(document_type=document_type)

    success_message = (
        "Successfully set document type %(selected_object)s "
        "on %(count)d %(items)s."
    )

    return select_action(
        modeladmin=modeladmin,
        request=request,
        queryset=queryset,
        title="Set document type on multiple documents",
        action="set_document_type_on_selected",
        modelclass=DocumentType,
        success_message=success_message,
        queryset_action=assign_document_type,
    )
def remove_document_type_from_selected(modeladmin, request, queryset):
    """
    Admin action that clears the document type of the selected documents.

    The work is delegated to ``simple_action``; the change itself is a single
    ``update()`` that sets ``document_type`` to ``None``.
    """

    def clear_document_type(qs):
        # One bulk UPDATE instead of saving each document individually.
        return qs.update(document_type=None)

    success_message = (
        "Successfully removed document type from %(count)d "
        "%(items)s."
    )

    return simple_action(
        modeladmin=modeladmin,
        request=request,
        queryset=queryset,
        success_message=success_message,
        queryset_action=clear_document_type,
    )
def run_document_classifier_on_selected(modeladmin, request, queryset):
    """
    Admin action that runs the document classifier on the selected documents.

    A fresh ``DocumentClassifier`` is reloaded and then applied to each
    document through ``simple_action``, classifying correspondent, tags and
    document type. If a ``FileNotFoundError`` is raised, an error message is
    shown to the user and ``None`` is returned.
    """
    classifier = DocumentClassifier()

    def classify(doc):
        return classifier.classify_document(
            doc,
            classify_correspondent=True,
            classify_tags=True,
            classify_document_type=True,
        )

    success_message = (
        "Successfully applied document classifier to "
        "%(count)d %(items)s."
    )

    try:
        classifier.reload()
        return simple_action(
            modeladmin=modeladmin,
            request=request,
            queryset=queryset,
            success_message=success_message,
            document_action=classify,
        )
    except FileNotFoundError:
        modeladmin.message_user(
            request,
            "Classifier model file not found.",
            messages.ERROR,
        )
        return None
add_tag_to_selected.short_description = "Add tag to selected documents"
remove_tag_from_selected.short_description = \
"Remove tag from selected documents"
@@ -144,3 +196,9 @@ set_correspondent_on_selected.short_description = \
"Set correspondent on selected documents"
remove_correspondent_from_selected.short_description = \
"Remove correspondent from selected documents"
set_document_type_on_selected.short_description = \
"Set document type on selected documents"
remove_document_type_from_selected.short_description = \
"Remove document type from selected documents"
run_document_classifier_on_selected.short_description = \
"Run document classifier on selected"

54
src/documents/admin.py Normal file → Executable file
View File

@@ -11,16 +11,18 @@ from django.urls import reverse
from django.utils.html import format_html, format_html_join
from django.utils.http import urlquote
from django.utils.safestring import mark_safe
from djangoql.admin import DjangoQLSearchMixin
from documents.actions import (
add_tag_to_selected,
remove_correspondent_from_selected,
remove_tag_from_selected,
set_correspondent_on_selected
set_correspondent_on_selected,
set_document_type_on_selected,
remove_document_type_from_selected,
run_document_classifier_on_selected
)
from .models import Correspondent, Document, Log, Tag
from .models import Correspondent, Document, DocumentType, Log, Tag
class FinancialYearFilter(admin.SimpleListFilter):
@@ -117,13 +119,11 @@ class CorrespondentAdmin(CommonAdmin):
list_display = (
"name",
"match",
"matching_algorithm",
"automatic_classification",
"document_count",
"last_correspondence"
)
list_filter = ("matching_algorithm",)
list_editable = ("match", "matching_algorithm")
list_editable = ("automatic_classification",)
readonly_fields = ("slug",)
@@ -147,9 +147,12 @@ class CorrespondentAdmin(CommonAdmin):
class TagAdmin(CommonAdmin):
list_display = (
"name", "colour", "match", "matching_algorithm", "document_count")
list_filter = ("colour", "matching_algorithm")
list_editable = ("colour", "match", "matching_algorithm")
"name",
"colour",
"automatic_classification",
"document_count")
list_filter = ("colour",)
list_editable = ("colour", "automatic_classification")
readonly_fields = ("slug",)
@@ -166,7 +169,24 @@ class TagAdmin(CommonAdmin):
document_count.admin_order_field = "document_count"
class DocumentAdmin(DjangoQLSearchMixin, CommonAdmin):
class DocumentTypeAdmin(CommonAdmin):
    """Admin configuration for document types with a document counter."""

    list_display = ("name", "automatic_classification", "document_count")
    list_editable = ("automatic_classification",)
    readonly_fields = ("slug",)

    def get_queryset(self, request):
        """Return the parent queryset annotated with ``document_count``."""
        return super().get_queryset(request).annotate(
            document_count=models.Count("documents")
        )

    def document_count(self, obj):
        """Return the count annotated onto *obj* by ``get_queryset``."""
        return obj.document_count
    # Lets the admin sort the column by the annotated value.
    document_count.admin_order_field = "document_count"
class DocumentAdmin(CommonAdmin):
class Media:
css = {
@@ -176,8 +196,9 @@ class DocumentAdmin(DjangoQLSearchMixin, CommonAdmin):
search_fields = ("correspondent__name", "title", "content", "tags__name")
readonly_fields = ("added", "file_type", "storage_type",)
list_display = ("title", "created", "added", "thumbnail", "correspondent",
"tags_")
"tags_", "archive_serial_number", "document_type")
list_filter = (
"document_type",
"tags",
("correspondent", RecentCorrespondentFilter),
FinancialYearFilter
@@ -191,7 +212,10 @@ class DocumentAdmin(DjangoQLSearchMixin, CommonAdmin):
add_tag_to_selected,
remove_tag_from_selected,
set_correspondent_on_selected,
remove_correspondent_from_selected
remove_correspondent_from_selected,
set_document_type_on_selected,
remove_document_type_from_selected,
run_document_classifier_on_selected
]
date_hierarchy = "created"
@@ -224,6 +248,9 @@ class DocumentAdmin(DjangoQLSearchMixin, CommonAdmin):
extra_context=None):
extra_context = extra_context or {}
doc = Document.objects.get(id=object_id)
extra_context["download_url"] = doc.download_url
extra_context["file_type"] = doc.file_type
if self.document_queue and object_id:
if int(object_id) in self.document_queue:
@@ -347,6 +374,7 @@ class LogAdmin(CommonAdmin):
admin.site.register(Correspondent, CorrespondentAdmin)
admin.site.register(Tag, TagAdmin)
admin.site.register(DocumentType, DocumentTypeAdmin)
admin.site.register(Document, DocumentAdmin)
admin.site.register(Log, LogAdmin)

View File

@@ -11,8 +11,8 @@ class DocumentsConfig(AppConfig):
from .signals import document_consumption_started
from .signals import document_consumption_finished
from .signals.handlers import (
set_correspondent,
set_tags,
classify_document,
add_inbox_tags,
run_pre_consume_script,
run_post_consume_script,
cleanup_document_deletion,
@@ -21,8 +21,8 @@ class DocumentsConfig(AppConfig):
document_consumption_started.connect(run_pre_consume_script)
document_consumption_finished.connect(set_tags)
document_consumption_finished.connect(set_correspondent)
document_consumption_finished.connect(classify_document)
document_consumption_finished.connect(add_inbox_tags)
document_consumption_finished.connect(set_log_entry)
document_consumption_finished.connect(run_post_consume_script)

240
src/documents/classifier.py Executable file
View File

@@ -0,0 +1,240 @@
import logging
import os
import pickle
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import MultiLabelBinarizer, LabelBinarizer
from documents.models import Correspondent, DocumentType, Tag, Document
from paperless import settings
def preprocess_content(content):
    """Normalise document text before it is vectorized.

    Lower-cases the text, strips surrounding whitespace, turns line breaks
    into spaces and collapses every run of spaces into a single space.

    :param content: raw document text.
    :return: the normalised text.
    """
    content = content.lower().strip()
    content = content.replace("\n", " ").replace("\r", " ")
    # Collapse runs of spaces. Splitting on a single space yields an empty
    # string between consecutive spaces; dropping those and re-joining
    # leaves exactly one space between words and is guaranteed to finish
    # (a find/replace loop on a single space would never terminate).
    return " ".join(word for word in content.split(" ") if word)
class DocumentClassifier(object):
    """Predicts correspondent, document type and tags from document text.

    The fitted vectorizer, the three label binarizers and the three
    classifiers are stored together in ``settings.MODEL_FILE``. ``train``
    and ``save_classifier`` produce that file; ``reload`` and
    ``classify_document`` consume it.
    """

    def __init__(self):
        # Modification time of the model file that is currently loaded;
        # 0 means nothing has been loaded yet.
        self.classifier_version = 0

        self.data_vectorizer = None

        self.tags_binarizer = None
        self.correspondent_binarizer = None
        self.document_type_binarizer = None

        self.tags_classifier = None
        self.correspondent_classifier = None
        self.document_type_classifier = None

    def reload(self):
        """Load the models from disk if the file is newer than what we hold.

        :raises FileNotFoundError: if the model file does not exist.
        """
        # Read the timestamp once, *before* loading. Re-reading it after
        # the load would record a file that was rewritten in the meantime
        # as already loaded, and that newer model would then be skipped.
        model_mtime = os.path.getmtime(settings.MODEL_FILE)
        if model_mtime <= self.classifier_version:
            return

        logging.getLogger(__name__).info("Reloading classifier models")
        # NOTE(security): unpickling executes code embedded in the file, so
        # the model file must only ever be written by a trusted process.
        with open(settings.MODEL_FILE, "rb") as f:
            # The order must mirror save_classifier().
            self.data_vectorizer = pickle.load(f)
            self.tags_binarizer = pickle.load(f)
            self.correspondent_binarizer = pickle.load(f)
            self.document_type_binarizer = pickle.load(f)

            self.tags_classifier = pickle.load(f)
            self.correspondent_classifier = pickle.load(f)
            self.document_type_classifier = pickle.load(f)
        self.classifier_version = model_mtime

    def save_classifier(self):
        """Write the vectorizer, binarizers and classifiers to the model file."""
        with open(settings.MODEL_FILE, "wb") as f:
            # The order must mirror reload().
            pickle.dump(self.data_vectorizer, f)

            pickle.dump(self.tags_binarizer, f)
            pickle.dump(self.correspondent_binarizer, f)
            pickle.dump(self.document_type_binarizer, f)

            pickle.dump(self.tags_classifier, f)
            pickle.dump(self.correspondent_classifier, f)
            pickle.dump(self.document_type_classifier, f)

    def train(self):
        """Fit the vectorizer, binarizers and classifiers from the database.

        Documents carrying an inbox tag are left out of the training data.
        A correspondent or document type only counts as a label when its
        ``automatic_classification`` flag is set; otherwise the label -1
        ("none") is used. Only tags with the flag set are used as labels.
        """
        logger = logging.getLogger(__name__)

        data = list()
        labels_tags = list()
        labels_correspondent = list()
        labels_document_type = list()

        # Step 1: Extract and preprocess training data from the database.
        logger.info("Gathering data from database...")
        for doc in Document.objects.exclude(tags__is_inbox_tag=True):
            data.append(preprocess_content(doc.content))

            y = -1
            if doc.document_type:
                if doc.document_type.automatic_classification:
                    y = doc.document_type.id
            labels_document_type.append(y)

            y = -1
            if doc.correspondent:
                if doc.correspondent.automatic_classification:
                    y = doc.correspondent.id
            labels_correspondent.append(y)

            tags = [tag.id for tag in doc.tags.filter(
                automatic_classification=True
            )]
            labels_tags.append(tags)

        labels_tags_unique = {tag for tags in labels_tags for tag in tags}
        logger.info(
            "{} documents, {} tag(s), {} correspondent(s), "
            "{} document type(s).".format(
                len(data),
                len(labels_tags_unique),
                len(set(labels_correspondent)),
                len(set(labels_document_type))
            )
        )

        # Step 2: vectorize data
        logger.info("Vectorizing data...")
        self.data_vectorizer = CountVectorizer(
            analyzer="char",
            ngram_range=(3, 5),
            min_df=0.1
        )
        data_vectorized = self.data_vectorizer.fit_transform(data)

        self.tags_binarizer = MultiLabelBinarizer()
        labels_tags_vectorized = self.tags_binarizer.fit_transform(labels_tags)

        self.correspondent_binarizer = LabelBinarizer()
        labels_correspondent_vectorized = \
            self.correspondent_binarizer.fit_transform(labels_correspondent)

        self.document_type_binarizer = LabelBinarizer()
        labels_document_type_vectorized = \
            self.document_type_binarizer.fit_transform(labels_document_type)

        # Step 3: train the classifiers
        if len(self.tags_binarizer.classes_) > 0:
            logger.info("Training tags classifier...")
            self.tags_classifier = MLPClassifier(verbose=True)
            self.tags_classifier.fit(data_vectorized, labels_tags_vectorized)
        else:
            self.tags_classifier = None
            logger.info(
                "There are no tags. Not training tags classifier."
            )

        if len(self.correspondent_binarizer.classes_) > 0:
            logger.info(
                "Training correspondent classifier..."
            )
            self.correspondent_classifier = MLPClassifier(verbose=True)
            self.correspondent_classifier.fit(
                data_vectorized,
                labels_correspondent_vectorized
            )
        else:
            self.correspondent_classifier = None
            logger.info(
                "There are no correspondents. Not training correspondent "
                "classifier."
            )

        if len(self.document_type_binarizer.classes_) > 0:
            logger.info(
                "Training document type classifier..."
            )
            self.document_type_classifier = MLPClassifier(verbose=True)
            self.document_type_classifier.fit(
                data_vectorized,
                labels_document_type_vectorized
            )
        else:
            self.document_type_classifier = None
            logger.info(
                "There are no document types. Not training document type "
                "classifier."
            )

    def classify_document(
            self, document, classify_correspondent=False,
            classify_document_type=False, classify_tags=False,
            replace_tags=False):
        """Apply the requested classifiers to *document* and save it.

        :param document: the document to classify; its ``content`` is used.
        :param classify_correspondent: assign the predicted correspondent.
        :param classify_document_type: assign the predicted document type.
        :param classify_tags: add the predicted tags.
        :param replace_tags: clear the existing tags before adding new ones.

        A classifier that was not trained (``None``) is skipped silently.
        """
        X = self.data_vectorizer.transform(
            [preprocess_content(document.content)]
        )

        if classify_correspondent and self.correspondent_classifier:
            self._classify_correspondent(X, document)

        if classify_document_type and self.document_type_classifier:
            self._classify_document_type(X, document)

        if classify_tags and self.tags_classifier:
            self._classify_tags(X, document, replace_tags)

        # Tags are written through the relation manager in _classify_tags,
        # so only the two foreign keys need saving here.
        document.save(update_fields=("correspondent", "document_type"))

    def _classify_correspondent(self, X, document):
        """Set ``document.correspondent`` from the prediction for *X*.

        A predicted id of -1 clears the correspondent. If the predicted
        correspondent no longer exists, the document is left unchanged.
        """
        logger = logging.getLogger(__name__)
        y = self.correspondent_classifier.predict(X)
        correspondent_id = self.correspondent_binarizer.inverse_transform(y)[0]
        try:
            correspondent = None
            if correspondent_id != -1:
                correspondent = Correspondent.objects.get(id=correspondent_id)
                logger.info(
                    "Detected correspondent: {}".format(correspondent.name)
                )
            else:
                logger.info("Detected correspondent: -")
            document.correspondent = correspondent
        except Correspondent.DoesNotExist:
            logger.warning(
                "Detected correspondent with id {} does not exist "
                "anymore! Did you delete it?".format(correspondent_id)
            )

    def _classify_document_type(self, X, document):
        """Set ``document.document_type`` from the prediction for *X*.

        A predicted id of -1 clears the document type. If the predicted
        document type no longer exists, the document is left unchanged.
        """
        logger = logging.getLogger(__name__)
        y = self.document_type_classifier.predict(X)
        document_type_id = self.document_type_binarizer.inverse_transform(y)[0]
        try:
            document_type = None
            if document_type_id != -1:
                document_type = DocumentType.objects.get(id=document_type_id)
                logger.info(
                    "Detected document type: {}".format(document_type.name)
                )
            else:
                logger.info("Detected document type: -")
            document.document_type = document_type
        except DocumentType.DoesNotExist:
            logger.warning(
                "Detected document type with id {} does not exist "
                "anymore! Did you delete it?".format(document_type_id)
            )

    def _classify_tags(self, X, document, replace_tags):
        """Add the tags predicted for *X* to *document*.

        When *replace_tags* is true the existing tags are cleared first.
        Predicted tags that no longer exist are logged and skipped.
        """
        logger = logging.getLogger(__name__)
        y = self.tags_classifier.predict(X)
        tags_ids = self.tags_binarizer.inverse_transform(y)[0]
        if replace_tags:
            document.tags.clear()
        for tag_id in tags_ids:
            try:
                tag = Tag.objects.get(id=tag_id)
                logger.info(
                    "Detected tag: {}".format(tag.name)
                )
                document.tags.add(tag)
            except Tag.DoesNotExist:
                logger.warning(
                    "Detected tag with id {} does not exist anymore! Did "
                    "you delete it?".format(tag_id)
                )

2
src/documents/consumer.py Normal file → Executable file
View File

@@ -225,7 +225,7 @@ class Consumer:
storage_type=self.storage_type
)
relevant_tags = set(list(Tag.match_all(text)) + list(file_info.tags))
relevant_tags = set(file_info.tags)
if relevant_tags:
tag_names = ", ".join([t.slug for t in relevant_tags])
self.log("debug", "Tagging with {}".format(tag_names))

18
src/documents/filters.py Normal file → Executable file
View File

@@ -1,6 +1,6 @@
from django_filters.rest_framework import BooleanFilter, FilterSet
from .models import Correspondent, Document, Tag
from .models import Correspondent, Document, Tag, DocumentType
CHAR_KWARGS = (
@@ -35,6 +35,19 @@ class TagFilterSet(FilterSet):
}
class DocumentTypeFilterSet(FilterSet):
class Meta:
model = DocumentType
fields = {
"name": [
"startswith", "endswith", "contains",
"istartswith", "iendswith", "icontains"
],
"slug": ["istartswith", "iendswith", "icontains"]
}
class DocumentFilterSet(FilterSet):
tags_empty = BooleanFilter(
@@ -57,4 +70,7 @@ class DocumentFilterSet(FilterSet):
"tags__name": CHAR_KWARGS,
"tags__slug": CHAR_KWARGS,
"document_type__name": CHAR_KWARGS,
"document_type__slug": CHAR_KWARGS,
}

View File

@@ -216,11 +216,7 @@ class MailFetcher(Loggable):
return r
def _connect(self):
try:
self._connection = imaplib.IMAP4_SSL(self._host, self._port)
except OSError as e:
msg = "Problem connecting to {}: {}".format(self._host, e.strerror)
raise MailFetcherError(msg)
self._connection = imaplib.IMAP4_SSL(self._host, self._port)
def _login(self):

View File

@@ -1,82 +0,0 @@
import sys
from django.core.management.base import BaseCommand
from documents.models import Correspondent, Document
from ...mixins import Renderable
class Command(Renderable, BaseCommand):
help = """
Using the current set of correspondent rules, apply said rules to all
documents in the database, effectively allowing you to back-tag all
previously indexed documents with correspondent created (or modified)
after their initial import.
""".replace(" ", "")
TOO_MANY_CONTINUE = (
"Detected {} potential correspondents for {}, so we've opted for {}")
TOO_MANY_SKIP = (
"Detected {} potential correspondents for {}, so we're skipping it")
CHANGE_MESSAGE = (
'Document {}: "{}" was given the correspondent id {}: "{}"')
def __init__(self, *args, **kwargs):
self.verbosity = 0
BaseCommand.__init__(self, *args, **kwargs)
def add_arguments(self, parser):
parser.add_argument(
"--use-first",
default=False,
action="store_true",
help="By default this command won't try to assign a correspondent "
"if more than one matches the document. Use this flag if "
"you'd rather it just pick the first one it finds."
)
def handle(self, *args, **options):
self.verbosity = options["verbosity"]
for document in Document.objects.filter(correspondent__isnull=True):
potential_correspondents = list(
Correspondent.match_all(document.content))
if not potential_correspondents:
continue
potential_count = len(potential_correspondents)
correspondent = potential_correspondents[0]
if potential_count > 1:
if not options["use_first"]:
print(
self.TOO_MANY_SKIP.format(potential_count, document),
file=sys.stderr
)
continue
print(
self.TOO_MANY_CONTINUE.format(
potential_count,
document,
correspondent
),
file=sys.stderr
)
document.correspondent = correspondent
document.save(update_fields=("correspondent",))
print(
self.CHANGE_MESSAGE.format(
document.pk,
document.title,
correspondent.pk,
correspondent.name
),
file=sys.stderr
)

View File

@@ -0,0 +1,25 @@
import logging
from django.core.management.base import BaseCommand
from documents.classifier import DocumentClassifier
from paperless import settings
from ...mixins import Renderable
class Command(Renderable, BaseCommand):
    """Management command that trains the classifier and saves its models."""

    # Written as adjacent literals so no run-time whitespace clean-up is
    # needed: removing spaces from the text would also glue the words
    # together and make the help output unreadable.
    help = (
        "Trains the classifier on your data and saves the resulting models "
        "to a file. The document consumer will then automatically use this "
        "new model."
    )

    def __init__(self, *args, **kwargs):
        BaseCommand.__init__(self, *args, **kwargs)

    def handle(self, *args, **options):
        """Train on the current data, then write the models to the model file."""
        classifier = DocumentClassifier()
        classifier.train()

        logging.getLogger(__name__).info(
            "Saving models to {}...".format(settings.MODEL_FILE)
        )
        classifier.save_classifier()

View File

@@ -6,7 +6,7 @@ import shutil
from django.core.management.base import BaseCommand, CommandError
from django.core import serializers
from documents.models import Document, Correspondent, Tag
from documents.models import Document, Correspondent, Tag, DocumentType
from paperless.db import GnuPG
from ...mixins import Renderable
@@ -96,6 +96,9 @@ class Command(Renderable, BaseCommand):
manifest += json.loads(serializers.serialize(
"json", Tag.objects.all()))
manifest += json.loads(serializers.serialize(
"json", DocumentType.objects.all()))
with open(os.path.join(self.target, "manifest.json"), "w") as f:
json.dump(manifest, f, indent=2)

64
src/documents/management/commands/document_retagger.py Normal file → Executable file
View File

@@ -1,5 +1,8 @@
import logging
from django.core.management.base import BaseCommand
from documents.classifier import DocumentClassifier
from documents.models import Document, Tag
from ...mixins import Renderable
@@ -8,25 +11,66 @@ from ...mixins import Renderable
class Command(Renderable, BaseCommand):
help = """
Using the current set of tagging rules, apply said rules to all
documents in the database, effectively allowing you to back-tag all
previously indexed documents with tags created (or modified) after
their initial import.
Using the current classification model, assigns correspondents, tags
and document types to all documents, effectively allowing you to
back-tag all previously indexed documents with metadata created (or
modified) after their initial import.
""".replace(" ", "")
def __init__(self, *args, **kwargs):
self.verbosity = 0
BaseCommand.__init__(self, *args, **kwargs)
def add_arguments(self, parser):
parser.add_argument(
"-c", "--correspondent",
action="store_true"
)
parser.add_argument(
"-T", "--tags",
action="store_true"
)
parser.add_argument(
"-t", "--type",
action="store_true"
)
parser.add_argument(
"-i", "--inbox-only",
action="store_true"
)
parser.add_argument(
"-r", "--replace-tags",
action="store_true"
)
def handle(self, *args, **options):
self.verbosity = options["verbosity"]
for document in Document.objects.all():
if options["inbox_only"]:
queryset = Document.objects.filter(tags__is_inbox_tag=True)
else:
queryset = Document.objects.all()
documents = queryset.distinct()
tags = Tag.objects.exclude(
pk__in=document.tags.values_list("pk", flat=True))
logging.getLogger(__name__).info("Loading classifier")
clf = DocumentClassifier()
try:
clf.reload()
except FileNotFoundError:
logging.getLogger(__name__).fatal("Cannot classify documents, "
"classifier model file was not "
"found.")
return
for tag in Tag.match_all(document.content, tags):
print('Tagging {} with "{}"'.format(document, tag))
document.tags.add(tag)
for document in documents:
logging.getLogger(__name__).info(
"Processing document {}".format(document.title)
)
clf.classify_document(
document,
classify_document_type=options["type"],
classify_tags=options["tags"],
classify_correspondent=options["correspondent"],
replace_tags=options["replace_tags"]
)

View File

@@ -12,8 +12,9 @@ def re_slug_all_the_things(apps, schema_editor):
Tag = apps.get_model("documents", "Tag")
Correspondent = apps.get_model("documents", "Correspondent")
DocumentType = apps.get_model("documents", "DocumentType")
for klass in (Tag, Correspondent):
for klass in (Tag, Correspondent, DocumentType):
for instance in klass.objects.all():
klass.objects.filter(
pk=instance.pk
@@ -25,7 +26,7 @@ def re_slug_all_the_things(apps, schema_editor):
class Migration(migrations.Migration):
dependencies = [
('documents', '0021_document_storage_type'),
('documents', '1003_auto_20180904_1425'),
]
operations = [
@@ -48,5 +49,10 @@ class Migration(migrations.Migration):
name='slug',
field=models.SlugField(blank=True, editable=False),
),
migrations.AlterField(
model_name='documenttype',
name='slug',
field=models.SlugField(blank=True, editable=False),
),
migrations.RunPython(re_slug_all_the_things, migrations.RunPython.noop)
]

View File

@@ -0,0 +1,23 @@
# Generated by Django 2.0.7 on 2018-07-12 09:52
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('documents', '0021_document_storage_type'),
]
operations = [
migrations.AddField(
model_name='document',
name='archive_serial_number',
field=models.IntegerField(blank=True, db_index=True, help_text='The position of this document in your physical document archive.', null=True, unique=True),
),
migrations.AddField(
model_name='tag',
name='is_inbox_tag',
field=models.BooleanField(default=False, help_text='Marks this tag as an inbox tag: All newly consumed documents will be tagged with inbox tags.'),
),
]

View File

@@ -0,0 +1,33 @@
# Generated by Django 2.0.7 on 2018-08-23 11:55
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
dependencies = [
('documents', '1001_workflow_improvements'),
]
operations = [
migrations.CreateModel(
name='DocumentType',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('name', models.CharField(max_length=128, unique=True)),
('slug', models.SlugField(blank=True)),
('match', models.CharField(blank=True, max_length=256)),
('matching_algorithm', models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression'), (5, 'Fuzzy Match')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.')),
('is_insensitive', models.BooleanField(default=True)),
],
options={
'abstract': False,
},
),
migrations.AddField(
model_name='document',
name='document_type',
field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='documents', to='documents.DocumentType'),
),
]

View File

@@ -0,0 +1,77 @@
# Generated by Django 2.0.8 on 2018-09-04 14:25
from django.db import migrations, models
def transfer_automatic_classification(apps, schema_editor):
    """Seed ``automatic_classification`` from the old ``match`` field.

    For every Tag, Correspondent and DocumentType the flag is set to True
    when ``match`` holds a non-empty value and to False otherwise, and the
    object is saved.
    """
    for model_name in ("Tag", "Correspondent", "DocumentType"):
        historical_model = apps.get_model("documents", model_name)
        for instance in historical_model.objects.all():
            # None and "" both mean that no match rule was configured.
            instance.automatic_classification = bool(instance.match)
            instance.save()
def reverse_automatic_classification(apps, schema_editor):
    """Reverse step of the data migration; intentionally does nothing."""
    return None
class Migration(migrations.Migration):
dependencies = [
('documents', '1002_auto_20180823_1155'),
]
operations = [
migrations.AddField(
model_name='correspondent',
name='automatic_classification',
field=models.BooleanField(default=False, help_text='Automatically assign to newly added documents based on current usage in your document collection.'),
),
migrations.AddField(
model_name='documenttype',
name='automatic_classification',
field=models.BooleanField(default=False, help_text='Automatically assign to newly added documents based on current usage in your document collection.'),
),
migrations.AddField(
model_name='tag',
name='automatic_classification',
field=models.BooleanField(default=False, help_text='Automatically assign to newly added documents based on current usage in your document collection.'),
),
migrations.RunPython(transfer_automatic_classification, reverse_automatic_classification),
migrations.RemoveField(
model_name='correspondent',
name='is_insensitive',
),
migrations.RemoveField(
model_name='correspondent',
name='match',
),
migrations.RemoveField(
model_name='correspondent',
name='matching_algorithm',
),
migrations.RemoveField(
model_name='documenttype',
name='is_insensitive',
),
migrations.RemoveField(
model_name='documenttype',
name='match',
),
migrations.RemoveField(
model_name='documenttype',
name='matching_algorithm',
),
migrations.RemoveField(
model_name='tag',
name='is_insensitive',
),
migrations.RemoveField(
model_name='tag',
name='match',
),
migrations.RemoveField(
model_name='tag',
name='matching_algorithm',
),
]

0
src/documents/mixins.py Normal file → Executable file
View File

142
src/documents/models.py Normal file → Executable file
View File

@@ -24,43 +24,15 @@ except ImportError:
class MatchingModel(models.Model):
MATCH_ANY = 1
MATCH_ALL = 2
MATCH_LITERAL = 3
MATCH_REGEX = 4
MATCH_FUZZY = 5
MATCHING_ALGORITHMS = (
(MATCH_ANY, "Any"),
(MATCH_ALL, "All"),
(MATCH_LITERAL, "Literal"),
(MATCH_REGEX, "Regular Expression"),
(MATCH_FUZZY, "Fuzzy Match"),
)
name = models.CharField(max_length=128, unique=True)
slug = models.SlugField(blank=True, editable=False)
match = models.CharField(max_length=256, blank=True)
matching_algorithm = models.PositiveIntegerField(
choices=MATCHING_ALGORITHMS,
default=MATCH_ANY,
help_text=(
"Which algorithm you want to use when matching text to the OCR'd "
"PDF. Here, \"any\" looks for any occurrence of any word "
"provided in the PDF, while \"all\" requires that every word "
"provided appear in the PDF, albeit not in the order provided. A "
"\"literal\" match means that the text you enter must appear in "
"the PDF exactly as you've entered it, and \"regular expression\" "
"uses a regex to match the PDF. (If you don't know what a regex "
"is, you probably don't want this option.) Finally, a \"fuzzy "
"match\" looks for words or phrases that are mostly—but not "
"exactly—the same, which can be useful for matching against "
"documents containg imperfections that foil accurate OCR."
)
automatic_classification = models.BooleanField(
default=False,
help_text="Automatically assign to newly added documents based on "
"current usage in your document collection."
)
is_insensitive = models.BooleanField(default=True)
class Meta:
abstract = True
ordering = ("name",)
@@ -68,86 +40,8 @@ class MatchingModel(models.Model):
def __str__(self):
return self.name
@property
def conditions(self):
return "{}: \"{}\" ({})".format(
self.name, self.match, self.get_matching_algorithm_display())
@classmethod
def match_all(cls, text, tags=None):
if tags is None:
tags = cls.objects.all()
text = text.lower()
for tag in tags:
if tag.matches(text):
yield tag
def matches(self, text):
search_kwargs = {}
# Check that match is not empty
if self.match.strip() == "":
return False
if self.is_insensitive:
search_kwargs = {"flags": re.IGNORECASE}
if self.matching_algorithm == self.MATCH_ALL:
for word in self._split_match():
search_result = re.search(
r"\b{}\b".format(word), text, **search_kwargs)
if not search_result:
return False
return True
if self.matching_algorithm == self.MATCH_ANY:
for word in self._split_match():
if re.search(r"\b{}\b".format(word), text, **search_kwargs):
return True
return False
if self.matching_algorithm == self.MATCH_LITERAL:
return bool(re.search(
r"\b{}\b".format(self.match), text, **search_kwargs))
if self.matching_algorithm == self.MATCH_REGEX:
return bool(re.search(
re.compile(self.match, **search_kwargs), text))
if self.matching_algorithm == self.MATCH_FUZZY:
match = re.sub(r'[^\w\s]', '', self.match)
text = re.sub(r'[^\w\s]', '', text)
if self.is_insensitive:
match = match.lower()
text = text.lower()
return True if fuzz.partial_ratio(match, text) >= 90 else False
raise NotImplementedError("Unsupported matching algorithm")
def _split_match(self):
"""
Splits the match to individual keywords, getting rid of unnecessary
spaces and grouping quoted words together.
Example:
' some random words "with quotes " and spaces'
==>
["some", "random", "words", "with+quotes", "and", "spaces"]
"""
findterms = re.compile(r'"([^"]+)"|(\S+)').findall
normspace = re.compile(r"\s+").sub
return [
normspace(" ", (t[0] or t[1]).strip()).replace(" ", r"\s+")
for t in findterms(self.match)
]
def save(self, *args, **kwargs):
self.match = self.match.lower()
self.slug = slugify(self.name)
models.Model.save(self, *args, **kwargs)
@@ -183,6 +77,17 @@ class Tag(MatchingModel):
colour = models.PositiveIntegerField(choices=COLOURS, default=1)
is_inbox_tag = models.BooleanField(
default=False,
help_text="Marks this tag as an inbox tag: All newly consumed "
"documents will be tagged with inbox tags."
)
class DocumentType(MatchingModel):
pass
class Document(models.Model):
@@ -214,6 +119,14 @@ class Document(models.Model):
title = models.CharField(max_length=128, blank=True, db_index=True)
document_type = models.ForeignKey(
DocumentType,
blank=True,
null=True,
related_name="documents",
on_delete=models.SET_NULL
)
content = models.TextField(
db_index=True,
blank=True,
@@ -254,6 +167,15 @@ class Document(models.Model):
added = models.DateTimeField(
default=timezone.now, editable=False, db_index=True)
archive_serial_number = models.IntegerField(
blank=True,
null=True,
unique=True,
db_index=True,
help_text="The position of this document in your physical document "
"archive."
)
class Meta:
ordering = ("correspondent", "title")

View File

@@ -1,20 +1,20 @@
from rest_framework import serializers
from .models import Correspondent, Tag, Document, Log
from .models import Correspondent, Tag, Document, Log, DocumentType
class CorrespondentSerializer(serializers.HyperlinkedModelSerializer):
class Meta:
model = Correspondent
fields = (
"id",
"slug",
"name",
"match",
"matching_algorithm",
"is_insensitive"
)
fields = ("id", "slug", "name", "automatic_classification")
class DocumentTypeSerializer(serializers.HyperlinkedModelSerializer):
class Meta:
model = DocumentType
fields = ("id", "slug", "name", "automatic_classification")
class TagSerializer(serializers.HyperlinkedModelSerializer):
@@ -22,14 +22,7 @@ class TagSerializer(serializers.HyperlinkedModelSerializer):
class Meta:
model = Tag
fields = (
"id",
"slug",
"name",
"colour",
"match",
"matching_algorithm",
"is_insensitive"
)
"id", "slug", "name", "colour", "automatic_classification")
class CorrespondentField(serializers.HyperlinkedRelatedField):
@@ -42,17 +35,25 @@ class TagsField(serializers.HyperlinkedRelatedField):
return Tag.objects.all()
class DocumentTypeField(serializers.HyperlinkedRelatedField):
def get_queryset(self):
return DocumentType.objects.all()
class DocumentSerializer(serializers.ModelSerializer):
correspondent = CorrespondentField(
view_name="drf:correspondent-detail", allow_null=True)
tags = TagsField(view_name="drf:tag-detail", many=True)
document_type = DocumentTypeField(
view_name="drf:documenttype-detail", allow_null=True)
class Meta:
model = Document
fields = (
"id",
"correspondent",
"document_type",
"title",
"content",
"file_type",
@@ -60,7 +61,6 @@ class DocumentSerializer(serializers.ModelSerializer):
"checksum",
"created",
"modified",
"added",
"file_name",
"download_url",
"thumbnail_url",

59
src/documents/signals/handlers.py Normal file → Executable file
View File

@@ -8,57 +8,36 @@ from django.contrib.auth.models import User
from django.contrib.contenttypes.models import ContentType
from django.utils import timezone
from ..models import Correspondent, Document, Tag
from documents.classifier import DocumentClassifier
from ..models import Document, Tag
def logger(message, group):
    """Emit *message* at DEBUG level, tagging the record with *group*.

    The group is passed through ``extra`` and therefore ends up as the
    ``group`` attribute of the log record.
    """
    module_logger = logging.getLogger(__name__)
    record_extras = {"group": group}
    module_logger.debug(message, extra=record_extras)
def set_correspondent(sender, document=None, logging_group=None, **kwargs):
classifier = DocumentClassifier()
# No sense in assigning a correspondent when one is already set.
if document.correspondent:
return
# No matching correspondents, so no need to continue
potential_correspondents = list(Correspondent.match_all(document.content))
if not potential_correspondents:
return
potential_count = len(potential_correspondents)
selected = potential_correspondents[0]
if potential_count > 1:
message = "Detected {} potential correspondents, so we've opted for {}"
logger(
message.format(potential_count, selected),
logging_group
def classify_document(sender, document=None, logging_group=None, **kwargs):
global classifier
try:
classifier.reload()
classifier.classify_document(
document,
classify_correspondent=True,
classify_tags=True,
classify_document_type=True
)
except FileNotFoundError:
logging.getLogger(__name__).fatal(
"Cannot classify document, classifier model file was not found."
)
logger(
'Assigning correspondent "{}" to "{}" '.format(selected, document),
logging_group
)
document.correspondent = selected
document.save(update_fields=("correspondent",))
def set_tags(sender, document=None, logging_group=None, **kwargs):
    """Signal handler: add every matching tag the document does not have yet.

    Tags are matched against ``document.content``; nothing is logged or
    written when no new tag matches.
    """
    already_assigned = set(document.tags.all())
    new_tags = set(Tag.match_all(document.content)).difference(
        already_assigned
    )

    if new_tags:
        slugs = ", ".join(tag.slug for tag in new_tags)
        logger(
            'Tagging "{}" with "{}"'.format(document, slugs),
            logging_group
        )
        document.tags.add(*new_tags)
def add_inbox_tags(sender, document=None, logging_group=None, **kwargs):
    """Signal handler: attach every tag flagged ``is_inbox_tag`` to *document*."""
    document.tags.add(*Tag.objects.filter(is_inbox_tag=True))
def run_pre_consume_script(sender, filename, **kwargs):

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

41660
src/documents/static/documents/js/pdf.worker.js vendored Executable file

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

13
src/documents/static/paperless.css Normal file → Executable file
View File

@@ -20,4 +20,17 @@ td a.tag {
#result_list td textarea {
width: 90%;
height: 5em;
}
/* Two-column change form: the form on the left, the viewer on the right. */
#change_form_twocolumn_parent {
    display: flex;
}

/* Left column. */
#change_form_form_parent {
    flex: 50%;
    margin-right: 10px;
}

/* Right column; its content is centred. */
#change_form_viewer_parent {
    flex: 50%;
    margin-left: 10px;
    text-align: center;
}

View File

@@ -3,63 +3,10 @@
{# NOTE: This should probably be extending base.html. See CSS comment below details. #}
{% load static %}
{% load custom_css from customisation %}
{% load custom_js from customisation %}
{% block extrahead %}
<link rel="icon" type="image/x-icon" href="{% url 'favicon' %}" />
<style>
#header {
background-color: #90a9b7;
line-height: inherit;
height: auto;
}
#branding h1 {
font-weight: inherit;
font-size: inherit;
}
.button,
.button:active,
.button:focus,
.button:hover,
a.button,
.submit-row input,
input[type="submit"],
input[type="submit"]:active,
input[type="submit"]:focus,
input[type="submit"]:hover,
input[type="button"],
input[type="button"]:active,
input[type="button"]:focus,
input[type="button"]:hover {
background-color: #074f57;
}
.module h2,
.module caption,
.inline-group h2 {
background-color: #90a9b7;
}
div.breadcrumbs {
background-color: #077187;
}
.module h2,
.module caption,
.inline-group h2 {
background-color: #077187;
}
</style>
{% endblock %}
{% block branding %}
<h1 id="site-name">
<a href="{% url 'admin:index' %}"><img src="{% static 'paperless/img/logo-light.png' %}" alt="Paperless" /></a>
</h1>
{% endblock %}
{% block blockbots %}
{% comment %}

View File

@@ -4,6 +4,27 @@
{{ block.super }}
{% if file_type in "pdf jpg png" %}
<div id="change_form_twocolumn_parent">
<div id="change_form_form_parent"></div>
<div id="change_form_viewer_parent">
{% if file_type == "pdf" %}
{% include "admin/documents/document/viewers/viewer_pdf.html" %}
{% endif %}
{% if file_type in "jpg png" %}
{% include "admin/documents/document/viewers/viewer_image.html" %}
{% endif %}
</div>
</div>
<script>
django.jQuery("#change_form_form_parent").append(django.jQuery("#document_form"));
django.jQuery("#content-main").append(django.jQuery("#change_form_twocolumn_parent"));
</script>
{% endif %}
{% if next_object %}
<script type="text/javascript">//<![CDATA[
(function($){

View File

@@ -24,11 +24,12 @@
border: 1px solid #cccccc;
border-radius: 2%;
overflow: hidden;
height: 300px;
height: 350px;
position: relative;
}
.result .header {
padding: 5px;
background-color: #90a9b7;
background-color: #79AEC8;
position: relative;
}
.result .header .checkbox {
@@ -60,6 +61,11 @@
.result a.tag {
color: #ffffff;
}
/* Centred, light grey strip showing the document type on a result card. */
.result .documentType {
    background-color: #eeeeee;
    padding: 5px;
    text-align: center;
}
.result .date {
padding: 5px;
}
@@ -79,6 +85,15 @@
.result .image img {
width: 100%;
}
/* Small badge pinned to the bottom-right corner of a result card. */
.result .footer {
    position: absolute;
    right: 0;
    bottom: 0;
    padding: 4px 10px;
    background: white;
    border-top: 1px solid #cccccc;
    border-left: 1px solid #cccccc;
}
.grid {
margin-right: 260px;
@@ -152,7 +167,9 @@
{# 4: Image #}
{# 5: Correspondent #}
{# 6: Tags #}
{# 7: Document edit url #}
{# 7: Archive serial number #}
{# 8: Document type #}
{# 9: Document edit url #}
<div class="box">
<div class="result">
<div class="header">
@@ -166,7 +183,7 @@
selection would not be possible with mouse click + drag. Instead,
the underlying link would be dragged.
{% endcomment %}
<div class="headerLink" onclick="location.href='{{ result.7 }}';"></div>
<div class="headerLink" onclick="location.href='{{ result.9 }}';"></div>
<div class="checkbox">{{ result.0 }}</div>
<div class="info">
{{ result.5 }}
@@ -174,10 +191,14 @@
{{ result.1 }}
<div style="clear: both;"></div>
</div>
{% if '>-<' not in result.8 %}<div class="documentType">{{ result.8 }}</div>{% endif %}
<div class="tags">{{ result.6 }}</div>
<div class="date">{{ result.2 }}</div>
<div style="clear: both;"></div>
<div class="image">{{ result.4 }}</div>
{# Only show the archive serial number if it is set on the document. #}
{# checking for >-< (i.e., will a dash be displayed) doesn't feel like a very good solution to me. #}
{% if '>-<' not in result.7 %}<div class="footer">#{{ result.7 }}</div>{% endif %}
</div>
</div>
{% endfor %}

View File

View File

@@ -0,0 +1 @@
<img src="{{download_url}}" style="max-width: 100%">

View File

@@ -0,0 +1,130 @@
{% load static %}
<div>
<input id="prev" value="Previous" class="default" type="button">
<input id="next" value="Next" class="default" type="button">
&nbsp; &nbsp;
<span>Page: <span id="page_num"></span> / <span id="page_count"></span></span>
&nbsp; &nbsp;
<input id="zoomin" value="+" class="default" type="button">
<input id="zoomout" value="-" class="default" type="button">
</div>
<div style="width: 100%; overflow: auto;">
<canvas id="the-canvas"></canvas>
</div>
<script type="text/javascript" src="{% static 'documents/js/pdf.js' %}"></script>
<script type="text/javascript" src="{% static 'documents/js/pdf.worker.js' %}"></script>
{# Load and display PDF document#}
<script>
// Library handle and the mutable state of the viewer.
var pdfjsLib = window['pdfjs-dist/build/pdf'];
var pdfDoc = null;          // no document loaded yet
var pageNum = 1;            // page currently shown
var pageRendering = false;  // true while a render is in flight
var pageNumPending = null;  // page requested while a render was in flight
var scale = 1.0;            // zoom factor passed to getViewport
var canvas = document.getElementById('the-canvas');
var ctx = canvas.getContext('2d');
/**
 * Fetch one page of the document, size the canvas to the page's viewport
 * at the current scale, and draw the page into the canvas.
 * @param num Page number.
 */
function renderPage(num) {
  pageRendering = true;

  // The page is fetched asynchronously.
  pdfDoc.getPage(num).then(function (page) {
    var pageViewport = page.getViewport(scale);
    canvas.height = pageViewport.height;
    canvas.width = pageViewport.width;

    // Draw the page into the canvas context and wait for completion.
    page.render({
      canvasContext: ctx,
      viewport: pageViewport
    }).promise.then(function () {
      pageRendering = false;
      if (pageNumPending === null) {
        return;
      }
      // A page was requested while this one was rendering; draw it now.
      renderPage(pageNumPending);
      pageNumPending = null;
    });
  });

  // The page counter is updated right away, before rendering finishes.
  document.getElementById('page_num').textContent = num;
}
/**
 * If another page rendering is in progress, remembers the requested page so
 * it is rendered once the current rendering has finished. Otherwise,
 * executes rendering immediately.
 *
 * Requests made before the document has loaded are ignored: there is
 * nothing to draw yet, and calling renderPage would dereference a null
 * pdfDoc.
 * @param num Page number.
 */
function queueRenderPage(num) {
  if (pdfDoc === null) {
    return;
  }
  if (pageRendering) {
    pageNumPending = num;
  } else {
    renderPage(num);
  }
}
/**
 * Steps back one page; does nothing when the first page is shown.
 */
function onPrevPage() {
  if (pageNum > 1) {
    pageNum--;
    queueRenderPage(pageNum);
  }
}
document.getElementById('prev').addEventListener('click', onPrevPage);
/**
 * Displays next page.
 *
 * Does nothing when the last page is already shown, or when no document
 * has been loaded yet (pdfDoc is still null), which previously raised a
 * TypeError on pdfDoc.numPages.
 */
function onNextPage() {
  if (pdfDoc === null || pageNum >= pdfDoc.numPages) {
    return;
  }
  pageNum++;
  queueRenderPage(pageNum);
}
document.getElementById('next').addEventListener('click', onNextPage);
/**
 * Zooms in: multiplies the render scale by 1.2 and redraws the current page.
 */
function onZoomIn() {
  scale = scale * 1.2;
  queueRenderPage(pageNum);
}
document.getElementById('zoomin').addEventListener('click', onZoomIn);
/**
 * Zooms out: divides the render scale by 1.2 and redraws the current page.
 */
function onZoomOut() {
  scale = scale / 1.2;
  queueRenderPage(pageNum);
}
document.getElementById('zoomout').addEventListener('click', onZoomOut);
/**
 * Downloads the PDF asynchronously, then shows the page count and renders
 * the current page.
 */
pdfjsLib.getDocument("{{download_url}}").then(function (loadedDoc) {
  pdfDoc = loadedDoc;
  document.getElementById('page_count').textContent = loadedDoc.numPages;

  // First rendering of the document.
  renderPage(pageNum);
});
</script>

31
src/documents/views.py Normal file → Executable file
View File

@@ -2,7 +2,6 @@ from django.http import HttpResponse, HttpResponseBadRequest
from django.views.generic import DetailView, FormView, TemplateView
from django_filters.rest_framework import DjangoFilterBackend
from django.conf import settings
from django.utils import cache
from paperless.db import GnuPG
from paperless.mixins import SessionOrBasicAuthMixin
@@ -21,14 +20,21 @@ from rest_framework.viewsets import (
ReadOnlyModelViewSet
)
from .filters import CorrespondentFilterSet, DocumentFilterSet, TagFilterSet
from .filters import (
CorrespondentFilterSet,
DocumentFilterSet,
TagFilterSet,
DocumentTypeFilterSet
)
from .forms import UploadForm
from .models import Correspondent, Document, Log, Tag
from .models import Correspondent, Document, Log, Tag, DocumentType
from .serialisers import (
CorrespondentSerializer,
DocumentSerializer,
LogSerializer,
TagSerializer
TagSerializer,
DocumentTypeSerializer
)
@@ -57,12 +63,10 @@ class FetchView(SessionOrBasicAuthMixin, DetailView):
}
if self.kwargs["kind"] == "thumb":
response = HttpResponse(
return HttpResponse(
self._get_raw_data(self.object.thumbnail_file),
content_type=content_types[Document.TYPE_PNG]
)
cache.patch_cache_control(response, max_age=31536000, private=True)
return response
response = HttpResponse(
self._get_raw_data(self.object.source_file),
@@ -119,6 +123,17 @@ class TagViewSet(ModelViewSet):
ordering_fields = ("name", "slug")
class DocumentTypeViewSet(ModelViewSet):
    # Model-backed API endpoint for DocumentType objects.
    # (Plain comments on purpose: a class docstring would be picked up as
    # the view description.)

    # What is served and how it is serialised.
    model = DocumentType
    queryset = DocumentType.objects.all()
    serializer_class = DocumentTypeSerializer

    # Access control and paging.
    permission_classes = (IsAuthenticated,)
    pagination_class = StandardPagination

    # Filtering and ordering.
    filter_backends = (DjangoFilterBackend, OrderingFilter)
    filter_class = DocumentTypeFilterSet
    ordering_fields = ("name", "slug")
class DocumentViewSet(RetrieveModelMixin,
UpdateModelMixin,
DestroyModelMixin,
@@ -133,7 +148,7 @@ class DocumentViewSet(RetrieveModelMixin,
filter_class = DocumentFilterSet
search_fields = ("title", "correspondent__name", "content")
ordering_fields = (
"id", "title", "correspondent__name", "created", "modified", "added")
"id", "title", "correspondent__name", "created", "modified")
class LogViewSet(ReadOnlyModelViewSet):

0
src/manage.py Executable file → Normal file
View File

16
src/paperless/settings.py Normal file → Executable file
View File

@@ -58,7 +58,7 @@ if _allowed_hosts:
ALLOWED_HOSTS = _allowed_hosts.split(",")
FORCE_SCRIPT_NAME = os.getenv("PAPERLESS_FORCE_SCRIPT_NAME")
# Application definition
INSTALLED_APPS = [
@@ -72,7 +72,6 @@ INSTALLED_APPS = [
"corsheaders",
"django_extensions",
"paperless",
"documents.apps.DocumentsConfig",
"reminders.apps.RemindersConfig",
"paperless_tesseract.apps.PaperlessTesseractConfig",
@@ -83,7 +82,6 @@ INSTALLED_APPS = [
"rest_framework",
"crispy_forms",
"django_filters",
"djangoql",
]
@@ -146,9 +144,9 @@ DATABASES = {
}
}
if os.getenv("PAPERLESS_DBUSER"):
if os.getenv("PAPERLESS_DBENGINE"):
DATABASES["default"] = {
"ENGINE": "django.db.backends.postgresql_psycopg2",
"ENGINE": os.getenv("PAPERLESS_DBENGINE"),
"NAME": os.getenv("PAPERLESS_DBNAME", "paperless"),
"USER": os.getenv("PAPERLESS_DBUSER"),
}
@@ -215,6 +213,14 @@ MEDIA_URL = os.getenv("PAPERLESS_MEDIA_URL", "/media/")
DATA_UPLOAD_MAX_NUMBER_FIELDS = None
# Document classification models location
MODEL_FILE = os.getenv(
"PAPERLESS_MODEL_FILE", os.path.join(
BASE_DIR, "..", "models", "model.pickle"
)
)
# Paperless-specific stuff
# You shouldn't have to edit any of these values. Rather, you can set these
# values in /etc/paperless.conf instead.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 108 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.2 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 8.6 KiB

8
src/paperless/urls.py Normal file → Executable file
View File

@@ -6,19 +6,20 @@ from django.views.decorators.csrf import csrf_exempt
from django.views.generic import RedirectView
from rest_framework.routers import DefaultRouter
from paperless.views import FaviconView
from documents.views import (
CorrespondentViewSet,
DocumentViewSet,
FetchView,
LogViewSet,
PushView,
TagViewSet
TagViewSet,
DocumentTypeViewSet
)
from reminders.views import ReminderViewSet
router = DefaultRouter()
router.register(r"correspondents", CorrespondentViewSet)
router.register(r"document_types", DocumentTypeViewSet)
router.register(r"documents", DocumentViewSet)
router.register(r"logs", LogViewSet)
router.register(r"reminders", ReminderViewSet)
@@ -45,9 +46,6 @@ urlpatterns = [
# File uploads
url(r"^push$", csrf_exempt(PushView.as_view()), name="push"),
# Favicon
url(r"^favicon.ico$", FaviconView.as_view(), name="favicon"),
# The Django admin
url(r"admin/", admin.site.urls),

View File

@@ -1 +1 @@
__version__ = (2, 6, 1)
__version__ = (1, 0, 1)

View File

@@ -1,7 +1,3 @@
import os
from django.http import HttpResponse
from django.views.generic import View
from rest_framework.pagination import PageNumberPagination
@@ -9,17 +5,3 @@ class StandardPagination(PageNumberPagination):
page_size = 25
page_size_query_param = "page-size"
max_page_size = 100000
class FaviconView(View):
    """Serve the favicon stored under this package's static image folder."""

    def get(self, request, *args, **kwargs):
        """Return ``static/paperless/img/favicon.ico`` as ``image/x-icon``."""
        image_dir = os.path.join(
            os.path.dirname(__file__), "static", "paperless", "img"
        )
        icon_path = os.path.join(image_dir, "favicon.ico")

        with open(icon_path, "rb") as icon:
            response = HttpResponse(icon, content_type="image/x-icon")
        return response

Binary file not shown.

After

Width:  |  Height:  |  Size: 136 KiB

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 136 KiB

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 135 KiB

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 46 KiB

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 53 KiB

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 136 KiB

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 136 KiB

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 136 KiB

View File

@@ -5,7 +5,7 @@ from unittest import mock
from uuid import uuid4
from dateutil import tz
from django.test import TestCase
from django.test import TestCase, override_settings
from ..parsers import RasterisedDocumentParser
from django.conf import settings
@@ -16,36 +16,46 @@ class TestDate(TestCase):
SAMPLE_FILES = os.path.join(os.path.dirname(__file__), "samples")
SCRATCH = "/tmp/paperless-tests-{}".format(str(uuid4())[:8])
MOCK_SCRATCH = "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH" # NOQA: E501
def setUp(self):
os.makedirs(self.SCRATCH, exist_ok=True)
def tearDown(self):
shutil.rmtree(self.SCRATCH)
@mock.patch(MOCK_SCRATCH, SCRATCH)
@mock.patch(
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
SCRATCH
)
def test_date_format_1(self):
input_file = os.path.join(self.SAMPLE_FILES, "")
document = RasterisedDocumentParser(input_file)
document._text = "lorem ipsum 130218 lorem ipsum"
self.assertEqual(document.get_date(), None)
@mock.patch(MOCK_SCRATCH, SCRATCH)
@mock.patch(
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
SCRATCH
)
def test_date_format_2(self):
input_file = os.path.join(self.SAMPLE_FILES, "")
document = RasterisedDocumentParser(input_file)
document._text = "lorem ipsum 2018 lorem ipsum"
self.assertEqual(document.get_date(), None)
@mock.patch(MOCK_SCRATCH, SCRATCH)
@mock.patch(
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
SCRATCH
)
def test_date_format_3(self):
input_file = os.path.join(self.SAMPLE_FILES, "")
document = RasterisedDocumentParser(input_file)
document._text = "lorem ipsum 20180213 lorem ipsum"
self.assertEqual(document.get_date(), None)
@mock.patch(MOCK_SCRATCH, SCRATCH)
@mock.patch(
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
SCRATCH
)
def test_date_format_4(self):
input_file = os.path.join(self.SAMPLE_FILES, "")
document = RasterisedDocumentParser(input_file)
@@ -59,7 +69,10 @@ class TestDate(TestCase):
)
)
@mock.patch(MOCK_SCRATCH, SCRATCH)
@mock.patch(
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
SCRATCH
)
def test_date_format_5(self):
input_file = os.path.join(self.SAMPLE_FILES, "")
document = RasterisedDocumentParser(input_file)
@@ -76,7 +89,10 @@ class TestDate(TestCase):
)
)
@mock.patch(MOCK_SCRATCH, SCRATCH)
@mock.patch(
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
SCRATCH
)
def test_date_format_6(self):
input_file = os.path.join(self.SAMPLE_FILES, "")
document = RasterisedDocumentParser(input_file)
@@ -93,7 +109,10 @@ class TestDate(TestCase):
)
self.assertEqual(document.get_date(), None)
@mock.patch(MOCK_SCRATCH, SCRATCH)
@mock.patch(
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
SCRATCH
)
def test_date_format_7(self):
input_file = os.path.join(self.SAMPLE_FILES, "")
document = RasterisedDocumentParser(input_file)
@@ -111,7 +130,10 @@ class TestDate(TestCase):
)
)
@mock.patch(MOCK_SCRATCH, SCRATCH)
@mock.patch(
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
SCRATCH
)
def test_date_format_8(self):
input_file = os.path.join(self.SAMPLE_FILES, "")
document = RasterisedDocumentParser(input_file)
@@ -135,7 +157,10 @@ class TestDate(TestCase):
)
)
@mock.patch(MOCK_SCRATCH, SCRATCH)
@mock.patch(
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
SCRATCH
)
def test_date_format_9(self):
input_file = os.path.join(self.SAMPLE_FILES, "")
document = RasterisedDocumentParser(input_file)
@@ -153,11 +178,398 @@ class TestDate(TestCase):
)
)
@mock.patch(
    "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
    SCRATCH
)
def test_get_text_1_pdf(self):
    # tests_date_1.pdf, day-first order: _is_ocred() is True, date 2018-04-01.
    sample = os.path.join(self.SAMPLE_FILES, "tests_date_1.pdf")
    parser = RasterisedDocumentParser(sample)
    parser.DATE_ORDER = 'DMY'
    parser.get_text()
    found = parser.get_date()
    self.assertEqual(parser._is_ocred(), True)
    expected = datetime.datetime(
        2018, 4, 1, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)
    )
    self.assertEqual(found, expected)
@mock.patch(
    "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
    SCRATCH
)
def test_get_text_1_png(self):
    # tests_date_1.png, day-first order: _is_ocred() is False, date 2018-04-01.
    sample = os.path.join(self.SAMPLE_FILES, "tests_date_1.png")
    parser = RasterisedDocumentParser(sample)
    parser.DATE_ORDER = 'DMY'
    parser.get_text()
    self.assertEqual(parser._is_ocred(), False)
    found = parser.get_date()
    expected = datetime.datetime(
        2018, 4, 1, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)
    )
    self.assertEqual(found, expected)
@mock.patch(
    "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
    SCRATCH
)
def test_get_text_2_pdf(self):
    # tests_date_2.pdf, day-first order: _is_ocred() is True, date 2013-02-01.
    sample = os.path.join(self.SAMPLE_FILES, "tests_date_2.pdf")
    parser = RasterisedDocumentParser(sample)
    parser.DATE_ORDER = 'DMY'
    parser.get_text()
    self.assertEqual(parser._is_ocred(), True)
    found = parser.get_date()
    expected = datetime.datetime(
        2013, 2, 1, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)
    )
    self.assertEqual(found, expected)
@mock.patch(
    "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
    SCRATCH
)
def test_get_text_2_png(self):
    # tests_date_2.png, day-first order: _is_ocred() is False, date 2013-02-01.
    sample = os.path.join(self.SAMPLE_FILES, "tests_date_2.png")
    parser = RasterisedDocumentParser(sample)
    parser.DATE_ORDER = 'DMY'
    parser.get_text()
    self.assertEqual(parser._is_ocred(), False)
    found = parser.get_date()
    expected = datetime.datetime(
        2013, 2, 1, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)
    )
    self.assertEqual(found, expected)
@mock.patch(
    "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
    SCRATCH
)
@override_settings(OCR_LANGUAGE="deu")
def test_get_text_3_pdf(self):
    # tests_date_3.pdf with OCR_LANGUAGE "deu": _is_ocred() is True,
    # date 2018-10-05.
    sample = os.path.join(self.SAMPLE_FILES, "tests_date_3.pdf")
    parser = RasterisedDocumentParser(sample)
    parser.DATE_ORDER = 'DMY'
    parser.get_text()
    self.assertEqual(parser._is_ocred(), True)
    found = parser.get_date()
    expected = datetime.datetime(
        2018, 10, 5, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)
    )
    self.assertEqual(found, expected)
@mock.patch(
    "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
    SCRATCH
)
@override_settings(OCR_LANGUAGE="deu")
def test_get_text_3_png(self):
    # tests_date_3.png with OCR_LANGUAGE "deu": _is_ocred() is False,
    # date 2018-10-05.
    sample = os.path.join(self.SAMPLE_FILES, "tests_date_3.png")
    parser = RasterisedDocumentParser(sample)
    parser.DATE_ORDER = 'DMY'
    parser.get_text()
    self.assertEqual(parser._is_ocred(), False)
    found = parser.get_date()
    expected = datetime.datetime(
        2018, 10, 5, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)
    )
    self.assertEqual(found, expected)
@mock.patch(
    "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
    SCRATCH
)
@override_settings(OCR_LANGUAGE="eng")
def test_get_text_4_pdf(self):
    # tests_date_4.pdf with OCR_LANGUAGE "eng": _is_ocred() is True,
    # date 2018-10-05.
    sample = os.path.join(self.SAMPLE_FILES, "tests_date_4.pdf")
    parser = RasterisedDocumentParser(sample)
    parser.DATE_ORDER = 'DMY'
    parser.get_text()
    self.assertEqual(parser._is_ocred(), True)
    found = parser.get_date()
    expected = datetime.datetime(
        2018, 10, 5, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)
    )
    self.assertEqual(found, expected)
@mock.patch(
    "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
    SCRATCH
)
@override_settings(OCR_LANGUAGE="eng")
def test_get_text_4_png(self):
    # tests_date_4.png with OCR_LANGUAGE "eng": _is_ocred() is False,
    # date 2018-10-05.
    sample = os.path.join(self.SAMPLE_FILES, "tests_date_4.png")
    parser = RasterisedDocumentParser(sample)
    parser.DATE_ORDER = 'DMY'
    parser.get_text()
    self.assertEqual(parser._is_ocred(), False)
    found = parser.get_date()
    expected = datetime.datetime(
        2018, 10, 5, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)
    )
    self.assertEqual(found, expected)
@mock.patch(
    "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
    SCRATCH
)
def test_get_text_5_pdf(self):
    # tests_date_5.pdf, day-first order: _is_ocred() is True, date 2018-12-17.
    sample = os.path.join(self.SAMPLE_FILES, "tests_date_5.pdf")
    parser = RasterisedDocumentParser(sample)
    parser.DATE_ORDER = 'DMY'
    parser.get_text()
    self.assertEqual(parser._is_ocred(), True)
    found = parser.get_date()
    expected = datetime.datetime(
        2018, 12, 17, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)
    )
    self.assertEqual(found, expected)
@mock.patch(
    "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
    SCRATCH
)
def test_get_text_5_png(self):
    # tests_date_5.png, day-first order: _is_ocred() is False, date 2018-12-17.
    sample = os.path.join(self.SAMPLE_FILES, "tests_date_5.png")
    parser = RasterisedDocumentParser(sample)
    parser.DATE_ORDER = 'DMY'
    parser.get_text()
    self.assertEqual(parser._is_ocred(), False)
    found = parser.get_date()
    expected = datetime.datetime(
        2018, 12, 17, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)
    )
    self.assertEqual(found, expected)
@mock.patch(
    "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
    SCRATCH
)
def test_get_text_6_pdf_us(self):
    # tests_date_6.pdf, month-first order (set after the text is read):
    # _is_ocred() is True, date 2018-12-17.
    sample = os.path.join(self.SAMPLE_FILES, "tests_date_6.pdf")
    parser = RasterisedDocumentParser(sample)
    parser.get_text()
    parser.DATE_ORDER = "MDY"
    self.assertEqual(parser._is_ocred(), True)
    found = parser.get_date()
    expected = datetime.datetime(
        2018, 12, 17, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)
    )
    self.assertEqual(found, expected)
@mock.patch(
    "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
    SCRATCH
)
def test_get_text_6_png_us(self):
    # tests_date_6.png, month-first order (set after the text is read):
    # _is_ocred() is False, date 2018-12-17.
    sample = os.path.join(self.SAMPLE_FILES, "tests_date_6.png")
    parser = RasterisedDocumentParser(sample)
    parser.get_text()
    parser.DATE_ORDER = "MDY"
    self.assertEqual(parser._is_ocred(), False)
    found = parser.get_date()
    expected = datetime.datetime(
        2018, 12, 17, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)
    )
    self.assertEqual(found, expected)
@mock.patch(
    "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
    SCRATCH
)
def test_get_text_6_pdf_eu(self):
    # tests_date_6.pdf, day-first order: _is_ocred() is True, no date found.
    sample = os.path.join(self.SAMPLE_FILES, "tests_date_6.pdf")
    parser = RasterisedDocumentParser(sample)
    parser.DATE_ORDER = 'DMY'
    parser.get_text()
    self.assertEqual(parser._is_ocred(), True)
    found = parser.get_date()
    self.assertEqual(found, None)
@mock.patch(
    "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
    SCRATCH
)
def test_get_text_6_png_eu(self):
    # tests_date_6.png, day-first order: _is_ocred() is False, no date found.
    sample = os.path.join(self.SAMPLE_FILES, "tests_date_6.png")
    parser = RasterisedDocumentParser(sample)
    parser.DATE_ORDER = 'DMY'
    parser.get_text()
    self.assertEqual(parser._is_ocred(), False)
    found = parser.get_date()
    self.assertEqual(found, None)
@mock.patch(
    "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
    SCRATCH
)
def test_get_text_7_pdf(self):
    # tests_date_7.pdf, day-first order: _is_ocred() is True, date 2018-04-01.
    sample = os.path.join(self.SAMPLE_FILES, "tests_date_7.pdf")
    parser = RasterisedDocumentParser(sample)
    parser.DATE_ORDER = 'DMY'
    parser.get_text()
    self.assertEqual(parser._is_ocred(), True)
    found = parser.get_date()
    expected = datetime.datetime(
        2018, 4, 1, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)
    )
    self.assertEqual(found, expected)
@mock.patch(
    "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
    SCRATCH
)
def test_get_text_8_pdf(self):
    # tests_date_8.pdf, day-first order: _is_ocred() is True, date 2017-12-31.
    sample = os.path.join(self.SAMPLE_FILES, "tests_date_8.pdf")
    parser = RasterisedDocumentParser(sample)
    parser.DATE_ORDER = 'DMY'
    parser.get_text()
    self.assertEqual(parser._is_ocred(), True)
    found = parser.get_date()
    expected = datetime.datetime(
        2017, 12, 31, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)
    )
    self.assertEqual(found, expected)
@mock.patch(
    "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
    SCRATCH
)
def test_get_text_9_pdf(self):
    # tests_date_9.pdf, day-first order: _is_ocred() is True, date 2017-12-31.
    sample = os.path.join(self.SAMPLE_FILES, "tests_date_9.pdf")
    parser = RasterisedDocumentParser(sample)
    parser.DATE_ORDER = 'DMY'
    parser.get_text()
    self.assertEqual(parser._is_ocred(), True)
    found = parser.get_date()
    expected = datetime.datetime(
        2017, 12, 31, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)
    )
    self.assertEqual(found, expected)
@mock.patch(
    "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
    SCRATCH
)
def test_filename_date_1_pdf(self):
    # PDF sample with 2018-03-20 inside its file name, year-first filename
    # order: _is_ocred() is True, date 2018-03-20.
    sample = os.path.join(
        self.SAMPLE_FILES, "tests_date_in_filename_2018-03-20_1.pdf"
    )
    parser = RasterisedDocumentParser(sample)
    parser.FILENAME_DATE_ORDER = 'YMD'
    parser.get_text()
    found = parser.get_date()
    self.assertEqual(parser._is_ocred(), True)
    expected = datetime.datetime(
        2018, 3, 20, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)
    )
    self.assertEqual(found, expected)
@mock.patch(
    "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
    SCRATCH
)
def test_filename_date_1_png(self):
    # PNG sample with 2018-03-20 inside its file name, year-first filename
    # order, get_text() not called: _is_ocred() is False, date 2018-03-20.
    sample = os.path.join(
        self.SAMPLE_FILES, "tests_date_in_filename_2018-03-20_1.png"
    )
    parser = RasterisedDocumentParser(sample)
    parser.FILENAME_DATE_ORDER = 'YMD'
    found = parser.get_date()
    self.assertEqual(parser._is_ocred(), False)
    expected = datetime.datetime(
        2018, 3, 20, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)
    )
    self.assertEqual(found, expected)
@mock.patch(
    "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
    SCRATCH
)
def test_filename_date_2_pdf(self):
    # PDF sample whose file name starts with 2013-12-11, year-first filename
    # order, get_text() not called: _is_ocred() is True, date 2013-12-11.
    sample = os.path.join(
        self.SAMPLE_FILES, "2013-12-11_tests_date_in_filename_2.pdf"
    )
    parser = RasterisedDocumentParser(sample)
    parser.FILENAME_DATE_ORDER = 'YMD'
    found = parser.get_date()
    self.assertEqual(parser._is_ocred(), True)
    expected = datetime.datetime(
        2013, 12, 11, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)
    )
    self.assertEqual(found, expected)
@mock.patch(
    "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
    SCRATCH
)
def test_filename_date_2_png(self):
    # PNG sample whose file name starts with 2013-12-11, year-first filename
    # order, get_text() not called: _is_ocred() is False, date 2013-12-11.
    sample = os.path.join(
        self.SAMPLE_FILES, "2013-12-11_tests_date_in_filename_2.png"
    )
    parser = RasterisedDocumentParser(sample)
    parser.FILENAME_DATE_ORDER = 'YMD'
    found = parser.get_date()
    self.assertEqual(parser._is_ocred(), False)
    expected = datetime.datetime(
        2013, 12, 11, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)
    )
    self.assertEqual(found, expected)
@mock.patch(
"paperless_tesseract.parsers.RasterisedDocumentParser.get_text",
return_value="01-07-0590 00:00:00"
)
@mock.patch(MOCK_SCRATCH, SCRATCH)
@mock.patch(
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
SCRATCH
)
def test_crazy_date_past(self, *args):
document = RasterisedDocumentParser("/dev/null")
document.get_text()
@@ -167,7 +579,10 @@ class TestDate(TestCase):
"paperless_tesseract.parsers.RasterisedDocumentParser.get_text",
return_value="01-07-2350 00:00:00"
)
@mock.patch(MOCK_SCRATCH, SCRATCH)
@mock.patch(
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
SCRATCH
)
def test_crazy_date_future(self, *args):
document = RasterisedDocumentParser("/dev/null")
document.get_text()
@@ -177,7 +592,10 @@ class TestDate(TestCase):
"paperless_tesseract.parsers.RasterisedDocumentParser.get_text",
return_value="01-07-0590 00:00:00"
)
@mock.patch(MOCK_SCRATCH, SCRATCH)
@mock.patch(
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
SCRATCH
)
def test_crazy_date_past(self, *args):
document = RasterisedDocumentParser("/dev/null")
document.get_text()

View File

@@ -17,5 +17,6 @@ deps=pycodestyle
[testenv:doc]
deps =
-r {toxinidir}/../requirements.txt
-r{toxinidir}/../requirements.txt
sphinx
commands=sphinx-build -b html ../docs ../docs/_build -W