Compare commits

..

100 Commits
2.5.0 ... 2.7.0

Author SHA1 Message Date
Daniel Quinn
94c2950afe Ignore sqlite3-journal files too 2019-01-27 13:48:05 +00:00
Daniel Quinn
9f56bf9992 Fix missing links 2019-01-27 13:47:40 +00:00
Daniel Quinn
6df35e4cb7 Merge branch 'sbrunner-dql' 2019-01-27 13:42:58 +00:00
Daniel Quinn
b4b7d167d1 Update dependencies & add djangoql 2019-01-27 13:39:56 +00:00
Daniel Quinn
4936fad542 Merge pull request #488 from sbrunner/no-tab
Tabs are not allowed in Yaml files
2019-01-27 13:06:46 +00:00
Daniel Quinn
3c78105fd7 Align example conf with real-world defaults 2019-01-27 13:05:56 +00:00
Daniel Quinn
a58a7ce0f7 Move note about DEBUG up into 3. 2019-01-27 13:03:55 +00:00
Daniel Quinn
792aeee11e Merge pull request #487 from bmsleight/issue486
Add note runserver PAPERLESS_DEBUG='true'
2019-01-27 13:00:37 +00:00
Daniel Quinn
5588e86855 Merge pull request #484 from cribbstechnologies/patch-1
adding information about NFS mounts and inotify
2019-01-27 12:54:03 +00:00
Daniel Quinn
97f1e4ab16 pep8 2019-01-27 12:52:15 +00:00
Daniel Quinn
e4dece8e53 Merge pull request #483 from tsia/patch-1
added fields to the correspondent and tag REST API
2019-01-27 12:50:42 +00:00
Daniel Quinn
c5c204f605 Merge pull request #481 from CkuT/cache-control
Add Cache-Control header for thumbnails
2019-01-27 12:46:32 +00:00
Daniel Quinn
611ec6840b Merge pull request #489 from sbrunner/docker-psycopg2-clean
Install psycopg2 in the Docker container
2019-01-27 12:25:32 +00:00
Daniel Quinn
2cd077d12d Merge pull request #475 from syntonym/master
Catch IMAP connection errors
2019-01-27 12:19:18 +00:00
Stéphane Brunner
4efb153e86 Add and configure DjangoQL 2019-01-26 22:15:28 +01:00
Stéphane Brunner
25e953bbf0 Install psycopg2 in the Docker container 2019-01-26 14:33:51 +01:00
Stéphane Brunner
0509d5a3d2 Tabs are not allowed in Yaml files 2019-01-26 14:23:11 +01:00
bmsleight
5e674f17af Add note runserver PAPERLESS_DEBUG='true' 2019-01-26 13:15:45 +00:00
Brian Cribbs
7c7a814096 adding information about NFS mounts and inotify 2019-01-22 15:18:14 -05:00
tsia
43e71cfcaa added fields to the correspondent and tag REST API 2019-01-22 20:51:20 +01:00
CkuT
79868930f1 Add Cache-Control header for thumbnails
This drastically optimizes admin interface loading by telling the browser to cache thumbnails. The max-age recommendation is 1 year according to rfc2616
Closes #411
2019-01-21 20:59:40 +01:00
Daniel Quinn
0256dcbe32 Merge pull request #478 from wiwie/patch-1
Update requirements.rst
2019-01-21 00:32:01 +00:00
Christian Wiwie
29db177ce2 Update requirements.rst 2019-01-18 20:33:35 +01:00
syntonym
5c1edf78ce Catches OSError on IMAP connection error
When something goes wrong with the imaplib.IMAP4_SSL connection (like the host is
temporarely down or the DNS does not resolve) it generates an OSError which is currently
not catched and handled. Now OSErrors are translated to MailFetcherErrors which get
logged and the IMAP connection is retried in the next IMAP check.

Fixes #474
2019-01-14 19:08:59 +01:00
Daniel Quinn
60e8990a7b Update to include #471 2019-01-04 11:38:57 +00:00
Daniel Quinn
75a79ac204 Merge pull request #471 from khrise/added-missing-column-to-rest-api
Exposing documents' "added" timestamp via Rest API.
2019-01-04 11:36:38 +00:00
khrise
0c47907dda Exposing documents' "added" timestamp via Rest API. 2019-01-03 20:23:34 +00:00
Daniel Quinn
cea8332038 Merge pull request #468 from ddddavidmartin/document_ocr_always_setting
Reference PAPERLESS_OCR_ALWAYS in example config file.
2018-12-31 14:30:58 +00:00
Daniel Quinn
5982cb693a Include notes for #466 2018-12-30 18:30:23 +00:00
Daniel Quinn
73a02d40c4 Merge pull request #453 from jonaswinkler/patch-1
Update 0022_auto_20181007_1420.py
2018-12-30 18:27:27 +00:00
Daniel Quinn
b541765817 Merge pull request #464 from colinfrei/patch-2
remove unnecessary character
2018-12-30 18:26:12 +00:00
Daniel Quinn
28ffd1ec6b Merge pull request #466 from colinfrei/patch-3
Set consume directory for webserver too
2018-12-30 18:23:33 +00:00
Daniel Quinn
5760aa0894 Merge pull request #467 from danielquinn/feature/update-travis
Fix the tests so they finally start passing everywhere they should.
2018-12-30 18:22:46 +00:00
Daniel Quinn
562e5f644d Update changelog with test changes 2018-12-30 18:19:05 +00:00
Daniel Quinn
5ab2009ebf Tweak Travis to include Python3.7 and pipenv 2018-12-30 18:18:37 +00:00
Daniel Quinn
637b0d4cc2 Drop problematic tests
Some tests had differing outcomes depending on the version of Tesseract
installed on the test system.  This lead to a bunch of false test
failures, which lead to people (including me) just ignoring the Travis
results.

This commit removes those tests, and while it reduces our coverage, at
least the results are predictable.
2018-12-30 17:32:45 +00:00
Daniel Quinn
4a71c33537 Use [[]] instead of [] in Bash scripts 2018-12-30 17:32:17 +00:00
Daniel Quinn
cf36c8467e Update the Pipfile lock 2018-12-30 17:32:04 +00:00
Daniel Quinn
dafa6a4c71 Use pipenv in the Docker build 2018-12-30 17:31:26 +00:00
Daniel Quinn
a3c5ec834d Codify spaces in .yml files 2018-12-30 17:31:13 +00:00
Daniel Quinn
be57dbe4c8 Merge pull request #462 from colinfrei/patch-1
Type in docker-compose command
2018-12-30 14:46:17 +00:00
Daniel Quinn
4d50c7e105 Add Python 3.7 to test suite 2018-12-30 14:09:32 +00:00
Daniel Quinn
27af2603f5 Use modern languages for sample test files 2018-12-30 14:09:17 +00:00
Daniel Quinn
ff5b34179a Bump version 2018-12-30 12:44:26 +00:00
Daniel Quinn
0334617287 Update language READMEs with new logo & new language navigation 2018-12-30 12:44:13 +00:00
Daniel Quinn
f8b43fa74b Add the new logo 2018-12-30 12:40:29 +00:00
Daniel Quinn
1ff06d0dd9 Fix .gitignore exclusion that hid the logos 2018-12-30 12:39:07 +00:00
Daniel Quinn
4ad6813d11 Add the new logo
I bought this logo from the excellent logojoy.com site, and am including
the source files here to be covered under the project license (GPL).
2018-12-30 12:20:08 +00:00
Colin Frei
cbc5f0603f Set consume directory for webserver too
Fixes #289 

The HTTP POST endpoint saves the file in the consume directory.
This needs to be shared between the two services so that the file is actually consumed.
2018-12-30 07:43:49 +01:00
Colin Frei
0d21bdeffa remove unnecessary character 2018-12-28 17:43:36 +01:00
Colin Frei
b1f9b18b8c Type in docker-compose command 2018-12-26 16:43:22 +01:00
David Martin
4d13521f36 Reference PAPERLESS_OCR_ALWAYS in example config file.
This setting was introduced when support for retrieving the text layer
from documents was added. Having it in the example config makes it more
clear that it exists.
2018-12-16 18:11:39 +11:00
Daniel Quinn
7b4785bdb9 Merge pull request #450 from erikarvstedt/fix-parser-test
Fix date test sample image
2018-12-11 11:43:14 +00:00
jonaswinkler
baf89cad8e Update 0022_auto_20181007_1420.py
copy paste error.
2018-12-10 18:38:19 +01:00
Daniel Quinn
3c2a1a8c13 Merge pull request #451 from speshak/remote_pg
Add DBHOST & DBPORT parameters to settings
2018-12-06 23:38:50 +00:00
Daniel Quinn
1c7047bbb8 Move ipython out of the base dependencies 2018-12-06 23:28:33 +00:00
Scott Peshak
96dafe8c43 Add psycopg2 dependencies to Dockerfile 2018-12-02 16:14:58 -06:00
Scott Peshak
d6896daece Add psycopg2 to requirements.txt 2018-12-02 16:14:58 -06:00
Scott Peshak
d12f0642f2 Add DBHOST & DBPORT parameters
Resolves #445
2018-12-02 15:20:29 -06:00
Erik Arvstedt
a19f0ef97e Fix date test sample image
The previous version of `tests_date_3.png` had too much spacing
between the `0` and the `8` glyphs, which resulted in the year getting
parsed as `200 8` in Tesseract 3.05.00 (+ tessdata 3.04.00).
This caused the date parsing test to fail.
2018-12-02 15:10:21 +01:00
Erik Arvstedt
ec7125b6bb Fix travis ocr languages
The tests need German language support for Tesseract
2018-12-02 15:10:20 +01:00
Daniel Quinn
e3a616ebc3 Version bump 2018-12-01 17:12:34 +00:00
Daniel Quinn
f898ec792f Added notes for 2.6.0 2018-12-01 17:11:58 +00:00
Daniel Quinn
f45b6762f2 Merge branch 'jat255-ENH_filename_date_parsing' 2018-12-01 17:10:26 +00:00
Daniel Quinn
d544f269e0 Conform everything to the coding standards
https://paperless.readthedocs.io/en/latest/contributing.html#additional-style-guides
2018-12-01 17:09:12 +00:00
Daniel Quinn
650db75c2b Merge branch 'ENH_filename_date_parsing' of https://github.com/jat255/paperless into jat255-ENH_filename_date_parsing 2018-12-01 16:57:16 +00:00
Daniel Quinn
7dbb77e57b Add a .editorconfig 2018-12-01 16:56:58 +00:00
Daniel Quinn
f1b3312bcb Merge branch 'jat255-ENH_tag_colour_override' 2018-12-01 16:22:38 +00:00
Daniel Quinn
ea05ab2b06 Restructure colour.js to work withing a .ready() 2018-12-01 16:22:19 +00:00
Daniel Quinn
4f4c515629 Add colours to the tags pages 2018-12-01 16:21:58 +00:00
Daniel Quinn
c1f926a40c Merge branch 'ENH_tag_colour_override' of https://github.com/jat255/paperless into jat255-ENH_tag_colour_override 2018-12-01 15:56:37 +00:00
Daniel Quinn
c1d18c1e83 Fix language guesses in tests
It turns out that the Lorem ipsum text in the sample files was confuing the language guesser, causing it to think the file was in Catalan and not English or German.
2018-12-01 15:55:59 +00:00
Joshua Taillon
ba452e0524 move tag colour override to static folder 2018-12-01 09:14:44 -05:00
Daniel Quinn
c5488dcb98 Merge pull request #441 from jat255/patch-1
Update gunicorn commands
2018-11-30 19:45:01 +00:00
Joshua Taillon
d6eefbccee encapsulate in if blocks so no errors on non-tag pages; added support for edit tags page 2018-11-17 21:34:11 -05:00
Joshua Taillon
a813288aaf add example override for tag colour display 2018-11-17 09:18:36 -05:00
Joshua Taillon
63e2fbe0c9 Update paperless-webserver.service
Update `gunicorn` command to use `--pythonpath`
2018-11-16 09:21:07 -05:00
Joshua Taillon
597a7bb391 Update setup.rst
The provided `gunicorn` command did not work for me, failing with the following error:

```
ModuleNotFoundError: No module named '/home/paperless/paperless/src/paperless' 
```

The solution was to provide only `paperless.wsgi` as the argument to `gunicorn`, and provide a flag for `--pythonpath`. After changing it to this, the server started up fine.
2018-11-16 09:20:08 -05:00
Joshua Taillon
730daa3d6d Merge branch 'master' of github.com:danielquinn/paperless into ENH_filename_date_parsing 2018-11-15 23:17:59 -05:00
Joshua Taillon
c225281f95 Change the massive regex to match boundaries with _ or - characters (not just word breaks); add line for year first formats like YYYY-MM-DD 2018-11-15 20:38:53 -05:00
Joshua Taillon
e1d8744c66 Add option for parsing of date from filename (and associated tests) 2018-11-15 20:32:15 -05:00
Joshua Taillon
4409f65840 Update date tests to be more explicit with settings and allow tests to pass if using a timezone other than UTC 2018-11-15 20:30:23 -05:00
Daniel Quinn
c83dc666a4 I'm going to have to ditch requirements.txt if it can't be reliably generated 2018-11-03 13:42:03 +00:00
Daniel Quinn
9ab50ed09d Fix requiremnts.txt 2018-11-03 13:29:22 +00:00
Daniel Quinn
e0acb4a40b Update dependencies
This includes a security update for requests.
2018-11-03 12:49:35 +00:00
Daniel Quinn
eca6250c1b Fix the correspondent filters #423 2018-11-03 11:06:55 +00:00
Daniel Quinn
33abec0663 Code cleanup 2018-11-03 11:05:22 +00:00
Daniel Quinn
d825667c9b Allow an infinite number of logs to be deleted. 2018-11-03 10:25:51 +00:00
Daniel Quinn
84511f8418 Merge pull request #432 from deanpcmad/patch-1
Added missing ; to nginx config
2018-10-31 13:12:32 +00:00
Dean Perry
81e488b90d added missing ; to nginx config 2018-10-31 12:39:48 +00:00
Daniel Quinn
bff28113df Merge pull request #425 from mrwacky42/remove_vagrant
Remove Vagrant docs
2018-10-14 09:57:41 +01:00
Sharif Nassar
0b377a76d0 Remove Vagrant docs
* Vagrant does not seem to have any libvirt boxes for Ubuntu any more.
* Vagrant 2 was released a year ago, but vagrant-libvirt only claims
  to support up to Vagrant 1.8.
2018-10-13 11:31:53 -07:00
Daniel Quinn
ec1d5c80ff Add pip install to update process 2018-10-08 10:38:53 +01:00
Daniel Quinn
bd95804fbf Merge pull request #421 from ddddavidmartin/clarify_forgiving_ocr_handling
Clarify forgiving ocr handling
2018-10-08 09:35:57 +00:00
Daniel Quinn
8dc355a66f Merge pull request #422 from erikarvstedt/inotify-linux
requirements.txt: bring back Linux-only restriction for inotify-simple
2018-10-08 09:34:47 +00:00
Daniel Quinn
fbb389553c Merge pull request #419 from ddddavidmartin/let_unpaper_overwrite_temp_files
Let unpaper overwrite temporary files.
2018-10-08 09:32:30 +00:00
Erik Arvstedt
f8cfbb44d2 requirements.txt: bring back Linux-only restriction for inotify-simple
Fixes #418
2018-10-08 11:00:34 +02:00
David Martin
818780a191 Add PAPERLESS_FORGIVING_OCR option to example config.
It helps having it in the example config as that makes it more clear
that it exists.
2018-10-08 19:38:38 +11:00
David Martin
b350ec48b7 Mention FORGIVING_OCR config option when language detection fails.
It is not obvious that the PAPERLESS_FORGIVING_OCR allows to let
document consumption happen even if no language can be detected.
Mentioning it in the actual error message in the log seems like the best
way to make it clear.
2018-10-08 19:37:05 +11:00
David Martin
f948ee11be Let unpaper overwrite temporary files.
I'm not sure what the circumstances are, but it looks like unpaper can
attempt to write a temporary file that already exists [0]. This then
fails the consumption. As per daedadu's comment simply letting unpaper
overwrite files fixes this.

[0]
unpaper: error: output file '/tmp/paperless/paperless-pjkrcr4l/convert-0000.unpaper.pnm' already present.
See https://web.archive.org/web/20181008081515/https://github.com/danielquinn/paperless/issues/406#issue-360651630
2018-10-08 19:12:11 +11:00
75 changed files with 5440 additions and 894 deletions

28
.editorconfig Normal file
View File

@@ -0,0 +1,28 @@
# EditorConfig: http://EditorConfig.org
root = true
[*]
indent_style = tab
indent_size = 2
insert_final_newline = true
trim_trailing_whitespace = true
end_of_line = lf
charset = utf-8
max_line_length = 79
[{*.html,*.css,*.js}]
max_line_length = off
[*.py]
indent_size = 4
indent_style = space
[*.yml]
indent_style = space
# Tests don't get a line width restriction. It's still a good idea to follow
# the 79 character rule, but in the interests of clarity, tests often need to
# violate it.
[**/test_*.py]
max_line_length = off

4
.gitignore vendored
View File

@@ -66,6 +66,7 @@ media/overrides.js
# Sqlite database
db.sqlite3
db.sqlite3-journal
# PyCharm
.idea
@@ -73,7 +74,6 @@ db.sqlite3
# Other stuff that doesn't belong
.virtualenv
virtualenv
.vagrant
docker-compose.yml
docker-compose.env
@@ -82,4 +82,4 @@ scripts/import-for-development
scripts/nuke
# Static files collected by the collectstatic command
static/
./static/

View File

@@ -2,19 +2,22 @@ language: python
before_install:
- sudo apt-get update -qq
- sudo apt-get install -qq libpoppler-cpp-dev unpaper tesseract-ocr tesseract-ocr-eng tesseract-ocr-cat
- sudo apt-get install -qq libpoppler-cpp-dev unpaper tesseract-ocr
sudo: false
matrix:
include:
- python: 3.4
- python: 3.5
- python: 3.6
- python: "3.4"
- python: "3.5"
- python: "3.6"
- python: "3.7-dev"
install:
- pip install --requirement requirements.txt
- pip install sphinx
- pip install --upgrade pip pipenv sphinx
- pipenv lock -r > requirements.txt
- pip install -r requirements.txt
script:
- cd src/
- pytest --cov
@@ -22,4 +25,4 @@ script:
- sphinx-build -b html ../docs ../docs/_build -W
after_success:
- coveralls
- coveralls

View File

@@ -4,8 +4,8 @@ LABEL maintainer="The Paperless Project https://github.com/danielquinn/paperless
contributors="Guy Addadi <addadi@gmail.com>, Pit Kleyersburg <pitkley@googlemail.com>, \
Sven Fischer <git-dev@linux4tw.de>"
# Copy requirements file and init script
COPY requirements.txt /usr/src/paperless/
# Copy Pipfiles file and init script
COPY Pipfile* /usr/src/paperless/
COPY scripts/docker-entrypoint.sh /sbin/docker-entrypoint.sh
# Set export and consumption directories
@@ -13,15 +13,16 @@ ENV PAPERLESS_EXPORT_DIR=/export \
PAPERLESS_CONSUMPTION_DIR=/consume
RUN apk update --no-cache && apk add python3 gnupg libmagic bash shadow curl \
RUN apk update --no-cache && apk add python3 gnupg libmagic libpq bash shadow curl \
sudo poppler tesseract-ocr imagemagick ghostscript unpaper optipng && \
apk add --virtual .build-dependencies \
python3-dev poppler-dev gcc g++ musl-dev zlib-dev jpeg-dev && \
python3-dev poppler-dev postgresql-dev gcc g++ musl-dev zlib-dev jpeg-dev && \
# Install python dependencies
python3 -m ensurepip && \
rm -r /usr/lib/python*/ensurepip && \
cd /usr/src/paperless && \
pip3 install --no-cache-dir -r requirements.txt && \
pip3 install --upgrade pip pipenv && \
pipenv install --system --deploy && \
# Remove build dependencies
apk del .build-dependencies && \
# Create the consumption directory

View File

@@ -25,6 +25,8 @@ python-dateutil = "*"
python-dotenv = "*"
python-gnupg = "*"
pytz = "*"
sphinx = "*"
tox = "*"
pycodestyle = "*"
pytest = "*"
pytest-cov = "*"
@@ -32,9 +34,8 @@ pytest-django = "*"
pytest-sugar = "*"
pytest-env = "*"
pytest-xdist = "*"
psycopg2 = "*"
djangoql = "*"
[dev-packages]
ipython = "*"
sphinx = "*"
tox = "*"

719
Pipfile.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -1,7 +1,6 @@
*[English](README.md)*<br/>
*[Greek](README-el.md)*
[ [en](README.md) | de | [el](README-el.md) ]
# Paperless
![Paperless](https://raw.githubusercontent.com/danielquinn/paperless/master/src/paperless/static/paperless/img/logo-dark.png)
[![Dokumentation](https://readthedocs.org/projects/paperless/badge/?version=latest)](https://paperless.readthedocs.org/) [![Chat](https://badges.gitter.im/danielquinn/paperless.svg)](https://gitter.im/danielquinn/paperless) [![Travis](https://travis-ci.org/danielquinn/paperless.svg?branch=master)](https://travis-ci.org/danielquinn/paperless) [![Coverage Status](https://coveralls.io/repos/github/danielquinn/paperless/badge.svg?branch=master)](https://coveralls.io/github/danielquinn/paperless?branch=master) [![Danke](https://img.shields.io/badge/THANKS-md-ff69b4.svg)](https://github.com/danielquinn/paperless/blob/master/THANKS.md)

View File

@@ -1,7 +1,6 @@
*[English](README.md)*<br/>
*[German](README-de.md)*
[ [en](README.md) | [de](README-de.md) | el ]
# Paperless
![Paperless](https://raw.githubusercontent.com/danielquinn/paperless/master/src/paperless/static/paperless/img/logo-dark.png)
[![Documentation](https://readthedocs.org/projects/paperless/badge/?version=latest)](https://paperless.readthedocs.org/) [![Chat](https://badges.gitter.im/danielquinn/paperless.svg)](https://gitter.im/danielquinn/paperless) [![Travis](https://travis-ci.org/danielquinn/paperless.svg?branch=master)](https://travis-ci.org/danielquinn/paperless) [![Coverage Status](https://coveralls.io/repos/github/danielquinn/paperless/badge.svg?branch=master)](https://coveralls.io/github/danielquinn/paperless?branch=master) [![Thanks](https://img.shields.io/badge/THANKS-md-ff69b4.svg)](https://github.com/danielquinn/paperless/blob/master/THANKS.md)

View File

@@ -1,7 +1,6 @@
*[German](README-de.md)*<br/>
*[Greek](README-el.md)*
[ en | [de](README-de.md) | [el](README-el.md) ]
# Paperless
![Paperless](https://raw.githubusercontent.com/danielquinn/paperless/master/src/paperless/static/paperless/img/logo-dark.png)
[![Documentation](https://readthedocs.org/projects/paperless/badge/?version=latest)](https://paperless.readthedocs.org/) [![Chat](https://badges.gitter.im/danielquinn/paperless.svg)](https://gitter.im/danielquinn/paperless) [![Travis](https://travis-ci.org/danielquinn/paperless.svg?branch=master)](https://travis-ci.org/danielquinn/paperless) [![Coverage Status](https://coveralls.io/repos/github/danielquinn/paperless/badge.svg?branch=master)](https://coveralls.io/github/danielquinn/paperless?branch=master) [![Thanks](https://img.shields.io/badge/THANKS-md-ff69b4.svg)](https://github.com/danielquinn/paperless/blob/master/THANKS.md)

20
Vagrantfile vendored
View File

@@ -1,20 +0,0 @@
# -*- mode: ruby -*-
# vi: set ft=ruby :
VAGRANT_API_VERSION = "2"
Vagrant.configure(VAGRANT_API_VERSION) do |config|
config.vm.box = "ubuntu/trusty64"
# Provision using shell
config.vm.host_name = "dev.paperless"
config.vm.synced_folder ".", "/opt/paperless"
config.vm.provision "shell", path: "scripts/vagrant-provision"
# Networking details
config.vm.network "private_network", ip: "172.28.128.4"
config.vm.provider "virtualbox" do |vb|
# Customize the amount of memory on the VM:
vb.memory = "1024"
end
end

View File

@@ -17,6 +17,9 @@ services:
volumes:
- data:/usr/src/paperless/data
- media:/usr/src/paperless/media
# You have to adapt the local path you want the consumption
# directory to mount to by modifying the part before the ':'.
- ./consume:/consume
env_file: docker-compose.env
# The reason the line is here is so that the webserver that doesn't do
# any text recognition and doesn't have to install unnecessary
@@ -36,8 +39,8 @@ services:
volumes:
- data:/usr/src/paperless/data
- media:/usr/src/paperless/media
# You have to adapt the local path you want the consumption
# directory to mount to by modifying the part before the ':'.
# This should be set to the same value as the consume directory
# in the webserver service above.
- ./consume:/consume
# Likewise, you can add a local path to mount a directory for
# exporting. This is not strictly needed for paperless to

View File

@@ -1,6 +1,56 @@
Changelog
#########
2.7.0
=====
* `syntonym`_ submitted a pull request to catch IMAP connection errors `#475`_.
* `Stéphane Brunner`_ added ``psycopg2`` to the Pipfile `#489`_. He also fixed
a syntax error in ``docker-compose.yml.example`` `#488`_ and added `DjangoQL <https://github.com/ivelum/djangoql>`_,
which allows a litany of handy search functionality `#492`_.
* `CkuT`_ and `JOKer`_ hacked out a simple, but super-helpful optimisation to
how the thumbnails are served up, improving performance considerably `#481`_.
* `tsia`_ added a few fields to the correspondent and tag REST API `#483`_.
* `Brian Cribbs`_ improved the documentation to help people using Paperless
over NFS `#484`_.
* `Brendan M. Sleight`_ updated the documentation to include a note for setting the
``DEBUG`` value. The ``paperless.conf.example`` file was also updated to
mirror the project defaults.
2.6.1
=====
* We now have a logo, complete with a favicon :-)
* Removed some problematic tests.
* Fix the docker-compose example config to include a shared consume volume so
that using the push API will work for users of the Docker install. Thanks to
`Colin Frei`_ for fixing this in `#466`_.
* `khrise`_ submitted a pull request to include the ``added`` property to the
REST API `#471`_.
2.6.0
=====
* Allow an infinite number of logs to be deleted. Thanks to `Ulli`_ for noting
the problem in `#433`_.
* Fix the ``RecentCorrespondentsFilter`` correspondents filter that was added
in 2.4 to play nice with the defaults. Thanks to `tsia`_ and `Sblop`_ who
pointed this out. `#423`_.
* Updated dependencies to include (among other things) a security patch to
requests.
* Fix text in sample data for tests so that the language guesser stops thinking
that everything is in Catalan because we had *Lorem ipsum* in there.
* Tweaked the gunicorn sample command to use filesystem paths instead of Python
paths. `#441`_
* Added pretty colour boxes next to the hex values in the Tags section, thanks
to a pull request from `Joshua Taillon`_ `#442`_.
* Added a ``.editorconfig`` file to better specify coding style.
* `Joshua Taillon`_ also added some logic to tie Paperless' date guessing logic
into how it parses file names on import. `#440`_
2.5.0
=====
@@ -44,6 +94,7 @@ Changelog
* The ``get_date()`` functionality of the parsers has been consolidated onto
the ``DocumentParser`` class since much of that code was redundant anyway.
2.4.0
=====
@@ -55,13 +106,13 @@ Changelog
It's now in the import step that we decide the storage type. This allows you
to export from an encrypted system and import into an unencrypted one, or
vice-versa.
* The migration history has been slightly modified to accomodate PostgreSQL
* The migration history has been slightly modified to accommodate PostgreSQL
users. Additionally, you can now tell paperless to use PostgreSQL simply by
declaring ``PAPERLESS_DBUSER`` in your environment. This will attempt to
connect to your Postgres database without a password unless you also set
``PAPERLESS_DBPASS``.
* A bug was found in the REST API filter system that was the result of an
update of django-filter some time ago. This has now been patched `#412`_.
update of django-filter some time ago. This has now been patched in `#412`_.
Thanks to `thepill`_ for spotting it!
@@ -570,6 +621,15 @@ bulk of the work on this big change.
.. _thepill: https://github.com/thepill
.. _Andrew Peng: https://github.com/pengc99
.. _euri10: https://github.com/euri10
.. _Ulli: https://github.com/Ulli2k
.. _tsia: https://github.com/tsia
.. _Sblop: https://github.com/Sblop
.. _Colin Frei: https://github.com/colinfrei
.. _khrise: https://github.com/khrise
.. _syntonym: https://github.com/syntonym
.. _JOKer: https://github.com/JOKer
.. _Brian Cribbs: https://github.com/cribbstechnologies
.. _Brendan M. Sleight: https://github.com/bmsleight
.. _#20: https://github.com/danielquinn/paperless/issues/20
.. _#44: https://github.com/danielquinn/paperless/issues/44
@@ -664,6 +724,20 @@ bulk of the work on this big change.
.. _#412: https://github.com/danielquinn/paperless/issues/412
.. _#413: https://github.com/danielquinn/paperless/pull/413
.. _#414: https://github.com/danielquinn/paperless/issues/414
.. _#423: https://github.com/danielquinn/paperless/issues/423
.. _#433: https://github.com/danielquinn/paperless/issues/433
.. _#440: https://github.com/danielquinn/paperless/pull/440
.. _#441: https://github.com/danielquinn/paperless/pull/441
.. _#442: https://github.com/danielquinn/paperless/pull/442
.. _#466: https://github.com/danielquinn/paperless/pull/466
.. _#471: https://github.com/danielquinn/paperless/pull/471
.. _#475: https://github.com/danielquinn/paperless/pull/475
.. _#481: https://github.com/danielquinn/paperless/pull/481
.. _#483: https://github.com/danielquinn/paperless/pull/483
.. _#484: https://github.com/danielquinn/paperless/pull/484
.. _#488: https://github.com/danielquinn/paperless/pull/488
.. _#489: https://github.com/danielquinn/paperless/pull/489
.. _#492: https://github.com/danielquinn/paperless/pull/492
.. _pipenv: https://docs.pipenv.org/
.. _a new home on Docker Hub: https://hub.docker.com/r/danielquinn/paperless/

View File

@@ -43,6 +43,16 @@ These however wouldn't work:
* ``Some Company Name, Invoice 2016-01-01, money, invoices.pdf``
* ``Another Company- Letter of Reference.jpg``
Do I have to be so strict about naming?
---------------------------------------
Rather than using the strict document naming rules, one can also set the option
``PAPERLESS_FILENAME_DATE_ORDER`` in ``paperless.conf`` to any date order
that is accepted by dateparser_. Doing so will cause ``paperless`` to default
to any date format that is found in the title, instead of a date pulled from
the document's text, without requiring the strict formatting of the document
filename as described above.
.. _dateparser: https://github.com/scrapinghub/dateparser/blob/v0.7.0/docs/usage.rst#settings
.. _guesswork-content:
@@ -82,11 +92,11 @@ text and matching algorithm. From the help info there:
uses a regex to match the PDF. If you don't know what a regex is, you
probably don't want this option.
When using the "any" or "all" matching algorithms, you can search for terms that
consist of multiple words by enclosing them in double quotes. For example, defining
a match text of ``"Bank of America" BofA`` using the "any" algorithm, will match
documents that contain either "Bank of America" or "BofA", but will not match
documents containing "Bank of South America".
When using the "any" or "all" matching algorithms, you can search for terms
that consist of multiple words by enclosing them in double quotes. For example,
defining a match text of ``"Bank of America" BofA`` using the "any" algorithm,
will match documents that contain either "Bank of America" or "BofA", but will
not match documents containing "Bank of South America".
Then just save your tag/correspondent and run another document through the
consumer. Once complete, you should see the newly-created document,

View File

@@ -82,6 +82,7 @@ rolled in as part of the update:
$ cd /path/to/project
$ git pull
$ pip install -r requirements.txt
$ cd src
$ ./manage.py migrate
@@ -101,7 +102,7 @@ is similar:
$ cd /path/to/project
$ git pull
$ docker build -t paperless .
$ docker-compose run --rm comsumer migrate
$ docker-compose run --rm consumer migrate
$ docker-compose up -d
If ``git pull`` doesn't report any changes, there is no need to continue with

View File

@@ -12,6 +12,7 @@ should work) that has the following software installed:
* `Imagemagick`_ version 6.7.5 or higher
* `unpaper`_
* `libpoppler-cpp-dev`_ PDF rendering library
* `optipng`_
.. _Python3: https://python.org/
.. _GNU Privacy Guard: https://gnupg.org
@@ -19,6 +20,7 @@ should work) that has the following software installed:
.. _Imagemagick: http://imagemagick.org/
.. _unpaper: https://www.flameeyes.eu/projects/unpaper
.. _libpoppler-cpp-dev: https://poppler.freedesktop.org/
.. _optipng: http://optipng.sourceforge.net/
Notably, you should confirm how you access your Python3 installation. Many
Linux distributions will install Python3 in parallel to Python2, using the
@@ -33,7 +35,7 @@ In addition to the above, there are a number of Python requirements, all of
which are listed in a file called ``requirements.txt`` in the project root
directory.
If you're not working on a virtual environment (like Vagrant or Docker), you
If you're not working on a virtual environment (like Docker), you
should probably be using a virtualenv, but that's your call. The reasons why
you might choose a virtualenv or not aren't really within the scope of this
document. Needless to say if you don't know what a virtualenv is, you should

View File

@@ -42,18 +42,14 @@ Installation & Configuration
You can go multiple routes with setting up and running Paperless:
* The `bare metal route`_
* The `vagrant route`_
* The `docker route`_
The `Vagrant route`_ is quick & easy, but means you're running a VM which comes
with memory consumption, cpu overhead etc. The `docker route`_ offers the same
simplicity as Vagrant with lower resource consumption.
The `docker route`_ is quick & easy.
The `bare metal route`_ is a bit more complicated to set up but makes it easier
should you want to contribute some code back.
.. _Vagrant route: setup-installation-vagrant_
.. _docker route: setup-installation-docker_
.. _bare metal route: setup-installation-bare-metal_
.. _Docker Machine: https://docs.docker.com/machine/
@@ -81,12 +77,16 @@ Standard (Bare Metal)
encrypt/decrypt the original documents. Don't worry about defining this
if you don't want to use encryption (the default).
Note also that if you're using the ``runserver`` as mentioned below, you
should make sure that ``PAPERLESS_DEBUG="true"`` is set, or that the line is
just commented out, as this is the default.
4. Initialise the SQLite database with ``./manage.py migrate``.
5. Create a user for your Paperless instance with
``./manage.py createsuperuser``. Follow the prompts to create your user.
6. Start the webserver with ``./manage.py runserver <IP>:<PORT>``.
If no specifc IP or port are given, the default is ``127.0.0.1:8000``
also known as http://localhost:8000/.
If no specific IP or port is given, the default is ``127.0.0.1:8000`` also
known as http://localhost:8000/.
You should now be able to visit your (empty) installation at
`Paperless webserver`_ or whatever you chose before. You can login with the
user/pass you created in #5.
@@ -147,6 +147,15 @@ Docker Method
instructions in comments in the file. The only change that is a hard
requirement is to specify where the consumption directory should
mount.[#dockercomposeyml]_
.. caution::
If you are using NFS mounts for the consume directory you also need to
change the command to turn off inotify, as it doesn't work with NFS:
``command: ["document_consumer", "--no-inotify"]``
5. Modify ``docker-compose.env`` and adapt the following environment variables:
``PAPERLESS_PASSPHRASE``
@@ -267,54 +276,6 @@ Docker Method
newer ``docker-compose.yml.example`` file
.. _setup-installation-vagrant:
Vagrant Method
++++++++++++++
1. Install `Vagrant`_. How you do that is really between you and your OS.
2. Run ``vagrant up``. An instance will start up for you. When it's ready and
provisioned...
3. Run ``vagrant ssh`` and once inside your new vagrant box, edit
``/etc/paperless.conf`` and set the values for:
* ``PAPERLESS_CONSUMPTION_DIR``: This is where your documents will be
dumped to be consumed by Paperless.
* ``PAPERLESS_PASSPHRASE``: This is the passphrase Paperless uses to
encrypt/decrypt the original document. It's only required if you want
your original files to be encrypted, otherwise, just leave it unset.
* ``PAPERLESS_EMAIL_SECRET``: this is the "magic word" used when consuming
documents from mail or via the API. If you don't use either, leaving it
blank is just fine.
4. Exit the vagrant box and re-enter it with ``vagrant ssh`` again. This
updates the environment to make use of the changes you made to the config
file.
5. Initialise the database with ``/opt/paperless/src/manage.py migrate``.
6. Still inside your vagrant box, create a user for your Paperless instance
with ``/opt/paperless/src/manage.py createsuperuser``. Follow the prompts to
create your user.
7. Start the webserver with
``/opt/paperless/src/manage.py runserver 0.0.0.0:8000``. You should now be
able to visit your (empty) `Paperless webserver`_ at ``172.28.128.4:8000``.
You can login with the user/pass you created in #6.
8. In a separate window, run ``vagrant ssh`` again, but this time once inside
your vagrant instance, you should start the consumer script with
``/opt/paperless/src/manage.py document_consumer``.
9. Scan something. Put it in the ``CONSUMPTION_DIR``.
10. Wait a few minutes
11. Visit the document list on your webserver, and it should be there, indexed
and downloadable.
.. caution::
This installation is not secure. Once everything is working head up to
`Making things more permanent`_
.. _Vagrant: https://vagrantup.com/
.. _Paperless server: http://172.28.128.4:8000
.. _setup-permanent:
Making Things a Little more Permanent
@@ -398,7 +359,7 @@ instance listening on localhost port 8000.
location /static {
autoindex on;
alias <path-to-paperless-static-directory>
alias <path-to-paperless-static-directory>;
}
@@ -409,7 +370,7 @@ instance listening on localhost port 8000.
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_pass http://127.0.0.1:8000
proxy_pass http://127.0.0.1:8000;
}
}
@@ -418,7 +379,7 @@ The gunicorn server can be started with the command:
.. code-block:: shell
$ <path-to-paperless-virtual-environment>/bin/gunicorn <path-to-paperless>/src/paperless.wsgi -w 2
$ <path-to-paperless-virtual-environment>/bin/gunicorn --pythonpath=<path-to-paperless>/src paperless.wsgi -w 2
.. _setup-permanent-standard-systemd:
@@ -475,7 +436,7 @@ after restarting your system:
respawn limit 10 5
script
exec <path to paperless virtual environment>/bin/gunicorn <path to parperless>/src/paperless.wsgi -w 2
exec <path to paperless virtual environment>/bin/gunicorn --pythonpath=<path to paperless>/src paperless.wsgi -w 2
end script
Note that you'll need to replace ``/srv/paperless/src/manage.py`` with the
@@ -513,13 +474,6 @@ second period.
.. _Upstart: http://upstart.ubuntu.com/
Vagrant
~~~~~~~
You may use the Ubuntu explanation above. Replace
``(local-filesystems and net-device-up IFACE=eth0)`` with ``vagrant-mounted``.
.. _setup-permanent-docker:
Docker

View File

@@ -14,9 +14,8 @@ FORGIVING_OCR is enabled``, then you might need to install the
`Tesseract language files <http://packages.ubuntu.com/search?keywords=tesseract-ocr>`_
matching your document's languages.
As an example, if you are running Paperless from the Vagrant setup provided
(or from any Ubuntu or Debian box), and your documents are written in Spanish
you may need to run::
As an example, if you are running Paperless from any Ubuntu or Debian
box, and your documents are written in Spanish you may need to run::
apt-get install -y tesseract-ocr-spa

View File

@@ -214,5 +214,5 @@ This too is done via the ``manage.py`` script:
That's it. It'll loop over all of the documents in your database and attempt
to match all of your tags to them. If one matches, it'll be applied. And
don't worry, you can run this as often as you like, it' won't double-tag
don't worry, you can run this as often as you like, it won't double-tag
a document.

11
overrides/README.md Normal file
View File

@@ -0,0 +1,11 @@
# Customizing Paperless
*See customization
[documentation](https://paperless.readthedocs.io/en/latest/customising.html)
for more detail!*
The example `.css` and `.js` snippets in this folder can be placed into
one of two files in your ``PAPERLESS_MEDIADIR`` folder: `overrides.js` or
`overrides.css`. Please feel free to submit pull requests to the main
repository with other examples of customizations that you think others may
find useful.

View File

@@ -61,7 +61,7 @@ PAPERLESS_EMAIL_SECRET=""
# Controls whether django's debug mode is enabled. Disable this on production
# systems. Debug mode is enabled by default.
PAPERLESS_DEBUG="false"
#PAPERLESS_DEBUG="true"
# Paperless can be instructed to attempt to encrypt your PDF files with GPG
@@ -127,6 +127,14 @@ PAPERLESS_DEBUG="false"
# "true", the document will instead be opened in the browser, if possible.
#PAPERLESS_INLINE_DOC="false"
# By default, paperless will check the document text for document date information.
# Uncomment the line below to enable checking the document filename for date
# information. The date order can be set to any option as specified in
# https://dateparser.readthedocs.io/en/latest/#settings. The filename will be
# checked first, and if nothing is found, the document text will be checked
# as normal.
#PAPERLESS_FILENAME_DATE_ORDER="YMD"
#
# The following values use sensible defaults for modern systems, but if you're
# running Paperless on a low-resource device (like a Raspberry Pi), modifying
@@ -188,6 +196,17 @@ PAPERLESS_DEBUG="false"
#PAPERLESS_CONSUMER_LOOP_TIME=10
# By default Paperless stops consuming a document if no language can be
# detected. Set to true to consume documents even if the language detection
# fails.
#PAPERLESS_FORGIVING_OCR="false"
# By default Paperless does not OCR a document if the text can be retrieved from
# the document directly. Set to true to always OCR documents.
#PAPERLESS_OCR_ALWAYS="false"
###############################################################################
#### Interface ####
###############################################################################

View File

@@ -1,51 +1,70 @@
-i https://pypi.python.org/simple
apipkg==1.5; python_version != '3.3.*'
atomicwrites==1.2.1; python_version != '3.3.*'
alabaster==0.7.12
apipkg==1.5
atomicwrites==1.2.1
attrs==18.2.0
certifi==2018.8.24
babel==2.6.0
certifi==2018.11.29
chardet==3.0.4
coverage==4.5.1; python_version < '4'
coveralls==1.5.0
coverage==4.5.2
coveralls==1.5.1
dateparser==0.7.0
django-cors-headers==2.4.0
django-crispy-forms==1.7.2
django-extensions==2.1.2
django-filter==2.0.0
django==2.0.8
djangorestframework==3.8.2
django-extensions==2.1.4
django-filter==2.1.0
django==2.0.10
djangoql==0.12.3
djangorestframework==3.9.1
docopt==0.6.2
execnet==1.5.0; python_version != '3.3.*'
docutils==0.14
execnet==1.5.0
factory-boy==2.11.1
faker==0.9.0; python_version >= '2.7'
faker==1.0.2
filelock==3.0.10
filemagic==1.6
fuzzywuzzy==0.15.0
fuzzywuzzy[speedup]==0.15.0
gunicorn==19.9.0
idna==2.7
idna==2.8
imagesize==1.1.0
inotify-simple==1.1.8
jinja2==2.10
langdetect==1.0.7
more-itertools==4.3.0
pdftotext==2.1.0
pillow==5.2.0
pluggy==0.7.1; python_version != '3.3.*'
py==1.6.0; python_version != '3.3.*'
markupsafe==1.1.0
more-itertools==5.0.0
packaging==19.0
pdftotext==2.1.1
pillow==5.4.1
pluggy==0.8.1
ply==3.11
psycopg2==2.7.7
py==1.7.0
pycodestyle==2.4.0
pygments==2.3.1
pyocr==0.5.3
pytest-cov==2.6.0
pytest-django==3.4.2
pyparsing==2.3.1
pytest-cov==2.6.1
pytest-django==3.4.5
pytest-env==0.6.2
pytest-forked==0.2; python_version != '3.3.*'
pytest-sugar==0.9.1
pytest-xdist==1.23.0
pytest==3.8.0
python-dateutil==2.7.3
python-dotenv==0.9.1
python-gnupg==0.4.3
pytest-forked==1.0.1
pytest-sugar==0.9.2
pytest-xdist==1.26.0
pytest==4.1.1
python-dateutil==2.7.5
python-dotenv==0.10.1
python-gnupg==0.4.4
python-levenshtein==0.12.0
pytz==2018.5
regex==2018.8.29
requests==2.19.1
six==1.11.0
pytz==2018.9
regex==2019.1.24
requests==2.21.0
six==1.12.0
snowballstemmer==1.2.1
sphinx==1.8.3
sphinxcontrib-websupport==1.1.0
termcolor==1.1.0
text-unidecode==1.2
toml==0.10.0
tox==3.7.0
tzlocal==1.5.1
urllib3==1.23; python_version != '3.3.*'
urllib3==1.24.1
virtualenv==16.3.0

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

Binary file not shown.

After

Width:  |  Height:  |  Size: 91 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 111 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 116 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 94 KiB

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 7.4 KiB

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 7.5 KiB

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 7.5 KiB

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 7.4 KiB

View File

@@ -0,0 +1,82 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<svg
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:cc="http://creativecommons.org/ns#"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:svg="http://www.w3.org/2000/svg"
xmlns="http://www.w3.org/2000/svg"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
version="1.1"
width="900"
height="900"
id="svg3923"
sodipodi:docname="square.svg"
inkscape:export-filename="/tmp/test.png"
inkscape:export-xdpi="96"
inkscape:export-ydpi="96"
inkscape:version="0.92.2 2405546, 2018-03-11">
<metadata
id="metadata3929">
<rdf:RDF>
<cc:Work
rdf:about="">
<dc:format>image/svg+xml</dc:format>
<dc:type
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
<dc:title></dc:title>
</cc:Work>
</rdf:RDF>
</metadata>
<defs
id="defs3927" />
<sodipodi:namedview
pagecolor="#ffffff"
bordercolor="#666666"
borderopacity="1"
objecttolerance="10"
gridtolerance="10"
guidetolerance="10"
inkscape:pageopacity="0"
inkscape:pageshadow="2"
inkscape:window-width="3840"
inkscape:window-height="2096"
id="namedview3925"
showgrid="false"
inkscape:zoom="1.1360927"
inkscape:cx="635.07139"
inkscape:cy="606.383"
inkscape:window-x="0"
inkscape:window-y="27"
inkscape:window-maximized="1"
inkscape:current-layer="g3921" />
<g
transform="matrix(10.638298,0,0,10.638298,106.38298,-206.38301)"
id="g3921">
<defs
id="SvgjsDefs1018" />
<g
id="SvgjsG1019"
featureKey="root"
style="fill:#ffffff" />
<g
id="SvgjsG1020"
featureKey="symbol1"
transform="matrix(0.10341565,0,0,0.10341565,-11.43874,18.048418)"
inkscape:export-filename="/tmp/test.png"
inkscape:export-xdpi="116.02285"
inkscape:export-ydpi="116.02285"
style="fill:#17541f">
<defs
id="defs3911" />
<g
id="g3915">
<path
d="M 231,798 C 227,779 219,741 218,741 49,640 69,465 125,365 c 12,126 235,213 105,367 -1,2 6,26 12,48 26,-44 65,-97 63,-102 C 145,288 645,258 749,16 c 47,234 -24,596 -426,688 -2,1 -73,126 -76,127 0,-2 -30,-1 -26,-11 2,-6 6,-14 10,-22 z M 330,625 C 267,476 452,312 544,271 356,439 324,564 330,625 Z m -104,79 c 51,-59 -9,-160 -45,-193 61,105 57,166 45,193 z"
style="fill:#17541f"
id="path3913"
inkscape:connector-curvature="0" />
</g>
</g>
</g>
</svg>

After

Width:  |  Height:  |  Size: 2.6 KiB

View File

@@ -75,7 +75,7 @@ install_languages() {
pkg="tesseract-ocr-data-$lang"
# English is installed by default
if [ "$lang" == "eng" ]; then
if [[ "$lang" == "eng" ]]; then
continue
fi
@@ -95,7 +95,7 @@ if [[ "$1" != "/"* ]]; then
initialize
# Install additional languages if specified
if [ ! -z "$PAPERLESS_OCR_LANGUAGES" ]; then
if [[ ! -z "$PAPERLESS_OCR_LANGUAGES" ]]; then
install_languages "$PAPERLESS_OCR_LANGUAGES"
fi

View File

@@ -4,7 +4,7 @@ Description=Paperless webserver
[Service]
User=paperless
Group=paperless
ExecStart=/home/paperless/project/virtualenv/bin/gunicorn /home/paperless/project/src/paperless.wsgi -w 2
ExecStart=/home/paperless/project/virtualenv/bin/gunicorn --pythonpath=/home/paperless/project/src paperless.wsgi -w 2
[Install]
WantedBy=multi-user.target

View File

@@ -1,31 +0,0 @@
#!/bin/bash
# Install packages
apt-get update
apt-get build-dep -y python-imaging
apt-get install -y libjpeg8 libjpeg62-dev libfreetype6 libfreetype6-dev
apt-get install -y build-essential python3-dev python3-pip sqlite3 libsqlite3-dev git
apt-get install -y tesseract-ocr tesseract-ocr-eng imagemagick unpaper
# Python dependencies
pip3 install -r /opt/paperless/requirements.txt
# Create the environment file
cat /opt/paperless/paperless.conf.example | sed -e 's#CONSUMPTION_DIR=""#CONSUMPTION_DIR="/home/vagrant/consumption"#' > /etc/paperless.conf
chmod 0640 /etc/paperless.conf
chown root:vagrant /etc/paperless.conf
# Create the consumption directory
mkdir /home/vagrant/consumption
chown vagrant:vagrant /home/vagrant/consumption
echo "
Now follow the remaining steps in the Vagrant section of the setup
documentation to complete the process:
http://paperless.readthedocs.org/en/latest/setup.html#setup-installation-vagrant
"

View File

@@ -11,6 +11,7 @@ from django.urls import reverse
from django.utils.html import format_html, format_html_join
from django.utils.http import urlquote
from django.utils.safestring import mark_safe
from djangoql.admin import DjangoQLSearchMixin
from documents.actions import (
add_tag_to_selected,
@@ -61,12 +62,12 @@ class FinancialYearFilter(admin.SimpleListFilter):
# To keep it simple we use the same string for both
# query parameter and the display.
return (query, query)
return query, query
else:
query = "{0}-{0}".format(date.year)
display = "{}".format(date.year)
return (query, display)
return query, display
def lookups(self, request, model_admin):
if not settings.FY_START or not settings.FY_END:
@@ -88,25 +89,24 @@ class FinancialYearFilter(admin.SimpleListFilter):
class RecentCorrespondentFilter(admin.RelatedFieldListFilter):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.title = "correspondent (recent)"
"""
If PAPERLESS_RECENT_CORRESPONDENT_YEARS is set, we limit the available
correspondents to documents sent our way over the past ``n`` years.
"""
def field_choices(self, field, request, model_admin):
years = settings.PAPERLESS_RECENT_CORRESPONDENT_YEARS
days = 365 * years
correspondents = Correspondent.objects.all()
lookups = []
if years and years > 0:
correspondents = Correspondent.objects.filter(
self.title = "Correspondent (Recent)"
days = 365 * years
correspondents = correspondents.filter(
documents__created__gte=datetime.now() - timedelta(days=days)
).distinct()
for c in correspondents:
lookups.append((c.id, c.name))
return lookups
return [(c.id, c.name) for c in correspondents]
class CommonAdmin(admin.ModelAdmin):
@@ -146,13 +146,16 @@ class CorrespondentAdmin(CommonAdmin):
class TagAdmin(CommonAdmin):
list_display = ("name", "colour", "match", "matching_algorithm",
"document_count")
list_display = (
"name", "colour", "match", "matching_algorithm", "document_count")
list_filter = ("colour", "matching_algorithm")
list_editable = ("colour", "match", "matching_algorithm")
readonly_fields = ("slug",)
class Media:
js = ("js/colours.js",)
def get_queryset(self, request):
qs = super(TagAdmin, self).get_queryset(request)
qs = qs.annotate(document_count=models.Count("documents"))
@@ -163,7 +166,7 @@ class TagAdmin(CommonAdmin):
document_count.admin_order_field = "document_count"
class DocumentAdmin(CommonAdmin):
class DocumentAdmin(DjangoQLSearchMixin, CommonAdmin):
class Media:
css = {
@@ -177,7 +180,6 @@ class DocumentAdmin(CommonAdmin):
list_filter = (
"tags",
("correspondent", RecentCorrespondentFilter),
"correspondent",
FinancialYearFilter
)

View File

@@ -216,7 +216,11 @@ class MailFetcher(Loggable):
return r
def _connect(self):
self._connection = imaplib.IMAP4_SSL(self._host, self._port)
try:
self._connection = imaplib.IMAP4_SSL(self._host, self._port)
except OSError as e:
msg = "Problem connecting to {}: {}".format(self._host, e.strerror)
raise MailFetcherError(msg)
def _login(self):

View File

@@ -11,7 +11,7 @@ def re_slug_all_the_things(apps, schema_editor):
"""
Tag = apps.get_model("documents", "Tag")
Correspondent = apps.get_model("documents", "Tag")
Correspondent = apps.get_model("documents", "Correspondent")
for klass in (Tag, Correspondent):
for instance in klass.objects.all():

View File

@@ -14,14 +14,18 @@ from django.utils import timezone
# - XX.YY.ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
# - XX/YY/ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
# - XX-YY-ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
# - ZZZZ.XX.YY with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
# - ZZZZ/XX/YY with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
# - ZZZZ-XX-YY with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
# - XX. MONTH ZZZZ with XX being 1 or 2 and ZZZZ being 2 or 4 digits
# - MONTH ZZZZ, with ZZZZ being 4 digits
# - MONTH XX, ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits
DATE_REGEX = re.compile(
r'\b([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})\b|' +
r'\b([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))\b|' +
r'\b([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))\b|' +
r'\b([^\W\d_]{3,9} [0-9]{4})\b'
r'(\b|(?!=([_-])))([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})(\b|(?=([_-])))|' + # NOQA: E501
r'(\b|(?!=([_-])))([0-9]{4}|[0-9]{2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{1,2})(\b|(?=([_-])))|' + # NOQA: E501
r'(\b|(?!=([_-])))([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))(\b|(?=([_-])))|' + # NOQA: E501
r'(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))(\b|(?=([_-])))|' +
r'(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{4})(\b|(?=([_-])))'
)
@@ -37,6 +41,7 @@ class DocumentParser:
SCRATCH = settings.SCRATCH_DIR
DATE_ORDER = settings.DATE_ORDER
FILENAME_DATE_ORDER = settings.FILENAME_DATE_ORDER
OPTIPNG = settings.OPTIPNG_BINARY
def __init__(self, path):
@@ -75,30 +80,60 @@ class DocumentParser:
Returns the date of the document.
"""
def __parser(ds, date_order):
"""
Call dateparser.parse with a particular date ordering
"""
return dateparser.parse(
ds,
settings={
"DATE_ORDER": date_order,
"PREFER_DAY_OF_MONTH": "first",
"RETURN_AS_TIMEZONE_AWARE":
True
}
)
date = None
date_string = None
next_year = timezone.now().year + 5 # Arbitrary 5 year future limit
title = os.path.basename(self.document_path)
# if filename date parsing is enabled, search there first:
if self.FILENAME_DATE_ORDER:
self.log("info", "Checking document title for date")
for m in re.finditer(DATE_REGEX, title):
date_string = m.group(0)
try:
date = __parser(date_string, self.FILENAME_DATE_ORDER)
except TypeError:
# Skip all matches that do not parse to a proper date
continue
if date is not None and next_year > date.year > 1900:
self.log(
"info",
"Detected document date {} based on string {} "
"from document title"
"".format(date.isoformat(), date_string)
)
return date
try:
# getting text after checking filename will save time if only
# looking at the filename instead of the whole text
text = self.get_text()
except ParseError:
return None
next_year = timezone.now().year + 5 # Arbitrary 5 year future limit
# Iterate through all regex matches and try to parse the date
# Iterate through all regex matches in text and try to parse the date
for m in re.finditer(DATE_REGEX, text):
date_string = m.group(0)
try:
date = dateparser.parse(
date_string,
settings={
"DATE_ORDER": self.DATE_ORDER,
"PREFER_DAY_OF_MONTH": "first",
"RETURN_AS_TIMEZONE_AWARE": True
}
)
date = __parser(date_string, self.DATE_ORDER)
except TypeError:
# Skip all matches that do not parse to a proper date
continue

View File

@@ -7,7 +7,14 @@ class CorrespondentSerializer(serializers.HyperlinkedModelSerializer):
class Meta:
model = Correspondent
fields = ("id", "slug", "name")
fields = (
"id",
"slug",
"name",
"match",
"matching_algorithm",
"is_insensitive"
)
class TagSerializer(serializers.HyperlinkedModelSerializer):
@@ -15,7 +22,14 @@ class TagSerializer(serializers.HyperlinkedModelSerializer):
class Meta:
model = Tag
fields = (
"id", "slug", "name", "colour", "match", "matching_algorithm")
"id",
"slug",
"name",
"colour",
"match",
"matching_algorithm",
"is_insensitive"
)
class CorrespondentField(serializers.HyperlinkedRelatedField):
@@ -46,6 +60,7 @@ class DocumentSerializer(serializers.ModelSerializer):
"checksum",
"created",
"modified",
"added",
"file_name",
"download_url",
"thumbnail_url",

View File

@@ -0,0 +1,66 @@
// The following jQuery snippet adds a small square next to each tag colour
// drop-down in the admin and keeps it filled with the colour that is
// currently selected. It covers both a single `#id_colour` drop-down (e.g. the
// `Add tag` page) and any number of drop-downs whose id contains `-colour`.

django.jQuery(document).ready(function () {

  const $ = django.jQuery;

  // Inline styling for the square shown next to the single `#id_colour` select.
  const squareStyle = {
    'float': 'left',
    'width': '20px',
    'height': '20px',
    'margin': '5px',
    'border': '1px solid rgba(0, 0, 0, .2)'
  };

  // Return the label of the option currently selected in `select`; that label
  // is what gets used as the CSS background of the square. Reading the
  // selected option directly (instead of indexing the options by `value - 1`)
  // keeps this working when the option values are not a gapless 1..N
  // sequence, and yields "" instead of throwing when nothing is selected.
  function selectedColour(select) {
    const option = select.options[select.selectedIndex];
    return option ? option.text : '';
  }

  const $single = $('#id_colour');

  if ($single.length) {

    $single.after('<div class="colour_square"></div>');

    $('.colour_square')
      .css(squareStyle)
      .css({'background': selectedColour($single[0])});

    $single.change(function () {
      $('.colour_square').css({'background': selectedColour(this)});
    });

  } else if ($("select[id*='colour']").length) {

    const $selects = $('select[id*="-colour"]');

    // The squares all share one stylesheet rule, so add it to the page once
    // rather than once per drop-down.
    if ($selects.length) {
      $(
        "<style type='text/css'>" +
        ".colour_square{ " +
        "float: left; " +
        "width: 20px; " +
        "height: 20px; " +
        "margin: 5px; " +
        "border: 1px solid rgba(0,0,0,.2); " +
        "} </style>"
      ).appendTo('head');
    }

    $selects.each(function (index, element) {

      // Each drop-down gets its own square, addressed by a per-index id.
      const id = 'colour_square_' + index;

      $(element).after('<div class="colour_square" id="' + id + '"></div>');
      $('#' + id).css({'background': selectedColour(element)});

      $(element).change(function () {
        $('#' + id).css({'background': selectedColour(element)});
      });

    });

  }

});

View File

@@ -3,10 +3,63 @@
{# NOTE: This should probably be extending base.html. See CSS comment below for details. #}
{% load static %}
{% load custom_css from customisation %}
{% load custom_js from customisation %}
{% block extrahead %}
<link rel="icon" type="image/x-icon" href="{% url 'favicon' %}" />
<style>
#header {
background-color: #90a9b7;
line-height: inherit;
height: auto;
}
#branding h1 {
font-weight: inherit;
font-size: inherit;
}
.button,
.button:active,
.button:focus,
.button:hover,
a.button,
.submit-row input,
input[type="submit"],
input[type="submit"]:active,
input[type="submit"]:focus,
input[type="submit"]:hover,
input[type="button"],
input[type="button"]:active,
input[type="button"]:focus,
input[type="button"]:hover {
background-color: #074f57;
}
.module h2,
.module caption,
.inline-group h2 {
background-color: #90a9b7;
}
div.breadcrumbs {
background-color: #077187;
}
.module h2,
.module caption,
.inline-group h2 {
background-color: #077187;
}
</style>
{% endblock %}
{% block branding %}
<h1 id="site-name">
<a href="{% url 'admin:index' %}"><img src="{% static 'paperless/img/logo-light.png' %}" alt="Paperless" /></a>
</h1>
{% endblock %}
{% block blockbots %}
{% comment %}

View File

@@ -28,7 +28,7 @@
}
.result .header {
padding: 5px;
background-color: #79AEC8;
background-color: #90a9b7;
position: relative;
}
.result .header .checkbox {

View File

@@ -2,6 +2,7 @@ from django.http import HttpResponse, HttpResponseBadRequest
from django.views.generic import DetailView, FormView, TemplateView
from django_filters.rest_framework import DjangoFilterBackend
from django.conf import settings
from django.utils import cache
from paperless.db import GnuPG
from paperless.mixins import SessionOrBasicAuthMixin
@@ -56,10 +57,12 @@ class FetchView(SessionOrBasicAuthMixin, DetailView):
}
if self.kwargs["kind"] == "thumb":
return HttpResponse(
response = HttpResponse(
self._get_raw_data(self.object.thumbnail_file),
content_type=content_types[Document.TYPE_PNG]
)
cache.patch_cache_control(response, max_age=31536000, private=True)
return response
response = HttpResponse(
self._get_raw_data(self.object.source_file),
@@ -130,7 +133,7 @@ class DocumentViewSet(RetrieveModelMixin,
filter_class = DocumentFilterSet
search_fields = ("title", "correspondent__name", "content")
ordering_fields = (
"id", "title", "correspondent__name", "created", "modified")
"id", "title", "correspondent__name", "created", "modified", "added")
class LogViewSet(ReadOnlyModelViewSet):

View File

@@ -72,6 +72,7 @@ INSTALLED_APPS = [
"corsheaders",
"django_extensions",
"paperless",
"documents.apps.DocumentsConfig",
"reminders.apps.RemindersConfig",
"paperless_tesseract.apps.PaperlessTesseractConfig",
@@ -82,6 +83,7 @@ INSTALLED_APPS = [
"rest_framework",
"crispy_forms",
"django_filters",
"djangoql",
]
@@ -152,6 +154,10 @@ if os.getenv("PAPERLESS_DBUSER"):
}
if os.getenv("PAPERLESS_DBPASS"):
DATABASES["default"]["PASSWORD"] = os.getenv("PAPERLESS_DBPASS")
if os.getenv("PAPERLESS_DBHOST"):
DATABASES["default"]["HOST"] = os.getenv("PAPERLESS_DBHOST")
if os.getenv("PAPERLESS_DBPORT"):
DATABASES["default"]["PORT"] = os.getenv("PAPERLESS_DBPORT")
# Password validation
@@ -199,6 +205,16 @@ STATIC_URL = os.getenv("PAPERLESS_STATIC_URL", "/static/")
MEDIA_URL = os.getenv("PAPERLESS_MEDIA_URL", "/media/")
# Other
# Disable Django's artificial limit on the number of form fields to submit at
# once. This is a protection against overloading the server, but since this is
# a self-hosted sort of gig, the benefits of being able to mass-delete a tonne
# of log entries outweigh the benefits of such a safeguard.
DATA_UPLOAD_MAX_NUMBER_FIELDS = None
# Paperless-specific stuff
# You shouldn't have to edit any of these values. Rather, you can set these
# values in /etc/paperless.conf instead.
@@ -296,6 +312,7 @@ FY_END = os.getenv("PAPERLESS_FINANCIAL_YEAR_END")
# Specify the default date order (for autodetected dates)
DATE_ORDER = os.getenv("PAPERLESS_DATE_ORDER", "DMY")
FILENAME_DATE_ORDER = os.getenv("PAPERLESS_FILENAME_DATE_ORDER")
# Specify for how many years a correspondent is considered recent. Recent
# correspondents will be shown in a separate "Recent correspondents" filter as

Binary file not shown.

After

Width:  |  Height:  |  Size: 108 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.6 KiB

View File

@@ -6,6 +6,7 @@ from django.views.decorators.csrf import csrf_exempt
from django.views.generic import RedirectView
from rest_framework.routers import DefaultRouter
from paperless.views import FaviconView
from documents.views import (
CorrespondentViewSet,
DocumentViewSet,
@@ -44,6 +45,9 @@ urlpatterns = [
# File uploads
url(r"^push$", csrf_exempt(PushView.as_view()), name="push"),
# Favicon
url(r"^favicon.ico$", FaviconView.as_view(), name="favicon"),
# The Django admin
url(r"admin/", admin.site.urls),

View File

@@ -1 +1 @@
__version__ = (2, 5, 0)
__version__ = (2, 6, 1)

View File

@@ -1,3 +1,7 @@
import os
from django.http import HttpResponse
from django.views.generic import View
from rest_framework.pagination import PageNumberPagination
@@ -5,3 +9,17 @@ class StandardPagination(PageNumberPagination):
page_size = 25
page_size_query_param = "page-size"
max_page_size = 100000
class FaviconView(View):
    """
    Serve the ``favicon.ico`` file that ships in this package's static files.
    """

    def get(self, request, *args, **kwargs):
        # The icon lives in static/paperless/img/ relative to this module.
        package_dir = os.path.dirname(__file__)
        icon_path = os.path.join(
            package_dir, "static", "paperless", "img", "favicon.ico")

        # Read the whole icon into memory so the file handle is closed before
        # the response is built.
        with open(icon_path, "rb") as icon:
            payload = icon.read()

        return HttpResponse(payload, content_type="image/x-icon")

View File

@@ -153,7 +153,10 @@ class RasterisedDocumentParser(DocumentParser):
)
raw_text = self._assemble_ocr_sections(imgs, middle, raw_text)
return raw_text
raise OCRError("Language detection failed")
error_msg = ("Language detection failed. Set "
"PAPERLESS_FORGIVING_OCR in config file to continue "
"anyway.")
raise OCRError(error_msg)
if ISO639[guessed_language] == self.DEFAULT_OCR_LANGUAGE:
raw_text = self._assemble_ocr_sections(imgs, middle, raw_text)
@@ -218,7 +221,8 @@ def run_convert(*args):
def run_unpaper(args):
unpaper, pnm = args
command_args = unpaper, pnm, pnm.replace(".pnm", ".unpaper.pnm")
command_args = (unpaper, "--overwrite", pnm,
pnm.replace(".pnm", ".unpaper.pnm"))
if not subprocess.Popen(command_args).wait() == 0:
raise ParseError("Unpaper failed at {}".format(command_args))

Binary file not shown.

Before

Width:  |  Height:  |  Size: 136 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 135 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 138 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 138 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 136 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 136 KiB

View File

@@ -8,6 +8,7 @@ from dateutil import tz
from django.test import TestCase
from ..parsers import RasterisedDocumentParser
from django.conf import settings
class TestDate(TestCase):
@@ -15,73 +16,67 @@ class TestDate(TestCase):
SAMPLE_FILES = os.path.join(os.path.dirname(__file__), "samples")
SCRATCH = "/tmp/paperless-tests-{}".format(str(uuid4())[:8])
MOCK_SCRATCH = "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH" # NOQA: E501
def setUp(self):
os.makedirs(self.SCRATCH, exist_ok=True)
def tearDown(self):
shutil.rmtree(self.SCRATCH)
@mock.patch(
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
SCRATCH
)
@mock.patch(MOCK_SCRATCH, SCRATCH)
def test_date_format_1(self):
input_file = os.path.join(self.SAMPLE_FILES, "")
document = RasterisedDocumentParser(input_file)
document._text = "lorem ipsum 130218 lorem ipsum"
self.assertEqual(document.get_date(), None)
@mock.patch(
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
SCRATCH
)
@mock.patch(MOCK_SCRATCH, SCRATCH)
def test_date_format_2(self):
input_file = os.path.join(self.SAMPLE_FILES, "")
document = RasterisedDocumentParser(input_file)
document._text = "lorem ipsum 2018 lorem ipsum"
self.assertEqual(document.get_date(), None)
@mock.patch(
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
SCRATCH
)
@mock.patch(MOCK_SCRATCH, SCRATCH)
def test_date_format_3(self):
input_file = os.path.join(self.SAMPLE_FILES, "")
document = RasterisedDocumentParser(input_file)
document._text = "lorem ipsum 20180213 lorem ipsum"
self.assertEqual(document.get_date(), None)
@mock.patch(
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
SCRATCH
)
@mock.patch(MOCK_SCRATCH, SCRATCH)
def test_date_format_4(self):
input_file = os.path.join(self.SAMPLE_FILES, "")
document = RasterisedDocumentParser(input_file)
document._text = "lorem ipsum 13.02.2018 lorem ipsum"
date = document.get_date()
self.assertEqual(
document.get_date(),
datetime.datetime(2018, 2, 13, 0, 0, tzinfo=tz.tzutc())
date,
datetime.datetime(
2018, 2, 13, 0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE)
)
)
@mock.patch(
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
SCRATCH
)
@mock.patch(MOCK_SCRATCH, SCRATCH)
def test_date_format_5(self):
input_file = os.path.join(self.SAMPLE_FILES, "")
document = RasterisedDocumentParser(input_file)
document._text = (
"lorem ipsum 130218, 2018, 20180213 and 13.02.2018 lorem ipsum")
"lorem ipsum 130218, 2018, 20180213 and lorem 13.02.2018 lorem "
"ipsum"
)
date = document.get_date()
self.assertEqual(
document.get_date(),
datetime.datetime(2018, 2, 13, 0, 0, tzinfo=tz.tzutc())
date,
datetime.datetime(
2018, 2, 13, 0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE)
)
)
@mock.patch(
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
SCRATCH
)
@mock.patch(MOCK_SCRATCH, SCRATCH)
def test_date_format_6(self):
input_file = os.path.join(self.SAMPLE_FILES, "")
document = RasterisedDocumentParser(input_file)
@@ -98,10 +93,7 @@ class TestDate(TestCase):
)
self.assertEqual(document.get_date(), None)
@mock.patch(
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
SCRATCH
)
@mock.patch(MOCK_SCRATCH, SCRATCH)
def test_date_format_7(self):
input_file = os.path.join(self.SAMPLE_FILES, "")
document = RasterisedDocumentParser(input_file)
@@ -110,289 +102,62 @@ class TestDate(TestCase):
"März 2019\n"
"lorem ipsum"
)
date = document.get_date()
self.assertEqual(
document.get_date(),
datetime.datetime(2019, 3, 1, 0, 0, tzinfo=tz.tzutc())
date,
datetime.datetime(
2019, 3, 1, 0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE)
)
)
@mock.patch(
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
SCRATCH
)
@mock.patch(MOCK_SCRATCH, SCRATCH)
def test_date_format_8(self):
input_file = os.path.join(self.SAMPLE_FILES, "")
document = RasterisedDocumentParser(input_file)
document._text = ("lorem ipsum\n"
"Wohnort\n"
"3100\n"
"IBAN\n"
"AT87 4534\n"
"1234\n"
"1234 5678\n"
"BIC\n"
"lorem ipsum\n"
"März 2020")
self.assertEqual(document.get_date(),
datetime.datetime(2020, 3, 1, 0, 0,
tzinfo=tz.tzutc()))
document._text = (
"lorem ipsum\n"
"Wohnort\n"
"3100\n"
"IBAN\n"
"AT87 4534\n"
"1234\n"
"1234 5678\n"
"BIC\n"
"lorem ipsum\n"
"März 2020"
)
self.assertEqual(
document.get_date(),
datetime.datetime(
2020, 3, 1, 0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE)
)
)
@mock.patch(
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
SCRATCH
)
@mock.patch(MOCK_SCRATCH, SCRATCH)
def test_date_format_9(self):
input_file = os.path.join(self.SAMPLE_FILES, "")
document = RasterisedDocumentParser(input_file)
document._text = ("lorem ipsum\n"
"27. Nullmonth 2020\n"
"März 2020\n"
"lorem ipsum")
self.assertEqual(document.get_date(),
datetime.datetime(2020, 3, 1, 0, 0,
tzinfo=tz.tzutc()))
@mock.patch(
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
SCRATCH
)
def test_get_text_1_pdf(self):
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_1.pdf")
document = RasterisedDocumentParser(input_file)
document.get_text()
self.assertEqual(document._is_ocred(), True)
self.assertEqual(
document.get_date(),
datetime.datetime(2018, 4, 1, 0, 0, tzinfo=tz.tzutc())
document._text = (
"lorem ipsum\n"
"27. Nullmonth 2020\n"
"März 2020\n"
"lorem ipsum"
)
@mock.patch(
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
SCRATCH
)
def test_get_text_1_png(self):
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_1.png")
document = RasterisedDocumentParser(input_file)
document.get_text()
self.assertEqual(document._is_ocred(), False)
self.assertEqual(
document.get_date(),
datetime.datetime(2018, 4, 1, 0, 0, tzinfo=tz.tzutc())
)
@mock.patch(
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
SCRATCH
)
def test_get_text_2_pdf(self):
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_2.pdf")
document = RasterisedDocumentParser(input_file)
document.get_text()
self.assertEqual(document._is_ocred(), True)
self.assertEqual(
document.get_date(),
datetime.datetime(2013, 2, 1, 0, 0, tzinfo=tz.tzutc())
)
@mock.patch(
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
SCRATCH
)
def test_get_text_2_png(self):
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_2.png")
document = RasterisedDocumentParser(input_file)
document.get_text()
self.assertEqual(document._is_ocred(), False)
self.assertEqual(
document.get_date(),
datetime.datetime(2013, 2, 1, 0, 0, tzinfo=tz.tzutc())
)
@mock.patch(
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
SCRATCH
)
def test_get_text_3_pdf(self):
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_3.pdf")
document = RasterisedDocumentParser(input_file)
document.get_text()
self.assertEqual(document._is_ocred(), True)
self.assertEqual(
document.get_date(),
datetime.datetime(2018, 10, 5, 0, 0, tzinfo=tz.tzutc())
)
@mock.patch(
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
SCRATCH
)
def test_get_text_3_png(self):
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_3.png")
document = RasterisedDocumentParser(input_file)
document.get_text()
self.assertEqual(document._is_ocred(), False)
self.assertEqual(
document.get_date(),
datetime.datetime(2018, 10, 5, 0, 0, tzinfo=tz.tzutc())
)
@mock.patch(
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
SCRATCH
)
def test_get_text_4_pdf(self):
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_4.pdf")
document = RasterisedDocumentParser(input_file)
document.get_text()
self.assertEqual(document._is_ocred(), True)
self.assertEqual(
document.get_date(),
datetime.datetime(2018, 10, 5, 0, 0, tzinfo=tz.tzutc())
)
@mock.patch(
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
SCRATCH
)
def test_get_text_4_png(self):
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_4.png")
document = RasterisedDocumentParser(input_file)
document.get_text()
self.assertEqual(document._is_ocred(), False)
self.assertEqual(
document.get_date(),
datetime.datetime(2018, 10, 5, 0, 0, tzinfo=tz.tzutc())
)
@mock.patch(
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
SCRATCH
)
def test_get_text_5_pdf(self):
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_5.pdf")
document = RasterisedDocumentParser(input_file)
document.get_text()
self.assertEqual(document._is_ocred(), True)
self.assertEqual(
document.get_date(),
datetime.datetime(2018, 12, 17, 0, 0, tzinfo=tz.tzutc())
)
@mock.patch(
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
SCRATCH
)
def test_get_text_5_png(self):
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_5.png")
document = RasterisedDocumentParser(input_file)
document.get_text()
self.assertEqual(document._is_ocred(), False)
self.assertEqual(
document.get_date(),
datetime.datetime(2018, 12, 17, 0, 0, tzinfo=tz.tzutc())
)
@mock.patch(
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
SCRATCH
)
def test_get_text_6_pdf_us(self):
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_6.pdf")
document = RasterisedDocumentParser(input_file)
document.get_text()
document.DATE_ORDER = "MDY"
self.assertEqual(document._is_ocred(), True)
self.assertEqual(
document.get_date(),
datetime.datetime(2018, 12, 17, 0, 0, tzinfo=tz.tzutc())
)
@mock.patch(
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
SCRATCH
)
def test_get_text_6_png_us(self):
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_6.png")
document = RasterisedDocumentParser(input_file)
document.get_text()
document.DATE_ORDER = "MDY"
self.assertEqual(document._is_ocred(), False)
self.assertEqual(
document.get_date(),
datetime.datetime(2018, 12, 17, 0, 0, tzinfo=tz.tzutc())
)
@mock.patch(
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
SCRATCH
)
def test_get_text_6_pdf_eu(self):
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_6.pdf")
document = RasterisedDocumentParser(input_file)
document.get_text()
self.assertEqual(document._is_ocred(), True)
self.assertEqual(document.get_date(), None)
@mock.patch(
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
SCRATCH
)
def test_get_text_6_png_eu(self):
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_6.png")
document = RasterisedDocumentParser(input_file)
document.get_text()
self.assertEqual(document._is_ocred(), False)
self.assertEqual(document.get_date(), None)
@mock.patch(
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
SCRATCH
)
def test_get_text_7_pdf(self):
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_7.pdf")
document = RasterisedDocumentParser(input_file)
document.get_text()
self.assertEqual(document._is_ocred(), True)
self.assertEqual(
document.get_date(),
datetime.datetime(2018, 4, 1, 0, 0, tzinfo=tz.tzutc())
)
@mock.patch(
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
SCRATCH
)
def test_get_text_8_pdf(self):
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_8.pdf")
document = RasterisedDocumentParser(input_file)
document.get_text()
self.assertEqual(document._is_ocred(), True)
self.assertEqual(
document.get_date(),
datetime.datetime(2017, 12, 31, 0, 0, tzinfo=tz.tzutc())
)
@mock.patch(
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
SCRATCH
)
def test_get_text_9_pdf(self):
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_9.pdf")
document = RasterisedDocumentParser(input_file)
document.get_text()
self.assertEqual(document._is_ocred(), True)
self.assertEqual(
document.get_date(),
datetime.datetime(2017, 12, 31, 0, 0, tzinfo=tz.tzutc())
datetime.datetime(
2020, 3, 1, 0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE)
)
)
@mock.patch(
"paperless_tesseract.parsers.RasterisedDocumentParser.get_text",
return_value="01-07-0590 00:00:00"
)
@mock.patch(
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
SCRATCH
)
@mock.patch(MOCK_SCRATCH, SCRATCH)
def test_crazy_date_past(self, *args):
document = RasterisedDocumentParser("/dev/null")
document.get_text()
@@ -402,10 +167,7 @@ class TestDate(TestCase):
"paperless_tesseract.parsers.RasterisedDocumentParser.get_text",
return_value="01-07-2350 00:00:00"
)
@mock.patch(
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
SCRATCH
)
@mock.patch(MOCK_SCRATCH, SCRATCH)
def test_crazy_date_future(self, *args):
document = RasterisedDocumentParser("/dev/null")
document.get_text()
@@ -415,10 +177,7 @@ class TestDate(TestCase):
"paperless_tesseract.parsers.RasterisedDocumentParser.get_text",
return_value="01-07-0590 00:00:00"
)
@mock.patch(
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
SCRATCH
)
@mock.patch(MOCK_SCRATCH, SCRATCH)
def test_crazy_date_past(self, *args):
document = RasterisedDocumentParser("/dev/null")
document.get_text()

View File

@@ -17,6 +17,5 @@ deps=pycodestyle
[testenv:doc]
deps =
-r{toxinidir}/../requirements.txt
sphinx
-r {toxinidir}/../requirements.txt
commands=sphinx-build -b html ../docs ../docs/_build -W