Compare commits
100 Commits
Author | SHA1 | Date | |
---|---|---|---|
![]() |
94c2950afe | ||
![]() |
9f56bf9992 | ||
![]() |
6df35e4cb7 | ||
![]() |
b4b7d167d1 | ||
![]() |
4936fad542 | ||
![]() |
3c78105fd7 | ||
![]() |
a58a7ce0f7 | ||
![]() |
792aeee11e | ||
![]() |
5588e86855 | ||
![]() |
97f1e4ab16 | ||
![]() |
e4dece8e53 | ||
![]() |
c5c204f605 | ||
![]() |
611ec6840b | ||
![]() |
2cd077d12d | ||
![]() |
4efb153e86 | ||
![]() |
25e953bbf0 | ||
![]() |
0509d5a3d2 | ||
![]() |
5e674f17af | ||
![]() |
7c7a814096 | ||
![]() |
43e71cfcaa | ||
![]() |
79868930f1 | ||
![]() |
0256dcbe32 | ||
![]() |
29db177ce2 | ||
![]() |
5c1edf78ce | ||
![]() |
60e8990a7b | ||
![]() |
75a79ac204 | ||
![]() |
0c47907dda | ||
![]() |
cea8332038 | ||
![]() |
5982cb693a | ||
![]() |
73a02d40c4 | ||
![]() |
b541765817 | ||
![]() |
28ffd1ec6b | ||
![]() |
5760aa0894 | ||
![]() |
562e5f644d | ||
![]() |
5ab2009ebf | ||
![]() |
637b0d4cc2 | ||
![]() |
4a71c33537 | ||
![]() |
cf36c8467e | ||
![]() |
dafa6a4c71 | ||
![]() |
a3c5ec834d | ||
![]() |
be57dbe4c8 | ||
![]() |
4d50c7e105 | ||
![]() |
27af2603f5 | ||
![]() |
ff5b34179a | ||
![]() |
0334617287 | ||
![]() |
f8b43fa74b | ||
![]() |
1ff06d0dd9 | ||
![]() |
4ad6813d11 | ||
![]() |
cbc5f0603f | ||
![]() |
0d21bdeffa | ||
![]() |
b1f9b18b8c | ||
![]() |
4d13521f36 | ||
![]() |
7b4785bdb9 | ||
![]() |
baf89cad8e | ||
![]() |
3c2a1a8c13 | ||
![]() |
1c7047bbb8 | ||
![]() |
96dafe8c43 | ||
![]() |
d6896daece | ||
![]() |
d12f0642f2 | ||
![]() |
a19f0ef97e | ||
![]() |
ec7125b6bb | ||
![]() |
e3a616ebc3 | ||
![]() |
f898ec792f | ||
![]() |
f45b6762f2 | ||
![]() |
d544f269e0 | ||
![]() |
650db75c2b | ||
![]() |
7dbb77e57b | ||
![]() |
f1b3312bcb | ||
![]() |
ea05ab2b06 | ||
![]() |
4f4c515629 | ||
![]() |
c1f926a40c | ||
![]() |
c1d18c1e83 | ||
![]() |
ba452e0524 | ||
![]() |
c5488dcb98 | ||
![]() |
d6eefbccee | ||
![]() |
a813288aaf | ||
![]() |
63e2fbe0c9 | ||
![]() |
597a7bb391 | ||
![]() |
730daa3d6d | ||
![]() |
c225281f95 | ||
![]() |
e1d8744c66 | ||
![]() |
4409f65840 | ||
![]() |
c83dc666a4 | ||
![]() |
9ab50ed09d | ||
![]() |
e0acb4a40b | ||
![]() |
eca6250c1b | ||
![]() |
33abec0663 | ||
![]() |
d825667c9b | ||
![]() |
84511f8418 | ||
![]() |
81e488b90d | ||
![]() |
bff28113df | ||
![]() |
0b377a76d0 | ||
![]() |
ec1d5c80ff | ||
![]() |
bd95804fbf | ||
![]() |
8dc355a66f | ||
![]() |
fbb389553c | ||
![]() |
f8cfbb44d2 | ||
![]() |
818780a191 | ||
![]() |
b350ec48b7 | ||
![]() |
f948ee11be |
28
.editorconfig
Normal file
@@ -0,0 +1,28 @@
|
||||
# EditorConfig: http://EditorConfig.org
|
||||
|
||||
root = true
|
||||
|
||||
[*]
|
||||
indent_style = tab
|
||||
indent_size = 2
|
||||
insert_final_newline = true
|
||||
trim_trailing_whitespace = true
|
||||
end_of_line = lf
|
||||
charset = utf-8
|
||||
max_line_length = 79
|
||||
|
||||
[{*.html,*.css,*.js}]
|
||||
max_line_length = off
|
||||
|
||||
[*.py]
|
||||
indent_size = 4
|
||||
indent_style = space
|
||||
|
||||
[*.yml]
|
||||
indent_style = space
|
||||
|
||||
# Tests don't get a line width restriction. It's still a good idea to follow
|
||||
# the 79 character rule, but in the interests of clarity, tests often need to
|
||||
# violate it.
|
||||
[**/test_*.py]
|
||||
max_line_length = off
|
4
.gitignore
vendored
@@ -66,6 +66,7 @@ media/overrides.js
|
||||
|
||||
# Sqlite database
|
||||
db.sqlite3
|
||||
db.sqlite3-journal
|
||||
|
||||
# PyCharm
|
||||
.idea
|
||||
@@ -73,7 +74,6 @@ db.sqlite3
|
||||
# Other stuff that doesn't belong
|
||||
.virtualenv
|
||||
virtualenv
|
||||
.vagrant
|
||||
docker-compose.yml
|
||||
docker-compose.env
|
||||
|
||||
@@ -82,4 +82,4 @@ scripts/import-for-development
|
||||
scripts/nuke
|
||||
|
||||
# Static files collected by the collectstatic command
|
||||
static/
|
||||
/static/
|
||||
|
17
.travis.yml
@@ -2,19 +2,22 @@ language: python
|
||||
|
||||
before_install:
|
||||
- sudo apt-get update -qq
|
||||
- sudo apt-get install -qq libpoppler-cpp-dev unpaper tesseract-ocr tesseract-ocr-eng tesseract-ocr-cat
|
||||
- sudo apt-get install -qq libpoppler-cpp-dev unpaper tesseract-ocr
|
||||
|
||||
sudo: false
|
||||
|
||||
matrix:
|
||||
include:
|
||||
- python: 3.4
|
||||
- python: 3.5
|
||||
- python: 3.6
|
||||
- python: "3.4"
|
||||
- python: "3.5"
|
||||
- python: "3.6"
|
||||
- python: "3.7-dev"
|
||||
|
||||
install:
|
||||
- pip install --requirement requirements.txt
|
||||
- pip install sphinx
|
||||
- pip install --upgrade pip pipenv sphinx
|
||||
- pipenv lock -r > requirements.txt
|
||||
- pip install -r requirements.txt
|
||||
|
||||
script:
|
||||
- cd src/
|
||||
- pytest --cov
|
||||
@@ -22,4 +25,4 @@ script:
|
||||
- sphinx-build -b html ../docs ../docs/_build -W
|
||||
|
||||
after_success:
|
||||
- coveralls
|
||||
- coveralls
|
||||
|
11
Dockerfile
@@ -4,8 +4,8 @@ LABEL maintainer="The Paperless Project https://github.com/danielquinn/paperless
|
||||
contributors="Guy Addadi <addadi@gmail.com>, Pit Kleyersburg <pitkley@googlemail.com>, \
|
||||
Sven Fischer <git-dev@linux4tw.de>"
|
||||
|
||||
# Copy requirements file and init script
|
||||
COPY requirements.txt /usr/src/paperless/
|
||||
# Copy Pipfiles and init script
|
||||
COPY Pipfile* /usr/src/paperless/
|
||||
COPY scripts/docker-entrypoint.sh /sbin/docker-entrypoint.sh
|
||||
|
||||
# Set export and consumption directories
|
||||
@@ -13,15 +13,16 @@ ENV PAPERLESS_EXPORT_DIR=/export \
|
||||
PAPERLESS_CONSUMPTION_DIR=/consume
|
||||
|
||||
|
||||
RUN apk update --no-cache && apk add python3 gnupg libmagic bash shadow curl \
|
||||
RUN apk update --no-cache && apk add python3 gnupg libmagic libpq bash shadow curl \
|
||||
sudo poppler tesseract-ocr imagemagick ghostscript unpaper optipng && \
|
||||
apk add --virtual .build-dependencies \
|
||||
python3-dev poppler-dev gcc g++ musl-dev zlib-dev jpeg-dev && \
|
||||
python3-dev poppler-dev postgresql-dev gcc g++ musl-dev zlib-dev jpeg-dev && \
|
||||
# Install python dependencies
|
||||
python3 -m ensurepip && \
|
||||
rm -r /usr/lib/python*/ensurepip && \
|
||||
cd /usr/src/paperless && \
|
||||
pip3 install --no-cache-dir -r requirements.txt && \
|
||||
pip3 install --upgrade pip pipenv && \
|
||||
pipenv install --system --deploy && \
|
||||
# Remove build dependencies
|
||||
apk del .build-dependencies && \
|
||||
# Create the consumption directory
|
||||
|
7
Pipfile
@@ -25,6 +25,8 @@ python-dateutil = "*"
|
||||
python-dotenv = "*"
|
||||
python-gnupg = "*"
|
||||
pytz = "*"
|
||||
sphinx = "*"
|
||||
tox = "*"
|
||||
pycodestyle = "*"
|
||||
pytest = "*"
|
||||
pytest-cov = "*"
|
||||
@@ -32,9 +34,8 @@ pytest-django = "*"
|
||||
pytest-sugar = "*"
|
||||
pytest-env = "*"
|
||||
pytest-xdist = "*"
|
||||
psycopg2 = "*"
|
||||
djangoql = "*"
|
||||
|
||||
[dev-packages]
|
||||
ipython = "*"
|
||||
sphinx = "*"
|
||||
tox = "*"
|
||||
|
||||
|
719
Pipfile.lock
generated
@@ -1,7 +1,6 @@
|
||||
*[English](README.md)*<br/>
|
||||
*[Greek](README-el.md)*
|
||||
[ [en](README.md) | de | [el](README-el.md) ]
|
||||
|
||||
# Paperless
|
||||

|
||||
|
||||
[](https://paperless.readthedocs.org/) [](https://gitter.im/danielquinn/paperless) [](https://travis-ci.org/danielquinn/paperless) [](https://coveralls.io/github/danielquinn/paperless?branch=master) [](https://github.com/danielquinn/paperless/blob/master/THANKS.md)
|
||||
|
||||
|
@@ -1,7 +1,6 @@
|
||||
*[English](README.md)*<br/>
|
||||
*[German](README-de.md)*
|
||||
[ [en](README.md) | [de](README-de.md) | el ]
|
||||
|
||||
# Paperless
|
||||

|
||||
|
||||
[](https://paperless.readthedocs.org/) [](https://gitter.im/danielquinn/paperless) [](https://travis-ci.org/danielquinn/paperless) [](https://coveralls.io/github/danielquinn/paperless?branch=master) [](https://github.com/danielquinn/paperless/blob/master/THANKS.md)
|
||||
|
||||
|
@@ -1,7 +1,6 @@
|
||||
*[German](README-de.md)*<br/>
|
||||
*[Greek](README-el.md)*
|
||||
[ en | [de](README-de.md) | [el](README-el.md) ]
|
||||
|
||||
# Paperless
|
||||

|
||||
|
||||
[](https://paperless.readthedocs.org/) [](https://gitter.im/danielquinn/paperless) [](https://travis-ci.org/danielquinn/paperless) [](https://coveralls.io/github/danielquinn/paperless?branch=master) [](https://github.com/danielquinn/paperless/blob/master/THANKS.md)
|
||||
|
||||
|
20
Vagrantfile
vendored
@@ -1,20 +0,0 @@
|
||||
# -*- mode: ruby -*-
|
||||
# vi: set ft=ruby :
|
||||
|
||||
VAGRANT_API_VERSION = "2"
|
||||
Vagrant.configure(VAGRANT_API_VERSION) do |config|
|
||||
config.vm.box = "ubuntu/trusty64"
|
||||
|
||||
# Provision using shell
|
||||
config.vm.host_name = "dev.paperless"
|
||||
config.vm.synced_folder ".", "/opt/paperless"
|
||||
config.vm.provision "shell", path: "scripts/vagrant-provision"
|
||||
|
||||
# Networking details
|
||||
config.vm.network "private_network", ip: "172.28.128.4"
|
||||
|
||||
config.vm.provider "virtualbox" do |vb|
|
||||
# Customize the amount of memory on the VM:
|
||||
vb.memory = "1024"
|
||||
end
|
||||
end
|
@@ -17,6 +17,9 @@ services:
|
||||
volumes:
|
||||
- data:/usr/src/paperless/data
|
||||
- media:/usr/src/paperless/media
|
||||
# You have to adapt the local path you want the consumption
|
||||
# directory to mount to by modifying the part before the ':'.
|
||||
- ./consume:/consume
|
||||
env_file: docker-compose.env
|
||||
# The reason the line is here is so that the webserver that doesn't do
|
||||
# any text recognition and doesn't have to install unnecessary
|
||||
@@ -36,8 +39,8 @@ services:
|
||||
volumes:
|
||||
- data:/usr/src/paperless/data
|
||||
- media:/usr/src/paperless/media
|
||||
# You have to adapt the local path you want the consumption
|
||||
# directory to mount to by modifying the part before the ':'.
|
||||
# This should be set to the same value as the consume directory
|
||||
# in the webserver service above.
|
||||
- ./consume:/consume
|
||||
# Likewise, you can add a local path to mount a directory for
|
||||
# exporting. This is not strictly needed for paperless to
|
||||
|
@@ -1,6 +1,56 @@
|
||||
Changelog
|
||||
#########
|
||||
|
||||
2.7.0
|
||||
=====
|
||||
|
||||
* `syntonym`_ submitted a pull request to catch IMAP connection errors `#475`_.
|
||||
* `Stéphane Brunner`_ added ``psycopg2`` to the Pipfile `#489`_. He also fixed
|
||||
a syntax error in ``docker-compose.yml.example`` `#488`_ and added `DjangoQL <https://github.com/ivelum/djangoql>`_,
|
||||
which allows a litany of handy search functionality `#492`_.
|
||||
* `CkuT`_ and `JOKer`_ hacked out a simple, but super-helpful optimisation to
|
||||
how the thumbnails are served up, improving performance considerably `#481`_.
|
||||
* `tsia`_ added a few fields to the tags REST API `#483`_.
|
||||
* `Brian Cribbs`_ improved the documentation to help people using Paperless
|
||||
over NFS `#484`_.
|
||||
* `Brendan M. Sleight`_ updated the documentation to include a note for setting the
|
||||
``DEBUG`` value. The ``paperless.conf.example`` file was also updated to
|
||||
mirror the project defaults.
|
||||
|
||||
|
||||
2.6.1
|
||||
=====
|
||||
|
||||
* We now have a logo, complete with a favicon :-)
|
||||
* Removed some problematic tests.
|
||||
* Fix the docker-compose example config to include a shared consume volume so
|
||||
that using the push API will work for users of the Docker install. Thanks to
|
||||
`Colin Frei`_ for fixing this in `#466`_.
|
||||
* `khrise`_ submitted a pull request to include the ``added`` property to the
|
||||
REST API `#471`_.
|
||||
|
||||
|
||||
2.6.0
|
||||
=====
|
||||
|
||||
* Allow an infinite number of logs to be deleted. Thanks to `Ulli`_ for noting
|
||||
the problem in `#433`_.
|
||||
* Fix the ``RecentCorrespondentsFilter`` correspondents filter that was added
|
||||
in 2.4 to play nice with the defaults. Thanks to `tsia`_ and `Sblop`_ who
|
||||
pointed this out. `#423`_.
|
||||
* Updated dependencies to include (among other things) a security patch to
|
||||
requests.
|
||||
* Fix text in sample data for tests so that the language guesser stops thinking
|
||||
that everything is in Catalan because we had *Lorem ipsum* in there.
|
||||
* Tweaked the gunicorn sample command to use filesystem paths instead of Python
|
||||
paths. `#441`_
|
||||
* Added pretty colour boxes next to the hex values in the Tags section, thanks
|
||||
to a pull request from `Joshua Taillon`_ `#442`_.
|
||||
* Added a ``.editorconfig`` file to better specify coding style.
|
||||
* `Joshua Taillon`_ also added some logic to tie Paperless' date guessing logic
|
||||
into how it parses file names on import. `#440`_
|
||||
|
||||
|
||||
2.5.0
|
||||
=====
|
||||
|
||||
@@ -44,6 +94,7 @@ Changelog
|
||||
* The ``get_date()`` functionality of the parsers has been consolidated onto
|
||||
the ``DocumentParser`` class since much of that code was redundant anyway.
|
||||
|
||||
|
||||
2.4.0
|
||||
=====
|
||||
|
||||
@@ -55,13 +106,13 @@ Changelog
|
||||
It's now in the import step that we decide the storage type. This allows you
|
||||
to export from an encrypted system and import into an unencrypted one, or
|
||||
vice-versa.
|
||||
* The migration history has been slightly modified to accomodate PostgreSQL
|
||||
* The migration history has been slightly modified to accommodate PostgreSQL
|
||||
users. Additionally, you can now tell paperless to use PostgreSQL simply by
|
||||
declaring ``PAPERLESS_DBUSER`` in your environment. This will attempt to
|
||||
connect to your Postgres database without a password unless you also set
|
||||
``PAPERLESS_DBPASS``.
|
||||
* A bug was found in the REST API filter system that was the result of an
|
||||
update of django-filter some time ago. This has now been patched `#412`_.
|
||||
update of django-filter some time ago. This has now been patched in `#412`_.
|
||||
Thanks to `thepill`_ for spotting it!
|
||||
|
||||
|
||||
@@ -570,6 +621,15 @@ bulk of the work on this big change.
|
||||
.. _thepill: https://github.com/thepill
|
||||
.. _Andrew Peng: https://github.com/pengc99
|
||||
.. _euri10: https://github.com/euri10
|
||||
.. _Ulli: https://github.com/Ulli2k
|
||||
.. _tsia: https://github.com/tsia
|
||||
.. _Sblop: https://github.com/Sblop
|
||||
.. _Colin Frei: https://github.com/colinfrei
|
||||
.. _khrise: https://github.com/khrise
|
||||
.. _syntonym: https://github.com/syntonym
|
||||
.. _JOKer: https://github.com/JOKer
|
||||
.. _Brian Cribbs: https://github.com/cribbstechnolog
|
||||
.. _Brendan M. Sleight: https://github.com/bmsleight
|
||||
|
||||
.. _#20: https://github.com/danielquinn/paperless/issues/20
|
||||
.. _#44: https://github.com/danielquinn/paperless/issues/44
|
||||
@@ -664,6 +724,20 @@ bulk of the work on this big change.
|
||||
.. _#412: https://github.com/danielquinn/paperless/issues/412
|
||||
.. _#413: https://github.com/danielquinn/paperless/pull/413
|
||||
.. _#414: https://github.com/danielquinn/paperless/issues/414
|
||||
.. _#423: https://github.com/danielquinn/paperless/issues/423
|
||||
.. _#433: https://github.com/danielquinn/paperless/issues/433
|
||||
.. _#440: https://github.com/danielquinn/paperless/pull/440
|
||||
.. _#441: https://github.com/danielquinn/paperless/pull/441
|
||||
.. _#442: https://github.com/danielquinn/paperless/pull/442
|
||||
.. _#466: https://github.com/danielquinn/paperless/pull/466
|
||||
.. _#471: https://github.com/danielquinn/paperless/pull/471
|
||||
.. _#475: https://github.com/danielquinn/paperless/pull/475
|
||||
.. _#481: https://github.com/danielquinn/paperless/pull/481
|
||||
.. _#483: https://github.com/danielquinn/paperless/pull/483
|
||||
.. _#484: https://github.com/danielquinn/paperless/pull/484
|
||||
.. _#488: https://github.com/danielquinn/paperless/pull/488
|
||||
.. _#489: https://github.com/danielquinn/paperless/pull/489
|
||||
.. _#492: https://github.com/danielquinn/paperless/pull/492
|
||||
|
||||
.. _pipenv: https://docs.pipenv.org/
|
||||
.. _a new home on Docker Hub: https://hub.docker.com/r/danielquinn/paperless/
|
||||
|
@@ -43,6 +43,16 @@ These however wouldn't work:
|
||||
* ``Some Company Name, Invoice 2016-01-01, money, invoices.pdf``
|
||||
* ``Another Company- Letter of Reference.jpg``
|
||||
|
||||
Do I have to be so strict about naming?
|
||||
---------------------------------------
|
||||
Rather than using the strict document naming rules, one can also set the option
|
||||
``PAPERLESS_FILENAME_DATE_ORDER`` in ``paperless.conf`` to any date order
|
||||
that is accepted by dateparser_. Doing so will cause ``paperless`` to default
|
||||
to any date format that is found in the title, instead of a date pulled from
|
||||
the document's text, without requiring the strict formatting of the document
|
||||
filename as described above.
|
||||
|
||||
.. _dateparser: https://github.com/scrapinghub/dateparser/blob/v0.7.0/docs/usage.rst#settings
|
||||
|
||||
.. _guesswork-content:
|
||||
|
||||
@@ -82,11 +92,11 @@ text and matching algorithm. From the help info there:
|
||||
uses a regex to match the PDF. If you don't know what a regex is, you
|
||||
probably don't want this option.
|
||||
|
||||
When using the "any" or "all" matching algorithms, you can search for terms that
|
||||
consist of multiple words by enclosing them in double quotes. For example, defining
|
||||
a match text of ``"Bank of America" BofA`` using the "any" algorithm, will match
|
||||
documents that contain either "Bank of America" or "BofA", but will not match
|
||||
documents containing "Bank of South America".
|
||||
When using the "any" or "all" matching algorithms, you can search for terms
|
||||
that consist of multiple words by enclosing them in double quotes. For example,
|
||||
defining a match text of ``"Bank of America" BofA`` using the "any" algorithm,
|
||||
will match documents that contain either "Bank of America" or "BofA", but will
|
||||
not match documents containing "Bank of South America".
|
||||
|
||||
Then just save your tag/correspondent and run another document through the
|
||||
consumer. Once complete, you should see the newly-created document,
|
||||
|
@@ -82,6 +82,7 @@ rolled in as part of the update:
|
||||
|
||||
$ cd /path/to/project
|
||||
$ git pull
|
||||
$ pip install -r requirements.txt
|
||||
$ cd src
|
||||
$ ./manage.py migrate
|
||||
|
||||
@@ -101,7 +102,7 @@ is similar:
|
||||
$ cd /path/to/project
|
||||
$ git pull
|
||||
$ docker build -t paperless .
|
||||
$ docker-compose run --rm comsumer migrate
|
||||
$ docker-compose run --rm consumer migrate
|
||||
$ docker-compose up -d
|
||||
|
||||
If ``git pull`` doesn't report any changes, there is no need to continue with
|
||||
|
@@ -12,6 +12,7 @@ should work) that has the following software installed:
|
||||
* `Imagemagick`_ version 6.7.5 or higher
|
||||
* `unpaper`_
|
||||
* `libpoppler-cpp-dev`_ PDF rendering library
|
||||
* `optipng`_
|
||||
|
||||
.. _Python3: https://python.org/
|
||||
.. _GNU Privacy Guard: https://gnupg.org
|
||||
@@ -19,6 +20,7 @@ should work) that has the following software installed:
|
||||
.. _Imagemagick: http://imagemagick.org/
|
||||
.. _unpaper: https://www.flameeyes.eu/projects/unpaper
|
||||
.. _libpoppler-cpp-dev: https://poppler.freedesktop.org/
|
||||
.. _optipng: http://optipng.sourceforge.net/
|
||||
|
||||
Notably, you should confirm how you access your Python3 installation. Many
|
||||
Linux distributions will install Python3 in parallel to Python2, using the
|
||||
@@ -33,7 +35,7 @@ In addition to the above, there are a number of Python requirements, all of
|
||||
which are listed in a file called ``requirements.txt`` in the project root
|
||||
directory.
|
||||
|
||||
If you're not working on a virtual environment (like Vagrant or Docker), you
|
||||
If you're not working on a virtual environment (like Docker), you
|
||||
should probably be using a virtualenv, but that's your call. The reasons why
|
||||
you might choose a virtualenv or not aren't really within the scope of this
|
||||
document. Needless to say if you don't know what a virtualenv is, you should
|
||||
|
@@ -42,18 +42,14 @@ Installation & Configuration
|
||||
You can go multiple routes with setting up and running Paperless:
|
||||
|
||||
* The `bare metal route`_
|
||||
* The `vagrant route`_
|
||||
* The `docker route`_
|
||||
|
||||
|
||||
The `Vagrant route`_ is quick & easy, but means you're running a VM which comes
|
||||
with memory consumption, cpu overhead etc. The `docker route`_ offers the same
|
||||
simplicity as Vagrant with lower resource consumption.
|
||||
The `docker route`_ is quick & easy.
|
||||
|
||||
The `bare metal route`_ is a bit more complicated to set up but makes it easier
|
||||
should you want to contribute some code back.
|
||||
|
||||
.. _Vagrant route: setup-installation-vagrant_
|
||||
.. _docker route: setup-installation-docker_
|
||||
.. _bare metal route: setup-installation-bare-metal_
|
||||
.. _Docker Machine: https://docs.docker.com/machine/
|
||||
@@ -81,12 +77,16 @@ Standard (Bare Metal)
|
||||
encrypt/decrypt the original documents. Don't worry about defining this
|
||||
if you don't want to use encryption (the default).
|
||||
|
||||
Note also that if you're using the ``runserver`` as mentioned below, you
|
||||
should make sure that ``PAPERLESS_DEBUG`` is set to ``"true"`` or is just commented out, as
|
||||
this is the default.
|
||||
|
||||
4. Initialise the SQLite database with ``./manage.py migrate``.
|
||||
5. Create a user for your Paperless instance with
|
||||
``./manage.py createsuperuser``. Follow the prompts to create your user.
|
||||
6. Start the webserver with ``./manage.py runserver <IP>:<PORT>``.
|
||||
If no specifc IP or port are given, the default is ``127.0.0.1:8000``
|
||||
also known as http://localhost:8000/.
|
||||
If no specific IP or port is given, the default is ``127.0.0.1:8000`` also
|
||||
known as http://localhost:8000/.
|
||||
You should now be able to visit your (empty) installation at
|
||||
`Paperless webserver`_ or whatever you chose before. You can login with the
|
||||
user/pass you created in #5.
|
||||
@@ -147,6 +147,15 @@ Docker Method
|
||||
instructions in comments in the file. The only change that is a hard
|
||||
requirement is to specify where the consumption directory should
|
||||
mount.[#dockercomposeyml]_
|
||||
|
||||
.. caution::
|
||||
|
||||
If you are using NFS mounts for the consume directory you also need to
|
||||
change the command to turn off inotify, as it doesn't work with NFS:
|
||||
|
||||
``command: ["document_consumer", "--no-inotify"]``
|
||||
|
||||
|
||||
5. Modify ``docker-compose.env`` and adapt the following environment variables:
|
||||
|
||||
``PAPERLESS_PASSPHRASE``
|
||||
@@ -267,54 +276,6 @@ Docker Method
|
||||
newer ``docker-compose.yml.example`` file
|
||||
|
||||
|
||||
.. _setup-installation-vagrant:
|
||||
|
||||
Vagrant Method
|
||||
++++++++++++++
|
||||
|
||||
1. Install `Vagrant`_. How you do that is really between you and your OS.
|
||||
2. Run ``vagrant up``. An instance will start up for you. When it's ready and
|
||||
provisioned...
|
||||
3. Run ``vagrant ssh`` and once inside your new vagrant box, edit
|
||||
``/etc/paperless.conf`` and set the values for:
|
||||
|
||||
* ``PAPERLESS_CONSUMPTION_DIR``: This is where your documents will be
|
||||
dumped to be consumed by Paperless.
|
||||
* ``PAPERLESS_PASSPHRASE``: This is the passphrase Paperless uses to
|
||||
encrypt/decrypt the original document. It's only required if you want
|
||||
your original files to be encrypted, otherwise, just leave it unset.
|
||||
* ``PAPERLESS_EMAIL_SECRET``: this is the "magic word" used when consuming
|
||||
documents from mail or via the API. If you don't use either, leaving it
|
||||
blank is just fine.
|
||||
|
||||
4. Exit the vagrant box and re-enter it with ``vagrant ssh`` again. This
|
||||
updates the environment to make use of the changes you made to the config
|
||||
file.
|
||||
5. Initialise the database with ``/opt/paperless/src/manage.py migrate``.
|
||||
6. Still inside your vagrant box, create a user for your Paperless instance
|
||||
with ``/opt/paperless/src/manage.py createsuperuser``. Follow the prompts to
|
||||
create your user.
|
||||
7. Start the webserver with
|
||||
``/opt/paperless/src/manage.py runserver 0.0.0.0:8000``. You should now be
|
||||
able to visit your (empty) `Paperless webserver`_ at ``172.28.128.4:8000``.
|
||||
You can login with the user/pass you created in #6.
|
||||
8. In a separate window, run ``vagrant ssh`` again, but this time once inside
|
||||
your vagrant instance, you should start the consumer script with
|
||||
``/opt/paperless/src/manage.py document_consumer``.
|
||||
9. Scan something. Put it in the ``CONSUMPTION_DIR``.
|
||||
10. Wait a few minutes
|
||||
11. Visit the document list on your webserver, and it should be there, indexed
|
||||
and downloadable.
|
||||
|
||||
.. caution::
|
||||
|
||||
This installation is not secure. Once everything is working head up to
|
||||
`Making things more permanent`_
|
||||
|
||||
.. _Vagrant: https://vagrantup.com/
|
||||
.. _Paperless server: http://172.28.128.4:8000
|
||||
|
||||
|
||||
.. _setup-permanent:
|
||||
|
||||
Making Things a Little more Permanent
|
||||
@@ -398,7 +359,7 @@ instance listening on localhost port 8000.
|
||||
location /static {
|
||||
|
||||
autoindex on;
|
||||
alias <path-to-paperless-static-directory>
|
||||
alias <path-to-paperless-static-directory>;
|
||||
|
||||
}
|
||||
|
||||
@@ -409,7 +370,7 @@ instance listening on localhost port 8000.
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
|
||||
proxy_pass http://127.0.0.1:8000
|
||||
proxy_pass http://127.0.0.1:8000;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -418,7 +379,7 @@ The gunicorn server can be started with the command:
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
$ <path-to-paperless-virtual-environment>/bin/gunicorn <path-to-paperless>/src/paperless.wsgi -w 2
|
||||
$ <path-to-paperless-virtual-environment>/bin/gunicorn --pythonpath=<path-to-paperless>/src paperless.wsgi -w 2
|
||||
|
||||
|
||||
.. _setup-permanent-standard-systemd:
|
||||
@@ -475,7 +436,7 @@ after restarting your system:
|
||||
respawn limit 10 5
|
||||
|
||||
script
|
||||
exec <path to paperless virtual environment>/bin/gunicorn <path to parperless>/src/paperless.wsgi -w 2
|
||||
exec <path to paperless virtual environment>/bin/gunicorn --pythonpath=<path to paperless>/src paperless.wsgi -w 2
|
||||
end script
|
||||
|
||||
Note that you'll need to replace ``/srv/paperless/src/manage.py`` with the
|
||||
@@ -513,13 +474,6 @@ second period.
|
||||
.. _Upstart: http://upstart.ubuntu.com/
|
||||
|
||||
|
||||
Vagrant
|
||||
~~~~~~~
|
||||
|
||||
You may use the Ubuntu explanation above. Replace
|
||||
``(local-filesystems and net-device-up IFACE=eth0)`` with ``vagrant-mounted``.
|
||||
|
||||
|
||||
.. _setup-permanent-docker:
|
||||
|
||||
Docker
|
||||
|
@@ -14,9 +14,8 @@ FORGIVING_OCR is enabled``, then you might need to install the
|
||||
`Tesseract language files <http://packages.ubuntu.com/search?keywords=tesseract-ocr>`_
|
||||
matching your document's languages.
|
||||
|
||||
As an example, if you are running Paperless from the Vagrant setup provided
|
||||
(or from any Ubuntu or Debian box), and your documents are written in Spanish
|
||||
you may need to run::
|
||||
As an example, if you are running Paperless from any Ubuntu or Debian
|
||||
box, and your documents are written in Spanish you may need to run::
|
||||
|
||||
apt-get install -y tesseract-ocr-spa
|
||||
|
||||
|
@@ -214,5 +214,5 @@ This too is done via the ``manage.py`` script:
|
||||
|
||||
That's it. It'll loop over all of the documents in your database and attempt
|
||||
to match all of your tags to them. If one matches, it'll be applied. And
|
||||
don't worry, you can run this as often as you like, it' won't double-tag
|
||||
don't worry, you can run this as often as you like, it won't double-tag
|
||||
a document.
|
||||
|
11
overrides/README.md
Normal file
@@ -0,0 +1,11 @@
|
||||
# Customizing Paperless
|
||||
|
||||
*See customization
|
||||
[documentation](https://paperless.readthedocs.io/en/latest/customising.html)
|
||||
for more detail!*
|
||||
|
||||
The example `.css` and `.js` snippets in this folder can be placed into
|
||||
one of two files in your ``PAPERLESS_MEDIADIR`` folder: `overrides.js` or
|
||||
`overrides.css`. Please feel free to submit pull requests to the main
|
||||
repository with other examples of customizations that you think others may
|
||||
find useful.
|
@@ -61,7 +61,7 @@ PAPERLESS_EMAIL_SECRET=""
|
||||
|
||||
# Controls whether django's debug mode is enabled. Disable this on production
|
||||
# systems. Debug mode is enabled by default.
|
||||
PAPERLESS_DEBUG="false"
|
||||
#PAPERLESS_DEBUG="true"
|
||||
|
||||
|
||||
# Paperless can be instructed to attempt to encrypt your PDF files with GPG
|
||||
@@ -127,6 +127,14 @@ PAPERLESS_DEBUG="false"
|
||||
# "true", the document will instead be opened in the browser, if possible.
|
||||
#PAPERLESS_INLINE_DOC="false"
|
||||
|
||||
# By default, paperless will check the document text for document date information.
|
||||
# Uncomment the line below to enable checking the document filename for date
|
||||
# information. The date order can be set to any option as specified in
|
||||
# https://dateparser.readthedocs.io/en/latest/#settings. The filename will be
|
||||
# checked first, and if nothing is found, the document text will be checked
|
||||
# as normal.
|
||||
#PAPERLESS_FILENAME_DATE_ORDER="YMD"
|
||||
|
||||
#
|
||||
# The following values use sensible defaults for modern systems, but if you're
|
||||
# running Paperless on a low-resource device (like a Raspberry Pi), modifying
|
||||
@@ -188,6 +196,17 @@ PAPERLESS_DEBUG="false"
|
||||
#PAPERLESS_CONSUMER_LOOP_TIME=10
|
||||
|
||||
|
||||
# By default Paperless stops consuming a document if no language can be
|
||||
# detected. Set to true to consume documents even if the language detection
|
||||
# fails.
|
||||
#PAPERLESS_FORGIVING_OCR="false"
|
||||
|
||||
|
||||
# By default Paperless does not OCR a document if the text can be retrieved from
|
||||
# the document directly. Set to true to always OCR documents.
|
||||
#PAPERLESS_OCR_ALWAYS="false"
|
||||
|
||||
|
||||
###############################################################################
|
||||
#### Interface ####
|
||||
###############################################################################
|
||||
|
@@ -1,51 +1,70 @@
|
||||
-i https://pypi.python.org/simple
|
||||
apipkg==1.5; python_version != '3.3.*'
|
||||
atomicwrites==1.2.1; python_version != '3.3.*'
|
||||
alabaster==0.7.12
|
||||
apipkg==1.5
|
||||
atomicwrites==1.2.1
|
||||
attrs==18.2.0
|
||||
certifi==2018.8.24
|
||||
babel==2.6.0
|
||||
certifi==2018.11.29
|
||||
chardet==3.0.4
|
||||
coverage==4.5.1; python_version < '4'
|
||||
coveralls==1.5.0
|
||||
coverage==4.5.2
|
||||
coveralls==1.5.1
|
||||
dateparser==0.7.0
|
||||
django-cors-headers==2.4.0
|
||||
django-crispy-forms==1.7.2
|
||||
django-extensions==2.1.2
|
||||
django-filter==2.0.0
|
||||
django==2.0.8
|
||||
djangorestframework==3.8.2
|
||||
django-extensions==2.1.4
|
||||
django-filter==2.1.0
|
||||
django==2.0.10
|
||||
djangoql==0.12.3
|
||||
djangorestframework==3.9.1
|
||||
docopt==0.6.2
|
||||
execnet==1.5.0; python_version != '3.3.*'
|
||||
docutils==0.14
|
||||
execnet==1.5.0
|
||||
factory-boy==2.11.1
|
||||
faker==0.9.0; python_version >= '2.7'
|
||||
faker==1.0.2
|
||||
filelock==3.0.10
|
||||
filemagic==1.6
|
||||
fuzzywuzzy==0.15.0
|
||||
fuzzywuzzy[speedup]==0.15.0
|
||||
gunicorn==19.9.0
|
||||
idna==2.7
|
||||
idna==2.8
|
||||
imagesize==1.1.0
|
||||
inotify-simple==1.1.8
|
||||
jinja2==2.10
|
||||
langdetect==1.0.7
|
||||
more-itertools==4.3.0
|
||||
pdftotext==2.1.0
|
||||
pillow==5.2.0
|
||||
pluggy==0.7.1; python_version != '3.3.*'
|
||||
py==1.6.0; python_version != '3.3.*'
|
||||
markupsafe==1.1.0
|
||||
more-itertools==5.0.0
|
||||
packaging==19.0
|
||||
pdftotext==2.1.1
|
||||
pillow==5.4.1
|
||||
pluggy==0.8.1
|
||||
ply==3.11
|
||||
psycopg2==2.7.7
|
||||
py==1.7.0
|
||||
pycodestyle==2.4.0
|
||||
pygments==2.3.1
|
||||
pyocr==0.5.3
|
||||
pytest-cov==2.6.0
|
||||
pytest-django==3.4.2
|
||||
pyparsing==2.3.1
|
||||
pytest-cov==2.6.1
|
||||
pytest-django==3.4.5
|
||||
pytest-env==0.6.2
|
||||
pytest-forked==0.2; python_version != '3.3.*'
|
||||
pytest-sugar==0.9.1
|
||||
pytest-xdist==1.23.0
|
||||
pytest==3.8.0
|
||||
python-dateutil==2.7.3
|
||||
python-dotenv==0.9.1
|
||||
python-gnupg==0.4.3
|
||||
pytest-forked==1.0.1
|
||||
pytest-sugar==0.9.2
|
||||
pytest-xdist==1.26.0
|
||||
pytest==4.1.1
|
||||
python-dateutil==2.7.5
|
||||
python-dotenv==0.10.1
|
||||
python-gnupg==0.4.4
|
||||
python-levenshtein==0.12.0
|
||||
pytz==2018.5
|
||||
regex==2018.8.29
|
||||
requests==2.19.1
|
||||
six==1.11.0
|
||||
pytz==2018.9
|
||||
regex==2019.1.24
|
||||
requests==2.21.0
|
||||
six==1.12.0
|
||||
snowballstemmer==1.2.1
|
||||
sphinx==1.8.3
|
||||
sphinxcontrib-websupport==1.1.0
|
||||
termcolor==1.1.0
|
||||
text-unidecode==1.2
|
||||
toml==0.10.0
|
||||
tox==3.7.0
|
||||
tzlocal==1.5.1
|
||||
urllib3==1.23; python_version != '3.3.*'
|
||||
urllib3==1.24.1
|
||||
virtualenv==16.3.0
|
||||
|
1086
resources/logo/print/eps/Black logo - no background.eps
Normal file
1090
resources/logo/print/eps/Color logo - no background.eps
Normal file
1099
resources/logo/print/eps/Color logo with background.eps
Normal file
1090
resources/logo/print/eps/White logo - no background.eps
Normal file
BIN
resources/logo/print/pdf/Black logo - no background.pdf
Normal file
BIN
resources/logo/print/pdf/Color logo - no background.pdf
Normal file
BIN
resources/logo/print/pdf/Color logo with background.pdf
Normal file
BIN
resources/logo/print/pdf/White logo - no background.pdf
Normal file
BIN
resources/logo/web/png/Black logo - no background.png
Normal file
After Width: | Height: | Size: 91 KiB |
BIN
resources/logo/web/png/Color logo - no background.png
Normal file
After Width: | Height: | Size: 111 KiB |
BIN
resources/logo/web/png/Color logo with background.png
Normal file
After Width: | Height: | Size: 116 KiB |
BIN
resources/logo/web/png/White logo - no background.png
Normal file
After Width: | Height: | Size: 94 KiB |
8
resources/logo/web/svg/Black logo - no background.svg
Normal file
After Width: | Height: | Size: 7.4 KiB |
8
resources/logo/web/svg/Color logo - no background.svg
Normal file
After Width: | Height: | Size: 7.5 KiB |
8
resources/logo/web/svg/Color logo with background.svg
Normal file
After Width: | Height: | Size: 7.5 KiB |
8
resources/logo/web/svg/White logo - no background.svg
Normal file
After Width: | Height: | Size: 7.4 KiB |
82
resources/logo/web/svg/square.svg
Normal file
@@ -0,0 +1,82 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<svg
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:cc="http://creativecommons.org/ns#"
|
||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
xmlns:svg="http://www.w3.org/2000/svg"
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
|
||||
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
|
||||
version="1.1"
|
||||
width="900"
|
||||
height="900"
|
||||
id="svg3923"
|
||||
sodipodi:docname="square.svg"
|
||||
inkscape:export-filename="/tmp/test.png"
|
||||
inkscape:export-xdpi="96"
|
||||
inkscape:export-ydpi="96"
|
||||
inkscape:version="0.92.2 2405546, 2018-03-11">
|
||||
<metadata
|
||||
id="metadata3929">
|
||||
<rdf:RDF>
|
||||
<cc:Work
|
||||
rdf:about="">
|
||||
<dc:format>image/svg+xml</dc:format>
|
||||
<dc:type
|
||||
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
|
||||
<dc:title></dc:title>
|
||||
</cc:Work>
|
||||
</rdf:RDF>
|
||||
</metadata>
|
||||
<defs
|
||||
id="defs3927" />
|
||||
<sodipodi:namedview
|
||||
pagecolor="#ffffff"
|
||||
bordercolor="#666666"
|
||||
borderopacity="1"
|
||||
objecttolerance="10"
|
||||
gridtolerance="10"
|
||||
guidetolerance="10"
|
||||
inkscape:pageopacity="0"
|
||||
inkscape:pageshadow="2"
|
||||
inkscape:window-width="3840"
|
||||
inkscape:window-height="2096"
|
||||
id="namedview3925"
|
||||
showgrid="false"
|
||||
inkscape:zoom="1.1360927"
|
||||
inkscape:cx="635.07139"
|
||||
inkscape:cy="606.383"
|
||||
inkscape:window-x="0"
|
||||
inkscape:window-y="27"
|
||||
inkscape:window-maximized="1"
|
||||
inkscape:current-layer="g3921" />
|
||||
<g
|
||||
transform="matrix(10.638298,0,0,10.638298,106.38298,-206.38301)"
|
||||
id="g3921">
|
||||
<defs
|
||||
id="SvgjsDefs1018" />
|
||||
<g
|
||||
id="SvgjsG1019"
|
||||
featureKey="root"
|
||||
style="fill:#ffffff" />
|
||||
<g
|
||||
id="SvgjsG1020"
|
||||
featureKey="symbol1"
|
||||
transform="matrix(0.10341565,0,0,0.10341565,-11.43874,18.048418)"
|
||||
inkscape:export-filename="/tmp/test.png"
|
||||
inkscape:export-xdpi="116.02285"
|
||||
inkscape:export-ydpi="116.02285"
|
||||
style="fill:#17541f">
|
||||
<defs
|
||||
id="defs3911" />
|
||||
<g
|
||||
id="g3915">
|
||||
<path
|
||||
d="M 231,798 C 227,779 219,741 218,741 49,640 69,465 125,365 c 12,126 235,213 105,367 -1,2 6,26 12,48 26,-44 65,-97 63,-102 C 145,288 645,258 749,16 c 47,234 -24,596 -426,688 -2,1 -73,126 -76,127 0,-2 -30,-1 -26,-11 2,-6 6,-14 10,-22 z M 330,625 C 267,476 452,312 544,271 356,439 324,564 330,625 Z m -104,79 c 51,-59 -9,-160 -45,-193 61,105 57,166 45,193 z"
|
||||
style="fill:#17541f"
|
||||
id="path3913"
|
||||
inkscape:connector-curvature="0" />
|
||||
</g>
|
||||
</g>
|
||||
</g>
|
||||
</svg>
|
After Width: | Height: | Size: 2.6 KiB |
@@ -75,7 +75,7 @@ install_languages() {
|
||||
pkg="tesseract-ocr-data-$lang"
|
||||
|
||||
# English is installed by default
|
||||
if [ "$lang" == "eng" ]; then
|
||||
if [[ "$lang" == "eng" ]]; then
|
||||
continue
|
||||
fi
|
||||
|
||||
@@ -95,7 +95,7 @@ if [[ "$1" != "/"* ]]; then
|
||||
initialize
|
||||
|
||||
# Install additional languages if specified
|
||||
if [ ! -z "$PAPERLESS_OCR_LANGUAGES" ]; then
|
||||
if [[ ! -z "$PAPERLESS_OCR_LANGUAGES" ]]; then
|
||||
install_languages "$PAPERLESS_OCR_LANGUAGES"
|
||||
fi
|
||||
|
||||
|
@@ -4,7 +4,7 @@ Description=Paperless webserver
|
||||
[Service]
|
||||
User=paperless
|
||||
Group=paperless
|
||||
ExecStart=/home/paperless/project/virtualenv/bin/gunicorn /home/paperless/project/src/paperless.wsgi -w 2
|
||||
ExecStart=/home/paperless/project/virtualenv/bin/gunicorn --pythonpath=/home/paperless/project/src paperless.wsgi -w 2
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
|
@@ -1,31 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Install packages
|
||||
apt-get update
|
||||
apt-get build-dep -y python-imaging
|
||||
apt-get install -y libjpeg8 libjpeg62-dev libfreetype6 libfreetype6-dev
|
||||
apt-get install -y build-essential python3-dev python3-pip sqlite3 libsqlite3-dev git
|
||||
apt-get install -y tesseract-ocr tesseract-ocr-eng imagemagick unpaper
|
||||
|
||||
# Python dependencies
|
||||
pip3 install -r /opt/paperless/requirements.txt
|
||||
|
||||
# Create the environment file
|
||||
cat /opt/paperless/paperless.conf.example | sed -e 's#CONSUMPTION_DIR=""#CONSUMPTION_DIR="/home/vagrant/consumption"#' > /etc/paperless.conf
|
||||
chmod 0640 /etc/paperless.conf
|
||||
chown root:vagrant /etc/paperless.conf
|
||||
|
||||
# Create the consumption directory
|
||||
mkdir /home/vagrant/consumption
|
||||
chown vagrant:vagrant /home/vagrant/consumption
|
||||
|
||||
echo "
|
||||
|
||||
|
||||
Now follow the remaining steps in the Vagrant section of the setup
|
||||
documentation to complete the process:
|
||||
|
||||
http://paperless.readthedocs.org/en/latest/setup.html#setup-installation-vagrant
|
||||
|
||||
|
||||
"
|
@@ -11,6 +11,7 @@ from django.urls import reverse
|
||||
from django.utils.html import format_html, format_html_join
|
||||
from django.utils.http import urlquote
|
||||
from django.utils.safestring import mark_safe
|
||||
from djangoql.admin import DjangoQLSearchMixin
|
||||
|
||||
from documents.actions import (
|
||||
add_tag_to_selected,
|
||||
@@ -61,12 +62,12 @@ class FinancialYearFilter(admin.SimpleListFilter):
|
||||
|
||||
# To keep it simple we use the same string for both
|
||||
# query parameter and the display.
|
||||
return (query, query)
|
||||
return query, query
|
||||
|
||||
else:
|
||||
query = "{0}-{0}".format(date.year)
|
||||
display = "{}".format(date.year)
|
||||
return (query, display)
|
||||
return query, display
|
||||
|
||||
def lookups(self, request, model_admin):
|
||||
if not settings.FY_START or not settings.FY_END:
|
||||
@@ -88,25 +89,24 @@ class FinancialYearFilter(admin.SimpleListFilter):
|
||||
|
||||
|
||||
class RecentCorrespondentFilter(admin.RelatedFieldListFilter):
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
self.title = "correspondent (recent)"
|
||||
"""
|
||||
If PAPERLESS_RECENT_CORRESPONDENT_YEARS is set, we limit the available
|
||||
correspondents to documents sent our way over the past ``n`` years.
|
||||
"""
|
||||
|
||||
def field_choices(self, field, request, model_admin):
|
||||
|
||||
years = settings.PAPERLESS_RECENT_CORRESPONDENT_YEARS
|
||||
days = 365 * years
|
||||
correspondents = Correspondent.objects.all()
|
||||
|
||||
lookups = []
|
||||
if years and years > 0:
|
||||
correspondents = Correspondent.objects.filter(
|
||||
self.title = "Correspondent (Recent)"
|
||||
days = 365 * years
|
||||
correspondents = correspondents.filter(
|
||||
documents__created__gte=datetime.now() - timedelta(days=days)
|
||||
).distinct()
|
||||
for c in correspondents:
|
||||
lookups.append((c.id, c.name))
|
||||
|
||||
return lookups
|
||||
return [(c.id, c.name) for c in correspondents]
|
||||
|
||||
|
||||
class CommonAdmin(admin.ModelAdmin):
|
||||
@@ -146,13 +146,16 @@ class CorrespondentAdmin(CommonAdmin):
|
||||
|
||||
class TagAdmin(CommonAdmin):
|
||||
|
||||
list_display = ("name", "colour", "match", "matching_algorithm",
|
||||
"document_count")
|
||||
list_display = (
|
||||
"name", "colour", "match", "matching_algorithm", "document_count")
|
||||
list_filter = ("colour", "matching_algorithm")
|
||||
list_editable = ("colour", "match", "matching_algorithm")
|
||||
|
||||
readonly_fields = ("slug",)
|
||||
|
||||
class Media:
|
||||
js = ("js/colours.js",)
|
||||
|
||||
def get_queryset(self, request):
|
||||
qs = super(TagAdmin, self).get_queryset(request)
|
||||
qs = qs.annotate(document_count=models.Count("documents"))
|
||||
@@ -163,7 +166,7 @@ class TagAdmin(CommonAdmin):
|
||||
document_count.admin_order_field = "document_count"
|
||||
|
||||
|
||||
class DocumentAdmin(CommonAdmin):
|
||||
class DocumentAdmin(DjangoQLSearchMixin, CommonAdmin):
|
||||
|
||||
class Media:
|
||||
css = {
|
||||
@@ -177,7 +180,6 @@ class DocumentAdmin(CommonAdmin):
|
||||
list_filter = (
|
||||
"tags",
|
||||
("correspondent", RecentCorrespondentFilter),
|
||||
"correspondent",
|
||||
FinancialYearFilter
|
||||
)
|
||||
|
||||
|
@@ -216,7 +216,11 @@ class MailFetcher(Loggable):
|
||||
return r
|
||||
|
||||
def _connect(self):
|
||||
self._connection = imaplib.IMAP4_SSL(self._host, self._port)
|
||||
try:
|
||||
self._connection = imaplib.IMAP4_SSL(self._host, self._port)
|
||||
except OSError as e:
|
||||
msg = "Problem connecting to {}: {}".format(self._host, e.strerror)
|
||||
raise MailFetcherError(msg)
|
||||
|
||||
def _login(self):
|
||||
|
||||
|
@@ -11,7 +11,7 @@ def re_slug_all_the_things(apps, schema_editor):
|
||||
"""
|
||||
|
||||
Tag = apps.get_model("documents", "Tag")
|
||||
Correspondent = apps.get_model("documents", "Tag")
|
||||
Correspondent = apps.get_model("documents", "Correspondent")
|
||||
|
||||
for klass in (Tag, Correspondent):
|
||||
for instance in klass.objects.all():
|
||||
|
@@ -14,14 +14,18 @@ from django.utils import timezone
|
||||
# - XX.YY.ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
|
||||
# - XX/YY/ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
|
||||
# - XX-YY-ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
|
||||
# - ZZZZ.XX.YY with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
|
||||
# - ZZZZ/XX/YY with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
|
||||
# - ZZZZ-XX-YY with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
|
||||
# - XX. MONTH ZZZZ with XX being 1 or 2 and ZZZZ being 2 or 4 digits
|
||||
# - MONTH ZZZZ, with ZZZZ being 4 digits
|
||||
# - MONTH XX, ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits
|
||||
DATE_REGEX = re.compile(
|
||||
r'\b([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})\b|' +
|
||||
r'\b([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))\b|' +
|
||||
r'\b([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))\b|' +
|
||||
r'\b([^\W\d_]{3,9} [0-9]{4})\b'
|
||||
r'(\b|(?!=([_-])))([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})(\b|(?=([_-])))|' + # NOQA: E501
|
||||
r'(\b|(?!=([_-])))([0-9]{4}|[0-9]{2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{1,2})(\b|(?=([_-])))|' + # NOQA: E501
|
||||
r'(\b|(?!=([_-])))([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))(\b|(?=([_-])))|' + # NOQA: E501
|
||||
r'(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))(\b|(?=([_-])))|' +
|
||||
r'(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{4})(\b|(?=([_-])))'
|
||||
)
|
||||
|
||||
|
||||
@@ -37,6 +41,7 @@ class DocumentParser:
|
||||
|
||||
SCRATCH = settings.SCRATCH_DIR
|
||||
DATE_ORDER = settings.DATE_ORDER
|
||||
FILENAME_DATE_ORDER = settings.FILENAME_DATE_ORDER
|
||||
OPTIPNG = settings.OPTIPNG_BINARY
|
||||
|
||||
def __init__(self, path):
|
||||
@@ -75,30 +80,60 @@ class DocumentParser:
|
||||
Returns the date of the document.
|
||||
"""
|
||||
|
||||
def __parser(ds, date_order):
|
||||
"""
|
||||
Call dateparser.parse with a particular date ordering
|
||||
"""
|
||||
return dateparser.parse(
|
||||
ds,
|
||||
settings={
|
||||
"DATE_ORDER": date_order,
|
||||
"PREFER_DAY_OF_MONTH": "first",
|
||||
"RETURN_AS_TIMEZONE_AWARE":
|
||||
True
|
||||
}
|
||||
)
|
||||
|
||||
date = None
|
||||
date_string = None
|
||||
|
||||
next_year = timezone.now().year + 5 # Arbitrary 5 year future limit
|
||||
title = os.path.basename(self.document_path)
|
||||
|
||||
# if filename date parsing is enabled, search there first:
|
||||
if self.FILENAME_DATE_ORDER:
|
||||
self.log("info", "Checking document title for date")
|
||||
for m in re.finditer(DATE_REGEX, title):
|
||||
date_string = m.group(0)
|
||||
|
||||
try:
|
||||
date = __parser(date_string, self.FILENAME_DATE_ORDER)
|
||||
except TypeError:
|
||||
# Skip all matches that do not parse to a proper date
|
||||
continue
|
||||
|
||||
if date is not None and next_year > date.year > 1900:
|
||||
self.log(
|
||||
"info",
|
||||
"Detected document date {} based on string {} "
|
||||
"from document title"
|
||||
"".format(date.isoformat(), date_string)
|
||||
)
|
||||
return date
|
||||
|
||||
try:
|
||||
# getting text after checking filename will save time if only
|
||||
# looking at the filename instead of the whole text
|
||||
text = self.get_text()
|
||||
except ParseError:
|
||||
return None
|
||||
|
||||
next_year = timezone.now().year + 5 # Arbitrary 5 year future limit
|
||||
|
||||
# Iterate through all regex matches and try to parse the date
|
||||
# Iterate through all regex matches in text and try to parse the date
|
||||
for m in re.finditer(DATE_REGEX, text):
|
||||
|
||||
date_string = m.group(0)
|
||||
|
||||
try:
|
||||
date = dateparser.parse(
|
||||
date_string,
|
||||
settings={
|
||||
"DATE_ORDER": self.DATE_ORDER,
|
||||
"PREFER_DAY_OF_MONTH": "first",
|
||||
"RETURN_AS_TIMEZONE_AWARE": True
|
||||
}
|
||||
)
|
||||
date = __parser(date_string, self.DATE_ORDER)
|
||||
except TypeError:
|
||||
# Skip all matches that do not parse to a proper date
|
||||
continue
|
||||
|
@@ -7,7 +7,14 @@ class CorrespondentSerializer(serializers.HyperlinkedModelSerializer):
|
||||
|
||||
class Meta:
|
||||
model = Correspondent
|
||||
fields = ("id", "slug", "name")
|
||||
fields = (
|
||||
"id",
|
||||
"slug",
|
||||
"name",
|
||||
"match",
|
||||
"matching_algorithm",
|
||||
"is_insensitive"
|
||||
)
|
||||
|
||||
|
||||
class TagSerializer(serializers.HyperlinkedModelSerializer):
|
||||
@@ -15,7 +22,14 @@ class TagSerializer(serializers.HyperlinkedModelSerializer):
|
||||
class Meta:
|
||||
model = Tag
|
||||
fields = (
|
||||
"id", "slug", "name", "colour", "match", "matching_algorithm")
|
||||
"id",
|
||||
"slug",
|
||||
"name",
|
||||
"colour",
|
||||
"match",
|
||||
"matching_algorithm",
|
||||
"is_insensitive"
|
||||
)
|
||||
|
||||
|
||||
class CorrespondentField(serializers.HyperlinkedRelatedField):
|
||||
@@ -46,6 +60,7 @@ class DocumentSerializer(serializers.ModelSerializer):
|
||||
"checksum",
|
||||
"created",
|
||||
"modified",
|
||||
"added",
|
||||
"file_name",
|
||||
"download_url",
|
||||
"thumbnail_url",
|
||||
|
66
src/documents/static/js/colours.js
Normal file
@@ -0,0 +1,66 @@
|
||||
// The following jQuery snippet will add a small square next to the selection
|
||||
// drop-down on the `Add tag` page that will update to show the selected tag
|
||||
// color as the drop-down value is changed.
|
||||
|
||||
django.jQuery(document).ready(function(){
|
||||
|
||||
if (django.jQuery("#id_colour").length) {
|
||||
|
||||
let colour;
|
||||
let colour_num;
|
||||
|
||||
colour_num = django.jQuery("#id_colour").val() - 1;
|
||||
colour = django.jQuery('#id_colour')[0][colour_num].text;
|
||||
django.jQuery('#id_colour').after('<div class="colour_square"></div>');
|
||||
|
||||
django.jQuery('.colour_square').css({
|
||||
'float': 'left',
|
||||
'width': '20px',
|
||||
'height': '20px',
|
||||
'margin': '5px',
|
||||
'border': '1px solid rgba(0, 0, 0, .2)',
|
||||
'background': colour
|
||||
});
|
||||
|
||||
django.jQuery('#id_colour').change(function () {
|
||||
colour_num = django.jQuery("#id_colour").val() - 1;
|
||||
colour = django.jQuery('#id_colour')[0][colour_num].text;
|
||||
django.jQuery('.colour_square').css({'background': colour});
|
||||
});
|
||||
|
||||
} else if (django.jQuery("select[id*='colour']").length) {
|
||||
|
||||
django.jQuery('select[id*="-colour"]').each(function (index, element) {
|
||||
let id;
|
||||
let loop_colour_num;
|
||||
let loop_colour;
|
||||
|
||||
id = "colour_square_" + index;
|
||||
django.jQuery(element).after('<div class="colour_square" id="' + id + '"></div>');
|
||||
|
||||
loop_colour_num = django.jQuery(element).val() - 1;
|
||||
loop_colour = django.jQuery(element)[0][loop_colour_num].text;
|
||||
|
||||
django.jQuery("<style type='text/css'>\
|
||||
.colour_square{ \
|
||||
float: left; \
|
||||
width: 20px; \
|
||||
height: 20px; \
|
||||
margin: 5px; \
|
||||
border: 1px solid rgba(0,0,0,.2); \
|
||||
} </style>").appendTo("head");
|
||||
django.jQuery('#' + id).css({'background': loop_colour});
|
||||
|
||||
console.log(id, loop_colour_num, loop_colour);
|
||||
|
||||
django.jQuery(element).change(function () {
|
||||
loop_colour_num = django.jQuery(element).val() - 1;
|
||||
loop_colour = django.jQuery(element)[0][loop_colour_num].text;
|
||||
django.jQuery('#' + id).css({'background': loop_colour});
|
||||
console.log('#' + id, loop_colour)
|
||||
});
|
||||
})
|
||||
|
||||
}
|
||||
|
||||
});
|
@@ -3,10 +3,63 @@
|
||||
{# NOTE: This should probably be extending base.html. See the CSS comment below for details. #}
|
||||
|
||||
|
||||
{% load static %}
|
||||
{% load custom_css from customisation %}
|
||||
{% load custom_js from customisation %}
|
||||
|
||||
|
||||
{% block extrahead %}
|
||||
<link rel="icon" type="image/x-icon" href="{% url 'favicon' %}" />
|
||||
<style>
|
||||
#header {
|
||||
background-color: #90a9b7;
|
||||
line-height: inherit;
|
||||
height: auto;
|
||||
}
|
||||
#branding h1 {
|
||||
font-weight: inherit;
|
||||
font-size: inherit;
|
||||
}
|
||||
.button,
|
||||
.button:active,
|
||||
.button:focus,
|
||||
.button:hover,
|
||||
a.button,
|
||||
.submit-row input,
|
||||
input[type="submit"],
|
||||
input[type="submit"]:active,
|
||||
input[type="submit"]:focus,
|
||||
input[type="submit"]:hover,
|
||||
input[type="button"],
|
||||
input[type="button"]:active,
|
||||
input[type="button"]:focus,
|
||||
input[type="button"]:hover {
|
||||
background-color: #074f57;
|
||||
}
|
||||
.module h2,
|
||||
.module caption,
|
||||
.inline-group h2 {
|
||||
background-color: #90a9b7;
|
||||
}
|
||||
div.breadcrumbs {
|
||||
background-color: #077187;
|
||||
}
|
||||
.module h2,
|
||||
.module caption,
|
||||
.inline-group h2 {
|
||||
background-color: #077187;
|
||||
}
|
||||
</style>
|
||||
{% endblock %}
|
||||
|
||||
|
||||
{% block branding %}
|
||||
<h1 id="site-name">
|
||||
<a href="{% url 'admin:index' %}"><img src="{% static 'paperless/img/logo-light.png' %}" alt="Paperless" /></a>
|
||||
</h1>
|
||||
{% endblock %}
|
||||
|
||||
|
||||
{% block blockbots %}
|
||||
|
||||
{% comment %}
|
||||
|
@@ -28,7 +28,7 @@
|
||||
}
|
||||
.result .header {
|
||||
padding: 5px;
|
||||
background-color: #79AEC8;
|
||||
background-color: #90a9b7;
|
||||
position: relative;
|
||||
}
|
||||
.result .header .checkbox {
|
||||
|
@@ -2,6 +2,7 @@ from django.http import HttpResponse, HttpResponseBadRequest
|
||||
from django.views.generic import DetailView, FormView, TemplateView
|
||||
from django_filters.rest_framework import DjangoFilterBackend
|
||||
from django.conf import settings
|
||||
from django.utils import cache
|
||||
|
||||
from paperless.db import GnuPG
|
||||
from paperless.mixins import SessionOrBasicAuthMixin
|
||||
@@ -56,10 +57,12 @@ class FetchView(SessionOrBasicAuthMixin, DetailView):
|
||||
}
|
||||
|
||||
if self.kwargs["kind"] == "thumb":
|
||||
return HttpResponse(
|
||||
response = HttpResponse(
|
||||
self._get_raw_data(self.object.thumbnail_file),
|
||||
content_type=content_types[Document.TYPE_PNG]
|
||||
)
|
||||
cache.patch_cache_control(response, max_age=31536000, private=True)
|
||||
return response
|
||||
|
||||
response = HttpResponse(
|
||||
self._get_raw_data(self.object.source_file),
|
||||
@@ -130,7 +133,7 @@ class DocumentViewSet(RetrieveModelMixin,
|
||||
filter_class = DocumentFilterSet
|
||||
search_fields = ("title", "correspondent__name", "content")
|
||||
ordering_fields = (
|
||||
"id", "title", "correspondent__name", "created", "modified")
|
||||
"id", "title", "correspondent__name", "created", "modified", "added")
|
||||
|
||||
|
||||
class LogViewSet(ReadOnlyModelViewSet):
|
||||
|
@@ -72,6 +72,7 @@ INSTALLED_APPS = [
|
||||
"corsheaders",
|
||||
"django_extensions",
|
||||
|
||||
"paperless",
|
||||
"documents.apps.DocumentsConfig",
|
||||
"reminders.apps.RemindersConfig",
|
||||
"paperless_tesseract.apps.PaperlessTesseractConfig",
|
||||
@@ -82,6 +83,7 @@ INSTALLED_APPS = [
|
||||
"rest_framework",
|
||||
"crispy_forms",
|
||||
"django_filters",
|
||||
"djangoql",
|
||||
|
||||
]
|
||||
|
||||
@@ -152,6 +154,10 @@ if os.getenv("PAPERLESS_DBUSER"):
|
||||
}
|
||||
if os.getenv("PAPERLESS_DBPASS"):
|
||||
DATABASES["default"]["PASSWORD"] = os.getenv("PAPERLESS_DBPASS")
|
||||
if os.getenv("PAPERLESS_DBHOST"):
|
||||
DATABASES["default"]["HOST"] = os.getenv("PAPERLESS_DBHOST")
|
||||
if os.getenv("PAPERLESS_DBPORT"):
|
||||
DATABASES["default"]["PORT"] = os.getenv("PAPERLESS_DBPORT")
|
||||
|
||||
|
||||
# Password validation
|
||||
@@ -199,6 +205,16 @@ STATIC_URL = os.getenv("PAPERLESS_STATIC_URL", "/static/")
|
||||
MEDIA_URL = os.getenv("PAPERLESS_MEDIA_URL", "/media/")
|
||||
|
||||
|
||||
# Other
|
||||
|
||||
# Disable Django's artificial limit on the number of form fields to submit at
|
||||
# once. This is a protection against overloading the server, but since this is
|
||||
# a self-hosted sort of gig, the benefits of being able to mass-delete a tonne
|
||||
# of log entries outweigh the benefits of such a safeguard.
|
||||
|
||||
DATA_UPLOAD_MAX_NUMBER_FIELDS = None
|
||||
|
||||
|
||||
# Paperless-specific stuff
|
||||
# You shouldn't have to edit any of these values. Rather, you can set these
|
||||
# values in /etc/paperless.conf instead.
|
||||
@@ -296,6 +312,7 @@ FY_END = os.getenv("PAPERLESS_FINANCIAL_YEAR_END")
|
||||
|
||||
# Specify the default date order (for autodetected dates)
|
||||
DATE_ORDER = os.getenv("PAPERLESS_DATE_ORDER", "DMY")
|
||||
FILENAME_DATE_ORDER = os.getenv("PAPERLESS_FILENAME_DATE_ORDER")
|
||||
|
||||
# Specify for how many years a correspondent is considered recent. Recent
|
||||
# correspondents will be shown in a separate "Recent correspondents" filter as
|
||||
|
BIN
src/paperless/static/paperless/img/favicon.ico
Normal file
After Width: | Height: | Size: 108 KiB |
BIN
src/paperless/static/paperless/img/logo-dark.png
Normal file
After Width: | Height: | Size: 6.2 KiB |
BIN
src/paperless/static/paperless/img/logo-light.png
Normal file
After Width: | Height: | Size: 8.6 KiB |
@@ -6,6 +6,7 @@ from django.views.decorators.csrf import csrf_exempt
|
||||
from django.views.generic import RedirectView
|
||||
from rest_framework.routers import DefaultRouter
|
||||
|
||||
from paperless.views import FaviconView
|
||||
from documents.views import (
|
||||
CorrespondentViewSet,
|
||||
DocumentViewSet,
|
||||
@@ -44,6 +45,9 @@ urlpatterns = [
|
||||
# File uploads
|
||||
url(r"^push$", csrf_exempt(PushView.as_view()), name="push"),
|
||||
|
||||
# Favicon
|
||||
url(r"^favicon.ico$", FaviconView.as_view(), name="favicon"),
|
||||
|
||||
# The Django admin
|
||||
url(r"admin/", admin.site.urls),
|
||||
|
||||
|
@@ -1 +1 @@
|
||||
__version__ = (2, 5, 0)
|
||||
__version__ = (2, 6, 1)
|
||||
|
@@ -1,3 +1,7 @@
|
||||
import os
|
||||
|
||||
from django.http import HttpResponse
|
||||
from django.views.generic import View
|
||||
from rest_framework.pagination import PageNumberPagination
|
||||
|
||||
|
||||
@@ -5,3 +9,17 @@ class StandardPagination(PageNumberPagination):
|
||||
page_size = 25
|
||||
page_size_query_param = "page-size"
|
||||
max_page_size = 100000
|
||||
|
||||
|
||||
class FaviconView(View):
|
||||
|
||||
def get(self, request, *args, **kwargs):
|
||||
favicon = os.path.join(
|
||||
os.path.dirname(__file__),
|
||||
"static",
|
||||
"paperless",
|
||||
"img",
|
||||
"favicon.ico"
|
||||
)
|
||||
with open(favicon, "rb") as f:
|
||||
return HttpResponse(f, content_type="image/x-icon")
|
||||
|
@@ -153,7 +153,10 @@ class RasterisedDocumentParser(DocumentParser):
|
||||
)
|
||||
raw_text = self._assemble_ocr_sections(imgs, middle, raw_text)
|
||||
return raw_text
|
||||
raise OCRError("Language detection failed")
|
||||
error_msg = ("Language detection failed. Set "
|
||||
"PAPERLESS_FORGIVING_OCR in config file to continue "
|
||||
"anyway.")
|
||||
raise OCRError(error_msg)
|
||||
|
||||
if ISO639[guessed_language] == self.DEFAULT_OCR_LANGUAGE:
|
||||
raw_text = self._assemble_ocr_sections(imgs, middle, raw_text)
|
||||
@@ -218,7 +221,8 @@ def run_convert(*args):
|
||||
|
||||
def run_unpaper(args):
|
||||
unpaper, pnm = args
|
||||
command_args = unpaper, pnm, pnm.replace(".pnm", ".unpaper.pnm")
|
||||
command_args = (unpaper, "--overwrite", pnm,
|
||||
pnm.replace(".pnm", ".unpaper.pnm"))
|
||||
if not subprocess.Popen(command_args).wait() == 0:
|
||||
raise ParseError("Unpaper failed at {}".format(command_args))
|
||||
|
||||
|
Before Width: | Height: | Size: 136 KiB |
Before Width: | Height: | Size: 135 KiB |
Before Width: | Height: | Size: 138 KiB |
Before Width: | Height: | Size: 138 KiB |
Before Width: | Height: | Size: 136 KiB |
Before Width: | Height: | Size: 136 KiB |
@@ -8,6 +8,7 @@ from dateutil import tz
|
||||
from django.test import TestCase
|
||||
|
||||
from ..parsers import RasterisedDocumentParser
|
||||
from django.conf import settings
|
||||
|
||||
|
||||
class TestDate(TestCase):
|
||||
@@ -15,73 +16,67 @@ class TestDate(TestCase):
|
||||
SAMPLE_FILES = os.path.join(os.path.dirname(__file__), "samples")
|
||||
SCRATCH = "/tmp/paperless-tests-{}".format(str(uuid4())[:8])
|
||||
|
||||
MOCK_SCRATCH = "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH" # NOQA: E501
|
||||
|
||||
def setUp(self):
|
||||
os.makedirs(self.SCRATCH, exist_ok=True)
|
||||
|
||||
def tearDown(self):
|
||||
shutil.rmtree(self.SCRATCH)
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
@mock.patch(MOCK_SCRATCH, SCRATCH)
|
||||
def test_date_format_1(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document._text = "lorem ipsum 130218 lorem ipsum"
|
||||
self.assertEqual(document.get_date(), None)
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
@mock.patch(MOCK_SCRATCH, SCRATCH)
|
||||
def test_date_format_2(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document._text = "lorem ipsum 2018 lorem ipsum"
|
||||
self.assertEqual(document.get_date(), None)
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
@mock.patch(MOCK_SCRATCH, SCRATCH)
|
||||
def test_date_format_3(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document._text = "lorem ipsum 20180213 lorem ipsum"
|
||||
self.assertEqual(document.get_date(), None)
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
@mock.patch(MOCK_SCRATCH, SCRATCH)
|
||||
def test_date_format_4(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document._text = "lorem ipsum 13.02.2018 lorem ipsum"
|
||||
date = document.get_date()
|
||||
self.assertEqual(
|
||||
document.get_date(),
|
||||
datetime.datetime(2018, 2, 13, 0, 0, tzinfo=tz.tzutc())
|
||||
date,
|
||||
datetime.datetime(
|
||||
2018, 2, 13, 0, 0,
|
||||
tzinfo=tz.gettz(settings.TIME_ZONE)
|
||||
)
|
||||
)
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
@mock.patch(MOCK_SCRATCH, SCRATCH)
|
||||
def test_date_format_5(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document._text = (
|
||||
"lorem ipsum 130218, 2018, 20180213 and 13.02.2018 lorem ipsum")
|
||||
"lorem ipsum 130218, 2018, 20180213 and lorem 13.02.2018 lorem "
|
||||
"ipsum"
|
||||
)
|
||||
date = document.get_date()
|
||||
self.assertEqual(
|
||||
document.get_date(),
|
||||
datetime.datetime(2018, 2, 13, 0, 0, tzinfo=tz.tzutc())
|
||||
date,
|
||||
datetime.datetime(
|
||||
2018, 2, 13, 0, 0,
|
||||
tzinfo=tz.gettz(settings.TIME_ZONE)
|
||||
)
|
||||
)
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
@mock.patch(MOCK_SCRATCH, SCRATCH)
|
||||
def test_date_format_6(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
@@ -98,10 +93,7 @@ class TestDate(TestCase):
|
||||
)
|
||||
self.assertEqual(document.get_date(), None)
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
@mock.patch(MOCK_SCRATCH, SCRATCH)
|
||||
def test_date_format_7(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
@@ -110,289 +102,62 @@ class TestDate(TestCase):
|
||||
"März 2019\n"
|
||||
"lorem ipsum"
|
||||
)
|
||||
date = document.get_date()
|
||||
self.assertEqual(
|
||||
document.get_date(),
|
||||
datetime.datetime(2019, 3, 1, 0, 0, tzinfo=tz.tzutc())
|
||||
date,
|
||||
datetime.datetime(
|
||||
2019, 3, 1, 0, 0,
|
||||
tzinfo=tz.gettz(settings.TIME_ZONE)
|
||||
)
|
||||
)
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
@mock.patch(MOCK_SCRATCH, SCRATCH)
|
||||
def test_date_format_8(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document._text = ("lorem ipsum\n"
|
||||
"Wohnort\n"
|
||||
"3100\n"
|
||||
"IBAN\n"
|
||||
"AT87 4534\n"
|
||||
"1234\n"
|
||||
"1234 5678\n"
|
||||
"BIC\n"
|
||||
"lorem ipsum\n"
|
||||
"März 2020")
|
||||
self.assertEqual(document.get_date(),
|
||||
datetime.datetime(2020, 3, 1, 0, 0,
|
||||
tzinfo=tz.tzutc()))
|
||||
document._text = (
|
||||
"lorem ipsum\n"
|
||||
"Wohnort\n"
|
||||
"3100\n"
|
||||
"IBAN\n"
|
||||
"AT87 4534\n"
|
||||
"1234\n"
|
||||
"1234 5678\n"
|
||||
"BIC\n"
|
||||
"lorem ipsum\n"
|
||||
"März 2020"
|
||||
)
|
||||
self.assertEqual(
|
||||
document.get_date(),
|
||||
datetime.datetime(
|
||||
2020, 3, 1, 0, 0,
|
||||
tzinfo=tz.gettz(settings.TIME_ZONE)
|
||||
)
|
||||
)
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
@mock.patch(MOCK_SCRATCH, SCRATCH)
|
||||
def test_date_format_9(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document._text = ("lorem ipsum\n"
|
||||
"27. Nullmonth 2020\n"
|
||||
"März 2020\n"
|
||||
"lorem ipsum")
|
||||
self.assertEqual(document.get_date(),
|
||||
datetime.datetime(2020, 3, 1, 0, 0,
|
||||
tzinfo=tz.tzutc()))
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
def test_get_text_1_pdf(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_1.pdf")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document.get_text()
|
||||
self.assertEqual(document._is_ocred(), True)
|
||||
self.assertEqual(
|
||||
document.get_date(),
|
||||
datetime.datetime(2018, 4, 1, 0, 0, tzinfo=tz.tzutc())
|
||||
document._text = (
|
||||
"lorem ipsum\n"
|
||||
"27. Nullmonth 2020\n"
|
||||
"März 2020\n"
|
||||
"lorem ipsum"
|
||||
)
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
def test_get_text_1_png(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_1.png")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document.get_text()
|
||||
self.assertEqual(document._is_ocred(), False)
|
||||
self.assertEqual(
|
||||
document.get_date(),
|
||||
datetime.datetime(2018, 4, 1, 0, 0, tzinfo=tz.tzutc())
|
||||
)
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
def test_get_text_2_pdf(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_2.pdf")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document.get_text()
|
||||
self.assertEqual(document._is_ocred(), True)
|
||||
self.assertEqual(
|
||||
document.get_date(),
|
||||
datetime.datetime(2013, 2, 1, 0, 0, tzinfo=tz.tzutc())
|
||||
)
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
def test_get_text_2_png(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_2.png")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document.get_text()
|
||||
self.assertEqual(document._is_ocred(), False)
|
||||
self.assertEqual(
|
||||
document.get_date(),
|
||||
datetime.datetime(2013, 2, 1, 0, 0, tzinfo=tz.tzutc())
|
||||
)
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
def test_get_text_3_pdf(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_3.pdf")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document.get_text()
|
||||
self.assertEqual(document._is_ocred(), True)
|
||||
self.assertEqual(
|
||||
document.get_date(),
|
||||
datetime.datetime(2018, 10, 5, 0, 0, tzinfo=tz.tzutc())
|
||||
)
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
def test_get_text_3_png(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_3.png")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document.get_text()
|
||||
self.assertEqual(document._is_ocred(), False)
|
||||
self.assertEqual(
|
||||
document.get_date(),
|
||||
datetime.datetime(2018, 10, 5, 0, 0, tzinfo=tz.tzutc())
|
||||
)
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
def test_get_text_4_pdf(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_4.pdf")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document.get_text()
|
||||
self.assertEqual(document._is_ocred(), True)
|
||||
self.assertEqual(
|
||||
document.get_date(),
|
||||
datetime.datetime(2018, 10, 5, 0, 0, tzinfo=tz.tzutc())
|
||||
)
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
def test_get_text_4_png(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_4.png")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document.get_text()
|
||||
self.assertEqual(document._is_ocred(), False)
|
||||
self.assertEqual(
|
||||
document.get_date(),
|
||||
datetime.datetime(2018, 10, 5, 0, 0, tzinfo=tz.tzutc())
|
||||
)
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
def test_get_text_5_pdf(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_5.pdf")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document.get_text()
|
||||
self.assertEqual(document._is_ocred(), True)
|
||||
self.assertEqual(
|
||||
document.get_date(),
|
||||
datetime.datetime(2018, 12, 17, 0, 0, tzinfo=tz.tzutc())
|
||||
)
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
def test_get_text_5_png(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_5.png")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document.get_text()
|
||||
self.assertEqual(document._is_ocred(), False)
|
||||
self.assertEqual(
|
||||
document.get_date(),
|
||||
datetime.datetime(2018, 12, 17, 0, 0, tzinfo=tz.tzutc())
|
||||
)
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
def test_get_text_6_pdf_us(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_6.pdf")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document.get_text()
|
||||
document.DATE_ORDER = "MDY"
|
||||
self.assertEqual(document._is_ocred(), True)
|
||||
self.assertEqual(
|
||||
document.get_date(),
|
||||
datetime.datetime(2018, 12, 17, 0, 0, tzinfo=tz.tzutc())
|
||||
)
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
def test_get_text_6_png_us(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_6.png")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document.get_text()
|
||||
document.DATE_ORDER = "MDY"
|
||||
self.assertEqual(document._is_ocred(), False)
|
||||
self.assertEqual(
|
||||
document.get_date(),
|
||||
datetime.datetime(2018, 12, 17, 0, 0, tzinfo=tz.tzutc())
|
||||
)
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
def test_get_text_6_pdf_eu(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_6.pdf")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document.get_text()
|
||||
self.assertEqual(document._is_ocred(), True)
|
||||
self.assertEqual(document.get_date(), None)
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
def test_get_text_6_png_eu(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_6.png")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document.get_text()
|
||||
self.assertEqual(document._is_ocred(), False)
|
||||
self.assertEqual(document.get_date(), None)
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
def test_get_text_7_pdf(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_7.pdf")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document.get_text()
|
||||
self.assertEqual(document._is_ocred(), True)
|
||||
self.assertEqual(
|
||||
document.get_date(),
|
||||
datetime.datetime(2018, 4, 1, 0, 0, tzinfo=tz.tzutc())
|
||||
)
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
def test_get_text_8_pdf(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_8.pdf")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document.get_text()
|
||||
self.assertEqual(document._is_ocred(), True)
|
||||
self.assertEqual(
|
||||
document.get_date(),
|
||||
datetime.datetime(2017, 12, 31, 0, 0, tzinfo=tz.tzutc())
|
||||
)
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
def test_get_text_9_pdf(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_9.pdf")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document.get_text()
|
||||
self.assertEqual(document._is_ocred(), True)
|
||||
self.assertEqual(
|
||||
document.get_date(),
|
||||
datetime.datetime(2017, 12, 31, 0, 0, tzinfo=tz.tzutc())
|
||||
datetime.datetime(
|
||||
2020, 3, 1, 0, 0,
|
||||
tzinfo=tz.gettz(settings.TIME_ZONE)
|
||||
)
|
||||
)
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.get_text",
|
||||
return_value="01-07-0590 00:00:00"
|
||||
)
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
@mock.patch(MOCK_SCRATCH, SCRATCH)
|
||||
def test_crazy_date_past(self, *args):
|
||||
document = RasterisedDocumentParser("/dev/null")
|
||||
document.get_text()
|
||||
@@ -402,10 +167,7 @@ class TestDate(TestCase):
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.get_text",
|
||||
return_value="01-07-2350 00:00:00"
|
||||
)
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
@mock.patch(MOCK_SCRATCH, SCRATCH)
|
||||
def test_crazy_date_future(self, *args):
|
||||
document = RasterisedDocumentParser("/dev/null")
|
||||
document.get_text()
|
||||
@@ -415,10 +177,7 @@ class TestDate(TestCase):
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.get_text",
|
||||
return_value="01-07-0590 00:00:00"
|
||||
)
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
@mock.patch(MOCK_SCRATCH, SCRATCH)
|
||||
def test_crazy_date_past(self, *args):
|
||||
document = RasterisedDocumentParser("/dev/null")
|
||||
document.get_text()
|
||||
|
@@ -17,6 +17,5 @@ deps=pycodestyle
|
||||
|
||||
[testenv:doc]
|
||||
deps =
|
||||
-r{toxinidir}/../requirements.txt
|
||||
sphinx
|
||||
-r {toxinidir}/../requirements.txt
|
||||
commands=sphinx-build -b html ../docs ../docs/_build -W
|
||||
|