mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-08-03 18:54:40 -05:00
Compare commits
132 Commits
Author | SHA1 | Date | |
---|---|---|---|
![]() |
5009bd022f | ||
![]() |
73163d893f | ||
![]() |
506af7c9c2 | ||
![]() |
c90ed2da1d | ||
![]() |
cebb8b9fa2 | ||
![]() |
46aca10a72 | ||
![]() |
6384c698ad | ||
![]() |
abf01be889 | ||
![]() |
1e4928d2a0 | ||
![]() |
503be90932 | ||
![]() |
b5d6a82cc3 | ||
![]() |
c073ba5272 | ||
![]() |
d1e317ce21 | ||
![]() |
d4abeafb34 | ||
![]() |
4d96551619 | ||
![]() |
178361b247 | ||
![]() |
40f8ba23a4 | ||
![]() |
bef2d94374 | ||
![]() |
f39c7654a0 | ||
![]() |
e9fff764cb | ||
![]() |
87e466c47c | ||
![]() |
bd0b593c4a | ||
![]() |
7a8142df2b | ||
![]() |
bbe3084eda | ||
![]() |
89d42bd078 | ||
![]() |
93efaf7a38 | ||
![]() |
398575c70c | ||
![]() |
4e21fa4830 | ||
![]() |
d2d2d9edaf | ||
![]() |
771c8bbbe4 | ||
![]() |
20eeda19b8 | ||
![]() |
5e40227bc3 | ||
![]() |
5479942fc0 | ||
![]() |
ce98019b49 | ||
![]() |
9470154df2 | ||
![]() |
5c59120c57 | ||
![]() |
88736ff867 | ||
![]() |
fd5b831979 | ||
![]() |
3fcd1e2d7e | ||
![]() |
2c81648d59 | ||
![]() |
cd92c005e3 | ||
![]() |
31c8cf020e | ||
![]() |
e900a38983 | ||
![]() |
7343a07ddd | ||
![]() |
e20b4fb905 | ||
![]() |
cbbc4d37d0 | ||
![]() |
b140935843 | ||
![]() |
9faf0a102e | ||
![]() |
b747dd58c3 | ||
![]() |
09e1b505e1 | ||
![]() |
a6babffed8 | ||
![]() |
0256e2dfbb | ||
![]() |
7afa90b769 | ||
![]() |
5796956235 | ||
![]() |
3ca215e4dc | ||
![]() |
16c4183333 | ||
![]() |
6fe37678f2 | ||
![]() |
b58188f805 | ||
![]() |
f2a42ab6fe | ||
![]() |
e236b7bf7b | ||
![]() |
35004f434b | ||
![]() |
75251ad694 | ||
![]() |
870357968a | ||
![]() |
a593798b4b | ||
![]() |
4f070ba162 | ||
![]() |
9517d27f40 | ||
![]() |
35bb3dbcc2 | ||
![]() |
06117929bb | ||
![]() |
d1c8241947 | ||
![]() |
4c38b28469 | ||
![]() |
ad0f0a0b5d | ||
![]() |
83746a9aeb | ||
![]() |
6a36a4ec97 | ||
![]() |
7e49d047b0 | ||
![]() |
68cdeb7b3d | ||
![]() |
76293084a4 | ||
![]() |
e1cf2117f5 | ||
![]() |
7d81de4edf | ||
![]() |
37af5992c7 | ||
![]() |
af4623e605 | ||
![]() |
db8e116681 | ||
![]() |
a8616ebfe2 | ||
![]() |
a38d3bf7f8 | ||
![]() |
1cb5bbd07d | ||
![]() |
6edb5b912f | ||
![]() |
ec20c7577e | ||
![]() |
d6df9b3656 | ||
![]() |
80a849fef7 | ||
![]() |
bd67b53d50 | ||
![]() |
e32ed09da3 | ||
![]() |
c5632e5c04 | ||
![]() |
4d2b71454d | ||
![]() |
5cbb33b02b | ||
![]() |
2c55aad6c0 | ||
![]() |
1e039dcb32 | ||
![]() |
6ca8da4858 | ||
![]() |
67b492bcb7 | ||
![]() |
360d1e2802 | ||
![]() |
1cd76634a3 | ||
![]() |
c65c5009e4 | ||
![]() |
24fb6cefb9 | ||
![]() |
d80e272b75 | ||
![]() |
82f05e27c3 | ||
![]() |
7a627e4ad8 | ||
![]() |
73af9552ec | ||
![]() |
e4854f2144 | ||
![]() |
6f5c1ac4e1 | ||
![]() |
22acc51284 | ||
![]() |
a05644fc31 | ||
![]() |
d1aa54caa9 | ||
![]() |
e293f70a91 | ||
![]() |
347986a2b3 | ||
![]() |
ede274386b | ||
![]() |
3e083354cc | ||
![]() |
b2b4f6516a | ||
![]() |
2ae702c7bb | ||
![]() |
b748420a94 | ||
![]() |
8a4546ce0d | ||
![]() |
167412a003 | ||
![]() |
e8d90b42a1 | ||
![]() |
d8c7e9de5f | ||
![]() |
2ac1b78a2c | ||
![]() |
e8e38befb7 | ||
![]() |
b30629dd60 | ||
![]() |
f66d7e1c2d | ||
![]() |
8417ac7eeb | ||
![]() |
6342225b22 | ||
![]() |
4460fb7004 | ||
![]() |
6f635c74fc | ||
![]() |
c82d45689c | ||
![]() |
02e0543a02 | ||
![]() |
fde0276d65 |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -68,6 +68,7 @@ db.sqlite3
|
||||
.idea
|
||||
|
||||
# Other stuff that doesn't belong
|
||||
.virtualenv
|
||||
virtualenv
|
||||
.vagrant
|
||||
docker-compose.yml
|
||||
|
@@ -1,5 +1,9 @@
|
||||
language: python
|
||||
|
||||
before_install:
|
||||
- sudo apt-get update -qq
|
||||
- sudo apt-get install -qq libpoppler-cpp-dev unpaper tesseract-ocr tesseract-ocr-eng
|
||||
|
||||
sudo: false
|
||||
|
||||
matrix:
|
||||
@@ -11,7 +15,7 @@ matrix:
|
||||
- python: 3.6
|
||||
env: TOXENV=py36
|
||||
- python: 3.6
|
||||
env: TOXENV=pep8
|
||||
env: TOXENV=pycodestyle
|
||||
|
||||
install:
|
||||
- pip install --requirement requirements.txt
|
||||
|
76
Dockerfile
76
Dockerfile
@@ -1,50 +1,48 @@
|
||||
FROM python:3.5
|
||||
MAINTAINER Pit Kleyersburg <pitkley@googlemail.com>
|
||||
FROM alpine:3.7
|
||||
|
||||
# Install dependencies
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y --no-install-recommends \
|
||||
sudo \
|
||||
tesseract-ocr tesseract-ocr-eng imagemagick ghostscript unpaper \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install python dependencies
|
||||
RUN mkdir -p /usr/src/paperless
|
||||
WORKDIR /usr/src/paperless
|
||||
COPY requirements.txt /usr/src/paperless/
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
LABEL maintainer="The Paperless Project https://github.com/danielquinn/paperless" \
|
||||
contributors="Guy Addadi <addadi@gmail.com>, Pit Kleyersburg <pitkley@googlemail.com>, \
|
||||
Sven Fischer <git-dev@linux4tw.de>"
|
||||
|
||||
# Copy application
|
||||
RUN mkdir -p /usr/src/paperless/src
|
||||
RUN mkdir -p /usr/src/paperless/data
|
||||
RUN mkdir -p /usr/src/paperless/media
|
||||
COPY requirements.txt /usr/src/paperless/
|
||||
COPY src/ /usr/src/paperless/src/
|
||||
COPY data/ /usr/src/paperless/data/
|
||||
COPY media/ /usr/src/paperless/media/
|
||||
|
||||
# Set consumption directory
|
||||
ENV PAPERLESS_CONSUMPTION_DIR /consume
|
||||
RUN mkdir -p $PAPERLESS_CONSUMPTION_DIR
|
||||
|
||||
# Migrate database
|
||||
WORKDIR /usr/src/paperless/src
|
||||
RUN ./manage.py migrate
|
||||
|
||||
# Create user
|
||||
RUN groupadd -g 1000 paperless \
|
||||
&& useradd -u 1000 -g 1000 -d /usr/src/paperless paperless \
|
||||
&& chown -Rh paperless:paperless /usr/src/paperless
|
||||
|
||||
# Set export directory
|
||||
ENV PAPERLESS_EXPORT_DIR /export
|
||||
RUN mkdir -p $PAPERLESS_EXPORT_DIR
|
||||
|
||||
# Setup entrypoint
|
||||
COPY scripts/docker-entrypoint.sh /sbin/docker-entrypoint.sh
|
||||
RUN chmod 755 /sbin/docker-entrypoint.sh
|
||||
|
||||
# Mount volumes
|
||||
# Set export and consumption directories
|
||||
ENV PAPERLESS_EXPORT_DIR=/export \
|
||||
PAPERLESS_CONSUMPTION_DIR=/consume
|
||||
|
||||
# Install dependencies
|
||||
RUN apk --no-cache --update add \
|
||||
python3 gnupg libmagic bash \
|
||||
sudo poppler tesseract-ocr imagemagick ghostscript unpaper && \
|
||||
apk --no-cache add --virtual .build-dependencies \
|
||||
python3-dev poppler-dev gcc g++ musl-dev zlib-dev jpeg-dev && \
|
||||
# Install python dependencies
|
||||
python3 -m ensurepip && \
|
||||
rm -r /usr/lib/python*/ensurepip && \
|
||||
cd /usr/src/paperless && \
|
||||
pip3 install --no-cache-dir -r requirements.txt && \
|
||||
# Remove build dependencies
|
||||
apk del .build-dependencies && \
|
||||
# Create the consumption directory
|
||||
mkdir -p $PAPERLESS_CONSUMPTION_DIR && \
|
||||
# Migrate database
|
||||
./src/manage.py migrate && \
|
||||
# Create user
|
||||
addgroup -g 1000 paperless && \
|
||||
adduser -D -u 1000 -G paperless -h /usr/src/paperless paperless && \
|
||||
chown -Rh paperless:paperless /usr/src/paperless && \
|
||||
mkdir -p $PAPERLESS_EXPORT_DIR && \
|
||||
# Setup entrypoint
|
||||
chmod 755 /sbin/docker-entrypoint.sh
|
||||
|
||||
WORKDIR /usr/src/paperless/src
|
||||
# Mount volumes and set Entrypoint
|
||||
VOLUME ["/usr/src/paperless/data", "/usr/src/paperless/media", "/consume", "/export"]
|
||||
|
||||
ENTRYPOINT ["/sbin/docker-entrypoint.sh"]
|
||||
CMD ["--help"]
|
||||
|
||||
|
70
README.md
Normal file
70
README.md
Normal file
@@ -0,0 +1,70 @@
|
||||
# Paperless
|
||||
|
||||
  
|
||||
|
||||
Index and archive all of your scanned paper documents
|
||||
|
||||
I hate paper. Environmental issues aside, it's a tech person's nightmare:
|
||||
|
||||
* There's no search feature
|
||||
* It takes up physical space
|
||||
* Backups mean more paper
|
||||
|
||||
In the past few months I've been bitten more than a few times by the problem of not having the right document around. Sometimes I recycled a document I needed (who keeps water bills for two years?) and other times I just lost it... because paper. I wrote this to make my life easier.
|
||||
|
||||
|
||||
## How it Works
|
||||
|
||||
Paperless does not control your scanner; it only helps you deal with what your scanner produces.
|
||||
|
||||
1. Buy a document scanner that can write to a place on your network. If you need some inspiration, have a look at the [scanner recommendations](https://paperless.readthedocs.io/en/latest/scanners.html) page.
|
||||
2. Set it up to "scan to FTP" or something similar. It should be able to push scanned images to a server without you having to do anything. Of course if your scanner doesn't know how to automatically upload the file somewhere, you can always do that manually. Paperless doesn't care how the documents get into its local consumption directory.
|
||||
3. Have the target server run the Paperless consumption script to OCR the file and index it into a local database.
|
||||
4. Use the web frontend to sift through the database and find what you want.
|
||||
5. Download the PDF you need/want via the web interface and do whatever you like with it. You can even print it and send it as if it's the original. In most cases, no one will care or notice.
|
||||
|
||||
Here's what you get:
|
||||
|
||||

|
||||
|
||||
|
||||
## Documentation
|
||||
|
||||
It's all available on [ReadTheDocs](https://paperless.readthedocs.org/).
|
||||
|
||||
|
||||
## Requirements
|
||||
|
||||
This is all really a quite simple, shiny, user-friendly wrapper around some very powerful tools.
|
||||
|
||||
* [ImageMagick](http://imagemagick.org/) converts the images between colour and greyscale.
|
||||
* [Tesseract](https://github.com/tesseract-ocr) does the character recognition.
|
||||
* [Unpaper](https://www.flameeyes.eu/projects/unpaper) despeckles and deskews the scanned image.
|
||||
* [GNU Privacy Guard](https://gnupg.org/) is used as the encryption backend.
|
||||
* [Python 3](https://python.org/) is the language of the project.
|
||||
* [Pillow](https://pypi.python.org/pypi/pillowfight/) loads the image data as a python object to be used with PyOCR.
|
||||
* [PyOCR](https://github.com/jflesch/pyocr) is a slick programmatic wrapper around tesseract.
|
||||
* [Django](https://www.djangoproject.com/) is the framework this project is written against.
|
||||
* [Python-GNUPG](http://pythonhosted.org/python-gnupg/) decrypts the PDFs on-the-fly to allow you to download unencrypted files, leaving the encrypted ones on-disk.
|
||||
|
||||
|
||||
## Stability
|
||||
|
||||
This project has been around since 2015, and there are lots of people using it. However, it's still under active development (just look at the git commit history), so don't expect it to be 100% stable. You can back up the sqlite3 database, media directory and your configuration file to be on the safe side.
|
||||
|
||||
|
||||
## Similar Projects
|
||||
|
||||
There's another project out there called [Mayan EDMS](https://mayan.readthedocs.org/en/latest/) that has a surprising amount of technical overlap with Paperless. Also based on Django and using a consumer model with Tesseract and Unpaper, Mayan EDMS is *much* more featureful and comes with a slick UI as well, but still in Python 2. It may be that Paperless consumes fewer resources, but to be honest, this is just a guess as I haven't tested this myself. One thing's for certain though, *Paperless* is a **way** better name.
|
||||
|
||||
|
||||
## Important Note
|
||||
|
||||
Document scanners are typically used to scan sensitive documents. Things like your social insurance number, tax records, invoices, etc. While Paperless encrypts the original files via the consumption script, the OCR'd text is *not* encrypted and is therefore stored in the clear (it needs to be searchable, so if someone has ideas on how to do that on encrypted data, I'm all ears). This means that Paperless should never be run on an untrusted host. Instead, I recommend that if you do want to use it, run it locally on a server in your own home.
|
||||
|
||||
|
||||
## Donations
|
||||
|
||||
As with all Free software, the power is less in the finances and more in the collective efforts. I really appreciate every pull request and bug report offered up by Paperless' users, so please keep that stuff coming. If, however, you're not one for coding/design/documentation, and would like to contribute financially, I won't say no ;-)
|
||||
|
||||
The thing is, I'm doing ok for money, so I would instead ask you to donate to the [United Nations High Commissioner for Refugees](https://donate.unhcr.org/int-en/general). They're doing important work and they need the money a lot more than I do.
|
144
README.rst
144
README.rst
@@ -1,144 +0,0 @@
|
||||
Paperless
|
||||
#########
|
||||
|
||||
|Documentation|
|
||||
|Chat|
|
||||
|Travis|
|
||||
|Dependencies|
|
||||
|
||||
Index and archive all of your scanned paper documents
|
||||
|
||||
I hate paper. Environmental issues aside, it's a tech person's nightmare:
|
||||
|
||||
* There's no search feature
|
||||
* It takes up physical space
|
||||
* Backups mean more paper
|
||||
|
||||
In the past few months I've been bitten more than a few times by the problem
|
||||
of not having the right document around. Sometimes I recycled a document I
|
||||
needed (who keeps water bills for two years?) and other times I just lost
|
||||
it... because paper. I wrote this to make my life easier.
|
||||
|
||||
|
||||
How it Works
|
||||
============
|
||||
|
||||
Paperless does not control your scanner, it only helps you deal with what your
|
||||
scanner produces
|
||||
|
||||
1. Buy a document scanner like `this one`_ (used by me) or `this other one`_
|
||||
recommended by another user.
|
||||
2. Set it up to "scan to FTP" or something similar. It should be able to push
|
||||
scanned images to a server without you having to do anything. If your
|
||||
scanner doesn't know how to automatically upload the file somewhere, you can
|
||||
always do that manually. Paperless doesn't care how the documents get into
|
||||
its local consumption directory.
|
||||
3. Have the target server run the Paperless consumption script to OCR the file
|
||||
and index it into a local database.
|
||||
4. Use the web frontend to sift through the database and find what you want.
|
||||
5. Download the PDF you need/want via the web interface and do whatever you
|
||||
like with it. You can even print it and send it as if it's the original.
|
||||
In most cases, no one will care or notice.
|
||||
|
||||
Here's what you get:
|
||||
|
||||
.. image:: docs/_static/screenshot.png
|
||||
:alt: The before and after
|
||||
:target: docs/_static/screenshot.png
|
||||
|
||||
|
||||
Stability
|
||||
=========
|
||||
|
||||
Paperless is still under active development (just look at the git commit
|
||||
history) so don't expect it to be 100% stable. You can backup the sqlite3
|
||||
database, media directory and your configuration file to be on the safe side.
|
||||
|
||||
|
||||
Requirements
|
||||
============
|
||||
|
||||
This is all really a quite simple, shiny, user-friendly wrapper around some
|
||||
very powerful tools.
|
||||
|
||||
* `ImageMagick`_ converts the images between colour and greyscale.
|
||||
* `Tesseract`_ does the character recognition.
|
||||
* `Unpaper`_ despeckles and deskews the scanned image.
|
||||
* `GNU Privacy Guard`_ is used as the encryption backend.
|
||||
* `Python 3`_ is the language of the project.
|
||||
|
||||
* `Pillow`_ loads the image data as a python object to be used with PyOCR.
|
||||
* `PyOCR`_ is a slick programmatic wrapper around tesseract.
|
||||
* `Django`_ is the framework this project is written against.
|
||||
* `Python-GNUPG`_ decrypts the PDFs on-the-fly to allow you to download
|
||||
unencrypted files, leaving the encrypted ones on-disk.
|
||||
|
||||
|
||||
Documentation
|
||||
=============
|
||||
|
||||
It's all available on `ReadTheDocs`_.
|
||||
|
||||
|
||||
Similar Projects
|
||||
================
|
||||
|
||||
There's another project out there called `Mayan EDMS`_ that has a surprising
|
||||
amount of technical overlap with Paperless. Also based on Django and using
|
||||
a consumer model with Tesseract and Unpaper, Mayan EDMS is *much* more
|
||||
featureful and comes with a slick UI as well, but still in Python 2. It may be
|
||||
that Paperless consumes fewer resources, but to be honest, this is just a guess
|
||||
as I haven't tested this myself. One thing's for certain though, *Paperless*
|
||||
is a **much** better name.
|
||||
|
||||
|
||||
Important Note
|
||||
==============
|
||||
|
||||
Document scanners are typically used to scan sensitive documents. Things like
|
||||
your social insurance number, tax records, invoices, etc. While Paperless
|
||||
encrypts the original files via the consumption script, the OCR'd text is *not*
|
||||
encrypted and is therefore stored in the clear (it needs to be searchable, so
|
||||
if someone has ideas on how to do that on encrypted data, I'm all ears). This
|
||||
means that Paperless should never be run on an untrusted host. Instead, I
|
||||
recommend that if you do want to use it, run it locally on a server in your own
|
||||
home.
|
||||
|
||||
|
||||
Donations
|
||||
=========
|
||||
|
||||
As with all Free software, the power is less in the finances and more in the
|
||||
collective efforts. I really appreciate every pull request and bug report
|
||||
offered up by Paperless' users, so please keep that stuff coming. If however,
|
||||
you're not one for coding/design/documentation, and would like to contribute
|
||||
financially, I won't say no ;-)
|
||||
|
||||
The thing is, I'm doing ok for money, so I would instead ask you to donate to
|
||||
the `United Nations High Commissioner for Refugees`_. They're doing important
|
||||
work and they need the money a lot more than I do.
|
||||
|
||||
.. _this one: http://www.brother.ca/en-CA/Scanners/11/ProductDetail/ADS1500W?ProductDetail=productdetail
|
||||
.. _this other one: http://www.fujitsu.com/us/products/computing/peripheral/scanners/scansnap/ix500/
|
||||
.. _ImageMagick: http://imagemagick.org/
|
||||
.. _Tesseract: https://github.com/tesseract-ocr
|
||||
.. _Unpaper: https://www.flameeyes.eu/projects/unpaper
|
||||
.. _GNU Privacy Guard: https://gnupg.org/
|
||||
.. _Python 3: https://python.org/
|
||||
.. _Pillow: https://pypi.python.org/pypi/pillowfight/
|
||||
.. _PyOCR: https://github.com/jflesch/pyocr
|
||||
.. _Django: https://www.djangoproject.com/
|
||||
.. _Python-GNUPG: http://pythonhosted.org/python-gnupg/
|
||||
.. _ReadTheDocs: https://paperless.readthedocs.org/
|
||||
.. _Mayan EDMS: https://mayan.readthedocs.org/en/latest/
|
||||
.. _United Nations High Commissioner for Refugees: https://donate.unhcr.org/int-en/general
|
||||
.. |Documentation| image:: https://readthedocs.org/projects/paperless/badge/?version=latest
|
||||
:alt: Read the documentation at https://paperless.readthedocs.org/
|
||||
:target: https://paperless.readthedocs.org/
|
||||
.. |Chat| image:: https://badges.gitter.im/danielquinn/paperless.svg
|
||||
:alt: Join the chat at https://gitter.im/danielquinn/paperless
|
||||
:target: https://gitter.im/danielquinn/paperless?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge
|
||||
.. |Travis| image:: https://travis-ci.org/danielquinn/paperless.svg?branch=master
|
||||
:target: https://travis-ci.org/danielquinn/paperless
|
||||
.. |Dependencies| image:: https://www.versioneye.com/user/projects/57b33b81d9f1b00016faa500/badge.svg?style=flat-square
|
||||
:target: https://www.versioneye.com/user/projects/57b33b81d9f1b00016faa500
|
5
Vagrantfile
vendored
5
Vagrantfile
vendored
@@ -12,4 +12,9 @@ Vagrant.configure(VAGRANT_API_VERSION) do |config|
|
||||
|
||||
# Networking details
|
||||
config.vm.network "private_network", ip: "172.28.128.4"
|
||||
|
||||
config.vm.provider "virtualbox" do |vb|
|
||||
# Customize the amount of memory on the VM:
|
||||
vb.memory = "1024"
|
||||
end
|
||||
end
|
||||
|
@@ -2,7 +2,7 @@ version: '2'
|
||||
|
||||
services:
|
||||
webserver:
|
||||
image: pitkley/paperless
|
||||
build: ./
|
||||
ports:
|
||||
# You can adapt the port you want Paperless to listen on by
|
||||
# modifying the part before the `:`.
|
||||
@@ -17,16 +17,16 @@ services:
|
||||
# value with nothing.
|
||||
environment:
|
||||
- PAPERLESS_OCR_LANGUAGES=
|
||||
command: ["runserver", "0.0.0.0:8000"]
|
||||
command: ["runserver", "--insecure", "0.0.0.0:8000"]
|
||||
|
||||
consumer:
|
||||
image: pitkley/paperless
|
||||
build: ./
|
||||
volumes:
|
||||
- data:/usr/src/paperless/data
|
||||
- media:/usr/src/paperless/media
|
||||
# You have to adapt the local path you want the consumption
|
||||
# directory to mount to by modifying the part before the ':'.
|
||||
- /path/to/arbitrary/place:/consume
|
||||
- ./consume:/consume
|
||||
# Likewise, you can add a local path to mount a directory for
|
||||
# exporting. This is not strictly needed for paperless to
|
||||
# function, only if you're exporting your files: uncomment
|
||||
|
@@ -1,191 +1,302 @@
|
||||
Changelog
|
||||
#########
|
||||
|
||||
* 0.5.0
|
||||
* Support for fuzzy matching in the auto-tagger & auto-correspondent systems
|
||||
thanks to `Jake Gysland`_'s patch `#220`_.
|
||||
* Modified the Dockerfile to prepare an export directory (`#212`_). Thanks
|
||||
to combined efforts from `Pit`_ and `Strubbl`_ in working out the kinks on
|
||||
this one.
|
||||
* Updated the import/export scripts to include support for thumbnails. Big
|
||||
thanks to `CkuT`_ for finding this shortcoming and doing the work to get
|
||||
it fixed in `#224`_.
|
||||
* All of the following changes are thanks to `David Martin`_:
|
||||
* Bumped the dependency on pyocr to 0.4.7 so new users can make use of Tesseract 4 if they so prefer (`#226`_).
|
||||
* Fixed a number of issues with the automated mail handler (`#227`_, `#228`_)
|
||||
* Amended the documentation for better handling of systemd service files (`#229`_)
|
||||
* Amended the Django Admin configuration to have nice headers (`#230`_)
|
||||
1.2.0
|
||||
=====
|
||||
|
||||
* 0.4.1
|
||||
* Fix for `#206`_ wherein the pluggable parser didn't recognise files with
|
||||
all-caps suffixes like ``.PDF``
|
||||
* New Docker image, now based on Alpine, thanks to the efforts of `addadi`_
|
||||
and `Pit`_. This new image is dramatically smaller than the Debian-based
|
||||
one, and it also has `a new home on Docker Hub`_. A proper thank-you to
|
||||
`Pit`_ for hosting the image on his Docker account all this time, but after
|
||||
some discussion, we decided the image needed a more *official-looking* home.
|
||||
* `BastianPoe`_ has added the long-awaited feature to automatically skip the
|
||||
OCR step when the PDF already contains text. This can be overridden by
|
||||
setting ``PAPERLESS_OCR_ALWAYS=YES`` either in your ``paperless.conf`` or
|
||||
in the environment. Note that this also means that Paperless now requires
|
||||
``libpoppler-cpp-dev`` to be installed. **Important**: You'll need to run
|
||||
``pip install -r requirements.txt`` after the usual ``git pull`` to
|
||||
properly update.
|
||||
* `BastianPoe`_ has also contributed a monumental amount of work (`#291`_) to
|
||||
solving `#158`_: setting the document creation date based on finding a date
|
||||
in the document text.
|
||||
|
||||
* 0.4.0
|
||||
* Introducing reminders. See `#199`_ for more information, but the short
|
||||
explanation is that you can now attach simple notes & times to documents
|
||||
which are made available via the API. Currently, the default API
|
||||
(basically just the Django admin) doesn't really make use of this, but
|
||||
`Thomas Brueggemann`_ over at `Paperless Desktop`_ has said that he would
|
||||
like to make use of this feature in his project.
|
||||
1.1.0
|
||||
=====
|
||||
|
||||
* 0.3.6
|
||||
* Fix for `#200`_ (!!) where the API wasn't configured to allow updating the
|
||||
correspondent or the tags for a document.
|
||||
* The ``content`` field is now optional, to allow for the edge case of a
|
||||
purely graphical document.
|
||||
* You can no longer add documents via the admin. This never worked in the
|
||||
first place, so all I've done here is remove the link to the broken form.
|
||||
* The consumer code has been heavily refactored to support a pluggable
|
||||
interface. Install a paperless consumer via pip and tell paperless about
|
||||
it with an environment variable, and you're good to go. Proper
|
||||
documentation is on its way.
|
||||
* Fix for `#283`_, a redirect bug which broke interactions with
|
||||
paperless-desktop. Thanks to `chris-aeviator`_ for reporting it.
|
||||
* Addition of an optional new financial year filter, courtesy of
|
||||
`David Martin`_ `#256`_
|
||||
* Fixed a typo in how thumbnails were named in exports `#285`_, courtesy of
|
||||
`Dan Panzarella`_
|
||||
|
||||
* 0.3.5
|
||||
* A serious facelift for the documents listing page wherein we drop the
|
||||
tabular layout in favour of a tiled interface.
|
||||
* Users can now configure the number of items per page.
|
||||
* Fix for `#171`_: Allow users to specify their own ``SECRET_KEY`` value.
|
||||
* Moved the dotenv loading to the top of settings.py
|
||||
* Fix for `#112`_: Added checks for binaries required for document
|
||||
consumption.
|
||||
1.0.0
|
||||
=====
|
||||
|
||||
* 0.3.4
|
||||
* Removal of django-suit due to a licensing conflict I bumped into in 0.3.3.
|
||||
Note that you *can* use Django Suit with Paperless, but only in a
|
||||
non-profit situation as their free license prohibits for-profit use. As a
|
||||
result, I can't bundle Suit with Paperless without conflicting with the
|
||||
GPL. Further development will be done against the stock Django admin.
|
||||
* I shrunk the thumbnails a little 'cause they were too big for me, even on
|
||||
my high-DPI monitor.
|
||||
* BasicAuth support for document and thumbnail downloads, as well as the Push
|
||||
API thanks to @thomasbrueggemann. See `#179`_.
|
||||
* Upgrade to Django 1.11. **You'll need to run
|
||||
``pip install -r requirements.txt`` after the usual ``git pull`` to
|
||||
properly update**.
|
||||
* Replace the templatetag-based hack we had for document listing in favour of
|
||||
a slightly less ugly solution in the form of another template tag with less
|
||||
copypasta.
|
||||
* Support for multi-word-matches for auto-tagging thanks to an excellent
|
||||
patch from `ishirav`_ `#277`_.
|
||||
* Fixed a CSS bug reported by `Stefan Hagen`_ that caused an overlapping of
|
||||
the text and checkboxes under some resolutions `#272`_.
|
||||
* Patched the Docker config to force the serving of static files. Credit for
|
||||
this one goes to `dev-rke`_ via `#248`_.
|
||||
* Fix file permissions during Docker start up thanks to `Pit`_ on `#268`_.
|
||||
* Date fields in the admin are now expressed as HTML5 date fields thanks to
|
||||
`Lukas Winkler`_'s issue `#278`_
|
||||
|
||||
* 0.3.3
|
||||
* Thumbnails in the UI and a Django-suit -based face-lift courtesy of @ekw!
|
||||
* Timezone, items per page, and default language are now all configurable,
|
||||
also thanks to @ekw.
|
||||
0.8.0
|
||||
=====
|
||||
|
||||
* 0.3.2
|
||||
* Fix for `#172`_: defaulting ALLOWED_HOSTS to ``["*"]`` and allowing the
|
||||
user to set her own value via ``PAPERLESS_ALLOWED_HOSTS`` should the need
|
||||
arise.
|
||||
* Paperless can now run in a subdirectory on a host (``/paperless``), rather
|
||||
than always running in the root (``/``) thanks to `maphy-psd`_'s work on
|
||||
`#255`_.
|
||||
|
||||
* 0.3.1
|
||||
* Added a default value for ``CONVERT_BINARY``
|
||||
0.7.0
|
||||
=====
|
||||
|
||||
* 0.3.0
|
||||
* Updated to using django-filter 1.x
|
||||
* Added some system checks so new users aren't confused by misconfigurations.
|
||||
* Consumer loop time is now configurable for systems with slow writes. Just
|
||||
set ``PAPERLESS_CONSUMER_LOOP_TIME`` to a number of seconds. The default
|
||||
is 10.
|
||||
* As per `#44`_, we've removed support for ``PAPERLESS_CONVERT``,
|
||||
``PAPERLESS_CONSUME``, and ``PAPERLESS_SECRET``. Please use
|
||||
``PAPERLESS_CONVERT_BINARY``, ``PAPERLESS_CONSUMPTION_DIR``, and
|
||||
``PAPERLESS_SHARED_SECRET`` respectively instead.
|
||||
* **Potentially breaking change**: As per `#235`_, Paperless will no longer
|
||||
automatically delete documents attached to correspondents when those
|
||||
correspondents are themselves deleted. This was Django's default
|
||||
behaviour, but didn't make much sense in Paperless' case. Thanks to
|
||||
`Thomas Brueggemann`_ and `David Martin`_ for their input on this one.
|
||||
* Fix for `#232`_ wherein Paperless wasn't recognising ``.tif`` files
|
||||
properly. Thanks to `ayounggun`_ for reporting this one and to
|
||||
`Kusti Skytén`_ for posting the correct solution in the Github issue.
|
||||
|
||||
* 0.2.0
|
||||
0.6.0
|
||||
=====
|
||||
|
||||
* `#150`_: The media root is now a variable you can set in
|
||||
``paperless.conf``.
|
||||
* `#148`_: The database location (sqlite) is now a variable you can set in
|
||||
``paperless.conf``.
|
||||
* `#146`_: Fixed a bug that allowed unauthorised access to the ``/fetch``
|
||||
URL.
|
||||
* `#131`_: Document files are now automatically removed from disk when
|
||||
they're deleted in Paperless.
|
||||
* `#121`_: Fixed a bug where Paperless wasn't setting document creation time
|
||||
based on the file naming scheme.
|
||||
* `#81`_: Added a hook to run an arbitrary script after every document is
|
||||
consumed.
|
||||
* `#98`_: Added optional environment variables for ImageMagick so that it
|
||||
doesn't explode when handling Very Large Documents or when it's just
|
||||
running on a low-memory system. Thanks to `Florian Harr`_ for his help on
|
||||
this one.
|
||||
* `#89`_ Ported the auto-tagging code to correspondents as well. Thanks to
|
||||
`Justin Snyman`_ for the pointers in the issue queue.
|
||||
* Added support for guessing the date from the file name along with the
|
||||
correspondent, title, and tags. Thanks to `Tikitu de Jager`_ for his pull
|
||||
request that I took forever to merge and to `Pit`_ for his efforts on the
|
||||
regex front.
|
||||
* `#94`_: Restored support for changing the created date in the UI. Thanks
|
||||
to `Martin Honermeyer`_ and `Tim White`_ for working with me on this.
|
||||
* Abandon the shared-secret trick we were using for the POST API in favour
|
||||
of BasicAuth or Django session.
|
||||
* Fix the POST API so it actually works. `#236`_
|
||||
* **Breaking change**: We've dropped the use of ``PAPERLESS_SHARED_SECRET``
|
||||
as it was being used both for the API (now replaced with a normal auth)
|
||||
and for email polling. Now that we're only using it for email, this
|
||||
variable has been renamed to ``PAPERLESS_EMAIL_SECRET``. The old value
|
||||
will still work for a while, but you should change your config if you've
|
||||
been using the email polling feature. Thanks to `Joshua Gilman`_ for all
|
||||
the help with this feature.
|
||||
|
||||
* 0.1.1
|
||||
0.5.0
|
||||
=====
|
||||
|
||||
* Potentially **Breaking Change**: All references to "sender" in the code
|
||||
have been renamed to "correspondent" to better reflect the nature of the
|
||||
property (one could quite reasonably scan a document before sending it to
|
||||
someone.)
|
||||
* `#67`_: Rewrote the document exporter and added a new importer that allows
|
||||
for full metadata retention without depending on the file name and
|
||||
modification time. A big thanks to `Tikitu de Jager`_, `Pit`_,
|
||||
`Florian Jung`_, and `Christopher Luu`_ for their code snippets and
|
||||
contributing conversation that led to this change.
|
||||
* `#20`_: Added *unpaper* support to help in cleaning up the scanned image
|
||||
before it's OCR'd. Thanks to `Pit`_ for this one.
|
||||
* `#71`_ Added (encrypted) thumbnails in anticipation of a proper UI.
|
||||
* `#68`_: Added support for using a proper config file at
|
||||
``/etc/paperless.conf`` and modified the systemd unit files to use it.
|
||||
* Refactored the Vagrant installation process to use environment variables
|
||||
rather than asking the user to modify ``settings.py``.
|
||||
* `#44`_: Harmonise environment variable names with constant names.
|
||||
* `#60`_: Setup logging to actually use the Python native logging framework.
|
||||
* `#53`_: Fixed an annoying bug that caused ``.jpeg`` and ``.JPG`` images
|
||||
to be imported but made unavailable.
|
||||
* Support for fuzzy matching in the auto-tagger & auto-correspondent systems
|
||||
thanks to `Jake Gysland`_'s patch `#220`_.
|
||||
* Modified the Dockerfile to prepare an export directory (`#212`_). Thanks
|
||||
to combined efforts from `Pit`_ and `Strubbl`_ in working out the kinks on
|
||||
this one.
|
||||
* Updated the import/export scripts to include support for thumbnails. Big
|
||||
thanks to `CkuT`_ for finding this shortcoming and doing the work to get
|
||||
it fixed in `#224`_.
|
||||
* All of the following changes are thanks to `David Martin`_:
|
||||
* Bumped the dependency on pyocr to 0.4.7 so new users can make use of
|
||||
Tesseract 4 if they so prefer (`#226`_).
|
||||
* Fixed a number of issues with the automated mail handler (`#227`_, `#228`_)
|
||||
* Amended the documentation for better handling of systemd service files (`#229`_)
|
||||
* Amended the Django Admin configuration to have nice headers (`#230`_)
|
||||
|
||||
* 0.1.0
|
||||
0.4.1
|
||||
=====
|
||||
|
||||
* Docker support! Big thanks to `Wayne Werner`_, `Brian Conn`_, and
|
||||
`Tikitu de Jager`_ for this one, and especially to `Pit`_
|
||||
who spearheaded this effort.
|
||||
* A simple REST API is in place, but it should be considered unstable.
|
||||
* Cleaned up the consumer to use temporary directories instead of a single
|
||||
scratch space. (Thanks `Pit`_)
|
||||
* Improved the efficiency of the consumer by parsing pages more intelligently
|
||||
and introducing a threaded OCR process (thanks again `Pit`_).
|
||||
* `#45`_: Cleaned up the logic for tag matching. Reported by `darkmatter`_.
|
||||
* `#47`_: Auto-rotate landscape documents. Reported by `Paul`_ and fixed by
|
||||
`Pit`_.
|
||||
* `#48`_: Matching algorithms should do so on a word boundary (`darkmatter`_)
|
||||
* `#54`_: Documented the re-tagger (`zedster`_)
|
||||
* `#57`_: Make sure file is preserved on import failure (`darkmatter`_)
|
||||
* Added tox with pep8 checking
|
||||
* Fix for `#206`_ wherein the pluggable parser didn't recognise files with
|
||||
all-caps suffixes like ``.PDF``
|
||||
|
||||
* 0.0.6
|
||||
0.4.0
|
||||
=====
|
||||
|
||||
* Added support for parallel OCR (significant work from `Pit`_)
|
||||
* Sped up the language detection (significant work from `Pit`_)
|
||||
* Added simple logging
|
||||
* Introducing reminders. See `#199`_ for more information, but the short
|
||||
explanation is that you can now attach simple notes & times to documents
|
||||
which are made available via the API. Currently, the default API
|
||||
(basically just the Django admin) doesn't really make use of this, but
|
||||
`Thomas Brueggemann`_ over at `Paperless Desktop`_ has said that he would
|
||||
like to make use of this feature in his project.
|
||||
|
||||
* 0.0.5
|
||||
0.3.6
|
||||
=====
|
||||
|
||||
* Added support for image files as documents (png, jpg, gif, tiff)
|
||||
* Added a crude means of HTTP POST for document imports
|
||||
* Added IMAP mail support
|
||||
* Added a re-tagging utility
|
||||
* Documentation for the above as well as data migration
|
||||
* Fix for `#200`_ (!!) where the API wasn't configured to allow updating the
|
||||
correspondent or the tags for a document.
|
||||
* The ``content`` field is now optional, to allow for the edge case of a
|
||||
purely graphical document.
|
||||
* You can no longer add documents via the admin. This never worked in the
|
||||
first place, so all I've done here is remove the link to the broken form.
|
||||
* The consumer code has been heavily refactored to support a pluggable
|
||||
interface. Install a paperless consumer via pip and tell paperless about
|
||||
it with an environment variable, and you're good to go. Proper
|
||||
documentation is on its way.
|
||||
|
||||
* 0.0.4
|
||||
0.3.5
|
||||
=====
|
||||
|
||||
* Added automated tagging based on keyword matching
|
||||
* Cleaned up the document listing page
|
||||
* Removed ``User`` and ``Group`` from the admin
|
||||
* Added ``pytz`` to the list of requirements
|
||||
* A serious facelift for the documents listing page wherein we drop the
|
||||
tabular layout in favour of a tiled interface.
|
||||
* Users can now configure the number of items per page.
|
||||
* Fix for `#171`_: Allow users to specify their own ``SECRET_KEY`` value.
|
||||
* Moved the dotenv loading to the top of settings.py
|
||||
* Fix for `#112`_: Added checks for binaries required for document
|
||||
consumption.
|
||||
|
||||
* 0.0.3
|
||||
0.3.4
|
||||
=====
|
||||
|
||||
* Added basic tagging
|
||||
* Removal of django-suit due to a licensing conflict I bumped into in 0.3.3.
|
||||
Note that you *can* use Django Suit with Paperless, but only in a
|
||||
non-profit situation as their free license prohibits for-profit use. As a
|
||||
result, I can't bundle Suit with Paperless without conflicting with the
|
||||
GPL. Further development will be done against the stock Django admin.
|
||||
* I shrunk the thumbnails a little 'cause they were too big for me, even on
|
||||
my high-DPI monitor.
|
||||
* BasicAuth support for document and thumbnail downloads, as well as the Push
|
||||
API thanks to @thomasbrueggemann. See `#179`_.
|
||||
|
||||
* 0.0.2
|
||||
0.3.3
|
||||
=====
|
||||
|
||||
* Added language detection
|
||||
* Added datestamps to ``document_exporter``.
|
||||
* Changed ``settings.TESSERACT_LANGUAGE`` to ``settings.OCR_LANGUAGE``.
|
||||
* Thumbnails in the UI and a Django-suit -based face-lift courtesy of @ekw!
|
||||
* Timezone, items per page, and default language are now all configurable,
|
||||
also thanks to @ekw.
|
||||
|
||||
* 0.0.1
|
||||
0.3.2
|
||||
=====
|
||||
|
||||
* Initial release
|
||||
* Fix for `#172`_: defaulting ALLOWED_HOSTS to ``["*"]`` and allowing the
|
||||
user to set her own value via ``PAPERLESS_ALLOWED_HOSTS`` should the need
|
||||
arise.
|
||||
|
||||
0.3.1
|
||||
=====
|
||||
|
||||
* Added a default value for ``CONVERT_BINARY``
|
||||
|
||||
0.3.0
|
||||
=====
|
||||
|
||||
* Updated to using django-filter 1.x
|
||||
* Added some system checks so new users aren't confused by misconfigurations.
|
||||
* Consumer loop time is now configurable for systems with slow writes. Just
|
||||
set ``PAPERLESS_CONSUMER_LOOP_TIME`` to a number of seconds. The default
|
||||
is 10.
|
||||
* As per `#44`_, we've removed support for ``PAPERLESS_CONVERT``,
|
||||
``PAPERLESS_CONSUME``, and ``PAPERLESS_SECRET``. Please use
|
||||
``PAPERLESS_CONVERT_BINARY``, ``PAPERLESS_CONSUMPTION_DIR``, and
|
||||
``PAPERLESS_SHARED_SECRET`` respectively instead.
|
||||
|
||||
0.2.0
|
||||
=====
|
||||
|
||||
* `#150`_: The media root is now a variable you can set in
|
||||
``paperless.conf``.
|
||||
* `#148`_: The database location (sqlite) is now a variable you can set in
|
||||
``paperless.conf``.
|
||||
* `#146`_: Fixed a bug that allowed unauthorised access to the ``/fetch``
|
||||
URL.
|
||||
* `#131`_: Document files are now automatically removed from disk when
|
||||
they're deleted in Paperless.
|
||||
* `#121`_: Fixed a bug where Paperless wasn't setting document creation time
|
||||
based on the file naming scheme.
|
||||
* `#81`_: Added a hook to run an arbitrary script after every document is
|
||||
consumed.
|
||||
* `#98`_: Added optional environment variables for ImageMagick so that it
|
||||
doesn't explode when handling Very Large Documents or when it's just
|
||||
running on a low-memory system. Thanks to `Florian Harr`_ for his help on
|
||||
this one.
|
||||
* `#89`_ Ported the auto-tagging code to correspondents as well. Thanks to
|
||||
`Justin Snyman`_ for the pointers in the issue queue.
|
||||
* Added support for guessing the date from the file name along with the
|
||||
correspondent, title, and tags. Thanks to `Tikitu de Jager`_ for his pull
|
||||
request that I took forever to merge and to `Pit`_ for his efforts on the
|
||||
regex front.
|
||||
* `#94`_: Restored support for changing the created date in the UI. Thanks
|
||||
to `Martin Honermeyer`_ and `Tim White`_ for working with me on this.
|
||||
|
||||
0.1.1
|
||||
=====
|
||||
|
||||
* Potentially **Breaking Change**: All references to "sender" in the code
|
||||
have been renamed to "correspondent" to better reflect the nature of the
|
||||
property (one could quite reasonably scan a document before sending it to
|
||||
someone.)
|
||||
* `#67`_: Rewrote the document exporter and added a new importer that allows
|
||||
for full metadata retention without depending on the file name and
|
||||
modification time. A big thanks to `Tikitu de Jager`_, `Pit`_,
|
||||
`Florian Jung`_, and `Christopher Luu`_ for their code snippets and
|
||||
contributing conversation that led to this change.
|
||||
* `#20`_: Added *unpaper* support to help in cleaning up the scanned image
|
||||
before it's OCR'd. Thanks to `Pit`_ for this one.
|
||||
* `#71`_ Added (encrypted) thumbnails in anticipation of a proper UI.
|
||||
* `#68`_: Added support for using a proper config file at
|
||||
``/etc/paperless.conf`` and modified the systemd unit files to use it.
|
||||
* Refactored the Vagrant installation process to use environment variables
|
||||
rather than asking the user to modify ``settings.py``.
|
||||
* `#44`_: Harmonise environment variable names with constant names.
|
||||
* `#60`_: Setup logging to actually use the Python native logging framework.
|
||||
* `#53`_: Fixed an annoying bug that caused ``.jpeg`` and ``.JPG`` images
|
||||
to be imported but made unavailable.
|
||||
|
||||
0.1.0
|
||||
=====
|
||||
|
||||
* Docker support! Big thanks to `Wayne Werner`_, `Brian Conn`_, and
|
||||
`Tikitu de Jager`_ for this one, and especially to `Pit`_
|
||||
who spearheaded this effort.
|
||||
* A simple REST API is in place, but it should be considered unstable.
|
||||
* Cleaned up the consumer to use temporary directories instead of a single
|
||||
scratch space. (Thanks `Pit`_)
|
||||
* Improved the efficiency of the consumer by parsing pages more intelligently
|
||||
and introducing a threaded OCR process (thanks again `Pit`_).
|
||||
* `#45`_: Cleaned up the logic for tag matching. Reported by `darkmatter`_.
|
||||
* `#47`_: Auto-rotate landscape documents. Reported by `Paul`_ and fixed by
|
||||
`Pit`_.
|
||||
* `#48`_: Matching algorithms should do so on a word boundary (`darkmatter`_)
|
||||
* `#54`_: Documented the re-tagger (`zedster`_)
|
||||
* `#57`_: Make sure file is preserved on import failure (`darkmatter`_)
|
||||
* Added tox with pep8 checking
|
||||
|
||||
0.0.6
|
||||
=====
|
||||
|
||||
* Added support for parallel OCR (significant work from `Pit`_)
|
||||
* Sped up the language detection (significant work from `Pit`_)
|
||||
* Added simple logging
|
||||
|
||||
0.0.5
|
||||
=====
|
||||
|
||||
* Added support for image files as documents (png, jpg, gif, tiff)
|
||||
* Added a crude means of HTTP POST for document imports
|
||||
* Added IMAP mail support
|
||||
* Added a re-tagging utility
|
||||
* Documentation for the above as well as data migration
|
||||
|
||||
0.0.4
|
||||
=====
|
||||
|
||||
* Added automated tagging based on keyword matching
|
||||
* Cleaned up the document listing page
|
||||
* Removed ``User`` and ``Group`` from the admin
|
||||
* Added ``pytz`` to the list of requirements
|
||||
|
||||
0.0.3
|
||||
=====
|
||||
|
||||
* Added basic tagging
|
||||
|
||||
0.0.2
|
||||
=====
|
||||
|
||||
* Added language detection
|
||||
* Added datestamps to ``document_exporter``.
|
||||
* Changed ``settings.TESSERACT_LANGUAGE`` to ``settings.OCR_LANGUAGE``.
|
||||
|
||||
0.0.1
|
||||
=====
|
||||
|
||||
* Initial release
|
||||
|
||||
.. _Brian Conn: https://github.com/TheConnMan
|
||||
.. _Christopher Luu: https://github.com/nuudles
|
||||
@@ -206,6 +317,18 @@ Changelog
|
||||
.. _CkuT: https://github.com/CkuT
|
||||
.. _David Martin: https://github.com/ddddavidmartin
|
||||
.. _Paperless Desktop: https://github.com/thomasbrueggemann/paperless-desktop
|
||||
.. _Joshua Gilman: https://github.com/jmgilman
|
||||
.. _ayounggun: https://github.com/ayounggun
|
||||
.. _Kusti Skytén: https://github.com/kskyten
|
||||
.. _maphy-psd: https://github.com/maphy-psd
|
||||
.. _ishirav: https://github.com/ishirav
|
||||
.. _Stefan Hagen: https://github.com/xkpd3
|
||||
.. _dev-rke: https://github.com/dev-rke
|
||||
.. _Lukas Winkler: https://github.com/Findus23
|
||||
.. _chris-aeviator: https://github.com/chris-aeviator
|
||||
.. _Dan Panzarella: https://github.com/pzl
|
||||
.. _addadi: https://github.com/addadi
|
||||
.. _BastianPoe: https://github.com/BastianPoe
|
||||
|
||||
.. _#20: https://github.com/danielquinn/paperless/issues/20
|
||||
.. _#44: https://github.com/danielquinn/paperless/issues/44
|
||||
@@ -229,6 +352,7 @@ Changelog
|
||||
.. _#146: https://github.com/danielquinn/paperless/issues/146
|
||||
.. _#148: https://github.com/danielquinn/paperless/pull/148
|
||||
.. _#150: https://github.com/danielquinn/paperless/pull/150
|
||||
.. _#158: https://github.com/danielquinn/paperless/issues/158
|
||||
.. _#171: https://github.com/danielquinn/paperless/issues/171
|
||||
.. _#172: https://github.com/danielquinn/paperless/issues/172
|
||||
.. _#179: https://github.com/danielquinn/paperless/pull/179
|
||||
@@ -243,4 +367,19 @@ Changelog
|
||||
.. _#228: https://github.com/danielquinn/paperless/pull/228
|
||||
.. _#229: https://github.com/danielquinn/paperless/pull/229
|
||||
.. _#230: https://github.com/danielquinn/paperless/pull/230
|
||||
.. _#232: https://github.com/danielquinn/paperless/issues/232
|
||||
.. _#235: https://github.com/danielquinn/paperless/issues/235
|
||||
.. _#236: https://github.com/danielquinn/paperless/issues/236
|
||||
.. _#255: https://github.com/danielquinn/paperless/pull/255
|
||||
.. _#268: https://github.com/danielquinn/paperless/pull/268
|
||||
.. _#277: https://github.com/danielquinn/paperless/pull/277
|
||||
.. _#272: https://github.com/danielquinn/paperless/issues/272
|
||||
.. _#248: https://github.com/danielquinn/paperless/issues/248
|
||||
.. _#278: https://github.com/danielquinn/paperless/issues/278
|
||||
.. _#283: https://github.com/danielquinn/paperless/issues/283
|
||||
.. _#256: https://github.com/danielquinn/paperless/pull/256
|
||||
.. _#285: https://github.com/danielquinn/paperless/pull/285
|
||||
.. _#291: https://github.com/danielquinn/paperless/pull/291
|
||||
|
||||
.. _pipenv: https://docs.pipenv.org/
|
||||
.. _a new home on Docker Hub: https://hub.docker.com/r/danielquinn/paperless/
|
@@ -125,7 +125,7 @@ So, with all that in mind, here's what you do to get it running:
|
||||
``PATHS AND FOLDERS`` and ``SECURITY``.
|
||||
If you decided to use a subfolder of an existing account, then make sure you
|
||||
set ``PAPERLESS_CONSUME_MAIL_INBOX`` accordingly here. You also have to set
|
||||
the ``PAPERLESS_SHARED_SECRET`` to something you can remember 'cause you'll
|
||||
the ``PAPERLESS_EMAIL_SECRET`` to something you can remember 'cause you'll
|
||||
have to include that in every email you send.
|
||||
3. Restart the :ref:`consumer <utilities-consumer>`. The consumer will check
|
||||
the configured email account at startup and from then on every 10 minutes
|
||||
@@ -147,46 +147,83 @@ So, with all that in mind, here's what you do to get it running:
|
||||
HTTP POST
|
||||
=========
|
||||
|
||||
You can also submit a document via HTTP POST. It doesn't do tags yet, and the
|
||||
URL schema isn't concrete, but it's a start.
|
||||
|
||||
To push your document to Paperless, send an HTTP POST to the server with the
|
||||
following name/value pairs:
|
||||
You can also submit a document via HTTP POST, so long as you do so after
|
||||
authenticating. To push your document to Paperless, send an HTTP POST to the
|
||||
server with the following name/value pairs:
|
||||
|
||||
* ``correspondent``: The name of the document's correspondent. Note that there
|
||||
are restrictions on what characters you can use here. Specifically,
|
||||
alphanumeric characters, `-`, `,`, `.`, and `'` are ok, everything else it
|
||||
alphanumeric characters, `-`, `,`, `.`, and `'` are ok, everything else is
|
||||
out. You also can't use the sequence ` - ` (space, dash, space).
|
||||
* ``title``: The title of the document. The rules for characters are the same
|
||||
here as the correspondent.
|
||||
* ``signature``: For security reasons, we have the correspondent send a
|
||||
signature using a "shared secret" method to make sure that random strangers
|
||||
don't start uploading stuff to your server. The means of generating this
|
||||
signature is defined below.
|
||||
* ``document``: The file you're uploading
|
||||
|
||||
Specify ``enctype="multipart/form-data"``, and then POST your file with::
|
||||
|
||||
Content-Disposition: form-data; name="document"; filename="whatever.pdf"
|
||||
|
||||
An example of this in HTML is a typical form:
|
||||
|
||||
.. _consumption-http-signature:
|
||||
.. code:: html
|
||||
|
||||
Generating the Signature
|
||||
------------------------
|
||||
<form method="post" enctype="multipart/form-data">
|
||||
<input type="text" name="correspondent" value="My Correspondent" />
|
||||
<input type="text" name="title" value="My Title" />
|
||||
<input type="file" name="document" />
|
||||
<input type="submit" name="go" value="Do the thing" />
|
||||
</form>
|
||||
|
||||
Generating a signature based on a shared secret is pretty simple: define a secret,
|
||||
and store it on the server and the client. Then use that secret, along with
|
||||
the text you want to verify to generate a string that you can use for
|
||||
verification.
|
||||
|
||||
In the case of Paperless, you configure the server with the secret by setting
|
||||
``UPLOAD_SHARED_SECRET``. Then on your client, you generate your signature by
|
||||
concatenating the correspondent, title, and the secret, and then using sha256
|
||||
to generate a hexdigest.
|
||||
|
||||
If you're using Python, this is what that looks like:
|
||||
But a potentially more useful way to do this would be in Python. Here we use
|
||||
the requests library to handle basic authentication and to send the POST data
|
||||
to the URL.
|
||||
|
||||
.. code:: python
|
||||
|
||||
import os
|
||||
|
||||
from hashlib import sha256
|
||||
signature = sha256(correspondent + title + secret).hexdigest()
|
||||
|
||||
import requests
|
||||
from requests.auth import HTTPBasicAuth
|
||||
|
||||
# You authenticate via BasicAuth or with a session id.
|
||||
# We use BasicAuth here
|
||||
username = "my-username"
|
||||
password = "my-super-secret-password"
|
||||
|
||||
# Where you have Paperless installed and listening
|
||||
url = "http://localhost:8000/push"
|
||||
|
||||
# Document metadata
|
||||
correspondent = "Test Correspondent"
|
||||
title = "Test Title"
|
||||
|
||||
# The local file you want to push
|
||||
path = "/path/to/some/directory/my-document.pdf"
|
||||
|
||||
|
||||
with open(path, "rb") as f:
|
||||
|
||||
response = requests.post(
|
||||
url=url,
|
||||
data={"title": title, "correspondent": correspondent},
|
||||
files={"document": (os.path.basename(path), f, "application/pdf")},
|
||||
auth=HTTPBasicAuth(username, password),
|
||||
allow_redirects=False
|
||||
)
|
||||
|
||||
if response.status_code == 202:
|
||||
|
||||
# Everything worked out ok
|
||||
print("Upload successful")
|
||||
|
||||
else:
|
||||
|
||||
# If you don't get a 202, it's probably because your credentials
|
||||
# are wrong or something. This will give you a rough idea of what
|
||||
# happened.
|
||||
|
||||
print("We got HTTP status code: {}".format(response.status_code))
|
||||
for k, v in response.headers.items():
|
||||
print("{}: {}".format(k, v))
|
||||
|
104
docs/extending.rst
Normal file
104
docs/extending.rst
Normal file
@@ -0,0 +1,104 @@
|
||||
.. _extending:
|
||||
|
||||
Extending Paperless
|
||||
===================
|
||||
|
||||
For the most part, Paperless is monolithic, so extending it is often best
|
||||
managed by way of modifying the code directly and issuing a pull request on
|
||||
`GitHub`_. However, over time the project has been evolving to be a little
|
||||
more "pluggable" so that users can write their own stuff that talks to it.
|
||||
|
||||
.. _GitHub: https://github.com/danielquinn/paperless
|
||||
|
||||
|
||||
.. _extending-parsers:
|
||||
|
||||
Parsers
|
||||
-------
|
||||
|
||||
You can leverage Paperless' consumption model to have it consume files *other*
|
||||
than ones handled by default like ``.pdf``, ``.jpg``, and ``.tiff``. To do so,
|
||||
you simply follow Django's convention of creating a new app, with a few key
|
||||
requirements.
|
||||
|
||||
|
||||
.. _extending-parsers-parserspy:
|
||||
|
||||
parsers.py
|
||||
..........
|
||||
|
||||
In this file, you create a class that extends
|
||||
``documents.parsers.DocumentParser`` and go about implementing the three
|
||||
required methods:
|
||||
|
||||
* ``get_thumbnail()``: Returns the path to a file we can use as a thumbnail for
|
||||
this document.
|
||||
* ``get_text()``: Returns the text from the document and only the text.
|
||||
* ``get_date()``: If possible, this returns the date of the document, otherwise
|
||||
it should return ``None``.
|
||||
|
||||
|
||||
.. _extending-parsers-signalspy:
|
||||
|
||||
signals.py
|
||||
..........
|
||||
|
||||
At consumption time, Paperless emits a ``document_consumer_declaration``
|
||||
signal which your module has to react to in order to let the consumer know
|
||||
whether or not it's capable of handling a particular file. Think of it like
|
||||
this:
|
||||
|
||||
1. Consumer finds a file in the consumption directory.
|
||||
2. It asks all the available parsers: *"Hey, can you handle this file?"*
|
||||
3. The first parser that says yes gets to handle the file. The order in which
|
||||
the parsers are asked is handled by sorting ``INSTALLED_APPS`` in
|
||||
``settings.py``.
|
||||
|
||||
|
||||
.. _extending-parsers-appspy:
|
||||
|
||||
apps.py
|
||||
.......
|
||||
|
||||
This is a standard Django file, but you'll need to add some code to it to
|
||||
register your parser as being able to handle particular files.
|
||||
|
||||
|
||||
.. _extending-parsers-finally:
|
||||
|
||||
Finally
|
||||
.......
|
||||
|
||||
The last step is to update ``settings.py`` to include your new module.
|
||||
Eventually, this will be dynamic, but at the moment, you have to edit the
|
||||
``INSTALLED_APPS`` section manually. Simply add the path to your AppConfig to
|
||||
the list like this:
|
||||
|
||||
.. code:: python
|
||||
|
||||
INSTALLED_APPS = [
|
||||
...
|
||||
"my_module.apps.MyModuleConfig",
|
||||
"paperless_tesseract.apps.PaperlessTesseractConfig",
|
||||
...
|
||||
]
|
||||
|
||||
Note that we're placing our module *above* ``PaperlessTesseractConfig``. This
|
||||
is to ensure that if your module wants to handle any files typically handled by
|
||||
the default module, yours will win instead. If there's no conflict between
|
||||
what your module does and the default, then order doesn't matter.
|
||||
|
||||
|
||||
.. _extending-parsers-example:
|
||||
|
||||
An Example
|
||||
..........
|
||||
|
||||
The core Paperless functionality is based on this design, so if you want to see
|
||||
what a parser module should look like, have a look at `parsers.py`_,
|
||||
`signals.py`_, and `apps.py`_ in the `paperless_tesseract`_ module.
|
||||
|
||||
.. _parsers.py: https://github.com/danielquinn/paperless/blob/master/src/paperless_tesseract/parsers.py
|
||||
.. _signals.py: https://github.com/danielquinn/paperless/blob/master/src/paperless_tesseract/signals.py
|
||||
.. _apps.py: https://github.com/danielquinn/paperless/blob/master/src/paperless_tesseract/apps.py
|
||||
.. _paperless_tesseract: https://github.com/danielquinn/paperless/blob/master/src/paperless_tesseract/
|
@@ -80,6 +80,12 @@ text and matching algorithm. From the help info there:
|
||||
uses a regex to match the PDF. If you don't know what a regex is, you
|
||||
probably don't want this option.
|
||||
|
||||
When using the "any" or "all" matching algorithms, you can search for terms that
|
||||
consist of multiple words by enclosing them in double quotes. For example, defining
|
||||
a match text of ``"Bank of America" BofA`` using the "any" algorithm, will match
|
||||
documents that contain either "Bank of America" or "BofA", but will not match
|
||||
documents containing "Bank of South America".
|
||||
|
||||
Then just save your tag/correspondent and run another document through the
|
||||
consumer. Once complete, you should see the newly-created document,
|
||||
automatically tagged with the appropriate data.
|
||||
|
@@ -3,11 +3,11 @@
|
||||
Paperless
|
||||
=========
|
||||
|
||||
Paperless is a simple Django application running in two parts:
|
||||
a :ref:`consumer <utilities-consumer>` (the thing that does the indexing) and
|
||||
the :ref:`webserver <utilities-webserver>` (the part that lets you search & download
|
||||
already-indexed documents). If you want to learn more about its functions keep on
|
||||
reading after the installation section.
|
||||
Paperless is a simple Django application running in two parts:
|
||||
a :ref:`consumer <utilities-consumer>` (the thing that does the indexing) and
|
||||
the :ref:`webserver <utilities-webserver>` (the part that lets you search &
|
||||
download already-indexed documents). If you want to learn more about its
|
||||
functions keep on reading after the installation section.
|
||||
|
||||
|
||||
.. _index-why-this-exists:
|
||||
@@ -16,12 +16,13 @@ Why This Exists
|
||||
===============
|
||||
|
||||
Paper is a nightmare. Environmental issues aside, there's no excuse for it in
|
||||
the 21st century. It takes up space, collects dust, doesn't support any form of
|
||||
a search feature, indexing is tedious, it's heavy and prone to damage & loss.
|
||||
the 21st century. It takes up space, collects dust, doesn't support any form
|
||||
of a search feature, indexing is tedious, it's heavy and prone to damage &
|
||||
loss.
|
||||
|
||||
I wrote this to make "going paperless" easier. I do not have to worry about
|
||||
finding stuff again. I feed documents right from the post box into the scanner and
|
||||
then shred them. Perhaps you might find it useful too.
|
||||
I wrote this to make "going paperless" easier. I do not have to worry about
|
||||
finding stuff again. I feed documents right from the post box into the scanner
|
||||
and then shred them. Perhaps you might find it useful too.
|
||||
|
||||
|
||||
|
||||
@@ -39,5 +40,7 @@ Contents
|
||||
utilities
|
||||
guesswork
|
||||
migrating
|
||||
extending
|
||||
troubleshooting
|
||||
scanners
|
||||
changelog
|
||||
|
@@ -11,24 +11,27 @@ should work) that has the following software installed:
|
||||
* `Tesseract`_, plus its language files matching your document base.
|
||||
* `Imagemagick`_ version 6.7.5 or higher
|
||||
* `unpaper`_
|
||||
* `libpoppler-cpp-dev`_ PDF rendering library
|
||||
|
||||
.. _Python3: https://python.org/
|
||||
.. _GNU Privacy Guard: https://gnupg.org
|
||||
.. _Tesseract: https://github.com/tesseract-ocr
|
||||
.. _Imagemagick: http://imagemagick.org/
|
||||
.. _unpaper: https://www.flameeyes.eu/projects/unpaper
|
||||
.. _libpoppler-cpp-dev: https://poppler.freedesktop.org/
|
||||
|
||||
Notably, you should confirm how you access your Python3 installation. Many
|
||||
Linux distributions will install Python3 in parallel to Python2, using the names
|
||||
``python3`` and ``python`` respectively. The same goes for ``pip3`` and
|
||||
``pip``. Running Paperless with Python2 will likely break things, so make sure that
|
||||
you're using the right version.
|
||||
Linux distributions will install Python3 in parallel to Python2, using the
|
||||
names ``python3`` and ``python`` respectively. The same goes for ``pip3`` and
|
||||
``pip``. Running Paperless with Python2 will likely break things, so make sure
|
||||
that you're using the right version.
|
||||
|
||||
For the purposes of simplicity, ``python`` and ``pip`` are used everywhere to
|
||||
refer to their Python3 versions.
|
||||
|
||||
In addition to the above, there are a number of Python requirements, all of
|
||||
which are listed in a file called ``requirements.txt`` in the project root directory.
|
||||
which are listed in a file called ``requirements.txt`` in the project root
|
||||
directory.
|
||||
|
||||
If you're not working on a virtual environment (like Vagrant or Docker), you
|
||||
should probably be using a virtualenv, but that's your call. The reasons why
|
||||
@@ -39,12 +42,13 @@ probably figure that out before continuing.
|
||||
|
||||
.. _requirements-apple:
|
||||
|
||||
Apple-tastic Complications
|
||||
--------------------------
|
||||
Problems with Imagemagick & PDFs
|
||||
--------------------------------
|
||||
|
||||
Some users have `run into problems`_ with installing ImageMagick on Apple
|
||||
systems using HomeBrew. The solution appears to be to install ghostscript as
|
||||
well as ImageMagick:
|
||||
Some users have `run into problems`_ with getting ImageMagick to do its thing
|
||||
with PDFs. Often this is the case with Apple systems using HomeBrew, but other
|
||||
Linuxes have been a problem as well. The solution appears to be to install
|
||||
ghostscript as well as ImageMagick:
|
||||
|
||||
.. _run into problems: https://github.com/danielquinn/paperless/issues/25
|
||||
|
||||
|
29
docs/scanners.rst
Normal file
29
docs/scanners.rst
Normal file
@@ -0,0 +1,29 @@
|
||||
.. _scanners:
|
||||
|
||||
Scanner Recommendations
|
||||
=======================
|
||||
|
||||
As Paperless operates by watching a folder for new files, it doesn't care what
|
||||
scanner you use, but sometimes finding a scanner that will write to an FTP,
|
||||
NFS, or SMB server can be difficult. This page is here to help you find one
|
||||
that works right for you based on recommendations from other Paperless users.
|
||||
|
||||
+---------+----------------+-----+-----+-----+----------------+
|
||||
| Brand | Model | Supports | Recommended By |
|
||||
+---------+----------------+-----+-----+-----+----------------+
|
||||
| | | FTP | NFS | SMB | |
|
||||
+=========+================+=====+=====+=====+================+
|
||||
| Brother | `ADS-1500W`_ | yes | no | yes | `danielquinn`_ |
|
||||
+---------+----------------+-----+-----+-----+----------------+
|
||||
| Brother | `MFC-J6930DW`_ | yes | | | `ayounggun`_ |
|
||||
+---------+----------------+-----+-----+-----+----------------+
|
||||
| Fujitsu | `ix500`_ | yes | | yes | `eonist`_ |
|
||||
+---------+----------------+-----+-----+-----+----------------+
|
||||
|
||||
.. _ADS-1500W: https://www.brother.ca/en/p/ads1500w
|
||||
.. _MFC-J6930DW: https://www.brother.ca/en/p/MFCJ6930DW
|
||||
.. _ix500: http://www.fujitsu.com/us/products/computing/peripheral/scanners/scansnap/ix500/
|
||||
|
||||
.. _danielquinn: https://github.com/danielquinn
|
||||
.. _ayounggun: https://github.com/ayounggun
|
||||
.. _eonist: https://github.com/eonist
|
130
docs/setup.rst
130
docs/setup.rst
@@ -95,48 +95,6 @@ Standard (Bare Metal)
|
||||
.. _Paperless webserver: http://127.0.0.1:8000
|
||||
|
||||
|
||||
.. _setup-installation-vagrant:
|
||||
|
||||
Vagrant Method
|
||||
..............
|
||||
|
||||
1. Install `Vagrant`_. How you do that is really between you and your OS.
|
||||
2. Run ``vagrant up``. An instance will start up for you. When it's ready and
|
||||
provisioned...
|
||||
3. Run ``vagrant ssh`` and once inside your new vagrant box, edit
|
||||
``/etc/paperless.conf`` and set the values for:
|
||||
|
||||
* ``PAPERLESS_CONSUMPTION_DIR``: this is where your documents will be
|
||||
dumped to be consumed by Paperless.
|
||||
* ``PAPERLESS_PASSPHRASE``: this is the passphrase Paperless uses to
|
||||
encrypt/decrypt the original document.
|
||||
* ``PAPERLESS_SHARED_SECRET``: this is the "magic word" used when consuming
|
||||
documents from mail or via the API. If you don't use either, leaving it
|
||||
blank is just fine.
|
||||
|
||||
4. Exit the vagrant box and re-enter it with ``vagrant ssh`` again. This
|
||||
updates the environment to make use of the changes you made to the config
|
||||
file.
|
||||
5. Initialise the database with ``/opt/paperless/src/manage.py migrate``.
|
||||
6. Still inside your vagrant box, create a user for your Paperless instance
|
||||
with ``/opt/paperless/src/manage.py createsuperuser``. Follow the prompts to
|
||||
create your user.
|
||||
7. Start the webserver with
|
||||
``/opt/paperless/src/manage.py runserver 0.0.0.0:8000``. You should now be
|
||||
able to visit your (empty) `Paperless webserver`_ at ``172.28.128.4:8000``.
|
||||
You can login with the user/pass you created in #6.
|
||||
8. In a separate window, run ``vagrant ssh`` again, but this time once inside
|
||||
your vagrant instance, you should start the consumer script with
|
||||
``/opt/paperless/src/manage.py document_consumer``.
|
||||
9. Scan something. Put it in the ``CONSUMPTION_DIR``.
|
||||
10. Wait a few minutes
|
||||
11. Visit the document list on your webserver, and it should be there, indexed
|
||||
and downloadable.
|
||||
|
||||
.. _Vagrant: https://vagrantup.com/
|
||||
.. _Paperless server: http://172.28.128.4:8000
|
||||
|
||||
|
||||
.. _setup-installation-docker:
|
||||
|
||||
Docker Method
|
||||
@@ -175,7 +133,8 @@ Docker Method
|
||||
modified versions of the configuration files.
|
||||
4. Modify ``docker-compose.yml`` to your preferences, following the
|
||||
instructions in comments in the file. The only change that is a hard
|
||||
requirement is to specify where the consumption directory should mount.
|
||||
requirement is to specify where the consumption directory should
|
||||
mount. [#dockercomposeyml]_
|
||||
5. Modify ``docker-compose.env`` and adapt the following environment variables:
|
||||
|
||||
``PAPERLESS_PASSPHRASE``
|
||||
@@ -192,7 +151,7 @@ Docker Method
|
||||
default English, set this parameter to a space separated list of
|
||||
three-letter language-codes after `ISO 639-2/T`_. For a list of available
|
||||
languages -- including their three letter codes -- see the
|
||||
`Debian packagelist`_.
|
||||
`Alpine packagelist`_.
|
||||
|
||||
``USERMAP_UID`` and ``USERMAP_GID``
|
||||
If you want to mount the consumption volume (directory ``/consume`` within
|
||||
@@ -282,12 +241,60 @@ Docker Method
|
||||
.. _Docker: https://www.docker.com/
|
||||
.. _docker-compose: https://docs.docker.com/compose/install/
|
||||
.. _ISO 639-2/T: https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes
|
||||
.. _Debian packagelist: https://packages.debian.org/search?suite=jessie&searchon=names&keywords=tesseract-ocr-
|
||||
.. _Alpine packagelist: https://pkgs.alpinelinux.org/packages?name=tesseract-ocr-data*&arch=x86_64
|
||||
|
||||
.. [#compose] You of course don't have to use docker-compose, but it
|
||||
simplifies deployment immensely. If you know your way around Docker, feel
|
||||
free to tinker around without using compose!
|
||||
|
||||
.. [#dockercomposeyml] If you're upgrading your docker-compose images from
|
||||
version 1.1.0 or earlier, you might need to change in the
|
||||
``docker-compose.yml`` file the ``image: pitkley/paperless`` directive in
|
||||
both the ``webserver`` and ``consumer`` sections to ``build: ./`` as per the
|
||||
newer ``docker-compose.yml.example`` file.
|
||||
|
||||
|
||||
.. _setup-installation-vagrant:
|
||||
|
||||
Vagrant Method
|
||||
..............
|
||||
|
||||
1. Install `Vagrant`_. How you do that is really between you and your OS.
|
||||
2. Run ``vagrant up``. An instance will start up for you. When it's ready and
|
||||
provisioned...
|
||||
3. Run ``vagrant ssh`` and once inside your new vagrant box, edit
|
||||
``/etc/paperless.conf`` and set the values for:
|
||||
|
||||
* ``PAPERLESS_CONSUMPTION_DIR``: this is where your documents will be
|
||||
dumped to be consumed by Paperless.
|
||||
* ``PAPERLESS_PASSPHRASE``: this is the passphrase Paperless uses to
|
||||
encrypt/decrypt the original document.
|
||||
* ``PAPERLESS_SHARED_SECRET``: this is the "magic word" used when consuming
|
||||
documents from mail or via the API. If you don't use either, leaving it
|
||||
blank is just fine.
|
||||
|
||||
4. Exit the vagrant box and re-enter it with ``vagrant ssh`` again. This
|
||||
updates the environment to make use of the changes you made to the config
|
||||
file.
|
||||
5. Initialise the database with ``/opt/paperless/src/manage.py migrate``.
|
||||
6. Still inside your vagrant box, create a user for your Paperless instance
|
||||
with ``/opt/paperless/src/manage.py createsuperuser``. Follow the prompts to
|
||||
create your user.
|
||||
7. Start the webserver with
|
||||
``/opt/paperless/src/manage.py runserver 0.0.0.0:8000``. You should now be
|
||||
able to visit your (empty) `Paperless server`_ at ``172.28.128.4:8000``.
|
||||
You can login with the user/pass you created in #6.
|
||||
8. In a separate window, run ``vagrant ssh`` again, but this time once inside
|
||||
your vagrant instance, you should start the consumer script with
|
||||
``/opt/paperless/src/manage.py document_consumer``.
|
||||
9. Scan something. Put it in the ``CONSUMPTION_DIR``.
|
||||
10. Wait a few minutes
|
||||
11. Visit the document list on your webserver, and it should be there, indexed
|
||||
and downloadable.
|
||||
|
||||
.. _Vagrant: https://vagrantup.com/
|
||||
.. _Paperless server: http://172.28.128.4:8000
|
||||
|
||||
|
||||
.. _setup-permanent:
|
||||
|
||||
@@ -394,7 +401,10 @@ Using a Real Webserver
|
||||
The default is to use Django's development server, as that's easy and does the
|
||||
job well enough on a home network. However, if you want to do things right,
|
||||
it's probably a good idea to use a webserver capable of handling more than one
|
||||
thread.
|
||||
thread. You will also have to let the webserver serve the static files (CSS,
|
||||
JavaScript) from the directory configured in ``PAPERLESS_STATICDIR``. For that,
|
||||
you need to run ``./manage.py collectstatic`` in the ``src`` directory. The
|
||||
default static files directory is ``../static``.
|
||||
|
||||
Apache
|
||||
~~~~~~
|
||||
@@ -560,7 +570,8 @@ your gunicorn instance. This should do the trick:
|
||||
Vagrant
|
||||
.......
|
||||
|
||||
You may use the Ubuntu explanation above. Replace ``(local-filesystems and net-device-up IFACE=eth0)`` with ``vagrant-mounted``.
|
||||
You may use the Ubuntu explanation above. Replace
|
||||
``(local-filesystems and net-device-up IFACE=eth0)`` with ``vagrant-mounted``.
|
||||
|
||||
.. _setup-permanent-docker:
|
||||
|
||||
@@ -572,3 +583,28 @@ If you're using Docker, you can set a restart-policy_ in the
|
||||
Docker daemon.
|
||||
|
||||
.. _restart-policy: https://docs.docker.com/engine/reference/commandline/run/#restart-policies-restart
|
||||
|
||||
|
||||
.. _setup-subdirectory:
|
||||
|
||||
Hosting Paperless in a Subdirectory
|
||||
-----------------------------------
|
||||
|
||||
Paperless was designed to run off the root of the hosting domain,
|
||||
(ie: ``https://example.com/``) but with a few changes, you can configure
|
||||
it to run in a subdirectory on your server
|
||||
(ie: ``https://example.com/paperless/``).
|
||||
|
||||
Thanks to the efforts of `maphy-psd`_ on `Github`_, running Paperless in a
|
||||
subdirectory is now as easy as setting a config variable. Simply set
|
||||
``PAPERLESS_FORCE_SCRIPT_NAME`` in your environment or
|
||||
``/etc/paperless.conf`` to the path you want Paperless hosted at, configure
|
||||
Nginx/Apache for your needs and you're done. So, if you want Paperless to live
|
||||
at ``https://example.com/arbitrary/path/to/paperless`` then you just set
|
||||
``PAPERLESS_FORCE_SCRIPT_NAME`` to ``/arbitrary/path/to/paperless``. Note the
|
||||
leading ``/`` there.
|
||||
|
||||
As to how to configure Nginx or Apache for this, that's on you :-)
|
||||
|
||||
.. _maphy-psd: https://github.com/maphy-psd
|
||||
.. _Github: https://github.com/danielquinn/paperless/pull/255
|
||||
|
@@ -5,7 +5,7 @@
|
||||
|
||||
|
||||
###############################################################################
|
||||
#### Paths and folders ####
|
||||
#### Paths & Folders ####
|
||||
###############################################################################
|
||||
|
||||
# This is where your documents should go to be consumed. Make sure that it exists
|
||||
@@ -39,7 +39,11 @@ PAPERLESS_CONSUME_MAIL_PASS=""
|
||||
|
||||
# Override the default IMAP inbox here. If not set Paperless defaults to
|
||||
# "INBOX".
|
||||
#PAPERLESS_CONSUME_MAIL_INBOX=""
|
||||
#PAPERLESS_CONSUME_MAIL_INBOX="INBOX"
|
||||
|
||||
# Any email sent to the target account that does not contain this text will be
|
||||
# ignored.
|
||||
PAPERLESS_EMAIL_SECRET=""
|
||||
|
||||
|
||||
###############################################################################
|
||||
@@ -61,11 +65,6 @@ PAPERLESS_CONSUME_MAIL_PASS=""
|
||||
PAPERLESS_PASSPHRASE="secret"
|
||||
|
||||
|
||||
# If you intend to consume documents either via HTTP POST or by email, you must
|
||||
# have a shared secret here.
|
||||
PAPERLESS_SHARED_SECRET=""
|
||||
|
||||
|
||||
# The secret key has a default that should be fine so long as you're hosting
|
||||
# Paperless on a closed network. However, if you're putting this anywhere
|
||||
# public, you should change the key to something unique and verbose.
|
||||
@@ -81,6 +80,11 @@ PAPERLESS_SHARED_SECRET=""
|
||||
# as is "example.com,www.example.com", but NOT " example.com" or "example.com,"
|
||||
#PAPERLESS_ALLOWED_HOSTS="example.com,www.example.com"
|
||||
|
||||
# To host paperless under a subpath url like example.com/paperless you set
|
||||
# this value to /paperless. No trailing slash!
|
||||
#
|
||||
# https://docs.djangoproject.com/en/1.11/ref/settings/#force-script-name
|
||||
#PAPERLESS_FORCE_SCRIPT_NAME=""
|
||||
|
||||
###############################################################################
|
||||
#### Software Tweaks ####
|
||||
@@ -157,10 +161,18 @@ PAPERLESS_SHARED_SECRET=""
|
||||
#### Interface ####
|
||||
###############################################################################
|
||||
|
||||
# Override the default UTC time zone here
|
||||
# Override the default UTC time zone here.
|
||||
# See https://docs.djangoproject.com/en/1.10/ref/settings/#std:setting-TIME_ZONE
|
||||
# for details on how to set it.
|
||||
#PAPERLESS_TIME_ZONE=UTC
|
||||
|
||||
|
||||
# If set, Paperless will show document filters per financial year.
|
||||
# The dates must be in the format "mm-dd", for example "07-15" for July 15.
|
||||
#PAPERLESS_FINANCIAL_YEAR_START="mm-dd"
|
||||
#PAPERLESS_FINANCIAL_YEAR_END="mm-dd"
|
||||
|
||||
|
||||
# The number of items on each page in the web UI. This value must be a
|
||||
# positive integer, but if you don't define one in paperless.conf, a default of
|
||||
# 100 will be used.
|
||||
|
@@ -1,5 +1,6 @@
|
||||
Django==1.10.5
|
||||
Django>=1.11,<2.0
|
||||
Pillow>=3.1.1
|
||||
dateparser>=0.6.0
|
||||
django-crispy-forms>=1.6.1
|
||||
django-extensions>=1.7.6
|
||||
django-filter>=1.0
|
||||
@@ -7,18 +8,21 @@ django-flat-responsive>=1.2.0
|
||||
djangorestframework>=3.5.3
|
||||
filemagic>=1.6
|
||||
fuzzywuzzy[speedup]==0.15.0
|
||||
gunicorn>=19.7.1
|
||||
langdetect>=1.0.7
|
||||
pdftotext>=2.0.1
|
||||
pyocr>=0.4.7
|
||||
python-dateutil>=2.6.0
|
||||
python-dotenv>=0.6.2
|
||||
python-gnupg>=0.3.9
|
||||
pytz>=2016.10
|
||||
gunicorn==19.6.0
|
||||
|
||||
# For the tests
|
||||
pytest
|
||||
factory-boy
|
||||
flake8
|
||||
pytest==3.3.2 # Newer versions break with pytest-sugar
|
||||
pytest-django
|
||||
pytest-sugar
|
||||
pep8
|
||||
flake8
|
||||
pytest-env
|
||||
pycodestyle
|
||||
tox
|
||||
|
@@ -7,9 +7,9 @@ map_uidgid() {
|
||||
USERMAP_ORIG_UID=$(id -g paperless)
|
||||
USERMAP_GID=${USERMAP_GID:-${USERMAP_UID:-$USERMAP_ORIG_GID}}
|
||||
USERMAP_UID=${USERMAP_UID:-$USERMAP_ORIG_UID}
|
||||
if [[ ${USERMAP_UID} != ${USERMAP_ORIG_UID} || ${USERMAP_GID} != ${USERMAP_ORIG_GID} ]]; then
|
||||
if [[ ${USERMAP_UID} != "${USERMAP_ORIG_UID}" || ${USERMAP_GID} != "${USERMAP_ORIG_GID}" ]]; then
|
||||
echo "Mapping UID and GID for paperless:paperless to $USERMAP_UID:$USERMAP_GID"
|
||||
groupmod -g ${USERMAP_GID} paperless
|
||||
addgroup -g "${USERMAP_GID}" paperless
|
||||
sed -i -e "s|:${USERMAP_ORIG_UID}:${USERMAP_GID}:|:${USERMAP_UID}:${USERMAP_GID}:|" /etc/passwd
|
||||
fi
|
||||
}
|
||||
@@ -25,16 +25,16 @@ set_permissions() {
|
||||
echo "failed."
|
||||
echo ""
|
||||
echo "Either try to set it on your host-mounted directory"
|
||||
echo "directly, or make sure that the directory has \`o+x\`"
|
||||
echo "directly, or make sure that the directory has \`g+wx\`"
|
||||
echo "permissions and the files in it at least \`o+r\`."
|
||||
} >&2
|
||||
chmod g+x "${!dir}" || {
|
||||
chmod g+wx "${!dir}" || {
|
||||
echo "Changing group permissions of ${cur_dir_name} directory:"
|
||||
echo " ${!dir}"
|
||||
echo "failed."
|
||||
echo ""
|
||||
echo "Either try to set it on your host-mounted directory"
|
||||
echo "directly, or make sure that the directory has \`o+x\`"
|
||||
echo "directly, or make sure that the directory has \`g+wx\`"
|
||||
echo "permissions and the files in it at least \`o+r\`."
|
||||
} >&2
|
||||
done
|
||||
@@ -56,25 +56,24 @@ install_languages() {
|
||||
return
|
||||
fi
|
||||
|
||||
# Update apt-lists
|
||||
apt-get update
|
||||
|
||||
# Loop over languages to be installed
|
||||
for lang in "${langs[@]}"; do
|
||||
pkg="tesseract-ocr-$lang"
|
||||
if dpkg -s "$pkg" 2>&1 > /dev/null; then
|
||||
pkg="tesseract-ocr-data-$lang"
|
||||
|
||||
# English is installed by default
|
||||
if [ "$lang" == "eng" ]; then
|
||||
continue
|
||||
fi
|
||||
|
||||
if apk info -e "$pkg" > /dev/null 2>&1; then
|
||||
continue
|
||||
fi
|
||||
if ! apk info "$pkg" > /dev/null 2>&1; then
|
||||
continue
|
||||
fi
|
||||
|
||||
if ! apt-cache show "$pkg" 2>&1 > /dev/null; then
|
||||
continue
|
||||
fi
|
||||
|
||||
apt-get install "$pkg"
|
||||
apk --no-cache --update add "$pkg"
|
||||
done
|
||||
|
||||
# Remove apt lists
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
}
|
||||
|
||||
|
||||
|
@@ -1,3 +1,5 @@
|
||||
from datetime import datetime
|
||||
|
||||
from django.conf import settings
|
||||
from django.contrib import admin
|
||||
from django.contrib.auth.models import User, Group
|
||||
@@ -32,6 +34,71 @@ class MonthListFilter(admin.SimpleListFilter):
|
||||
return queryset.filter(created__year=year, created__month=month)
|
||||
|
||||
|
||||
class FinancialYearFilter(admin.SimpleListFilter):
    """
    Admin list filter that narrows documents down to one financial year.

    The year boundaries are read from ``settings.FY_START`` and
    ``settings.FY_END``, which are parsed as "mm-dd" strings.  When either
    setting is empty, the filter offers no choices and does not touch the
    queryset.
    """

    title = "Financial Year"
    parameter_name = "fy"

    # Cache for _fy_does_wrap(); None means it has not been computed yet.
    _fy_wraps = None

    def _fy_start(self, year):
        """Return date of the start of financial year for the given year."""
        fy_start = "{}-{}".format(year, settings.FY_START)
        return datetime.strptime(fy_start, "%Y-%m-%d").date()

    def _fy_end(self, year):
        """Return date of the end of financial year for the given year."""
        fy_end = "{}-{}".format(year, settings.FY_END)
        return datetime.strptime(fy_end, "%Y-%m-%d").date()

    def _fy_does_wrap(self):
        """Return whether the financial year spans across two years."""
        if self._fy_wraps is None:
            start = datetime.strptime(str(settings.FY_START), "%m-%d").date()
            end = datetime.strptime(str(settings.FY_END), "%m-%d").date()
            # An end that sorts before the start means the year crosses
            # the 31 December boundary.
            self._fy_wraps = end < start

        return self._fy_wraps

    def _determine_fy(self, date):
        """Return a (query, display) financial year tuple of the given date."""
        if self._fy_does_wrap():
            fy_start = self._fy_start(date.year)

            if date.date() >= fy_start:
                query = "{}-{}".format(date.year, date.year + 1)
            else:
                query = "{}-{}".format(date.year - 1, date.year)

            # To keep it simple we use the same string for both
            # query parameter and the display.
            return (query, query)

        query = "{0}-{0}".format(date.year)
        display = "{}".format(date.year)
        return (query, display)

    def lookups(self, request, model_admin):
        """
        Return the (query, display) choices, newest financial year first.

        Returns None when the financial year settings are not configured.
        """
        if not settings.FY_START or not settings.FY_END:
            return None

        # Only the creation timestamps are needed, so fetch that single
        # column instead of loading every full Document row.
        created_values = Document.objects.values_list("created", flat=True)
        choices = {self._determine_fy(created) for created in created_values}

        return sorted(choices, key=lambda x: x[0], reverse=True)

    def queryset(self, request, queryset):
        """
        Restrict ``queryset`` to the selected financial year.

        Returns None (no filtering) when nothing is selected, when the
        financial year settings are not configured, or when the selected
        value cannot be parsed as "<start year>-<end year>".
        """
        if not self.value() or not settings.FY_START or not settings.FY_END:
            return None

        try:
            start, end = self.value().split("-")
            fy_start = self._fy_start(start)
            fy_end = self._fy_end(end)
        except ValueError:
            # The value comes straight from the URL, so a malformed one
            # must not turn into a server error.
            return None

        # Compare on the date part of ``created``: a plain ``__lte`` against
        # a date would cut off at midnight and drop documents created during
        # the last day of the financial year.
        return queryset.filter(created__date__gte=fy_start,
                               created__date__lte=fy_end)
|
||||
|
||||
|
||||
class CommonAdmin(admin.ModelAdmin):
|
||||
list_per_page = settings.PAPERLESS_LIST_PER_PAGE
|
||||
|
||||
@@ -59,7 +126,9 @@ class DocumentAdmin(CommonAdmin):
|
||||
|
||||
search_fields = ("correspondent__name", "title", "content")
|
||||
list_display = ("title", "created", "thumbnail", "correspondent", "tags_")
|
||||
list_filter = ("tags", "correspondent", MonthListFilter)
|
||||
list_filter = ("tags", "correspondent", FinancialYearFilter,
|
||||
MonthListFilter)
|
||||
|
||||
ordering = ["-created", "correspondent"]
|
||||
|
||||
def has_add_permission(self, request):
|
||||
@@ -70,9 +139,14 @@ class DocumentAdmin(CommonAdmin):
|
||||
created_.short_description = "Created"
|
||||
|
||||
def thumbnail(self, obj):
|
||||
if settings.FORCE_SCRIPT_NAME:
|
||||
src_link = "{}/fetch/thumb/{}".format(
|
||||
settings.FORCE_SCRIPT_NAME, obj.id)
|
||||
else:
|
||||
src_link = "/fetch/thumb/{}".format(obj.id)
|
||||
png_img = self._html_tag(
|
||||
"img",
|
||||
src="/fetch/thumb/{}".format(obj.id),
|
||||
src=src_link,
|
||||
width=180,
|
||||
alt="Thumbnail of {}".format(obj.file_name),
|
||||
title=obj.file_name
|
||||
|
@@ -118,12 +118,14 @@ class Consumer(object):
|
||||
|
||||
parsed_document = parser_class(doc)
|
||||
thumbnail = parsed_document.get_thumbnail()
|
||||
date = parsed_document.get_date()
|
||||
|
||||
try:
|
||||
document = self._store(
|
||||
parsed_document.get_text(),
|
||||
doc,
|
||||
thumbnail
|
||||
thumbnail,
|
||||
date
|
||||
)
|
||||
except ParseError as e:
|
||||
|
||||
@@ -174,7 +176,7 @@ class Consumer(object):
|
||||
return sorted(
|
||||
options, key=lambda _: _["weight"], reverse=True)[0]["parser"]
|
||||
|
||||
def _store(self, text, doc, thumbnail):
|
||||
def _store(self, text, doc, thumbnail, date):
|
||||
|
||||
file_info = FileInfo.from_path(doc)
|
||||
|
||||
@@ -182,7 +184,7 @@ class Consumer(object):
|
||||
|
||||
self.log("debug", "Saving record to database")
|
||||
|
||||
created = file_info.created or timezone.make_aware(
|
||||
created = file_info.created or date or timezone.make_aware(
|
||||
datetime.datetime.fromtimestamp(stats.st_mtime))
|
||||
|
||||
with open(doc, "rb") as f:
|
||||
|
@@ -2,7 +2,6 @@ import magic
|
||||
import os
|
||||
|
||||
from datetime import datetime
|
||||
from hashlib import sha256
|
||||
from time import mktime
|
||||
|
||||
from django import forms
|
||||
@@ -14,7 +13,6 @@ from .consumer import Consumer
|
||||
|
||||
class UploadForm(forms.Form):
|
||||
|
||||
SECRET = settings.SHARED_SECRET
|
||||
TYPE_LOOKUP = {
|
||||
"application/pdf": Document.TYPE_PDF,
|
||||
"image/png": Document.TYPE_PNG,
|
||||
@@ -32,10 +30,9 @@ class UploadForm(forms.Form):
|
||||
required=False
|
||||
)
|
||||
document = forms.FileField()
|
||||
signature = forms.CharField(max_length=256)
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
forms.Form.__init__(*args, **kwargs)
|
||||
forms.Form.__init__(self, *args, **kwargs)
|
||||
self._file_type = None
|
||||
|
||||
def clean_correspondent(self):
|
||||
@@ -82,17 +79,6 @@ class UploadForm(forms.Form):
|
||||
|
||||
return document
|
||||
|
||||
def clean(self):
|
||||
|
||||
corresp = self.cleaned_data.get("correspondent")
|
||||
title = self.cleaned_data.get("title")
|
||||
signature = self.cleaned_data.get("signature")
|
||||
|
||||
if sha256(corresp + title + self.SECRET).hexdigest() == signature:
|
||||
return self.cleaned_data
|
||||
|
||||
raise forms.ValidationError("The signature provided did not validate")
|
||||
|
||||
def save(self):
|
||||
"""
|
||||
Since the consumer already does a lot of work, it's easier just to save
|
||||
@@ -104,7 +90,7 @@ class UploadForm(forms.Form):
|
||||
title = self.cleaned_data.get("title")
|
||||
document = self.cleaned_data.get("document")
|
||||
|
||||
t = int(mktime(datetime.now()))
|
||||
t = int(mktime(datetime.now().timetuple()))
|
||||
file_name = os.path.join(
|
||||
Consumer.CONSUME,
|
||||
"{} - {}.{}".format(correspondent, title, self._file_type)
|
||||
|
@@ -43,7 +43,10 @@ class Message(Loggable):
|
||||
and n attachments, and that we don't care about the message body.
|
||||
"""
|
||||
|
||||
SECRET = settings.SHARED_SECRET
|
||||
SECRET = os.getenv(
|
||||
"PAPERLESS_EMAIL_SECRET",
|
||||
os.getenv("PAPERLESS_SHARED_SECRET") # TODO: Remove after 2017/09
|
||||
)
|
||||
|
||||
def __init__(self, data, group=None):
|
||||
"""
|
||||
@@ -153,11 +156,11 @@ class MailFetcher(Loggable):
|
||||
Loggable.__init__(self)
|
||||
|
||||
self._connection = None
|
||||
self._host = settings.MAIL_CONSUMPTION["HOST"]
|
||||
self._port = settings.MAIL_CONSUMPTION["PORT"]
|
||||
self._username = settings.MAIL_CONSUMPTION["USERNAME"]
|
||||
self._password = settings.MAIL_CONSUMPTION["PASSWORD"]
|
||||
self._inbox = settings.MAIL_CONSUMPTION["INBOX"]
|
||||
self._host = os.getenv("PAPERLESS_CONSUME_MAIL_HOST")
|
||||
self._port = os.getenv("PAPERLESS_CONSUME_MAIL_PORT")
|
||||
self._username = os.getenv("PAPERLESS_CONSUME_MAIL_USER")
|
||||
self._password = os.getenv("PAPERLESS_CONSUME_MAIL_PASS")
|
||||
self._inbox = os.getenv("PAPERLESS_CONSUME_MAIL_INBOX", "INBOX")
|
||||
|
||||
self._enabled = bool(self._host)
|
||||
|
||||
|
@@ -64,7 +64,7 @@ class Command(Renderable, BaseCommand):
|
||||
|
||||
file_target = os.path.join(self.target, document.file_name)
|
||||
|
||||
thumbnail_name = document.file_name + "-tumbnail.png"
|
||||
thumbnail_name = document.file_name + "-thumbnail.png"
|
||||
thumbnail_target = os.path.join(self.target, thumbnail_name)
|
||||
|
||||
document_dict[EXPORTER_FILE_NAME] = document.file_name
|
||||
|
@@ -38,6 +38,9 @@ class GnuPG(object):
|
||||
|
||||
def move_documents_and_create_thumbnails(apps, schema_editor):
|
||||
|
||||
os.makedirs(os.path.join(settings.MEDIA_ROOT, "documents", "originals"), exist_ok=True)
|
||||
os.makedirs(os.path.join(settings.MEDIA_ROOT, "documents", "thumbnails"), exist_ok=True)
|
||||
|
||||
documents = os.listdir(os.path.join(settings.MEDIA_ROOT, "documents"))
|
||||
|
||||
if set(documents) == {"originals", "thumbnails"}:
|
||||
|
21
src/documents/migrations/0018_auto_20170715_1712.py
Normal file
21
src/documents/migrations/0018_auto_20170715_1712.py
Normal file
@@ -0,0 +1,21 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Generated by Django 1.10.5 on 2017-07-15 17:12
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from django.db import migrations, models
|
||||
import django.db.models.deletion
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Alter Document.correspondent so it is set to NULL on delete."""

    dependencies = [("documents", "0017_auto_20170512_0507")]

    operations = [
        migrations.AlterField(
            model_name="document",
            name="correspondent",
            field=models.ForeignKey(
                blank=True,
                null=True,
                on_delete=django.db.models.deletion.SET_NULL,
                related_name="documents",
                to="documents.Correspondent",
            ),
        ),
    ]
|
@@ -1,3 +1,5 @@
|
||||
# coding=utf-8
|
||||
|
||||
import dateutil.parser
|
||||
import logging
|
||||
import os
|
||||
@@ -89,7 +91,7 @@ class MatchingModel(models.Model):
|
||||
search_kwargs = {"flags": re.IGNORECASE}
|
||||
|
||||
if self.matching_algorithm == self.MATCH_ALL:
|
||||
for word in self.match.split(" "):
|
||||
for word in self._split_match():
|
||||
search_result = re.search(
|
||||
r"\b{}\b".format(word), text, **search_kwargs)
|
||||
if not search_result:
|
||||
@@ -97,7 +99,7 @@ class MatchingModel(models.Model):
|
||||
return True
|
||||
|
||||
if self.matching_algorithm == self.MATCH_ANY:
|
||||
for word in self.match.split(" "):
|
||||
for word in self._split_match():
|
||||
if re.search(r"\b{}\b".format(word), text, **search_kwargs):
|
||||
return True
|
||||
return False
|
||||
@@ -121,6 +123,23 @@ class MatchingModel(models.Model):
|
||||
|
||||
raise NotImplementedError("Unsupported matching algorithm")
|
||||
|
||||
def _split_match(self):
    r"""
    Splits the match to individual keywords, getting rid of unnecessary
    spaces and grouping quoted words together.

    Each returned keyword is a regex fragment: whitespace inside a quoted
    group is replaced by ``\s+``.  Keywords are not otherwise escaped.

    Example:
      ' some random words "with quotes " and spaces'
        ==>
      ["some", "random", "words", "with\s+quotes", "and", "spaces"]
    """
    # Either a double-quoted group (captured without the quotes) or a run
    # of non-whitespace characters.
    findterms = re.compile(r'"([^"]+)"|(\S+)').findall
    normspace = re.compile(r"\s+").sub
    return [
        # Exactly one of the two capture groups is non-empty per term.
        normspace(" ", (t[0] or t[1]).strip()).replace(" ", r"\s+")
        for t in findterms(self.match)
    ]
|
||||
|
||||
def save(self, *args, **kwargs):
|
||||
|
||||
self.match = self.match.lower()
|
||||
@@ -172,7 +191,12 @@ class Document(models.Model):
|
||||
TYPES = (TYPE_PDF, TYPE_PNG, TYPE_JPG, TYPE_GIF, TYPE_TIF,)
|
||||
|
||||
correspondent = models.ForeignKey(
|
||||
Correspondent, blank=True, null=True, related_name="documents")
|
||||
Correspondent,
|
||||
blank=True,
|
||||
null=True,
|
||||
related_name="documents",
|
||||
on_delete=models.SET_NULL
|
||||
)
|
||||
|
||||
title = models.CharField(max_length=128, blank=True, db_index=True)
|
||||
|
||||
@@ -316,45 +340,45 @@ class FileInfo(object):
|
||||
r"(?P<correspondent>.*) - "
|
||||
r"(?P<title>.*) - "
|
||||
r"(?P<tags>[a-z0-9\-,]*)"
|
||||
r"\.(?P<extension>pdf|jpe?g|png|gif|tiff)$",
|
||||
r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$",
|
||||
flags=re.IGNORECASE
|
||||
)),
|
||||
("created-title-tags", re.compile(
|
||||
r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
|
||||
r"(?P<title>.*) - "
|
||||
r"(?P<tags>[a-z0-9\-,]*)"
|
||||
r"\.(?P<extension>pdf|jpe?g|png|gif|tiff)$",
|
||||
r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$",
|
||||
flags=re.IGNORECASE
|
||||
)),
|
||||
("created-correspondent-title", re.compile(
|
||||
r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
|
||||
r"(?P<correspondent>.*) - "
|
||||
r"(?P<title>.*)"
|
||||
r"\.(?P<extension>pdf|jpe?g|png|gif|tiff)$",
|
||||
r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$",
|
||||
flags=re.IGNORECASE
|
||||
)),
|
||||
("created-title", re.compile(
|
||||
r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
|
||||
r"(?P<title>.*)"
|
||||
r"\.(?P<extension>pdf|jpe?g|png|gif|tiff)$",
|
||||
r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$",
|
||||
flags=re.IGNORECASE
|
||||
)),
|
||||
("correspondent-title-tags", re.compile(
|
||||
r"(?P<correspondent>.*) - "
|
||||
r"(?P<title>.*) - "
|
||||
r"(?P<tags>[a-z0-9\-,]*)"
|
||||
r"\.(?P<extension>pdf|jpe?g|png|gif|tiff)$",
|
||||
r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$",
|
||||
flags=re.IGNORECASE
|
||||
)),
|
||||
("correspondent-title", re.compile(
|
||||
r"(?P<correspondent>.*) - "
|
||||
r"(?P<title>.*)?"
|
||||
r"\.(?P<extension>pdf|jpe?g|png|gif|tiff)$",
|
||||
r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$",
|
||||
flags=re.IGNORECASE
|
||||
)),
|
||||
("title", re.compile(
|
||||
r"(?P<title>.*)"
|
||||
r"\.(?P<extension>pdf|jpe?g|png|gif|tiff)$",
|
||||
r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$",
|
||||
flags=re.IGNORECASE
|
||||
))
|
||||
])
|
||||
@@ -397,6 +421,8 @@ class FileInfo(object):
|
||||
r = extension.lower()
|
||||
if r == "jpeg":
|
||||
return "jpg"
|
||||
if r == "tif":
|
||||
return "tiff"
|
||||
return r
|
||||
|
||||
@classmethod
|
||||
|
@@ -9,7 +9,7 @@ class ParseError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class DocumentParser(object):
|
||||
class DocumentParser:
|
||||
"""
|
||||
Subclass this to make your own parser. Have a look at
|
||||
`paperless_tesseract.parsers` for inspiration.
|
||||
@@ -19,7 +19,7 @@ class DocumentParser(object):
|
||||
|
||||
def __init__(self, path):
|
||||
self.document_path = path
|
||||
self.tempdir = tempfile.mkdtemp(prefix="paperless", dir=self.SCRATCH)
|
||||
self.tempdir = tempfile.mkdtemp(prefix="paperless-", dir=self.SCRATCH)
|
||||
self.logger = logging.getLogger(__name__)
|
||||
self.logging_group = None
|
||||
|
||||
@@ -35,6 +35,12 @@ class DocumentParser(object):
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
def get_date(self):
    """
    Return the date of the document.

    This base implementation always raises; subclasses supply the real one.
    """
    raise NotImplementedError
|
||||
|
||||
def log(self, level, message):
|
||||
getattr(self.logger, level)(message, extra={
|
||||
"group": self.logging_group
|
||||
|
@@ -1,6 +0,0 @@
|
||||
{% load hacks %}
|
||||
|
||||
{# See documents.templatetags.hacks.change_list_results for an explanation #}
|
||||
|
||||
{% change_list_results %}
|
||||
|
@@ -0,0 +1,13 @@
|
||||
{% extends 'admin/change_form.html' %}
|
||||
|
||||
|
||||
{% block footer %}
|
||||
|
||||
{{ block.super }}
|
||||
|
||||
{# Hack to force Django to make the created date a date input rather than `text` (the default) #}
|
||||
<script>
|
||||
django.jQuery(".field-created input").first().attr("type", "date")
|
||||
</script>
|
||||
|
||||
{% endblock footer %}
|
@@ -0,0 +1,12 @@
|
||||
{% extends 'admin/change_list.html' %}
|
||||
|
||||
|
||||
{% load admin_actions from admin_list%}
|
||||
{% load result_list from hacks %}
|
||||
|
||||
|
||||
{% block result_list %}
|
||||
{% if action_form and actions_on_top and cl.show_admin_actions %}{% admin_actions %}{% endif %}
|
||||
{% result_list cl %}
|
||||
{% if action_form and actions_on_bottom and cl.show_admin_actions %}{% admin_actions %}{% endif %}
|
||||
{% endblock %}
|
@@ -29,18 +29,13 @@
|
||||
.result .header {
|
||||
padding: 5px;
|
||||
background-color: #79AEC8;
|
||||
height: 6em;
|
||||
}
|
||||
.result .header .checkbox {
|
||||
margin-right: 5px;
|
||||
}
|
||||
.result .header .checkbox{
|
||||
width: 5%;
|
||||
float: left;
|
||||
}
|
||||
.result .header .info {
|
||||
width: 90%;
|
||||
float: left;
|
||||
margin-left: 10%;
|
||||
}
|
||||
.result .header a,
|
||||
.result a.tag {
|
||||
|
@@ -6,5 +6,6 @@
|
||||
<meta charset="utf-8">
|
||||
</head>
|
||||
<body>
|
||||
{# One day someone (maybe even myself) is going to write a proper web front-end for Paperless, and this is where it'll start. #}
|
||||
</body>
|
||||
</html>
|
||||
|
@@ -1,41 +1,28 @@
|
||||
import os
|
||||
|
||||
from django.contrib import admin
|
||||
from django.contrib.admin.templatetags.admin_list import (
|
||||
result_headers,
|
||||
result_hidden_fields,
|
||||
results
|
||||
)
|
||||
from django.template import Library
|
||||
from django.template.loader import get_template
|
||||
|
||||
from ..models import Document
|
||||
|
||||
|
||||
register = Library()
|
||||
|
||||
|
||||
@register.simple_tag(takes_context=True)
|
||||
def change_list_results(context):
|
||||
@register.inclusion_tag("admin/documents/document/change_list_results.html")
|
||||
def result_list(cl):
|
||||
"""
|
||||
Django has a lot of places where you can override defaults, but
|
||||
unfortunately, `change_list_results.html` is not one of them. In fact,
|
||||
it's a downright pain in the ass to override this file on a per-model basis
|
||||
and this is the cleanest way I could come up with.
|
||||
|
||||
Basically all we've done here is defined `change_list_results.html` in an
|
||||
`admin` directory which globally overrides that file for *every* model.
|
||||
That template however simply loads this templatetag which determines
|
||||
whether we're currently looking at a `Document` listing or something else
|
||||
and loads the appropriate file in each case.
|
||||
|
||||
Better work arounds for this are welcome as I hate this myself, but at the
|
||||
moment, it's all I could come up with.
|
||||
Copy/pasted from django.contrib.admin.templatetags.admin_list just so I can
|
||||
modify the value passed to `.inclusion_tag()` in the decorator here. There
|
||||
must be a cleaner way... right?
|
||||
"""
|
||||
|
||||
path = os.path.join(
|
||||
os.path.dirname(admin.__file__),
|
||||
"templates",
|
||||
"admin",
|
||||
"change_list_results.html"
|
||||
)
|
||||
|
||||
if context["cl"].model == Document:
|
||||
path = "admin/documents/document/change_list_results.html"
|
||||
|
||||
return get_template(path).render(context)
|
||||
headers = list(result_headers(cl))
|
||||
num_sorted_fields = 0
|
||||
for h in headers:
|
||||
if h['sortable'] and h['sorted']:
|
||||
num_sorted_fields += 1
|
||||
return {'cl': cl,
|
||||
'result_hidden_fields': list(result_hidden_fields(cl)),
|
||||
'result_headers': headers,
|
||||
'num_sorted_fields': num_sorted_fields,
|
||||
'results': list(results(cl))}
|
||||
|
17
src/documents/tests/factories.py
Normal file
17
src/documents/tests/factories.py
Normal file
@@ -0,0 +1,17 @@
|
||||
import factory
|
||||
|
||||
from ..models import Document, Correspondent
|
||||
|
||||
|
||||
class CorrespondentFactory(factory.DjangoModelFactory):
|
||||
|
||||
class Meta:
|
||||
model = Correspondent
|
||||
|
||||
name = factory.Faker("name")
|
||||
|
||||
|
||||
class DocumentFactory(factory.DjangoModelFactory):
|
||||
|
||||
class Meta:
|
||||
model = Document
|
@@ -58,9 +58,9 @@ class TestAttributes(TestCase):
|
||||
|
||||
TAGS = ("tag1", "tag2", "tag3")
|
||||
EXTENSIONS = (
|
||||
"pdf", "png", "jpg", "jpeg", "gif",
|
||||
"PDF", "PNG", "JPG", "JPEG", "GIF",
|
||||
"PdF", "PnG", "JpG", "JPeG", "GiF",
|
||||
"pdf", "png", "jpg", "jpeg", "gif", "tiff", "tif",
|
||||
"PDF", "PNG", "JPG", "JPEG", "GIF", "TIFF", "TIF",
|
||||
"PdF", "PnG", "JpG", "JPeG", "GiF", "TiFf", "TiF",
|
||||
)
|
||||
|
||||
def _test_guess_attributes_from_name(self, path, sender, title, tags):
|
||||
@@ -80,6 +80,8 @@ class TestAttributes(TestCase):
|
||||
self.assertEqual(tuple([t.slug for t in file_info.tags]), tags, f)
|
||||
if extension.lower() == "jpeg":
|
||||
self.assertEqual(file_info.extension, "jpg", f)
|
||||
elif extension.lower() == "tif":
|
||||
self.assertEqual(file_info.extension, "tiff", f)
|
||||
else:
|
||||
self.assertEqual(file_info.extension, extension.lower(), f)
|
||||
|
||||
|
@@ -1,6 +1,6 @@
|
||||
from random import randint
|
||||
|
||||
from django.test import TestCase
|
||||
from django.test import TestCase, override_settings
|
||||
|
||||
from ..models import Correspondent, Document, Tag
|
||||
from ..signals import document_consumption_finished
|
||||
@@ -16,9 +16,15 @@ class TestMatching(TestCase):
|
||||
matching_algorithm=getattr(klass, algorithm)
|
||||
)
|
||||
for string in true:
|
||||
self.assertTrue(instance.matches(string))
|
||||
self.assertTrue(
|
||||
instance.matches(string),
|
||||
'"%s" should match "%s" but it does not' % (text, string)
|
||||
)
|
||||
for string in false:
|
||||
self.assertFalse(instance.matches(string))
|
||||
self.assertFalse(
|
||||
instance.matches(string),
|
||||
'"%s" should not match "%s" but it does' % (text, string)
|
||||
)
|
||||
|
||||
def test_match_all(self):
|
||||
|
||||
@@ -54,6 +60,21 @@ class TestMatching(TestCase):
|
||||
)
|
||||
)
|
||||
|
||||
self._test_matching(
|
||||
'brown fox "lazy dogs"',
|
||||
"MATCH_ALL",
|
||||
(
|
||||
"the quick brown fox jumped over the lazy dogs",
|
||||
"the quick brown fox jumped over the lazy dogs",
|
||||
),
|
||||
(
|
||||
"the quick fox jumped over the lazy dogs",
|
||||
"the quick brown wolf jumped over the lazy dogs",
|
||||
"the quick brown fox jumped over the fat dogs",
|
||||
"the quick brown fox jumped over the lazy... dogs",
|
||||
)
|
||||
)
|
||||
|
||||
def test_match_any(self):
|
||||
|
||||
self._test_matching(
|
||||
@@ -89,6 +110,18 @@ class TestMatching(TestCase):
|
||||
)
|
||||
)
|
||||
|
||||
self._test_matching(
|
||||
'"brown fox" " lazy dogs "',
|
||||
"MATCH_ANY",
|
||||
(
|
||||
"the quick brown fox",
|
||||
"jumped over the lazy dogs.",
|
||||
),
|
||||
(
|
||||
"the lazy fox jumped over the brown dogs",
|
||||
)
|
||||
)
|
||||
|
||||
def test_match_literal(self):
|
||||
|
||||
self._test_matching(
|
||||
@@ -166,7 +199,8 @@ class TestMatching(TestCase):
|
||||
)
|
||||
|
||||
|
||||
class TestApplications(TestCase):
|
||||
@override_settings(POST_CONSUME_SCRIPT=None)
|
||||
class TestDocumentConsumptionFinishedSignal(TestCase):
|
||||
"""
|
||||
We make use of document_consumption_finished, so we should test that it's
|
||||
doing what we expect with respect to tag & correspondent matching.
|
||||
|
31
src/documents/tests/test_models.py
Normal file
31
src/documents/tests/test_models.py
Normal file
@@ -0,0 +1,31 @@
|
||||
from django.test import TestCase
|
||||
|
||||
from ..models import Document, Correspondent
|
||||
from .factories import DocumentFactory, CorrespondentFactory
|
||||
|
||||
|
||||
class CorrespondentTestCase(TestCase):
|
||||
|
||||
def test___str__(self):
|
||||
for s in ("test", "οχι", "test with fun_charÅc'\"terß"):
|
||||
correspondent = CorrespondentFactory.create(name=s)
|
||||
self.assertEqual(str(correspondent), s)
|
||||
|
||||
|
||||
class DocumentTestCase(TestCase):
|
||||
|
||||
def test_correspondent_deletion_does_not_cascade(self):
|
||||
|
||||
self.assertEqual(Correspondent.objects.all().count(), 0)
|
||||
correspondent = CorrespondentFactory.create()
|
||||
self.assertEqual(Correspondent.objects.all().count(), 1)
|
||||
|
||||
self.assertEqual(Document.objects.all().count(), 0)
|
||||
DocumentFactory.create(correspondent=correspondent)
|
||||
self.assertEqual(Document.objects.all().count(), 1)
|
||||
self.assertIsNotNone(Document.objects.all().first().correspondent)
|
||||
|
||||
correspondent.delete()
|
||||
self.assertEqual(Correspondent.objects.all().count(), 0)
|
||||
self.assertEqual(Document.objects.all().count(), 1)
|
||||
self.assertIsNone(Document.objects.all().first().correspondent)
|
@@ -1,5 +1,4 @@
|
||||
from django.http import HttpResponse
|
||||
from django.views.decorators.csrf import csrf_exempt
|
||||
from django.http import HttpResponse, HttpResponseBadRequest
|
||||
from django.views.generic import DetailView, FormView, TemplateView
|
||||
from django_filters.rest_framework import DjangoFilterBackend
|
||||
from paperless.db import GnuPG
|
||||
@@ -81,15 +80,12 @@ class PushView(SessionOrBasicAuthMixin, FormView):
|
||||
|
||||
form_class = UploadForm
|
||||
|
||||
@classmethod
|
||||
def as_view(cls, **kwargs):
|
||||
return csrf_exempt(FormView.as_view(**kwargs))
|
||||
|
||||
def form_valid(self, form):
|
||||
return HttpResponse("1")
|
||||
form.save()
|
||||
return HttpResponse("1", status=202)
|
||||
|
||||
def form_invalid(self, form):
|
||||
return HttpResponse("0")
|
||||
return HttpResponseBadRequest(str(form.errors))
|
||||
|
||||
|
||||
class CorrespondentViewSet(ModelViewSet):
|
||||
|
@@ -84,3 +84,20 @@ def binaries_check(app_configs, **kwargs):
|
||||
check_messages.append(Warning(error.format(binary), hint))
|
||||
|
||||
return check_messages
|
||||
|
||||
|
||||
@register()
|
||||
def config_check(app_configs, **kwargs):
|
||||
warning = (
|
||||
"It looks like you have PAPERLESS_SHARED_SECRET defined. Note that "
|
||||
"in the \npast, this variable was used for both API authentication "
|
||||
"and as the mail \nkeyword. As the API no no longer uses it, this "
|
||||
"variable has been renamed to \nPAPERLESS_EMAIL_SECRET, so if you're "
|
||||
"using the mail feature, you'd best update \nyour variable name.\n\n"
|
||||
"The old variable will stop working in a few months."
|
||||
)
|
||||
|
||||
if os.getenv("PAPERLESS_SHARED_SECRET"):
|
||||
return [Warning(warning)]
|
||||
|
||||
return []
|
||||
|
@@ -47,7 +47,8 @@ _allowed_hosts = os.getenv("PAPERLESS_ALLOWED_HOSTS")
|
||||
if _allowed_hosts:
|
||||
ALLOWED_HOSTS = _allowed_hosts.split(",")
|
||||
|
||||
|
||||
FORCE_SCRIPT_NAME = os.getenv("PAPERLESS_FORCE_SCRIPT_NAME")
|
||||
|
||||
# Application definition
|
||||
|
||||
INSTALLED_APPS = [
|
||||
@@ -69,6 +70,7 @@ INSTALLED_APPS = [
|
||||
|
||||
"rest_framework",
|
||||
"crispy_forms",
|
||||
"django_filters"
|
||||
|
||||
]
|
||||
|
||||
@@ -208,6 +210,9 @@ OCR_LANGUAGE = os.getenv("PAPERLESS_OCR_LANGUAGE", "eng")
|
||||
# The number of threads to use for OCR
|
||||
OCR_THREADS = os.getenv("PAPERLESS_OCR_THREADS")
|
||||
|
||||
# OCR all documents?
|
||||
OCR_ALWAYS = bool(os.getenv("PAPERLESS_OCR_ALWAYS", "NO").lower() in ("yes", "y", "1", "t", "true"))
|
||||
|
||||
# If this is true, any failed attempts to OCR a PDF will result in the PDF
|
||||
# being indexed anyway, with whatever we could get. If it's False, the file
|
||||
# will simply be left in the CONSUMPTION_DIR.
|
||||
@@ -236,20 +241,6 @@ CONSUMPTION_DIR = os.getenv("PAPERLESS_CONSUMPTION_DIR")
|
||||
# slowly, you may want to use a higher value than the default.
|
||||
CONSUMER_LOOP_TIME = int(os.getenv("PAPERLESS_CONSUMER_LOOP_TIME", 10))
|
||||
|
||||
# If you want to use IMAP mail consumption, populate this with useful values.
|
||||
# If you leave HOST set to None, we assume you're not going to use this
|
||||
# feature.
|
||||
MAIL_CONSUMPTION = {
|
||||
"HOST": os.getenv("PAPERLESS_CONSUME_MAIL_HOST"),
|
||||
"PORT": os.getenv("PAPERLESS_CONSUME_MAIL_PORT"),
|
||||
"USERNAME": os.getenv("PAPERLESS_CONSUME_MAIL_USER"),
|
||||
"PASSWORD": os.getenv("PAPERLESS_CONSUME_MAIL_PASS"),
|
||||
# If True, use SSL/TLS to connect
|
||||
"USE_SSL": os.getenv("PAPERLESS_CONSUME_MAIL_USE_SSL", "y").lower() == "y",
|
||||
# The name of the inbox on the server
|
||||
"INBOX": os.getenv("PAPERLESS_CONSUME_MAIL_INBOX", "INBOX")
|
||||
}
|
||||
|
||||
# This is used to encrypt the original documents and decrypt them later when
|
||||
# you want to download them. Set it and change the permissions on this file to
|
||||
# 0600, or set it to `None` and you'll be prompted for the passphrase at
|
||||
@@ -259,11 +250,6 @@ MAIL_CONSUMPTION = {
|
||||
# files.
|
||||
PASSPHRASE = os.getenv("PAPERLESS_PASSPHRASE")
|
||||
|
||||
# If you intend to use the "API" to push files into the consumer, you'll need
|
||||
# to provide a shared secret here. Leaving this as the default will disable
|
||||
# the API.
|
||||
SHARED_SECRET = os.getenv("PAPERLESS_SHARED_SECRET", "")
|
||||
|
||||
# Trigger a script after every successful document consumption?
|
||||
PRE_CONSUME_SCRIPT = os.getenv("PAPERLESS_PRE_CONSUME_SCRIPT")
|
||||
POST_CONSUME_SCRIPT = os.getenv("PAPERLESS_POST_CONSUME_SCRIPT")
|
||||
@@ -272,3 +258,9 @@ POST_CONSUME_SCRIPT = os.getenv("PAPERLESS_POST_CONSUME_SCRIPT")
|
||||
# positive integer, but if you don't define one in paperless.conf, a default of
|
||||
# 100 will be used.
|
||||
PAPERLESS_LIST_PER_PAGE = int(os.getenv("PAPERLESS_LIST_PER_PAGE", 100))
|
||||
|
||||
FY_START = os.getenv("PAPERLESS_FINANCIAL_YEAR_START")
|
||||
FY_END = os.getenv("PAPERLESS_FINANCIAL_YEAR_END")
|
||||
|
||||
# Specify the default date order (for autodetected dates)
|
||||
DATE_ORDER = os.getenv("PAPERLESS_DATE_ORDER", "DMY")
|
||||
|
@@ -1,28 +1,17 @@
|
||||
"""paperless URL Configuration
|
||||
|
||||
The `urlpatterns` list routes URLs to views. For more information please see:
|
||||
https://docs.djangoproject.com/en/1.10/topics/http/urls/
|
||||
Examples:
|
||||
Function views
|
||||
1. Add an import: from my_app import views
|
||||
2. Add a URL to urlpatterns: url(r'^$', views.home, name='home')
|
||||
Class-based views
|
||||
1. Add an import: from other_app.views import Home
|
||||
2. Add a URL to urlpatterns: url(r'^$', Home.as_view(), name='home')
|
||||
Including another URLconf
|
||||
1. Add an import: from blog import urls as blog_urls
|
||||
2. Import the include() function: from django.conf.urls import url, include
|
||||
3. Add a URL to urlpatterns: url(r'^blog/', include(blog_urls))
|
||||
"""
|
||||
from django.conf import settings
|
||||
from django.conf.urls import url, static, include
|
||||
from django.conf.urls import include, static, url
|
||||
from django.contrib import admin
|
||||
|
||||
from django.views.decorators.csrf import csrf_exempt
|
||||
from django.views.generic import RedirectView
|
||||
from rest_framework.routers import DefaultRouter
|
||||
|
||||
from documents.views import (
|
||||
IndexView, FetchView, PushView,
|
||||
CorrespondentViewSet, TagViewSet, DocumentViewSet, LogViewSet
|
||||
CorrespondentViewSet,
|
||||
DocumentViewSet,
|
||||
FetchView,
|
||||
LogViewSet,
|
||||
PushView,
|
||||
TagViewSet
|
||||
)
|
||||
from reminders.views import ReminderViewSet
|
||||
|
||||
@@ -42,9 +31,6 @@ urlpatterns = [
|
||||
),
|
||||
url(r"^api/", include(router.urls, namespace="drf")),
|
||||
|
||||
# Normal pages (coming soon)
|
||||
# url(r"^$", IndexView.as_view(), name="index"),
|
||||
|
||||
# File downloads
|
||||
url(
|
||||
r"^fetch/(?P<kind>doc|thumb)/(?P<pk>\d+)$",
|
||||
@@ -52,15 +38,17 @@ urlpatterns = [
|
||||
name="fetch"
|
||||
),
|
||||
|
||||
# File uploads
|
||||
url(r"^push$", csrf_exempt(PushView.as_view()), name="push"),
|
||||
|
||||
# The Django admin
|
||||
url(r"admin/", admin.site.urls),
|
||||
url(r"", admin.site.urls), # This is going away
|
||||
|
||||
# Redirect / to /admin
|
||||
url(r"^$", RedirectView.as_view(permanent=True, url="/admin/")),
|
||||
|
||||
] + static.static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT)
|
||||
|
||||
if settings.SHARED_SECRET:
|
||||
urlpatterns.insert(0, url(r"^push$", PushView.as_view(), name="push"))
|
||||
|
||||
# Text in each page's <h1> (and above login form).
|
||||
admin.site.site_header = 'Paperless'
|
||||
# Text at the end of each page's <title>.
|
||||
|
@@ -1 +1 @@
|
||||
__version__ = (0, 5, 0)
|
||||
__version__ = (1, 2, 0)
|
||||
|
@@ -3,6 +3,8 @@ import os
|
||||
import re
|
||||
import subprocess
|
||||
from multiprocessing.pool import Pool
|
||||
import dateparser
|
||||
import pdftotext
|
||||
|
||||
import langdetect
|
||||
import pyocr
|
||||
@@ -30,7 +32,10 @@ class RasterisedDocumentParser(DocumentParser):
|
||||
DENSITY = settings.CONVERT_DENSITY if settings.CONVERT_DENSITY else 300
|
||||
THREADS = int(settings.OCR_THREADS) if settings.OCR_THREADS else None
|
||||
UNPAPER = settings.UNPAPER_BINARY
|
||||
DATE_ORDER = settings.DATE_ORDER
|
||||
DEFAULT_OCR_LANGUAGE = settings.OCR_LANGUAGE
|
||||
OCR_ALWAYS = settings.OCR_ALWAYS
|
||||
TEXT_CACHE = None
|
||||
|
||||
def get_thumbnail(self):
|
||||
"""
|
||||
@@ -46,13 +51,32 @@ class RasterisedDocumentParser(DocumentParser):
|
||||
|
||||
return os.path.join(self.tempdir, "convert-0000.png")
|
||||
|
||||
def _is_ocred(self):
|
||||
# Extract text from PDF using pdftotext
|
||||
text = get_text_from_pdf(self.document_path)
|
||||
|
||||
# We assume that a PDF with more than 50 characters contains text
|
||||
# (so no OCR required)
|
||||
if len(text) > 50:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
def get_text(self):
|
||||
if self.TEXT_CACHE is not None:
|
||||
return self.TEXT_CACHE
|
||||
|
||||
if not self.OCR_ALWAYS and self._is_ocred():
|
||||
self.log("info", "Skipping OCR, using Text from PDF")
|
||||
self.TEXT_CACHE = get_text_from_pdf(self.document_path)
|
||||
return self.TEXT_CACHE
|
||||
|
||||
images = self._get_greyscale()
|
||||
|
||||
try:
|
||||
|
||||
return self._get_ocr(images)
|
||||
self.TEXT_CACHE = self._get_ocr(images)
|
||||
return self.TEXT_CACHE
|
||||
except OCRError as e:
|
||||
raise ParseError(e)
|
||||
|
||||
@@ -175,6 +199,29 @@ class RasterisedDocumentParser(DocumentParser):
|
||||
text += self._ocr(imgs[middle + 1:], self.DEFAULT_OCR_LANGUAGE)
|
||||
return text
|
||||
|
||||
def get_date(self):
|
||||
text = self.get_text()
|
||||
|
||||
# This regular expression will try to find dates in the document at
|
||||
# hand and will match the following formats:
|
||||
# - XX.YY.ZZZZ with XX and YY being 1 or 2 digits and ZZZZ being 2 or 4 digits
|
||||
# - XX/YY/ZZZZ with XX and YY being 1 or 2 digits and ZZZZ being 2 or 4 digits
|
||||
# - XX-YY-ZZZZ with XX and YY being 1 or 2 digits and ZZZZ being 2 or 4 digits
|
||||
# - XX. MONTH ZZZZ with XX being 1 or 2 digits and ZZZZ being 2 or 4 digits
|
||||
# - MONTH ZZZZ
|
||||
m = re.search(
|
||||
r'\b([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})\b|' +
|
||||
r'\b([0-9]{1,2}\. [^ ]{3,9} ([0-9]{4}|[0-9]{2}))\b|' +
|
||||
r'\b([^ ]{3,9} [0-9]{4})\b', text)
|
||||
|
||||
if m is None:
|
||||
return None
|
||||
|
||||
return dateparser.parse(m.group(0),
|
||||
settings={'DATE_ORDER': self.DATE_ORDER,
|
||||
'PREFER_DAY_OF_MONTH': 'first',
|
||||
'RETURN_AS_TIMEZONE_AWARE': True})
|
||||
|
||||
|
||||
def run_convert(*args):
|
||||
|
||||
@@ -212,3 +259,13 @@ def image_to_string(args):
|
||||
except (TesseractError, OtherTesseractError):
|
||||
pass
|
||||
return ocr.image_to_string(f, lang=lang)
|
||||
|
||||
|
||||
def get_text_from_pdf(pdf_file):
|
||||
with open(pdf_file, "rb") as f:
|
||||
try:
|
||||
pdf = pdftotext.PDF(f)
|
||||
except pdftotext.Error:
|
||||
return ""
|
||||
|
||||
return "\n".join(pdf)
|
||||
|
@@ -3,9 +3,9 @@ import re
|
||||
from .parsers import RasterisedDocumentParser
|
||||
|
||||
|
||||
class ConsumerDeclaration(object):
|
||||
class ConsumerDeclaration:
|
||||
|
||||
MATCHING_FILES = re.compile("^.*\.(pdf|jpg|gif|png|tiff?|pnm|bmp)$")
|
||||
MATCHING_FILES = re.compile("^.*\.(pdf|jpe?g|gif|png|tiff?|pnm|bmp)$")
|
||||
|
||||
@classmethod
|
||||
def handle(cls, sender, **kwargs):
|
||||
|
BIN
src/paperless_tesseract/tests/samples/tests_date_1.pdf
Normal file
BIN
src/paperless_tesseract/tests/samples/tests_date_1.pdf
Normal file
Binary file not shown.
BIN
src/paperless_tesseract/tests/samples/tests_date_1.png
Normal file
BIN
src/paperless_tesseract/tests/samples/tests_date_1.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 136 KiB |
BIN
src/paperless_tesseract/tests/samples/tests_date_2.pdf
Normal file
BIN
src/paperless_tesseract/tests/samples/tests_date_2.pdf
Normal file
Binary file not shown.
BIN
src/paperless_tesseract/tests/samples/tests_date_2.png
Normal file
BIN
src/paperless_tesseract/tests/samples/tests_date_2.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 135 KiB |
BIN
src/paperless_tesseract/tests/samples/tests_date_3.pdf
Normal file
BIN
src/paperless_tesseract/tests/samples/tests_date_3.pdf
Normal file
Binary file not shown.
BIN
src/paperless_tesseract/tests/samples/tests_date_3.png
Normal file
BIN
src/paperless_tesseract/tests/samples/tests_date_3.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 138 KiB |
BIN
src/paperless_tesseract/tests/samples/tests_date_4.pdf
Normal file
BIN
src/paperless_tesseract/tests/samples/tests_date_4.pdf
Normal file
Binary file not shown.
BIN
src/paperless_tesseract/tests/samples/tests_date_4.png
Normal file
BIN
src/paperless_tesseract/tests/samples/tests_date_4.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 138 KiB |
BIN
src/paperless_tesseract/tests/samples/tests_date_5.pdf
Normal file
BIN
src/paperless_tesseract/tests/samples/tests_date_5.pdf
Normal file
Binary file not shown.
BIN
src/paperless_tesseract/tests/samples/tests_date_5.png
Normal file
BIN
src/paperless_tesseract/tests/samples/tests_date_5.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 136 KiB |
BIN
src/paperless_tesseract/tests/samples/tests_date_6.pdf
Normal file
BIN
src/paperless_tesseract/tests/samples/tests_date_6.pdf
Normal file
Binary file not shown.
BIN
src/paperless_tesseract/tests/samples/tests_date_6.png
Normal file
BIN
src/paperless_tesseract/tests/samples/tests_date_6.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 136 KiB |
BIN
src/paperless_tesseract/tests/samples/tests_date_7.pdf
Normal file
BIN
src/paperless_tesseract/tests/samples/tests_date_7.pdf
Normal file
Binary file not shown.
215
src/paperless_tesseract/tests/test_date.py
Normal file
215
src/paperless_tesseract/tests/test_date.py
Normal file
@@ -0,0 +1,215 @@
|
||||
import datetime
|
||||
import os
|
||||
import shutil
|
||||
from unittest import mock
|
||||
from uuid import uuid4
|
||||
|
||||
from dateutil import tz
|
||||
from django.test import TestCase
|
||||
|
||||
from ..parsers import RasterisedDocumentParser
|
||||
|
||||
|
||||
class TestDate(TestCase):
|
||||
|
||||
SAMPLE_FILES = os.path.join(os.path.dirname(__file__), "samples")
|
||||
SCRATCH = "/tmp/paperless-tests-{}".format(str(uuid4())[:8])
|
||||
|
||||
def setUp(self):
|
||||
os.makedirs(self.SCRATCH, exist_ok=True)
|
||||
|
||||
def tearDown(self):
|
||||
shutil.rmtree(self.SCRATCH)
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
def test_get_text_1_pdf(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_1.pdf")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document.get_text()
|
||||
self.assertEqual(document._is_ocred(), True)
|
||||
self.assertEqual(document.get_date(),
|
||||
datetime.datetime(2018, 4, 1, 0, 0,
|
||||
tzinfo=tz.tzutc()))
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
def test_get_text_1_png(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_1.png")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document.get_text()
|
||||
self.assertEqual(document._is_ocred(), False)
|
||||
self.assertEqual(document.get_date(),
|
||||
datetime.datetime(2018, 4, 1, 0, 0,
|
||||
tzinfo=tz.tzutc()))
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
def test_get_text_2_pdf(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_2.pdf")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document.get_text()
|
||||
self.assertEqual(document._is_ocred(), True)
|
||||
self.assertEqual(document.get_date(),
|
||||
datetime.datetime(2013, 2, 1, 0, 0,
|
||||
tzinfo=tz.tzutc()))
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
def test_get_text_2_png(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_2.png")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document.get_text()
|
||||
self.assertEqual(document._is_ocred(), False)
|
||||
self.assertEqual(document.get_date(),
|
||||
datetime.datetime(2013, 2, 1, 0, 0,
|
||||
tzinfo=tz.tzutc()))
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
def test_get_text_3_pdf(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_3.pdf")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document.get_text()
|
||||
self.assertEqual(document._is_ocred(), True)
|
||||
self.assertEqual(document.get_date(),
|
||||
datetime.datetime(2018, 10, 5, 0, 0,
|
||||
tzinfo=tz.tzutc()))
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
def test_get_text_3_png(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_3.png")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document.get_text()
|
||||
self.assertEqual(document._is_ocred(), False)
|
||||
self.assertEqual(document.get_date(),
|
||||
datetime.datetime(2018, 10, 5, 0, 0,
|
||||
tzinfo=tz.tzutc()))
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
def test_get_text_4_pdf(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_4.pdf")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document.get_text()
|
||||
self.assertEqual(document._is_ocred(), True)
|
||||
self.assertEqual(document.get_date(),
|
||||
datetime.datetime(2018, 10, 5, 0, 0,
|
||||
tzinfo=tz.tzutc()))
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
def test_get_text_4_png(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_4.png")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document.get_text()
|
||||
self.assertEqual(document._is_ocred(), False)
|
||||
self.assertEqual(document.get_date(),
|
||||
datetime.datetime(2018, 10, 5, 0, 0,
|
||||
tzinfo=tz.tzutc()))
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
def test_get_text_5_pdf(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_5.pdf")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document.get_text()
|
||||
self.assertEqual(document._is_ocred(), True)
|
||||
self.assertEqual(document.get_date(),
|
||||
datetime.datetime(2018, 12, 17, 0, 0,
|
||||
tzinfo=tz.tzutc()))
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
def test_get_text_5_png(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_5.png")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document.get_text()
|
||||
self.assertEqual(document._is_ocred(), False)
|
||||
self.assertEqual(document.get_date(),
|
||||
datetime.datetime(2018, 12, 17, 0, 0,
|
||||
tzinfo=tz.tzutc()))
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
def test_get_text_6_pdf_us(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_6.pdf")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document.get_text()
|
||||
document.DATE_ORDER = "MDY"
|
||||
self.assertEqual(document._is_ocred(), True)
|
||||
self.assertEqual(document.get_date(),
|
||||
datetime.datetime(2018, 12, 17, 0, 0,
|
||||
tzinfo=tz.tzutc()))
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
def test_get_text_6_png_us(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_6.png")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document.get_text()
|
||||
document.DATE_ORDER = "MDY"
|
||||
self.assertEqual(document._is_ocred(), False)
|
||||
self.assertEqual(document.get_date(),
|
||||
datetime.datetime(2018, 12, 17, 0, 0,
|
||||
tzinfo=tz.tzutc()))
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
def test_get_text_6_pdf_eu(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_6.pdf")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document.get_text()
|
||||
self.assertEqual(document._is_ocred(), True)
|
||||
self.assertEqual(document.get_date(), None)
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
def test_get_text_6_png_eu(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_6.png")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document.get_text()
|
||||
self.assertEqual(document._is_ocred(), False)
|
||||
self.assertEqual(document.get_date(), None)
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
def test_get_text_7_pdf(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_7.pdf")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document.get_text()
|
||||
self.assertEqual(document._is_ocred(), True)
|
||||
self.assertEqual(document.get_date(),
|
||||
datetime.datetime(2018, 4, 1, 0, 0,
|
||||
tzinfo=tz.tzutc()))
|
@@ -12,9 +12,9 @@ class SignalsTestCase(TestCase):
|
||||
"A document with a . in it", "Doc with -- in it"
|
||||
)
|
||||
suffixes = (
|
||||
"pdf", "jpg", "gif", "png", "tiff", "tif", "pnm", "bmp",
|
||||
"PDF", "JPG", "GIF", "PNG", "TIFF", "TIF", "PNM", "BMP",
|
||||
"pDf", "jPg", "gIf", "pNg", "tIff", "tIf", "pNm", "bMp",
|
||||
"pdf", "jpg", "jpeg", "gif", "png", "tiff", "tif", "pnm", "bmp",
|
||||
"PDF", "JPG", "JPEG", "GIF", "PNG", "TIFF", "TIF", "PNM", "BMP",
|
||||
"pDf", "jPg", "jpEg", "gIf", "pNg", "tIff", "tIf", "pNm", "bMp",
|
||||
)
|
||||
|
||||
for prefix in prefixes:
|
||||
|
@@ -1,3 +1,8 @@
|
||||
[pytest]
|
||||
DJANGO_SETTINGS_MODULE=paperless.settings
|
||||
|
||||
addopts = --pythonwarnings=all
|
||||
env =
|
||||
PAPERLESS_CONSUME=/tmp
|
||||
PAPERLESS_PASSPHRASE=THISISNOTASECRET
|
||||
PAPERLESS_SECRET=paperless
|
||||
PAPERLESS_EMAIL_SECRET=paperless
|
||||
|
21
src/tox.ini
21
src/tox.ini
@@ -5,19 +5,18 @@
|
||||
|
||||
[tox]
|
||||
skipsdist = True
|
||||
envlist = py34, py35, py36, pep8
|
||||
envlist = py34, py35, py36, pycodestyle
|
||||
|
||||
[testenv]
|
||||
commands = {envpython} manage.py test
|
||||
commands = pytest
|
||||
deps = -r{toxinidir}/../requirements.txt
|
||||
setenv =
|
||||
PAPERLESS_CONSUME=/tmp
|
||||
PAPERLESS_PASSPHRASE=THISISNOTASECRET
|
||||
PAPERLESS_SECRET=paperless
|
||||
|
||||
[testenv:pep8]
|
||||
commands=pep8
|
||||
deps=pep8
|
||||
[testenv:pycodestyle]
|
||||
commands=pycodestyle
|
||||
deps=pycodestyle
|
||||
|
||||
[pep8]
|
||||
exclude=.tox,migrations,paperless/settings.py
|
||||
[pycodestyle]
|
||||
exclude=
|
||||
.tox,
|
||||
migrations,
|
||||
paperless/settings.py
|
||||
|
Reference in New Issue
Block a user