Merge branch 'master' into dev

This commit is contained in:
Jonas Winkler 2020-10-16 15:02:57 +02:00
commit 421dab786d
39 changed files with 1939 additions and 398 deletions

1
.docker-hub-test Normal file
View File

@ -0,0 +1 @@
Docker Hub test 2

View File

@ -7,8 +7,7 @@ before_install:
sudo: false
matrix:
include:
- python: "3.4"
include:
- python: "3.5"
- python: "3.6"
- python: "3.7-dev"

View File

@ -1,12 +1,13 @@
FROM alpine:3.8
FROM alpine:3.11
LABEL maintainer="The Paperless Project https://github.com/the-paperless-project/paperless" \
contributors="Guy Addadi <addadi@gmail.com>, Pit Kleyersburg <pitkley@googlemail.com>, \
Sven Fischer <git-dev@linux4tw.de>"
# Copy Pipfiles file and init script
# Copy Pipfiles file, init script and gunicorn.conf
COPY Pipfile* /usr/src/paperless/
COPY scripts/docker-entrypoint.sh /sbin/docker-entrypoint.sh
COPY scripts/gunicorn.conf /usr/src/paperless/
# Set export and consumption directories
ENV PAPERLESS_EXPORT_DIR=/export \
@ -26,6 +27,7 @@ RUN apk add --no-cache \
shadow \
sudo \
tesseract-ocr \
tzdata \
unpaper && \
apk add --no-cache --virtual .build-dependencies \
g++ \
@ -51,6 +53,9 @@ RUN apk add --no-cache \
adduser -D -u 1000 -G paperless -h /usr/src/paperless paperless && \
chown -Rh paperless:paperless /usr/src/paperless && \
mkdir -p $PAPERLESS_EXPORT_DIR && \
# Avoid setrlimit warnings
# See: https://gitlab.alpinelinux.org/alpine/aports/issues/11122
echo 'Set disable_coredump false' >> /etc/sudo.conf && \
# Setup entrypoint
chmod 755 /sbin/docker-entrypoint.sh
@ -65,3 +70,5 @@ COPY src/ /usr/src/paperless/src/
COPY data/ /usr/src/paperless/data/
COPY media/ /usr/src/paperless/media/
# Collect static files
RUN sudo -HEu paperless /usr/src/paperless/src/manage.py collectstatic --clear --no-input

View File

@ -15,7 +15,7 @@ django-filter = "*"
djangorestframework = "*"
factory-boy = "*"
filemagic = "*"
fuzzywuzzy = {extras = ["speedup"], version = "==0.15.0"}
fuzzywuzzy = {extras = ["speedup"],version = "==0.15.0"}
gunicorn = "*"
inotify-simple = "*"
langdetect = "*"
@ -36,6 +36,8 @@ pytest-env = "*"
pytest-xdist = "*"
psycopg2 = "*"
djangoql = "*"
whitenoise = "*"
brotli = "*"
[dev-packages]
ipython = "*"

713
Pipfile.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -22,9 +22,9 @@ Paperless steuert nicht deinen Scanner, es hilft nur damit umzugehen, was der Sc
1. Kaufe einen Dokumentenscanner, der an einen Ort in deinem Netzwerk schreiben kann. Wenn du Inspirationen brauchst, schau in die [Scannerempfehlungen](https://paperless.readthedocs.io/en/latest/scanners.html).
2. Stelle "Scanne zu FTP" oder ähnliches ein. Es sollte möglich sein, eingescannte Bilder ohne etwas tun zu müssen an einen Server hochzuladen. Natürlich kannst du auch die einscannte Datei händisch hochladen, wenn der Scanner automatisches Hochladen nicht unterstützt. Paperless ist es egal, wie die Dokumente in seinen lokalen Konsumordner gelangen.
3. Besitze einen Zielserver, lasse das Papierless-Konsumskript laufen, um die Datei mit OCR zu versehen und sie in einer lokalen Datenbank zu indexieren.
3. Besitze einen Zielserver, lasse das Paperless-Konsumskript laufen, um die Datei mit OCR zu versehen und sie in einer lokalen Datenbank zu indexieren.
4. Benutze die Weboberfläche, um die Datenbank zu durchforsten und zu finden, was du suchst.
5. Lade die PDF-Datei, die du brauchst/möchtest über die Weboberfläche herunter und mach was auch immer du willst damit. Du kannst es auch drucken und versenden, so als wäre es das Original. In den meisten Fällen, wird das niemanden interessieren oder bemerken.
5. Lade die PDF-Datei, die du brauchst/möchtest über die Weboberfläche herunter und mach was auch immer du willst damit. Du kannst es auch drucken und versenden, so als wäre es das Original. In den meisten Fällen wird das niemanden interessieren oder bemerken.
Hier das, was du bekommt:
@ -42,7 +42,7 @@ Dies alles ist eine wirklich ziemlich einfache, glänzende und benutzerfreundlic
* [ImageMagick](http://imagemagick.org/) wandelt Bilder zwischen Farbe und Graustufen um.
* [Tesseract](https://github.com/tesseract-ocr) erledigt die Buchstabenerkennung.
* [Unpaper](https://www.flameeyes.eu/projects/unpaper) bereinigt und begradigt das eingescannte Bild.
* [Unpaper](https://github.com/unpaper/unpaper) bereinigt und begradigt das eingescannte Bild.
* [GNU Privacy Guard](https://gnupg.org/) wird als Verschlüsselungsbackend genutzt.
* [Python 3](https://python.org/) ist die Sprache des Projekts.
* [Pillow](https://pypi.python.org/pypi/pillowfight/) lädt die Bilddaten als Python-Objekt, um sie mit PyOCR zu verwenden.
@ -58,12 +58,14 @@ Dieses Projekt wurde um 2015 gestartet und es gibt viele Leute, die es verwenden
Ich entwickle keine neuen Funktionen mehr für Paperless, weil es genau das tut, was ich brauche und meine Aufmerksamkeit meinem neuesten Projekt [Aletheia](https://github.com/danielquinn/aletheia) gewidmet ist. Ich verlasse jedoch nicht das Projekt. Ich bin glücklich damit, Pull Requests zu begutachten und Fragen im Issue-Bereich zu beantworten. Wenn du ein Entwickler bist und eine neue Funktion willst, reihe sie in den Issues ein und/oder sende einen PR! Ich bin glücklich damit, neue Sachen hinzuzufügen, habe aber einfach nicht die Zeit, sie selbst zu erarbeiten.
## Verknüpfte Prjekte
## Verknüpfte Projekte
Paperless gibt es bereits seit einer Weile und Leute haben damit angefangen, Sachen rund um Paperless zu entwickeln. Wenn du einer dieser Menschen bist, kannst du dein Projekt zu dieser Liste hinzufügen:
* [Paperless App](https://github.com/bauerj/paperless_app): Eine Android/iOS-App für Paperless.
* [Paperless Desktop](https://github.com/thomasbrueggemann/paperless-desktop): Eine Desktop-Oberfläche für deine Paperless-Installation. Läuft auf Mac, Linux und Windows.
* [ansible-role-paperless](https://github.com/ovv/ansible-role-paperless): Eine einfache Möglichkeit, Paperless via Ansible laufen zu lassen.
* [paperless-cli](https://github.com/stgarf/paperless-cli): Ein golang Kommandozeilenprogramm, welches mit Paperless interagiert.
## Ähnliche Projekte
@ -73,7 +75,7 @@ Es gibt da draußen auch das Projekt [Mayan EDMS](https://mayan.readthedocs.org/
## Wichtiger Hinweis
Dokumentenscanner werden typerweise verwendet, um sensible Dokumente zu scannen. Dinge wie die Sozialversicherungsnummer, Steueraufzeichnungen, Rechnungen, etc. Während Paperless die Originaldateien über das Konsumskript verschlüsselt, sind die OCR-Texte *nicht* verschlüsselt und demnach in Klartext gespeichert (es muss durchsuchbar sein, also wenn jemand eine Idee hat, wie man das mit verschlüsselten Daten tun kann: Ich bin ganz Ohr). Das bedeutet, dass Paperless niemals auf einem nicht vertrauten Host laufen sollte. Stattdessen empfehle ich, wenn du es verwenden willst, es lokal auf einem Server in deinem Zuhause laufen zu lassen.
Dokumentenscanner werden typischerweise verwendet, um sensible Dokumente zu scannen. Dinge wie die Sozialversicherungsnummer, Steueraufzeichnungen, Rechnungen, etc. Während Paperless die Originaldateien über das Konsumskript verschlüsselt, sind die OCR-Texte *nicht* verschlüsselt und demnach in Klartext gespeichert (es muss durchsuchbar sein, also wenn jemand eine Idee hat, wie man das mit verschlüsselten Daten tun kann: Ich bin ganz Ohr). Das bedeutet, dass Paperless niemals auf einem nicht vertrauten Host laufen sollte. Stattdessen empfehle ich, wenn du es verwenden willst, es lokal auf einem Server in deinem Zuhause laufen zu lassen.
## Spenden

View File

@ -41,7 +41,7 @@
* [ImageMagick](http://imagemagick.org/) μετατρέπει τις εικόνες σε έγχρωμες και ασπρόμαυρες.
* [Tesseract](https://github.com/tesseract-ocr) κάνει την αναγνώρηση των χαρακτήρων.
* [Unpaper](https://www.flameeyes.eu/projects/unpaper) despeckles and deskews the scanned image.
* [Unpaper](https://github.com/unpaper/unpaper) despeckles and deskews the scanned image.
* [GNU Privacy Guard](https://gnupg.org/) χρησιμοποιείται για κρυπτογράφηση στο backend.
* [Python 3](https://python.org/) είναι η γλώσσα του project.
* [Pillow](https://pypi.python.org/pypi/pillowfight/) Φορτώνει την εικόνα σαν αντικείμενο στην python και μπορεί να χρησιμοποιηθεί με PyOCR
@ -59,6 +59,7 @@
Το Paperless υπάρχει εδώ και κάποιο καιρό και άνθρωποι έχουν αρχίσει να φτιάχνουν πράγματα γύρω από αυτό. Αν είσαι ένας από αυτούς τους ανθρώπους, μπορούμε να βάλουμε το project σου σε αυτήν την λίστα:
* [Paperless App](https://github.com/bauerj/paperless_app): Μια εφαρμογή Android / iOS για Paperless.
* [Paperless Desktop](https://github.com/thomasbrueggemann/paperless-desktop): Μια desktop εφαρμογή για εγκατάσταση του Paperless. Τρέχει σε Mac, Linux, και Windows.
* [ansible-role-paperless](https://github.com/ovv/ansible-role-paperless): Ένας εύκολο τρόπος για να τρέχει το Paperless μέσω Ansible.

View File

@ -46,7 +46,7 @@ This is all really a quite simple, shiny, user-friendly wrapper around some very
* [ImageMagick](http://imagemagick.org/) converts the images between colour and greyscale.
* [Tesseract](https://github.com/tesseract-ocr) does the character recognition.
* [Unpaper](https://www.flameeyes.eu/projects/unpaper) despeckles and deskews the scanned image.
* [Unpaper](https://github.com/unpaper/unpaper) despeckles and deskews the scanned image.
* [GNU Privacy Guard](https://gnupg.org/) is used as the encryption backend.
* [Python 3](https://python.org/) is the language of the project.
* [Pillow](https://pypi.python.org/pypi/pillowfight/) loads the image data as a python object to be used with PyOCR.
@ -66,6 +66,7 @@ I am no longer doing new development on Paperless as it does exactly what I need
Paperless has been around a while now, and people are starting to build stuff on top of it. If you're one of those people, we can add your project to this list:
* [Paperless App](https://github.com/bauerj/paperless_app): An Android/iOS app for Paperless.
* [Paperless Desktop](https://github.com/thomasbrueggemann/paperless-desktop): A desktop UI for your Paperless installation. Runs on Mac, Linux, and Windows.
* [ansible-role-paperless](https://github.com/ovv/ansible-role-paperless): An easy way to get Paperless running via Ansible.
* [paperless-cli](https://github.com/stgarf/paperless-cli): A golang command line binary to interact with a Paperless instance.

View File

@ -11,12 +11,18 @@
# ...are all explained in that file but can be defined here, since the Docker
# installation doesn't make use of paperless.conf.
# Use this variable to set a timezone for the Paperless Docker containers. If not specified, defaults to UTC.
# TZ=America/Los_Angeles
# Additional languages to install for text recognition. Note that this is
# different from PAPERLESS_OCR_LANGUAGE (default=eng), which defines the
# default language used when guessing the language from the OCR output.
# PAPERLESS_OCR_LANGUAGES=deu ita
# Set Paperless to use SSL for the web interface.
# Enabling this will require ssl.key and ssl.cert files in paperless' data directory.
# PAPERLESS_USE_SSL=false
# You can change the default user and group id to a custom one
# USERMAP_UID=1000
# USERMAP_GID=1000

View File

@ -27,7 +27,7 @@ services:
# value with nothing.
environment:
- PAPERLESS_OCR_LANGUAGES=
command: ["runserver", "--insecure", "--noreload", "0.0.0.0:8000"]
command: ["gunicorn", "-b", "0.0.0.0:8000"]
consumer:
build: ./

44
docs/_static/lxc-install.svg vendored Normal file

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 1.9 MiB

View File

@ -0,0 +1,158 @@
#!/usr/bin/env bash
# Bash script to install paperless in lxc containter
# paperless.lan
#
# Will set-up paperless, apache2 and proftpd
#
# lxc launch ubuntu: paperless
# lxc exec paperless -- sh -c "sudo apt-get update && sudo apt-get install -y wget"
# lxc exec paperless -- sh -c "wget https://raw.githubusercontent.com/the-paperless-project/paperless/master/docs/examples/lxc/lxc-install.sh && /bin/bash lxc-install.sh --email "
#
#
set +e
PASSWORD=$(< /dev/urandom tr -dc _A-Z-a-z-0-9+@%^{} | head -c20;echo;)
EMAIL=
function displayHelp() {
echo "available parameters:
-e <email> | --email <email>
-p <password> | --password <password>
"
}
POSITIONAL=()
while [[ $# -gt 0 ]]
do
key="$1"
i=$key
case $i in
-e|--email)
EMAIL="${2}"
shift
shift
;;
-p|--password)
PASSWORD="${2}"
shift
shift
;;
--default|-h|--help)
shift
displayHelp
exit 0
;;
*)
echo "argument: $i not recognized"
exit 2
;;
esac
done
set -- "${POSITIONAL[@]}" # restore positional parameters
if [ -z $EMAIL ]; then
echo "missing email, try running with -h "
exit 3
fi
if [[ $(/usr/bin/id -u) -ne 0 ]]; then
echo "Not running as root"
exit
fi
if [ $(grep -c paperless /etc/passwd) -eq 0 ]; then
# Add paperless user with no password
adduser --disabled-password --gecos "" paperless
fi
if [ $(grep -c ftpupload /etc/passwd) -eq 0 ]; then
# Add ftpupload
adduser --disabled-password --gecos "" ftpupload
echo "Set ftpupload password: "
#passwd ftpupload
#TODO: generate some password and allow parameter
echo "ftpupload:ftpuploadpassword" | chpasswd
fi
if [ $(id -nG paperless | grep -Fcw ftpupload) -eq 0 ]; then
# Allow paperless group to access
adduser paperless ftpupload
chmod g+w /home/ftpupload
fi
# Get apt up to date
apt-get update
# Needed for plain Paperless
apt-get -y install unpaper gnupg libpoppler-cpp-dev python3-pyocr tesseract-ocr imagemagick optipng git
# Needed for Apache
apt-get -y install apache2 libapache2-mod-wsgi-py3
if [ ! -f /etc/proftpd/proftpd.conf ]; then
# Install ftp server and make sure all uplaoded files are owned by paperless
apt-get -y install proftpd
fi
if [ $(grep -c paperless /etc/proftpd/proftpd.conf) -eq 0 ]; then
cat <<EOF >> /etc/proftpd/proftpd.conf
<Directory /home/ftpupload/>
UserOwner paperless
GroupOwner paperless
</Directory>
EOF
systemctl restart proftpd
fi
#Get Paperless from git
su -c "cd /home/paperless ; git clone https://github.com/the-paperless-project/paperless" paperless
# Install Pip Requirements
apt-get -y install python3-pip python3-venv
cd /home/paperless/paperless
pip3 install -r requirements.txt
# Take paperless.conf.example and set consumuption dir (ftp dir)
sed -e '/PAPERLESS_CONSUMPTION_DIR=/s/=.*/=\"\/home\/ftpupload\/\"/' \
/home/paperless/paperless/paperless.conf.example >/etc/paperless.conf
# Update /etc/paperless.conf with PAPERLESS_SECRET_KEY
SECRET=$(strings /dev/urandom | grep -o '[[:alnum:]]' | head -n 30 | tr -d '\n'; echo)
sed -i "s/#PAPERLESS_SECRET_KEY.*/PAPERLESS_SECRET_KEY=$SECRET/" /etc/paperless.conf
#Initialise the SQLite database
su -c "cd /home/paperless/paperless/src/ ; ./manage.py migrate" paperless
echo "if superuser doesn't exists, create one with login: paperless and password: ${PASSWORD}"
#Create a user for your Paperless instance
su -c "cd /home/paperless/paperless/src/ ; echo ./manage.py create_superuser_with_password --username paperless --email ${EMAIL} --password ${PASSWORD} --preserve" paperless
su -c "cd /home/paperless/paperless/src/ ; ./manage.py create_superuser_with_password --username paperless --email ${EMAIL} --password ${PASSWORD} --preserve" paperless
if [ ! -d /home/paperless/paperless/static ]; then
# 167 static files copied to '/home/paperless/paperless/static'.
su -c "cd /home/paperless/paperless/src/ ; ./manage.py collectstatic" paperless
fi
if [ ! -f /etc/apache2/sites-available/paperless.conf ]; then
# Set-up apache
cp /home/paperless/paperless/docs/examples/lxc/paperless.conf /etc/apache2/sites-available/
a2dissite 000-default.conf
a2ensite paperless.conf
systemctl reload apache2
fi
sed -e "s:home/paperless/project/virtualenv/bin/python:usr/bin/python3:" \
/home/paperless/paperless/scripts/paperless-consumer.service \
>/etc/systemd/system/paperless-consumer.service
sed -i "s:/home/paperless/project/src/manage.py:/home/paperless/paperless/src/manage.py:" \
/etc/systemd/system/paperless-consumer.service
systemctl enable paperless-consumer
systemctl start paperless-consumer
# convert-im6.q16: not authorized
# Security risk ?
# https://stackoverflow.com/questions/42928765/convertnot-authorized-aaaa-error-constitute-c-readimage-453
if [ -f /etc/ImageMagick-6/policy.xml ]; then
mv /etc/ImageMagick-6/policy.xml /etc/ImageMagick-6/policy.xmlout
fi

View File

@ -0,0 +1,18 @@
<VirtualHost *:80>
ServerName paperless.lan
Alias /static/ /home/paperless/paperless/static/
<Directory /home/paperless/paperless/static>
Require all granted
</Directory>
WSGIScriptAlias / /home/paperless/paperless/src/paperless/wsgi.py
WSGIDaemonProcess paperless.lan user=paperless group=paperless threads=5 python-path=/home/paperless/paperless/src
WSGIProcessGroup paperless.lan
<Directory /home/paperless/paperless/src/paperless>
<Files wsgi.py>
Require all granted
</Files>
</Directory>
</VirtualHost>

View File

@ -54,6 +54,34 @@ filename as described above.
.. _dateparser: https://github.com/scrapinghub/dateparser/blob/v0.7.0/docs/usage.rst#settings
Transforming filenames for parsing
----------------------------------
Some devices can't produce filenames that can be parsed by the default
parser. By configuring the option ``PAPERLESS_FILENAME_PARSE_TRANSFORMS`` in
``paperless.conf`` one can add transformations that are applied to the filename
before it's parsed.
The option contains a list of dictionaries of regular expressions (key:
``pattern``) and replacements (key: ``repl``) in JSON format, which are
applied in order by passing them to ``re.subn``. Transformation stops
after the first match, so at most one transformation is applied. The general
syntax is
.. code:: python
[{"pattern":"pattern1", "repl":"repl1"}, {"pattern":"pattern2", "repl":"repl2"}, ..., {"pattern":"patternN", "repl":"replN"}]
The example below is for a Brother ADS-2400N, a scanner that allows
different names to different hardware buttons (useful for handling
multiple entities in one instance), but insists on adding ``_<count>``
to the filename.
.. code:: python
# Brother profile configuration, support "Name_Date_Count" (the default
# setting) and "Name_Count" (use "Name" as tag and "Count" as title).
PAPERLESS_FILENAME_PARSE_TRANSFORMS=[{"pattern":"^([a-z]+)_(\\d{8})_(\\d{6})_([0-9]+)\\.", "repl":"\\2\\3Z - \\4 - \\1."}, {"pattern":"^([a-z]+)_([0-9]+)\\.", "repl":" - \\2 - \\1."}]
.. _guesswork-content:
Reading the Document Contents

View File

@ -92,7 +92,7 @@ files, the ``migrate`` step may not update anything. This is totally normal.
Additionally, as new features are added, the ability to control those features
is typically added by way of an environment variable set in ``paperless.conf``.
You may want to take a look at the ``paperless.conf.example`` file to see if
there's anything new in there compared to what you've got int ``/etc``.
there's anything new in there compared to what you've got in ``/etc``.
If you are :ref:`using Docker <setup-installation-docker>` the update process
is similar:

View File

@ -18,7 +18,7 @@ should work) that has the following software installed:
.. _GNU Privacy Guard: https://gnupg.org
.. _Tesseract: https://github.com/tesseract-ocr
.. _Imagemagick: http://imagemagick.org/
.. _unpaper: https://www.flameeyes.eu/projects/unpaper
.. _unpaper: https://github.com/unpaper/unpaper
.. _libpoppler-cpp-dev: https://poppler.freedesktop.org/
.. _optipng: http://optipng.sourceforge.net/

View File

@ -19,15 +19,20 @@ that works right for you based on recommentations from other Paperless users.
+---------+----------------+-----+-----+-----+----------------+
| Brother | `MFC-J5910DW`_ | yes | | | `bmsleight`_ |
+---------+----------------+-----+-----+-----+----------------+
| Brother | `MFC-9142CDN`_ | yes | | yes | `REOLDEV`_ |
+---------+----------------+-----+-----+-----+----------------+
| Fujitsu | `ix500`_ | yes | | yes | `eonist`_ |
+---------+----------------+-----+-----+-----+----------------+
.. _ADS-1500W: https://www.brother.ca/en/p/ads1500w
.. _MFC-J6930DW: https://www.brother.ca/en/p/MFCJ6930DW
.. _MFC-J5910DW: https://www.brother.co.uk/printers/inkjet-printers/mfcj5910dw
.. _MFC-9142CDN: https://www.brother.co.uk/printers/laser-printers/mfc9140cdn
.. _ix500: http://www.fujitsu.com/us/products/computing/peripheral/scanners/scansnap/ix500/
.. _danielquinn: https://github.com/danielquinn
.. _ayounggun: https://github.com/ayounggun
.. _bmsleight: https://github.com/bmsleight
.. _eonist: https://github.com/eonist
.. _REOLDEV: https://github.com/REOLDEV

View File

@ -43,6 +43,7 @@ You can go multiple routes with setting up and running Paperless:
* The `bare metal route`_
* The `docker route`_
* A suggested `linux containers route`_
The `docker route`_ is quick & easy.
@ -50,10 +51,14 @@ The `docker route`_ is quick & easy.
The `bare metal route`_ is a bit more complicated to setup but makes it easier
should you want to contribute some code back.
The `linux containers route`_ is quick, but makes alot of assumptions on the
set-up, on the other hand the script could be used to install on a base
debian or ubuntu server.
.. _docker route: setup-installation-docker_
.. _bare metal route: setup-installation-bare-metal_
.. _Docker Machine: https://docs.docker.com/machine/
.. _linux containers route: setup-installation-linux-containers_
.. _setup-installation-bare-metal:
@ -82,21 +87,22 @@ Standard (Bare Metal)
this is the default.
4. Initialise the SQLite database with ``./manage.py migrate``.
5. Create a user for your Paperless instance with
5. Collect the static files for the webserver with ``./manage.py collectstatic``.
6. Create a user for your Paperless instance with
``./manage.py createsuperuser``. Follow the prompts to create your user.
6. Start the webserver with ``./manage.py runserver <IP>:<PORT>``.
7. Start the webserver with ``./manage.py runserver <IP>:<PORT>``.
If no specific IP or port is given, the default is ``127.0.0.1:8000`` also
known as http://localhost:8000/.
You should now be able to visit your (empty) installation at
`Paperless webserver`_ or whatever you chose before. You can login with the
user/pass you created in #5.
7. In a separate window, change to the ``src`` directory in this repo again,
8. In a separate window, change to the ``src`` directory in this repo again,
but this time, you should start the consumer script with
``./manage.py document_consumer``.
8. Scan something or put a file into the ``CONSUMPTION_DIR``.
9. Wait a few minutes
10. Visit the document list on your webserver, and it should be there, indexed
9. Scan something or put a file into the ``CONSUMPTION_DIR``.
10. Wait a few minutes
11. Visit the document list on your webserver, and it should be there, indexed
and downloadable.
.. caution::
@ -126,8 +132,8 @@ Docker Method
.. caution::
If you want to use the included ``docker-compose.yml.example`` file, you
need to have at least Docker version **1.10.0** and docker-compose
version **1.6.0**.
need to have at least Docker version **1.12.0** and docker-compose
version **1.9.0**.
See the `Docker installation guide`_ on how to install the current
version of Docker for your operating system or Linux distribution of
@ -153,7 +159,7 @@ Docker Method
If you are using NFS mounts for the consume directory you also need to
change the command to turn off inotify as it doesn't work with NFS
`command: ["document_consumer", "--no-inotify"]`
``command: ["document_consumer", "--no-inotify"]``
5. Modify ``docker-compose.env`` and adapt the following environment variables:
@ -187,6 +193,13 @@ Docker Method
container and thus the one of the consumption directory. Furthermore, you
can change the id of the default user as well using ``USERMAP_UID``.
``PAPERLESS_USE_SSL``
If you want Paperless to use SSL for the user interface, set this variable
to ``true``. You also need to copy your certificate and key to the ``data``
directory, named ``ssl.cert`` and ``ssl.key``.
This is not an ideal solution and, if possible, a reverse proxy with nginx
is preferred.
6. Run ``docker-compose up -d``. This will create and start the necessary
containers.
7. To be able to login, you will need a super user. To create it, execute the
@ -200,7 +213,8 @@ Docker Method
e-mail address and finally a password.
8. The default ``docker-compose.yml`` exports the webserver on your local port
8000. If you haven't adapted this, you should now be able to visit your
`Paperless webserver`_ at ``http://127.0.0.1:8000``. You can login with the
`Paperless webserver`_ at ``http://127.0.0.1:8000`` (or
``https://127.0.0.1:8000`` if you enabled SSL). You can login with the
user and password you just created.
9. Add files to consumption directory the way you prefer to. Following are two
possible options:
@ -326,7 +340,7 @@ and mod_wsgi, with a Paperless installation in ``/home/paperless/``:
</Directory>
WSGIScriptAlias / /home/paperless/paperless/src/paperless/wsgi.py
WSGIDaemonProcess example.com user=paperless group=paperless threads=5 python-path=/home/paperless/paperless/src:/home/paperless/.env/lib/python3.4/site-packages
WSGIDaemonProcess example.com user=paperless group=paperless threads=5 python-path=/home/paperless/paperless/src:/home/paperless/.env/lib/python3.6/site-packages
WSGIProcessGroup example.com
<Directory /home/paperless/paperless/src/paperless>
@ -484,3 +498,45 @@ If you're using Docker, you can set a restart-policy_ in the
Docker daemon.
.. _restart-policy: https://docs.docker.com/engine/reference/commandline/run/#restart-policies-restart
.. _setup-installation-linux-containers:
Suggested way for Linux Container Method
++++++++++++++++++++++++++++++++++++++++
This method uses some rigid assumptions, for the best set-up:-
* Ubuntu lts as the container
* Apache as the webserver
* proftpd as ftp server
* ftpupload as the ftp user
* paperless as the main user for website
* http://paperless.lan is the desired lan url
* LXC set to give ip addresses on your lan
This could also be used as an install on a base debain/ubuntu server,
if the above assumptions are acceptable.
1. Install lxc
2. Lanch paperless container
.. code:: bash
$ lxc launch ubuntu: paperless
3. Run install script within container
.. code:: bash
$ lxc exec paperless -- sh -c "wget https://raw.githubusercontent.com/the-paperless-project/paperless/master/docs/examples/lxc/lxc-install.sh && /bin/bash lxc-install.sh --email"
The script will ask you for an ftpupload password.
As well as the super-user for paperless web front-end.
After around 10 mins, http://paperless.lan is ready and
ftp://paperless.lan with user: ftpupload
See the `Installation recording <_static/lxc-install.svg>`_.

View File

@ -72,4 +72,4 @@ with a DPI of 300, then merging the images into the single PDF with
For more information on this and situations like it, you should take a look
at `Issue #118`_ as that's where this tip originated.
.. _Issue #118: https://github.com/the-paperless-project/paperless/issues/118
.. _Issue #118: https://github.com/the-paperless-project/paperless/issues/118

View File

@ -193,18 +193,19 @@ instructions above to do the import.
.. _utilities-retagger:
The Re-tagger
-------------
Re-running your tagging and correspondent matchers
--------------------------------------------------
Say you've imported a few hundred documents and now want to introduce a tag
and apply its matching to all of the currently-imported docs. This problem is
common enough that there's a tool for it.
Say you've imported a few hundred documents and now want to introduce
a tag or set up a new correspondent, and apply its matching to all of
the currently-imported docs. This problem is common enough that
there are tools for it.
.. _utilities-retagger-howto:
How to Use It
.............
How to Do It
............
This too is done via the ``manage.py`` script:
@ -212,10 +213,16 @@ This too is done via the ``manage.py`` script:
$ /path/to/paperless/src/manage.py document_retagger
That's it. It'll loop over all of the documents in your database and attempt
to match all of your tags to them. If one matches, it'll be applied. And
don't worry, you can run this as often as you like, it won't double-tag
a document.
Run this after changing or adding tagging rules. It'll loop over all
of the documents in your database and attempt to match all of your
tags to them. If one matches, it'll be applied. And don't worry, you
can run this as often as you like, it won't double-tag a document.
.. code:: bash
$ /path/to/paperless/src/manage.py document_correspondents
This is the similar command to run after adding or changing a correspondent.
.. _utilities-encyption:
@ -232,10 +239,10 @@ Basic Syntax
Again we'll use the ``manage.py`` script, passing ``change_storage_type``:
.. code:: bash
.. code:: console
$ /path/to/paperless/src/manage.py change_storage_type --help
usage: manage.py change_storage_type [-h] [--version] [-v {0,1,2,3}]
usage: manage.py change_storage_type [-h] [--version] [-v {0,1,2,3}]
[--settings SETTINGS]
[--pythonpath PYTHONPATH] [--traceback]
[--no-color] [--passphrase PASSPHRASE]

View File

@ -0,0 +1,38 @@
from django.contrib.auth.management.commands import createsuperuser
from django.core.management import CommandError
class Command(createsuperuser.Command):
help = 'Crate a superuser, and allow password to be provided'
def add_arguments(self, parser):
super(Command, self).add_arguments(parser)
parser.add_argument(
'--password', dest='password', default=None,
help='Specifies the password for the superuser.',
)
parser.add_argument(
'--preserve', dest='preserve', default=False, action='store_true',
help='Exit normally if the user already exists.',
)
def handle(self, *args, **options):
password = options.get('password')
username = options.get('username')
database = options.get('database')
if password and not username:
raise CommandError("--username is required if specifying --password")
if username and options.get('preserve'):
exists = self.UserModel._default_manager.db_manager(database).filter(username=username).exists()
if exists:
self.stdout.write("User exists, exiting normally due to --preserve")
return
super(Command, self).handle(*args, **options)
if password:
user = self.UserModel._default_manager.db_manager(database).get(username=username)
user.set_password(password)
user.save()

View File

@ -71,6 +71,17 @@ PAPERLESS_CONSUME_MAIL_PASS=""
# ignored.
PAPERLESS_EMAIL_SECRET=""
# Specify a filename format for the document (directories are supported)
# Use the following placeholders:
# * {correspondent}
# * {title}
# * {created}
# * {added}
# * {tags[KEY]} If your tags conform to key_value or key-value
# * {tags[INDEX]} If your tags are strings, select the tag by index
# Uniqueness of filenames is ensured, as an incrementing counter is attached
# to each filename.
#PAPERLESS_FILENAME_FORMAT=""
###############################################################################
#### Security ####
@ -152,6 +163,23 @@ PAPERLESS_EMAIL_SECRET=""
# as normal.
#PAPERLESS_FILENAME_DATE_ORDER="YMD"
# Sometimes devices won't create filenames which can be parsed properly
# by the filename parser (see
# https://paperless.readthedocs.io/en/latest/guesswork.html).
#
# This setting allows to specify a list of transformations
# in regular expression syntax, which are passed in order to re.sub.
# Transformation stops after the first match, so at most one transformation
# is applied.
#
# Syntax is a JSON array of dictionaries containing "pattern" and "repl"
# as keys.
#
# The example below transforms filenames created by a Brother ADS-2400N
# document scanner in its standard configuration `Name_Date_Count', so that
# count is used as title, name as tag and date can be parsed by paperless.
#PAPERLESS_FILENAME_PARSE_TRANSFORMS=[{"pattern":"^([a-z]+)_(\\d{8})_(\\d{6})_([0-9]+)\\.", "repl":"\\2\\3Z - \\4 - \\1."}]
#
# The following values use sensible defaults for modern systems, but if you're
# running Paperless on a low-resource device (like a Raspberry Pi), modifying

64
requirements.txt Executable file → Normal file
View File

@ -1,12 +1,13 @@
-i https://pypi.python.org/simple
alabaster==0.7.12
apipkg==1.5
atomicwrites==1.2.1
attrs==18.2.0
babel==2.6.0
certifi==2018.11.29
atomicwrites==1.3.0
attrs==19.1.0
babel==2.7.0
brotli==1.0.7
certifi==2019.6.16
chardet==3.0.4
coverage==4.5.2
coverage==4.5.4
coveralls==1.5.1
dateparser==0.7.0
django-cors-headers==2.4.0
@ -17,37 +18,38 @@ django==2.0.10
djangoql==0.12.3
djangorestframework==3.9.1
docopt==0.6.2
docutils==0.14
execnet==1.5.0
docutils==0.15.2
execnet==1.6.1
factory-boy==2.11.1
faker==1.0.2
filelock==3.0.10
faker==2.0.0
filelock==3.0.12
filemagic==1.6
fuzzywuzzy[speedup]==0.15.0
gunicorn==19.9.0
gunicorn==20.0.4
idna==2.8
imagesize==1.1.0
importlib-metadata==0.19
inotify-simple==1.1.8; sys_platform == 'linux'
jinja2==2.10
jinja2==2.10.1
langdetect==1.0.7
markupsafe==1.1.1
more-itertools==7.2.0
numpy==1.15.1
markupsafe==1.1.0
more-itertools==5.0.0
packaging==19.0
packaging==19.1
pdftotext==2.1.1
pillow==5.4.1
pluggy==0.8.1
pluggy==0.12.0
ply==3.11
psycopg2==2.7.7
py==1.7.0
psycopg2==2.8.4
py==1.8.0
pycodestyle==2.4.0
pygments==2.3.1
pygments==2.4.2
pyocr==0.5.3
pyparsing==2.3.1
pyparsing==2.4.2
pytest-cov==2.6.1
pytest-django==3.4.5
pytest-env==0.6.2
pytest-forked==1.0.1
pytest-forked==1.0.2
pytest-sugar==0.9.2
pytest-xdist==1.26.0
pytest==4.1.1
@ -58,16 +60,24 @@ python-dotenv==0.10.1
python-gnupg==0.4.4
python-levenshtein==0.12.0
pytz==2018.9
regex==2019.1.24
requests==2.21.0
regex==2019.6.8
requests==2.22.0
six==1.12.0
snowballstemmer==1.2.1
snowballstemmer==1.9.0
sphinx==1.8.3
sphinxcontrib-websupport==1.1.0
sphinxcontrib-applehelp==1.0.1
sphinxcontrib-devhelp==1.0.1
sphinxcontrib-htmlhelp==1.0.2
sphinxcontrib-jsmath==1.0.1
sphinxcontrib-qthelp==1.0.2
sphinxcontrib-serializinghtml==1.1.3
termcolor==1.1.0
text-unidecode==1.2
toml==0.10.0
tox==3.7.0
tzlocal==1.5.1
urllib3==1.24.1
virtualenv==16.3.0
tzlocal==2.0.0
urllib3==1.25.3
virtualenv==16.7.2
wcwidth==0.1.7
whitenoise==4.1.3
zipp==0.5.2

View File

@ -10,7 +10,7 @@ map_uidgid() {
if [[ ${USERMAP_NEW_UID} != "${USERMAP_ORIG_UID}" || ${USERMAP_NEW_GID} != "${USERMAP_ORIG_GID}" ]]; then
echo "Mapping UID and GID for paperless:paperless to $USERMAP_NEW_UID:$USERMAP_NEW_GID"
usermod -u "${USERMAP_NEW_UID}" paperless
groupmod -g "${USERMAP_NEW_GID}" paperless
groupmod -o -g "${USERMAP_NEW_GID}" paperless
fi
}
@ -99,7 +99,23 @@ if [[ "$1" != "/"* ]]; then
install_languages "$PAPERLESS_OCR_LANGUAGES"
fi
exec sudo -HEu paperless "/usr/src/paperless/src/manage.py" "$@"
if [[ "$1" = "gunicorn" ]]; then
shift
EXTRA_PARAMS=""
SSL_KEY_PATH="/usr/src/paperless/data/ssl.key"
SSL_CERT_PATH="/usr/src/paperless/data/ssl.cert"
if [ "${PAPERLESS_USE_SSL}" = "true" ]; then
if [ -f "${SSL_KEY_PATH}" ] && [ -f "${SSL_CERT_PATH}" ]; then
EXTRA_PARAMS="--certfile=${SSL_CERT_PATH} --keyfile=${SSL_KEY_PATH}"
else
echo "Error: Could not find certfile in ${SSL_CERT_PATH} or keyfile in ${SSL_KEY_PATH}, but \$PAPERLESS_USE_SSL is true. Starting without SSL enabled."
fi
fi
cd /usr/src/paperless/src/ && \
exec sudo -HEu paperless /usr/bin/gunicorn -c /usr/src/paperless/gunicorn.conf ${EXTRA_PARAMS} "$@" paperless.wsgi
else
exec sudo -HEu paperless "/usr/src/paperless/src/manage.py" "$@"
fi
fi
exec "$@"

View File

@ -1,6 +1,6 @@
bind = '127.0.0.1:8000'
backlog = 2048
workers = 1
workers = 3
worker_class = 'sync'
worker_connections = 1000
timeout = 20

View File

@ -1,5 +1,7 @@
[Unit]
Description=Paperless webserver
After=network.target
Wants=network.target
[Service]
User=paperless

View File

@ -94,6 +94,11 @@ class Consumer:
ignored_files.append(file)
else:
files.append(file)
else:
self.logger.warning(
"Skipping %s as it is not a file",
entry.path
)
if not files:
return
@ -234,6 +239,9 @@ class Consumer:
self._write(document, doc, document.source_path)
self._write(document, thumbnail, document.thumbnail_path)
document.set_filename(document.source_filename)
document.save()
self.log("info", "Completed")
return document

View File

@ -26,6 +26,7 @@ class Command(BaseCommand):
def __init__(self, *args, **kwargs):
self.verbosity = 0
self.logger = logging.getLogger(__name__)
self.file_consumer = None
self.mail_fetcher = None
@ -138,6 +139,11 @@ class Command(BaseCommand):
file = os.path.join(directory, event.name)
if os.path.isfile(file):
self.file_consumer.try_consume_file(file)
else:
self.logger.warning(
"Skipping %s as it is not a file",
file
)
else:
break

View File

@ -0,0 +1,24 @@
from django.core.management.base import BaseCommand
from documents.models import Document, Tag
from ...mixins import Renderable
class Command(Renderable, BaseCommand):
help = """
This will rename all documents to match the latest filename format.
""".replace(" ", "")
def __init__(self, *args, **kwargs):
self.verbosity = 0
BaseCommand.__init__(self, *args, **kwargs)
def handle(self, *args, **options):
self.verbosity = options["verbosity"]
for document in Document.objects.all():
# Saving the document again will generate a new filename and rename
document.save()

View File

@ -0,0 +1,37 @@
# Generated by Django 2.0.10 on 2019-04-26 18:57
from django.db import migrations, models
def set_filename(apps, schema_editor):
Document = apps.get_model("documents", "Document")
for doc in Document.objects.all():
file_name = "{:07}.{}".format(doc.pk, doc.file_type)
if doc.storage_type == "gpg":
file_name += ".gpg"
# Set filename
doc.filename = file_name
# Save document
doc.save()
class Migration(migrations.Migration):
dependencies = [
('documents', '0022_auto_20181007_1420'),
]
operations = [
migrations.AddField(
model_name='document',
name='filename',
field=models.FilePathField(default=None,
null=True,
editable=False,
help_text='Current filename in storage',
max_length=256),
),
migrations.RunPython(set_filename)
]

View File

@ -7,12 +7,14 @@ import uuid
from collections import OrderedDict
import dateutil.parser
from django.dispatch import receiver
from django.conf import settings
from django.db import models
from django.template.defaultfilters import slugify
from django.utils import timezone
from django.utils.text import slugify
from fuzzywuzzy import fuzz
from collections import defaultdict
from .managers import LogManager
@ -167,6 +169,14 @@ class Document(models.Model):
added = models.DateTimeField(
default=timezone.now, editable=False, db_index=True)
filename = models.FilePathField(
max_length=256,
editable=False,
default=None,
null=True,
help_text="Current filename in storage"
)
archive_serial_number = models.IntegerField(
blank=True,
null=True,
@ -188,18 +198,125 @@ class Document(models.Model):
return "{}: {}".format(created, self.correspondent or self.title)
return str(created)
def find_renamed_document(self, subdirectory=""):
suffix = "%07i.%s" % (self.pk, self.file_type)
# Append .gpg for encrypted files
if self.storage_type == self.STORAGE_TYPE_GPG:
suffix += ".gpg"
# Go up in the directory hierarchy and try to delete all directories
root = os.path.normpath(Document.filename_to_path(subdirectory))
for filename in os.listdir(root):
if filename.endswith(suffix):
return os.path.join(subdirectory, filename)
fullname = os.path.join(subdirectory, filename)
if os.path.isdir(Document.filename_to_path(fullname)):
return self.find_renamed_document(fullname)
return None
@property
def source_filename(self):
# Initial filename generation (for new documents)
if self.filename is None:
self.filename = self.generate_source_filename()
# Check if document is still available under filename
elif not os.path.isfile(Document.filename_to_path(self.filename)):
recovered_filename = self.find_renamed_document()
# If we have found the file so update the filename
if recovered_filename is not None:
logger = logging.getLogger(__name__)
logger.warning("Filename of document " + str(self.id) +
" has changed and was successfully updated")
self.filename = recovered_filename
# Remove all empty subdirectories from MEDIA_ROOT
Document.delete_all_empty_subdirectories(
Document.filename_to_path(""))
else:
logger = logging.getLogger(__name__)
logger.error("File of document " + str(self.id) + " has " +
"gone and could not be recovered")
return self.filename
@staticmethod
def many_to_dictionary(field):
# Converts ManyToManyField to dictionary by assuming, that field
# entries contain an _ or - which will be used as a delimiter
mydictionary = dict()
for index, t in enumerate(field.all()):
# Populate tag names by index
mydictionary[index] = slugify(t.name)
# Find delimiter
delimiter = t.name.find('_')
if delimiter == -1:
delimiter = t.name.find('-')
if delimiter == -1:
continue
key = t.name[:delimiter]
value = t.name[delimiter+1:]
mydictionary[slugify(key)] = slugify(value)
return mydictionary
def generate_source_filename(self):
# Create filename based on configured format
if settings.PAPERLESS_FILENAME_FORMAT is not None:
tags = defaultdict(lambda: slugify(None),
self.many_to_dictionary(self.tags))
path = settings.PAPERLESS_FILENAME_FORMAT.format(
correspondent=slugify(self.correspondent),
title=slugify(self.title),
created=slugify(self.created),
added=slugify(self.added),
tags=tags)
else:
path = ""
# Always append the primary key to guarantee uniqueness of filename
if len(path) > 0:
filename = "%s-%07i.%s" % (path, self.pk, self.file_type)
else:
filename = "%07i.%s" % (self.pk, self.file_type)
# Append .gpg for encrypted files
if self.storage_type == self.STORAGE_TYPE_GPG:
filename += ".gpg"
return filename
def create_source_directory(self):
new_filename = self.generate_source_filename()
# Determine the full "target" path
dir_new = Document.filename_to_path(os.path.dirname(new_filename))
# Create new path
os.makedirs(dir_new, exist_ok=True)
@property
def source_path(self):
return Document.filename_to_path(self.source_filename)
file_name = "{:07}.{}".format(self.pk, self.file_type)
if self.storage_type == self.STORAGE_TYPE_GPG:
file_name += ".gpg"
@staticmethod
def filename_to_path(filename):
return os.path.join(
settings.MEDIA_ROOT,
"documents",
"originals",
file_name
filename
)
@property
@ -236,6 +353,125 @@ class Document(models.Model):
def thumbnail_url(self):
return reverse("fetch", kwargs={"kind": "thumb", "pk": self.pk})
def set_filename(self, filename):
if os.path.isfile(Document.filename_to_path(filename)):
self.filename = filename
@staticmethod
def try_delete_empty_directories(directory):
# Go up in the directory hierarchy and try to delete all directories
directory = os.path.normpath(directory)
root = os.path.normpath(Document.filename_to_path(""))
while directory != root:
# Try to delete the current directory
try:
os.rmdir(directory)
except os.error:
# Directory not empty, no need to go further up
return
# Cut off actual directory and go one level up
directory, _ = os.path.split(directory)
directory = os.path.normpath(directory)
@staticmethod
def delete_all_empty_subdirectories(directory):
# Go through all folders and try to delete all directories
root = os.path.normpath(Document.filename_to_path(directory))
for filename in os.listdir(root):
fullname = os.path.join(directory, filename)
if not os.path.isdir(Document.filename_to_path(fullname)):
continue
# Go into subdirectory to see, if there is more to delete
Document.delete_all_empty_subdirectories(
os.path.join(directory, filename))
# Try to delete the directory
try:
os.rmdir(Document.filename_to_path(fullname))
continue
except os.error:
# Directory not empty, no need to go further up
continue
@receiver(models.signals.m2m_changed, sender=Document.tags.through)
@receiver(models.signals.post_save, sender=Document)
def update_filename(sender, instance, **kwargs):
# Skip if document has not been saved yet
if instance.filename is None:
return
# Check is file exists and update filename otherwise
if not os.path.isfile(Document.filename_to_path(instance.filename)):
instance.filename = instance.source_filename
# Build the new filename
new_filename = instance.generate_source_filename()
# If the filename is the same, then nothing needs to be done
if instance.filename == new_filename:
return
# Determine the full "target" path
path_new = instance.filename_to_path(new_filename)
dir_new = instance.filename_to_path(os.path.dirname(new_filename))
# Create new path
instance.create_source_directory()
# Determine the full "current" path
path_current = instance.filename_to_path(instance.source_filename)
# Move file
try:
os.rename(path_current, path_new)
except PermissionError:
# Do not update filename in object
return
except FileNotFoundError:
logger = logging.getLogger(__name__)
logger.error("Renaming of document " + str(instance.id) + " failed " +
"as file " + instance.filename + " was no longer present")
return
# Delete empty directory
old_dir = os.path.dirname(instance.filename)
old_path = instance.filename_to_path(old_dir)
Document.try_delete_empty_directories(old_path)
instance.filename = new_filename
# Save instance
# This will not cause a cascade of post_save signals, as next time
# nothing needs to be renamed
instance.save()
@receiver(models.signals.post_delete, sender=Document)
def delete_files(sender, instance, **kwargs):
if instance.filename is None:
return
# Remove the document
old_file = instance.filename_to_path(instance.filename)
try:
os.remove(old_file)
except FileNotFoundError:
logger = logging.getLogger(__name__)
logger.warning("Deleted document " + str(instance.id) + " but file " +
old_file + " was no longer present")
# And remove the directory (if applicable)
old_dir = os.path.dirname(instance.filename)
old_path = instance.filename_to_path(old_dir)
Document.try_delete_empty_directories(old_path)
class Log(models.Model):
@ -405,8 +641,18 @@ class FileInfo:
"<title>.<suffix>"
"""
filename = os.path.basename(path)
# Mutate filename in-place before parsing its components
# by applying at most one of the configured transformations.
for (pattern, repl) in settings.FILENAME_PARSE_TRANSFORMS:
(filename, count) = pattern.subn(repl, filename)
if count:
break
# Parse filename components.
for regex in cls.REGEXES.values():
m = regex.match(os.path.basename(path))
m = regex.match(filename)
if m:
properties = m.groupdict()
cls._mangle_property(properties, "created")

View File

@ -108,7 +108,7 @@ class DocumentParser:
try:
date = __parser(date_string, self.FILENAME_DATE_ORDER)
except TypeError:
except (TypeError, ValueError):
# Skip all matches that do not parse to a proper date
continue
@ -134,7 +134,7 @@ class DocumentParser:
try:
date = __parser(date_string, self.DATE_ORDER)
except TypeError:
except (TypeError, ValueError):
# Skip all matches that do not parse to a proper date
continue

View File

@ -12,6 +12,10 @@
{% block content %}
{{ block.super }}
<div class="side-preview">
<h2>Preview</h2>
<object data="/fetch/preview/{{object_id}}"></object>
</div>
{% if file_type in "pdf jpg png" %}
@ -47,6 +51,37 @@
{% endblock content %}
{% block extrastyle %}
{{ block.super }}
<style>
.side-preview {
width: 100%;
height: 800px;
clear: both;
}
.side-preview object {
height: 100%;
width: 100%;
}
@media screen and (min-width: 1500px) {
#content-main {
width: 50%;
}
#footer {
padding: 0;
}
.side-preview {
float: right;
width: 40%;
height: 80vh;
clear: none;
}
}
</style>
{% endblock %}
{% block footer %}
{{ block.super }}

View File

@ -1,3 +1,5 @@
import re
from django.test import TestCase
from unittest import mock
from tempfile import TemporaryDirectory
@ -372,3 +374,79 @@ class TestFieldPermutations(TestCase):
info = FileInfo.from_path("/path/to/06112017Z - title.pdf")
self.assertEqual(info.title, "title")
self.assertIsNone(info.created)
def test_filename_parse_transforms(self):
path = "/some/path/to/tag1,tag2_20190908_180610_0001.pdf"
all_patt = re.compile("^.*$")
none_patt = re.compile("$a")
exact_patt = re.compile("^([a-z0-9,]+)_(\\d{8})_(\\d{6})_([0-9]+)\\.")
repl1 = " - \\4 - \\1." # (empty) corrspondent, title and tags
repl2 = "\\2Z - " + repl1 # creation date + repl1
# No transformations configured (= default)
info = FileInfo.from_path(path)
self.assertEqual(info.title, "tag1,tag2_20190908_180610_0001")
self.assertEqual(info.extension, "pdf")
self.assertEqual(info.tags, ())
self.assertIsNone(info.created)
# Pattern doesn't match (filename unaltered)
with self.settings(
FILENAME_PARSE_TRANSFORMS=[(none_patt, "none.gif")]):
info = FileInfo.from_path(path)
self.assertEqual(info.title, "tag1,tag2_20190908_180610_0001")
self.assertEqual(info.extension, "pdf")
# Simple transformation (match all)
with self.settings(
FILENAME_PARSE_TRANSFORMS=[(all_patt, "all.gif")]):
info = FileInfo.from_path(path)
self.assertEqual(info.title, "all")
self.assertEqual(info.extension, "gif")
# Multiple transformations configured (first pattern matches)
with self.settings(
FILENAME_PARSE_TRANSFORMS=[
(all_patt, "all.gif"),
(all_patt, "anotherall.gif")]):
info = FileInfo.from_path(path)
self.assertEqual(info.title, "all")
self.assertEqual(info.extension, "gif")
# Multiple transformations configured (second pattern matches)
with self.settings(
FILENAME_PARSE_TRANSFORMS=[
(none_patt, "none.gif"),
(all_patt, "anotherall.gif")]):
info = FileInfo.from_path(path)
self.assertEqual(info.title, "anotherall")
self.assertEqual(info.extension, "gif")
# Complex transformation without date in replacement string
with self.settings(
FILENAME_PARSE_TRANSFORMS=[(exact_patt, repl1)]):
info = FileInfo.from_path(path)
self.assertEqual(info.title, "0001")
self.assertEqual(info.extension, "pdf")
self.assertEqual(len(info.tags), 2)
self.assertEqual(info.tags[0].slug, "tag1")
self.assertEqual(info.tags[1].slug, "tag2")
self.assertIsNone(info.created)
# Complex transformation with date in replacement string
with self.settings(
FILENAME_PARSE_TRANSFORMS=[
(none_patt, "none.gif"),
(exact_patt, repl2), # <-- matches
(exact_patt, repl1),
(all_patt, "all.gif")]):
info = FileInfo.from_path(path)
self.assertEqual(info.title, "0001")
self.assertEqual(info.extension, "pdf")
self.assertEqual(len(info.tags), 2)
self.assertEqual(info.tags[0].slug, "tag1")
self.assertEqual(info.tags[1].slug, "tag2")
self.assertEqual(info.created.year, 2019)
self.assertEqual(info.created.month, 9)
self.assertEqual(info.created.day, 8)

View File

@ -0,0 +1,559 @@
import datetime
import os
import shutil
from unittest import mock
from uuid import uuid4
from pathlib import Path
from shutil import rmtree
from dateutil import tz
from django.test import TestCase, override_settings
from django.utils.text import slugify
from ..models import Tag, Document, Correspondent
from django.conf import settings
class TestDate(TestCase):
deletion_list = []
def add_to_deletion_list(self, dirname):
self.deletion_list.append(dirname)
def setUp(self):
folder = "/tmp/paperless-tests-{}".format(str(uuid4())[:8])
os.makedirs(folder + "/documents/originals")
storage_override = override_settings(MEDIA_ROOT=folder)
storage_override.enable()
self.add_to_deletion_list(folder)
def tearDown(self):
for dirname in self.deletion_list:
shutil.rmtree(dirname, ignore_errors=True)
@override_settings(PAPERLESS_FILENAME_FORMAT="")
def test_source_filename(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
self.assertEqual(document.source_filename, "0000001.pdf")
document.filename = "test.pdf"
self.assertEqual(document.source_filename, "test.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="")
def test_generate_source_filename(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
self.assertEqual(document.generate_source_filename(), "0000001.pdf")
document.storage_type = Document.STORAGE_TYPE_GPG
self.assertEqual(document.generate_source_filename(),
"0000001.pdf.gpg")
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
"{correspondent}")
def test_file_renaming(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none/none-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()
# Test source_path
self.assertEqual(document.source_path, settings.MEDIA_ROOT +
"/documents/originals/none/none-0000001.pdf")
# Enable encryption and check again
document.storage_type = Document.STORAGE_TYPE_GPG
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none/none-0000001.pdf.gpg")
document.save()
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/none"), True)
# Set a correspondent and save the document
document.correspondent = Correspondent.objects.get_or_create(
name="test")[0]
document.save()
# Check proper handling of files
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/test"), True)
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/none"), False)
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" +
"originals/test/test-0000001.pdf.gpg"), True)
self.assertEqual(document.generate_source_filename(),
"test/test-0000001.pdf.gpg")
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
"{correspondent}")
def test_file_renaming_missing_permissions(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none/none-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()
# Test source_path
self.assertEqual(document.source_path, settings.MEDIA_ROOT +
"/documents/originals/none/none-0000001.pdf")
# Make the folder read- and execute-only (no writing and no renaming)
os.chmod(settings.MEDIA_ROOT + "/documents/originals/none", 0o555)
# Set a correspondent and save the document
document.correspondent = Correspondent.objects.get_or_create(
name="test")[0]
document.save()
# Check proper handling of files
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" +
"originals/none/none-0000001.pdf"), True)
self.assertEqual(document.source_filename,
"none/none-0000001.pdf")
os.chmod(settings.MEDIA_ROOT + "/documents/originals/none", 0o777)
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
"{correspondent}")
def test_document_delete(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none/none-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()
# Ensure file deletion after delete
document.delete()
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT +
"/documents/originals/none/none-0000001.pdf"), False)
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/none"), False)
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
"{correspondent}")
def test_document_delete_nofile(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
document.delete()
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
"{correspondent}")
def test_directory_not_empty(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none/none-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()
Path(document.source_path + "test").touch()
# Set a correspondent and save the document
document.correspondent = Correspondent.objects.get_or_create(
name="test")[0]
document.save()
# Check proper handling of files
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/test"), True)
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/none"), True)
# Cleanup
os.remove(settings.MEDIA_ROOT +
"/documents/originals/none/none-0000001.pdftest")
os.rmdir(settings.MEDIA_ROOT + "/documents/originals/none")
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
def test_tags_with_underscore(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
# Add tag to document
document.tags.create(name="type_demo")
document.tags.create(name="foo_bar")
document.save()
# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"demo-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()
document.delete()
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
def test_tags_with_dash(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
# Add tag to document
document.tags.create(name="type-demo")
document.tags.create(name="foo-bar")
document.save()
# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"demo-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()
document.delete()
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
def test_tags_malformed(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
# Add tag to document
document.tags.create(name="type:demo")
document.tags.create(name="foo:bar")
document.save()
# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()
document.delete()
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[0]}")
def test_tags_all(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
# Add tag to document
document.tags.create(name="demo")
document.save()
# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"demo-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()
document.delete()
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[0]}")
def test_tags_out_of_bounds_0(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()
document.delete()
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[10000000]}")
def test_tags_out_of_bounds_10000000(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()
document.delete()
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[99]}")
def test_tags_out_of_bounds_99(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()
document.delete()
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
"{correspondent}/{correspondent}")
def test_nested_directory_cleanup(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none/none/none-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()
# Check proper handling of files
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/none/none"), True)
document.delete()
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT +
"/documents/originals/none/none/none-0000001.pdf"),
False)
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/none/none"), False)
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/none"), False)
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals"), True)
@override_settings(PAPERLESS_FILENAME_FORMAT=None)
def test_format_none(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
self.assertEqual(document.generate_source_filename(), "0000001.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
"{correspondent}")
def test_document_renamed(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none/none-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()
# Test source_path
self.assertEqual(document.source_path, settings.MEDIA_ROOT +
"/documents/originals/none/none-0000001.pdf")
# Rename the document "illegaly"
os.makedirs(settings.MEDIA_ROOT + "/documents/originals/test")
os.rename(settings.MEDIA_ROOT + "/documents/originals/" +
"none/none-0000001.pdf",
settings.MEDIA_ROOT + "/documents/originals/" +
"test/test-0000001.pdf")
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" +
"originals/test/test-0000001.pdf"), True)
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" +
"originals/none/none-0000001.pdf"), False)
# Set new correspondent and expect document to be saved properly
document.correspondent = Correspondent.objects.get_or_create(
name="foo")[0]
document.save()
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" +
"originals/foo/foo-0000001.pdf"), True)
# Check proper handling of files
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/foo"), True)
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/none"), False)
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/test"), False)
self.assertEqual(document.generate_source_filename(),
"foo/foo-0000001.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
"{correspondent}")
def test_document_renamed_encrypted(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_GPG
document.save()
# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none/none-0000001.pdf.gpg")
document.create_source_directory()
Path(document.source_path).touch()
# Test source_path
self.assertEqual(document.source_path, settings.MEDIA_ROOT +
"/documents/originals/none/none-0000001.pdf.gpg")
# Rename the document "illegaly"
os.makedirs(settings.MEDIA_ROOT + "/documents/originals/test")
os.rename(settings.MEDIA_ROOT + "/documents/originals/" +
"none/none-0000001.pdf.gpg",
settings.MEDIA_ROOT + "/documents/originals/" +
"test/test-0000001.pdf.gpg")
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" +
"originals/test/test-0000001.pdf.gpg"), True)
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" +
"originals/none/none-0000001.pdf"), False)
# Set new correspondent and expect document to be saved properly
document.correspondent = Correspondent.objects.get_or_create(
name="foo")[0]
document.save()
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" +
"originals/foo/foo-0000001.pdf.gpg"), True)
# Check proper handling of files
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/foo"), True)
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/none"), False)
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/test"), False)
self.assertEqual(document.generate_source_filename(),
"foo/foo-0000001.pdf.gpg")
def test_delete_all_empty_subdirectories(self):
# Create our working directory
tmp = "/tmp/paperless-tests-{}".format(str(uuid4())[:8])
os.makedirs(tmp)
self.add_to_deletion_list(tmp)
os.makedirs(os.path.join(tmp, "empty"))
os.makedirs(os.path.join(tmp, "empty", "subdirectory"))
os.makedirs(os.path.join(tmp, "notempty"))
Path(os.path.join(tmp, "notempty", "file")).touch()
Document.delete_all_empty_subdirectories(tmp)
self.assertEqual(os.path.isdir(os.path.join(tmp, "notempty")), True)
self.assertEqual(os.path.isdir(os.path.join(tmp, "empty")), False)
self.assertEqual(os.path.isfile(
os.path.join(tmp, "notempty", "file")), True)
def test_try_delete_empty_directories(self):
# Create our working directory
tmp = "/tmp/paperless-tests-{}".format(str(uuid4())[:8])
os.makedirs(tmp)
self.add_to_deletion_list(tmp)
os.makedirs(os.path.join(tmp, "notempty"))
Path(os.path.join(tmp, "notempty", "file")).touch()
os.makedirs(os.path.join(tmp, "notempty", "empty"))
Document.try_delete_empty_directories(
os.path.join(tmp, "notempty", "empty"))
self.assertEqual(os.path.isdir(os.path.join(tmp, "notempty")), True)
self.assertEqual(os.path.isfile(
os.path.join(tmp, "notempty", "file")), True)
self.assertEqual(os.path.isdir(
os.path.join(tmp, "notempty", "empty")), False)
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
"{correspondent}")
def test_document_accidentally_deleted(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none/none-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()
# Test source_path
self.assertEqual(document.source_path, settings.MEDIA_ROOT +
"/documents/originals/none/none-0000001.pdf")
# Delete the document "illegaly"
os.remove(settings.MEDIA_ROOT + "/documents/originals/" +
"none/none-0000001.pdf")
# Set new correspondent and expect document to be saved properly
document.correspondent = Correspondent.objects.get_or_create(
name="foo")[0]
document.save()
# Check proper handling of files
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/none"), True)
self.assertEqual(document.source_filename,
"none/none-0000001.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
"{correspondent}")
def test_set_filename(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none/none-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()
# Set existing filename
document.set_filename(tmp)
self.assertEqual(document.source_filename, "none/none-0000001.pdf")
# Set non-existing filename
document.set_filename("doesnotexist")
self.assertEqual(document.source_filename, "none/none-0000001.pdf")

View File

@ -76,7 +76,10 @@ class FetchView(SessionOrBasicAuthMixin, DetailView):
content_type=content_types[self.object.file_type]
)
DISPOSITION = 'inline' if settings.INLINE_DOC else 'attachment'
DISPOSITION = (
'inline' if settings.INLINE_DOC or self.kwargs["kind"] == 'preview'
else 'attachment'
)
response["Content-Disposition"] = '{}; filename="{}"'.format(
DISPOSITION, self.object.file_name)

View File

@ -10,7 +10,9 @@ For the full list of settings and their values, see
https://docs.djangoproject.com/en/1.10/ref/settings/
"""
import json
import os
import re
from dotenv import load_dotenv
@ -62,6 +64,7 @@ FORCE_SCRIPT_NAME = os.getenv("PAPERLESS_FORCE_SCRIPT_NAME")
# Application definition
INSTALLED_APPS = [
"whitenoise.runserver_nostatic",
"django.contrib.auth",
"django.contrib.contenttypes",
@ -92,6 +95,7 @@ if os.getenv("PAPERLESS_INSTALLED_APPS"):
MIDDLEWARE = [
'django.middleware.security.SecurityMiddleware',
'whitenoise.middleware.WhiteNoiseMiddleware',
'django.contrib.sessions.middleware.SessionMiddleware',
'corsheaders.middleware.CorsMiddleware',
'django.middleware.common.CommonMiddleware',
@ -101,8 +105,11 @@ MIDDLEWARE = [
'django.middleware.clickjacking.XFrameOptionsMiddleware',
]
# Enable whitenoise compression and caching
STATICFILES_STORAGE = 'whitenoise.storage.CompressedManifestStaticFilesStorage'
# We allow CORS from localhost:8080
CORS_ORIGIN_WHITELIST = tuple(os.getenv("PAPERLESS_CORS_ALLOWED_HOSTS", "localhost:8080").split(","))
CORS_ORIGIN_WHITELIST = tuple(os.getenv("PAPERLESS_CORS_ALLOWED_HOSTS", "http://localhost:8080,https://localhost:8080").split(","))
# If auth is disabled, we just use our "bypass" authentication middleware
if bool(os.getenv("PAPERLESS_DISABLE_LOGIN", "false").lower() in ("yes", "y", "1", "t", "true")):
@ -325,8 +332,16 @@ FY_END = os.getenv("PAPERLESS_FINANCIAL_YEAR_END")
DATE_ORDER = os.getenv("PAPERLESS_DATE_ORDER", "DMY")
FILENAME_DATE_ORDER = os.getenv("PAPERLESS_FILENAME_DATE_ORDER")
# Transformations applied before filename parsing
FILENAME_PARSE_TRANSFORMS = []
for t in json.loads(os.getenv("PAPERLESS_FILENAME_PARSE_TRANSFORMS", "[]")):
FILENAME_PARSE_TRANSFORMS.append((re.compile(t["pattern"]), t["repl"]))
# Specify for how many years a correspondent is considered recent. Recent
# correspondents will be shown in a separate "Recent correspondents" filter as
# well. Set to 0 to disable this filter.
PAPERLESS_RECENT_CORRESPONDENT_YEARS = int(os.getenv(
"PAPERLESS_RECENT_CORRESPONDENT_YEARS", 0))
# Specify the filename format for out files
PAPERLESS_FILENAME_FORMAT = os.getenv("PAPERLESS_FILENAME_FORMAT")

View File

@ -39,7 +39,7 @@ urlpatterns = [
# File downloads
url(
r"^fetch/(?P<kind>doc|thumb)/(?P<pk>\d+)$",
r"^fetch/(?P<kind>doc|thumb|preview)/(?P<pk>\d+)$",
FetchView.as_view(),
name="fetch"
),

View File

@ -175,10 +175,26 @@ class TestDate(TestCase):
@mock.patch(
"paperless_tesseract.parsers.RasterisedDocumentParser.get_text",
return_value="01-07-0590 00:00:00"
return_value="20 408000l 2475"
)
@mock.patch(MOCK_SCRATCH, SCRATCH)
def test_crazy_date_past(self, *args):
def test_crazy_date_with_spaces(self, *args):
document = RasterisedDocumentParser("/dev/null")
document.get_text()
self.assertIsNone(document.get_date())
@mock.patch(
"paperless_tesseract.parsers.RasterisedDocumentParser.get_text",
return_value="No date in here"
)
@mock.patch(
"paperless_tesseract.parsers.RasterisedDocumentParser."
"FILENAME_DATE_ORDER",
new_callable=mock.PropertyMock,
return_value="YMD"
)
@mock.patch(MOCK_SCRATCH, SCRATCH)
def test_filename_date_parse_invalid(self, *args):
document = RasterisedDocumentParser("/tmp/20 408000l 2475 - test.pdf")
document.get_text()
self.assertIsNone(document.get_date())