updated configuration

This commit is contained in:
Jonas Winkler 2020-11-02 21:59:36 +01:00
parent 6b0d5d4cc8
commit 9ce926df7f
3 changed files with 139 additions and 157 deletions

View File

@ -1,35 +1,52 @@
# Database settings for paperless
# If you want to use sqlite instead, remove these settings.
PAPERLESS_DBENGINE="django.db.backends.postgresql_psycopg2"
PAPERLESS_DBHOST="db"
PAPERLESS_DBNAME="paperless"
PAPERLESS_DBUSER="paperless"
PAPERLESS_DBPASS="paperless"
# DONT EDIT. Consumption directory. This is the location of the consumption
# directory inside the container. If you want to modify the location of the
# consumption folder on the host, edit the docker-compose.yml file instead.
PAPERLESS_CONSUMPTION_DIR="../consume"
# Environment variables to set for Paperless
# Commented out variables will be replaced with a default within Paperless.
#
# In addition to what you see here, you can also define any values you find in
# paperless.conf.example here. Values like:
#
# * PAPERLESS_PASSPHRASE
# * PAPERLESS_CONSUME_MAIL_HOST
#
# ...are all explained in that file but can be defined here, since the Docker
# installation doesn't make use of paperless.conf.
# Use this variable to set a timezone for the Paperless Docker containers. If not specified, defaults to UTC.
#TZ=America/Los_Angeles
# Additional languages to install for text recognition. Note that this is
# different from PAPERLESS_OCR_LANGUAGE (default=eng), which defines the
# default language used when guessing the language from the OCR output.
# The container installs English, German, Italian, Spanish and French by
# default.
#PAPERLESS_OCR_LANGUAGES=deu ita spa fra
# The UID and GID of the user used to run paperless in the container. Set this
# to your UID and GID on the host so that you have write access to the
# consumption directory.
#USERMAP_UID=1000
#USERMAP_GID=1000
# Additional languages to install for text recognition, separated by a
# whitespace. Note that this is
# different from PAPERLESS_OCR_LANGUAGE (default=eng), which defines the
# default language used when guessing the language from the OCR output.
# The container installs English, German, Italian, Spanish and French by
# default.
# See https://packages.debian.org/search?keywords=tesseract-ocr-&searchon=names&suite=buster
# for available languages.
#PAPERLESS_OCR_LANGUAGES=tur ces
###############################################################################
# Paperless-specific settings #
###############################################################################
# All settings defined in the paperless.conf.example can be used here. The
# Docker setup does not use the configuration file.
# A few commonly adjusted settings are provided below.
# Adjust this key if you plan to make paperless available publicly. It should
# be a very long sequence of random characters. You don't need to remember it.
#PAPERLESS_SECRET_KEY="change-me"
# Use this variable to set a timezone for the Paperless Docker containers. If not specified, defaults to UTC.
#PAPERLESS_TIME_ZONE=America/Los_Angeles
# The default language to use for OCR. Set this to the language most of your
# documents are written in.
#PAPERLESS_OCR_LANGUAGE="eng"
# By default Paperless does not OCR a document if the text can be retrieved from
# the document directly. Set to true to always OCR documents. (i.e., if you
# know that some of your documents have faulty/bad OCR data)
#PAPERLESS_OCR_ALWAYS="true"

View File

@ -23,7 +23,7 @@
# This where your documents should go to be consumed. Make sure that it exists
# and that the user running the paperless service can read/write its contents
# before you start Paperless.
#PAPERLESS_CONSUMPTION_DIR=""
PAPERLESS_CONSUMPTION_DIR="../consume"
# This is where paperless stores all its data (search index, sqlite database,
# classification model, etc).
@ -165,7 +165,10 @@
# Customize the default language that tesseract will attempt to use when
# parsing documents. It should be a 3-letter language code consistent with ISO
# parsing documents. The default language is used whenever
# - No language could be detected on a document
# - No tesseract data files are available for the detected language
# It should be a 3-letter language code consistent with ISO
# 639: https://www.loc.gov/standards/iso639-2/php/code_list.php
#PAPERLESS_OCR_LANGUAGE=eng
@ -203,16 +206,6 @@
# with little impact to OCR accuracy.
#PAPERLESS_CONVERT_DENSITY=300
# (This setting is ignored on Linux where inotify is used instead of a
# polling loop.)
# The number of seconds that Paperless will wait between checking
# PAPERLESS_CONSUMPTION_DIR. If you tend to write documents to this directory
# rarely, you may want to use a higher value than the default (10).
#PAPERLESS_CONSUMER_LOOP_TIME=10
# By default Paperless does not OCR a document if the text can be retrieved from
# the document directly. Set to true to always OCR documents.
#PAPERLESS_OCR_ALWAYS="false"

View File

@ -1,22 +1,10 @@
"""
Django settings for paperless project.
Generated by 'django-admin startproject' using Django 1.9.
For more information on this file, see
https://docs.djangoproject.com/en/1.10/topics/settings/
For the full list of settings and their values, see
https://docs.djangoproject.com/en/1.10/ref/settings/
"""
import json
import multiprocessing
import os
import re
from dotenv import load_dotenv
# Tap paperless.conf if it's available
if os.path.exists("../paperless.conf"):
load_dotenv("../paperless.conf")
@ -33,45 +21,24 @@ def __get_boolean(key, default="NO"):
"""
return bool(os.getenv(key, default).lower() in ("yes", "y", "1", "t", "true"))
###############################################################################
# Directories #
###############################################################################
# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
DATA_DIR = os.getenv('PAPERLESS_DATA_DIR', os.path.join(BASE_DIR, "..", "data"))
STATIC_ROOT = os.getenv("PAPERLESS_STATICDIR", os.path.join(BASE_DIR, "..", "static"))
MEDIA_ROOT = os.getenv('PAPERLESS_MEDIA_ROOT', os.path.join(BASE_DIR, "..", "media"))
INDEX_DIR = os.path.join(DATA_DIR, "index")
ORIGINALS_DIR = os.path.join(MEDIA_ROOT, "documents", "originals")
THUMBNAIL_DIR = os.path.join(MEDIA_ROOT, "documents", "thumbnails")
DATA_DIR = os.getenv('PAPERLESS_DATA_DIR', os.path.join(BASE_DIR, "..", "data"))
INDEX_DIR = os.path.join(DATA_DIR, "index")
MODEL_FILE = os.path.join(DATA_DIR, "classification_model.pickle")
# Quick-start development settings - unsuitable for production
# See https://docs.djangoproject.com/en/1.10/howto/deployment/checklist/
# The secret key has a default that should be fine so long as you're hosting
# Paperless on a closed network. However, if you're putting this anywhere
# public, you should change the key to something unique and verbose.
SECRET_KEY = os.getenv(
"PAPERLESS_SECRET_KEY",
"e11fl1oa-*ytql8p)(06fbj4ukrlo+n7k&q5+$1md7i+mge=ee"
)
# SECURITY WARNING: don't run with debug turned on in production!
DEBUG = __get_boolean("PAPERLESS_DEBUG", "NO")
LOGIN_URL = "admin:login"
_allowed_hosts = os.getenv("PAPERLESS_ALLOWED_HOSTS")
if _allowed_hosts:
ALLOWED_HOSTS = _allowed_hosts.split(",")
else:
ALLOWED_HOSTS = ["*"]
FORCE_SCRIPT_NAME = os.getenv("PAPERLESS_FORCE_SCRIPT_NAME")
# Application definition
###############################################################################
# Application Definition #
###############################################################################
INSTALLED_APPS = [
"whitenoise.runserver_nostatic",
@ -118,18 +85,17 @@ MIDDLEWARE = [
'django.middleware.clickjacking.XFrameOptionsMiddleware',
]
X_FRAME_OPTIONS = 'SAMEORIGIN'
# We allow CORS from localhost:8080
CORS_ORIGIN_WHITELIST = tuple(os.getenv("PAPERLESS_CORS_ALLOWED_HOSTS", "http://localhost:8080,https://localhost:8080,http://localhost:4200").split(","))
# If auth is disabled, we just use our "bypass" authentication middleware
if bool(os.getenv("PAPERLESS_DISABLE_LOGIN", "false").lower() in ("yes", "y", "1", "t", "true")):
_index = MIDDLEWARE.index("django.contrib.auth.middleware.AuthenticationMiddleware")
MIDDLEWARE[_index] = "paperless.middleware.Middleware"
ROOT_URLCONF = 'paperless.urls'
LOGIN_URL = "admin:login"
FORCE_SCRIPT_NAME = os.getenv("PAPERLESS_FORCE_SCRIPT_NAME")
WSGI_APPLICATION = 'paperless.wsgi.application'
STATIC_URL = os.getenv("PAPERLESS_STATIC_URL", "/static/")
# what is this used for?
TEMPLATES = [
{
'BACKEND': 'django.template.backends.django.DjangoTemplates',
@ -146,38 +112,40 @@ TEMPLATES = [
},
]
WSGI_APPLICATION = 'paperless.wsgi.application'
###############################################################################
# Security #
###############################################################################
# NEVER RUN WITH DEBUG IN PRODUCTION.
DEBUG = __get_boolean("PAPERLESS_DEBUG", "NO")
# Database
# https://docs.djangoproject.com/en/1.10/ref/settings/#databases
X_FRAME_OPTIONS = 'SAMEORIGIN'
DATABASES = {
"default": {
"ENGINE": "django.db.backends.sqlite3",
"NAME": os.path.join(
DATA_DIR,
"db.sqlite3"
)
}
}
# We allow CORS from localhost:8080
CORS_ORIGIN_WHITELIST = tuple(os.getenv("PAPERLESS_CORS_ALLOWED_HOSTS", "http://localhost:8080,https://localhost:8080").split(","))
if os.getenv("PAPERLESS_DBENGINE"):
DATABASES["default"] = {
"ENGINE": os.getenv("PAPERLESS_DBENGINE"),
"NAME": os.getenv("PAPERLESS_DBNAME", "paperless"),
"USER": os.getenv("PAPERLESS_DBUSER"),
}
if os.getenv("PAPERLESS_DBPASS"):
DATABASES["default"]["PASSWORD"] = os.getenv("PAPERLESS_DBPASS")
if os.getenv("PAPERLESS_DBHOST"):
DATABASES["default"]["HOST"] = os.getenv("PAPERLESS_DBHOST")
if os.getenv("PAPERLESS_DBPORT"):
DATABASES["default"]["PORT"] = os.getenv("PAPERLESS_DBPORT")
if DEBUG:
# Allow access from the angular development server during debugging
CORS_ORIGIN_WHITELIST += ('http://localhost:4200',)
# If auth is disabled, we just use our "bypass" authentication middleware
if bool(os.getenv("PAPERLESS_DISABLE_LOGIN", "false").lower() in ("yes", "y", "1", "t", "true")):
_index = MIDDLEWARE.index("django.contrib.auth.middleware.AuthenticationMiddleware")
MIDDLEWARE[_index] = "paperless.middleware.Middleware"
# Password validation
# https://docs.djangoproject.com/en/1.10/ref/settings/#auth-password-validators
# The secret key has a default that should be fine so long as you're hosting
# Paperless on a closed network. However, if you're putting this anywhere
# public, you should change the key to something unique and verbose.
SECRET_KEY = os.getenv(
"PAPERLESS_SECRET_KEY",
"e11fl1oa-*ytql8p)(06fbj4ukrlo+n7k&q5+$1md7i+mge=ee"
)
_allowed_hosts = os.getenv("PAPERLESS_ALLOWED_HOSTS")
if _allowed_hosts:
ALLOWED_HOSTS = _allowed_hosts.split(",")
else:
ALLOWED_HOSTS = ["*"]
AUTH_PASSWORD_VALIDATORS = [
{
@ -194,9 +162,47 @@ AUTH_PASSWORD_VALIDATORS = [
},
]
# Disable Django's artificial limit on the number of form fields to submit at
# once. This is a protection against overloading the server, but since this is
# a self-hosted sort of gig, the benefits of being able to mass-delete a tonne
# of log entries outweight the benefits of such a safeguard.
# Internationalization
# https://docs.djangoproject.com/en/1.10/topics/i18n/
DATA_UPLOAD_MAX_NUMBER_FIELDS = None
###############################################################################
# Database #
###############################################################################
DATABASES = {
"default": {
"ENGINE": "django.db.backends.sqlite3",
"NAME": os.path.join(
DATA_DIR,
"db.sqlite3"
)
}
}
# Always have sqlite available as a second option for management commands
# This is important when migrating to/from sqlite
DATABASES['sqlite'] = DATABASES['default'].copy()
if os.getenv("PAPERLESS_DBENGINE"):
DATABASES["default"] = {
"ENGINE": os.getenv("PAPERLESS_DBENGINE"),
"NAME": os.getenv("PAPERLESS_DBNAME", "paperless"),
"USER": os.getenv("PAPERLESS_DBUSER"),
}
if os.getenv("PAPERLESS_DBPASS"):
DATABASES["default"]["PASSWORD"] = os.getenv("PAPERLESS_DBPASS")
if os.getenv("PAPERLESS_DBHOST"):
DATABASES["default"]["HOST"] = os.getenv("PAPERLESS_DBHOST")
if os.getenv("PAPERLESS_DBPORT"):
DATABASES["default"]["PORT"] = os.getenv("PAPERLESS_DBPORT")
###############################################################################
# Internationalization #
###############################################################################
LANGUAGE_CODE = 'en-us'
@ -208,32 +214,9 @@ USE_L10N = True
USE_TZ = True
# Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/1.10/howto/static-files/
STATIC_ROOT = os.getenv(
"PAPERLESS_STATICDIR", os.path.join(BASE_DIR, "..", "static"))
STATIC_URL = os.getenv("PAPERLESS_STATIC_URL", "/static/")
# Other
# Disable Django's artificial limit on the number of form fields to submit at
# once. This is a protection against overloading the server, but since this is
# a self-hosted sort of gig, the benefits of being able to mass-delete a tonne
# of log entries outweight the benefits of such a safeguard.
DATA_UPLOAD_MAX_NUMBER_FIELDS = None
# Paperless-specific stuff
# You shouldn't have to edit any of these values. Rather, you can set these
# values in /etc/paperless.conf instead.
# ----------------------------------------------------------------------------
# Logging
###############################################################################
# Logging #
###############################################################################
LOGGING = {
"version": 1,
@ -254,18 +237,20 @@ LOGGING = {
},
}
###############################################################################
# Paperless Specific Settings #
###############################################################################
# The default language that tesseract will attempt to use when parsing
# documents. It should be a 3-letter language code consistent with ISO 639.
OCR_LANGUAGE = os.getenv("PAPERLESS_OCR_LANGUAGE", "eng")
# The amount of threads to use for OCR
OCR_THREADS = int(os.getenv("PAPERLESS_OCR_THREADS", 4))
OCR_THREADS = int(os.getenv("PAPERLESS_OCR_THREADS", multiprocessing.cpu_count()))
# OCR all documents?
OCR_ALWAYS = __get_boolean("PAPERLESS_OCR_ALWAYS", "false")
# GNUPG needs a home directory for some reason
GNUPG_HOME = os.getenv("HOME", "/tmp")
@ -275,13 +260,8 @@ CONVERT_TMPDIR = os.getenv("PAPERLESS_CONVERT_TMPDIR")
CONVERT_MEMORY_LIMIT = os.getenv("PAPERLESS_CONVERT_MEMORY_LIMIT")
CONVERT_DENSITY = int(os.getenv("PAPERLESS_CONVERT_DENSITY", 300))
# Ghostscript
GS_BINARY = os.getenv("PAPERLESS_GS_BINARY", "gs")
# OptiPNG
OPTIPNG_BINARY = os.getenv("PAPERLESS_OPTIPNG_BINARY", "optipng")
# Unpaper
UNPAPER_BINARY = os.getenv("PAPERLESS_UNPAPER_BINARY", "unpaper")
# This will be created if it doesn't exist
@ -290,14 +270,6 @@ SCRATCH_DIR = os.getenv("PAPERLESS_SCRATCH_DIR", "/tmp/paperless")
# This is where Paperless will look for PDFs to index
CONSUMPTION_DIR = os.getenv("PAPERLESS_CONSUMPTION_DIR")
# (This setting is ignored on Linux where inotify is used instead of a
# polling loop.)
# The number of seconds that Paperless will wait between checking
# CONSUMPTION_DIR. If you tend to write documents to this directory very
# slowly, you may want to use a higher value than the default.
CONSUMER_LOOP_TIME = int(os.getenv("PAPERLESS_CONSUMER_LOOP_TIME", 10))
# Pre-2.x versions of Paperless stored your documents locally with GPG
# encryption, but that is no longer the default. This behaviour is still
# available, but it must be explicitly enabled by setting