mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-07-28 18:24:38 -05:00
Merge branch 'dev' into celery-tasks
This commit is contained in:
@@ -11,6 +11,8 @@ writeable_hint = (
|
||||
"Set the permissions of {} to be writeable by the user running the "
|
||||
"Paperless services"
|
||||
)
|
||||
|
||||
|
||||
def path_check(env_var):
|
||||
messages = []
|
||||
directory = os.getenv(env_var)
|
||||
@@ -27,6 +29,7 @@ def path_check(env_var):
|
||||
))
|
||||
return messages
|
||||
|
||||
|
||||
@register()
|
||||
def paths_check(app_configs, **kwargs):
|
||||
"""
|
||||
@@ -34,9 +37,9 @@ def paths_check(app_configs, **kwargs):
|
||||
"""
|
||||
|
||||
check_messages = path_check("PAPERLESS_DATA_DIR") + \
|
||||
path_check("PAPERLESS_MEDIA_ROOT") + \
|
||||
path_check("PAPERLESS_CONSUMPTION_DIR") + \
|
||||
path_check("PAPERLESS_STATICDIR")
|
||||
path_check("PAPERLESS_MEDIA_ROOT") + \
|
||||
path_check("PAPERLESS_CONSUMPTION_DIR") + \
|
||||
path_check("PAPERLESS_STATICDIR")
|
||||
|
||||
return check_messages
|
||||
|
||||
@@ -64,3 +67,16 @@ def binaries_check(app_configs, **kwargs):
|
||||
check_messages.append(Warning(error.format(binary), hint))
|
||||
|
||||
return check_messages
|
||||
|
||||
|
||||
@register()
def debug_mode_check(app_configs, **kwargs):
    """
    Check that warns when the project is running with DEBUG enabled.

    :param app_configs: supplied by the caller of the check; not used here.
    :param kwargs: any further keyword arguments; not used here.
    :return: a list holding a single Warning when ``settings.DEBUG`` is
        truthy, otherwise an empty list.
    """
    if not settings.DEBUG:
        return []

    return [Warning(
        "DEBUG mode is enabled. Disable Debug mode. This is a serious "
        "security issue, since it puts security overrides in place which "
        "are meant to be only used during development. This "
        "also means that paperless will tell anyone various "
        "debugging information when something goes wrong.")]
|
||||
|
@@ -1,4 +1,5 @@
|
||||
import json
|
||||
import math
|
||||
import multiprocessing
|
||||
import os
|
||||
import re
|
||||
@@ -13,6 +14,18 @@ elif os.path.exists("/etc/paperless.conf"):
|
||||
elif os.path.exists("/usr/local/etc/paperless.conf"):
|
||||
load_dotenv("/usr/local/etc/paperless.conf")
|
||||
|
||||
# There are multiple levels of concurrency in paperless:
|
||||
# - Multiple consumers may be run in parallel.
|
||||
# - Each consumer may process multiple pages in parallel.
|
||||
# - Each Tesseract OCR run may spawn multiple threads to process a single page
|
||||
# slightly faster.
|
||||
# The performance gains from having tesseract use multiple threads are minimal.
|
||||
# However, when multiple pages are processed in parallel, the total number of
|
||||
# OCR threads may exceed the number of available cpu cores, which will
|
||||
# dramatically slow down the consumption process. This setting limits each
|
||||
# Tesseract process to one thread.
|
||||
os.environ['OMP_THREAD_LIMIT'] = "1"
|
||||
|
||||
|
||||
def __get_boolean(key, default="NO"):
|
||||
"""
|
||||
@@ -21,9 +34,11 @@ def __get_boolean(key, default="NO"):
|
||||
"""
|
||||
return bool(os.getenv(key, default).lower() in ("yes", "y", "1", "t", "true"))
|
||||
|
||||
|
||||
# NEVER RUN WITH DEBUG IN PRODUCTION.
|
||||
DEBUG = __get_boolean("PAPERLESS_DEBUG", "NO")
|
||||
|
||||
|
||||
###############################################################################
|
||||
# Directories #
|
||||
###############################################################################
|
||||
@@ -65,6 +80,7 @@ INSTALLED_APPS = [
|
||||
"documents.apps.DocumentsConfig",
|
||||
"paperless_tesseract.apps.PaperlessTesseractConfig",
|
||||
"paperless_text.apps.PaperlessTextConfig",
|
||||
"paperless_mail.apps.PaperlessMailConfig",
|
||||
|
||||
"django.contrib.admin",
|
||||
|
||||
@@ -139,11 +155,11 @@ else:
|
||||
X_FRAME_OPTIONS = 'SAMEORIGIN'
|
||||
|
||||
# We allow CORS from localhost:8080
|
||||
CORS_ORIGIN_WHITELIST = tuple(os.getenv("PAPERLESS_CORS_ALLOWED_HOSTS", "http://localhost:8080,https://localhost:8080").split(","))
|
||||
CORS_ALLOWED_ORIGINS = tuple(os.getenv("PAPERLESS_CORS_ALLOWED_HOSTS", "http://localhost:8000").split(","))
|
||||
|
||||
if DEBUG:
|
||||
# Allow access from the angular development server during debugging
|
||||
CORS_ORIGIN_WHITELIST += ('http://localhost:4200',)
|
||||
CORS_ALLOWED_ORIGINS += ('http://localhost:4200',)
|
||||
|
||||
# The secret key has a default that should be fine so long as you're hosting
|
||||
# Paperless on a closed network. However, if you're putting this anywhere
|
||||
@@ -195,11 +211,11 @@ DATABASES = {
|
||||
}
|
||||
}
|
||||
|
||||
# Always have sqlite available as a second option for management commands
|
||||
# This is important when migrating to/from sqlite
|
||||
DATABASES['sqlite'] = DATABASES['default'].copy()
|
||||
|
||||
if os.getenv("PAPERLESS_DBHOST"):
|
||||
# Have sqlite available as a second option for management commands
|
||||
# This is important when migrating to/from sqlite
|
||||
DATABASES['sqlite'] = DATABASES['default'].copy()
|
||||
|
||||
DATABASES["default"] = {
|
||||
"ENGINE": "django.db.backends.postgresql_psycopg2",
|
||||
"HOST": os.getenv("PAPERLESS_DBHOST"),
|
||||
@@ -244,6 +260,14 @@ LOGGING = {
|
||||
"handlers": ["dbhandler", "streamhandler"],
|
||||
"level": "DEBUG"
|
||||
},
|
||||
"paperless_mail": {
|
||||
"handlers": ["dbhandler", "streamhandler"],
|
||||
"level": "DEBUG"
|
||||
},
|
||||
"paperless_tesseract": {
|
||||
"handlers": ["dbhandler", "streamhandler"],
|
||||
"level": "DEBUG"
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
@@ -251,22 +275,60 @@ LOGGING = {
|
||||
# Task queue #
|
||||
###############################################################################
|
||||
|
||||
|
||||
# Sensible defaults for multitasking:
|
||||
# use a fair balance between worker processes and threads per worker so that
|
||||
# both consuming many documents in parallel and consuming large documents is
|
||||
# reasonably fast.
|
||||
# Favors threads per worker on smaller systems and never exceeds cpu_count()
|
||||
# in total.
|
||||
|
||||
def default_task_workers():
    """
    Compute the default number of task workers.

    The result is the square root of the CPU count, rounded down, and never
    less than 1. If the CPU count cannot be determined
    (``NotImplementedError``), 1 is returned.
    """
    try:
        cores = multiprocessing.cpu_count()
    except NotImplementedError:
        return 1

    workers = math.floor(math.sqrt(cores))
    return workers if workers > 1 else 1
|
||||
|
||||
|
||||
TASK_WORKERS = int(os.getenv("PAPERLESS_TASK_WORKERS", default_task_workers()))
|
||||
|
||||
Q_CLUSTER = {
|
||||
'name': 'paperless',
|
||||
'catch_up': False,
|
||||
'workers': TASK_WORKERS,
|
||||
'redis': os.getenv("PAPERLESS_REDIS", "redis://localhost:6379")
|
||||
}
|
||||
|
||||
|
||||
def default_threads_per_worker():
    """
    Compute the default number of threads each task worker may use.

    The CPU count is divided by the module-level ``TASK_WORKERS`` setting and
    rounded down; the result is never less than 1. If the CPU count cannot
    be determined (``NotImplementedError``), 1 is returned.
    """
    try:
        cores = multiprocessing.cpu_count()
    except NotImplementedError:
        return 1

    threads = math.floor(cores / TASK_WORKERS)
    return threads if threads > 1 else 1
|
||||
|
||||
|
||||
# os.getenv returns a string when the variable is set, while the computed
# default is an int. Cast so the setting is always an int, matching how
# TASK_WORKERS is read.
THREADS_PER_WORKER = int(os.getenv("PAPERLESS_THREADS_PER_WORKER", default_threads_per_worker()))
|
||||
|
||||
###############################################################################
|
||||
# Paperless Specific Settings #
|
||||
###############################################################################
|
||||
|
||||
CONSUMER_POLLING = int(os.getenv("PAPERLESS_CONSUMER_POLLING", 0))
|
||||
|
||||
CONSUMER_DELETE_DUPLICATES = __get_boolean("PAPERLESS_CONSUMER_DELETE_DUPLICATES")
|
||||
|
||||
OPTIMIZE_THUMBNAILS = __get_boolean("PAPERLESS_OPTIMIZE_THUMBNAILS", "true")
|
||||
|
||||
# The default language that tesseract will attempt to use when parsing
|
||||
# documents. It should be a 3-letter language code consistent with ISO 639.
|
||||
OCR_LANGUAGE = os.getenv("PAPERLESS_OCR_LANGUAGE", "eng")
|
||||
|
||||
# The amount of threads to use for OCR
|
||||
OCR_THREADS = int(os.getenv("PAPERLESS_OCR_THREADS", multiprocessing.cpu_count()))
|
||||
|
||||
# OCR all documents?
|
||||
OCR_ALWAYS = __get_boolean("PAPERLESS_OCR_ALWAYS", "false")
|
||||
@@ -311,6 +373,7 @@ FILENAME_PARSE_TRANSFORMS = []
|
||||
for t in json.loads(os.getenv("PAPERLESS_FILENAME_PARSE_TRANSFORMS", "[]")):
|
||||
FILENAME_PARSE_TRANSFORMS.append((re.compile(t["pattern"]), t["repl"]))
|
||||
|
||||
# TODO: this should not have a prefix.
|
||||
# Specify the filename format for out files
|
||||
PAPERLESS_FILENAME_FORMAT = os.getenv("PAPERLESS_FILENAME_FORMAT")
|
||||
|
||||
|
@@ -1,4 +1,4 @@
|
||||
from django.conf.urls import include, url
|
||||
from django.conf.urls import include
|
||||
from django.contrib import admin
|
||||
from django.contrib.auth.decorators import login_required
|
||||
from django.urls import path, re_path
|
||||
@@ -7,7 +7,6 @@ from django.views.generic import RedirectView
|
||||
from rest_framework.routers import DefaultRouter
|
||||
|
||||
from paperless.consumers import StatusConsumer
|
||||
from paperless.views import FaviconView
|
||||
from documents.views import (
|
||||
CorrespondentViewSet,
|
||||
DocumentViewSet,
|
||||
@@ -19,6 +18,7 @@ from documents.views import (
|
||||
SearchAutoCompleteView,
|
||||
StatisticsView
|
||||
)
|
||||
from paperless.views import FaviconView
|
||||
|
||||
api_router = DefaultRouter()
|
||||
api_router.register(r"correspondents", CorrespondentViewSet)
|
||||
@@ -31,32 +31,32 @@ api_router.register(r"tags", TagViewSet)
|
||||
urlpatterns = [
|
||||
|
||||
# API
|
||||
url(r"^api/auth/",include(('rest_framework.urls', 'rest_framework'), namespace="rest_framework")),
|
||||
url(r"^api/search/autocomplete/", SearchAutoCompleteView.as_view(), name="autocomplete"),
|
||||
url(r"^api/search/", SearchView.as_view(), name="search"),
|
||||
url(r"^api/statistics/", StatisticsView.as_view(), name="statistics"),
|
||||
url(r"^api/", include((api_router.urls, 'drf'), namespace="drf")),
|
||||
re_path(r"^api/auth/", include(('rest_framework.urls', 'rest_framework'), namespace="rest_framework")),
|
||||
re_path(r"^api/search/autocomplete/", SearchAutoCompleteView.as_view(), name="autocomplete"),
|
||||
re_path(r"^api/search/", SearchView.as_view(), name="search"),
|
||||
re_path(r"^api/statistics/", StatisticsView.as_view(), name="statistics"),
|
||||
re_path(r"^api/", include((api_router.urls, 'drf'), namespace="drf")),
|
||||
|
||||
# Favicon
|
||||
url(r"^favicon.ico$", FaviconView.as_view(), name="favicon"),
|
||||
re_path(r"^favicon.ico$", FaviconView.as_view(), name="favicon"),
|
||||
|
||||
# The Django admin
|
||||
url(r"admin/", admin.site.urls),
|
||||
re_path(r"admin/", admin.site.urls),
|
||||
|
||||
# These redirects are here to support clients that use the old FetchView.
|
||||
url(
|
||||
re_path(
|
||||
r"^fetch/doc/(?P<pk>\d+)$",
|
||||
RedirectView.as_view(url='/api/documents/%(pk)s/download/'),
|
||||
),
|
||||
url(
|
||||
re_path(
|
||||
r"^fetch/thumb/(?P<pk>\d+)$",
|
||||
RedirectView.as_view(url='/api/documents/%(pk)s/thumb/'),
|
||||
),
|
||||
url(
|
||||
re_path(
|
||||
r"^fetch/preview/(?P<pk>\d+)$",
|
||||
RedirectView.as_view(url='/api/documents/%(pk)s/preview/'),
|
||||
),
|
||||
url(r"^push$", csrf_exempt(RedirectView.as_view(url='/api/documents/post_document/'))),
|
||||
re_path(r"^push$", csrf_exempt(RedirectView.as_view(url='/api/documents/post_document/'))),
|
||||
|
||||
# Frontend assets TODO: this is pretty bad.
|
||||
path('assets/<path:path>', RedirectView.as_view(url='/static/frontend/assets/%(path)s')),
|
||||
@@ -64,7 +64,7 @@ urlpatterns = [
|
||||
path('accounts/', include('django.contrib.auth.urls')),
|
||||
|
||||
# Root of the Frontend
|
||||
url(r".*", login_required(IndexView.as_view())),
|
||||
re_path(r".*", login_required(IndexView.as_view())),
|
||||
|
||||
]
|
||||
|
||||
@@ -74,8 +74,8 @@ websocket_urlpatterns = [
|
||||
]
|
||||
|
||||
# Text in each page's <h1> (and above login form).
|
||||
admin.site.site_header = 'Paperless'
|
||||
admin.site.site_header = 'Paperless-ng'
|
||||
# Text at the end of each page's <title>.
|
||||
admin.site.site_title = 'Paperless'
|
||||
admin.site.site_title = 'Paperless-ng'
|
||||
# Text at the top of the admin index page.
|
||||
admin.site.index_title = 'Paperless administration'
|
||||
admin.site.index_title = 'Paperless-ng administration'
|
||||
|
@@ -1 +1 @@
|
||||
__version__ = (1, 0, 0)
|
||||
__version__ = (0, 9, 1)
|
||||
|
Reference in New Issue
Block a user