mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Merge pull request #1551 from paperless-ngx/chore-settings-cleanup
Chore: Cleanup and validate settings
This commit is contained in:
commit
962d0ebb40
@ -1,4 +1,5 @@
|
|||||||
from .checks import binaries_check
|
from .checks import binaries_check
|
||||||
from .checks import paths_check
|
from .checks import paths_check
|
||||||
|
from .checks import settings_values_check
|
||||||
|
|
||||||
__all__ = ["binaries_check", "paths_check"]
|
__all__ = ["binaries_check", "paths_check", "settings_values_check"]
|
||||||
|
@ -96,3 +96,52 @@ def debug_mode_check(app_configs, **kwargs):
|
|||||||
]
|
]
|
||||||
else:
|
else:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
|
||||||
|
@register()
def settings_values_check(app_configs, **kwargs):
    """
    Validates at least some of the user provided settings

    Runs each nested validator and returns their combined list of Error
    messages; an empty list means none of the checked settings were found
    to be invalid.  The app_configs and **kwargs arguments are accepted but
    not used by this check.
    """

    def _ocrmypdf_settings_check():
        """
        Validates the OCR related settings which end up being passed on to
        ocrmypdf.

        The values checked here are the Paperless setting values, which are
        not always spelled like the ocrmypdf arguments they turn into
        (the mode is "redo", not "redo_ocr"; the clean mode is
        "clean-final", not "clean_final").
        """
        msgs = []

        # Values accepted by "ocrmypdf --output-type"
        valid_output_types = {"pdfa", "pdf", "pdfa-1", "pdfa-2", "pdfa-3"}
        if settings.OCR_OUTPUT_TYPE not in valid_output_types:
            msgs.append(
                Error(f'OCR output type "{settings.OCR_OUTPUT_TYPE}" is not valid'),
            )

        # The settings module documents the modes as skip, redo, force.
        # NOTE(review): "skip_noarchive" is taken from the Paperless
        # configuration documentation - confirm against the parser.
        valid_modes = {"force", "skip", "redo", "skip_noarchive"}
        if settings.OCR_MODE not in valid_modes:
            msgs.append(Error(f'OCR output mode "{settings.OCR_MODE}" is not valid'))

        # NOTE(review): values taken from the Paperless configuration
        # documentation for PAPERLESS_OCR_CLEAN - confirm against the parser.
        valid_clean_modes = {"clean", "clean-final", "none"}
        if settings.OCR_CLEAN not in valid_clean_modes:
            msgs.append(Error(f'OCR clean mode "{settings.OCR_CLEAN}" is not valid'))

        return msgs

    def _timezone_validate():
        """
        Validates the user provided timezone is a valid timezone
        """
        # zoneinfo is in the standard library on newer Pythons; fall back to
        # the backport package when it cannot be imported.
        try:
            import zoneinfo
        except ImportError:  # pragma: nocover
            import backports.zoneinfo as zoneinfo

        msgs = []
        if settings.TIME_ZONE not in zoneinfo.available_timezones():
            msgs.append(
                Error(f'Timezone "{settings.TIME_ZONE}" is not a valid timezone'),
            )
        return msgs

    return _ocrmypdf_settings_check() + _timezone_validate()
|
||||||
|
@ -285,7 +285,7 @@ SECRET_KEY = os.getenv(
|
|||||||
|
|
||||||
AUTH_PASSWORD_VALIDATORS = [
|
AUTH_PASSWORD_VALIDATORS = [
|
||||||
{
|
{
|
||||||
"NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator",
|
"NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator", # noqa: E501
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"NAME": "django.contrib.auth.password_validation.MinimumLengthValidator",
|
"NAME": "django.contrib.auth.password_validation.MinimumLengthValidator",
|
||||||
@ -445,13 +445,14 @@ LOGGING = {
|
|||||||
|
|
||||||
TASK_WORKERS = __get_int("PAPERLESS_TASK_WORKERS", 1)
|
TASK_WORKERS = __get_int("PAPERLESS_TASK_WORKERS", 1)
|
||||||
|
|
||||||
PAPERLESS_WORKER_TIMEOUT: Final[int] = __get_int("PAPERLESS_WORKER_TIMEOUT", 1800)
|
WORKER_TIMEOUT: Final[int] = __get_int("PAPERLESS_WORKER_TIMEOUT", 1800)
|
||||||
|
|
||||||
# Per django-q docs, timeout must be smaller than retry
|
# Per django-q docs, timeout must be smaller than retry
|
||||||
# We default retry to 10s more than the timeout
|
# We default retry to 10s more than the timeout to silence the
|
||||||
PAPERLESS_WORKER_RETRY: Final[int] = __get_int(
|
# warning, as retry functionality isn't used.
|
||||||
|
WORKER_RETRY: Final[int] = __get_int(
|
||||||
"PAPERLESS_WORKER_RETRY",
|
"PAPERLESS_WORKER_RETRY",
|
||||||
PAPERLESS_WORKER_TIMEOUT + 10,
|
WORKER_TIMEOUT + 10,
|
||||||
)
|
)
|
||||||
|
|
||||||
Q_CLUSTER = {
|
Q_CLUSTER = {
|
||||||
@ -459,8 +460,8 @@ Q_CLUSTER = {
|
|||||||
"guard_cycle": 5,
|
"guard_cycle": 5,
|
||||||
"catch_up": False,
|
"catch_up": False,
|
||||||
"recycle": 1,
|
"recycle": 1,
|
||||||
"retry": PAPERLESS_WORKER_RETRY,
|
"retry": WORKER_RETRY,
|
||||||
"timeout": PAPERLESS_WORKER_TIMEOUT,
|
"timeout": WORKER_TIMEOUT,
|
||||||
"workers": TASK_WORKERS,
|
"workers": TASK_WORKERS,
|
||||||
"redis": os.getenv("PAPERLESS_REDIS", "redis://localhost:6379"),
|
"redis": os.getenv("PAPERLESS_REDIS", "redis://localhost:6379"),
|
||||||
"log_level": "DEBUG" if DEBUG else "INFO",
|
"log_level": "DEBUG" if DEBUG else "INFO",
|
||||||
@ -507,7 +508,7 @@ CONSUMER_IGNORE_PATTERNS = list(
|
|||||||
json.loads(
|
json.loads(
|
||||||
os.getenv(
|
os.getenv(
|
||||||
"PAPERLESS_CONSUMER_IGNORE_PATTERNS",
|
"PAPERLESS_CONSUMER_IGNORE_PATTERNS",
|
||||||
'[".DS_STORE/*", "._*", ".stfolder/*", ".stversions/*", ".localized/*", "desktop.ini"]',
|
'[".DS_STORE/*", "._*", ".stfolder/*", ".stversions/*", ".localized/*", "desktop.ini"]', # noqa: E501
|
||||||
),
|
),
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
@ -531,11 +532,9 @@ OCR_PAGES = int(os.getenv("PAPERLESS_OCR_PAGES", 0))
|
|||||||
OCR_LANGUAGE = os.getenv("PAPERLESS_OCR_LANGUAGE", "eng")
|
OCR_LANGUAGE = os.getenv("PAPERLESS_OCR_LANGUAGE", "eng")
|
||||||
|
|
||||||
# OCRmyPDF --output-type options are available.
|
# OCRmyPDF --output-type options are available.
|
||||||
# TODO: validate this setting.
|
|
||||||
OCR_OUTPUT_TYPE = os.getenv("PAPERLESS_OCR_OUTPUT_TYPE", "pdfa")
|
OCR_OUTPUT_TYPE = os.getenv("PAPERLESS_OCR_OUTPUT_TYPE", "pdfa")
|
||||||
|
|
||||||
# skip. redo, force
|
# skip, redo, force
|
||||||
# TODO: validate this.
|
|
||||||
OCR_MODE = os.getenv("PAPERLESS_OCR_MODE", "skip")
|
OCR_MODE = os.getenv("PAPERLESS_OCR_MODE", "skip")
|
||||||
|
|
||||||
OCR_IMAGE_DPI = os.getenv("PAPERLESS_OCR_IMAGE_DPI")
|
OCR_IMAGE_DPI = os.getenv("PAPERLESS_OCR_IMAGE_DPI")
|
||||||
@ -589,7 +588,8 @@ DATE_ORDER = os.getenv("PAPERLESS_DATE_ORDER", "DMY")
|
|||||||
FILENAME_DATE_ORDER = os.getenv("PAPERLESS_FILENAME_DATE_ORDER")
|
FILENAME_DATE_ORDER = os.getenv("PAPERLESS_FILENAME_DATE_ORDER")
|
||||||
|
|
||||||
# Maximum number of dates taken from document start to end to show as suggestions for
|
# Maximum number of dates taken from document start to end to show as suggestions for
|
||||||
# `created` date in the frontend. Duplicates are removed, which can result in fewer dates shown.
|
# `created` date in the frontend. Duplicates are removed, which can result in
|
||||||
|
# fewer dates shown.
|
||||||
NUMBER_OF_SUGGESTED_DATES = __get_int("PAPERLESS_NUMBER_OF_SUGGESTED_DATES", 3)
|
NUMBER_OF_SUGGESTED_DATES = __get_int("PAPERLESS_NUMBER_OF_SUGGESTED_DATES", 3)
|
||||||
|
|
||||||
# Transformations applied before filename parsing
|
# Transformations applied before filename parsing
|
||||||
@ -600,7 +600,8 @@ for t in json.loads(os.getenv("PAPERLESS_FILENAME_PARSE_TRANSFORMS", "[]")):
|
|||||||
# Specify the filename format for out files
|
# Specify the filename format for out files
|
||||||
FILENAME_FORMAT = os.getenv("PAPERLESS_FILENAME_FORMAT")
|
FILENAME_FORMAT = os.getenv("PAPERLESS_FILENAME_FORMAT")
|
||||||
|
|
||||||
# If this is enabled, variables in filename format will resolve to empty-string instead of 'none'.
|
# If this is enabled, variables in filename format will resolve to
|
||||||
|
# empty-string instead of 'none'.
|
||||||
# Directories with 'empty names' are omitted, too.
|
# Directories with 'empty names' are omitted, too.
|
||||||
FILENAME_FORMAT_REMOVE_NONE = __get_boolean(
|
FILENAME_FORMAT_REMOVE_NONE = __get_boolean(
|
||||||
"PAPERLESS_FILENAME_FORMAT_REMOVE_NONE",
|
"PAPERLESS_FILENAME_FORMAT_REMOVE_NONE",
|
||||||
@ -612,16 +613,15 @@ THUMBNAIL_FONT_NAME = os.getenv(
|
|||||||
"/usr/share/fonts/liberation/LiberationSerif-Regular.ttf",
|
"/usr/share/fonts/liberation/LiberationSerif-Regular.ttf",
|
||||||
)
|
)
|
||||||
|
|
||||||
# TODO: this should not have a prefix.
|
|
||||||
# Tika settings
|
# Tika settings
|
||||||
PAPERLESS_TIKA_ENABLED = __get_boolean("PAPERLESS_TIKA_ENABLED", "NO")
|
TIKA_ENABLED = __get_boolean("PAPERLESS_TIKA_ENABLED", "NO")
|
||||||
PAPERLESS_TIKA_ENDPOINT = os.getenv("PAPERLESS_TIKA_ENDPOINT", "http://localhost:9998")
|
TIKA_ENDPOINT = os.getenv("PAPERLESS_TIKA_ENDPOINT", "http://localhost:9998")
|
||||||
PAPERLESS_TIKA_GOTENBERG_ENDPOINT = os.getenv(
|
TIKA_GOTENBERG_ENDPOINT = os.getenv(
|
||||||
"PAPERLESS_TIKA_GOTENBERG_ENDPOINT",
|
"PAPERLESS_TIKA_GOTENBERG_ENDPOINT",
|
||||||
"http://localhost:3000",
|
"http://localhost:3000",
|
||||||
)
|
)
|
||||||
|
|
||||||
if PAPERLESS_TIKA_ENABLED:
|
if TIKA_ENABLED:
|
||||||
INSTALLED_APPS.append("paperless_tika.apps.PaperlessTikaConfig")
|
INSTALLED_APPS.append("paperless_tika.apps.PaperlessTikaConfig")
|
||||||
|
|
||||||
|
|
||||||
@ -634,8 +634,9 @@ def _parse_ignore_dates(
|
|||||||
user provided string(s) into dates
|
user provided string(s) into dates
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
env_ignore (str): The value of the environment variable, comma seperated dates
|
env_ignore (str): The value of the environment variable, comma separated dates
|
||||||
date_order (str, optional): The format of the date strings. Defaults to DATE_ORDER.
|
date_order (str, optional): The format of the date strings.
|
||||||
|
Defaults to DATE_ORDER.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Set[datetime.datetime]: The set of parsed date objects
|
Set[datetime.datetime]: The set of parsed date objects
|
||||||
|
@ -1,12 +1,12 @@
|
|||||||
import os
|
import os
|
||||||
import shutil
|
|
||||||
|
|
||||||
from django.test import override_settings
|
from django.test import override_settings
|
||||||
from django.test import TestCase
|
from django.test import TestCase
|
||||||
from documents.tests.utils import DirectoriesMixin
|
from documents.tests.utils import DirectoriesMixin
|
||||||
from paperless import binaries_check
|
from paperless.checks import binaries_check
|
||||||
from paperless import paths_check
|
|
||||||
from paperless.checks import debug_mode_check
|
from paperless.checks import debug_mode_check
|
||||||
|
from paperless.checks import paths_check
|
||||||
|
from paperless.checks import settings_values_check
|
||||||
|
|
||||||
|
|
||||||
class TestChecks(DirectoriesMixin, TestCase):
|
class TestChecks(DirectoriesMixin, TestCase):
|
||||||
@ -54,3 +54,89 @@ class TestChecks(DirectoriesMixin, TestCase):
|
|||||||
@override_settings(DEBUG=True)
|
@override_settings(DEBUG=True)
|
||||||
def test_debug_enabled(self):
|
def test_debug_enabled(self):
|
||||||
self.assertEqual(len(debug_mode_check(None)), 1)
|
self.assertEqual(len(debug_mode_check(None)), 1)
|
||||||
|
|
||||||
|
|
||||||
|
class TestSettingsChecks(DirectoriesMixin, TestCase):
    """
    Covers settings_values_check with the default settings and with one
    setting at a time overridden to an invalid value.
    """

    def test_all_valid(self):
        """
        GIVEN:
            - Default settings
        WHEN:
            - Settings are validated
        THEN:
            - No system check errors reported
        """
        reported = settings_values_check(None)

        self.assertEqual(len(reported), 0)

    @override_settings(OCR_OUTPUT_TYPE="notapdf")
    def test_invalid_output_type(self):
        """
        GIVEN:
            - Default settings
            - OCR output type is invalid
        WHEN:
            - Settings are validated
        THEN:
            - system check error reported for OCR output type
        """
        reported = settings_values_check(None)

        self.assertEqual(len(reported), 1)
        self.assertIn('OCR output type "notapdf"', reported[0].msg)

    @override_settings(OCR_MODE="makeitso")
    def test_invalid_ocr_type(self):
        """
        GIVEN:
            - Default settings
            - OCR type is invalid
        WHEN:
            - Settings are validated
        THEN:
            - system check error reported for OCR type
        """
        reported = settings_values_check(None)

        self.assertEqual(len(reported), 1)
        self.assertIn('OCR output mode "makeitso"', reported[0].msg)

    @override_settings(OCR_CLEAN="cleanme")
    def test_invalid_ocr_clean(self):
        """
        GIVEN:
            - Default settings
            - OCR cleaning type is invalid
        WHEN:
            - Settings are validated
        THEN:
            - system check error reported for OCR cleaning type
        """
        reported = settings_values_check(None)

        self.assertEqual(len(reported), 1)
        self.assertIn('OCR clean mode "cleanme"', reported[0].msg)

    @override_settings(TIME_ZONE="TheMoon\\MyCrater")
    def test_invalid_timezone(self):
        """
        GIVEN:
            - Default settings
            - Timezone is invalid
        WHEN:
            - Settings are validated
        THEN:
            - system check error reported for timezone
        """
        reported = settings_values_check(None)

        self.assertEqual(len(reported), 1)
        self.assertIn('Timezone "TheMoon\\MyCrater"', reported[0].msg)
|
||||||
|
@ -9,6 +9,6 @@ class PaperlessTikaConfig(AppConfig):
|
|||||||
def ready(self):
|
def ready(self):
|
||||||
from documents.signals import document_consumer_declaration
|
from documents.signals import document_consumer_declaration
|
||||||
|
|
||||||
if settings.PAPERLESS_TIKA_ENABLED:
|
if settings.TIKA_ENABLED:
|
||||||
document_consumer_declaration.connect(tika_consumer_declaration)
|
document_consumer_declaration.connect(tika_consumer_declaration)
|
||||||
AppConfig.ready(self)
|
AppConfig.ready(self)
|
||||||
|
@ -27,7 +27,7 @@ class TikaDocumentParser(DocumentParser):
|
|||||||
)
|
)
|
||||||
|
|
||||||
def extract_metadata(self, document_path, mime_type):
|
def extract_metadata(self, document_path, mime_type):
|
||||||
tika_server = settings.PAPERLESS_TIKA_ENDPOINT
|
tika_server = settings.TIKA_ENDPOINT
|
||||||
try:
|
try:
|
||||||
parsed = parser.from_file(document_path, tika_server)
|
parsed = parser.from_file(document_path, tika_server)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@ -49,7 +49,7 @@ class TikaDocumentParser(DocumentParser):
|
|||||||
|
|
||||||
def parse(self, document_path, mime_type, file_name=None):
|
def parse(self, document_path, mime_type, file_name=None):
|
||||||
self.log("info", f"Sending {document_path} to Tika server")
|
self.log("info", f"Sending {document_path} to Tika server")
|
||||||
tika_server = settings.PAPERLESS_TIKA_ENDPOINT
|
tika_server = settings.TIKA_ENDPOINT
|
||||||
|
|
||||||
try:
|
try:
|
||||||
parsed = parser.from_file(document_path, tika_server)
|
parsed = parser.from_file(document_path, tika_server)
|
||||||
@ -73,7 +73,7 @@ class TikaDocumentParser(DocumentParser):
|
|||||||
|
|
||||||
def convert_to_pdf(self, document_path, file_name):
|
def convert_to_pdf(self, document_path, file_name):
|
||||||
pdf_path = os.path.join(self.tempdir, "convert.pdf")
|
pdf_path = os.path.join(self.tempdir, "convert.pdf")
|
||||||
gotenberg_server = settings.PAPERLESS_TIKA_GOTENBERG_ENDPOINT
|
gotenberg_server = settings.TIKA_GOTENBERG_ENDPOINT
|
||||||
url = gotenberg_server + "/forms/libreoffice/convert"
|
url = gotenberg_server + "/forms/libreoffice/convert"
|
||||||
|
|
||||||
self.log("info", f"Converting {document_path} to PDF as {pdf_path}")
|
self.log("info", f"Converting {document_path} to PDF as {pdf_path}")
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
[flake8]
|
[flake8]
|
||||||
extend-exclude = */migrations/*, paperless/settings.py, */tests/*
|
extend-exclude = */migrations/*, */tests/*
|
||||||
# E203 - https://www.flake8rules.com/rules/E203.html
|
# E203 - https://www.flake8rules.com/rules/E203.html
|
||||||
# W503 - https://www.flake8rules.com/rules/W503.html
|
# W503 - https://www.flake8rules.com/rules/W503.html
|
||||||
ignore = E203,W503
|
ignore = E203,W503
|
||||||
|
Loading…
x
Reference in New Issue
Block a user