Feature: Allow setting backend configuration settings via the UI (#5126)

* Saving some start on this

* At least partially working for the tesseract parser

* Problems with migration testing still need to be figured out

* Work around that error

* Fixes max m_pixels

* Moving the settings to main paperless application

* Starting some consumer options

* More fixes and work

* Fixes these last tests

* Fix max_length on OcrSettings.mode field

* Fix all fields on Common & Ocr settings serializers

* Umbrella config view

* Revert "Umbrella config view"

This reverts commit fbaf9f4be30f89afeb509099180158a3406416a5.

* Updates to use a single configuration object for all settings

* Squashed commit of the following:

commit 8a0a49dd57
Author: shamoon <4887959+shamoon@users.noreply.github.com>
Date:   Tue Dec 19 23:02:47 2023 -0800

    Fix formatting

commit 66b2d90c50
Author: shamoon <4887959+shamoon@users.noreply.github.com>
Date:   Tue Dec 19 22:36:35 2023 -0800

    Refactor frontend data models

commit 5723bd8dd8
Author: Adam Bogdał <adam@bogdal.pl>
Date:   Wed Dec 20 01:17:43 2023 +0100

    Fix: speed up admin panel for installs with a large number of documents (#5052)

commit 9b08ce1761
Author: shamoon <4887959+shamoon@users.noreply.github.com>
Date:   Tue Dec 19 15:18:51 2023 -0800

    Update PULL_REQUEST_TEMPLATE.md

commit a6248bec2d
Author: shamoon <4887959+shamoon@users.noreply.github.com>
Date:   Tue Dec 19 15:02:05 2023 -0800

    Chore: Update Angular to v17 (#4980)

commit b1f6f52486
Author: shamoon <4887959+shamoon@users.noreply.github.com>
Date:   Tue Dec 19 13:53:56 2023 -0800

    Fix: Dont allow null custom_fields property via API (#5063)

commit 638d9970fd
Author: shamoon <4887959+shamoon@users.noreply.github.com>
Date:   Tue Dec 19 13:43:50 2023 -0800

    Enhancement: symmetric document links (#4907)

commit 5e8de4c1da
Author: shamoon <4887959+shamoon@users.noreply.github.com>
Date:   Tue Dec 19 12:45:04 2023 -0800

    Enhancement: shared icon & shared by me filter (#4859)

commit 088bad9030
Author: Trenton H <797416+stumpylog@users.noreply.github.com>
Date:   Tue Dec 19 12:04:03 2023 -0800

    Bulk updates all the backend libraries (#5061)

* Saving some work on frontend config

* Very basic but dynamically-generated config form

* Saving work on slightly less ugly frontend config

* JSON validation for user_args field

* Fully dynamic config form

* Adds in some additional validators for a nicer error message

* Cleaning up the testing and coverage more

* Reverts unintentional change

* Adds documentation about the settings and the precedence

* Couple more commenting and style fixes

---------

Co-authored-by: shamoon <4887959+shamoon@users.noreply.github.com>
This commit is contained in:
Trenton H
2023-12-29 15:42:56 -08:00
committed by GitHub
parent da058b915b
commit 061f33fb05
41 changed files with 1570 additions and 119 deletions

88
src/paperless/config.py Normal file
View File

@@ -0,0 +1,88 @@
import dataclasses
import json
from typing import Optional
from django.conf import settings
from paperless.models import ApplicationConfiguration
@dataclasses.dataclass
class OutputTypeConfig:
    """
    Base configuration carrying the chosen PDF output format, which
    almost all parsers care about.

    The value is resolved in __post_init__: a truthy output_type stored on
    the ApplicationConfiguration row is used, otherwise
    settings.OCR_OUTPUT_TYPE.
    """

    output_type: str = dataclasses.field(init=False)

    @staticmethod
    def _get_config_instance() -> ApplicationConfiguration:
        """
        Return the first ApplicationConfiguration row, creating one first
        when the table is empty.
        """
        stored = ApplicationConfiguration.objects.all().first()
        if stored is not None:
            return stored
        # Workaround for a test where the migration hasn't run to create the single model
        ApplicationConfiguration.objects.create()
        return ApplicationConfiguration.objects.all().first()

    def __post_init__(self) -> None:
        """
        Populate output_type from the database row, falling back to settings.
        """
        stored = self._get_config_instance()
        db_output_type = stored.output_type
        self.output_type = db_output_type or settings.OCR_OUTPUT_TYPE
@dataclasses.dataclass
class OcrConfig(OutputTypeConfig):
    """
    Specific settings for the Tesseract based parser.  Options generally
    correspond almost directly to the OCRMyPDF options

    Every attribute is resolved in __post_init__.  The value stored on the
    ApplicationConfiguration row takes precedence; the matching
    settings.OCR_* value is used only when nothing is stored.
    """

    pages: Optional[int] = dataclasses.field(init=False)
    language: str = dataclasses.field(init=False)
    mode: str = dataclasses.field(init=False)
    skip_archive_file: str = dataclasses.field(init=False)
    image_dpi: Optional[int] = dataclasses.field(init=False)
    clean: str = dataclasses.field(init=False)
    deskew: bool = dataclasses.field(init=False)
    rotate: bool = dataclasses.field(init=False)
    rotate_threshold: float = dataclasses.field(init=False)
    max_image_pixel: Optional[float] = dataclasses.field(init=False)
    color_conversion_strategy: str = dataclasses.field(init=False)
    user_args: Optional[dict[str, str]] = dataclasses.field(init=False)

    def __post_init__(self) -> None:
        """
        Resolve every OCR option from the database row, then from settings.
        """
        super().__post_init__()

        app_config = self._get_config_instance()

        # For these fields a falsy stored value (None / empty string) means
        # "not configured", so a plain `or` fallback is sufficient.
        self.pages = app_config.pages or settings.OCR_PAGES
        self.language = app_config.language or settings.OCR_LANGUAGE
        self.mode = app_config.mode or settings.OCR_MODE
        self.skip_archive_file = (
            app_config.skip_archive_file or settings.OCR_SKIP_ARCHIVE_FILE
        )
        self.image_dpi = app_config.image_dpi or settings.OCR_IMAGE_DPI
        self.clean = app_config.unpaper_clean or settings.OCR_CLEAN

        # Nullable booleans: an explicit False stored in the database is a
        # real choice and must win over the settings value.  Only None means
        # "not configured", so `or` cannot be used here.
        self.deskew = (
            app_config.deskew if app_config.deskew is not None else settings.OCR_DESKEW
        )
        self.rotate = (
            app_config.rotate_pages
            if app_config.rotate_pages is not None
            else settings.OCR_ROTATE_PAGES
        )
        # The model validator allows 0.0 as a threshold, so it must not be
        # treated as "unset" either.
        self.rotate_threshold = (
            app_config.rotate_pages_threshold
            if app_config.rotate_pages_threshold is not None
            else settings.OCR_ROTATE_PAGES_THRESHOLD
        )

        self.max_image_pixel = (
            app_config.max_image_pixels or settings.OCR_MAX_IMAGE_PIXELS
        )
        self.color_conversion_strategy = (
            app_config.color_conversion_strategy
            or settings.OCR_COLOR_CONVERSION_STRATEGY
        )

        # The database stores user_args as parsed JSON; the settings value is
        # a raw JSON string which still needs decoding.
        user_args = None
        if app_config.user_args:
            user_args = app_config.user_args
        elif settings.OCR_USER_ARGS is not None:  # pragma: no cover
            try:
                user_args = json.loads(settings.OCR_USER_ARGS)
            except json.JSONDecodeError:
                # Best effort: an unparsable settings value is treated as no
                # extra arguments rather than aborting.
                user_args = {}

        self.user_args = user_args

View File

@@ -0,0 +1,180 @@
# Generated by Django 4.2.7 on 2023-12-19 17:51
import django.core.validators
from django.db import migrations
from django.db import models
def _create_singleton(apps, schema_editor):
    """
    Insert the first and only row of the configuration model.

    The model class is looked up through the migration app registry
    (apps.get_model); schema_editor is not used.
    """
    apps.get_model("paperless", "ApplicationConfiguration").objects.create()
class Migration(migrations.Migration):
# Initial migration with no dependencies: it creates the
# ApplicationConfiguration table and then inserts its single row.
initial = True
dependencies = []
operations = [
migrations.CreateModel(
name="ApplicationConfiguration",
# Apart from the primary key, every column is declared with null=True.
fields=[
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
(
"output_type",
models.CharField(
blank=True,
choices=[
("pdf", "pdf"),
("pdfa", "pdfa"),
("pdfa-1", "pdfa-1"),
("pdfa-2", "pdfa-2"),
("pdfa-3", "pdfa-3"),
],
max_length=8,
null=True,
verbose_name="Sets the output PDF type",
),
),
(
"pages",
models.PositiveIntegerField(
null=True,
validators=[
django.core.validators.MinValueValidator(1),
],
verbose_name="Do OCR from page 1 to this value",
),
),
(
"language",
models.CharField(
blank=True,
max_length=32,
null=True,
verbose_name="Do OCR using these languages",
),
),
(
"mode",
models.CharField(
blank=True,
choices=[
("skip", "skip"),
("redo", "redo"),
("force", "force"),
("skip_noarchive", "skip_noarchive"),
],
max_length=16,
null=True,
verbose_name="Sets the OCR mode",
),
),
(
"skip_archive_file",
models.CharField(
blank=True,
choices=[
("never", "never"),
("with_text", "with_text"),
("always", "always"),
],
max_length=16,
null=True,
verbose_name="Controls the generation of an archive file",
),
),
(
"image_dpi",
models.PositiveIntegerField(
null=True,
validators=[
django.core.validators.MinValueValidator(1),
],
verbose_name="Sets image DPI fallback value",
),
),
(
"unpaper_clean",
models.CharField(
blank=True,
choices=[
("clean", "clean"),
("clean-final", "clean-final"),
("none", "none"),
],
max_length=16,
null=True,
verbose_name="Controls the unpaper cleaning",
),
),
(
"deskew",
models.BooleanField(null=True, verbose_name="Enables deskew"),
),
(
"rotate_pages",
models.BooleanField(
null=True,
verbose_name="Enables page rotation",
),
),
(
"rotate_pages_threshold",
models.FloatField(
null=True,
validators=[django.core.validators.MinValueValidator(0.0)],
verbose_name="Sets the threshold for rotation of pages",
),
),
(
"max_image_pixels",
models.FloatField(
null=True,
validators=[
django.core.validators.MinValueValidator(1000000.0),
],
verbose_name="Sets the maximum image size for decompression",
),
),
(
"color_conversion_strategy",
models.CharField(
blank=True,
choices=[
("LeaveColorUnchanged", "LeaveColorUnchanged"),
("RGB", "RGB"),
("UseDeviceIndependentColor", "UseDeviceIndependentColor"),
("Gray", "Gray"),
("CMYK", "CMYK"),
],
max_length=32,
null=True,
verbose_name="Sets the Ghostscript color conversion strategy",
),
),
(
"user_args",
models.JSONField(
null=True,
verbose_name="Adds additional user arguments for OCRMyPDF",
),
),
],
options={
"verbose_name": "paperless application settings",
},
),
# Forward: insert the single configuration row. Reverse: nothing to undo
# here (noop).
migrations.RunPython(_create_singleton, migrations.RunPython.noop),
]

View File

173
src/paperless/models.py Normal file
View File

@@ -0,0 +1,173 @@
from django.core.validators import MinValueValidator
from django.db import models
from django.utils.translation import gettext_lazy as _
# Primary key forced onto every singleton model instance when it is saved
DEFAULT_SINGLETON_INSTANCE_ID = 1


class AbstractSingletonModel(models.Model):
    """
    Abstract base for models which pin their primary key on every save.
    """

    class Meta:
        abstract = True

    def save(self, *args, **kwargs):
        """
        Always save as the first and only model
        """
        # Overwrite whatever pk the instance carries before delegating to
        # the regular Django save.
        singleton_pk = DEFAULT_SINGLETON_INSTANCE_ID
        self.pk = singleton_pk
        super().save(*args, **kwargs)
class OutputTypeChoices(models.TextChoices):
    """
    Selectable PDF output types; matches to --output-type
    """

    PDF = "pdf", _("pdf")
    PDF_A = "pdfa", _("pdfa")
    PDF_A1 = "pdfa-1", _("pdfa-1")
    PDF_A2 = "pdfa-2", _("pdfa-2")
    PDF_A3 = "pdfa-3", _("pdfa-3")
class ModeChoices(models.TextChoices):
    """
    Selectable OCR modes; matches to --skip-text, --redo-ocr, --force-ocr
    and our own custom setting
    """

    SKIP = "skip", _("skip")
    REDO = "redo", _("redo")
    FORCE = "force", _("force")
    SKIP_NO_ARCHIVE = "skip_noarchive", _("skip_noarchive")
class ArchiveFileChoices(models.TextChoices):
    """
    Selectable values controlling creation of an archive PDF file
    """

    NEVER = "never", _("never")
    WITH_TEXT = "with_text", _("with_text")
    ALWAYS = "always", _("always")
class CleanChoices(models.TextChoices):
    """
    Selectable cleaning modes; matches to --clean, --clean-final
    """

    CLEAN = "clean", _("clean")
    FINAL = "clean-final", _("clean-final")
    NONE = "none", _("none")
class ColorConvertChoices(models.TextChoices):
    """
    Selectable color conversion strategies.  Refer to the Ghostscript
    documentation for valid options
    """

    UNCHANGED = "LeaveColorUnchanged", _("LeaveColorUnchanged")
    RGB = "RGB", _("RGB")
    INDEPENDENT = "UseDeviceIndependentColor", _("UseDeviceIndependentColor")
    GRAY = "Gray", _("Gray")
    CMYK = "CMYK", _("CMYK")
class ApplicationConfiguration(AbstractSingletonModel):
"""
Settings which are common across more than 1 parser
"""
# Every field on this model is nullable; a null value means the setting is
# not overridden here. See paperless/config.py for how the fallback to the
# settings.OCR_* values is resolved.
output_type = models.CharField(
verbose_name=_("Sets the output PDF type"),
null=True,
blank=True,
max_length=8,
choices=OutputTypeChoices.choices,
)
# NOTE: the bare string below is an expression statement used as a section
# divider, not a docstring.
"""
Settings for the Tesseract based OCR parser
"""
pages = models.PositiveIntegerField(
verbose_name=_("Do OCR from page 1 to this value"),
null=True,
validators=[MinValueValidator(1)],
)
language = models.CharField(
verbose_name=_("Do OCR using these languages"),
null=True,
blank=True,
max_length=32,
)
mode = models.CharField(
verbose_name=_("Sets the OCR mode"),
null=True,
blank=True,
max_length=16,
choices=ModeChoices.choices,
)
skip_archive_file = models.CharField(
verbose_name=_("Controls the generation of an archive file"),
null=True,
blank=True,
max_length=16,
choices=ArchiveFileChoices.choices,
)
image_dpi = models.PositiveIntegerField(
verbose_name=_("Sets image DPI fallback value"),
null=True,
validators=[MinValueValidator(1)],
)
# Can't call it clean, that's a model method
unpaper_clean = models.CharField(
verbose_name=_("Controls the unpaper cleaning"),
null=True,
blank=True,
max_length=16,
choices=CleanChoices.choices,
)
# Nullable booleans: None, True and False are three distinct stored states.
deskew = models.BooleanField(verbose_name=_("Enables deskew"), null=True)
rotate_pages = models.BooleanField(
verbose_name=_("Enables page rotation"),
null=True,
)
# The validator accepts 0.0, so zero is a legitimate stored threshold.
rotate_pages_threshold = models.FloatField(
verbose_name=_("Sets the threshold for rotation of pages"),
null=True,
validators=[MinValueValidator(0.0)],
)
max_image_pixels = models.FloatField(
verbose_name=_("Sets the maximum image size for decompression"),
null=True,
validators=[MinValueValidator(1_000_000.0)],
)
color_conversion_strategy = models.CharField(
verbose_name=_("Sets the Ghostscript color conversion strategy"),
blank=True,
null=True,
max_length=32,
choices=ColorConvertChoices.choices,
)
user_args = models.JSONField(
verbose_name=_("Adds additional user arguments for OCRMyPDF"),
null=True,
)
class Meta:
verbose_name = _("paperless application settings")
# Fixed label, independent of the stored values.
def __str__(self) -> str: # pragma: no cover
return "ApplicationConfiguration"

View File

@@ -3,6 +3,8 @@ from django.contrib.auth.models import Permission
from django.contrib.auth.models import User
from rest_framework import serializers
from paperless.models import ApplicationConfiguration
class ObfuscatedUserPasswordField(serializers.Field):
"""
@@ -113,3 +115,9 @@ class ProfileSerializer(serializers.ModelSerializer):
"last_name",
"auth_token",
)
class ApplicationConfigurationSerializer(serializers.ModelSerializer):
    """
    Model serializer exposing all fields of ApplicationConfiguration.
    """

    class Meta:
        fields = "__all__"
        model = ApplicationConfiguration

View File

@@ -57,6 +57,15 @@ def __get_int(key: str, default: int) -> int:
return int(os.getenv(key, default))
def __get_optional_int(key: str) -> Optional[int]:
    """
    Returns None if the environment key is not present, otherwise an integer
    """
    if key not in os.environ:
        return None
    # The key is known to exist, so the -1 default is only a placeholder
    # required by the helper's signature.
    return __get_int(key, -1)  # pragma: no cover
def __get_float(key: str, default: float) -> float:
"""
Return an integer value based on the environment variable or a default
@@ -66,18 +75,24 @@ def __get_float(key: str, default: float) -> float:
def __get_path(
key: str,
default: Optional[Union[PathLike, str]] = None,
) -> Optional[Path]:
default: Union[PathLike, str],
) -> Path:
"""
Return a normalized, absolute path based on the environment variable or a default,
if provided. If not set and no default, returns None
if provided
"""
if key in os.environ:
return Path(os.environ[key]).resolve()
elif default is not None:
return Path(default).resolve()
else:
return None
return Path(default).resolve()
def __get_optional_path(key: str) -> Optional[Path]:
    """
    Returns None if the environment key is not present, otherwise a fully resolved Path
    """
    if key not in os.environ:
        return None
    # The key is known to exist, so the empty-string default is only a
    # placeholder required by the helper's signature.
    return __get_path(key, "")
def __get_list(
@@ -327,7 +342,7 @@ MIDDLEWARE = [
]
# Optional to enable compression
if __get_boolean("PAPERLESS_ENABLE_COMPRESSION", "yes"): # pragma: nocover
if __get_boolean("PAPERLESS_ENABLE_COMPRESSION", "yes"): # pragma: no cover
MIDDLEWARE.insert(0, "compression_middleware.middleware.CompressionMiddleware")
ROOT_URLCONF = "paperless.urls"
@@ -495,7 +510,7 @@ CSRF_COOKIE_NAME = f"{COOKIE_PREFIX}csrftoken"
SESSION_COOKIE_NAME = f"{COOKIE_PREFIX}sessionid"
LANGUAGE_COOKIE_NAME = f"{COOKIE_PREFIX}django_language"
EMAIL_CERTIFICATE_FILE = __get_path("PAPERLESS_EMAIL_CERTIFICATE_LOCATION")
EMAIL_CERTIFICATE_FILE = __get_optional_path("PAPERLESS_EMAIL_CERTIFICATE_LOCATION")
###############################################################################
@@ -796,11 +811,10 @@ CONSUMER_BARCODE_STRING: Final[str] = os.getenv(
"PATCHT",
)
consumer_barcode_scanner_tmp: Final[str] = os.getenv(
CONSUMER_BARCODE_SCANNER: Final[str] = os.getenv(
"PAPERLESS_CONSUMER_BARCODE_SCANNER",
"PYZBAR",
)
CONSUMER_BARCODE_SCANNER = consumer_barcode_scanner_tmp.upper()
).upper()
CONSUMER_ENABLE_ASN_BARCODE: Final[bool] = __get_boolean(
"PAPERLESS_CONSUMER_ENABLE_ASN_BARCODE",
@@ -811,15 +825,12 @@ CONSUMER_ASN_BARCODE_PREFIX: Final[str] = os.getenv(
"ASN",
)
CONSUMER_BARCODE_UPSCALE: Final[float] = float(
os.getenv("PAPERLESS_CONSUMER_BARCODE_UPSCALE", 0.0),
CONSUMER_BARCODE_UPSCALE: Final[float] = __get_float(
"PAPERLESS_CONSUMER_BARCODE_UPSCALE",
0.0,
)
CONSUMER_BARCODE_DPI: Final[str] = int(
os.getenv("PAPERLESS_CONSUMER_BARCODE_DPI", 300),
)
CONSUMER_BARCODE_DPI: Final[int] = __get_int("PAPERLESS_CONSUMER_BARCODE_DPI", 300)
CONSUMER_ENABLE_COLLATE_DOUBLE_SIDED: Final[bool] = __get_boolean(
"PAPERLESS_CONSUMER_ENABLE_COLLATE_DOUBLE_SIDED",
@@ -834,7 +845,7 @@ CONSUMER_COLLATE_DOUBLE_SIDED_TIFF_SUPPORT: Final[bool] = __get_boolean(
"PAPERLESS_CONSUMER_COLLATE_DOUBLE_SIDED_TIFF_SUPPORT",
)
OCR_PAGES = int(os.getenv("PAPERLESS_OCR_PAGES", 0))
OCR_PAGES = __get_optional_int("PAPERLESS_OCR_PAGES")
# The default language that tesseract will attempt to use when parsing
# documents. It should be a 3-letter language code consistent with ISO 639.
@@ -848,28 +859,29 @@ OCR_MODE = os.getenv("PAPERLESS_OCR_MODE", "skip")
OCR_SKIP_ARCHIVE_FILE = os.getenv("PAPERLESS_OCR_SKIP_ARCHIVE_FILE", "never")
OCR_IMAGE_DPI = os.getenv("PAPERLESS_OCR_IMAGE_DPI")
OCR_IMAGE_DPI = __get_optional_int("PAPERLESS_OCR_IMAGE_DPI")
OCR_CLEAN = os.getenv("PAPERLESS_OCR_CLEAN", "clean")
OCR_DESKEW = __get_boolean("PAPERLESS_OCR_DESKEW", "true")
OCR_DESKEW: Final[bool] = __get_boolean("PAPERLESS_OCR_DESKEW", "true")
OCR_ROTATE_PAGES = __get_boolean("PAPERLESS_OCR_ROTATE_PAGES", "true")
OCR_ROTATE_PAGES: Final[bool] = __get_boolean("PAPERLESS_OCR_ROTATE_PAGES", "true")
OCR_ROTATE_PAGES_THRESHOLD = float(
os.getenv("PAPERLESS_OCR_ROTATE_PAGES_THRESHOLD", 12.0),
OCR_ROTATE_PAGES_THRESHOLD: Final[float] = __get_float(
"PAPERLESS_OCR_ROTATE_PAGES_THRESHOLD",
12.0,
)
OCR_MAX_IMAGE_PIXELS: Optional[int] = None
if os.environ.get("PAPERLESS_OCR_MAX_IMAGE_PIXELS") is not None:
OCR_MAX_IMAGE_PIXELS: int = int(os.environ.get("PAPERLESS_OCR_MAX_IMAGE_PIXELS"))
OCR_MAX_IMAGE_PIXELS: Final[Optional[int]] = __get_optional_int(
"PAPERLESS_OCR_MAX_IMAGE_PIXELS",
)
OCR_COLOR_CONVERSION_STRATEGY = os.getenv(
"PAPERLESS_OCR_COLOR_CONVERSION_STRATEGY",
"RGB",
)
OCR_USER_ARGS = os.getenv("PAPERLESS_OCR_USER_ARGS", "{}")
OCR_USER_ARGS = os.getenv("PAPERLESS_OCR_USER_ARGS")
# GNUPG needs a home directory for some reason
GNUPG_HOME = os.getenv("HOME", "/tmp")

View File

@@ -35,6 +35,7 @@ from documents.views import TasksViewSet
from documents.views import UiSettingsView
from documents.views import UnifiedSearchViewSet
from paperless.consumers import StatusConsumer
from paperless.views import ApplicationConfigurationViewSet
from paperless.views import FaviconView
from paperless.views import GenerateAuthTokenView
from paperless.views import GroupViewSet
@@ -60,6 +61,7 @@ api_router.register(r"mail_rules", MailRuleViewSet)
api_router.register(r"share_links", ShareLinkViewSet)
api_router.register(r"consumption_templates", ConsumptionTemplateViewSet)
api_router.register(r"custom_fields", CustomFieldViewSet)
api_router.register(r"config", ApplicationConfigurationViewSet)
urlpatterns = [

View File

@@ -18,6 +18,8 @@ from rest_framework.viewsets import ModelViewSet
from documents.permissions import PaperlessObjectPermissions
from paperless.filters import GroupFilterSet
from paperless.filters import UserFilterSet
from paperless.models import ApplicationConfiguration
from paperless.serialisers import ApplicationConfigurationSerializer
from paperless.serialisers import GroupSerializer
from paperless.serialisers import ProfileSerializer
from paperless.serialisers import UserSerializer
@@ -71,7 +73,7 @@ class StandardPagination(PageNumberPagination):
class FaviconView(View):
def get(self, request, *args, **kwargs): # pragma: nocover
def get(self, request, *args, **kwargs): # pragma: no cover
favicon = os.path.join(
os.path.dirname(__file__),
"static",
@@ -160,3 +162,12 @@ class GenerateAuthTokenView(GenericAPIView):
return Response(
token.key,
)
class ApplicationConfigurationViewSet(ModelViewSet):
    """
    Model view set exposing ApplicationConfiguration through
    ApplicationConfigurationSerializer.
    """

    model = ApplicationConfiguration
    serializer_class = ApplicationConfigurationSerializer

    # NOTE(review): this is the model manager itself rather than a QuerySet
    # such as .all() — confirm this is intended.
    queryset = ApplicationConfiguration.objects

    # NOTE(review): only authentication is checked here; confirm whether
    # model-level permissions should also restrict who may edit the config.
    permission_classes = (IsAuthenticated,)