Feature: Allow setting backend configuration settings via the UI (#5126)

* Saving some start on this

* At least partially working for the tesseract parser

* Problems with migration testing still need to be figured out

* Work around that error

* Fixes max m_pixels

* Moving the settings to main paperless application

* Starting some consumer options

* More fixes and work

* Fixes these last tests

* Fix max_length on OcrSettings.mode field

* Fix all fields on Common & Ocr settings serializers

* Umbrella config view

* Revert "Umbrella config view"

This reverts commit fbaf9f4be30f89afeb509099180158a3406416a5.

* Updates to use a single configuration object for all settings

* Squashed commit of the following:

commit 8a0a49dd57
Author: shamoon <4887959+shamoon@users.noreply.github.com>
Date:   Tue Dec 19 23:02:47 2023 -0800

    Fix formatting

commit 66b2d90c50
Author: shamoon <4887959+shamoon@users.noreply.github.com>
Date:   Tue Dec 19 22:36:35 2023 -0800

    Refactor frontend data models

commit 5723bd8dd8
Author: Adam Bogdał <adam@bogdal.pl>
Date:   Wed Dec 20 01:17:43 2023 +0100

    Fix: speed up admin panel for installs with a large number of documents (#5052)

commit 9b08ce1761
Author: shamoon <4887959+shamoon@users.noreply.github.com>
Date:   Tue Dec 19 15:18:51 2023 -0800

    Update PULL_REQUEST_TEMPLATE.md

commit a6248bec2d
Author: shamoon <4887959+shamoon@users.noreply.github.com>
Date:   Tue Dec 19 15:02:05 2023 -0800

    Chore: Update Angular to v17 (#4980)

commit b1f6f52486
Author: shamoon <4887959+shamoon@users.noreply.github.com>
Date:   Tue Dec 19 13:53:56 2023 -0800

    Fix: Dont allow null custom_fields property via API (#5063)

commit 638d9970fd
Author: shamoon <4887959+shamoon@users.noreply.github.com>
Date:   Tue Dec 19 13:43:50 2023 -0800

    Enhancement: symmetric document links (#4907)

commit 5e8de4c1da
Author: shamoon <4887959+shamoon@users.noreply.github.com>
Date:   Tue Dec 19 12:45:04 2023 -0800

    Enhancement: shared icon & shared by me filter (#4859)

commit 088bad9030
Author: Trenton H <797416+stumpylog@users.noreply.github.com>
Date:   Tue Dec 19 12:04:03 2023 -0800

    Bulk updates all the backend libraries (#5061)

* Saving some work on frontend config

* Very basic but dynamically-generated config form

* Saving work on slightly less ugly frontend config

* JSON validation for user_args field

* Fully dynamic config form

* Adds in some additional validators for a nicer error message

* Cleaning up the testing and coverage more

* Reverts unintentional change

* Adds documentation about the settings and the precedence

* Couple more commenting and style fixes

---------

Co-authored-by: shamoon <4887959+shamoon@users.noreply.github.com>
This commit is contained in:
Trenton H
2023-12-29 15:42:56 -08:00
committed by GitHub
parent da058b915b
commit 061f33fb05
41 changed files with 1570 additions and 119 deletions

View File

@@ -0,0 +1,232 @@
import json
from django.test import TestCase
from django.test import override_settings
from documents.tests.utils import DirectoriesMixin
from documents.tests.utils import FileSystemAssertsMixin
from paperless.models import ApplicationConfiguration
from paperless.models import CleanChoices
from paperless.models import ColorConvertChoices
from paperless.models import ModeChoices
from paperless.models import OutputTypeChoices
from paperless_tesseract.parsers import RasterisedDocumentParser
class TestParserSettingsFromDb(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
    """
    Checks that values stored on the ApplicationConfiguration database object
    are the ones used when the OCRmyPDF parameters are constructed, even when
    the corresponding Django setting holds a different value.

    Every test follows the same pattern: override the Django setting, store a
    conflicting value on the configuration object, build the parameters while
    the override is still active, then assert on the result.
    """

    @staticmethod
    def get_params():
        """
        Helper to get just the OCRMyPDF parameters from the parser
        """
        return RasterisedDocumentParser(None).construct_ocrmypdf_parameters(
            input_file="input.pdf",
            output_file="output.pdf",
            sidecar_file="sidecar.txt",
            mime_type="application/pdf",
            safe_fallback=False,
        )

    def test_db_settings_ocr_pages(self):
        """
        GIVEN:
            - Django settings defines different value for OCR_PAGES than
              configuration object
        WHEN:
            - OCR parameters are constructed
        THEN:
            - Configuration from database is utilized
        """
        with override_settings(OCR_PAGES=10):
            instance = ApplicationConfiguration.objects.first()
            instance.pages = 5
            instance.save()

            params = self.get_params()
        self.assertEqual(params["pages"], "1-5")

    def test_db_settings_ocr_language(self):
        """
        GIVEN:
            - Django settings defines different value for OCR_LANGUAGE than
              configuration object
        WHEN:
            - OCR parameters are constructed
        THEN:
            - Configuration from database is utilized
        """
        with override_settings(OCR_LANGUAGE="eng+deu"):
            instance = ApplicationConfiguration.objects.first()
            instance.language = "fra+ita"
            instance.save()

            params = self.get_params()
        self.assertEqual(params["language"], "fra+ita")

    def test_db_settings_ocr_output_type(self):
        """
        GIVEN:
            - Django settings defines different value for OCR_OUTPUT_TYPE than
              configuration object
        WHEN:
            - OCR parameters are constructed
        THEN:
            - Configuration from database is utilized
        """
        with override_settings(OCR_OUTPUT_TYPE="pdfa-3"):
            instance = ApplicationConfiguration.objects.first()
            instance.output_type = OutputTypeChoices.PDF_A
            instance.save()

            params = self.get_params()
        self.assertEqual(params["output_type"], "pdfa")

    def test_db_settings_ocr_mode(self):
        """
        GIVEN:
            - Django settings defines different value for OCR_MODE than
              configuration object
        WHEN:
            - OCR parameters are constructed
        THEN:
            - Configuration from database is utilized
        """
        with override_settings(OCR_MODE="redo"):
            instance = ApplicationConfiguration.objects.first()
            instance.mode = ModeChoices.SKIP
            instance.save()

            params = self.get_params()
        self.assertTrue(params["skip_text"])
        self.assertNotIn("redo_ocr", params)
        self.assertNotIn("force_ocr", params)

    def test_db_settings_ocr_clean(self):
        """
        GIVEN:
            - Django settings defines different value for OCR_CLEAN than
              configuration object
        WHEN:
            - OCR parameters are constructed
        THEN:
            - Configuration from database is utilized
        """
        # Setting asks for the final clean, database asks for the plain clean
        with override_settings(OCR_CLEAN="clean-final"):
            instance = ApplicationConfiguration.objects.first()
            instance.unpaper_clean = CleanChoices.CLEAN
            instance.save()

            params = self.get_params()
        self.assertTrue(params["clean"])
        self.assertNotIn("clean_final", params)

        # Reverse case. The setting must disagree with the database value,
        # otherwise the assertions below could not tell which source was used.
        with override_settings(OCR_CLEAN="clean"):
            instance = ApplicationConfiguration.objects.first()
            instance.unpaper_clean = CleanChoices.FINAL
            instance.save()

            params = self.get_params()
        self.assertTrue(params["clean_final"])
        self.assertNotIn("clean", params)

    def test_db_settings_ocr_deskew(self):
        """
        GIVEN:
            - Django settings defines different value for OCR_DESKEW than
              configuration object
        WHEN:
            - OCR parameters are constructed
        THEN:
            - Configuration from database is utilized
        """
        with override_settings(OCR_DESKEW=False):
            instance = ApplicationConfiguration.objects.first()
            instance.deskew = True
            instance.save()

            params = self.get_params()
        self.assertTrue(params["deskew"])

    def test_db_settings_ocr_rotate(self):
        """
        GIVEN:
            - Django settings defines different value for OCR_ROTATE_PAGES
              and OCR_ROTATE_PAGES_THRESHOLD than configuration object
        WHEN:
            - OCR parameters are constructed
        THEN:
            - Configuration from database is utilized
        """
        with override_settings(OCR_ROTATE_PAGES=False, OCR_ROTATE_PAGES_THRESHOLD=30.0):
            instance = ApplicationConfiguration.objects.first()
            instance.rotate_pages = True
            instance.rotate_pages_threshold = 15.0
            instance.save()

            params = self.get_params()
        self.assertTrue(params["rotate_pages"])
        self.assertAlmostEqual(params["rotate_pages_threshold"], 15.0)

    def test_db_settings_ocr_max_pixels(self):
        """
        GIVEN:
            - Django settings defines different value for OCR_MAX_IMAGE_PIXELS than
              configuration object
        WHEN:
            - OCR parameters are constructed
        THEN:
            - Configuration from database is utilized
        """
        with override_settings(OCR_MAX_IMAGE_PIXELS=2_000_000.0):
            instance = ApplicationConfiguration.objects.first()
            instance.max_image_pixels = 1_000_000.0
            instance.save()

            params = self.get_params()
        # The stored value is in pixels, the resulting parameter in megapixels
        self.assertAlmostEqual(params["max_image_mpixels"], 1.0)

    def test_db_settings_ocr_color_convert(self):
        """
        GIVEN:
            - Django settings defines different value for OCR_COLOR_CONVERSION_STRATEGY than
              configuration object
        WHEN:
            - OCR parameters are constructed
        THEN:
            - Configuration from database is utilized
        """
        with override_settings(OCR_COLOR_CONVERSION_STRATEGY="LeaveColorUnchanged"):
            instance = ApplicationConfiguration.objects.first()
            instance.color_conversion_strategy = ColorConvertChoices.INDEPENDENT
            instance.save()

            params = self.get_params()
        self.assertEqual(
            params["color_conversion_strategy"],
            "UseDeviceIndependentColor",
        )

    def test_ocr_user_args(self):
        """
        GIVEN:
            - Django settings defines different value for OCR_USER_ARGS than
              configuration object
        WHEN:
            - OCR parameters are constructed
        THEN:
            - Configuration from database is utilized
        """
        with override_settings(
            OCR_USER_ARGS=json.dumps({"continue_on_soft_render_error": True}),
        ):
            instance = ApplicationConfiguration.objects.first()
            instance.user_args = {"unpaper_args": "--pre-rotate 90"}
            instance.save()

            params = self.get_params()
        self.assertIn("unpaper_args", params)
        self.assertEqual(
            params["unpaper_args"],
            "--pre-rotate 90",
        )