mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-08-20 00:56:26 +00:00
Feature: Allow setting backend configuration settings via the UI (#5126)
* Saving some start on this * At least partially working for the tesseract parser * Problems with migration testing need to figure out * Work around that error * Fixes max m_pixels * Moving the settings to main paperless application * Starting some consumer options * More fixes and work * Fixes these last tests * Fix max_length on OcrSettings.mode field * Fix all fields on Common & Ocr settings serializers * Umbrellla config view * Revert "Umbrellla config view" This reverts commit fbaf9f4be30f89afeb509099180158a3406416a5. * Updates to use a single configuration object for all settings * Squashed commit of the following: commit8a0a49dd57
Author: shamoon <4887959+shamoon@users.noreply.github.com> Date: Tue Dec 19 23:02:47 2023 -0800 Fix formatting commit66b2d90c50
Author: shamoon <4887959+shamoon@users.noreply.github.com> Date: Tue Dec 19 22:36:35 2023 -0800 Refactor frontend data models commit5723bd8dd8
Author: Adam Bogdał <adam@bogdal.pl> Date: Wed Dec 20 01:17:43 2023 +0100 Fix: speed up admin panel for installs with a large number of documents (#5052) commit9b08ce1761
Author: shamoon <4887959+shamoon@users.noreply.github.com> Date: Tue Dec 19 15:18:51 2023 -0800 Update PULL_REQUEST_TEMPLATE.md commita6248bec2d
Author: shamoon <4887959+shamoon@users.noreply.github.com> Date: Tue Dec 19 15:02:05 2023 -0800 Chore: Update Angular to v17 (#4980) commitb1f6f52486
Author: shamoon <4887959+shamoon@users.noreply.github.com> Date: Tue Dec 19 13:53:56 2023 -0800 Fix: Dont allow null custom_fields property via API (#5063) commit638d9970fd
Author: shamoon <4887959+shamoon@users.noreply.github.com> Date: Tue Dec 19 13:43:50 2023 -0800 Enhancement: symmetric document links (#4907) commit5e8de4c1da
Author: shamoon <4887959+shamoon@users.noreply.github.com> Date: Tue Dec 19 12:45:04 2023 -0800 Enhancement: shared icon & shared by me filter (#4859) commit088bad9030
Author: Trenton H <797416+stumpylog@users.noreply.github.com> Date: Tue Dec 19 12:04:03 2023 -0800 Bulk updates all the backend libraries (#5061) * Saving some work on frontend config * Very basic but dynamically-generated config form * Saving work on slightly less ugly frontend config * JSON validation for user_args field * Fully dynamic config form * Adds in some additional validators for a nicer error message * Cleaning up the testing and coverage more * Reverts unintentional change * Adds documentation about the settings and the precedence * Couple more commenting and style fixes --------- Co-authored-by: shamoon <4887959+shamoon@users.noreply.github.com>
This commit is contained in:
@@ -2,7 +2,6 @@ import os
|
||||
import shutil
|
||||
import tempfile
|
||||
import uuid
|
||||
from contextlib import AbstractContextManager
|
||||
from pathlib import Path
|
||||
from unittest import mock
|
||||
|
||||
@@ -17,28 +16,6 @@ from documents.tests.utils import FileSystemAssertsMixin
|
||||
from paperless_tesseract.parsers import RasterisedDocumentParser
|
||||
from paperless_tesseract.parsers import post_process_text
|
||||
|
||||
image_to_string_calls = []
|
||||
|
||||
|
||||
def fake_convert(input_file, output_file, **kwargs):
|
||||
with open(input_file) as f:
|
||||
lines = f.readlines()
|
||||
|
||||
for i, line in enumerate(lines):
|
||||
with open(output_file % i, "w") as f2:
|
||||
f2.write(line.strip())
|
||||
|
||||
|
||||
class FakeImageFile(AbstractContextManager):
|
||||
def __init__(self, fname):
|
||||
self.fname = fname
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
pass
|
||||
|
||||
def __enter__(self):
|
||||
return os.path.basename(self.fname)
|
||||
|
||||
|
||||
class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
SAMPLE_FILES = Path(__file__).resolve().parent / "samples"
|
||||
@@ -769,43 +746,52 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
self.assertEqual(params["sidecar"], "sidecar.txt")
|
||||
|
||||
with override_settings(OCR_CLEAN="none"):
|
||||
parser = RasterisedDocumentParser(None)
|
||||
params = parser.construct_ocrmypdf_parameters("", "", "", "")
|
||||
self.assertNotIn("clean", params)
|
||||
self.assertNotIn("clean_final", params)
|
||||
|
||||
with override_settings(OCR_CLEAN="clean"):
|
||||
parser = RasterisedDocumentParser(None)
|
||||
params = parser.construct_ocrmypdf_parameters("", "", "", "")
|
||||
self.assertTrue(params["clean"])
|
||||
self.assertNotIn("clean_final", params)
|
||||
|
||||
with override_settings(OCR_CLEAN="clean-final", OCR_MODE="skip"):
|
||||
parser = RasterisedDocumentParser(None)
|
||||
params = parser.construct_ocrmypdf_parameters("", "", "", "")
|
||||
self.assertTrue(params["clean_final"])
|
||||
self.assertNotIn("clean", params)
|
||||
|
||||
with override_settings(OCR_CLEAN="clean-final", OCR_MODE="redo"):
|
||||
parser = RasterisedDocumentParser(None)
|
||||
params = parser.construct_ocrmypdf_parameters("", "", "", "")
|
||||
self.assertTrue(params["clean"])
|
||||
self.assertNotIn("clean_final", params)
|
||||
|
||||
with override_settings(OCR_DESKEW=True, OCR_MODE="skip"):
|
||||
parser = RasterisedDocumentParser(None)
|
||||
params = parser.construct_ocrmypdf_parameters("", "", "", "")
|
||||
self.assertTrue(params["deskew"])
|
||||
|
||||
with override_settings(OCR_DESKEW=True, OCR_MODE="redo"):
|
||||
parser = RasterisedDocumentParser(None)
|
||||
params = parser.construct_ocrmypdf_parameters("", "", "", "")
|
||||
self.assertNotIn("deskew", params)
|
||||
|
||||
with override_settings(OCR_DESKEW=False, OCR_MODE="skip"):
|
||||
parser = RasterisedDocumentParser(None)
|
||||
params = parser.construct_ocrmypdf_parameters("", "", "", "")
|
||||
self.assertNotIn("deskew", params)
|
||||
|
||||
with override_settings(OCR_MAX_IMAGE_PIXELS=1_000_001.0):
|
||||
parser = RasterisedDocumentParser(None)
|
||||
params = parser.construct_ocrmypdf_parameters("", "", "", "")
|
||||
self.assertIn("max_image_mpixels", params)
|
||||
self.assertAlmostEqual(params["max_image_mpixels"], 1, places=4)
|
||||
|
||||
with override_settings(OCR_MAX_IMAGE_PIXELS=-1_000_001.0):
|
||||
parser = RasterisedDocumentParser(None)
|
||||
params = parser.construct_ocrmypdf_parameters("", "", "", "")
|
||||
self.assertNotIn("max_image_mpixels", params)
|
||||
|
||||
|
232
src/paperless_tesseract/tests/test_parser_custom_settings.py
Normal file
232
src/paperless_tesseract/tests/test_parser_custom_settings.py
Normal file
@@ -0,0 +1,232 @@
|
||||
import json
|
||||
|
||||
from django.test import TestCase
|
||||
from django.test import override_settings
|
||||
|
||||
from documents.tests.utils import DirectoriesMixin
|
||||
from documents.tests.utils import FileSystemAssertsMixin
|
||||
from paperless.models import ApplicationConfiguration
|
||||
from paperless.models import CleanChoices
|
||||
from paperless.models import ColorConvertChoices
|
||||
from paperless.models import ModeChoices
|
||||
from paperless.models import OutputTypeChoices
|
||||
from paperless_tesseract.parsers import RasterisedDocumentParser
|
||||
|
||||
|
||||
class TestParserSettingsFromDb(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
@staticmethod
|
||||
def get_params():
|
||||
"""
|
||||
Helper to get just the OCRMyPDF parameters from the parser
|
||||
"""
|
||||
return RasterisedDocumentParser(None).construct_ocrmypdf_parameters(
|
||||
input_file="input.pdf",
|
||||
output_file="output.pdf",
|
||||
sidecar_file="sidecar.txt",
|
||||
mime_type="application/pdf",
|
||||
safe_fallback=False,
|
||||
)
|
||||
|
||||
def test_db_settings_ocr_pages(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Django settings defines different value for OCR_PAGES than
|
||||
configuration object
|
||||
WHEN:
|
||||
- OCR parameters are constructed
|
||||
THEN:
|
||||
- Configuration from database is utilized
|
||||
"""
|
||||
with override_settings(OCR_PAGES=10):
|
||||
instance = ApplicationConfiguration.objects.all().first()
|
||||
instance.pages = 5
|
||||
instance.save()
|
||||
|
||||
params = self.get_params()
|
||||
self.assertEqual(params["pages"], "1-5")
|
||||
|
||||
def test_db_settings_ocr_language(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Django settings defines different value for OCR_LANGUAGE than
|
||||
configuration object
|
||||
WHEN:
|
||||
- OCR parameters are constructed
|
||||
THEN:
|
||||
- Configuration from database is utilized
|
||||
"""
|
||||
with override_settings(OCR_LANGUAGE="eng+deu"):
|
||||
instance = ApplicationConfiguration.objects.all().first()
|
||||
instance.language = "fra+ita"
|
||||
instance.save()
|
||||
|
||||
params = self.get_params()
|
||||
self.assertEqual(params["language"], "fra+ita")
|
||||
|
||||
def test_db_settings_ocr_output_type(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Django settings defines different value for OCR_OUTPUT_TYPE than
|
||||
configuration object
|
||||
WHEN:
|
||||
- OCR parameters are constructed
|
||||
THEN:
|
||||
- Configuration from database is utilized
|
||||
"""
|
||||
with override_settings(OCR_OUTPUT_TYPE="pdfa-3"):
|
||||
instance = ApplicationConfiguration.objects.all().first()
|
||||
instance.output_type = OutputTypeChoices.PDF_A
|
||||
instance.save()
|
||||
|
||||
params = self.get_params()
|
||||
self.assertEqual(params["output_type"], "pdfa")
|
||||
|
||||
def test_db_settings_ocr_mode(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Django settings defines different value for OCR_MODE than
|
||||
configuration object
|
||||
WHEN:
|
||||
- OCR parameters are constructed
|
||||
THEN:
|
||||
- Configuration from database is utilized
|
||||
"""
|
||||
with override_settings(OCR_MODE="redo"):
|
||||
instance = ApplicationConfiguration.objects.all().first()
|
||||
instance.mode = ModeChoices.SKIP
|
||||
instance.save()
|
||||
|
||||
params = self.get_params()
|
||||
self.assertTrue(params["skip_text"])
|
||||
self.assertNotIn("redo_ocr", params)
|
||||
self.assertNotIn("force_ocr", params)
|
||||
|
||||
def test_db_settings_ocr_clean(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Django settings defines different value for OCR_CLEAN than
|
||||
configuration object
|
||||
WHEN:
|
||||
- OCR parameters are constructed
|
||||
THEN:
|
||||
- Configuration from database is utilized
|
||||
"""
|
||||
with override_settings(OCR_CLEAN="clean-final"):
|
||||
instance = ApplicationConfiguration.objects.all().first()
|
||||
instance.unpaper_clean = CleanChoices.CLEAN
|
||||
instance.save()
|
||||
|
||||
params = self.get_params()
|
||||
self.assertTrue(params["clean"])
|
||||
self.assertNotIn("clean_final", params)
|
||||
|
||||
with override_settings(OCR_CLEAN="clean-final"):
|
||||
instance = ApplicationConfiguration.objects.all().first()
|
||||
instance.unpaper_clean = CleanChoices.FINAL
|
||||
instance.save()
|
||||
|
||||
params = self.get_params()
|
||||
self.assertTrue(params["clean_final"])
|
||||
self.assertNotIn("clean", params)
|
||||
|
||||
def test_db_settings_ocr_deskew(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Django settings defines different value for OCR_DESKEW than
|
||||
configuration object
|
||||
WHEN:
|
||||
- OCR parameters are constructed
|
||||
THEN:
|
||||
- Configuration from database is utilized
|
||||
"""
|
||||
with override_settings(OCR_DESKEW=False):
|
||||
instance = ApplicationConfiguration.objects.all().first()
|
||||
instance.deskew = True
|
||||
instance.save()
|
||||
|
||||
params = self.get_params()
|
||||
self.assertTrue(params["deskew"])
|
||||
|
||||
def test_db_settings_ocr_rotate(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Django settings defines different value for OCR_ROTATE_PAGES
|
||||
and OCR_ROTATE_PAGES_THRESHOLD than configuration object
|
||||
WHEN:
|
||||
- OCR parameters are constructed
|
||||
THEN:
|
||||
- Configuration from database is utilized
|
||||
"""
|
||||
with override_settings(OCR_ROTATE_PAGES=False, OCR_ROTATE_PAGES_THRESHOLD=30.0):
|
||||
instance = ApplicationConfiguration.objects.all().first()
|
||||
instance.rotate_pages = True
|
||||
instance.rotate_pages_threshold = 15.0
|
||||
instance.save()
|
||||
|
||||
params = self.get_params()
|
||||
self.assertTrue(params["rotate_pages"])
|
||||
self.assertAlmostEqual(params["rotate_pages_threshold"], 15.0)
|
||||
|
||||
def test_db_settings_ocr_max_pixels(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Django settings defines different value for OCR_MAX_IMAGE_PIXELS than
|
||||
configuration object
|
||||
WHEN:
|
||||
- OCR parameters are constructed
|
||||
THEN:
|
||||
- Configuration from database is utilized
|
||||
"""
|
||||
with override_settings(OCR_MAX_IMAGE_PIXELS=2_000_000.0):
|
||||
instance = ApplicationConfiguration.objects.all().first()
|
||||
instance.max_image_pixels = 1_000_000.0
|
||||
instance.save()
|
||||
|
||||
params = self.get_params()
|
||||
self.assertAlmostEqual(params["max_image_mpixels"], 1.0)
|
||||
|
||||
def test_db_settings_ocr_color_convert(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Django settings defines different value for OCR_COLOR_CONVERSION_STRATEGY than
|
||||
configuration object
|
||||
WHEN:
|
||||
- OCR parameters are constructed
|
||||
THEN:
|
||||
- Configuration from database is utilized
|
||||
"""
|
||||
with override_settings(OCR_COLOR_CONVERSION_STRATEGY="LeaveColorUnchanged"):
|
||||
instance = ApplicationConfiguration.objects.all().first()
|
||||
instance.color_conversion_strategy = ColorConvertChoices.INDEPENDENT
|
||||
instance.save()
|
||||
|
||||
params = self.get_params()
|
||||
self.assertEqual(
|
||||
params["color_conversion_strategy"],
|
||||
"UseDeviceIndependentColor",
|
||||
)
|
||||
|
||||
def test_ocr_user_args(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Django settings defines different value for OCR_USER_ARGS than
|
||||
configuration object
|
||||
WHEN:
|
||||
- OCR parameters are constructed
|
||||
THEN:
|
||||
- Configuration from database is utilized
|
||||
"""
|
||||
with override_settings(
|
||||
OCR_USER_ARGS=json.dumps({"continue_on_soft_render_error": True}),
|
||||
):
|
||||
instance = ApplicationConfiguration.objects.all().first()
|
||||
instance.user_args = {"unpaper_args": "--pre-rotate 90"}
|
||||
instance.save()
|
||||
|
||||
params = self.get_params()
|
||||
|
||||
self.assertIn("unpaper_args", params)
|
||||
self.assertEqual(
|
||||
params["unpaper_args"],
|
||||
"--pre-rotate 90",
|
||||
)
|
Reference in New Issue
Block a user