Feature: Allow setting backend configuration settings via the UI (#5126)

* Saving some start on this * At least partially working for the tesseract parser * Problems with migration testing need to figure out * Work around that error * Fixes max m_pixels * Moving the settings to main paperless application * Starting some consumer options * More fixes and work * Fixes these last tests * Fix max_length on OcrSettings.mode field * Fix all fields on Common & Ocr settings serializers * Umbrellla config view * Revert "Umbrellla config view" This reverts commit fbaf9f4be30f89afeb509099180158a3406416a5. * Updates to use a single configuration object for all settings * Squashed commit of the following: commit 8a0a49dd57 Author: shamoon <4887959+shamoon@users.noreply.github.com> Date: Tue Dec 19 23:02:47 2023 -0800 Fix formatting commit 66b2d90c50 Author: shamoon <4887959+shamoon@users.noreply.github.com> Date: Tue Dec 19 22:36:35 2023 -0800 Refactor frontend data models commit 5723bd8dd8 Author: Adam Bogdał <adam@bogdal.pl> Date: Wed Dec 20 01:17:43 2023 +0100 Fix: speed up admin panel for installs with a large number of documents (#5052) commit 9b08ce1761 Author: shamoon <4887959+shamoon@users.noreply.github.com> Date: Tue Dec 19 15:18:51 2023 -0800 Update PULL_REQUEST_TEMPLATE.md commit a6248bec2d Author: shamoon <4887959+shamoon@users.noreply.github.com> Date: Tue Dec 19 15:02:05 2023 -0800 Chore: Update Angular to v17 (#4980) commit b1f6f52486 Author: shamoon <4887959+shamoon@users.noreply.github.com> Date: Tue Dec 19 13:53:56 2023 -0800 Fix: Dont allow null custom_fields property via API (#5063) commit 638d9970fd Author: shamoon <4887959+shamoon@users.noreply.github.com> Date: Tue Dec 19 13:43:50 2023 -0800 Enhancement: symmetric document links (#4907) commit 5e8de4c1da Author: shamoon <4887959+shamoon@users.noreply.github.com> Date: Tue Dec 19 12:45:04 2023 -0800 Enhancement: shared icon & shared by me filter (#4859) commit 088bad9030 Author: Trenton H <797416+stumpylog@users.noreply.github.com> Date: Tue Dec 19 12:04:03 2023 -0800 Bulk 
updates all the backend libraries (#5061) * Saving some work on frontend config * Very basic but dynamically-generated config form * Saving work on slightly less ugly frontend config * JSON validation for user_args field * Fully dynamic config form * Adds in some additional validators for a nicer error message * Cleaning up the testing and coverage more * Reverts unintentional change * Adds documentation about the settings and the precedence * Couple more commenting and style fixes --------- Co-authored-by: shamoon <4887959+shamoon@users.noreply.github.com>
2025-09-06 21:13:43 -05:00 · 2023-12-29 15:42:56 -08:00
parent 718eaf04d4
commit c8a62715ec
41 changed files with 1570 additions and 119 deletions
--- a/src/documents/classifier.py
+++ b/src/documents/classifier.py
@@ -52,7 +52,7 @@ def load_classifier() -> Optional["DocumentClassifier"]:
    except OSError:
        logger.exception("IO error while loading document classification model")
        classifier = None
-    except Exception:  # pragma: nocover
+    except Exception:  # pragma: no cover
        logger.exception("Unknown error while loading document classification model")
        classifier = None

@@ -318,7 +318,7 @@ class DocumentClassifier:

        return True

-    def preprocess_content(self, content: str) -> str:  # pragma: nocover
+    def preprocess_content(self, content: str) -> str:  # pragma: no cover
        """
        Process the contents of a document, distilling it down into
        words which are meaningful to the content
--- a/src/documents/consumer.py
+++ b/src/documents/consumer.py
@@ -420,7 +420,7 @@ class Consumer(LoggingMixin):

        document_parser: DocumentParser = parser_class(
            self.logging_group,
-            progress_callback,
+            progress_callback=progress_callback,
        )

        self.log.debug(f"Parser: {type(document_parser).__name__}")
--- a/src/documents/management/commands/document_consumer.py
+++ b/src/documents/management/commands/document_consumer.py
@@ -26,7 +26,7 @@ from documents.tasks import consume_file
 try:
    from inotifyrecursive import INotify
    from inotifyrecursive import flags
-except ImportError:  # pragma: nocover
+except ImportError:  # pragma: no cover
    INotify = flags = None

 logger = logging.getLogger("paperless.management.consumer")
--- a/src/documents/management/commands/document_exporter.py
+++ b/src/documents/management/commands/document_exporter.py
@@ -41,6 +41,7 @@ from documents.settings import EXPORTER_THUMBNAIL_NAME
 from documents.utils import copy_file_with_basic_stats
 from paperless import version
 from paperless.db import GnuPG
+from paperless.models import ApplicationConfiguration
 from paperless_mail.models import MailAccount
 from paperless_mail.models import MailRule

@@ -291,6 +292,10 @@ class Command(BaseCommand):
                serializers.serialize("json", CustomField.objects.all()),
            )

+            manifest += json.loads(
+                serializers.serialize("json", ApplicationConfiguration.objects.all()),
+            )
+
            # These are treated specially and included in the per-document manifest
            # if that setting is enabled.  Otherwise, they are just exported to the bulk
            # manifest
--- a/src/documents/management/commands/loaddata_stdin.py
+++ b/src/documents/management/commands/loaddata_stdin.py
@@ -5,7 +5,7 @@ from django.core.management.commands.loaddata import Command as LoadDataCommand

 # This class is used to migrate data between databases
 # That's difficult to test
-class Command(LoadDataCommand):  # pragma: nocover
+class Command(LoadDataCommand):  # pragma: no cover
    """
    Allow the loading of data from standard in.  Sourced originally from:
    https://gist.github.com/bmispelon/ad5a2c333443b3a1d051 (MIT licensed)
--- a/src/documents/parsers.py
+++ b/src/documents/parsers.py
@@ -125,8 +125,10 @@ def get_parser_class_for_mime_type(mime_type: str) -> Optional[type["DocumentPar
    if not options:
        return None

+    best_parser = sorted(options, key=lambda _: _["weight"], reverse=True)[0]
+
    # Return the parser with the highest weight.
-    return sorted(options, key=lambda _: _["weight"], reverse=True)[0]["parser"]
+    return best_parser["parser"]


 def run_convert(
@@ -318,6 +320,7 @@ class DocumentParser(LoggingMixin):
    def __init__(self, logging_group, progress_callback=None):
        super().__init__()
        self.logging_group = logging_group
+        self.settings = self.get_settings()
        os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
        self.tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR)

@@ -330,6 +333,12 @@ class DocumentParser(LoggingMixin):
        if self.progress_callback:
            self.progress_callback(current_progress, max_progress)

+    def get_settings(self):  # pragma: no cover
+        """
+        A parser must implement this
+        """
+        raise NotImplementedError
+
    def read_file_handle_unicode_errors(self, filepath: Path) -> str:
        """
        Helper utility for reading from a file, and handling a problem with its
--- a/src/documents/tests/test_consumer.py
+++ b/src/documents/tests/test_consumer.py
@@ -172,7 +172,15 @@ class TestFieldPermutations(TestCase):
            self.assertEqual(info.title, "anotherall")


-class DummyParser(DocumentParser):
+class _BaseTestParser(DocumentParser):
+    def get_settings(self):
+        """
+        This parser does not implement additional settings yet
+        """
+        return None
+
+
+class DummyParser(_BaseTestParser):
    def __init__(self, logging_group, scratch_dir, archive_path):
        super().__init__(logging_group, None)
        _, self.fake_thumb = tempfile.mkstemp(suffix=".webp", dir=scratch_dir)
@@ -185,7 +193,7 @@ class DummyParser(DocumentParser):
        self.text = "The Text"


-class CopyParser(DocumentParser):
+class CopyParser(_BaseTestParser):
    def get_thumbnail(self, document_path, mime_type, file_name=None):
        return self.fake_thumb

@@ -199,7 +207,7 @@ class CopyParser(DocumentParser):
        shutil.copy(document_path, self.archive_path)


-class FaultyParser(DocumentParser):
+class FaultyParser(_BaseTestParser):
    def __init__(self, logging_group, scratch_dir):
        super().__init__(logging_group)
        _, self.fake_thumb = tempfile.mkstemp(suffix=".webp", dir=scratch_dir)
@@ -211,7 +219,7 @@ class FaultyParser(DocumentParser):
        raise ParseError("Does not compute.")


-class FaultyGenericExceptionParser(DocumentParser):
+class FaultyGenericExceptionParser(_BaseTestParser):
    def __init__(self, logging_group, scratch_dir):
        super().__init__(logging_group)
        _, self.fake_thumb = tempfile.mkstemp(suffix=".webp", dir=scratch_dir)
--- a/src/documents/tests/test_management_exporter.py
+++ b/src/documents/tests/test_management_exporter.py
@@ -168,7 +168,7 @@ class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase):

        manifest = self._do_export(use_filename_format=use_filename_format)

-        self.assertEqual(len(manifest), 172)
+        self.assertEqual(len(manifest), 178)

        # don't include consumer or AnonymousUser users
        self.assertEqual(
@@ -262,7 +262,7 @@ class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
            self.assertEqual(Document.objects.get(id=self.d4.id).title, "wow_dec")
            self.assertEqual(GroupObjectPermission.objects.count(), 1)
            self.assertEqual(UserObjectPermission.objects.count(), 1)
-            self.assertEqual(Permission.objects.count(), 124)
+            self.assertEqual(Permission.objects.count(), 128)
            messages = check_sanity()
            # everything is alright after the test
            self.assertEqual(len(messages), 0)
@@ -694,15 +694,15 @@ class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
            os.path.join(self.dirs.media_dir, "documents"),
        )

-        self.assertEqual(ContentType.objects.count(), 31)
-        self.assertEqual(Permission.objects.count(), 124)
+        self.assertEqual(ContentType.objects.count(), 32)
+        self.assertEqual(Permission.objects.count(), 128)

        manifest = self._do_export()

        with paperless_environment():
            self.assertEqual(
                len(list(filter(lambda e: e["model"] == "auth.permission", manifest))),
-                124,
+                128,
            )
            # add 1 more to db to show objects are not re-created by import
            Permission.objects.create(
@@ -710,7 +710,7 @@ class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
                codename="test_perm",
                content_type_id=1,
            )
-            self.assertEqual(Permission.objects.count(), 125)
+            self.assertEqual(Permission.objects.count(), 129)

            # will cause an import error
            self.user.delete()
@@ -719,5 +719,5 @@ class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
            with self.assertRaises(IntegrityError):
                call_command("document_importer", "--no-progress-bar", self.target)

-            self.assertEqual(ContentType.objects.count(), 31)
-            self.assertEqual(Permission.objects.count(), 125)
+            self.assertEqual(ContentType.objects.count(), 32)
+            self.assertEqual(Permission.objects.count(), 129)