Merge branch 'dev' into feature-unified-search

jonaswinkler committed 2021-04-03 20:31:16 +02:00
59 changed files with 1160 additions and 957 deletions

@@ -64,9 +64,9 @@ class Consumer(LoggingMixin):
                                {'type': 'status_update',
                                 'data': payload})
 
-    def _fail(self, message, log_message=None):
+    def _fail(self, message, log_message=None, exc_info=None):
         self._send_progress(100, 100, 'FAILED', message)
-        self.log("error", log_message or message)
+        self.log("error", log_message or message, exc_info=exc_info)
         raise ConsumerError(f"{self.filename}: {log_message or message}")
 
     def __init__(self):
@@ -120,7 +120,8 @@ class Consumer(LoggingMixin):
         except Exception as e:
             self._fail(
                 MESSAGE_PRE_CONSUME_SCRIPT_ERROR,
-                f"Error while executing pre-consume script: {e}"
+                f"Error while executing pre-consume script: {e}",
+                exc_info=True
             )
 
     def run_post_consume_script(self, document):
@@ -150,7 +151,8 @@ class Consumer(LoggingMixin):
         except Exception as e:
             self._fail(
                 MESSAGE_POST_CONSUME_SCRIPT_ERROR,
-                f"Error while executing post-consume script: {e}"
+                f"Error while executing post-consume script: {e}",
+                exc_info=True
             )
 
     def try_consume_file(self,
@@ -255,7 +257,8 @@ class Consumer(LoggingMixin):
             document_parser.cleanup()
             self._fail(
                 str(e),
-                f"Error while consuming document {self.filename}: {e}"
+                f"Error while consuming document {self.filename}: {e}",
+                exc_info=True
             )
 
         # Prepare the document classifier.
@@ -326,7 +329,8 @@ class Consumer(LoggingMixin):
             self._fail(
                 str(e),
                 f"The following error occured while consuming "
-                f"{self.filename}: {e}"
+                f"{self.filename}: {e}",
+                exc_info=True
             )
         finally:
             document_parser.cleanup()
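
Threading `exc_info` through `_fail` means a failure logged from inside an `except` block now carries the full traceback instead of just the exception message. A minimal sketch of the same stdlib `logging` pattern, with illustrative names that are not part of the paperless code:

    import logging

    logging.basicConfig(level=logging.ERROR)
    logger = logging.getLogger("demo")

    def fail(message, exc_info=None):
        # When exc_info=True is passed while an exception is being handled,
        # logging appends the active exception's traceback to the record.
        logger.error(message, exc_info=exc_info)
        raise RuntimeError(message)

    try:
        1 / 0
    except Exception as e:
        fail(f"Error while consuming document: {e}", exc_info=True)

The log output now ends with the ZeroDivisionError traceback, which is what the `exc_info=True` call sites above buy for pre-consume, post-consume, and parser errors.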

@@ -6,15 +6,18 @@ import time
 import tqdm
 from django.conf import settings
+from django.contrib.auth.models import User
 from django.core import serializers
 from django.core.management.base import BaseCommand, CommandError
 from django.db import transaction
 from filelock import FileLock
 
-from documents.models import Document, Correspondent, Tag, DocumentType
+from documents.models import Document, Correspondent, Tag, DocumentType, \
+    SavedView, SavedViewFilterRule
 from documents.settings import EXPORTER_FILE_NAME, EXPORTER_THUMBNAIL_NAME, \
     EXPORTER_ARCHIVE_NAME
 from paperless.db import GnuPG
+from paperless_mail.models import MailAccount, MailRule
 from ...file_handling import generate_filename, delete_empty_directories
@@ -105,6 +108,21 @@ class Command(BaseCommand):
             serializers.serialize("json", documents))
         manifest += document_manifest
 
+        manifest += json.loads(serializers.serialize(
+            "json", MailAccount.objects.all()))
+
+        manifest += json.loads(serializers.serialize(
+            "json", MailRule.objects.all()))
+
+        manifest += json.loads(serializers.serialize(
+            "json", SavedView.objects.all()))
+
+        manifest += json.loads(serializers.serialize(
+            "json", SavedViewFilterRule.objects.all()))
+
+        manifest += json.loads(serializers.serialize(
+            "json", User.objects.all()))
+
         # 3. Export files from each document
         for index, document_dict in tqdm.tqdm(enumerate(document_manifest),
                                               total=len(document_manifest)):
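
Each added block follows the same round trip: `serializers.serialize` renders a queryset to a JSON string, and `json.loads` turns that string back into a list of plain dicts that can simply be concatenated onto the manifest. A rough sketch of the mechanism, assuming a configured Django project with the `documents` app available:

    import json
    from django.core import serializers
    from documents.models import Tag

    manifest = []
    # serialize() returns a JSON string; json.loads() yields a list of
    # {"model": ..., "pk": ..., "fields": {...}} dicts, one per object.
    manifest += json.loads(serializers.serialize("json", Tag.objects.all()))
    print(manifest[0]["model"])   # "documents.tag" (if any tags exist)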

@@ -90,7 +90,7 @@ def matches(matching_model, document):
     elif matching_model.matching_algorithm == MatchingModel.MATCH_LITERAL:
         result = bool(re.search(
-            rf"\b{matching_model.match}\b",
+            rf"\b{re.escape(matching_model.match)}\b",
             document_content,
             **search_kwargs
         ))
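
Wrapping the user-supplied literal in `re.escape` matters because `MATCH_LITERAL` values are interpolated straight into a regular expression: metacharacters in the value could otherwise match unintended text or raise `re.error`. A small illustration (example strings are invented):

    import re

    match = "2.4GHz"                      # literal value containing a regex "."
    content = "dual band 2x4GHz router"   # note: no actual "2.4GHz" here

    # Unescaped, "." matches any character, so the search wrongly hits "2x4GHz";
    # values like "c++" would even raise re.error ("multiple repeat").
    print(bool(re.search(rf"\b{match}\b", content)))             # True (wrong)
    # Escaped, the dot is literal and the search correctly finds nothing.
    print(bool(re.search(rf"\b{re.escape(match)}\b", content)))  # False (right)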
@@ -161,6 +161,9 @@ def _split_match(matching_model):
     findterms = re.compile(r'"([^"]+)"|(\S+)').findall
     normspace = re.compile(r"\s+").sub
     return [
-        normspace(" ", (t[0] or t[1]).strip()).replace(" ", r"\s+")
+        # normspace(" ", (t[0] or t[1]).strip()).replace(" ", r"\s+")
+        re.escape(
+            normspace(" ", (t[0] or t[1]).strip())
+        ).replace(r"\ ", r"\s+")
         for t in findterms(matching_model.match)
     ]
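
`_split_match` gets the same treatment: each quoted phrase or bare word is escaped first, and only then is the escaped space (`re.escape` renders a space as `\ `) widened to `\s+`, so multi-word terms still match across arbitrary whitespace. A condensed, self-contained re-run of the new comprehension:

    import re

    findterms = re.compile(r'"([^"]+)"|(\S+)').findall
    normspace = re.compile(r"\s+").sub

    def split_match(match):
        # Quoted phrases stay together; bare words become single terms.
        return [
            re.escape(normspace(" ", (t[0] or t[1]).strip())).replace(r"\ ", r"\s+")
            for t in findterms(match)
        ]

    print(split_match('"new  york" c++'))   # ['new\\s+york', 'c\\+\\+']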

@@ -69,7 +69,7 @@ class TestExportImport(DirectoriesMixin, TestCase):
         manifest = self._do_export(use_filename_format=use_filename_format)
 
-        self.assertEqual(len(manifest), 7)
+        self.assertEqual(len(manifest), 8)
 
         self.assertEqual(len(list(filter(lambda e: e['model'] == 'documents.document', manifest))), 4)
 
         self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))
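
The expected manifest length rises from 7 to 8 because the exporter now also serializes users, saved views, and mail settings; in this fixture that presumably adds exactly one extra entry on top of the 4 documents and their related objects. A quick, hypothetical way to inspect the breakdown (not part of the test itself):

    from collections import Counter

    # Counts manifest entries per model; the exact breakdown depends on
    # what the test's setUp created.
    print(Counter(entry["model"] for entry in manifest))
    # e.g. Counter({'documents.document': 4, 'documents.correspondent': 1,
    #               'documents.tag': 1, 'documents.documenttype': 1,
    #               'auth.user': 1})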