Merge branch 'dev' into feature-unified-search

jonaswinkler committed 2021-04-03 20:31:16 +02:00
59 changed files with 1160 additions and 957 deletions

@@ -64,9 +64,9 @@ class Consumer(LoggingMixin):
                                {'type': 'status_update',
                                 'data': payload})
 
-    def _fail(self, message, log_message=None):
+    def _fail(self, message, log_message=None, exc_info=None):
         self._send_progress(100, 100, 'FAILED', message)
-        self.log("error", log_message or message)
+        self.log("error", log_message or message, exc_info=exc_info)
         raise ConsumerError(f"{self.filename}: {log_message or message}")
 
     def __init__(self):
@@ -120,7 +120,8 @@ class Consumer(LoggingMixin):
         except Exception as e:
             self._fail(
                 MESSAGE_PRE_CONSUME_SCRIPT_ERROR,
-                f"Error while executing pre-consume script: {e}"
+                f"Error while executing pre-consume script: {e}",
+                exc_info=True
             )
 
     def run_post_consume_script(self, document):
@@ -150,7 +151,8 @@ class Consumer(LoggingMixin):
         except Exception as e:
             self._fail(
                 MESSAGE_POST_CONSUME_SCRIPT_ERROR,
-                f"Error while executing post-consume script: {e}"
+                f"Error while executing post-consume script: {e}",
+                exc_info=True
             )
 
     def try_consume_file(self,
@@ -255,7 +257,8 @@ class Consumer(LoggingMixin):
             document_parser.cleanup()
             self._fail(
                 str(e),
-                f"Error while consuming document {self.filename}: {e}"
+                f"Error while consuming document {self.filename}: {e}",
+                exc_info=True
             )
 
         # Prepare the document classifier.
@@ -326,7 +329,8 @@ class Consumer(LoggingMixin):
             self._fail(
                 str(e),
                 f"The following error occured while consuming "
-                f"{self.filename}: {e}"
+                f"{self.filename}: {e}",
+                exc_info=True
             )
         finally:
             document_parser.cleanup()
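
Threading `exc_info` through `_fail` means a failure logged from inside an `except` block now carries the full traceback instead of just the exception message. A minimal sketch of the same stdlib `logging` pattern, with illustrative names that are not part of the paperless code:

    import logging

    logging.basicConfig(level=logging.ERROR)
    logger = logging.getLogger("demo")

    def fail(message, exc_info=None):
        # When exc_info=True is passed while an exception is being handled,
        # logging appends the active exception's traceback to the record.
        logger.error(message, exc_info=exc_info)
        raise RuntimeError(message)

    try:
        1 / 0
    except Exception as e:
        fail(f"Error while consuming document: {e}", exc_info=True)

The log output now ends with the ZeroDivisionError traceback, which is what the `exc_info=True` call sites above buy for pre-consume, post-consume, and parser errors.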

@@ -6,15 +6,18 @@ import time
 import tqdm
 from django.conf import settings
+from django.contrib.auth.models import User
 from django.core import serializers
 from django.core.management.base import BaseCommand, CommandError
 from django.db import transaction
 from filelock import FileLock
 
-from documents.models import Document, Correspondent, Tag, DocumentType
+from documents.models import Document, Correspondent, Tag, DocumentType, \
+    SavedView, SavedViewFilterRule
 from documents.settings import EXPORTER_FILE_NAME, EXPORTER_THUMBNAIL_NAME, \
     EXPORTER_ARCHIVE_NAME
 from paperless.db import GnuPG
+from paperless_mail.models import MailAccount, MailRule
 from ...file_handling import generate_filename, delete_empty_directories
@@ -105,6 +108,21 @@ class Command(BaseCommand):
             serializers.serialize("json", documents))
         manifest += document_manifest
 
+        manifest += json.loads(serializers.serialize(
+            "json", MailAccount.objects.all()))
+
+        manifest += json.loads(serializers.serialize(
+            "json", MailRule.objects.all()))
+
+        manifest += json.loads(serializers.serialize(
+            "json", SavedView.objects.all()))
+
+        manifest += json.loads(serializers.serialize(
+            "json", SavedViewFilterRule.objects.all()))
+
+        manifest += json.loads(serializers.serialize(
+            "json", User.objects.all()))
+
         # 3. Export files from each document
         for index, document_dict in tqdm.tqdm(enumerate(document_manifest),
                                               total=len(document_manifest)):
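
Each added block follows the same round trip: `serializers.serialize` renders a queryset to a JSON string, and `json.loads` turns that string back into a list of plain dicts that can simply be concatenated onto the manifest. A rough sketch of the mechanism, assuming a configured Django project with the `documents` app available:

    import json
    from django.core import serializers
    from documents.models import Tag

    manifest = []
    # serialize() returns a JSON string; json.loads() yields a list of
    # {"model": ..., "pk": ..., "fields": {...}} dicts, one per object.
    manifest += json.loads(serializers.serialize("json", Tag.objects.all()))
    print(manifest[0]["model"])   # "documents.tag" (if any tags exist)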

@@ -90,7 +90,7 @@ def matches(matching_model, document):
     elif matching_model.matching_algorithm == MatchingModel.MATCH_LITERAL:
         result = bool(re.search(
-            rf"\b{matching_model.match}\b",
+            rf"\b{re.escape(matching_model.match)}\b",
             document_content,
             **search_kwargs
         ))
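
Wrapping the user-supplied literal in `re.escape` matters because `MATCH_LITERAL` values are interpolated straight into a regular expression: metacharacters in the value could otherwise match unintended text or raise `re.error`. A small illustration (example strings are invented):

    import re

    match = "2.4GHz"                      # literal value containing a regex "."
    content = "dual band 2x4GHz router"   # note: no actual "2.4GHz" here

    # Unescaped, "." matches any character, so the search wrongly hits "2x4GHz";
    # values like "c++" would even raise re.error ("multiple repeat").
    print(bool(re.search(rf"\b{match}\b", content)))             # True (wrong)
    # Escaped, the dot is literal and the search correctly finds nothing.
    print(bool(re.search(rf"\b{re.escape(match)}\b", content)))  # False (right)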
@@ -161,6 +161,9 @@ def _split_match(matching_model):
     findterms = re.compile(r'"([^"]+)"|(\S+)').findall
     normspace = re.compile(r"\s+").sub
     return [
-        normspace(" ", (t[0] or t[1]).strip()).replace(" ", r"\s+")
+        # normspace(" ", (t[0] or t[1]).strip()).replace(" ", r"\s+")
+        re.escape(
+            normspace(" ", (t[0] or t[1]).strip())
+        ).replace(r"\ ", r"\s+")
         for t in findterms(matching_model.match)
     ]
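
`_split_match` gets the same treatment: each quoted phrase or bare word is escaped first, and only then is the escaped space (`re.escape` renders a space as `\ `) widened to `\s+`, so multi-word terms still match across arbitrary whitespace. A condensed, self-contained re-run of the new comprehension:

    import re

    findterms = re.compile(r'"([^"]+)"|(\S+)').findall
    normspace = re.compile(r"\s+").sub

    def split_match(match):
        # Quoted phrases stay together; bare words become single terms.
        return [
            re.escape(normspace(" ", (t[0] or t[1]).strip())).replace(r"\ ", r"\s+")
            for t in findterms(match)
        ]

    print(split_match('"new  york" c++'))   # ['new\\s+york', 'c\\+\\+']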

@@ -69,7 +69,7 @@ class TestExportImport(DirectoriesMixin, TestCase):
         manifest = self._do_export(use_filename_format=use_filename_format)
 
-        self.assertEqual(len(manifest), 7)
+        self.assertEqual(len(manifest), 8)
 
         self.assertEqual(len(list(filter(lambda e: e['model'] == 'documents.document', manifest))), 4)
 
         self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))
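
The expected manifest length rises from 7 to 8 because the exporter now also serializes users, saved views, and mail settings; in this fixture that presumably adds exactly one extra entry on top of the 4 documents and their related objects. A quick, hypothetical way to inspect the breakdown (not part of the test itself):

    from collections import Counter

    # Counts manifest entries per model; the exact breakdown depends on
    # what the test's setUp created.
    print(Counter(entry["model"] for entry in manifest))
    # e.g. Counter({'documents.document': 4, 'documents.correspondent': 1,
    #               'documents.tag': 1, 'documents.documenttype': 1,
    #               'auth.user': 1})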