mail handling: When exceptions occur during account/rule/message handling, paperless will continue with the next account/rule/message.

mail handling: When paperless encounters a very long

fixes #82
This commit is contained in:
jonaswinkler 2020-12-04 15:42:05 +01:00
parent 991a46c4f0
commit 371745b6dc
6 changed files with 197 additions and 140 deletions

View File

@ -28,10 +28,10 @@ class LoggingMixin:
def renew_logging_group(self):
self.logging_group = uuid.uuid4()
def log(self, level, message):
def log(self, level, message, **kwargs):
target = ".".join([self.__class__.__module__, self.__class__.__name__])
logger = logging.getLogger(target)
getattr(logger, level)(message, extra={
"group": self.logging_group
})
}, **kwargs)

View File

@ -274,7 +274,7 @@ LOGGING = {
"class": "documents.loggers.PaperlessHandler",
},
"console": {
"level": "WARNING",
"level": "INFO",
"class": "logging.StreamHandler",
"formatter": "verbose",
}

View File

@ -4,6 +4,7 @@ from datetime import timedelta, date
import magic
from django.conf import settings
from django.db import DatabaseError
from django.utils.text import slugify
from django_q.tasks import async_task
from imap_tools import MailBox, MailBoxUnencrypted, AND, MailMessageFlags, \
@ -86,46 +87,6 @@ def make_criterias(rule):
return {**criterias, **get_rule_action(rule).get_criteria()}
def get_title(message, att, rule):
if rule.assign_title_from == MailRule.TITLE_FROM_SUBJECT:
title = message.subject
elif rule.assign_title_from == MailRule.TITLE_FROM_FILENAME:
title = os.path.splitext(os.path.basename(att.filename))[0]
else:
raise ValueError("Unknown title selector.")
return title
def get_correspondent(message, rule):
if rule.assign_correspondent_from == MailRule.CORRESPONDENT_FROM_NOTHING:
correspondent = None
elif rule.assign_correspondent_from == MailRule.CORRESPONDENT_FROM_EMAIL:
correspondent_name = message.from_
correspondent = Correspondent.objects.get_or_create(
name=correspondent_name, defaults={
"slug": slugify(correspondent_name)
})[0]
elif rule.assign_correspondent_from == MailRule.CORRESPONDENT_FROM_NAME:
if message.from_values and \
'name' in message.from_values \
and message.from_values['name']:
correspondent_name = message.from_values['name']
else:
correspondent_name = message.from_
correspondent = Correspondent.objects.get_or_create(
name=correspondent_name, defaults={
"slug": slugify(correspondent_name)
})[0]
elif rule.assign_correspondent_from == MailRule.CORRESPONDENT_FROM_CUSTOM:
correspondent = rule.assign_correspondent
else:
raise ValueError("Unknwown correspondent selector")
return correspondent
def get_mailbox(server, port, security):
if security == MailAccount.IMAP_SECURITY_NONE:
mailbox = MailBoxUnencrypted(server, port)
@ -140,6 +101,51 @@ def get_mailbox(server, port, security):
class MailAccountHandler(LoggingMixin):
def _correspondent_from_name(self, name):
try:
return Correspondent.objects.get_or_create(
name=name, defaults={
"slug": slugify(name)
})[0]
except DatabaseError as e:
self.log(
"error",
f"Error while retrieving correspondent {name}: {e}"
)
return None
def get_title(self, message, att, rule):
if rule.assign_title_from == MailRule.TITLE_FROM_SUBJECT:
return message.subject
elif rule.assign_title_from == MailRule.TITLE_FROM_FILENAME:
return os.path.splitext(os.path.basename(att.filename))[0]
else:
raise ValueError("Unknown title selector.")
def get_correspondent(self, message, rule):
c_from = rule.assign_correspondent_from
if c_from == MailRule.CORRESPONDENT_FROM_NOTHING:
return None
elif c_from == MailRule.CORRESPONDENT_FROM_EMAIL:
return self._correspondent_from_name(message.from_)
elif c_from == MailRule.CORRESPONDENT_FROM_NAME:
if message.from_values and 'name' in message.from_values and message.from_values['name']: # NOQA: E501
return self._correspondent_from_name(
message.from_values['name'])
else:
return self._correspondent_from_name(message.from_)
elif c_from == MailRule.CORRESPONDENT_FROM_CUSTOM:
return rule.assign_correspondent
else:
raise ValueError("Unknwown correspondent selector")
def handle_mail_account(self, account):
self.renew_logging_group()
@ -156,79 +162,89 @@ class MailAccountHandler(LoggingMixin):
M.login(account.username, account.password)
except Exception:
raise MailError(
f"Error while authenticating account {account.name}")
f"Error while authenticating account {account}")
self.log('debug', f"Account {account}: Processing "
f"{account.rules.count()} rule(s)")
for rule in account.rules.order_by('order'):
self.log(
'debug',
f"Account {account}: Processing rule {rule.name}")
self.log(
'debug',
f"Rule {account}.{rule}: Selecting folder {rule.folder}")
try:
M.folder.set(rule.folder)
except MailboxFolderSelectError:
raise MailError(
f"Rule {rule.name}: Folder {rule.folder} "
f"does not exist in account {account.name}")
total_processed_files += self.handle_mail_rule(M, rule)
except Exception as e:
self.log(
"error",
f"Rule {rule}: Error while processing rule: {e}",
exc_info=True
)
criterias = make_criterias(rule)
return total_processed_files
def handle_mail_rule(self, M, rule):
self.log(
'debug',
f"Rule {rule}: Selecting folder {rule.folder}")
try:
M.folder.set(rule.folder)
except MailboxFolderSelectError:
raise MailError(
f"Rule {rule}: Folder {rule.folder} "
f"does not exist in account {rule.account}")
criterias = make_criterias(rule)
self.log(
'debug',
f"Rule {rule}: Searching folder with criteria "
f"{str(AND(**criterias))}")
try:
messages = M.fetch(criteria=AND(**criterias),
mark_seen=False)
except Exception:
raise MailError(
f"Rule {rule}: Error while fetching folder {rule.folder}")
post_consume_messages = []
mails_processed = 0
total_processed_files = 0
for message in messages:
try:
processed_files = self.handle_message(message, rule)
if processed_files > 0:
post_consume_messages.append(message.uid)
total_processed_files += processed_files
mails_processed += 1
except Exception as e:
self.log(
'debug',
f"Rule {account}.{rule}: Searching folder with criteria "
f"{str(AND(**criterias))}")
"error",
f"Rule {rule}: Error while processing mail "
f"{message.uid}: {e}",
exc_info=True)
try:
messages = M.fetch(criteria=AND(**criterias),
mark_seen=False)
except Exception:
raise MailError(
f"Rule {rule.name}: Error while fetching folder "
f"{rule.folder} of account {account.name}")
self.log(
'debug',
f"Rule {rule}: Processed {mails_processed} matching mail(s)")
post_consume_messages = []
self.log(
'debug',
f"Rule {rule}: Running mail actions on "
f"{len(post_consume_messages)} mails")
mails_processed = 0
try:
get_rule_action(rule).post_consume(
M,
post_consume_messages,
rule.action_parameter)
for message in messages:
try:
processed_files = self.handle_message(message, rule)
except Exception:
raise MailError(
f"Rule {rule.name}: Error while processing mail "
f"{message.uid} of account {account.name}")
if processed_files > 0:
post_consume_messages.append(message.uid)
total_processed_files += processed_files
mails_processed += 1
self.log(
'debug',
f"Rule {account}.{rule}: Processed {mails_processed} "
f"matching mail(s)")
self.log(
'debug',
f"Rule {account}.{rule}: Running mail actions on "
f"{len(post_consume_messages)} mails")
try:
get_rule_action(rule).post_consume(
M,
post_consume_messages,
rule.action_parameter)
except Exception:
raise MailError(
f"Rule {rule.name}: Error while processing "
f"post-consume actions for account {account.name}")
except Exception as e:
raise MailError(
f"Rule {rule}: Error while processing post-consume actions: "
f"{e}")
return total_processed_files
@ -238,11 +254,11 @@ class MailAccountHandler(LoggingMixin):
self.log(
'debug',
f"Rule {rule.account}.{rule}: "
f"Rule {rule}: "
f"Processing mail {message.subject} from {message.from_} with "
f"{len(message.attachments)} attachment(s)")
correspondent = get_correspondent(message, rule)
correspondent = self.get_correspondent(message, rule)
tag = rule.assign_tag
doc_type = rule.assign_document_type
@ -253,12 +269,12 @@ class MailAccountHandler(LoggingMixin):
if not att.content_disposition == "attachment":
self.log(
'debug',
f"Rule {rule.account}.{rule}: "
f"Rule {rule}: "
f"Skipping attachment {att.filename} "
f"with content disposition inline")
f"with content disposition {att.content_disposition}")
continue
title = get_title(message, att, rule)
title = self.get_title(message, att, rule)
# don't trust the content type of the attachment. Could be
# generic application/octet-stream.
@ -274,7 +290,7 @@ class MailAccountHandler(LoggingMixin):
self.log(
'info',
f"Rule {rule.account}.{rule}: "
f"Rule {rule}: "
f"Consuming attachment {att.filename} from mail "
f"{message.subject} from {message.from_}")
@ -293,7 +309,7 @@ class MailAccountHandler(LoggingMixin):
else:
self.log(
'debug',
f"Rule {rule.account}.{rule}: "
f"Rule {rule}: "
f"Skipping attachment {att.filename} "
f"since guessed mime type {mime_type} is not supported "
f"by paperless")

View File

@ -139,4 +139,4 @@ class MailRule(models.Model):
)
def __str__(self):
return self.name
return f"{self.account.name}.{self.name}"

View File

@ -1,14 +1,20 @@
import logging
from paperless_mail.mail import MailAccountHandler
from paperless_mail.mail import MailAccountHandler, MailError
from paperless_mail.models import MailAccount
def process_mail_accounts():
total_new_documents = 0
for account in MailAccount.objects.all():
total_new_documents += MailAccountHandler().handle_mail_account(
account)
try:
total_new_documents += MailAccountHandler().handle_mail_account(
account)
except MailError as e:
logging.getLogger(__name__).error(
f"Error while processing mail account {account}: {e}",
exc_info=True
)
if total_new_documents > 0:
return f"Added {total_new_documents} document(s)."
@ -21,4 +27,4 @@ def process_mail_account(name):
account = MailAccount.objects.get(name=name)
MailAccountHandler().handle_mail_account(account)
except MailAccount.DoesNotExist:
logging.error("Unknown mail acccount: {}".format(name))
logging.getLogger(__name__).error(f"Unknown mail acccount: {name}")

View File

@ -4,12 +4,13 @@ from typing import ContextManager
from unittest import mock
from django.core.management import call_command
from django.db import DatabaseError
from django.test import TestCase
from imap_tools import MailMessageFlags, MailboxFolderSelectError
from documents.models import Correspondent
from paperless_mail import tasks
from paperless_mail.mail import MailError, MailAccountHandler, get_correspondent, get_title
from paperless_mail.mail import MailError, MailAccountHandler
from paperless_mail.models import MailRule, MailAccount
@ -165,28 +166,30 @@ class TestMail(TestCase):
me_localhost = Correspondent.objects.create(name=message2.from_)
someone_else = Correspondent.objects.create(name="someone else")
handler = MailAccountHandler()
rule = MailRule(name="a", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_NOTHING)
self.assertIsNone(get_correspondent(message, rule))
self.assertIsNone(handler.get_correspondent(message, rule))
rule = MailRule(name="b", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_EMAIL)
c = get_correspondent(message, rule)
c = handler.get_correspondent(message, rule)
self.assertIsNotNone(c)
self.assertEqual(c.name, "someone@somewhere.com")
c = get_correspondent(message2, rule)
c = handler.get_correspondent(message2, rule)
self.assertIsNotNone(c)
self.assertEqual(c.name, "me@localhost.com")
self.assertEqual(c.id, me_localhost.id)
rule = MailRule(name="c", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_NAME)
c = get_correspondent(message, rule)
c = handler.get_correspondent(message, rule)
self.assertIsNotNone(c)
self.assertEqual(c.name, "Someone!")
c = get_correspondent(message2, rule)
c = handler.get_correspondent(message2, rule)
self.assertIsNotNone(c)
self.assertEqual(c.id, me_localhost.id)
rule = MailRule(name="d", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_CUSTOM, assign_correspondent=someone_else)
c = get_correspondent(message, rule)
c = handler.get_correspondent(message, rule)
self.assertEqual(c, someone_else)
def test_get_title(self):
@ -194,10 +197,13 @@ class TestMail(TestCase):
message.subject = "the message title"
att = namedtuple('Attachment', [])
att.filename = "this_is_the_file.pdf"
handler = MailAccountHandler()
rule = MailRule(name="a", assign_title_from=MailRule.TITLE_FROM_FILENAME)
self.assertEqual(get_title(message, att, rule), "this_is_the_file")
self.assertEqual(handler.get_title(message, att, rule), "this_is_the_file")
rule = MailRule(name="b", assign_title_from=MailRule.TITLE_FROM_SUBJECT)
self.assertEqual(get_title(message, att, rule), "the message title")
self.assertEqual(handler.get_title(message, att, rule), "the message title")
def test_handle_message(self):
message = create_message(subject="the message title", from_="Myself", num_attachments=2)
@ -319,7 +325,7 @@ class TestMail(TestCase):
self.assertEqual(len(self.bogus_mailbox.messages), 2)
self.assertEqual(len(self.bogus_mailbox.messages_spam), 1)
def test_errors(self):
def test_error_login(self):
account = MailAccount.objects.create(name="test", imap_server="", username="admin", password="wrong")
try:
@ -329,26 +335,55 @@ class TestMail(TestCase):
else:
self.fail("Should raise exception")
def test_error_skip_account(self):
account_faulty = MailAccount.objects.create(name="test", imap_server="", username="admin", password="wroasdng")
account = MailAccount.objects.create(name="test2", imap_server="", username="admin", password="secret")
rule = MailRule.objects.create(name="testrule", account=account, folder="uuuh")
rule = MailRule.objects.create(name="testrule", account=account, action=MailRule.ACTION_MOVE,
action_parameter="spam", filter_subject="Claim")
try:
self.mail_account_handler.handle_mail_account(account)
except MailError as e:
self.assertTrue("uuuh does not exist" in str(e))
else:
self.fail("Should raise exception")
tasks.process_mail_accounts()
self.assertEqual(self.async_task.call_count, 1)
self.assertEqual(len(self.bogus_mailbox.messages), 2)
self.assertEqual(len(self.bogus_mailbox.messages_spam), 1)
account = MailAccount.objects.create(name="test3", imap_server="", username="admin", password="secret")
def test_error_skip_rule(self):
rule = MailRule.objects.create(name="testrule2", account=account, action=MailRule.ACTION_MOVE, action_parameter="doesnotexist", filter_subject="Claim")
account = MailAccount.objects.create(name="test2", imap_server="", username="admin", password="secret")
rule = MailRule.objects.create(name="testrule", account=account, action=MailRule.ACTION_MOVE,
action_parameter="spam", filter_subject="Claim", order=1, folder="uuuhhhh")
rule2 = MailRule.objects.create(name="testrule2", account=account, action=MailRule.ACTION_MOVE,
action_parameter="spam", filter_subject="Claim", order=2)
self.mail_account_handler.handle_mail_account(account)
self.assertEqual(self.async_task.call_count, 1)
self.assertEqual(len(self.bogus_mailbox.messages), 2)
self.assertEqual(len(self.bogus_mailbox.messages_spam), 1)
@mock.patch("paperless_mail.mail.MailAccountHandler.get_correspondent")
def test_error_skip_mail(self, m):
def get_correspondent_fake(message, rule):
if message.from_ == 'amazon@amazon.de':
raise ValueError("Does not compute.")
else:
return None
m.side_effect = get_correspondent_fake
account = MailAccount.objects.create(name="test2", imap_server="", username="admin", password="secret")
rule = MailRule.objects.create(name="testrule", account=account, action=MailRule.ACTION_MOVE, action_parameter="spam")
self.mail_account_handler.handle_mail_account(account)
# test that we still consume mail even if some mails throw errors.
self.assertEqual(self.async_task.call_count, 2)
# faulty mail still in inbox, untouched
self.assertEqual(len(self.bogus_mailbox.messages), 1)
self.assertEqual(self.bogus_mailbox.messages[0].from_, 'amazon@amazon.de')
try:
self.mail_account_handler.handle_mail_account(account)
except MailError as e:
self.assertTrue("Error while processing post-consume actions" in str(e))
else:
self.fail("Should raise exception")
def test_filters(self):