mail handling: When exceptions occur during account/rule/message handling, paperless will continue with the next account/rule/message.

mail handling: When paperless encounters a very long

fixes #82
This commit is contained in:
jonaswinkler 2020-12-04 15:42:05 +01:00
parent 991a46c4f0
commit 371745b6dc
6 changed files with 197 additions and 140 deletions

View File

@ -28,10 +28,10 @@ class LoggingMixin:
def renew_logging_group(self): def renew_logging_group(self):
self.logging_group = uuid.uuid4() self.logging_group = uuid.uuid4()
def log(self, level, message): def log(self, level, message, **kwargs):
target = ".".join([self.__class__.__module__, self.__class__.__name__]) target = ".".join([self.__class__.__module__, self.__class__.__name__])
logger = logging.getLogger(target) logger = logging.getLogger(target)
getattr(logger, level)(message, extra={ getattr(logger, level)(message, extra={
"group": self.logging_group "group": self.logging_group
}) }, **kwargs)

View File

@ -274,7 +274,7 @@ LOGGING = {
"class": "documents.loggers.PaperlessHandler", "class": "documents.loggers.PaperlessHandler",
}, },
"console": { "console": {
"level": "WARNING", "level": "INFO",
"class": "logging.StreamHandler", "class": "logging.StreamHandler",
"formatter": "verbose", "formatter": "verbose",
} }

View File

@ -4,6 +4,7 @@ from datetime import timedelta, date
import magic import magic
from django.conf import settings from django.conf import settings
from django.db import DatabaseError
from django.utils.text import slugify from django.utils.text import slugify
from django_q.tasks import async_task from django_q.tasks import async_task
from imap_tools import MailBox, MailBoxUnencrypted, AND, MailMessageFlags, \ from imap_tools import MailBox, MailBoxUnencrypted, AND, MailMessageFlags, \
@ -86,46 +87,6 @@ def make_criterias(rule):
return {**criterias, **get_rule_action(rule).get_criteria()} return {**criterias, **get_rule_action(rule).get_criteria()}
def get_title(message, att, rule):
if rule.assign_title_from == MailRule.TITLE_FROM_SUBJECT:
title = message.subject
elif rule.assign_title_from == MailRule.TITLE_FROM_FILENAME:
title = os.path.splitext(os.path.basename(att.filename))[0]
else:
raise ValueError("Unknown title selector.")
return title
def get_correspondent(message, rule):
if rule.assign_correspondent_from == MailRule.CORRESPONDENT_FROM_NOTHING:
correspondent = None
elif rule.assign_correspondent_from == MailRule.CORRESPONDENT_FROM_EMAIL:
correspondent_name = message.from_
correspondent = Correspondent.objects.get_or_create(
name=correspondent_name, defaults={
"slug": slugify(correspondent_name)
})[0]
elif rule.assign_correspondent_from == MailRule.CORRESPONDENT_FROM_NAME:
if message.from_values and \
'name' in message.from_values \
and message.from_values['name']:
correspondent_name = message.from_values['name']
else:
correspondent_name = message.from_
correspondent = Correspondent.objects.get_or_create(
name=correspondent_name, defaults={
"slug": slugify(correspondent_name)
})[0]
elif rule.assign_correspondent_from == MailRule.CORRESPONDENT_FROM_CUSTOM:
correspondent = rule.assign_correspondent
else:
raise ValueError("Unknwown correspondent selector")
return correspondent
def get_mailbox(server, port, security): def get_mailbox(server, port, security):
if security == MailAccount.IMAP_SECURITY_NONE: if security == MailAccount.IMAP_SECURITY_NONE:
mailbox = MailBoxUnencrypted(server, port) mailbox = MailBoxUnencrypted(server, port)
@ -140,6 +101,51 @@ def get_mailbox(server, port, security):
class MailAccountHandler(LoggingMixin): class MailAccountHandler(LoggingMixin):
def _correspondent_from_name(self, name):
try:
return Correspondent.objects.get_or_create(
name=name, defaults={
"slug": slugify(name)
})[0]
except DatabaseError as e:
self.log(
"error",
f"Error while retrieving correspondent {name}: {e}"
)
return None
def get_title(self, message, att, rule):
if rule.assign_title_from == MailRule.TITLE_FROM_SUBJECT:
return message.subject
elif rule.assign_title_from == MailRule.TITLE_FROM_FILENAME:
return os.path.splitext(os.path.basename(att.filename))[0]
else:
raise ValueError("Unknown title selector.")
def get_correspondent(self, message, rule):
c_from = rule.assign_correspondent_from
if c_from == MailRule.CORRESPONDENT_FROM_NOTHING:
return None
elif c_from == MailRule.CORRESPONDENT_FROM_EMAIL:
return self._correspondent_from_name(message.from_)
elif c_from == MailRule.CORRESPONDENT_FROM_NAME:
if message.from_values and 'name' in message.from_values and message.from_values['name']: # NOQA: E501
return self._correspondent_from_name(
message.from_values['name'])
else:
return self._correspondent_from_name(message.from_)
elif c_from == MailRule.CORRESPONDENT_FROM_CUSTOM:
return rule.assign_correspondent
else:
raise ValueError("Unknwown correspondent selector")
def handle_mail_account(self, account): def handle_mail_account(self, account):
self.renew_logging_group() self.renew_logging_group()
@ -156,79 +162,89 @@ class MailAccountHandler(LoggingMixin):
M.login(account.username, account.password) M.login(account.username, account.password)
except Exception: except Exception:
raise MailError( raise MailError(
f"Error while authenticating account {account.name}") f"Error while authenticating account {account}")
self.log('debug', f"Account {account}: Processing " self.log('debug', f"Account {account}: Processing "
f"{account.rules.count()} rule(s)") f"{account.rules.count()} rule(s)")
for rule in account.rules.order_by('order'): for rule in account.rules.order_by('order'):
self.log(
'debug',
f"Account {account}: Processing rule {rule.name}")
self.log(
'debug',
f"Rule {account}.{rule}: Selecting folder {rule.folder}")
try: try:
M.folder.set(rule.folder) total_processed_files += self.handle_mail_rule(M, rule)
except MailboxFolderSelectError: except Exception as e:
raise MailError( self.log(
f"Rule {rule.name}: Folder {rule.folder} " "error",
f"does not exist in account {account.name}") f"Rule {rule}: Error while processing rule: {e}",
exc_info=True
)
criterias = make_criterias(rule) return total_processed_files
def handle_mail_rule(self, M, rule):
self.log(
'debug',
f"Rule {rule}: Selecting folder {rule.folder}")
try:
M.folder.set(rule.folder)
except MailboxFolderSelectError:
raise MailError(
f"Rule {rule}: Folder {rule.folder} "
f"does not exist in account {rule.account}")
criterias = make_criterias(rule)
self.log(
'debug',
f"Rule {rule}: Searching folder with criteria "
f"{str(AND(**criterias))}")
try:
messages = M.fetch(criteria=AND(**criterias),
mark_seen=False)
except Exception:
raise MailError(
f"Rule {rule}: Error while fetching folder {rule.folder}")
post_consume_messages = []
mails_processed = 0
total_processed_files = 0
for message in messages:
try:
processed_files = self.handle_message(message, rule)
if processed_files > 0:
post_consume_messages.append(message.uid)
total_processed_files += processed_files
mails_processed += 1
except Exception as e:
self.log( self.log(
'debug', "error",
f"Rule {account}.{rule}: Searching folder with criteria " f"Rule {rule}: Error while processing mail "
f"{str(AND(**criterias))}") f"{message.uid}: {e}",
exc_info=True)
try: self.log(
messages = M.fetch(criteria=AND(**criterias), 'debug',
mark_seen=False) f"Rule {rule}: Processed {mails_processed} matching mail(s)")
except Exception:
raise MailError(
f"Rule {rule.name}: Error while fetching folder "
f"{rule.folder} of account {account.name}")
post_consume_messages = [] self.log(
'debug',
f"Rule {rule}: Running mail actions on "
f"{len(post_consume_messages)} mails")
mails_processed = 0 try:
get_rule_action(rule).post_consume(
M,
post_consume_messages,
rule.action_parameter)
for message in messages: except Exception as e:
try: raise MailError(
processed_files = self.handle_message(message, rule) f"Rule {rule}: Error while processing post-consume actions: "
except Exception: f"{e}")
raise MailError(
f"Rule {rule.name}: Error while processing mail "
f"{message.uid} of account {account.name}")
if processed_files > 0:
post_consume_messages.append(message.uid)
total_processed_files += processed_files
mails_processed += 1
self.log(
'debug',
f"Rule {account}.{rule}: Processed {mails_processed} "
f"matching mail(s)")
self.log(
'debug',
f"Rule {account}.{rule}: Running mail actions on "
f"{len(post_consume_messages)} mails")
try:
get_rule_action(rule).post_consume(
M,
post_consume_messages,
rule.action_parameter)
except Exception:
raise MailError(
f"Rule {rule.name}: Error while processing "
f"post-consume actions for account {account.name}")
return total_processed_files return total_processed_files
@ -238,11 +254,11 @@ class MailAccountHandler(LoggingMixin):
self.log( self.log(
'debug', 'debug',
f"Rule {rule.account}.{rule}: " f"Rule {rule}: "
f"Processing mail {message.subject} from {message.from_} with " f"Processing mail {message.subject} from {message.from_} with "
f"{len(message.attachments)} attachment(s)") f"{len(message.attachments)} attachment(s)")
correspondent = get_correspondent(message, rule) correspondent = self.get_correspondent(message, rule)
tag = rule.assign_tag tag = rule.assign_tag
doc_type = rule.assign_document_type doc_type = rule.assign_document_type
@ -253,12 +269,12 @@ class MailAccountHandler(LoggingMixin):
if not att.content_disposition == "attachment": if not att.content_disposition == "attachment":
self.log( self.log(
'debug', 'debug',
f"Rule {rule.account}.{rule}: " f"Rule {rule}: "
f"Skipping attachment {att.filename} " f"Skipping attachment {att.filename} "
f"with content disposition inline") f"with content disposition {att.content_disposition}")
continue continue
title = get_title(message, att, rule) title = self.get_title(message, att, rule)
# don't trust the content type of the attachment. Could be # don't trust the content type of the attachment. Could be
# generic application/octet-stream. # generic application/octet-stream.
@ -274,7 +290,7 @@ class MailAccountHandler(LoggingMixin):
self.log( self.log(
'info', 'info',
f"Rule {rule.account}.{rule}: " f"Rule {rule}: "
f"Consuming attachment {att.filename} from mail " f"Consuming attachment {att.filename} from mail "
f"{message.subject} from {message.from_}") f"{message.subject} from {message.from_}")
@ -293,7 +309,7 @@ class MailAccountHandler(LoggingMixin):
else: else:
self.log( self.log(
'debug', 'debug',
f"Rule {rule.account}.{rule}: " f"Rule {rule}: "
f"Skipping attachment {att.filename} " f"Skipping attachment {att.filename} "
f"since guessed mime type {mime_type} is not supported " f"since guessed mime type {mime_type} is not supported "
f"by paperless") f"by paperless")

View File

@ -139,4 +139,4 @@ class MailRule(models.Model):
) )
def __str__(self): def __str__(self):
return self.name return f"{self.account.name}.{self.name}"

View File

@ -1,14 +1,20 @@
import logging import logging
from paperless_mail.mail import MailAccountHandler from paperless_mail.mail import MailAccountHandler, MailError
from paperless_mail.models import MailAccount from paperless_mail.models import MailAccount
def process_mail_accounts(): def process_mail_accounts():
total_new_documents = 0 total_new_documents = 0
for account in MailAccount.objects.all(): for account in MailAccount.objects.all():
total_new_documents += MailAccountHandler().handle_mail_account( try:
account) total_new_documents += MailAccountHandler().handle_mail_account(
account)
except MailError as e:
logging.getLogger(__name__).error(
f"Error while processing mail account {account}: {e}",
exc_info=True
)
if total_new_documents > 0: if total_new_documents > 0:
return f"Added {total_new_documents} document(s)." return f"Added {total_new_documents} document(s)."
@ -21,4 +27,4 @@ def process_mail_account(name):
account = MailAccount.objects.get(name=name) account = MailAccount.objects.get(name=name)
MailAccountHandler().handle_mail_account(account) MailAccountHandler().handle_mail_account(account)
except MailAccount.DoesNotExist: except MailAccount.DoesNotExist:
logging.error("Unknown mail acccount: {}".format(name)) logging.getLogger(__name__).error(f"Unknown mail acccount: {name}")

View File

@ -4,12 +4,13 @@ from typing import ContextManager
from unittest import mock from unittest import mock
from django.core.management import call_command from django.core.management import call_command
from django.db import DatabaseError
from django.test import TestCase from django.test import TestCase
from imap_tools import MailMessageFlags, MailboxFolderSelectError from imap_tools import MailMessageFlags, MailboxFolderSelectError
from documents.models import Correspondent from documents.models import Correspondent
from paperless_mail import tasks from paperless_mail import tasks
from paperless_mail.mail import MailError, MailAccountHandler, get_correspondent, get_title from paperless_mail.mail import MailError, MailAccountHandler
from paperless_mail.models import MailRule, MailAccount from paperless_mail.models import MailRule, MailAccount
@ -165,28 +166,30 @@ class TestMail(TestCase):
me_localhost = Correspondent.objects.create(name=message2.from_) me_localhost = Correspondent.objects.create(name=message2.from_)
someone_else = Correspondent.objects.create(name="someone else") someone_else = Correspondent.objects.create(name="someone else")
handler = MailAccountHandler()
rule = MailRule(name="a", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_NOTHING) rule = MailRule(name="a", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_NOTHING)
self.assertIsNone(get_correspondent(message, rule)) self.assertIsNone(handler.get_correspondent(message, rule))
rule = MailRule(name="b", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_EMAIL) rule = MailRule(name="b", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_EMAIL)
c = get_correspondent(message, rule) c = handler.get_correspondent(message, rule)
self.assertIsNotNone(c) self.assertIsNotNone(c)
self.assertEqual(c.name, "someone@somewhere.com") self.assertEqual(c.name, "someone@somewhere.com")
c = get_correspondent(message2, rule) c = handler.get_correspondent(message2, rule)
self.assertIsNotNone(c) self.assertIsNotNone(c)
self.assertEqual(c.name, "me@localhost.com") self.assertEqual(c.name, "me@localhost.com")
self.assertEqual(c.id, me_localhost.id) self.assertEqual(c.id, me_localhost.id)
rule = MailRule(name="c", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_NAME) rule = MailRule(name="c", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_NAME)
c = get_correspondent(message, rule) c = handler.get_correspondent(message, rule)
self.assertIsNotNone(c) self.assertIsNotNone(c)
self.assertEqual(c.name, "Someone!") self.assertEqual(c.name, "Someone!")
c = get_correspondent(message2, rule) c = handler.get_correspondent(message2, rule)
self.assertIsNotNone(c) self.assertIsNotNone(c)
self.assertEqual(c.id, me_localhost.id) self.assertEqual(c.id, me_localhost.id)
rule = MailRule(name="d", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_CUSTOM, assign_correspondent=someone_else) rule = MailRule(name="d", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_CUSTOM, assign_correspondent=someone_else)
c = get_correspondent(message, rule) c = handler.get_correspondent(message, rule)
self.assertEqual(c, someone_else) self.assertEqual(c, someone_else)
def test_get_title(self): def test_get_title(self):
@ -194,10 +197,13 @@ class TestMail(TestCase):
message.subject = "the message title" message.subject = "the message title"
att = namedtuple('Attachment', []) att = namedtuple('Attachment', [])
att.filename = "this_is_the_file.pdf" att.filename = "this_is_the_file.pdf"
handler = MailAccountHandler()
rule = MailRule(name="a", assign_title_from=MailRule.TITLE_FROM_FILENAME) rule = MailRule(name="a", assign_title_from=MailRule.TITLE_FROM_FILENAME)
self.assertEqual(get_title(message, att, rule), "this_is_the_file") self.assertEqual(handler.get_title(message, att, rule), "this_is_the_file")
rule = MailRule(name="b", assign_title_from=MailRule.TITLE_FROM_SUBJECT) rule = MailRule(name="b", assign_title_from=MailRule.TITLE_FROM_SUBJECT)
self.assertEqual(get_title(message, att, rule), "the message title") self.assertEqual(handler.get_title(message, att, rule), "the message title")
def test_handle_message(self): def test_handle_message(self):
message = create_message(subject="the message title", from_="Myself", num_attachments=2) message = create_message(subject="the message title", from_="Myself", num_attachments=2)
@ -319,7 +325,7 @@ class TestMail(TestCase):
self.assertEqual(len(self.bogus_mailbox.messages), 2) self.assertEqual(len(self.bogus_mailbox.messages), 2)
self.assertEqual(len(self.bogus_mailbox.messages_spam), 1) self.assertEqual(len(self.bogus_mailbox.messages_spam), 1)
def test_errors(self): def test_error_login(self):
account = MailAccount.objects.create(name="test", imap_server="", username="admin", password="wrong") account = MailAccount.objects.create(name="test", imap_server="", username="admin", password="wrong")
try: try:
@ -329,26 +335,55 @@ class TestMail(TestCase):
else: else:
self.fail("Should raise exception") self.fail("Should raise exception")
def test_error_skip_account(self):
account_faulty = MailAccount.objects.create(name="test", imap_server="", username="admin", password="wroasdng")
account = MailAccount.objects.create(name="test2", imap_server="", username="admin", password="secret") account = MailAccount.objects.create(name="test2", imap_server="", username="admin", password="secret")
rule = MailRule.objects.create(name="testrule", account=account, folder="uuuh") rule = MailRule.objects.create(name="testrule", account=account, action=MailRule.ACTION_MOVE,
action_parameter="spam", filter_subject="Claim")
try: tasks.process_mail_accounts()
self.mail_account_handler.handle_mail_account(account) self.assertEqual(self.async_task.call_count, 1)
except MailError as e: self.assertEqual(len(self.bogus_mailbox.messages), 2)
self.assertTrue("uuuh does not exist" in str(e)) self.assertEqual(len(self.bogus_mailbox.messages_spam), 1)
else:
self.fail("Should raise exception")
account = MailAccount.objects.create(name="test3", imap_server="", username="admin", password="secret") def test_error_skip_rule(self):
rule = MailRule.objects.create(name="testrule2", account=account, action=MailRule.ACTION_MOVE, action_parameter="doesnotexist", filter_subject="Claim") account = MailAccount.objects.create(name="test2", imap_server="", username="admin", password="secret")
rule = MailRule.objects.create(name="testrule", account=account, action=MailRule.ACTION_MOVE,
action_parameter="spam", filter_subject="Claim", order=1, folder="uuuhhhh")
rule2 = MailRule.objects.create(name="testrule2", account=account, action=MailRule.ACTION_MOVE,
action_parameter="spam", filter_subject="Claim", order=2)
self.mail_account_handler.handle_mail_account(account)
self.assertEqual(self.async_task.call_count, 1)
self.assertEqual(len(self.bogus_mailbox.messages), 2)
self.assertEqual(len(self.bogus_mailbox.messages_spam), 1)
@mock.patch("paperless_mail.mail.MailAccountHandler.get_correspondent")
def test_error_skip_mail(self, m):
def get_correspondent_fake(message, rule):
if message.from_ == 'amazon@amazon.de':
raise ValueError("Does not compute.")
else:
return None
m.side_effect = get_correspondent_fake
account = MailAccount.objects.create(name="test2", imap_server="", username="admin", password="secret")
rule = MailRule.objects.create(name="testrule", account=account, action=MailRule.ACTION_MOVE, action_parameter="spam")
self.mail_account_handler.handle_mail_account(account)
# test that we still consume mail even if some mails throw errors.
self.assertEqual(self.async_task.call_count, 2)
# faulty mail still in inbox, untouched
self.assertEqual(len(self.bogus_mailbox.messages), 1)
self.assertEqual(self.bogus_mailbox.messages[0].from_, 'amazon@amazon.de')
try:
self.mail_account_handler.handle_mail_account(account)
except MailError as e:
self.assertTrue("Error while processing post-consume actions" in str(e))
else:
self.fail("Should raise exception")
def test_filters(self): def test_filters(self):