From 371745b6dc45c22ccf0e9fed7684eb4801d7e76a Mon Sep 17 00:00:00 2001 From: jonaswinkler Date: Fri, 4 Dec 2020 15:42:05 +0100 Subject: [PATCH] mail handling: When exceptions occur during account/rule/message handling, paperless will continue with the next account/rule/message. mail handling: When paperless encounters a very long fixes #82 --- src/documents/loggers.py | 4 +- src/paperless/settings.py | 2 +- src/paperless_mail/mail.py | 230 ++++++++++++++------------ src/paperless_mail/models.py | 2 +- src/paperless_mail/tasks.py | 14 +- src/paperless_mail/tests/test_mail.py | 85 +++++++--- 6 files changed, 197 insertions(+), 140 deletions(-) diff --git a/src/documents/loggers.py b/src/documents/loggers.py index 76dbe0163..863bc0c34 100644 --- a/src/documents/loggers.py +++ b/src/documents/loggers.py @@ -28,10 +28,10 @@ class LoggingMixin: def renew_logging_group(self): self.logging_group = uuid.uuid4() - def log(self, level, message): + def log(self, level, message, **kwargs): target = ".".join([self.__class__.__module__, self.__class__.__name__]) logger = logging.getLogger(target) getattr(logger, level)(message, extra={ "group": self.logging_group - }) + }, **kwargs) diff --git a/src/paperless/settings.py b/src/paperless/settings.py index 88915c7c5..c7ecf7645 100644 --- a/src/paperless/settings.py +++ b/src/paperless/settings.py @@ -274,7 +274,7 @@ LOGGING = { "class": "documents.loggers.PaperlessHandler", }, "console": { - "level": "WARNING", + "level": "INFO", "class": "logging.StreamHandler", "formatter": "verbose", } diff --git a/src/paperless_mail/mail.py b/src/paperless_mail/mail.py index 1ce4fe825..08f7365da 100644 --- a/src/paperless_mail/mail.py +++ b/src/paperless_mail/mail.py @@ -4,6 +4,7 @@ from datetime import timedelta, date import magic from django.conf import settings +from django.db import DatabaseError from django.utils.text import slugify from django_q.tasks import async_task from imap_tools import MailBox, MailBoxUnencrypted, AND, MailMessageFlags, \ @@ -86,46 +87,6 @@ def make_criterias(rule): return {**criterias, **get_rule_action(rule).get_criteria()} -def get_title(message, att, rule): - if rule.assign_title_from == MailRule.TITLE_FROM_SUBJECT: - title = message.subject - elif rule.assign_title_from == MailRule.TITLE_FROM_FILENAME: - title = os.path.splitext(os.path.basename(att.filename))[0] - else: - raise ValueError("Unknown title selector.") - - return title - - -def get_correspondent(message, rule): - if rule.assign_correspondent_from == MailRule.CORRESPONDENT_FROM_NOTHING: - correspondent = None - elif rule.assign_correspondent_from == MailRule.CORRESPONDENT_FROM_EMAIL: - correspondent_name = message.from_ - correspondent = Correspondent.objects.get_or_create( - name=correspondent_name, defaults={ - "slug": slugify(correspondent_name) - })[0] - elif rule.assign_correspondent_from == MailRule.CORRESPONDENT_FROM_NAME: - if message.from_values and \ - 'name' in message.from_values \ - and message.from_values['name']: - correspondent_name = message.from_values['name'] - else: - correspondent_name = message.from_ - - correspondent = Correspondent.objects.get_or_create( - name=correspondent_name, defaults={ - "slug": slugify(correspondent_name) - })[0] - elif rule.assign_correspondent_from == MailRule.CORRESPONDENT_FROM_CUSTOM: - correspondent = rule.assign_correspondent - else: - raise ValueError("Unknwown correspondent selector") - - return correspondent - - def get_mailbox(server, port, security): if security == MailAccount.IMAP_SECURITY_NONE: mailbox = MailBoxUnencrypted(server, port) @@ -140,6 +101,51 @@ def get_mailbox(server, port, security): class MailAccountHandler(LoggingMixin): + def _correspondent_from_name(self, name): + try: + return Correspondent.objects.get_or_create( + name=name, defaults={ + "slug": slugify(name) + })[0] + except DatabaseError as e: + self.log( + "error", + f"Error while retrieving correspondent {name}: {e}" + ) + return None + + def get_title(self, message, att, rule): + if rule.assign_title_from == MailRule.TITLE_FROM_SUBJECT: + return message.subject + + elif rule.assign_title_from == MailRule.TITLE_FROM_FILENAME: + return os.path.splitext(os.path.basename(att.filename))[0] + + else: + raise ValueError("Unknown title selector.") + + def get_correspondent(self, message, rule): + c_from = rule.assign_correspondent_from + + if c_from == MailRule.CORRESPONDENT_FROM_NOTHING: + return None + + elif c_from == MailRule.CORRESPONDENT_FROM_EMAIL: + return self._correspondent_from_name(message.from_) + + elif c_from == MailRule.CORRESPONDENT_FROM_NAME: + if message.from_values and 'name' in message.from_values and message.from_values['name']: # NOQA: E501 + return self._correspondent_from_name( + message.from_values['name']) + else: + return self._correspondent_from_name(message.from_) + + elif c_from == MailRule.CORRESPONDENT_FROM_CUSTOM: + return rule.assign_correspondent + + else: + raise ValueError("Unknwown correspondent selector") + def handle_mail_account(self, account): self.renew_logging_group() @@ -156,79 +162,89 @@ class MailAccountHandler(LoggingMixin): M.login(account.username, account.password) except Exception: raise MailError( - f"Error while authenticating account {account.name}") + f"Error while authenticating account {account}") self.log('debug', f"Account {account}: Processing " f"{account.rules.count()} rule(s)") for rule in account.rules.order_by('order'): - self.log( - 'debug', - f"Account {account}: Processing rule {rule.name}") - - self.log( - 'debug', - f"Rule {account}.{rule}: Selecting folder {rule.folder}") - try: - M.folder.set(rule.folder) - except MailboxFolderSelectError: - raise MailError( - f"Rule {rule.name}: Folder {rule.folder} " - f"does not exist in account {account.name}") + total_processed_files += self.handle_mail_rule(M, rule) + except Exception as e: + self.log( + "error", + f"Rule {rule}: Error while processing rule: {e}", + exc_info=True + ) - criterias = make_criterias(rule) + return total_processed_files + def handle_mail_rule(self, M, rule): + + self.log( + 'debug', + f"Rule {rule}: Selecting folder {rule.folder}") + + try: + M.folder.set(rule.folder) + except MailboxFolderSelectError: + raise MailError( + f"Rule {rule}: Folder {rule.folder} " + f"does not exist in account {rule.account}") + + criterias = make_criterias(rule) + + self.log( + 'debug', + f"Rule {rule}: Searching folder with criteria " + f"{str(AND(**criterias))}") + + try: + messages = M.fetch(criteria=AND(**criterias), + mark_seen=False) + except Exception: + raise MailError( + f"Rule {rule}: Error while fetching folder {rule.folder}") + + post_consume_messages = [] + + mails_processed = 0 + total_processed_files = 0 + + for message in messages: + try: + processed_files = self.handle_message(message, rule) + if processed_files > 0: + post_consume_messages.append(message.uid) + + total_processed_files += processed_files + mails_processed += 1 + except Exception as e: self.log( - 'debug', - f"Rule {account}.{rule}: Searching folder with criteria " - f"{str(AND(**criterias))}") + "error", + f"Rule {rule}: Error while processing mail " + f"{message.uid}: {e}", + exc_info=True) - try: - messages = M.fetch(criteria=AND(**criterias), - mark_seen=False) - except Exception: - raise MailError( - f"Rule {rule.name}: Error while fetching folder " - f"{rule.folder} of account {account.name}") + self.log( + 'debug', + f"Rule {rule}: Processed {mails_processed} matching mail(s)") - post_consume_messages = [] + self.log( + 'debug', + f"Rule {rule}: Running mail actions on " + f"{len(post_consume_messages)} mails") - mails_processed = 0 + try: + get_rule_action(rule).post_consume( + M, + post_consume_messages, + rule.action_parameter) - for message in messages: - try: - processed_files = self.handle_message(message, rule) - except Exception: - raise MailError( - f"Rule {rule.name}: Error while processing mail " - f"{message.uid} of account {account.name}") - if processed_files > 0: - post_consume_messages.append(message.uid) - - total_processed_files += processed_files - mails_processed += 1 - - self.log( - 'debug', - f"Rule {account}.{rule}: Processed {mails_processed} " - f"matching mail(s)") - - self.log( - 'debug', - f"Rule {account}.{rule}: Running mail actions on " - f"{len(post_consume_messages)} mails") - - try: - get_rule_action(rule).post_consume( - M, - post_consume_messages, - rule.action_parameter) - - except Exception: - raise MailError( - f"Rule {rule.name}: Error while processing " - f"post-consume actions for account {account.name}") + except Exception as e: + raise MailError( + f"Rule {rule}: Error while processing post-consume actions: " + f"{e}") return total_processed_files @@ -238,11 +254,11 @@ class MailAccountHandler(LoggingMixin): self.log( 'debug', - f"Rule {rule.account}.{rule}: " + f"Rule {rule}: " f"Processing mail {message.subject} from {message.from_} with " f"{len(message.attachments)} attachment(s)") - correspondent = get_correspondent(message, rule) + correspondent = self.get_correspondent(message, rule) tag = rule.assign_tag doc_type = rule.assign_document_type @@ -253,12 +269,12 @@ class MailAccountHandler(LoggingMixin): if not att.content_disposition == "attachment": self.log( 'debug', - f"Rule {rule.account}.{rule}: " + f"Rule {rule}: " f"Skipping attachment {att.filename} " - f"with content disposition inline") + f"with content disposition {att.content_disposition}") continue - title = get_title(message, att, rule) + title = self.get_title(message, att, rule) # don't trust the content type of the attachment. Could be # generic application/octet-stream. @@ -274,7 +290,7 @@ class MailAccountHandler(LoggingMixin): self.log( 'info', - f"Rule {rule.account}.{rule}: " + f"Rule {rule}: " f"Consuming attachment {att.filename} from mail " f"{message.subject} from {message.from_}") @@ -293,7 +309,7 @@ class MailAccountHandler(LoggingMixin): else: self.log( 'debug', - f"Rule {rule.account}.{rule}: " + f"Rule {rule}: " f"Skipping attachment {att.filename} " f"since guessed mime type {mime_type} is not supported " f"by paperless") diff --git a/src/paperless_mail/models.py b/src/paperless_mail/models.py index fbcfaf980..aa1ac5684 100644 --- a/src/paperless_mail/models.py +++ b/src/paperless_mail/models.py @@ -139,4 +139,4 @@ class MailRule(models.Model): ) def __str__(self): - return self.name + return f"{self.account.name}.{self.name}" diff --git a/src/paperless_mail/tasks.py b/src/paperless_mail/tasks.py index 2eb4cbf74..68fb859a4 100644 --- a/src/paperless_mail/tasks.py +++ b/src/paperless_mail/tasks.py @@ -1,14 +1,20 @@ import logging -from paperless_mail.mail import MailAccountHandler +from paperless_mail.mail import MailAccountHandler, MailError from paperless_mail.models import MailAccount def process_mail_accounts(): total_new_documents = 0 for account in MailAccount.objects.all(): - total_new_documents += MailAccountHandler().handle_mail_account( - account) + try: + total_new_documents += MailAccountHandler().handle_mail_account( + account) + except MailError as e: + logging.getLogger(__name__).error( + f"Error while processing mail account {account}: {e}", + exc_info=True + ) if total_new_documents > 0: return f"Added {total_new_documents} document(s)." @@ -21,4 +27,4 @@ def process_mail_account(name): account = MailAccount.objects.get(name=name) MailAccountHandler().handle_mail_account(account) except MailAccount.DoesNotExist: - logging.error("Unknown mail acccount: {}".format(name)) + logging.getLogger(__name__).error(f"Unknown mail acccount: {name}") diff --git a/src/paperless_mail/tests/test_mail.py b/src/paperless_mail/tests/test_mail.py index 6a737cfa5..3cd3e8499 100644 --- a/src/paperless_mail/tests/test_mail.py +++ b/src/paperless_mail/tests/test_mail.py @@ -4,12 +4,13 @@ from typing import ContextManager from unittest import mock from django.core.management import call_command +from django.db import DatabaseError from django.test import TestCase from imap_tools import MailMessageFlags, MailboxFolderSelectError from documents.models import Correspondent from paperless_mail import tasks -from paperless_mail.mail import MailError, MailAccountHandler, get_correspondent, get_title +from paperless_mail.mail import MailError, MailAccountHandler from paperless_mail.models import MailRule, MailAccount @@ -165,28 +166,30 @@ class TestMail(TestCase): me_localhost = Correspondent.objects.create(name=message2.from_) someone_else = Correspondent.objects.create(name="someone else") + handler = MailAccountHandler() + rule = MailRule(name="a", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_NOTHING) - self.assertIsNone(get_correspondent(message, rule)) + self.assertIsNone(handler.get_correspondent(message, rule)) rule = MailRule(name="b", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_EMAIL) - c = get_correspondent(message, rule) + c = handler.get_correspondent(message, rule) self.assertIsNotNone(c) self.assertEqual(c.name, "someone@somewhere.com") - c = get_correspondent(message2, rule) + c = handler.get_correspondent(message2, rule) self.assertIsNotNone(c) self.assertEqual(c.name, "me@localhost.com") self.assertEqual(c.id, me_localhost.id) rule = MailRule(name="c", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_NAME) - c = get_correspondent(message, rule) + c = handler.get_correspondent(message, rule) self.assertIsNotNone(c) self.assertEqual(c.name, "Someone!") - c = get_correspondent(message2, rule) + c = handler.get_correspondent(message2, rule) self.assertIsNotNone(c) self.assertEqual(c.id, me_localhost.id) rule = MailRule(name="d", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_CUSTOM, assign_correspondent=someone_else) - c = get_correspondent(message, rule) + c = handler.get_correspondent(message, rule) self.assertEqual(c, someone_else) def test_get_title(self): @@ -194,10 +197,13 @@ class TestMail(TestCase): message.subject = "the message title" att = namedtuple('Attachment', []) att.filename = "this_is_the_file.pdf" + + handler = MailAccountHandler() + rule = MailRule(name="a", assign_title_from=MailRule.TITLE_FROM_FILENAME) - self.assertEqual(get_title(message, att, rule), "this_is_the_file") + self.assertEqual(handler.get_title(message, att, rule), "this_is_the_file") rule = MailRule(name="b", assign_title_from=MailRule.TITLE_FROM_SUBJECT) - self.assertEqual(get_title(message, att, rule), "the message title") + self.assertEqual(handler.get_title(message, att, rule), "the message title") def test_handle_message(self): message = create_message(subject="the message title", from_="Myself", num_attachments=2) @@ -319,7 +325,7 @@ class TestMail(TestCase): self.assertEqual(len(self.bogus_mailbox.messages), 2) self.assertEqual(len(self.bogus_mailbox.messages_spam), 1) - def test_errors(self): + def test_error_login(self): account = MailAccount.objects.create(name="test", imap_server="", username="admin", password="wrong") try: @@ -329,26 +335,55 @@ class TestMail(TestCase): else: self.fail("Should raise exception") + def test_error_skip_account(self): + account_faulty = MailAccount.objects.create(name="test", imap_server="", username="admin", password="wroasdng") + account = MailAccount.objects.create(name="test2", imap_server="", username="admin", password="secret") - rule = MailRule.objects.create(name="testrule", account=account, folder="uuuh") + rule = MailRule.objects.create(name="testrule", account=account, action=MailRule.ACTION_MOVE, + action_parameter="spam", filter_subject="Claim") - try: - self.mail_account_handler.handle_mail_account(account) - except MailError as e: - self.assertTrue("uuuh does not exist" in str(e)) - else: - self.fail("Should raise exception") + tasks.process_mail_accounts() + self.assertEqual(self.async_task.call_count, 1) + self.assertEqual(len(self.bogus_mailbox.messages), 2) + self.assertEqual(len(self.bogus_mailbox.messages_spam), 1) - account = MailAccount.objects.create(name="test3", imap_server="", username="admin", password="secret") + def test_error_skip_rule(self): - rule = MailRule.objects.create(name="testrule2", account=account, action=MailRule.ACTION_MOVE, action_parameter="doesnotexist", filter_subject="Claim") + account = MailAccount.objects.create(name="test2", imap_server="", username="admin", password="secret") + rule = MailRule.objects.create(name="testrule", account=account, action=MailRule.ACTION_MOVE, + action_parameter="spam", filter_subject="Claim", order=1, folder="uuuhhhh") + rule2 = MailRule.objects.create(name="testrule2", account=account, action=MailRule.ACTION_MOVE, + action_parameter="spam", filter_subject="Claim", order=2) + + self.mail_account_handler.handle_mail_account(account) + self.assertEqual(self.async_task.call_count, 1) + self.assertEqual(len(self.bogus_mailbox.messages), 2) + self.assertEqual(len(self.bogus_mailbox.messages_spam), 1) + + + @mock.patch("paperless_mail.mail.MailAccountHandler.get_correspondent") + def test_error_skip_mail(self, m): + + def get_correspondent_fake(message, rule): + if message.from_ == 'amazon@amazon.de': + raise ValueError("Does not compute.") + else: + return None + + m.side_effect = get_correspondent_fake + + account = MailAccount.objects.create(name="test2", imap_server="", username="admin", password="secret") + rule = MailRule.objects.create(name="testrule", account=account, action=MailRule.ACTION_MOVE, action_parameter="spam") + + self.mail_account_handler.handle_mail_account(account) + + # test that we still consume mail even if some mails throw errors. + self.assertEqual(self.async_task.call_count, 2) + + # faulty mail still in inbox, untouched + self.assertEqual(len(self.bogus_mailbox.messages), 1) + self.assertEqual(self.bogus_mailbox.messages[0].from_, 'amazon@amazon.de') - try: - self.mail_account_handler.handle_mail_account(account) - except MailError as e: - self.assertTrue("Error while processing post-consume actions" in str(e)) - else: - self.fail("Should raise exception") def test_filters(self):