updated logging, logging for the mail consumer to see whats happening

This commit is contained in:
Jonas Winkler
2020-11-18 13:23:30 +01:00
parent 7408d3855c
commit 680ab3d56b
10 changed files with 214 additions and 152 deletions

View File

@@ -1,18 +1,7 @@
from django.contrib import admin
from django import forms
from paperless_mail.models import MailAccount, MailRule
class MailAccountForm(forms.ModelForm):
password = forms.CharField(widget=forms.PasswordInput)
class Meta:
fields = '__all__'
model = MailAccount
class MailAccountAdmin(admin.ModelAdmin):
list_display = ("name", "imap_server", "username")

View File

@@ -8,6 +8,7 @@ from django_q.tasks import async_task
from imap_tools import MailBox, MailBoxUnencrypted, AND, MailMessageFlags, \
MailboxFolderSelectError
from documents.loggers import LoggingMixin
from documents.models import Correspondent
from paperless_mail.models import MailAccount, MailRule
@@ -83,72 +84,6 @@ def make_criterias(rule):
return {**criterias, **get_rule_action(rule).get_criteria()}
def handle_mail_account(account):
if account.imap_security == MailAccount.IMAP_SECURITY_NONE:
mailbox = MailBoxUnencrypted(account.imap_server, account.imap_port)
elif account.imap_security == MailAccount.IMAP_SECURITY_STARTTLS:
mailbox = MailBox(account.imap_server, account.imap_port, starttls=True)
elif account.imap_security == MailAccount.IMAP_SECURITY_SSL:
mailbox = MailBox(account.imap_server, account.imap_port)
else:
raise ValueError("Unknown IMAP security")
total_processed_files = 0
with mailbox as M:
try:
M.login(account.username, account.password)
except Exception:
raise MailError(
f"Error while authenticating account {account.name}")
for rule in account.rules.all():
try:
M.folder.set(rule.folder)
except MailboxFolderSelectError:
raise MailError(
f"Rule {rule.name}: Folder {rule.folder} does not exist "
f"in account {account.name}")
criterias = make_criterias(rule)
try:
messages = M.fetch(criteria=AND(**criterias), mark_seen=False)
except Exception:
raise MailError(
f"Rule {rule.name}: Error while fetching folder "
f"{rule.folder} of account {account.name}")
post_consume_messages = []
for message in messages:
try:
processed_files = handle_message(message, rule)
except Exception:
raise MailError(
f"Rule {rule.name}: Error while processing mail "
f"{message.uid} of account {account.name}")
if processed_files > 0:
post_consume_messages.append(message.uid)
total_processed_files += processed_files
try:
get_rule_action(rule).post_consume(
M,
post_consume_messages,
rule.action_parameter)
except Exception:
raise MailError(
f"Rule {rule.name}: Error while processing post-consume "
f"actions for account {account.name}")
return total_processed_files
def get_title(message, att, rule):
if rule.assign_title_from == MailRule.TITLE_FROM_SUBJECT:
title = message.subject
@@ -189,39 +124,155 @@ def get_correspondent(message, rule):
return correspondent
def handle_message(message, rule):
if not message.attachments:
return 0
def get_mailbox(server, port, security):
if security == MailAccount.IMAP_SECURITY_NONE:
mailbox = MailBoxUnencrypted(server, port)
elif security == MailAccount.IMAP_SECURITY_STARTTLS:
mailbox = MailBox(server, port, starttls=True)
elif security == MailAccount.IMAP_SECURITY_SSL:
mailbox = MailBox(server, port)
else:
raise ValueError("Unknown IMAP security")
return mailbox
correspondent = get_correspondent(message, rule)
tag = rule.assign_tag
doc_type = rule.assign_document_type
class MailAccountHandler(LoggingMixin):
processed_attachments = 0
def handle_mail_account(self, account):
for att in message.attachments:
self.renew_logging_group()
title = get_title(message, att, rule)
self.log('debug', f"Processing mail account {account}")
# TODO: check with parsers what files types are supported
if att.content_type == 'application/pdf':
total_processed_files = 0
os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
_, temp_filename = tempfile.mkstemp(prefix="paperless-mail-", dir=settings.SCRATCH_DIR)
with open(temp_filename, 'wb') as f:
f.write(att.payload)
with get_mailbox(account.imap_server,
account.imap_port,
account.imap_security) as M:
async_task(
"documents.tasks.consume_file",
path=temp_filename,
override_filename=att.filename,
override_title=title,
override_correspondent_id=correspondent.id if correspondent else None,
override_document_type_id=doc_type.id if doc_type else None,
override_tag_ids=[tag.id] if tag else None,
task_name=f"Mail: {att.filename}"
)
try:
M.login(account.username, account.password)
except Exception:
raise MailError(
f"Error while authenticating account {account.name}")
processed_attachments += 1
self.log('debug', f"Account {account}: Processing "
f"{account.rules.count()} rule(s)")
return processed_attachments
for rule in account.rules.all():
self.log(
'debug',
f"Account {account}: Processing rule {rule.name}")
self.log(
'debug',
f"Rule {account}.{rule}: Selecting folder {rule.folder}")
try:
M.folder.set(rule.folder)
except MailboxFolderSelectError:
raise MailError(
f"Rule {rule.name}: Folder {rule.folder} does not exist "
f"in account {account.name}")
criterias = make_criterias(rule)
self.log(
'debug',
f"Rule {account}.{rule}: Searching folder with criteria "
f"{str(AND(**criterias))}")
try:
messages = M.fetch(criteria=AND(**criterias), mark_seen=False)
except Exception:
raise MailError(
f"Rule {rule.name}: Error while fetching folder "
f"{rule.folder} of account {account.name}")
post_consume_messages = []
mails_processed = 0
for message in messages:
try:
processed_files = self.handle_message(message, rule)
except Exception:
raise MailError(
f"Rule {rule.name}: Error while processing mail "
f"{message.uid} of account {account.name}")
if processed_files > 0:
post_consume_messages.append(message.uid)
total_processed_files += processed_files
mails_processed += 1
self.log(
'debug',
f"Rule {account}.{rule}: Processed {mails_processed} "
f"matching mail(s)")
self.log(
'debug',
f"Rule {account}.{rule}: Running mail actions on "
f"{len(post_consume_messages)} mails")
try:
get_rule_action(rule).post_consume(
M,
post_consume_messages,
rule.action_parameter)
except Exception:
raise MailError(
f"Rule {rule.name}: Error while processing post-consume "
f"actions for account {account.name}")
return total_processed_files
def handle_message(self, message, rule):
if not message.attachments:
return 0
self.log(
'debug',
f"Rule {rule.account}.{rule}: "
f"Processing mail {message.subject} from {message.from_} with "
f"{len(message.attachments)} attachment(s)")
correspondent = get_correspondent(message, rule)
tag = rule.assign_tag
doc_type = rule.assign_document_type
processed_attachments = 0
for att in message.attachments:
title = get_title(message, att, rule)
# TODO: check with parsers what files types are supported
if att.content_type == 'application/pdf':
os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
_, temp_filename = tempfile.mkstemp(prefix="paperless-mail-", dir=settings.SCRATCH_DIR)
with open(temp_filename, 'wb') as f:
f.write(att.payload)
self.log(
'info',
f"Rule {rule.account}.{rule}: "
f"Consuming attachment {att.filename} from mail "
f"{message.subject} from {message.from_}")
async_task(
"documents.tasks.consume_file",
path=temp_filename,
override_filename=att.filename,
override_title=title,
override_correspondent_id=correspondent.id if correspondent else None,
override_document_type_id=doc_type.id if doc_type else None,
override_tag_ids=[tag.id] if tag else None,
task_name=f"Mail: {att.filename}"
)
processed_attachments += 1
return processed_attachments

View File

@@ -1,13 +1,14 @@
import logging
from paperless_mail import mail
from paperless_mail.mail import MailAccountHandler
from paperless_mail.models import MailAccount
def process_mail_accounts():
total_new_documents = 0
for account in MailAccount.objects.all():
total_new_documents += mail.handle_mail_account(account)
total_new_documents += MailAccountHandler().handle_mail_account(account)
if total_new_documents > 0:
return f"Added {total_new_documents} document(s)."
@@ -18,6 +19,6 @@ def process_mail_accounts():
def process_mail_account(name):
account = MailAccount.objects.find(name=name)
if account:
mail.handle_mail_account(account)
MailAccountHandler().handle_mail_account(account)
else:
logging.error("Unknown mail acccount: {}".format(name))

View File

@@ -7,7 +7,7 @@ from django.test import TestCase
from imap_tools import MailMessageFlags, MailboxFolderSelectError
from documents.models import Correspondent
from paperless_mail.mail import get_correspondent, get_title, handle_message, handle_mail_account, MailError
from paperless_mail.mail import MailError, MailAccountHandler, get_correspondent, get_title
from paperless_mail.models import MailRule, MailAccount
@@ -126,6 +126,8 @@ class TestMail(TestCase):
self.reset_bogus_mailbox()
self.mail_account_handler = MailAccountHandler()
def reset_bogus_mailbox(self):
self.bogus_mailbox.messages = []
self.bogus_mailbox.messages_spam = []
@@ -182,6 +184,7 @@ class TestMail(TestCase):
def test_handle_message(self):
message = namedtuple('MailMessage', [])
message.subject = "the message title"
message.from_ = "Myself"
att = namedtuple('Attachment', [])
att.filename = "test1.pdf"
@@ -200,9 +203,10 @@ class TestMail(TestCase):
message.attachments = [att, att2, att3]
rule = MailRule(assign_title_from=MailRule.TITLE_FROM_FILENAME)
account = MailAccount()
rule = MailRule(assign_title_from=MailRule.TITLE_FROM_FILENAME, account=account)
result = handle_message(message, rule)
result = self.mail_account_handler.handle_message(message, rule)
self.assertEqual(result, 2)
@@ -224,7 +228,7 @@ class TestMail(TestCase):
message.attachments = []
rule = MailRule()
result = handle_message(message, rule)
result = self.mail_account_handler.handle_message(message, rule)
self.assertFalse(m.called)
self.assertEqual(result, 0)
@@ -235,11 +239,13 @@ class TestMail(TestCase):
rule = MailRule.objects.create(name="testrule", account=account, action=MailRule.ACTION_MARK_READ)
self.assertEqual(len(self.bogus_mailbox.messages), 3)
self.assertEqual(self.async_task.call_count, 0)
self.assertEqual(len(self.bogus_mailbox.fetch("UNSEEN", False)), 2)
handle_mail_account(account)
self.mail_account_handler.handle_mail_account(account)
self.assertEqual(self.async_task.call_count, 2)
self.assertEqual(len(self.bogus_mailbox.fetch("UNSEEN", False)), 0)
self.assertEqual(len(self.bogus_mailbox.messages), 3)
def test_handle_mail_account_delete(self):
@@ -249,7 +255,7 @@ class TestMail(TestCase):
self.assertEqual(self.async_task.call_count, 0)
self.assertEqual(len(self.bogus_mailbox.messages), 3)
handle_mail_account(account)
self.mail_account_handler.handle_mail_account(account)
self.assertEqual(self.async_task.call_count, 2)
self.assertEqual(len(self.bogus_mailbox.messages), 1)
@@ -258,11 +264,13 @@ class TestMail(TestCase):
rule = MailRule.objects.create(name="testrule", account=account, action=MailRule.ACTION_FLAG, filter_subject="Invoice")
self.assertEqual(len(self.bogus_mailbox.messages), 3)
self.assertEqual(self.async_task.call_count, 0)
self.assertEqual(len(self.bogus_mailbox.fetch("UNFLAGGED", False)), 2)
handle_mail_account(account)
self.mail_account_handler.handle_mail_account(account)
self.assertEqual(self.async_task.call_count, 1)
self.assertEqual(len(self.bogus_mailbox.fetch("UNFLAGGED", False)), 1)
self.assertEqual(len(self.bogus_mailbox.messages), 3)
def test_handle_mail_account_move(self):
account = MailAccount.objects.create(name="test", imap_server="", username="admin", password="secret")
@@ -272,7 +280,7 @@ class TestMail(TestCase):
self.assertEqual(self.async_task.call_count, 0)
self.assertEqual(len(self.bogus_mailbox.messages), 3)
self.assertEqual(len(self.bogus_mailbox.messages_spam), 0)
handle_mail_account(account)
self.mail_account_handler.handle_mail_account(account)
self.assertEqual(self.async_task.call_count, 1)
self.assertEqual(len(self.bogus_mailbox.messages), 2)
self.assertEqual(len(self.bogus_mailbox.messages_spam), 1)
@@ -281,7 +289,7 @@ class TestMail(TestCase):
account = MailAccount.objects.create(name="test", imap_server="", username="admin", password="wrong")
try:
handle_mail_account(account)
self.mail_account_handler.handle_mail_account(account)
except MailError as e:
self.assertTrue(str(e).startswith("Error while authenticating account"))
else:
@@ -291,7 +299,7 @@ class TestMail(TestCase):
rule = MailRule.objects.create(name="testrule", account=account, folder="uuuh")
try:
handle_mail_account(account)
self.mail_account_handler.handle_mail_account(account)
except MailError as e:
self.assertTrue("uuuh does not exist" in str(e))
else:
@@ -302,7 +310,7 @@ class TestMail(TestCase):
rule = MailRule.objects.create(name="testrule", account=account, action=MailRule.ACTION_MOVE, action_parameter="doesnotexist", filter_subject="Claim")
try:
handle_mail_account(account)
self.mail_account_handler.handle_mail_account(account)
except MailError as e:
self.assertTrue("Error while processing post-consume actions" in str(e))
else:
@@ -316,7 +324,7 @@ class TestMail(TestCase):
self.assertEqual(self.async_task.call_count, 0)
self.assertEqual(len(self.bogus_mailbox.messages), 3)
handle_mail_account(account)
self.mail_account_handler.handle_mail_account(account)
self.assertEqual(len(self.bogus_mailbox.messages), 2)
self.assertEqual(self.async_task.call_count, 1)
@@ -326,7 +334,7 @@ class TestMail(TestCase):
rule.filter_body = "electronic"
rule.save()
self.assertEqual(len(self.bogus_mailbox.messages), 3)
handle_mail_account(account)
self.mail_account_handler.handle_mail_account(account)
self.assertEqual(len(self.bogus_mailbox.messages), 2)
self.assertEqual(self.async_task.call_count, 2)
@@ -336,7 +344,7 @@ class TestMail(TestCase):
rule.filter_body = None
rule.save()
self.assertEqual(len(self.bogus_mailbox.messages), 3)
handle_mail_account(account)
self.mail_account_handler.handle_mail_account(account)
self.assertEqual(len(self.bogus_mailbox.messages), 1)
self.assertEqual(self.async_task.call_count, 4)
@@ -347,6 +355,6 @@ class TestMail(TestCase):
rule.filter_subject = "Invoice"
rule.save()
self.assertEqual(len(self.bogus_mailbox.messages), 3)
handle_mail_account(account)
self.mail_account_handler.handle_mail_account(account)
self.assertEqual(len(self.bogus_mailbox.messages), 2)
self.assertEqual(self.async_task.call_count, 5)