Merge pull request #2718 from paperless-ngx/feature/2396-better-mail-actions

Feature/2396 better mail actions
This commit is contained in:
Jonas Winkler 2023-03-02 23:14:04 +01:00 committed by GitHub
commit e9e3d75383
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 706 additions and 309 deletions

View File

@ -3,6 +3,7 @@ from django.contrib import admin
from django.utils.translation import gettext_lazy as _
from paperless_mail.models import MailAccount
from paperless_mail.models import MailRule
from paperless_mail.models import ProcessedMail
class MailAccountAdminForm(forms.ModelForm):
@ -105,5 +106,33 @@ class MailRuleAdmin(admin.ModelAdmin):
ordering = ["order"]
class ProcessedMailAdmin(admin.ModelAdmin):
class Meta:
model = ProcessedMail
fields = "__all__"
list_display = ("rule", "processed", "status", "subject", "received")
ordering = ["-processed"]
readonly_fields = [
"owner",
"rule",
"folder",
"uid",
"subject",
"received",
"processed",
"status",
"error",
]
list_display_links = ["subject"]
list_filter = ("status", "rule")
admin.site.register(MailAccount, MailAccountAdmin)
admin.site.register(MailRule, MailRuleAdmin)
admin.site.register(ProcessedMail, ProcessedMailAdmin)

View File

@ -1,14 +1,22 @@
import datetime
import itertools
import logging
import os
import re
import tempfile
import traceback
from datetime import date
from datetime import timedelta
from fnmatch import fnmatch
from typing import Dict
from typing import List
from typing import Union
import magic
import pathvalidate
from celery import chord
from celery import shared_task
from celery.canvas import Signature
from django.conf import settings
from django.db import DatabaseError
from documents.loggers import LoggingMixin
@ -23,8 +31,10 @@ from imap_tools import MailMessage
from imap_tools import MailMessageFlags
from imap_tools import NOT
from imap_tools.mailbox import MailBoxTls
from imap_tools.query import LogicOperator
from paperless_mail.models import MailAccount
from paperless_mail.models import MailRule
from paperless_mail.models import ProcessedMail
# Apple Mail sets multiple IMAP KEYWORD and the general "\Flagged" FLAG
# imaplib => conn.fetch(b"<message_id>", "FLAGS")
@ -54,40 +64,76 @@ class MailError(Exception):
class BaseMailAction:
def get_criteria(self) -> Dict:
"""
Base class for mail actions. A mail action is performed on a mail after
consumption of the document is complete and is used to signal to the user
that this mail was processed by paperless via the mail client.
Furthermore, mail actions reduce the amount of mails to be analyzed by
excluding mails on which the action was already performed (i.e., excluding
read mails when the action is to mark mails as read).
"""
def get_criteria(self) -> Union[Dict, LogicOperator]:
"""
Returns filtering criteria/query for this mail action.
"""
return {}
def post_consume(self, M, message_uids, parameter):
pass # pragma: nocover
def post_consume(self, M: MailBox, message_uid: str, parameter: str):
"""
Perform mail action on the given mail uid in the mailbox.
"""
raise NotImplementedError()
class DeleteMailAction(BaseMailAction):
def post_consume(self, M, message_uids, parameter):
M.delete(message_uids)
"""
A mail action that deletes mails after processing.
"""
def post_consume(self, M: MailBox, message_uid: str, parameter: str):
M.delete(message_uid)
class MarkReadMailAction(BaseMailAction):
"""
A mail action that marks mails as read after processing.
"""
def get_criteria(self):
return {"seen": False}
def post_consume(self, M, message_uids, parameter):
M.flag(message_uids, [MailMessageFlags.SEEN], True)
def post_consume(self, M: MailBox, message_uid: str, parameter: str):
M.flag(message_uid, [MailMessageFlags.SEEN], True)
class MoveMailAction(BaseMailAction):
def post_consume(self, M, message_uids, parameter):
M.move(message_uids, parameter)
"""
A mail action that moves mails to a different folder after processing.
"""
def post_consume(self, M, message_uid, parameter):
M.move(message_uid, parameter)
class FlagMailAction(BaseMailAction):
"""
A mail action that marks mails as important ("star") after processing.
"""
def get_criteria(self):
return {"flagged": False}
def post_consume(self, M, message_uids, parameter):
M.flag(message_uids, [MailMessageFlags.FLAGGED], True)
def post_consume(self, M: MailBox, message_uid: str, parameter: str):
M.flag(message_uid, [MailMessageFlags.FLAGGED], True)
class TagMailAction(BaseMailAction):
"""
A mail action that tags mails after processing.
"""
def __init__(self, parameter):
# The custom tag should look like "apple:<color>"
@ -110,40 +156,182 @@ class TagMailAction(BaseMailAction):
# AppleMail: We only need to check if mails are \Flagged
if self.color:
return {"flagged": False}
elif self.keyword:
return AND(NOT(gmail_label=self.keyword), no_keyword=self.keyword)
else:
raise ValueError("This should never happen.")
return {"no_keyword": self.keyword, "gmail_label": self.keyword}
def post_consume(self, M: MailBox, message_uids, parameter):
def post_consume(self, M: MailBox, message_uid: str, parameter: str):
if re.search(r"gmail\.com$|googlemail\.com$", M._host):
for uid in message_uids:
M.client.uid("STORE", uid, "X-GM-LABELS", self.keyword)
M.client.uid("STORE", message_uid, "X-GM-LABELS", self.keyword)
# AppleMail
elif self.color:
# Remove all existing $MailFlagBits
M.flag(
message_uids,
message_uid,
set(itertools.chain(*APPLE_MAIL_TAG_COLORS.values())),
False,
)
# Set new $MailFlagBits
M.flag(message_uids, APPLE_MAIL_TAG_COLORS.get(self.color), True)
M.flag(message_uid, APPLE_MAIL_TAG_COLORS.get(self.color), True)
# Set the general \Flagged
# This defaults to the "red" flag in AppleMail and
# "stars" in Thunderbird or GMail
M.flag(message_uids, [MailMessageFlags.FLAGGED], True)
M.flag(message_uid, [MailMessageFlags.FLAGGED], True)
elif self.keyword:
M.flag(message_uids, [self.keyword], True)
M.flag(message_uid, [self.keyword], True)
else:
raise MailError("No keyword specified.")
def mailbox_login(mailbox: MailBox, account: MailAccount):
logger = logging.getLogger("paperless_mail")
try:
mailbox.login(account.username, account.password)
except UnicodeEncodeError:
logger.debug("Falling back to AUTH=PLAIN")
try:
mailbox.login_utf8(account.username, account.password)
except Exception as e:
logger.error(
"Unable to authenticate with mail server using AUTH=PLAIN",
)
raise MailError(
f"Error while authenticating account {account}",
) from e
except Exception as e:
logger.error(
f"Error while authenticating account {account}: {e}",
exc_info=False,
)
raise MailError(
f"Error while authenticating account {account}",
) from e
@shared_task
def apply_mail_action(
result: List[str],
rule_id: int,
message_uid: str,
message_subject: str,
message_date: datetime.datetime,
):
"""
This shared task applies the mail action of a particular mail rule to the
given mail. Creates a ProcessedMail object, so that the mail won't be
processed in the future.
"""
rule = MailRule.objects.get(pk=rule_id)
account = MailAccount.objects.get(pk=rule.account.pk)
try:
action = get_rule_action(rule)
with get_mailbox(
server=account.imap_server,
port=account.imap_port,
security=account.imap_security,
) as M:
mailbox_login(M, account)
M.folder.set(rule.folder)
action.post_consume(M, message_uid, rule.action_parameter)
ProcessedMail.objects.create(
owner=rule.owner,
rule=rule,
folder=rule.folder,
uid=message_uid,
subject=message_subject,
received=message_date,
status="SUCCESS",
)
except Exception as e:
ProcessedMail.objects.create(
owner=rule.owner,
rule=rule,
folder=rule.folder,
uid=message_uid,
subject=message_subject,
received=message_date,
status="FAILED",
error=traceback.format_exc(),
)
raise e
@shared_task
def error_callback(
request,
exc,
tb,
rule_id: int,
message_uid: str,
message_subject: str,
message_date: datetime.datetime,
):
"""
A shared task that is called whenever something goes wrong during
consumption of a file. See queue_consumption_tasks.
"""
rule = MailRule.objects.get(pk=rule_id)
ProcessedMail.objects.create(
rule=rule,
folder=rule.folder,
uid=message_uid,
subject=message_subject,
received=message_date,
status="FAILED",
error=traceback.format_exc(),
)
def queue_consumption_tasks(
*,
consume_tasks: List[Signature],
rule: MailRule,
message: MailMessage,
):
"""
Queue a list of consumption tasks (Signatures for the consume_file shared
task) with celery.
"""
mail_action_task = apply_mail_action.s(
rule_id=rule.pk,
message_uid=message.uid,
message_subject=message.subject,
message_date=message.date,
)
chord(header=consume_tasks, body=mail_action_task).on_error(
error_callback.s(
rule_id=rule.pk,
message_uid=message.uid,
message_subject=message.subject,
message_date=message.date,
),
).delay()
def get_rule_action(rule) -> BaseMailAction:
"""
Returns a BaseMailAction instance for the given rule.
"""
if rule.action == MailRule.MailAction.FLAG:
return FlagMailAction()
elif rule.action == MailRule.MailAction.DELETE:
@ -159,6 +347,10 @@ def get_rule_action(rule) -> BaseMailAction:
def make_criterias(rule):
"""
Returns criteria to be applied to MailBox.fetch for the given rule.
"""
maximum_age = date.today() - timedelta(days=rule.maximum_age)
criterias = {}
if rule.maximum_age > 0:
@ -170,10 +362,18 @@ def make_criterias(rule):
if rule.filter_body:
criterias["body"] = rule.filter_body
return {**criterias, **get_rule_action(rule).get_criteria()}
rule_query = get_rule_action(rule).get_criteria()
if isinstance(rule_query, dict):
return AND(**rule_query, **criterias)
else:
return AND(rule_query, **criterias)
def get_mailbox(server, port, security) -> MailBox:
"""
Returns the correct MailBox instance for the given configuration.
"""
if security == MailAccount.ImapSecurity.NONE:
mailbox = MailBoxUnencrypted(server, port)
elif security == MailAccount.ImapSecurity.STARTTLS:
@ -186,6 +386,16 @@ def get_mailbox(server, port, security) -> MailBox:
class MailAccountHandler(LoggingMixin):
"""
The main class that handles mail accounts.
* processes all rules for a given mail account
* for each mail rule, fetches relevant mails, and queues documents from
matching mails for consumption
* marks processed mails in the database, so that they won't be processed
again
* runs mail actions on the mail server, when consumption is completed
"""
logging_name = "paperless_mail"
@ -196,7 +406,7 @@ class MailAccountHandler(LoggingMixin):
self.log("error", f"Error while retrieving correspondent {name}: {e}")
return None
def get_title(self, message, att, rule):
def _get_title(self, message, att, rule):
if rule.assign_title_from == MailRule.TitleSource.FROM_SUBJECT:
return message.subject
@ -208,7 +418,7 @@ class MailAccountHandler(LoggingMixin):
"Unknown title selector.",
) # pragma: nocover
def get_correspondent(self, message: MailMessage, rule):
def _get_correspondent(self, message: MailMessage, rule):
c_from = rule.assign_correspondent_from
if c_from == MailRule.CorrespondentSource.FROM_NOTHING:
@ -233,6 +443,9 @@ class MailAccountHandler(LoggingMixin):
) # pragma: nocover
def handle_mail_account(self, account: MailAccount):
"""
Main entry method to handle a specific mail account.
"""
self.renew_logging_group()
@ -252,32 +465,7 @@ class MailAccountHandler(LoggingMixin):
self.log("debug", f"GMAIL Label Support: {supports_gmail_labels}")
self.log("debug", f"AUTH=PLAIN Support: {supports_auth_plain}")
try:
M.login(account.username, account.password)
except UnicodeEncodeError:
self.log("debug", "Falling back to AUTH=PLAIN")
try:
M.login_utf8(account.username, account.password)
except Exception as e:
self.log(
"error",
"Unable to authenticate with mail server using AUTH=PLAIN",
)
raise MailError(
f"Error while authenticating account {account}",
) from e
except Exception as e:
self.log(
"error",
f"Error while authenticating account {account}: {e}",
exc_info=False,
)
raise MailError(
f"Error while authenticating account {account}",
) from e
mailbox_login(M, account)
self.log(
"debug",
@ -287,10 +475,9 @@ class MailAccountHandler(LoggingMixin):
for rule in account.rules.order_by("order"):
try:
total_processed_files += self.handle_mail_rule(
total_processed_files += self._handle_mail_rule(
M,
rule,
supports_gmail_labels,
)
except Exception as e:
self.log(
@ -309,11 +496,10 @@ class MailAccountHandler(LoggingMixin):
return total_processed_files
def handle_mail_rule(
def _handle_mail_rule(
self,
M: MailBox,
rule: MailRule,
supports_gmail_labels: bool = False,
):
self.log("debug", f"Rule {rule}: Selecting folder {rule.folder}")
@ -343,27 +529,14 @@ class MailAccountHandler(LoggingMixin):
criterias = make_criterias(rule)
# Deal with the Gmail label extension
if "gmail_label" in criterias:
gmail_label = criterias["gmail_label"]
del criterias["gmail_label"]
if not supports_gmail_labels:
criterias_imap = AND(**criterias)
else:
criterias_imap = AND(NOT(gmail_label=gmail_label), **criterias)
else:
criterias_imap = AND(**criterias)
self.log(
"debug",
f"Rule {rule}: Searching folder with criteria " f"{str(criterias_imap)}",
f"Rule {rule}: Searching folder with criteria " f"{str(criterias)}",
)
try:
messages = M.fetch(
criteria=criterias_imap,
criteria=criterias,
mark_seen=False,
charset=rule.account.character_set,
)
@ -372,16 +545,20 @@ class MailAccountHandler(LoggingMixin):
f"Rule {rule}: Error while fetching folder {rule.folder}",
) from err
post_consume_messages = []
mails_processed = 0
total_processed_files = 0
for message in messages:
if ProcessedMail.objects.filter(
rule=rule,
uid=message.uid,
folder=rule.folder,
).exists():
self.log("debug", f"Skipping mail {message}, already processed.")
continue
try:
processed_files = self.handle_message(message, rule)
if processed_files > 0:
post_consume_messages.append(message.uid)
processed_files = self._handle_message(message, rule)
total_processed_files += processed_files
mails_processed += 1
@ -394,27 +571,9 @@ class MailAccountHandler(LoggingMixin):
self.log("debug", f"Rule {rule}: Processed {mails_processed} matching mail(s)")
self.log(
"debug",
f"Rule {rule}: Running mail actions on "
f"{len(post_consume_messages)} mails",
)
try:
get_rule_action(rule).post_consume(
M,
post_consume_messages,
rule.action_parameter,
)
except Exception as e:
raise MailError(
f"Rule {rule}: Error while processing post-consume actions: " f"{e}",
) from e
return total_processed_files
def handle_message(self, message, rule: MailRule) -> int:
def _handle_message(self, message, rule: MailRule) -> int:
processed_elements = 0
# Skip Message handling when only attachments are to be processed but
@ -432,7 +591,7 @@ class MailAccountHandler(LoggingMixin):
f"{len(message.attachments)} attachment(s)",
)
correspondent = self.get_correspondent(message, rule)
correspondent = self._get_correspondent(message, rule)
tag_ids = [tag.id for tag in rule.assign_tags.all()]
doc_type = rule.assign_document_type
@ -440,7 +599,7 @@ class MailAccountHandler(LoggingMixin):
rule.consumption_scope == MailRule.ConsumptionScope.EML_ONLY
or rule.consumption_scope == MailRule.ConsumptionScope.EVERYTHING
):
processed_elements += self.process_eml(
processed_elements += self._process_eml(
message,
rule,
correspondent,
@ -452,7 +611,7 @@ class MailAccountHandler(LoggingMixin):
rule.consumption_scope == MailRule.ConsumptionScope.ATTACHMENTS_ONLY
or rule.consumption_scope == MailRule.ConsumptionScope.EVERYTHING
):
processed_elements += self.process_attachments(
processed_elements += self._process_attachments(
message,
rule,
correspondent,
@ -462,7 +621,7 @@ class MailAccountHandler(LoggingMixin):
return processed_elements
def process_attachments(
def _process_attachments(
self,
message: MailMessage,
rule: MailRule,
@ -471,6 +630,9 @@ class MailAccountHandler(LoggingMixin):
doc_type,
):
processed_attachments = 0
consume_tasks = list()
for att in message.attachments:
if (
@ -495,7 +657,7 @@ class MailAccountHandler(LoggingMixin):
):
continue
title = self.get_title(message, att, rule)
title = self._get_title(message, att, rule)
# don't trust the content type of the attachment. Could be
# generic application/octet-stream.
@ -518,7 +680,7 @@ class MailAccountHandler(LoggingMixin):
f"{message.subject} from {message.from_}",
)
consume_file.delay(
consume_task = consume_file.s(
path=temp_filename,
override_filename=pathvalidate.sanitize_filename(
att.filename,
@ -532,6 +694,8 @@ class MailAccountHandler(LoggingMixin):
override_owner_id=rule.owner.id if rule.owner else None,
)
consume_tasks.append(consume_task)
processed_attachments += 1
else:
self.log(
@ -541,9 +705,16 @@ class MailAccountHandler(LoggingMixin):
f"since guessed mime type {mime_type} is not supported "
f"by paperless",
)
queue_consumption_tasks(
consume_tasks=consume_tasks,
rule=rule,
message=message,
)
return processed_attachments
def process_eml(
def _process_eml(
self,
message: MailMessage,
rule: MailRule,
@ -585,7 +756,7 @@ class MailAccountHandler(LoggingMixin):
f"{message.subject} from {message.from_}",
)
consume_file.delay(
consume_task = consume_file.s(
path=temp_filename,
override_filename=pathvalidate.sanitize_filename(
message.subject + ".eml",
@ -596,5 +767,12 @@ class MailAccountHandler(LoggingMixin):
override_tag_ids=tag_ids,
override_owner_id=rule.owner.id if rule.owner else None,
)
queue_consumption_tasks(
consume_tasks=[consume_task],
rule=rule,
message=message,
)
processed_elements = 1
return processed_elements

View File

@ -0,0 +1,66 @@
# Generated by Django 4.1.5 on 2023-02-21 17:15
from django.conf import settings
from django.db import migrations, models
import django.db.models.deletion
import django.utils.timezone
class Migration(migrations.Migration):
dependencies = [
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
("paperless_mail", "0017_mailaccount_owner_mailrule_owner"),
]
operations = [
migrations.CreateModel(
name="ProcessedMail",
fields=[
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
("folder", models.CharField(max_length=256, verbose_name="folder")),
("uid", models.CharField(max_length=256, verbose_name="folder")),
("subject", models.CharField(max_length=256, verbose_name="subject")),
("received", models.DateTimeField(verbose_name="received")),
(
"processed",
models.DateTimeField(
default=django.utils.timezone.now, verbose_name="processed"
),
),
("status", models.CharField(max_length=256, verbose_name="status")),
(
"error",
models.TextField(blank=True, null=True, verbose_name="error"),
),
(
"owner",
models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
to=settings.AUTH_USER_MODEL,
verbose_name="owner",
),
),
(
"rule",
models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
to="paperless_mail.mailrule",
),
),
],
options={
"abstract": False,
},
),
]

View File

@ -1,5 +1,6 @@
import documents.models as document_models
from django.db import models
from django.utils import timezone
from django.utils.translation import gettext_lazy as _
@ -214,3 +215,66 @@ class MailRule(document_models.ModelWithOwner):
def __str__(self):
return f"{self.account.name}.{self.name}"
class ProcessedMail(document_models.ModelWithOwner):
rule = models.ForeignKey(
MailRule,
null=False,
blank=False,
on_delete=models.CASCADE,
editable=False,
)
folder = models.CharField(
_("folder"),
null=False,
blank=False,
max_length=256,
editable=False,
)
uid = models.CharField(
_("uid"),
null=False,
blank=False,
max_length=256,
editable=False,
)
subject = models.CharField(
_("subject"),
null=False,
blank=False,
max_length=256,
editable=False,
)
received = models.DateTimeField(
_("received"),
null=False,
blank=False,
editable=False,
)
processed = models.DateTimeField(
_("processed"),
default=timezone.now,
editable=False,
)
status = models.CharField(
_("status"),
null=False,
blank=False,
max_length=256,
editable=False,
)
error = models.TextField(
_("error"),
null=True,
blank=True,
editable=False,
)

View File

@ -23,6 +23,7 @@ from imap_tools import MailMessage
from imap_tools import MailMessageFlags
from imap_tools import NOT
from paperless_mail import tasks
from paperless_mail.mail import apply_mail_action
from paperless_mail.mail import MailAccountHandler
from paperless_mail.mail import MailError
from paperless_mail.mail import TagMailAction
@ -168,69 +169,6 @@ class BogusMailBox(ContextManager):
raise Exception()
_used_uids = set()
def create_message(
attachments: Union[int, List[_AttachmentDef]] = 1,
body: str = "",
subject: str = "the suject",
from_: str = "noone@mail.com",
seen: bool = False,
flagged: bool = False,
processed: bool = False,
) -> MailMessage:
email_msg = email.message.EmailMessage()
# TODO: This does NOT set the UID
email_msg["Message-ID"] = str(uuid.uuid4())
email_msg["Subject"] = subject
email_msg["From"] = from_
email_msg.set_content(body)
# Either add some default number of attachments
# or the provided attachments
if isinstance(attachments, int):
for i in range(attachments):
attachment = _AttachmentDef(filename=f"file_{i}.pdf")
email_msg.add_attachment(
attachment.content,
maintype=attachment.maintype,
subtype=attachment.subtype,
disposition=attachment.disposition,
filename=attachment.filename,
)
else:
for attachment in attachments:
email_msg.add_attachment(
attachment.content,
maintype=attachment.maintype,
subtype=attachment.subtype,
disposition=attachment.disposition,
filename=attachment.filename,
)
# Convert the EmailMessage to an imap_tools MailMessage
imap_msg = MailMessage.from_bytes(email_msg.as_bytes())
# TODO: Unsure how to add a uid to the actual EmailMessage. This hacks it in,
# based on how imap_tools uses regex to extract it.
# This should be a large enough pool
uid = random.randint(1, 10000)
while uid in _used_uids:
uid = random.randint(1, 10000)
_used_uids.add(uid)
imap_msg._raw_uid_data = f"UID {uid}".encode()
imap_msg.seen = seen
imap_msg.flagged = flagged
if processed:
imap_msg._raw_flag_data.append(b"+FLAGS (processed)")
MailMessage.flags.fget.cache_clear()
return imap_msg
def fake_magic_from_buffer(buffer, mime=False):
if mime:
if "PDF" in str(buffer):
@ -244,14 +182,17 @@ def fake_magic_from_buffer(buffer, mime=False):
@mock.patch("paperless_mail.mail.magic.from_buffer", fake_magic_from_buffer)
class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
def setUp(self):
self._used_uids = set()
self.bogus_mailbox = BogusMailBox()
patcher = mock.patch("paperless_mail.mail.MailBox")
m = patcher.start()
self.bogus_mailbox = BogusMailBox()
m.return_value = self.bogus_mailbox
self.addCleanup(patcher.stop)
patcher = mock.patch("paperless_mail.mail.consume_file.delay")
self.async_task = patcher.start()
patcher = mock.patch("paperless_mail.mail.queue_consumption_tasks")
self._queue_consumption_tasks_mock = patcher.start()
self.addCleanup(patcher.stop)
self.reset_bogus_mailbox()
@ -259,11 +200,71 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
self.mail_account_handler = MailAccountHandler()
super().setUp()
def create_message(
self,
attachments: Union[int, List[_AttachmentDef]] = 1,
body: str = "",
subject: str = "the suject",
from_: str = "noone@mail.com",
seen: bool = False,
flagged: bool = False,
processed: bool = False,
) -> MailMessage:
email_msg = email.message.EmailMessage()
# TODO: This does NOT set the UID
email_msg["Message-ID"] = str(uuid.uuid4())
email_msg["Subject"] = subject
email_msg["From"] = from_
email_msg.set_content(body)
# Either add some default number of attachments
# or the provided attachments
if isinstance(attachments, int):
for i in range(attachments):
attachment = _AttachmentDef(filename=f"file_{i}.pdf")
email_msg.add_attachment(
attachment.content,
maintype=attachment.maintype,
subtype=attachment.subtype,
disposition=attachment.disposition,
filename=attachment.filename,
)
else:
for attachment in attachments:
email_msg.add_attachment(
attachment.content,
maintype=attachment.maintype,
subtype=attachment.subtype,
disposition=attachment.disposition,
filename=attachment.filename,
)
# Convert the EmailMessage to an imap_tools MailMessage
imap_msg = MailMessage.from_bytes(email_msg.as_bytes())
# TODO: Unsure how to add a uid to the actual EmailMessage. This hacks it in,
# based on how imap_tools uses regex to extract it.
# This should be a large enough pool
uid = random.randint(1, 10000)
while uid in self._used_uids:
uid = random.randint(1, 10000)
self._used_uids.add(uid)
imap_msg._raw_uid_data = f"UID {uid}".encode()
imap_msg.seen = seen
imap_msg.flagged = flagged
if processed:
imap_msg._raw_flag_data.append(b"+FLAGS (processed)")
MailMessage.flags.fget.cache_clear()
return imap_msg
def reset_bogus_mailbox(self):
self.bogus_mailbox.messages = []
self.bogus_mailbox.messages_spam = []
self.bogus_mailbox.messages.append(
create_message(
self.create_message(
subject="Invoice 1",
from_="amazon@amazon.de",
body="cables",
@ -273,7 +274,7 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
),
)
self.bogus_mailbox.messages.append(
create_message(
self.create_message(
subject="Invoice 2",
body="from my favorite electronic store",
seen=False,
@ -282,7 +283,7 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
),
)
self.bogus_mailbox.messages.append(
create_message(
self.create_message(
subject="Claim your $10M price now!",
from_="amazon@amazon-some-indian-site.org",
seen=False,
@ -314,16 +315,16 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
name="a",
assign_correspondent_from=MailRule.CorrespondentSource.FROM_NOTHING,
)
self.assertIsNone(handler.get_correspondent(message, rule))
self.assertIsNone(handler._get_correspondent(message, rule))
rule = MailRule(
name="b",
assign_correspondent_from=MailRule.CorrespondentSource.FROM_EMAIL,
)
c = handler.get_correspondent(message, rule)
c = handler._get_correspondent(message, rule)
self.assertIsNotNone(c)
self.assertEqual(c.name, "someone@somewhere.com")
c = handler.get_correspondent(message2, rule)
c = handler._get_correspondent(message2, rule)
self.assertIsNotNone(c)
self.assertEqual(c.name, "me@localhost.com")
self.assertEqual(c.id, me_localhost.id)
@ -332,10 +333,10 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
name="c",
assign_correspondent_from=MailRule.CorrespondentSource.FROM_NAME,
)
c = handler.get_correspondent(message, rule)
c = handler._get_correspondent(message, rule)
self.assertIsNotNone(c)
self.assertEqual(c.name, "Someone!")
c = handler.get_correspondent(message2, rule)
c = handler._get_correspondent(message2, rule)
self.assertIsNotNone(c)
self.assertEqual(c.id, me_localhost.id)
@ -344,7 +345,7 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
assign_correspondent_from=MailRule.CorrespondentSource.FROM_CUSTOM,
assign_correspondent=someone_else,
)
c = handler.get_correspondent(message, rule)
c = handler._get_correspondent(message, rule)
self.assertEqual(c, someone_else)
def test_get_title(self):
@ -359,15 +360,15 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
name="a",
assign_title_from=MailRule.TitleSource.FROM_FILENAME,
)
self.assertEqual(handler.get_title(message, att, rule), "this_is_the_file")
self.assertEqual(handler._get_title(message, att, rule), "this_is_the_file")
rule = MailRule(
name="b",
assign_title_from=MailRule.TitleSource.FROM_SUBJECT,
)
self.assertEqual(handler.get_title(message, att, rule), "the message title")
self.assertEqual(handler._get_title(message, att, rule), "the message title")
def test_handle_message(self):
message = create_message(
message = self.create_message(
subject="the message title",
from_="Myself",
attachments=2,
@ -381,24 +382,18 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
)
rule.save()
result = self.mail_account_handler.handle_message(message, rule)
result = self.mail_account_handler._handle_message(message, rule)
self.assertEqual(result, 2)
self.assertEqual(len(self.async_task.call_args_list), 2)
args1, kwargs1 = self.async_task.call_args_list[0]
args2, kwargs2 = self.async_task.call_args_list[1]
self.assertIsFile(kwargs1["path"])
self.assertEqual(kwargs1["override_title"], "file_0")
self.assertEqual(kwargs1["override_filename"], "file_0.pdf")
self.assertIsFile(kwargs2["path"])
self.assertEqual(kwargs2["override_title"], "file_1")
self.assertEqual(kwargs2["override_filename"], "file_1.pdf")
self.assert_queue_consumption_tasks_call_args(
[
[
{"override_title": "file_0", "override_filename": "file_0.pdf"},
{"override_title": "file_1", "override_filename": "file_1.pdf"},
],
],
)
def test_handle_empty_message(self):
message = namedtuple("MailMessage", [])
@ -406,13 +401,13 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
message.attachments = []
rule = MailRule()
result = self.mail_account_handler.handle_message(message, rule)
result = self.mail_account_handler._handle_message(message, rule)
self.assertFalse(self.async_task.called)
self.assertFalse(self._queue_consumption_tasks_mock.called)
self.assertEqual(result, 0)
def test_handle_unknown_mime_type(self):
message = create_message(
message = self.create_message(
attachments=[
_AttachmentDef(filename="f1.pdf"),
_AttachmentDef(
@ -430,17 +425,19 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
)
rule.save()
result = self.mail_account_handler.handle_message(message, rule)
result = self.mail_account_handler._handle_message(message, rule)
self.assertEqual(result, 1)
self.assertEqual(self.async_task.call_count, 1)
args, kwargs = self.async_task.call_args
self.assertIsFile(kwargs["path"])
self.assertEqual(kwargs["override_filename"], "f1.pdf")
self.assert_queue_consumption_tasks_call_args(
[
[
{"override_filename": "f1.pdf"},
],
],
)
def test_handle_disposition(self):
message = create_message(
message = self.create_message(
attachments=[
_AttachmentDef(
filename="f1.pdf",
@ -458,16 +455,18 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
)
rule.save()
result = self.mail_account_handler.handle_message(message, rule)
result = self.mail_account_handler._handle_message(message, rule)
self.assertEqual(result, 1)
self.assertEqual(self.async_task.call_count, 1)
args, kwargs = self.async_task.call_args
self.assertEqual(kwargs["override_filename"], "f2.pdf")
self.assert_queue_consumption_tasks_call_args(
[
[
{"override_filename": "f2.pdf"},
],
],
)
def test_handle_inline_files(self):
message = create_message(
message = self.create_message(
attachments=[
_AttachmentDef(
filename="f1.pdf",
@ -486,13 +485,19 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
)
rule.save()
result = self.mail_account_handler.handle_message(message, rule)
result = self.mail_account_handler._handle_message(message, rule)
self.assertEqual(result, 2)
self.assertEqual(self.async_task.call_count, 2)
self.assert_queue_consumption_tasks_call_args(
[
[
{"override_filename": "f1.pdf"},
{"override_filename": "f2.pdf"},
],
],
)
def test_filename_filter(self):
message = create_message(
message = self.create_message(
attachments=[
_AttachmentDef(filename="f1.pdf"),
_AttachmentDef(filename="f2.pdf"),
@ -504,33 +509,33 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
)
tests = [
("*.pdf", ["f1.pdf", "f1.Pdf", "f2.pdf", "f3.pdf", "file.PDf"]),
("*.pdf", ["f1.pdf", "f2.pdf", "f3.pdf", "file.PDf", "f1.Pdf"]),
("f1.pdf", ["f1.pdf", "f1.Pdf"]),
("f1", []),
("*", ["f1.pdf", "f2.pdf", "f3.pdf", "f2.png", "f1.Pdf", "file.PDf"]),
("*", ["f1.pdf", "f2.pdf", "f3.pdf", "f2.png", "file.PDf", "f1.Pdf"]),
("*.png", ["f2.png"]),
]
for (pattern, matches) in tests:
matches.sort()
self.async_task.reset_mock()
account = MailAccount(name=str(uuid.uuid4()))
account.save()
rule = MailRule(
name=str(uuid.uuid4()),
assign_title_from=MailRule.TitleSource.FROM_FILENAME,
account=account,
filter_attachment_filename=pattern,
)
rule.save()
with self.subTest(msg=pattern):
print(f"PATTERN {pattern}")
self._queue_consumption_tasks_mock.reset_mock()
account = MailAccount(name=str(uuid.uuid4()))
account.save()
rule = MailRule(
name=str(uuid.uuid4()),
assign_title_from=MailRule.TitleSource.FROM_FILENAME,
account=account,
filter_attachment_filename=pattern,
)
rule.save()
result = self.mail_account_handler.handle_message(message, rule)
self.assertEqual(result, len(matches), f"Error with pattern: {pattern}")
filenames = sorted(
a[1]["override_filename"] for a in self.async_task.call_args_list
)
self.assertListEqual(filenames, matches)
self.mail_account_handler._handle_message(message, rule)
self.assert_queue_consumption_tasks_call_args(
[
[{"override_filename": m} for m in matches],
],
)
def test_handle_mail_account_mark_read(self):
@ -548,10 +553,11 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
)
self.assertEqual(len(self.bogus_mailbox.messages), 3)
self.assertEqual(self.async_task.call_count, 0)
self.assertEqual(len(self.bogus_mailbox.fetch("UNSEEN", False)), 2)
self.mail_account_handler.handle_mail_account(account)
self.assertEqual(self.async_task.call_count, 2)
self.apply_mail_actions()
self.assertEqual(len(self.bogus_mailbox.fetch("UNSEEN", False)), 0)
self.assertEqual(len(self.bogus_mailbox.messages), 3)
@ -571,10 +577,11 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
filter_subject="Invoice",
)
self.assertEqual(self.async_task.call_count, 0)
self.assertEqual(len(self.bogus_mailbox.messages), 3)
self.mail_account_handler.handle_mail_account(account)
self.assertEqual(self.async_task.call_count, 2)
self.apply_mail_actions()
self.assertEqual(len(self.bogus_mailbox.messages), 1)
def test_handle_mail_account_flag(self):
@ -593,10 +600,11 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
)
self.assertEqual(len(self.bogus_mailbox.messages), 3)
self.assertEqual(self.async_task.call_count, 0)
self.assertEqual(len(self.bogus_mailbox.fetch("UNFLAGGED", False)), 2)
self.mail_account_handler.handle_mail_account(account)
self.assertEqual(self.async_task.call_count, 1)
self.apply_mail_actions()
self.assertEqual(len(self.bogus_mailbox.fetch("UNFLAGGED", False)), 1)
self.assertEqual(len(self.bogus_mailbox.messages), 3)
@ -616,13 +624,12 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
filter_subject="Claim",
)
self.assertEqual(self.async_task.call_count, 0)
self.assertEqual(len(self.bogus_mailbox.messages), 3)
self.assertEqual(len(self.bogus_mailbox.messages_spam), 0)
self.mail_account_handler.handle_mail_account(account)
self.apply_mail_actions()
self.assertEqual(self.async_task.call_count, 1)
self.assertEqual(len(self.bogus_mailbox.messages), 2)
self.assertEqual(len(self.bogus_mailbox.messages_spam), 1)
@ -642,12 +649,13 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
)
self.assertEqual(len(self.bogus_mailbox.messages), 3)
self.assertEqual(self.async_task.call_count, 0)
self.assertEqual(len(self.bogus_mailbox.fetch("UNKEYWORD processed", False)), 2)
self.mail_account_handler.handle_mail_account(account)
self.assertEqual(self.async_task.call_count, 2)
self.assertEqual(len(self.bogus_mailbox.fetch("UNKEYWORD processed", False)), 0)
self.apply_mail_actions()
self.assertEqual(len(self.bogus_mailbox.messages), 3)
self.assertEqual(len(self.bogus_mailbox.fetch("UNKEYWORD processed", False)), 0)
def test_handle_mail_account_tag_gmail(self):
self.bogus_mailbox._host = "imap.gmail.com"
@ -668,11 +676,12 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
)
self.assertEqual(len(self.bogus_mailbox.messages), 3)
self.assertEqual(self.async_task.call_count, 0)
criteria = NOT(gmail_label="processed")
self.assertEqual(len(self.bogus_mailbox.fetch(criteria, False)), 2)
self.mail_account_handler.handle_mail_account(account)
self.assertEqual(self.async_task.call_count, 2)
self.apply_mail_actions()
self.assertEqual(len(self.bogus_mailbox.fetch(criteria, False)), 0)
self.assertEqual(len(self.bogus_mailbox.messages), 3)
@ -702,10 +711,11 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
)
self.assertEqual(len(self.bogus_mailbox.messages), 3)
self.assertEqual(self.async_task.call_count, 0)
self.assertEqual(len(self.bogus_mailbox.fetch("UNFLAGGED", False)), 2)
self.mail_account_handler.handle_mail_account(account)
self.assertEqual(self.async_task.call_count, 2)
self.apply_mail_actions()
self.assertEqual(len(self.bogus_mailbox.fetch("UNFLAGGED", False)), 0)
self.assertEqual(len(self.bogus_mailbox.messages), 3)
@ -717,11 +727,11 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
password="wrong",
)
with self.assertRaises(MailError) as context:
with self.assertRaisesRegex(
MailError,
"Error while authenticating account",
) as context:
self.mail_account_handler.handle_mail_account(account)
self.assertTrue(
str(context).startswith("Error while authenticating account"),
)
def test_error_skip_account(self):
_ = MailAccount.objects.create(
@ -746,7 +756,8 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
)
tasks.process_mail_accounts()
self.assertEqual(self.async_task.call_count, 1)
self.apply_mail_actions()
self.assertEqual(len(self.bogus_mailbox.messages), 2)
self.assertEqual(len(self.bogus_mailbox.messages_spam), 1)
@ -777,7 +788,8 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
)
self.mail_account_handler.handle_mail_account(account)
self.assertEqual(self.async_task.call_count, 1)
self.apply_mail_actions()
self.assertEqual(len(self.bogus_mailbox.messages), 2)
self.assertEqual(len(self.bogus_mailbox.messages_spam), 1)
@ -812,7 +824,7 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
self.mail_account_handler.handle_mail_account(account)
self.bogus_mailbox.folder.list.assert_called_once()
self.assertEqual(self.async_task.call_count, 0)
self.assertEqual(self._queue_consumption_tasks_mock.call_count, 0)
def test_error_folder_set_error_listing(self):
"""
@ -845,9 +857,9 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
self.mail_account_handler.handle_mail_account(account)
self.bogus_mailbox.folder.list.assert_called_once()
self.assertEqual(self.async_task.call_count, 0)
self.assertEqual(self._queue_consumption_tasks_mock.call_count, 0)
@mock.patch("paperless_mail.mail.MailAccountHandler.get_correspondent")
@mock.patch("paperless_mail.mail.MailAccountHandler._get_correspondent")
def test_error_skip_mail(self, m):
def get_correspondent_fake(message, rule):
if message.from_ == "amazon@amazon.de":
@ -871,9 +883,10 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
)
self.mail_account_handler.handle_mail_account(account)
self.apply_mail_actions()
# test that we still consume mail even if some mails throw errors.
self.assertEqual(self.async_task.call_count, 2)
self.assertEqual(self._queue_consumption_tasks_mock.call_count, 2)
# faulty mail still in inbox, untouched
self.assertEqual(len(self.bogus_mailbox.messages), 1)
@ -898,14 +911,18 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
self.mail_account_handler.handle_mail_account(account)
self.async_task.assert_called_once()
args, kwargs = self.async_task.call_args
self._queue_consumption_tasks_mock.assert_called_once()
c = Correspondent.objects.get(name="amazon@amazon.de")
# should work
self.assertEqual(kwargs["override_correspondent_id"], c.id)
self.assert_queue_consumption_tasks_call_args(
[
[
{"override_correspondent_id": c.id},
],
],
)
self.async_task.reset_mock()
self._queue_consumption_tasks_mock.reset_mock()
self.reset_bogus_mailbox()
with mock.patch("paperless_mail.mail.Correspondent.objects.get_or_create") as m:
@ -913,9 +930,13 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
self.mail_account_handler.handle_mail_account(account)
args, kwargs = self.async_task.call_args
self.async_task.assert_called_once()
self.assertEqual(kwargs["override_correspondent_id"], None)
self.assert_queue_consumption_tasks_call_args(
[
[
{"override_correspondent_id": None},
],
],
)
def test_filters(self):
@ -925,50 +946,40 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
username="admin",
password="secret",
)
rule = MailRule.objects.create(
name="testrule3",
account=account,
action=MailRule.MailAction.DELETE,
filter_subject="Claim",
)
self.assertEqual(self.async_task.call_count, 0)
for (f_body, f_from, f_subject, expected_mail_count) in [
(None, None, "Claim", 1),
("electronic", None, None, 1),
(None, "amazon", None, 2),
("cables", "amazon", "Invoice", 1),
]:
with self.subTest(f_body=f_body, f_from=f_from, f_subject=f_subject):
MailRule.objects.all().delete()
rule = MailRule.objects.create(
name="testrule3",
account=account,
action=MailRule.MailAction.DELETE,
filter_subject=f_subject,
filter_body=f_body,
filter_from=f_from,
)
self.reset_bogus_mailbox()
self._queue_consumption_tasks_mock.reset_mock()
self.assertEqual(len(self.bogus_mailbox.messages), 3)
self.mail_account_handler.handle_mail_account(account)
self.assertEqual(len(self.bogus_mailbox.messages), 2)
self.assertEqual(self.async_task.call_count, 1)
self.assertEqual(self._queue_consumption_tasks_mock.call_count, 0)
self.assertEqual(len(self.bogus_mailbox.messages), 3)
self.reset_bogus_mailbox()
self.mail_account_handler.handle_mail_account(account)
self.apply_mail_actions()
rule.filter_subject = None
rule.filter_body = "electronic"
rule.save()
self.assertEqual(len(self.bogus_mailbox.messages), 3)
self.mail_account_handler.handle_mail_account(account)
self.assertEqual(len(self.bogus_mailbox.messages), 2)
self.assertEqual(self.async_task.call_count, 2)
self.reset_bogus_mailbox()
rule.filter_from = "amazon"
rule.filter_body = None
rule.save()
self.assertEqual(len(self.bogus_mailbox.messages), 3)
self.mail_account_handler.handle_mail_account(account)
self.assertEqual(len(self.bogus_mailbox.messages), 1)
self.assertEqual(self.async_task.call_count, 4)
self.reset_bogus_mailbox()
rule.filter_from = "amazon"
rule.filter_body = "cables"
rule.filter_subject = "Invoice"
rule.save()
self.assertEqual(len(self.bogus_mailbox.messages), 3)
self.mail_account_handler.handle_mail_account(account)
self.assertEqual(len(self.bogus_mailbox.messages), 2)
self.assertEqual(self.async_task.call_count, 5)
self.assertEqual(
len(self.bogus_mailbox.messages),
3 - expected_mail_count,
)
self.assertEqual(
self._queue_consumption_tasks_mock.call_count,
expected_mail_count,
)
def test_auth_plain_fallback(self):
"""
@ -992,12 +1003,13 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
)
self.assertEqual(len(self.bogus_mailbox.messages), 3)
self.assertEqual(self.async_task.call_count, 0)
self.assertEqual(self._queue_consumption_tasks_mock.call_count, 0)
self.assertEqual(len(self.bogus_mailbox.fetch("UNSEEN", False)), 2)
self.mail_account_handler.handle_mail_account(account)
self.apply_mail_actions()
self.assertEqual(self.async_task.call_count, 2)
self.assertEqual(self._queue_consumption_tasks_mock.call_count, 2)
self.assertEqual(len(self.bogus_mailbox.fetch("UNSEEN", False)), 0)
self.assertEqual(len(self.bogus_mailbox.messages), 3)
@ -1030,6 +1042,54 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
account,
)
def assert_queue_consumption_tasks_call_args(self, expected_call_args: List):
"""
Verifies that queue_consumption_tasks has been called with the expected arguments.
expected_call_args is the following format:
* List of calls to queue_consumption_tasks, called once per mail, where each element is:
* List of signatures for the consume_file task, where each element is:
* dictionary containing arguments that need to be present in the consume_file signature.
"""
# assert number of calls to queue_consumption_tasks mathc
self.assertEqual(
len(self._queue_consumption_tasks_mock.call_args_list),
len(expected_call_args),
)
for (mock_args, mock_kwargs), expected_signatures in zip(
self._queue_consumption_tasks_mock.call_args_list,
expected_call_args,
):
consume_tasks = mock_kwargs["consume_tasks"]
# assert number of consume_file tasks match
self.assertEqual(len(consume_tasks), len(expected_signatures))
for consume_task, expected_signature in zip(
consume_tasks,
expected_signatures,
):
# assert the file exists
self.assertIsFile(consume_task.kwargs["path"])
# assert all expected arguments are present in the signature
for key, value in expected_signature.items():
self.assertIn(key, consume_task.kwargs)
self.assertEqual(consume_task.kwargs[key], value)
def apply_mail_actions(self):
"""
Applies pending actions to mails by inspecting calls to the queue_consumption_tasks method.
"""
for args, kwargs in self._queue_consumption_tasks_mock.call_args_list:
message = kwargs["message"]
rule = kwargs["rule"]
apply_mail_action([], rule.pk, message.uid, message.subject, message.date)
class TestManagementCommand(TestCase):
@mock.patch(