From d7cb7c78af427b6564037e1608c7a7d171bc64a4 Mon Sep 17 00:00:00 2001 From: Jonas <17569239+jonaswinkler@users.noreply.github.com> Date: Thu, 16 Feb 2023 22:51:46 +0100 Subject: [PATCH 01/11] adjust mail workflow, execute mail actions only after consumption is successful --- src/paperless_mail/mail.py | 91 ++++++++++++++++++++----------------- src/paperless_mail/tasks.py | 15 +++++- 2 files changed, 64 insertions(+), 42 deletions(-) diff --git a/src/paperless_mail/mail.py b/src/paperless_mail/mail.py index 04caca63c..ca09bb683 100644 --- a/src/paperless_mail/mail.py +++ b/src/paperless_mail/mail.py @@ -9,6 +9,7 @@ from typing import Dict import magic import pathvalidate +from celery import chord from django.conf import settings from django.db import DatabaseError from documents.loggers import LoggingMixin @@ -25,6 +26,7 @@ from imap_tools import NOT from imap_tools.mailbox import MailBoxTls from paperless_mail.models import MailAccount from paperless_mail.models import MailRule +from paperless_mail.tasks import apply_mail_action # Apple Mail sets multiple IMAP KEYWORD and the general "\Flagged" FLAG # imaplib => conn.fetch(b"", "FLAGS") @@ -57,34 +59,34 @@ class BaseMailAction: def get_criteria(self) -> Dict: return {} - def post_consume(self, M, message_uids, parameter): + def post_consume(self, M, message_uid, parameter): pass # pragma: nocover class DeleteMailAction(BaseMailAction): - def post_consume(self, M, message_uids, parameter): - M.delete(message_uids) + def post_consume(self, M, message_uid, parameter): + M.delete(message_uid) class MarkReadMailAction(BaseMailAction): def get_criteria(self): return {"seen": False} - def post_consume(self, M, message_uids, parameter): - M.flag(message_uids, [MailMessageFlags.SEEN], True) + def post_consume(self, M, message_uid, parameter): + M.flag(message_uid, [MailMessageFlags.SEEN], True) class MoveMailAction(BaseMailAction): - def post_consume(self, M, message_uids, parameter): - M.move(message_uids, parameter) + def post_consume(self, M, message_uid, parameter): + M.move(message_uid, parameter) class FlagMailAction(BaseMailAction): def get_criteria(self): return {"flagged": False} - def post_consume(self, M, message_uids, parameter): - M.flag(message_uids, [MailMessageFlags.FLAGGED], True) + def post_consume(self, M, message_uid, parameter): + M.flag(message_uid, [MailMessageFlags.FLAGGED], True) class TagMailAction(BaseMailAction): @@ -113,9 +115,9 @@ class TagMailAction(BaseMailAction): return {"no_keyword": self.keyword, "gmail_label": self.keyword} - def post_consume(self, M: MailBox, message_uids, parameter): + def post_consume(self, M: MailBox, message_uid, parameter): if re.search(r"gmail\.com$|googlemail\.com$", M._host): - for uid in message_uids: + for uid in message_uid: M.client.uid("STORE", uid, "X-GM-LABELS", self.keyword) # AppleMail @@ -123,21 +125,21 @@ class TagMailAction(BaseMailAction): # Remove all existing $MailFlagBits M.flag( - message_uids, + message_uid, set(itertools.chain(*APPLE_MAIL_TAG_COLORS.values())), False, ) # Set new $MailFlagBits - M.flag(message_uids, APPLE_MAIL_TAG_COLORS.get(self.color), True) + M.flag(message_uid, APPLE_MAIL_TAG_COLORS.get(self.color), True) # Set the general \Flagged # This defaults to the "red" flag in AppleMail and # "stars" in Thunderbird or GMail - M.flag(message_uids, [MailMessageFlags.FLAGGED], True) + M.flag(message_uid, [MailMessageFlags.FLAGGED], True) elif self.keyword: - M.flag(message_uids, [self.keyword], True) + M.flag(message_uid, [self.keyword], True) else: raise MailError("No keyword specified.") @@ -372,16 +374,12 @@ class MailAccountHandler(LoggingMixin): f"Rule {rule}: Error while fetching folder {rule.folder}", ) from err - post_consume_messages = [] - mails_processed = 0 total_processed_files = 0 for message in messages: try: - processed_files = self.handle_message(message, rule) - if processed_files > 0: - post_consume_messages.append(message.uid) + processed_files = self.handle_message(M, message, rule) total_processed_files += processed_files mails_processed += 1 @@ -394,27 +392,9 @@ class MailAccountHandler(LoggingMixin): self.log("debug", f"Rule {rule}: Processed {mails_processed} matching mail(s)") - self.log( - "debug", - f"Rule {rule}: Running mail actions on " - f"{len(post_consume_messages)} mails", - ) - - try: - get_rule_action(rule).post_consume( - M, - post_consume_messages, - rule.action_parameter, - ) - - except Exception as e: - raise MailError( - f"Rule {rule}: Error while processing post-consume actions: " f"{e}", - ) from e - return total_processed_files - def handle_message(self, message, rule: MailRule) -> int: + def handle_message(self, M: MailBox, message, rule: MailRule) -> int: processed_elements = 0 # Skip Message handling when only attachments are to be processed but @@ -441,6 +421,7 @@ class MailAccountHandler(LoggingMixin): or rule.consumption_scope == MailRule.ConsumptionScope.EVERYTHING ): processed_elements += self.process_eml( + M, message, rule, correspondent, @@ -453,6 +434,7 @@ class MailAccountHandler(LoggingMixin): or rule.consumption_scope == MailRule.ConsumptionScope.EVERYTHING ): processed_elements += self.process_attachments( + M, message, rule, correspondent, @@ -464,6 +446,7 @@ class MailAccountHandler(LoggingMixin): def process_attachments( self, + M: MailBox, message: MailMessage, rule: MailRule, correspondent, @@ -471,6 +454,9 @@ class MailAccountHandler(LoggingMixin): doc_type, ): processed_attachments = 0 + + consume_tasks = list() + for att in message.attachments: if ( @@ -518,7 +504,7 @@ class MailAccountHandler(LoggingMixin): f"{message.subject} from {message.from_}", ) - consume_file.delay( + consume_task = consume_file.s( path=temp_filename, override_filename=pathvalidate.sanitize_filename( att.filename, @@ -531,6 +517,8 @@ class MailAccountHandler(LoggingMixin): override_tag_ids=tag_ids, ) + consume_tasks.append(consume_task) + processed_attachments += 1 else: self.log( @@ -540,10 +528,21 @@ class MailAccountHandler(LoggingMixin): f"since guessed mime type {mime_type} is not supported " f"by paperless", ) + + mail_action_task = apply_mail_action.s( + M=M, + action=get_rule_action(rule), + message_uid=message.uid, + parameter=rule.action_parameter, + ) + + chord(header=consume_tasks, body=mail_action_task).delay() + return processed_attachments def process_eml( self, + M: MailBox, message: MailMessage, rule: MailRule, correspondent, @@ -584,7 +583,7 @@ class MailAccountHandler(LoggingMixin): f"{message.subject} from {message.from_}", ) - consume_file.delay( + consume_task = consume_file.s( path=temp_filename, override_filename=pathvalidate.sanitize_filename( message.subject + ".eml", @@ -594,5 +593,15 @@ class MailAccountHandler(LoggingMixin): override_document_type_id=doc_type.id if doc_type else None, override_tag_ids=tag_ids, ) + + mail_action_task = apply_mail_action.s( + M=M, + action=get_rule_action(rule), + message_uid=message.uid, + parameter=rule.action_parameter, + ) + + (consume_task | mail_action_task).delay() + processed_elements = 1 return processed_elements diff --git a/src/paperless_mail/tasks.py b/src/paperless_mail/tasks.py index 5c92233de..9716f13a8 100644 --- a/src/paperless_mail/tasks.py +++ b/src/paperless_mail/tasks.py @@ -1,6 +1,8 @@ import logging from celery import shared_task +from imap_tools import MailBox +from paperless_mail.mail import BaseMailAction from paperless_mail.mail import MailAccountHandler from paperless_mail.mail import MailError from paperless_mail.models import MailAccount @@ -8,12 +10,23 @@ from paperless_mail.models import MailAccount logger = logging.getLogger("paperless.mail.tasks") +@shared_task +def apply_mail_action( + result: str, + M: MailBox, + action: BaseMailAction, + message_uid: str, + parameter: str, +): + action.post_consume(M, message_uid, parameter) + + @shared_task def process_mail_accounts(): total_new_documents = 0 for account in MailAccount.objects.all(): try: - total_new_documents += MailAccountHandler().handle_mail_account(account) + total_new_documents += MailAccountHandler().handl2e_mail_account(account) except MailError: logger.exception(f"Error while processing mail account {account}") From 94ebe3b61c39613c8b4c72720c4f937b59952b01 Mon Sep 17 00:00:00 2001 From: Jonas Winkler <17569239+jonaswinkler@users.noreply.github.com> Date: Mon, 20 Feb 2023 11:46:46 +0100 Subject: [PATCH 02/11] implement better mail actions --- src/paperless_mail/mail.py | 32 ++++++++++++++++++++++++-------- src/paperless_mail/tasks.py | 16 ++-------------- 2 files changed, 26 insertions(+), 22 deletions(-) diff --git a/src/paperless_mail/mail.py b/src/paperless_mail/mail.py index b49d3c3b5..acf1654d5 100644 --- a/src/paperless_mail/mail.py +++ b/src/paperless_mail/mail.py @@ -9,7 +9,7 @@ from typing import Dict import magic import pathvalidate -from celery import chord +from celery import chord, shared_task from django.conf import settings from django.db import DatabaseError from documents.loggers import LoggingMixin @@ -26,7 +26,6 @@ from imap_tools import NOT from imap_tools.mailbox import MailBoxTls from paperless_mail.models import MailAccount from paperless_mail.models import MailRule -from paperless_mail.tasks import apply_mail_action # Apple Mail sets multiple IMAP KEYWORD and the general "\Flagged" FLAG # imaplib => conn.fetch(b"", "FLAGS") @@ -145,6 +144,27 @@ class TagMailAction(BaseMailAction): raise MailError("No keyword specified.") +@shared_task +def apply_mail_action( + result: str, + rule_id: int, + message_uid: str, +): + rule = MailRule.objects.get(pk=rule_id) + account = MailAccount.objects.get(pk=rule.account.pk) + + action = get_rule_action(rule) + + with get_mailbox( + server=account.imap_server, + port=account.imap_port, + security=account.imap_security, + ) as M: + M.login(username=account.username, password=account.password) + M.folder.set(rule.folder) + action.post_consume(M, message_uid, rule.action_parameter) + + def get_rule_action(rule) -> BaseMailAction: if rule.action == MailRule.MailAction.FLAG: return FlagMailAction() @@ -531,10 +551,8 @@ class MailAccountHandler(LoggingMixin): ) mail_action_task = apply_mail_action.s( - M=M, - action=get_rule_action(rule), + rule_id=rule.pk, message_uid=message.uid, - parameter=rule.action_parameter, ) chord(header=consume_tasks, body=mail_action_task).delay() @@ -597,10 +615,8 @@ class MailAccountHandler(LoggingMixin): ) mail_action_task = apply_mail_action.s( - M=M, - action=get_rule_action(rule), + rule_id=rule.pk, message_uid=message.uid, - parameter=rule.action_parameter, ) (consume_task | mail_action_task).delay() diff --git a/src/paperless_mail/tasks.py b/src/paperless_mail/tasks.py index 9716f13a8..ab013a41e 100644 --- a/src/paperless_mail/tasks.py +++ b/src/paperless_mail/tasks.py @@ -1,8 +1,7 @@ import logging from celery import shared_task -from imap_tools import MailBox -from paperless_mail.mail import BaseMailAction + from paperless_mail.mail import MailAccountHandler from paperless_mail.mail import MailError from paperless_mail.models import MailAccount @@ -10,23 +9,12 @@ from paperless_mail.models import MailAccount logger = logging.getLogger("paperless.mail.tasks") -@shared_task -def apply_mail_action( - result: str, - M: MailBox, - action: BaseMailAction, - message_uid: str, - parameter: str, -): - action.post_consume(M, message_uid, parameter) - - @shared_task def process_mail_accounts(): total_new_documents = 0 for account in MailAccount.objects.all(): try: - total_new_documents += MailAccountHandler().handl2e_mail_account(account) + total_new_documents += MailAccountHandler().handle_mail_account(account) except MailError: logger.exception(f"Error while processing mail account {account}") From 7e1c1da42427e8555240c8879d257ed2361d1bf8 Mon Sep 17 00:00:00 2001 From: Jonas Winkler <17569239+jonaswinkler@users.noreply.github.com> Date: Mon, 20 Feb 2023 11:57:56 +0100 Subject: [PATCH 03/11] remove unused argument --- src/paperless_mail/mail.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/paperless_mail/mail.py b/src/paperless_mail/mail.py index acf1654d5..316803fb3 100644 --- a/src/paperless_mail/mail.py +++ b/src/paperless_mail/mail.py @@ -9,7 +9,8 @@ from typing import Dict import magic import pathvalidate -from celery import chord, shared_task +from celery import chord +from celery import shared_task from django.conf import settings from django.db import DatabaseError from documents.loggers import LoggingMixin @@ -399,7 +400,7 @@ class MailAccountHandler(LoggingMixin): for message in messages: try: - processed_files = self.handle_message(M, message, rule) + processed_files = self.handle_message(message, rule) total_processed_files += processed_files mails_processed += 1 @@ -414,7 +415,7 @@ class MailAccountHandler(LoggingMixin): return total_processed_files - def handle_message(self, M: MailBox, message, rule: MailRule) -> int: + def handle_message(self, message, rule: MailRule) -> int: processed_elements = 0 # Skip Message handling when only attachments are to be processed but @@ -441,7 +442,6 @@ class MailAccountHandler(LoggingMixin): or rule.consumption_scope == MailRule.ConsumptionScope.EVERYTHING ): processed_elements += self.process_eml( - M, message, rule, correspondent, @@ -454,7 +454,6 @@ class MailAccountHandler(LoggingMixin): or rule.consumption_scope == MailRule.ConsumptionScope.EVERYTHING ): processed_elements += self.process_attachments( - M, message, rule, correspondent, @@ -466,7 +465,6 @@ class MailAccountHandler(LoggingMixin): def process_attachments( self, - M: MailBox, message: MailMessage, rule: MailRule, correspondent, @@ -561,7 +559,6 @@ class MailAccountHandler(LoggingMixin): def process_eml( self, - M: MailBox, message: MailMessage, rule: MailRule, correspondent, From a37177703c476fd7424e72ca6eaaf0ca67befa16 Mon Sep 17 00:00:00 2001 From: Jonas Winkler <17569239+jonaswinkler@users.noreply.github.com> Date: Tue, 21 Feb 2023 13:50:34 +0100 Subject: [PATCH 04/11] mark mails as processed internally, don't process processed mails again --- src/paperless_mail/mail.py | 89 ++++++++++++++----- .../migrations/0018_processedmail.py | 57 ++++++++++++ src/paperless_mail/models.py | 37 ++++++++ 3 files changed, 162 insertions(+), 21 deletions(-) create mode 100644 src/paperless_mail/migrations/0018_processedmail.py diff --git a/src/paperless_mail/mail.py b/src/paperless_mail/mail.py index 316803fb3..b0cb7b750 100644 --- a/src/paperless_mail/mail.py +++ b/src/paperless_mail/mail.py @@ -2,6 +2,7 @@ import itertools import os import re import tempfile +import traceback from datetime import date from datetime import timedelta from fnmatch import fnmatch @@ -11,6 +12,7 @@ import magic import pathvalidate from celery import chord from celery import shared_task +from celery.canvas import Signature from django.conf import settings from django.db import DatabaseError from documents.loggers import LoggingMixin @@ -27,6 +29,7 @@ from imap_tools import NOT from imap_tools.mailbox import MailBoxTls from paperless_mail.models import MailAccount from paperless_mail.models import MailRule +from paperless_mail.models import ProcessedMail # Apple Mail sets multiple IMAP KEYWORD and the general "\Flagged" FLAG # imaplib => conn.fetch(b"", "FLAGS") @@ -154,16 +157,62 @@ def apply_mail_action( rule = MailRule.objects.get(pk=rule_id) account = MailAccount.objects.get(pk=rule.account.pk) - action = get_rule_action(rule) + try: - with get_mailbox( - server=account.imap_server, - port=account.imap_port, - security=account.imap_security, - ) as M: - M.login(username=account.username, password=account.password) - M.folder.set(rule.folder) - action.post_consume(M, message_uid, rule.action_parameter) + action = get_rule_action(rule) + + with get_mailbox( + server=account.imap_server, + port=account.imap_port, + security=account.imap_security, + ) as M: + M.login(username=account.username, password=account.password) + M.folder.set(rule.folder) + action.post_consume(M, message_uid, rule.action_parameter) + + ProcessedMail.objects.create( + rule=rule, + folder=rule.folder, + uid=message_uid, + status="SUCCESS", + ) + + except Exception as e: + ProcessedMail.objects.create( + rule=rule, + folder=rule.folder, + uid=message_uid, + status="FAILED", + error=traceback.format_exc(), + ) + raise e + + +@shared_task +def error_callback(request, exc, tb, rule_id: int, message_uid: str): + rule = MailRule.objects.get(pk=rule_id) + + ProcessedMail.objects.create( + rule=rule, + folder=rule.folder, + uid=message_uid, + status="FAILED", + error=traceback.format_exc(), + ) + + +def queue_consumption_tasks( + consume_tasks: list[Signature], + rule: MailRule, + message_uid: str, +): + mail_action_task = apply_mail_action.s( + rule_id=rule.pk, + message_uid=message_uid, + ) + chord(header=consume_tasks, body=mail_action_task).on_error( + error_callback.s(rule_id=rule.pk, message_uid=message_uid), + ).delay() def get_rule_action(rule) -> BaseMailAction: @@ -399,6 +448,14 @@ class MailAccountHandler(LoggingMixin): total_processed_files = 0 for message in messages: + if ProcessedMail.objects.filter( + rule=rule, + uid=message.uid, + folder=rule.folder, + ).exists(): + self.log("debug", f"Skipping mail {message}, already processed.") + continue + try: processed_files = self.handle_message(message, rule) @@ -548,12 +605,7 @@ class MailAccountHandler(LoggingMixin): f"by paperless", ) - mail_action_task = apply_mail_action.s( - rule_id=rule.pk, - message_uid=message.uid, - ) - - chord(header=consume_tasks, body=mail_action_task).delay() + queue_consumption_tasks(consume_tasks, rule, message.uid) return processed_attachments @@ -611,12 +663,7 @@ class MailAccountHandler(LoggingMixin): override_owner_id=rule.owner.id if rule.owner else None, ) - mail_action_task = apply_mail_action.s( - rule_id=rule.pk, - message_uid=message.uid, - ) - - (consume_task | mail_action_task).delay() + queue_consumption_tasks([consume_task], rule, message.uid) processed_elements = 1 return processed_elements diff --git a/src/paperless_mail/migrations/0018_processedmail.py b/src/paperless_mail/migrations/0018_processedmail.py new file mode 100644 index 000000000..ff15ffb77 --- /dev/null +++ b/src/paperless_mail/migrations/0018_processedmail.py @@ -0,0 +1,57 @@ +# Generated by Django 4.1.5 on 2023-02-21 12:48 + +from django.conf import settings +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ("paperless_mail", "0017_mailaccount_owner_mailrule_owner"), + ] + + operations = [ + migrations.CreateModel( + name="ProcessedMail", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("folder", models.CharField(max_length=256, verbose_name="folder")), + ("uid", models.CharField(max_length=256, verbose_name="folder")), + ("status", models.CharField(max_length=256, verbose_name="status")), + ( + "error", + models.TextField(blank=True, null=True, verbose_name="error"), + ), + ( + "owner", + models.ForeignKey( + blank=True, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + to=settings.AUTH_USER_MODEL, + verbose_name="owner", + ), + ), + ( + "rule", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + to="paperless_mail.mailrule", + ), + ), + ], + options={ + "abstract": False, + }, + ), + ] diff --git a/src/paperless_mail/models.py b/src/paperless_mail/models.py index f8a30c3be..c702181ef 100644 --- a/src/paperless_mail/models.py +++ b/src/paperless_mail/models.py @@ -214,3 +214,40 @@ class MailRule(document_models.ModelWithOwner): def __str__(self): return f"{self.account.name}.{self.name}" + + +class ProcessedMail(document_models.ModelWithOwner): + + rule = models.ForeignKey( + MailRule, + null=False, + blank=False, + on_delete=models.CASCADE, + ) + + folder = models.CharField( + _("folder"), + null=False, + blank=False, + max_length=256, + ) + + uid = models.CharField( + _("folder"), + null=False, + blank=False, + max_length=256, + ) + + status = models.CharField( + _("status"), + null=False, + blank=False, + max_length=256, + ) + + error = models.TextField( + _("error"), + null=True, + blank=True, + ) From 1189df1fe6d95e35b5f6044e64f9ffb1d3cbca7e Mon Sep 17 00:00:00 2001 From: Jonas Winkler <17569239+jonaswinkler@users.noreply.github.com> Date: Tue, 21 Feb 2023 18:37:08 +0100 Subject: [PATCH 05/11] changes --- src/paperless_mail/admin.py | 27 +++++++++++++ src/paperless_mail/mail.py | 38 ++++++++++++++++--- .../migrations/0018_processedmail.py | 11 +++++- src/paperless_mail/models.py | 29 +++++++++++++- 4 files changed, 97 insertions(+), 8 deletions(-) diff --git a/src/paperless_mail/admin.py b/src/paperless_mail/admin.py index ce5341e4e..df7bac86a 100644 --- a/src/paperless_mail/admin.py +++ b/src/paperless_mail/admin.py @@ -3,6 +3,7 @@ from django.contrib import admin from django.utils.translation import gettext_lazy as _ from paperless_mail.models import MailAccount from paperless_mail.models import MailRule +from paperless_mail.models import ProcessedMail class MailAccountAdminForm(forms.ModelForm): @@ -105,5 +106,31 @@ class MailRuleAdmin(admin.ModelAdmin): ordering = ["order"] +class ProcessedMailAdmin(admin.ModelAdmin): + class Meta: + + model = ProcessedMail + fields = "__all__" + + list_display = ("rule", "processed", "status", "subject", "received") + + ordering = ["-processed"] + + readonly_fields = [ + "owner", + "processed", + "received", + "status", + "subject", + "error", + "uid", + "folder", + "rule", + ] + + list_filter = ("status",) + + admin.site.register(MailAccount, MailAccountAdmin) admin.site.register(MailRule, MailRuleAdmin) +admin.site.register(ProcessedMail, ProcessedMailAdmin) diff --git a/src/paperless_mail/mail.py b/src/paperless_mail/mail.py index b0cb7b750..a1ee55827 100644 --- a/src/paperless_mail/mail.py +++ b/src/paperless_mail/mail.py @@ -1,3 +1,4 @@ +import datetime import itertools import os import re @@ -153,6 +154,8 @@ def apply_mail_action( result: str, rule_id: int, message_uid: str, + message_subject: str, + message_date: datetime.datetime, ): rule = MailRule.objects.get(pk=rule_id) account = MailAccount.objects.get(pk=rule.account.pk) @@ -171,17 +174,23 @@ def apply_mail_action( action.post_consume(M, message_uid, rule.action_parameter) ProcessedMail.objects.create( + owner=rule.owner, rule=rule, folder=rule.folder, uid=message_uid, + subject=message_subject, + received=message_date, status="SUCCESS", ) except Exception as e: ProcessedMail.objects.create( + owner=rule.owner, rule=rule, folder=rule.folder, uid=message_uid, + subject=message_subject, + received=message_date, status="FAILED", error=traceback.format_exc(), ) @@ -189,13 +198,23 @@ def apply_mail_action( @shared_task -def error_callback(request, exc, tb, rule_id: int, message_uid: str): +def error_callback( + request, + exc, + tb, + rule_id: int, + message_uid: str, + message_subject: str, + message_date: datetime.datetime, +): rule = MailRule.objects.get(pk=rule_id) ProcessedMail.objects.create( rule=rule, folder=rule.folder, uid=message_uid, + subject=message_subject, + received=message_date, status="FAILED", error=traceback.format_exc(), ) @@ -204,14 +223,21 @@ def error_callback(request, exc, tb, rule_id: int, message_uid: str): def queue_consumption_tasks( consume_tasks: list[Signature], rule: MailRule, - message_uid: str, + message: MailMessage, ): mail_action_task = apply_mail_action.s( rule_id=rule.pk, - message_uid=message_uid, + message_uid=message.uid, + message_subject=message.subject, + message_date=message.date, ) chord(header=consume_tasks, body=mail_action_task).on_error( - error_callback.s(rule_id=rule.pk, message_uid=message_uid), + error_callback.s( + rule_id=rule.pk, + message_uid=message.uid, + message_subject=message.subject, + message_date=message.date, + ), ).delay() @@ -605,7 +631,7 @@ class MailAccountHandler(LoggingMixin): f"by paperless", ) - queue_consumption_tasks(consume_tasks, rule, message.uid) + queue_consumption_tasks(consume_tasks, rule, message) return processed_attachments @@ -663,7 +689,7 @@ class MailAccountHandler(LoggingMixin): override_owner_id=rule.owner.id if rule.owner else None, ) - queue_consumption_tasks([consume_task], rule, message.uid) + queue_consumption_tasks([consume_task], rule, message) processed_elements = 1 return processed_elements diff --git a/src/paperless_mail/migrations/0018_processedmail.py b/src/paperless_mail/migrations/0018_processedmail.py index ff15ffb77..18b44a93a 100644 --- a/src/paperless_mail/migrations/0018_processedmail.py +++ b/src/paperless_mail/migrations/0018_processedmail.py @@ -1,8 +1,9 @@ -# Generated by Django 4.1.5 on 2023-02-21 12:48 +# Generated by Django 4.1.5 on 2023-02-21 17:15 from django.conf import settings from django.db import migrations, models import django.db.models.deletion +import django.utils.timezone class Migration(migrations.Migration): @@ -27,6 +28,14 @@ class Migration(migrations.Migration): ), ("folder", models.CharField(max_length=256, verbose_name="folder")), ("uid", models.CharField(max_length=256, verbose_name="folder")), + ("subject", models.CharField(max_length=256, verbose_name="subject")), + ("received", models.DateTimeField(verbose_name="received")), + ( + "processed", + models.DateTimeField( + default=django.utils.timezone.now, verbose_name="processed" + ), + ), ("status", models.CharField(max_length=256, verbose_name="status")), ( "error", diff --git a/src/paperless_mail/models.py b/src/paperless_mail/models.py index c702181ef..afe7a8ada 100644 --- a/src/paperless_mail/models.py +++ b/src/paperless_mail/models.py @@ -1,5 +1,6 @@ import documents.models as document_models from django.db import models +from django.utils import timezone from django.utils.translation import gettext_lazy as _ @@ -223,6 +224,7 @@ class ProcessedMail(document_models.ModelWithOwner): null=False, blank=False, on_delete=models.CASCADE, + editable=False, ) folder = models.CharField( @@ -230,13 +232,36 @@ class ProcessedMail(document_models.ModelWithOwner): null=False, blank=False, max_length=256, + editable=False, ) uid = models.CharField( - _("folder"), + _("uid"), null=False, blank=False, max_length=256, + editable=False, + ) + + subject = models.CharField( + _("subject"), + null=False, + blank=False, + max_length=256, + editable=False, + ) + + received = models.DateTimeField( + _("received"), + null=False, + blank=False, + editable=False, + ) + + processed = models.DateTimeField( + _("processed"), + default=timezone.now, + editable=False, ) status = models.CharField( @@ -244,10 +269,12 @@ class ProcessedMail(document_models.ModelWithOwner): null=False, blank=False, max_length=256, + editable=False, ) error = models.TextField( _("error"), null=True, blank=True, + editable=False, ) From 3c3c847db54dea3aca4313915862f9547cfb600a Mon Sep 17 00:00:00 2001 From: Jonas Winkler <17569239+jonaswinkler@users.noreply.github.com> Date: Tue, 21 Feb 2023 22:07:14 +0100 Subject: [PATCH 06/11] reconfigure mail admin --- src/paperless_mail/admin.py | 16 ++++++++-------- src/paperless_mail/mail.py | 3 ++- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/paperless_mail/admin.py b/src/paperless_mail/admin.py index df7bac86a..ca2737e2e 100644 --- a/src/paperless_mail/admin.py +++ b/src/paperless_mail/admin.py @@ -118,17 +118,17 @@ class ProcessedMailAdmin(admin.ModelAdmin): readonly_fields = [ "owner", - "processed", - "received", - "status", - "subject", - "error", - "uid", - "folder", "rule", + "folder", + "uid", + "subject", + "received", + "processed", + "status", + "error", ] - list_filter = ("status",) + list_filter = ("status", "rule") admin.site.register(MailAccount, MailAccountAdmin) diff --git a/src/paperless_mail/mail.py b/src/paperless_mail/mail.py index a1ee55827..740a7cbce 100644 --- a/src/paperless_mail/mail.py +++ b/src/paperless_mail/mail.py @@ -8,6 +8,7 @@ from datetime import date from datetime import timedelta from fnmatch import fnmatch from typing import Dict +from typing import List import magic import pathvalidate @@ -151,7 +152,7 @@ class TagMailAction(BaseMailAction): @shared_task def apply_mail_action( - result: str, + result: List[str], rule_id: int, message_uid: str, message_subject: str, From 201a4a7ef96fdd4cb9c4a68d215c374ef918c084 Mon Sep 17 00:00:00 2001 From: Jonas Winkler <17569239+jonaswinkler@users.noreply.github.com> Date: Thu, 23 Feb 2023 22:02:38 +0100 Subject: [PATCH 07/11] changes --- src/paperless_mail/mail.py | 152 ++++++--- src/paperless_mail/tasks.py | 1 - src/paperless_mail/tests/test_mail.py | 436 ++++++++++++++------------ 3 files changed, 349 insertions(+), 240 deletions(-) diff --git a/src/paperless_mail/mail.py b/src/paperless_mail/mail.py index 740a7cbce..0b35c9004 100644 --- a/src/paperless_mail/mail.py +++ b/src/paperless_mail/mail.py @@ -9,6 +9,7 @@ from datetime import timedelta from fnmatch import fnmatch from typing import Dict from typing import List +from typing import Union import magic import pathvalidate @@ -29,6 +30,7 @@ from imap_tools import MailMessage from imap_tools import MailMessageFlags from imap_tools import NOT from imap_tools.mailbox import MailBoxTls +from imap_tools.query import LogicOperator from paperless_mail.models import MailAccount from paperless_mail.models import MailRule from paperless_mail.models import ProcessedMail @@ -61,19 +63,43 @@ class MailError(Exception): class BaseMailAction: - def get_criteria(self) -> Dict: + """ + Base class for mail actions. A mail action is performed on a mail after + consumption of the document is complete and is used to signal to the user + that this mail was processed by paperless via the mail client. + + Furthermore, mail actions reduce the amount of mails to be analyzed by + excluding mails on which the action was already performed (i.e., excluding + read mails when the action is to mark mails as read). + """ + + def get_criteria(self) -> Union[Dict, LogicOperator]: + """ + Returns filtering criteria/query for this mail action. + """ return {} - def post_consume(self, M, message_uid, parameter): - pass # pragma: nocover + def post_consume(self, M: MailBox, message_uid: str, parameter: str): + """ + Perform mail action on the given mail uid in the mailbox. + """ + raise NotImplementedError() class DeleteMailAction(BaseMailAction): + """ + A mail action that deletes mails after processing. + """ + def post_consume(self, M, message_uid, parameter): M.delete(message_uid) class MarkReadMailAction(BaseMailAction): + """ + A mail action that marks mails as read after processing. + """ + def get_criteria(self): return {"seen": False} @@ -82,11 +108,19 @@ class MarkReadMailAction(BaseMailAction): class MoveMailAction(BaseMailAction): + """ + A mail action that moves mails to a different folder after processing. + """ + def post_consume(self, M, message_uid, parameter): M.move(message_uid, parameter) class FlagMailAction(BaseMailAction): + """ + A mail action that marks mails as important ("star") after processing. + """ + def get_criteria(self): return {"flagged": False} @@ -95,6 +129,10 @@ class FlagMailAction(BaseMailAction): class TagMailAction(BaseMailAction): + """ + A mail action that tags mails after processing. + """ + def __init__(self, parameter): # The custom tag should look like "apple:" @@ -117,8 +155,10 @@ class TagMailAction(BaseMailAction): # AppleMail: We only need to check if mails are \Flagged if self.color: return {"flagged": False} - - return {"no_keyword": self.keyword, "gmail_label": self.keyword} + elif self.keyword: + return AND(NOT(gmail_label=self.keyword), no_keyword=self.keyword) + else: + raise ValueError("This should never happen.") def post_consume(self, M: MailBox, message_uid, parameter): if re.search(r"gmail\.com$|googlemail\.com$", M._host): @@ -158,6 +198,12 @@ def apply_mail_action( message_subject: str, message_date: datetime.datetime, ): + """ + This shared task applies the mail action of a particular mail rule to the + given mail. Creates a ProcessedMail object, so that the mail won't be + processed in the future. + """ + rule = MailRule.objects.get(pk=rule_id) account = MailAccount.objects.get(pk=rule.account.pk) @@ -208,6 +254,10 @@ def error_callback( message_subject: str, message_date: datetime.datetime, ): + """ + A shared task that is called whenever something goes wrong during + consumption of a file. See queue_consumption_tasks. + """ rule = MailRule.objects.get(pk=rule_id) ProcessedMail.objects.create( @@ -222,10 +272,16 @@ def error_callback( def queue_consumption_tasks( + *, consume_tasks: list[Signature], rule: MailRule, message: MailMessage, ): + """ + Queue a list of consumption tasks (Signatures for the consume_file shared + task) with celery. + """ + mail_action_task = apply_mail_action.s( rule_id=rule.pk, message_uid=message.uid, @@ -243,6 +299,10 @@ def queue_consumption_tasks( def get_rule_action(rule) -> BaseMailAction: + """ + Returns a BaseMailAction instance for the given rule. + """ + if rule.action == MailRule.MailAction.FLAG: return FlagMailAction() elif rule.action == MailRule.MailAction.DELETE: @@ -258,6 +318,10 @@ def get_rule_action(rule) -> BaseMailAction: def make_criterias(rule): + """ + Returns criteria to be applied to MailBox.fetch for the given rule. + """ + maximum_age = date.today() - timedelta(days=rule.maximum_age) criterias = {} if rule.maximum_age > 0: @@ -269,10 +333,18 @@ def make_criterias(rule): if rule.filter_body: criterias["body"] = rule.filter_body - return {**criterias, **get_rule_action(rule).get_criteria()} + rule_query = get_rule_action(rule).get_criteria() + if isinstance(rule_query, dict): + return AND(**rule_query, **criterias) + else: + return AND(rule_query, **criterias) def get_mailbox(server, port, security) -> MailBox: + """ + Returns the correct MailBox instance for the given configuration. + """ + if security == MailAccount.ImapSecurity.NONE: mailbox = MailBoxUnencrypted(server, port) elif security == MailAccount.ImapSecurity.STARTTLS: @@ -285,6 +357,16 @@ def get_mailbox(server, port, security) -> MailBox: class MailAccountHandler(LoggingMixin): + """ + The main class that handles mail accounts. + + * processes all rules for a given mail account + * for each mail rule, fetches relevant mails, and queues documents from + matching mails for consumption + * marks processed mails in the database, so that they won't be processed + again + * runs mail actions on the mail server, when consumption is completed + """ logging_name = "paperless_mail" @@ -295,7 +377,7 @@ class MailAccountHandler(LoggingMixin): self.log("error", f"Error while retrieving correspondent {name}: {e}") return None - def get_title(self, message, att, rule): + def _get_title(self, message, att, rule): if rule.assign_title_from == MailRule.TitleSource.FROM_SUBJECT: return message.subject @@ -307,7 +389,7 @@ class MailAccountHandler(LoggingMixin): "Unknown title selector.", ) # pragma: nocover - def get_correspondent(self, message: MailMessage, rule): + def _get_correspondent(self, message: MailMessage, rule): c_from = rule.assign_correspondent_from if c_from == MailRule.CorrespondentSource.FROM_NOTHING: @@ -332,6 +414,9 @@ class MailAccountHandler(LoggingMixin): ) # pragma: nocover def handle_mail_account(self, account: MailAccount): + """ + Main entry method to handle a specific mail account. + """ self.renew_logging_group() @@ -386,10 +471,9 @@ class MailAccountHandler(LoggingMixin): for rule in account.rules.order_by("order"): try: - total_processed_files += self.handle_mail_rule( + total_processed_files += self._handle_mail_rule( M, rule, - supports_gmail_labels, ) except Exception as e: self.log( @@ -408,11 +492,10 @@ class MailAccountHandler(LoggingMixin): return total_processed_files - def handle_mail_rule( + def _handle_mail_rule( self, M: MailBox, rule: MailRule, - supports_gmail_labels: bool = False, ): self.log("debug", f"Rule {rule}: Selecting folder {rule.folder}") @@ -442,27 +525,14 @@ class MailAccountHandler(LoggingMixin): criterias = make_criterias(rule) - # Deal with the Gmail label extension - if "gmail_label" in criterias: - - gmail_label = criterias["gmail_label"] - del criterias["gmail_label"] - - if not supports_gmail_labels: - criterias_imap = AND(**criterias) - else: - criterias_imap = AND(NOT(gmail_label=gmail_label), **criterias) - else: - criterias_imap = AND(**criterias) - self.log( "debug", - f"Rule {rule}: Searching folder with criteria " f"{str(criterias_imap)}", + f"Rule {rule}: Searching folder with criteria " f"{str(criterias)}", ) try: messages = M.fetch( - criteria=criterias_imap, + criteria=criterias, mark_seen=False, charset=rule.account.character_set, ) @@ -484,7 +554,7 @@ class MailAccountHandler(LoggingMixin): continue try: - processed_files = self.handle_message(message, rule) + processed_files = self._handle_message(message, rule) total_processed_files += processed_files mails_processed += 1 @@ -499,7 +569,7 @@ class MailAccountHandler(LoggingMixin): return total_processed_files - def handle_message(self, message, rule: MailRule) -> int: + def _handle_message(self, message, rule: MailRule) -> int: processed_elements = 0 # Skip Message handling when only attachments are to be processed but @@ -517,7 +587,7 @@ class MailAccountHandler(LoggingMixin): f"{len(message.attachments)} attachment(s)", ) - correspondent = self.get_correspondent(message, rule) + correspondent = self._get_correspondent(message, rule) tag_ids = [tag.id for tag in rule.assign_tags.all()] doc_type = rule.assign_document_type @@ -525,7 +595,7 @@ class MailAccountHandler(LoggingMixin): rule.consumption_scope == MailRule.ConsumptionScope.EML_ONLY or rule.consumption_scope == MailRule.ConsumptionScope.EVERYTHING ): - processed_elements += self.process_eml( + processed_elements += self._process_eml( message, rule, correspondent, @@ -537,7 +607,7 @@ class MailAccountHandler(LoggingMixin): rule.consumption_scope == MailRule.ConsumptionScope.ATTACHMENTS_ONLY or rule.consumption_scope == MailRule.ConsumptionScope.EVERYTHING ): - processed_elements += self.process_attachments( + processed_elements += self._process_attachments( message, rule, correspondent, @@ -547,7 +617,7 @@ class MailAccountHandler(LoggingMixin): return processed_elements - def process_attachments( + def _process_attachments( self, message: MailMessage, rule: MailRule, @@ -583,7 +653,7 @@ class MailAccountHandler(LoggingMixin): ): continue - title = self.get_title(message, att, rule) + title = self._get_title(message, att, rule) # don't trust the content type of the attachment. Could be # generic application/octet-stream. @@ -632,11 +702,15 @@ class MailAccountHandler(LoggingMixin): f"by paperless", ) - queue_consumption_tasks(consume_tasks, rule, message) + queue_consumption_tasks( + consume_tasks=consume_tasks, + rule=rule, + message=message, + ) return processed_attachments - def process_eml( + def _process_eml( self, message: MailMessage, rule: MailRule, @@ -690,7 +764,11 @@ class MailAccountHandler(LoggingMixin): override_owner_id=rule.owner.id if rule.owner else None, ) - queue_consumption_tasks([consume_task], rule, message) + queue_consumption_tasks( + consume_tasks=[consume_task], + rule=rule, + message=message, + ) processed_elements = 1 return processed_elements diff --git a/src/paperless_mail/tasks.py b/src/paperless_mail/tasks.py index ab013a41e..5c92233de 100644 --- a/src/paperless_mail/tasks.py +++ b/src/paperless_mail/tasks.py @@ -1,7 +1,6 @@ import logging from celery import shared_task - from paperless_mail.mail import MailAccountHandler from paperless_mail.mail import MailError from paperless_mail.models import MailAccount diff --git a/src/paperless_mail/tests/test_mail.py b/src/paperless_mail/tests/test_mail.py index fb3a8e071..b63d11614 100644 --- a/src/paperless_mail/tests/test_mail.py +++ b/src/paperless_mail/tests/test_mail.py @@ -23,6 +23,7 @@ from imap_tools import MailMessage from imap_tools import MailMessageFlags from imap_tools import NOT from paperless_mail import tasks +from paperless_mail.mail import apply_mail_action from paperless_mail.mail import MailAccountHandler from paperless_mail.mail import MailError from paperless_mail.mail import TagMailAction @@ -168,69 +169,6 @@ class BogusMailBox(ContextManager): raise Exception() -_used_uids = set() - - -def create_message( - attachments: Union[int, List[_AttachmentDef]] = 1, - body: str = "", - subject: str = "the suject", - from_: str = "noone@mail.com", - seen: bool = False, - flagged: bool = False, - processed: bool = False, -) -> MailMessage: - email_msg = email.message.EmailMessage() - # TODO: This does NOT set the UID - email_msg["Message-ID"] = str(uuid.uuid4()) - email_msg["Subject"] = subject - email_msg["From"] = from_ - email_msg.set_content(body) - - # Either add some default number of attachments - # or the provided attachments - if isinstance(attachments, int): - for i in range(attachments): - attachment = _AttachmentDef(filename=f"file_{i}.pdf") - email_msg.add_attachment( - attachment.content, - maintype=attachment.maintype, - subtype=attachment.subtype, - disposition=attachment.disposition, - filename=attachment.filename, - ) - else: - for attachment in attachments: - email_msg.add_attachment( - attachment.content, - maintype=attachment.maintype, - subtype=attachment.subtype, - disposition=attachment.disposition, - filename=attachment.filename, - ) - - # Convert the EmailMessage to an imap_tools MailMessage - imap_msg = MailMessage.from_bytes(email_msg.as_bytes()) - - # TODO: Unsure how to add a uid to the actual EmailMessage. This hacks it in, - # based on how imap_tools uses regex to extract it. - # This should be a large enough pool - uid = random.randint(1, 10000) - while uid in _used_uids: - uid = random.randint(1, 10000) - _used_uids.add(uid) - - imap_msg._raw_uid_data = f"UID {uid}".encode() - - imap_msg.seen = seen - imap_msg.flagged = flagged - if processed: - imap_msg._raw_flag_data.append(b"+FLAGS (processed)") - MailMessage.flags.fget.cache_clear() - - return imap_msg - - def fake_magic_from_buffer(buffer, mime=False): if mime: if "PDF" in str(buffer): @@ -244,14 +182,17 @@ def fake_magic_from_buffer(buffer, mime=False): @mock.patch("paperless_mail.mail.magic.from_buffer", fake_magic_from_buffer) class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase): def setUp(self): + self._used_uids = set() + + self.bogus_mailbox = BogusMailBox() + patcher = mock.patch("paperless_mail.mail.MailBox") m = patcher.start() - self.bogus_mailbox = BogusMailBox() m.return_value = self.bogus_mailbox self.addCleanup(patcher.stop) - patcher = mock.patch("paperless_mail.mail.consume_file.delay") - self.async_task = patcher.start() + patcher = mock.patch("paperless_mail.mail.queue_consumption_tasks") + self._queue_consumption_tasks_mock = patcher.start() self.addCleanup(patcher.stop) self.reset_bogus_mailbox() @@ -259,11 +200,71 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase): self.mail_account_handler = MailAccountHandler() super().setUp() + def create_message( + self, + attachments: Union[int, List[_AttachmentDef]] = 1, + body: str = "", + subject: str = "the suject", + from_: str = "noone@mail.com", + seen: bool = False, + flagged: bool = False, + processed: bool = False, + ) -> MailMessage: + email_msg = email.message.EmailMessage() + # TODO: This does NOT set the UID + email_msg["Message-ID"] = str(uuid.uuid4()) + email_msg["Subject"] = subject + email_msg["From"] = from_ + email_msg.set_content(body) + + # Either add some default number of attachments + # or the provided attachments + if isinstance(attachments, int): + for i in range(attachments): + attachment = _AttachmentDef(filename=f"file_{i}.pdf") + email_msg.add_attachment( + attachment.content, + maintype=attachment.maintype, + subtype=attachment.subtype, + disposition=attachment.disposition, + filename=attachment.filename, + ) + else: + for attachment in attachments: + email_msg.add_attachment( + attachment.content, + maintype=attachment.maintype, + subtype=attachment.subtype, + disposition=attachment.disposition, + filename=attachment.filename, + ) + + # Convert the EmailMessage to an imap_tools MailMessage + imap_msg = MailMessage.from_bytes(email_msg.as_bytes()) + + # TODO: Unsure how to add a uid to the actual EmailMessage. This hacks it in, + # based on how imap_tools uses regex to extract it. + # This should be a large enough pool + uid = random.randint(1, 10000) + while uid in self._used_uids: + uid = random.randint(1, 10000) + self._used_uids.add(uid) + + imap_msg._raw_uid_data = f"UID {uid}".encode() + + imap_msg.seen = seen + imap_msg.flagged = flagged + if processed: + imap_msg._raw_flag_data.append(b"+FLAGS (processed)") + MailMessage.flags.fget.cache_clear() + + return imap_msg + def reset_bogus_mailbox(self): self.bogus_mailbox.messages = [] self.bogus_mailbox.messages_spam = [] self.bogus_mailbox.messages.append( - create_message( + self.create_message( subject="Invoice 1", from_="amazon@amazon.de", body="cables", @@ -273,7 +274,7 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase): ), ) self.bogus_mailbox.messages.append( - create_message( + self.create_message( subject="Invoice 2", body="from my favorite electronic store", seen=False, @@ -282,7 +283,7 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase): ), ) self.bogus_mailbox.messages.append( - create_message( + self.create_message( subject="Claim your $10M price now!", from_="amazon@amazon-some-indian-site.org", seen=False, @@ -314,16 +315,16 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase): name="a", assign_correspondent_from=MailRule.CorrespondentSource.FROM_NOTHING, ) - self.assertIsNone(handler.get_correspondent(message, rule)) + self.assertIsNone(handler._get_correspondent(message, rule)) rule = MailRule( name="b", assign_correspondent_from=MailRule.CorrespondentSource.FROM_EMAIL, ) - c = handler.get_correspondent(message, rule) + c = handler._get_correspondent(message, rule) self.assertIsNotNone(c) self.assertEqual(c.name, "someone@somewhere.com") - c = handler.get_correspondent(message2, rule) + c = handler._get_correspondent(message2, rule) self.assertIsNotNone(c) self.assertEqual(c.name, "me@localhost.com") self.assertEqual(c.id, me_localhost.id) @@ -332,10 +333,10 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase): name="c", assign_correspondent_from=MailRule.CorrespondentSource.FROM_NAME, ) - c = handler.get_correspondent(message, rule) + c = handler._get_correspondent(message, rule) self.assertIsNotNone(c) self.assertEqual(c.name, "Someone!") - c = handler.get_correspondent(message2, rule) + c = handler._get_correspondent(message2, rule) self.assertIsNotNone(c) self.assertEqual(c.id, me_localhost.id) @@ -344,7 +345,7 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase): assign_correspondent_from=MailRule.CorrespondentSource.FROM_CUSTOM, assign_correspondent=someone_else, ) - c = handler.get_correspondent(message, rule) + c = handler._get_correspondent(message, rule) self.assertEqual(c, someone_else) def test_get_title(self): @@ -359,15 +360,15 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase): name="a", assign_title_from=MailRule.TitleSource.FROM_FILENAME, ) - self.assertEqual(handler.get_title(message, att, rule), "this_is_the_file") + self.assertEqual(handler._get_title(message, att, rule), "this_is_the_file") rule = MailRule( name="b", assign_title_from=MailRule.TitleSource.FROM_SUBJECT, ) - self.assertEqual(handler.get_title(message, att, rule), "the message title") + self.assertEqual(handler._get_title(message, att, rule), "the message title") def test_handle_message(self): - message = create_message( + message = self.create_message( subject="the message title", from_="Myself", attachments=2, @@ -381,24 +382,18 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase): ) rule.save() - result = self.mail_account_handler.handle_message(message, rule) + result = self.mail_account_handler._handle_message(message, rule) self.assertEqual(result, 2) - self.assertEqual(len(self.async_task.call_args_list), 2) - - args1, kwargs1 = self.async_task.call_args_list[0] - args2, kwargs2 = self.async_task.call_args_list[1] - - self.assertIsFile(kwargs1["path"]) - - self.assertEqual(kwargs1["override_title"], "file_0") - self.assertEqual(kwargs1["override_filename"], "file_0.pdf") - - self.assertIsFile(kwargs2["path"]) - - self.assertEqual(kwargs2["override_title"], "file_1") - self.assertEqual(kwargs2["override_filename"], "file_1.pdf") + self.verify_queue_consumption_tasks_call_args( + [ + [ + {"override_title": "file_0", "override_filename": "file_0.pdf"}, + {"override_title": "file_1", "override_filename": "file_1.pdf"}, + ], + ], + ) def test_handle_empty_message(self): message = namedtuple("MailMessage", []) @@ -406,13 +401,13 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase): message.attachments = [] rule = MailRule() - result = self.mail_account_handler.handle_message(message, rule) + result = self.mail_account_handler._handle_message(message, rule) - self.assertFalse(self.async_task.called) + self.assertFalse(self._queue_consumption_tasks_mock.called) self.assertEqual(result, 0) def test_handle_unknown_mime_type(self): - message = create_message( + message = self.create_message( attachments=[ _AttachmentDef(filename="f1.pdf"), _AttachmentDef( @@ -430,17 +425,19 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase): ) rule.save() - result = self.mail_account_handler.handle_message(message, rule) + result = self.mail_account_handler._handle_message(message, rule) self.assertEqual(result, 1) - self.assertEqual(self.async_task.call_count, 1) - - args, kwargs = self.async_task.call_args - self.assertIsFile(kwargs["path"]) - self.assertEqual(kwargs["override_filename"], "f1.pdf") + self.verify_queue_consumption_tasks_call_args( + [ + [ + {"override_filename": "f1.pdf"}, + ], + ], + ) def test_handle_disposition(self): - message = create_message( + message = self.create_message( attachments=[ _AttachmentDef( filename="f1.pdf", @@ -458,16 +455,18 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase): ) rule.save() - result = self.mail_account_handler.handle_message(message, rule) - + result = self.mail_account_handler._handle_message(message, rule) self.assertEqual(result, 1) - self.assertEqual(self.async_task.call_count, 1) - - args, kwargs = self.async_task.call_args - self.assertEqual(kwargs["override_filename"], "f2.pdf") + self.verify_queue_consumption_tasks_call_args( + [ + [ + {"override_filename": "f2.pdf"}, + ], + ], + ) def test_handle_inline_files(self): - message = create_message( + message = self.create_message( attachments=[ _AttachmentDef( filename="f1.pdf", @@ -486,13 +485,19 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase): ) rule.save() - result = self.mail_account_handler.handle_message(message, rule) - + result = self.mail_account_handler._handle_message(message, rule) self.assertEqual(result, 2) - self.assertEqual(self.async_task.call_count, 2) + self.verify_queue_consumption_tasks_call_args( + [ + [ + {"override_filename": "f1.pdf"}, + {"override_filename": "f2.pdf"}, + ], + ], + ) def test_filename_filter(self): - message = create_message( + message = self.create_message( attachments=[ _AttachmentDef(filename="f1.pdf"), _AttachmentDef(filename="f2.pdf"), @@ -504,33 +509,33 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase): ) tests = [ - ("*.pdf", ["f1.pdf", "f1.Pdf", "f2.pdf", "f3.pdf", "file.PDf"]), + ("*.pdf", ["f1.pdf", "f2.pdf", "f3.pdf", "file.PDf", "f1.Pdf"]), ("f1.pdf", ["f1.pdf", "f1.Pdf"]), ("f1", []), - ("*", ["f1.pdf", "f2.pdf", "f3.pdf", "f2.png", "f1.Pdf", "file.PDf"]), + ("*", ["f1.pdf", "f2.pdf", "f3.pdf", "f2.png", "file.PDf", "f1.Pdf"]), ("*.png", ["f2.png"]), ] for (pattern, matches) in tests: - matches.sort() - self.async_task.reset_mock() - account = MailAccount(name=str(uuid.uuid4())) - account.save() - rule = MailRule( - name=str(uuid.uuid4()), - assign_title_from=MailRule.TitleSource.FROM_FILENAME, - account=account, - filter_attachment_filename=pattern, - ) - rule.save() + with self.subTest(msg=pattern): + print(f"PATTERN {pattern}") + self._queue_consumption_tasks_mock.reset_mock() + account = MailAccount(name=str(uuid.uuid4())) + account.save() + rule = MailRule( + name=str(uuid.uuid4()), + assign_title_from=MailRule.TitleSource.FROM_FILENAME, + account=account, + filter_attachment_filename=pattern, + ) + rule.save() - result = self.mail_account_handler.handle_message(message, rule) - - self.assertEqual(result, len(matches), f"Error with pattern: {pattern}") - filenames = sorted( - a[1]["override_filename"] for a in self.async_task.call_args_list - ) - self.assertListEqual(filenames, matches) + self.mail_account_handler._handle_message(message, rule) + self.verify_queue_consumption_tasks_call_args( + [ + [{"override_filename": m} for m in matches], + ], + ) def test_handle_mail_account_mark_read(self): @@ -548,10 +553,11 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase): ) self.assertEqual(len(self.bogus_mailbox.messages), 3) - self.assertEqual(self.async_task.call_count, 0) self.assertEqual(len(self.bogus_mailbox.fetch("UNSEEN", False)), 2) + self.mail_account_handler.handle_mail_account(account) - self.assertEqual(self.async_task.call_count, 2) + self.apply_mail_actions() + self.assertEqual(len(self.bogus_mailbox.fetch("UNSEEN", False)), 0) self.assertEqual(len(self.bogus_mailbox.messages), 3) @@ -571,10 +577,11 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase): filter_subject="Invoice", ) - self.assertEqual(self.async_task.call_count, 0) self.assertEqual(len(self.bogus_mailbox.messages), 3) + self.mail_account_handler.handle_mail_account(account) - self.assertEqual(self.async_task.call_count, 2) + self.apply_mail_actions() + self.assertEqual(len(self.bogus_mailbox.messages), 1) def test_handle_mail_account_flag(self): @@ -593,10 +600,11 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase): ) self.assertEqual(len(self.bogus_mailbox.messages), 3) - self.assertEqual(self.async_task.call_count, 0) self.assertEqual(len(self.bogus_mailbox.fetch("UNFLAGGED", False)), 2) + self.mail_account_handler.handle_mail_account(account) - self.assertEqual(self.async_task.call_count, 1) + self.apply_mail_actions() + self.assertEqual(len(self.bogus_mailbox.fetch("UNFLAGGED", False)), 1) self.assertEqual(len(self.bogus_mailbox.messages), 3) @@ -616,13 +624,12 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase): filter_subject="Claim", ) - self.assertEqual(self.async_task.call_count, 0) self.assertEqual(len(self.bogus_mailbox.messages), 3) self.assertEqual(len(self.bogus_mailbox.messages_spam), 0) self.mail_account_handler.handle_mail_account(account) + self.apply_mail_actions() - self.assertEqual(self.async_task.call_count, 1) self.assertEqual(len(self.bogus_mailbox.messages), 2) self.assertEqual(len(self.bogus_mailbox.messages_spam), 1) @@ -642,12 +649,13 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase): ) self.assertEqual(len(self.bogus_mailbox.messages), 3) - self.assertEqual(self.async_task.call_count, 0) self.assertEqual(len(self.bogus_mailbox.fetch("UNKEYWORD processed", False)), 2) + self.mail_account_handler.handle_mail_account(account) - self.assertEqual(self.async_task.call_count, 2) - self.assertEqual(len(self.bogus_mailbox.fetch("UNKEYWORD processed", False)), 0) + self.apply_mail_actions() + self.assertEqual(len(self.bogus_mailbox.messages), 3) + self.assertEqual(len(self.bogus_mailbox.fetch("UNKEYWORD processed", False)), 0) def test_handle_mail_account_tag_gmail(self): self.bogus_mailbox._host = "imap.gmail.com" @@ -668,11 +676,12 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase): ) self.assertEqual(len(self.bogus_mailbox.messages), 3) - self.assertEqual(self.async_task.call_count, 0) criteria = NOT(gmail_label="processed") self.assertEqual(len(self.bogus_mailbox.fetch(criteria, False)), 2) + self.mail_account_handler.handle_mail_account(account) - self.assertEqual(self.async_task.call_count, 2) + self.apply_mail_actions() + self.assertEqual(len(self.bogus_mailbox.fetch(criteria, False)), 0) self.assertEqual(len(self.bogus_mailbox.messages), 3) @@ -702,10 +711,11 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase): ) self.assertEqual(len(self.bogus_mailbox.messages), 3) - self.assertEqual(self.async_task.call_count, 0) self.assertEqual(len(self.bogus_mailbox.fetch("UNFLAGGED", False)), 2) + self.mail_account_handler.handle_mail_account(account) - self.assertEqual(self.async_task.call_count, 2) + self.apply_mail_actions() + self.assertEqual(len(self.bogus_mailbox.fetch("UNFLAGGED", False)), 0) self.assertEqual(len(self.bogus_mailbox.messages), 3) @@ -717,11 +727,11 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase): password="wrong", ) - with self.assertRaises(MailError) as context: + with self.assertRaisesRegex( + MailError, + "Error while authenticating account", + ) as context: self.mail_account_handler.handle_mail_account(account) - self.assertTrue( - str(context).startswith("Error while authenticating account"), - ) def test_error_skip_account(self): _ = MailAccount.objects.create( @@ -746,7 +756,8 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase): ) tasks.process_mail_accounts() - self.assertEqual(self.async_task.call_count, 1) + self.apply_mail_actions() + self.assertEqual(len(self.bogus_mailbox.messages), 2) self.assertEqual(len(self.bogus_mailbox.messages_spam), 1) @@ -777,7 +788,8 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase): ) self.mail_account_handler.handle_mail_account(account) - self.assertEqual(self.async_task.call_count, 1) + self.apply_mail_actions() + self.assertEqual(len(self.bogus_mailbox.messages), 2) self.assertEqual(len(self.bogus_mailbox.messages_spam), 1) @@ -812,7 +824,7 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase): self.mail_account_handler.handle_mail_account(account) self.bogus_mailbox.folder.list.assert_called_once() - self.assertEqual(self.async_task.call_count, 0) + self.assertEqual(self._queue_consumption_tasks_mock.call_count, 0) def test_error_folder_set_error_listing(self): """ @@ -845,9 +857,9 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase): self.mail_account_handler.handle_mail_account(account) self.bogus_mailbox.folder.list.assert_called_once() - self.assertEqual(self.async_task.call_count, 0) + self.assertEqual(self._queue_consumption_tasks_mock.call_count, 0) - @mock.patch("paperless_mail.mail.MailAccountHandler.get_correspondent") + @mock.patch("paperless_mail.mail.MailAccountHandler._get_correspondent") def test_error_skip_mail(self, m): def get_correspondent_fake(message, rule): if message.from_ == "amazon@amazon.de": @@ -871,9 +883,10 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase): ) self.mail_account_handler.handle_mail_account(account) + self.apply_mail_actions() # test that we still consume mail even if some mails throw errors. - self.assertEqual(self.async_task.call_count, 2) + self.assertEqual(self._queue_consumption_tasks_mock.call_count, 2) # faulty mail still in inbox, untouched self.assertEqual(len(self.bogus_mailbox.messages), 1) @@ -898,7 +911,7 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase): self.mail_account_handler.handle_mail_account(account) - self.async_task.assert_called_once() + self._queue_consumption_tasks_mock.assert_called_once() args, kwargs = self.async_task.call_args c = Correspondent.objects.get(name="amazon@amazon.de") @@ -925,50 +938,40 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase): username="admin", password="secret", ) - rule = MailRule.objects.create( - name="testrule3", - account=account, - action=MailRule.MailAction.DELETE, - filter_subject="Claim", - ) - self.assertEqual(self.async_task.call_count, 0) + for (f_body, f_from, f_subject, expected_mail_count) in [ + (None, None, "Claim", 1), + ("electronic", None, None, 1), + (None, "amazon", None, 2), + ("cables", "amazon", "Invoice", 1), + ]: + with self.subTest(f_body=f_body, f_from=f_from, f_subject=f_subject): + MailRule.objects.all().delete() + rule = MailRule.objects.create( + name="testrule3", + account=account, + action=MailRule.MailAction.DELETE, + filter_subject=f_subject, + filter_body=f_body, + filter_from=f_from, + ) + self.reset_bogus_mailbox() + self._queue_consumption_tasks_mock.reset_mock() - self.assertEqual(len(self.bogus_mailbox.messages), 3) - self.mail_account_handler.handle_mail_account(account) - self.assertEqual(len(self.bogus_mailbox.messages), 2) - self.assertEqual(self.async_task.call_count, 1) + self.assertEqual(self._queue_consumption_tasks_mock.call_count, 0) + self.assertEqual(len(self.bogus_mailbox.messages), 3) - self.reset_bogus_mailbox() + self.mail_account_handler.handle_mail_account(account) + self.apply_mail_actions() - rule.filter_subject = None - rule.filter_body = "electronic" - rule.save() - self.assertEqual(len(self.bogus_mailbox.messages), 3) - self.mail_account_handler.handle_mail_account(account) - self.assertEqual(len(self.bogus_mailbox.messages), 2) - self.assertEqual(self.async_task.call_count, 2) - - self.reset_bogus_mailbox() - - rule.filter_from = "amazon" - rule.filter_body = None - rule.save() - self.assertEqual(len(self.bogus_mailbox.messages), 3) - self.mail_account_handler.handle_mail_account(account) - self.assertEqual(len(self.bogus_mailbox.messages), 1) - self.assertEqual(self.async_task.call_count, 4) - - self.reset_bogus_mailbox() - - rule.filter_from = "amazon" - rule.filter_body = "cables" - rule.filter_subject = "Invoice" - rule.save() - self.assertEqual(len(self.bogus_mailbox.messages), 3) - self.mail_account_handler.handle_mail_account(account) - self.assertEqual(len(self.bogus_mailbox.messages), 2) - self.assertEqual(self.async_task.call_count, 5) + self.assertEqual( + len(self.bogus_mailbox.messages), + 3 - expected_mail_count, + ) + self.assertEqual( + self._queue_consumption_tasks_mock.call_count, + expected_mail_count, + ) def test_auth_plain_fallback(self): """ @@ -992,12 +995,13 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase): ) self.assertEqual(len(self.bogus_mailbox.messages), 3) - self.assertEqual(self.async_task.call_count, 0) + self.assertEqual(self._queue_consumption_tasks_mock.call_count, 0) self.assertEqual(len(self.bogus_mailbox.fetch("UNSEEN", False)), 2) self.mail_account_handler.handle_mail_account(account) + self.apply_mail_actions() - self.assertEqual(self.async_task.call_count, 2) + self.assertEqual(self._queue_consumption_tasks_mock.call_count, 2) self.assertEqual(len(self.bogus_mailbox.fetch("UNSEEN", False)), 0) self.assertEqual(len(self.bogus_mailbox.messages), 3) @@ -1030,6 +1034,34 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase): account, ) + def verify_queue_consumption_tasks_call_args(self, params): + + self.assertEqual( + len(self._queue_consumption_tasks_mock.call_args_list), + len(params), + ) + + for (args, kwargs), param in zip( + self._queue_consumption_tasks_mock.call_args_list, + params, + ): + + consume_tasks = kwargs["consume_tasks"] + + self.assertEqual(len(consume_tasks), len(param)) + + for consume_task, p in zip(consume_tasks, param): + self.assertIsFile(consume_task.kwargs["path"]) + for key, value in p.items(): + self.assertIn(key, consume_task.kwargs) + self.assertEqual(consume_task.kwargs[key], value) + + def apply_mail_actions(self): + for args, kwargs in self._queue_consumption_tasks_mock.call_args_list: + message = kwargs["message"] + rule = kwargs["rule"] + apply_mail_action([], rule.pk, message.uid, message.subject, message.date) + class TestManagementCommand(TestCase): @mock.patch( From 3ce1e01d96049e677b358936d84a0fab1237e9cc Mon Sep 17 00:00:00 2001 From: Jonas Winkler <17569239+jonaswinkler@users.noreply.github.com> Date: Fri, 24 Feb 2023 00:03:28 +0100 Subject: [PATCH 08/11] fix the test cases --- src/paperless_mail/mail.py | 70 ++++++++++++++------------- src/paperless_mail/tests/test_mail.py | 22 ++++++--- 2 files changed, 52 insertions(+), 40 deletions(-) diff --git a/src/paperless_mail/mail.py b/src/paperless_mail/mail.py index 0b35c9004..fae770b74 100644 --- a/src/paperless_mail/mail.py +++ b/src/paperless_mail/mail.py @@ -1,5 +1,6 @@ import datetime import itertools +import logging import os import re import tempfile @@ -91,7 +92,7 @@ class DeleteMailAction(BaseMailAction): A mail action that deletes mails after processing. """ - def post_consume(self, M, message_uid, parameter): + def post_consume(self, M: MailBox, message_uid: str, parameter: str): M.delete(message_uid) @@ -103,7 +104,7 @@ class MarkReadMailAction(BaseMailAction): def get_criteria(self): return {"seen": False} - def post_consume(self, M, message_uid, parameter): + def post_consume(self, M: MailBox, message_uid: str, parameter: str): M.flag(message_uid, [MailMessageFlags.SEEN], True) @@ -124,7 +125,7 @@ class FlagMailAction(BaseMailAction): def get_criteria(self): return {"flagged": False} - def post_consume(self, M, message_uid, parameter): + def post_consume(self, M: MailBox, message_uid: str, parameter: str): M.flag(message_uid, [MailMessageFlags.FLAGGED], True) @@ -160,10 +161,9 @@ class TagMailAction(BaseMailAction): else: raise ValueError("This should never happen.") - def post_consume(self, M: MailBox, message_uid, parameter): + def post_consume(self, M: MailBox, message_uid: str, parameter: str): if re.search(r"gmail\.com$|googlemail\.com$", M._host): - for uid in message_uid: - M.client.uid("STORE", uid, "X-GM-LABELS", self.keyword) + M.client.uid("STORE", message_uid, "X-GM-LABELS", self.keyword) # AppleMail elif self.color: @@ -190,6 +190,35 @@ class TagMailAction(BaseMailAction): raise MailError("No keyword specified.") +def mailbox_login(mailbox: MailBox, account: MailAccount): + logger = logging.getLogger("paperless_mail") + + try: + + mailbox.login(account.username, account.password) + + except UnicodeEncodeError: + logger.debug("Falling back to AUTH=PLAIN") + + try: + mailbox.login_utf8(account.username, account.password) + except Exception as e: + logger.error( + "Unable to authenticate with mail server using AUTH=PLAIN", + ) + raise MailError( + f"Error while authenticating account {account}", + ) from e + except Exception as e: + logger.error( + f"Error while authenticating account {account}: {e}", + exc_info=False, + ) + raise MailError( + f"Error while authenticating account {account}", + ) from e + + @shared_task def apply_mail_action( result: List[str], @@ -216,7 +245,7 @@ def apply_mail_action( port=account.imap_port, security=account.imap_security, ) as M: - M.login(username=account.username, password=account.password) + mailbox_login(M, account) M.folder.set(rule.folder) action.post_consume(M, message_uid, rule.action_parameter) @@ -436,32 +465,7 @@ class MailAccountHandler(LoggingMixin): self.log("debug", f"GMAIL Label Support: {supports_gmail_labels}") self.log("debug", f"AUTH=PLAIN Support: {supports_auth_plain}") - try: - - M.login(account.username, account.password) - - except UnicodeEncodeError: - self.log("debug", "Falling back to AUTH=PLAIN") - - try: - M.login_utf8(account.username, account.password) - except Exception as e: - self.log( - "error", - "Unable to authenticate with mail server using AUTH=PLAIN", - ) - raise MailError( - f"Error while authenticating account {account}", - ) from e - except Exception as e: - self.log( - "error", - f"Error while authenticating account {account}: {e}", - exc_info=False, - ) - raise MailError( - f"Error while authenticating account {account}", - ) from e + mailbox_login(M, account) self.log( "debug", diff --git a/src/paperless_mail/tests/test_mail.py b/src/paperless_mail/tests/test_mail.py index b63d11614..17972e10c 100644 --- a/src/paperless_mail/tests/test_mail.py +++ b/src/paperless_mail/tests/test_mail.py @@ -912,13 +912,17 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase): self.mail_account_handler.handle_mail_account(account) self._queue_consumption_tasks_mock.assert_called_once() - args, kwargs = self.async_task.call_args c = Correspondent.objects.get(name="amazon@amazon.de") - # should work - self.assertEqual(kwargs["override_correspondent_id"], c.id) + self.verify_queue_consumption_tasks_call_args( + [ + [ + {"override_correspondent_id": c.id}, + ], + ], + ) - self.async_task.reset_mock() + self._queue_consumption_tasks_mock.reset_mock() self.reset_bogus_mailbox() with mock.patch("paperless_mail.mail.Correspondent.objects.get_or_create") as m: @@ -926,9 +930,13 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase): self.mail_account_handler.handle_mail_account(account) - args, kwargs = self.async_task.call_args - self.async_task.assert_called_once() - self.assertEqual(kwargs["override_correspondent_id"], None) + self.verify_queue_consumption_tasks_call_args( + [ + [ + {"override_correspondent_id": None}, + ], + ], + ) def test_filters(self): From 5eb4b975ae1c007f6d77ad16da50b0be46cd1ea8 Mon Sep 17 00:00:00 2001 From: Jonas Winkler <17569239+jonaswinkler@users.noreply.github.com> Date: Fri, 24 Feb 2023 00:15:40 +0100 Subject: [PATCH 09/11] fix the test cases for python 3.8 --- src/paperless_mail/mail.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/paperless_mail/mail.py b/src/paperless_mail/mail.py index fae770b74..355ad4b63 100644 --- a/src/paperless_mail/mail.py +++ b/src/paperless_mail/mail.py @@ -302,7 +302,7 @@ def error_callback( def queue_consumption_tasks( *, - consume_tasks: list[Signature], + consume_tasks: List[Signature], rule: MailRule, message: MailMessage, ): From 7a2a3e048e724f8d1afbc68df40eebf004f5f7a7 Mon Sep 17 00:00:00 2001 From: Jonas Winkler <17569239+jonaswinkler@users.noreply.github.com> Date: Fri, 24 Feb 2023 12:49:54 +0100 Subject: [PATCH 10/11] cleanup test code --- src/paperless_mail/tests/test_mail.py | 52 ++++++++++++++++++--------- 1 file changed, 36 insertions(+), 16 deletions(-) diff --git a/src/paperless_mail/tests/test_mail.py b/src/paperless_mail/tests/test_mail.py index 17972e10c..5e9b654fb 100644 --- a/src/paperless_mail/tests/test_mail.py +++ b/src/paperless_mail/tests/test_mail.py @@ -386,7 +386,7 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase): self.assertEqual(result, 2) - self.verify_queue_consumption_tasks_call_args( + self.assert_queue_consumption_tasks_call_args( [ [ {"override_title": "file_0", "override_filename": "file_0.pdf"}, @@ -428,7 +428,7 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase): result = self.mail_account_handler._handle_message(message, rule) self.assertEqual(result, 1) - self.verify_queue_consumption_tasks_call_args( + self.assert_queue_consumption_tasks_call_args( [ [ {"override_filename": "f1.pdf"}, @@ -457,7 +457,7 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase): result = self.mail_account_handler._handle_message(message, rule) self.assertEqual(result, 1) - self.verify_queue_consumption_tasks_call_args( + self.assert_queue_consumption_tasks_call_args( [ [ {"override_filename": "f2.pdf"}, @@ -487,7 +487,7 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase): result = self.mail_account_handler._handle_message(message, rule) self.assertEqual(result, 2) - self.verify_queue_consumption_tasks_call_args( + self.assert_queue_consumption_tasks_call_args( [ [ {"override_filename": "f1.pdf"}, @@ -531,7 +531,7 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase): rule.save() self.mail_account_handler._handle_message(message, rule) - self.verify_queue_consumption_tasks_call_args( + self.assert_queue_consumption_tasks_call_args( [ [{"override_filename": m} for m in matches], ], @@ -914,7 +914,7 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase): self._queue_consumption_tasks_mock.assert_called_once() c = Correspondent.objects.get(name="amazon@amazon.de") - self.verify_queue_consumption_tasks_call_args( + self.assert_queue_consumption_tasks_call_args( [ [ {"override_correspondent_id": c.id}, @@ -930,7 +930,7 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase): self.mail_account_handler.handle_mail_account(account) - self.verify_queue_consumption_tasks_call_args( + self.assert_queue_consumption_tasks_call_args( [ [ {"override_correspondent_id": None}, @@ -1042,29 +1042,49 @@ class TestMail(DirectoriesMixin, FileSystemAssertsMixin, TestCase): account, ) - def verify_queue_consumption_tasks_call_args(self, params): + def assert_queue_consumption_tasks_call_args(self, expected_call_args: List): + """ + Verifies that queue_consumption_tasks has been called with the expected arguments. + expected_call_args is the following format: + + * List of calls to queue_consumption_tasks, called once per mail, where each element is: + * List of signatures for the consume_file task, where each element is: + * dictionary containing arguments that need to be present in the consume_file signature. + + """ + + # assert number of calls to queue_consumption_tasks mathc self.assertEqual( len(self._queue_consumption_tasks_mock.call_args_list), - len(params), + len(expected_call_args), ) - for (args, kwargs), param in zip( + for (mock_args, mock_kwargs), expected_signatures in zip( self._queue_consumption_tasks_mock.call_args_list, - params, + expected_call_args, ): + consume_tasks = mock_kwargs["consume_tasks"] - consume_tasks = kwargs["consume_tasks"] + # assert number of consume_file tasks match + self.assertEqual(len(consume_tasks), len(expected_signatures)) - self.assertEqual(len(consume_tasks), len(param)) - - for consume_task, p in zip(consume_tasks, param): + for consume_task, expected_signature in zip( + consume_tasks, + expected_signatures, + ): + # assert the file exists self.assertIsFile(consume_task.kwargs["path"]) - for key, value in p.items(): + + # assert all expected arguments are present in the signature + for key, value in expected_signature.items(): self.assertIn(key, consume_task.kwargs) self.assertEqual(consume_task.kwargs[key], value) def apply_mail_actions(self): + """ + Applies pending actions to mails by inspecting calls to the queue_consumption_tasks method. + """ for args, kwargs in self._queue_consumption_tasks_mock.call_args_list: message = kwargs["message"] rule = kwargs["rule"] From e0f324f61c981c52b10039ce58e5a39d114a070a Mon Sep 17 00:00:00 2001 From: Jonas Winkler <17569239+jonaswinkler@users.noreply.github.com> Date: Wed, 1 Mar 2023 18:17:55 +0100 Subject: [PATCH 11/11] fix links in django admin --- src/paperless_mail/admin.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/paperless_mail/admin.py b/src/paperless_mail/admin.py index ca2737e2e..349559ec1 100644 --- a/src/paperless_mail/admin.py +++ b/src/paperless_mail/admin.py @@ -128,6 +128,8 @@ class ProcessedMailAdmin(admin.ModelAdmin): "error", ] + list_display_links = ["subject"] + list_filter = ("status", "rule")