mark mails as processed internally, don't process processed mails again

This commit is contained in:
Jonas Winkler 2023-02-21 13:50:34 +01:00
parent 7e1c1da424
commit a37177703c
3 changed files with 162 additions and 21 deletions

View File

@ -2,6 +2,7 @@ import itertools
import os import os
import re import re
import tempfile import tempfile
import traceback
from datetime import date from datetime import date
from datetime import timedelta from datetime import timedelta
from fnmatch import fnmatch from fnmatch import fnmatch
@ -11,6 +12,7 @@ import magic
import pathvalidate import pathvalidate
from celery import chord from celery import chord
from celery import shared_task from celery import shared_task
from celery.canvas import Signature
from django.conf import settings from django.conf import settings
from django.db import DatabaseError from django.db import DatabaseError
from documents.loggers import LoggingMixin from documents.loggers import LoggingMixin
@ -27,6 +29,7 @@ from imap_tools import NOT
from imap_tools.mailbox import MailBoxTls from imap_tools.mailbox import MailBoxTls
from paperless_mail.models import MailAccount from paperless_mail.models import MailAccount
from paperless_mail.models import MailRule from paperless_mail.models import MailRule
from paperless_mail.models import ProcessedMail
# Apple Mail sets multiple IMAP KEYWORD and the general "\Flagged" FLAG # Apple Mail sets multiple IMAP KEYWORD and the general "\Flagged" FLAG
# imaplib => conn.fetch(b"<message_id>", "FLAGS") # imaplib => conn.fetch(b"<message_id>", "FLAGS")
@ -154,16 +157,62 @@ def apply_mail_action(
rule = MailRule.objects.get(pk=rule_id) rule = MailRule.objects.get(pk=rule_id)
account = MailAccount.objects.get(pk=rule.account.pk) account = MailAccount.objects.get(pk=rule.account.pk)
action = get_rule_action(rule) try:
with get_mailbox( action = get_rule_action(rule)
server=account.imap_server,
port=account.imap_port, with get_mailbox(
security=account.imap_security, server=account.imap_server,
) as M: port=account.imap_port,
M.login(username=account.username, password=account.password) security=account.imap_security,
M.folder.set(rule.folder) ) as M:
action.post_consume(M, message_uid, rule.action_parameter) M.login(username=account.username, password=account.password)
M.folder.set(rule.folder)
action.post_consume(M, message_uid, rule.action_parameter)
ProcessedMail.objects.create(
rule=rule,
folder=rule.folder,
uid=message_uid,
status="SUCCESS",
)
except Exception as e:
ProcessedMail.objects.create(
rule=rule,
folder=rule.folder,
uid=message_uid,
status="FAILED",
error=traceback.format_exc(),
)
raise e
@shared_task
def error_callback(request, exc, tb, rule_id: int, message_uid: str):
rule = MailRule.objects.get(pk=rule_id)
ProcessedMail.objects.create(
rule=rule,
folder=rule.folder,
uid=message_uid,
status="FAILED",
error=traceback.format_exc(),
)
def queue_consumption_tasks(
consume_tasks: list[Signature],
rule: MailRule,
message_uid: str,
):
mail_action_task = apply_mail_action.s(
rule_id=rule.pk,
message_uid=message_uid,
)
chord(header=consume_tasks, body=mail_action_task).on_error(
error_callback.s(rule_id=rule.pk, message_uid=message_uid),
).delay()
def get_rule_action(rule) -> BaseMailAction: def get_rule_action(rule) -> BaseMailAction:
@ -399,6 +448,14 @@ class MailAccountHandler(LoggingMixin):
total_processed_files = 0 total_processed_files = 0
for message in messages: for message in messages:
if ProcessedMail.objects.filter(
rule=rule,
uid=message.uid,
folder=rule.folder,
).exists():
self.log("debug", f"Skipping mail {message}, already processed.")
continue
try: try:
processed_files = self.handle_message(message, rule) processed_files = self.handle_message(message, rule)
@ -548,12 +605,7 @@ class MailAccountHandler(LoggingMixin):
f"by paperless", f"by paperless",
) )
mail_action_task = apply_mail_action.s( queue_consumption_tasks(consume_tasks, rule, message.uid)
rule_id=rule.pk,
message_uid=message.uid,
)
chord(header=consume_tasks, body=mail_action_task).delay()
return processed_attachments return processed_attachments
@ -611,12 +663,7 @@ class MailAccountHandler(LoggingMixin):
override_owner_id=rule.owner.id if rule.owner else None, override_owner_id=rule.owner.id if rule.owner else None,
) )
mail_action_task = apply_mail_action.s( queue_consumption_tasks([consume_task], rule, message.uid)
rule_id=rule.pk,
message_uid=message.uid,
)
(consume_task | mail_action_task).delay()
processed_elements = 1 processed_elements = 1
return processed_elements return processed_elements

View File

@ -0,0 +1,57 @@
# Generated by Django 4.1.5 on 2023-02-21 12:48
from django.conf import settings
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
dependencies = [
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
("paperless_mail", "0017_mailaccount_owner_mailrule_owner"),
]
operations = [
migrations.CreateModel(
name="ProcessedMail",
fields=[
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
("folder", models.CharField(max_length=256, verbose_name="folder")),
("uid", models.CharField(max_length=256, verbose_name="folder")),
("status", models.CharField(max_length=256, verbose_name="status")),
(
"error",
models.TextField(blank=True, null=True, verbose_name="error"),
),
(
"owner",
models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
to=settings.AUTH_USER_MODEL,
verbose_name="owner",
),
),
(
"rule",
models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
to="paperless_mail.mailrule",
),
),
],
options={
"abstract": False,
},
),
]

View File

@ -214,3 +214,40 @@ class MailRule(document_models.ModelWithOwner):
def __str__(self): def __str__(self):
return f"{self.account.name}.{self.name}" return f"{self.account.name}.{self.name}"
class ProcessedMail(document_models.ModelWithOwner):
rule = models.ForeignKey(
MailRule,
null=False,
blank=False,
on_delete=models.CASCADE,
)
folder = models.CharField(
_("folder"),
null=False,
blank=False,
max_length=256,
)
uid = models.CharField(
_("folder"),
null=False,
blank=False,
max_length=256,
)
status = models.CharField(
_("status"),
null=False,
blank=False,
max_length=256,
)
error = models.TextField(
_("error"),
null=True,
blank=True,
)