add feature to consume imap mail als .eml

This commit is contained in:
phail 2022-04-15 14:40:02 +02:00
parent 5fcf1b5434
commit cca576f518
4 changed files with 149 additions and 62 deletions

View File

@ -56,6 +56,7 @@ class MailRuleAdmin(admin.ModelAdmin):
"filter_body",
"filter_attachment_filename",
"maximum_age",
"consumption_scope",
"attachment_type",
),
},

View File

@ -269,8 +269,11 @@ class MailAccountHandler(LoggingMixin):
return total_processed_files
def handle_message(self, message, rule) -> int:
if not message.attachments:
def handle_message(self, message, rule: MailRule) -> int:
if (
not message.attachments
and rule.consumption_scope == MailRule.ConsumptionScope.ATTACHMENTS_ONLY
):
return 0
self.log(
@ -286,76 +289,113 @@ class MailAccountHandler(LoggingMixin):
processed_attachments = 0
for att in message.attachments:
if (
rule.consumption_scope == MailRule.ConsumptionScope.EML_ONLY
or rule.consumption_scope == MailRule.ConsumptionScope.EVERYTHING
):
os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
_, temp_filename = tempfile.mkstemp(
prefix="paperless-mail-",
dir=settings.SCRATCH_DIR,
)
with open(temp_filename, "wb") as f:
f.write(message.obj.as_bytes())
if (
not att.content_disposition == "attachment"
and rule.attachment_type
== MailRule.AttachmentProcessing.ATTACHMENTS_ONLY
):
self.log(
"debug",
f"Rule {rule}: "
f"Skipping attachment {att.filename} "
f"with content disposition {att.content_disposition}",
)
continue
self.log(
"info",
f"Rule {rule}: "
f"Consuming eml from mail "
f"{message.subject} from {message.from_}",
)
if rule.filter_attachment_filename:
# Force the filename and pattern to the lowercase
# as this is system dependent otherwise
if not fnmatch(
att.filename.lower(),
rule.filter_attachment_filename.lower(),
async_task(
"documents.tasks.consume_file",
path=temp_filename,
override_filename=pathvalidate.sanitize_filename(
message.subject + ".eml",
),
override_title=message.subject,
override_correspondent_id=correspondent.id if correspondent else None,
override_document_type_id=doc_type.id if doc_type else None,
override_tag_ids=[tag.id] if tag else None,
task_name=message.subject[:100],
)
processed_attachments += 1
if (
rule.consumption_scope == MailRule.ConsumptionScope.ATTACHMENTS_ONLY
or rule.consumption_scope == MailRule.ConsumptionScope.EVERYTHING
):
for att in message.attachments:
if (
not att.content_disposition == "attachment"
and rule.attachment_type
== MailRule.AttachmentProcessing.ATTACHMENTS_ONLY
):
self.log(
"debug",
f"Rule {rule}: "
f"Skipping attachment {att.filename} "
f"with content disposition {att.content_disposition}",
)
continue
title = self.get_title(message, att, rule)
if rule.filter_attachment_filename:
# Force the filename and pattern to the lowercase
# as this is system dependent otherwise
if not fnmatch(
att.filename.lower(),
rule.filter_attachment_filename.lower(),
):
continue
# don't trust the content type of the attachment. Could be
# generic application/octet-stream.
mime_type = magic.from_buffer(att.payload, mime=True)
title = self.get_title(message, att, rule)
if is_mime_type_supported(mime_type):
# don't trust the content type of the attachment. Could be
# generic application/octet-stream.
mime_type = magic.from_buffer(att.payload, mime=True)
os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
_, temp_filename = tempfile.mkstemp(
prefix="paperless-mail-",
dir=settings.SCRATCH_DIR,
)
with open(temp_filename, "wb") as f:
f.write(att.payload)
if is_mime_type_supported(mime_type):
self.log(
"info",
f"Rule {rule}: "
f"Consuming attachment {att.filename} from mail "
f"{message.subject} from {message.from_}",
)
os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
_, temp_filename = tempfile.mkstemp(
prefix="paperless-mail-",
dir=settings.SCRATCH_DIR,
)
with open(temp_filename, "wb") as f:
f.write(att.payload)
async_task(
"documents.tasks.consume_file",
path=temp_filename,
override_filename=pathvalidate.sanitize_filename(
att.filename,
),
override_title=title,
override_correspondent_id=correspondent.id
if correspondent
else None,
override_document_type_id=doc_type.id if doc_type else None,
override_tag_ids=[tag.id] if tag else None,
task_name=att.filename[:100],
)
self.log(
"info",
f"Rule {rule}: "
f"Consuming attachment {att.filename} from mail "
f"{message.subject} from {message.from_}",
)
processed_attachments += 1
else:
self.log(
"debug",
f"Rule {rule}: "
f"Skipping attachment {att.filename} "
f"since guessed mime type {mime_type} is not supported "
f"by paperless",
)
async_task(
"documents.tasks.consume_file",
path=temp_filename,
override_filename=pathvalidate.sanitize_filename(
att.filename,
),
override_title=title,
override_correspondent_id=correspondent.id
if correspondent
else None,
override_document_type_id=doc_type.id if doc_type else None,
override_tag_ids=[tag.id] if tag else None,
task_name=att.filename[:100],
)
processed_attachments += 1
else:
self.log(
"debug",
f"Rule {rule}: "
f"Skipping attachment {att.filename} "
f"since guessed mime type {mime_type} is not supported "
f"by paperless",
)
return processed_attachments

View File

@ -0,0 +1,32 @@
# Generated by Django 4.0.4 on 2022-04-14 22:36
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
("paperless_mail", "0009_alter_mailrule_action_alter_mailrule_folder"),
]
operations = [
migrations.AddField(
model_name="mailrule",
name="consumption_scope",
field=models.PositiveIntegerField(
choices=[
(1, "Only process attachments."),
(
2,
"Process full Mail (with embedded attachments in file) as .eml",
),
(
3,
"Process full Mail (with embedded attachments in file) as .eml + process attachments as separate documents",
),
],
default=1,
verbose_name="consumption scope",
),
),
]

View File

@ -56,6 +56,14 @@ class MailRule(models.Model):
verbose_name = _("mail rule")
verbose_name_plural = _("mail rules")
class ConsumptionScope(models.IntegerChoices):
ATTACHMENTS_ONLY = 1, _("Only process attachments.")
EML_ONLY = 2, _("Process full Mail (with embedded attachments in file) as .eml")
EVERYTHING = 3, _(
"Process full Mail (with embedded attachments in file) as .eml "
"+ process attachments as separate documents",
)
class AttachmentProcessing(models.IntegerChoices):
ATTACHMENTS_ONLY = 1, _("Only process attachments.")
EVERYTHING = 2, _("Process all files, including 'inline' " "attachments.")
@ -144,6 +152,12 @@ class MailRule(models.Model):
),
)
consumption_scope = models.PositiveIntegerField(
_("consumption scope"),
choices=ConsumptionScope.choices,
default=ConsumptionScope.ATTACHMENTS_ONLY,
)
action = models.PositiveIntegerField(
_("action"),
choices=AttachmentAction.choices,