diff --git a/src/paperless_mail/admin.py b/src/paperless_mail/admin.py
index b56bc0727..1e22e6ebd 100644
--- a/src/paperless_mail/admin.py
+++ b/src/paperless_mail/admin.py
@@ -56,6 +56,7 @@ class MailRuleAdmin(admin.ModelAdmin):
                     "filter_body",
                     "filter_attachment_filename",
                     "maximum_age",
+                    "consumption_scope",
                     "attachment_type",
                 ),
             },
diff --git a/src/paperless_mail/mail.py b/src/paperless_mail/mail.py
index 1e868ceaa..72a74639c 100644
--- a/src/paperless_mail/mail.py
+++ b/src/paperless_mail/mail.py
@@ -269,8 +269,18 @@ class MailAccountHandler(LoggingMixin):
 
         return total_processed_files
 
-    def handle_message(self, message, rule) -> int:
-        if not message.attachments:
+    def handle_message(self, message, rule: MailRule) -> int:
+        """Queue consumption tasks for one mail, as selected by the rule.
+
+        Depending on ``rule.consumption_scope`` the whole mail is queued as
+        an .eml file, its attachments are queued individually, or both.
+
+        Returns the number of consumption tasks queued for this mail.
+        """
+        if (
+            not message.attachments
+            and rule.consumption_scope == MailRule.ConsumptionScope.ATTACHMENTS_ONLY
+        ):
             return 0
 
         self.log(
@@ -286,76 +296,113 @@ class MailAccountHandler(LoggingMixin):
 
         processed_attachments = 0
 
-        for att in message.attachments:
+        if (
+            rule.consumption_scope == MailRule.ConsumptionScope.EML_ONLY
+            or rule.consumption_scope == MailRule.ConsumptionScope.EVERYTHING
+        ):
+            os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
+            fd, temp_filename = tempfile.mkstemp(
+                prefix="paperless-mail-",
+                dir=settings.SCRATCH_DIR,
+            )
+            with os.fdopen(fd, "wb") as f:  # closes mkstemp's fd
+                f.write(message.obj.as_bytes())
 
-            if (
-                not att.content_disposition == "attachment"
-                and rule.attachment_type
-                == MailRule.AttachmentProcessing.ATTACHMENTS_ONLY
-            ):
-                self.log(
-                    "debug",
-                    f"Rule {rule}: "
-                    f"Skipping attachment {att.filename} "
-                    f"with content disposition {att.content_disposition}",
-                )
-                continue
+            self.log(
+                "info",
+                f"Rule {rule}: "
+                f"Consuming eml from mail "
+                f"{message.subject} from {message.from_}",
+            )
 
-            if rule.filter_attachment_filename:
-                # Force the filename and pattern to the lowercase
-                # as this is system dependent otherwise
-                if not fnmatch(
-                    att.filename.lower(),
-                    rule.filter_attachment_filename.lower(),
+            async_task(
+                "documents.tasks.consume_file",
+                path=temp_filename,
+                override_filename=pathvalidate.sanitize_filename(
+                    message.subject + ".eml",
+                ),
+                override_title=message.subject,
+                override_correspondent_id=correspondent.id if correspondent else None,
+                override_document_type_id=doc_type.id if doc_type else None,
+                override_tag_ids=[tag.id] if tag else None,
+                task_name=message.subject[:100],
+            )
+            processed_attachments += 1
+
+        if (
+            rule.consumption_scope == MailRule.ConsumptionScope.ATTACHMENTS_ONLY
+            or rule.consumption_scope == MailRule.ConsumptionScope.EVERYTHING
+        ):
+            for att in message.attachments:
+
+                if (
+                    not att.content_disposition == "attachment"
+                    and rule.attachment_type
+                    == MailRule.AttachmentProcessing.ATTACHMENTS_ONLY
                 ):
+                    self.log(
+                        "debug",
+                        f"Rule {rule}: "
+                        f"Skipping attachment {att.filename} "
+                        f"with content disposition {att.content_disposition}",
+                    )
                     continue
 
-            title = self.get_title(message, att, rule)
+                if rule.filter_attachment_filename:
+                    # Force the filename and pattern to the lowercase
+                    # as this is system dependent otherwise
+                    if not fnmatch(
+                        att.filename.lower(),
+                        rule.filter_attachment_filename.lower(),
+                    ):
+                        continue
 
-            # don't trust the content type of the attachment. Could be
-            # generic application/octet-stream.
-            mime_type = magic.from_buffer(att.payload, mime=True)
+                title = self.get_title(message, att, rule)
 
-            if is_mime_type_supported(mime_type):
+                # don't trust the content type of the attachment. Could be
+                # generic application/octet-stream.
+                mime_type = magic.from_buffer(att.payload, mime=True)
 
-                os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
-                _, temp_filename = tempfile.mkstemp(
-                    prefix="paperless-mail-",
-                    dir=settings.SCRATCH_DIR,
-                )
-                with open(temp_filename, "wb") as f:
-                    f.write(att.payload)
+                if is_mime_type_supported(mime_type):
 
-                self.log(
-                    "info",
-                    f"Rule {rule}: "
-                    f"Consuming attachment {att.filename} from mail "
-                    f"{message.subject} from {message.from_}",
-                )
+                    os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
+                    fd, temp_filename = tempfile.mkstemp(
+                        prefix="paperless-mail-",
+                        dir=settings.SCRATCH_DIR,
+                    )
+                    with os.fdopen(fd, "wb") as f:  # closes mkstemp's fd
+                        f.write(att.payload)
 
-                async_task(
-                    "documents.tasks.consume_file",
-                    path=temp_filename,
-                    override_filename=pathvalidate.sanitize_filename(
-                        att.filename,
-                    ),
-                    override_title=title,
-                    override_correspondent_id=correspondent.id
-                    if correspondent
-                    else None,
-                    override_document_type_id=doc_type.id if doc_type else None,
-                    override_tag_ids=[tag.id] if tag else None,
-                    task_name=att.filename[:100],
-                )
+                    self.log(
+                        "info",
+                        f"Rule {rule}: "
+                        f"Consuming attachment {att.filename} from mail "
+                        f"{message.subject} from {message.from_}",
+                    )
 
-                processed_attachments += 1
-            else:
-                self.log(
-                    "debug",
-                    f"Rule {rule}: "
-                    f"Skipping attachment {att.filename} "
-                    f"since guessed mime type {mime_type} is not supported "
-                    f"by paperless",
-                )
+                    async_task(
+                        "documents.tasks.consume_file",
+                        path=temp_filename,
+                        override_filename=pathvalidate.sanitize_filename(
+                            att.filename,
+                        ),
+                        override_title=title,
+                        override_correspondent_id=correspondent.id
+                        if correspondent
+                        else None,
+                        override_document_type_id=doc_type.id if doc_type else None,
+                        override_tag_ids=[tag.id] if tag else None,
+                        task_name=att.filename[:100],
+                    )
+
+                    processed_attachments += 1
+                else:
+                    self.log(
+                        "debug",
+                        f"Rule {rule}: "
+                        f"Skipping attachment {att.filename} "
+                        f"since guessed mime type {mime_type} is not supported "
+                        f"by paperless",
+                    )
 
         return processed_attachments
diff --git a/src/paperless_mail/migrations/0010_mailrule_consumption_scope.py b/src/paperless_mail/migrations/0010_mailrule_consumption_scope.py
new file mode 100644
index 000000000..8569cd378
--- /dev/null
+++ b/src/paperless_mail/migrations/0010_mailrule_consumption_scope.py
@@ -0,0 +1,32 @@
+# Generated by Django 4.0.4 on 2022-04-14 22:36
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ("paperless_mail", "0009_alter_mailrule_action_alter_mailrule_folder"),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name="mailrule",
+            name="consumption_scope",
+            field=models.PositiveIntegerField(
+                choices=[
+                    (1, "Only process attachments."),
+                    (
+                        2,
+                        "Process full Mail (with embedded attachments in file) as .eml",
+                    ),
+                    (
+                        3,
+                        "Process full Mail (with embedded attachments in file) as .eml + process attachments as separate documents",
+                    ),
+                ],
+                default=1,
+                verbose_name="consumption scope",
+            ),
+        ),
+    ]
diff --git a/src/paperless_mail/models.py b/src/paperless_mail/models.py
index 2c7b9fb6d..e4809a790 100644
--- a/src/paperless_mail/models.py
+++ b/src/paperless_mail/models.py
@@ -56,6 +56,14 @@ class MailRule(models.Model):
         verbose_name = _("mail rule")
         verbose_name_plural = _("mail rules")
 
+    class ConsumptionScope(models.IntegerChoices):
+        ATTACHMENTS_ONLY = 1, _("Only process attachments.")
+        EML_ONLY = 2, _("Process full Mail (with embedded attachments in file) as .eml")
+        EVERYTHING = 3, _(
+            "Process full Mail (with embedded attachments in file) as .eml "
+            "+ process attachments as separate documents",
+        )
+
     class AttachmentProcessing(models.IntegerChoices):
         ATTACHMENTS_ONLY = 1, _("Only process attachments.")
         EVERYTHING = 2, _("Process all files, including 'inline' " "attachments.")
@@ -144,6 +152,12 @@ class MailRule(models.Model):
         ),
     )
 
+    consumption_scope = models.PositiveIntegerField(
+        _("consumption scope"),
+        choices=ConsumptionScope.choices,
+        default=ConsumptionScope.ATTACHMENTS_ONLY,
+    )
+
     action = models.PositiveIntegerField(
         _("action"),
         choices=AttachmentAction.choices,