From e107d5df6fb266a5f63f3677b98869d3a05087f5 Mon Sep 17 00:00:00 2001
From: jonaswinkler
Date: Wed, 6 Jan 2021 02:39:59 +0100
Subject: [PATCH] fixes #153, adds option for inline attachments and filename filters

Filename filters are matched case-insensitively on every platform: both
the attachment filename and the pattern are lowercased before calling
fnmatch(), which by itself only ignores case on Windows.
---
 src/paperless_mail/admin.py                   |  5 ++-
 src/paperless_mail/mail.py                    |  8 +++-
 .../migrations/0007_auto_20210106_0138.py     | 23 ++++++++++
 src/paperless_mail/models.py                  | 25 +++++++++++
 src/paperless_mail/tests/test_mail.py         | 44 +++++++++++++++++++
 5 files changed, 103 insertions(+), 2 deletions(-)
 create mode 100644 src/paperless_mail/migrations/0007_auto_20210106_0138.py

diff --git a/src/paperless_mail/admin.py b/src/paperless_mail/admin.py
index 4c63c2b29..d6789ebe5 100644
--- a/src/paperless_mail/admin.py
+++ b/src/paperless_mail/admin.py
@@ -12,6 +12,7 @@ class MailAccountAdmin(admin.ModelAdmin):
 class MailRuleAdmin(admin.ModelAdmin):
 
     radio_fields = {
+        "attachment_type": admin.VERTICAL,
         "action": admin.VERTICAL,
         "assign_title_from": admin.VERTICAL,
         "assign_correspondent_from": admin.VERTICAL
@@ -29,7 +30,9 @@ class MailRuleAdmin(admin.ModelAdmin):
                 ('filter_from',
                  'filter_subject',
                  'filter_body',
-                 'maximum_age')
+                 'filter_attachment_filename',
+                 'maximum_age',
+                 'attachment_type')
         }),
         (_("Actions"), {
             'description':
diff --git a/src/paperless_mail/mail.py b/src/paperless_mail/mail.py
index 537807400..e848af28a 100644
--- a/src/paperless_mail/mail.py
+++ b/src/paperless_mail/mail.py
@@ -1,6 +1,7 @@
 import os
 import tempfile
 from datetime import timedelta, date
+from fnmatch import fnmatch
 
 import magic
 import pathvalidate
@@ -263,7 +264,7 @@ class MailAccountHandler(LoggingMixin):
 
         for att in message.attachments:
 
-            if not att.content_disposition == "attachment":
+            if not att.content_disposition == "attachment" and rule.attachment_type == MailRule.ATTACHMENT_TYPE_ATTACHMENTS_ONLY:  # NOQA: E501
                 self.log(
                     'debug',
                     f"Rule {rule}: "
@@ -271,6 +272,11 @@ class MailAccountHandler(LoggingMixin):
                     f"with content disposition {att.content_disposition}")
                 continue
 
+            if rule.filter_attachment_filename:
+                # fnmatch() only ignores case on Windows, so lowercase both.
+                if not fnmatch(att.filename.lower(), rule.filter_attachment_filename.lower()):  # NOQA: E501
+                    continue
+
             title = self.get_title(message, att, rule)
 
             # don't trust the content type of the attachment. Could be
diff --git a/src/paperless_mail/migrations/0007_auto_20210106_0138.py b/src/paperless_mail/migrations/0007_auto_20210106_0138.py
new file mode 100644
index 000000000..2da9eecc9
--- /dev/null
+++ b/src/paperless_mail/migrations/0007_auto_20210106_0138.py
@@ -0,0 +1,23 @@
+# Generated by Django 3.1.5 on 2021-01-06 01:38
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('paperless_mail', '0006_auto_20210101_2340'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='mailrule',
+            name='attachment_type',
+            field=models.PositiveIntegerField(choices=[(1, 'Only process attachments.'), (2, "Process all files, including 'inline' attachments.")], default=1, help_text="Inline attachments include embedded images, so it's best to combine this option with a filename filter.", verbose_name='attachment type'),
+        ),
+        migrations.AddField(
+            model_name='mailrule',
+            name='filter_attachment_filename',
+            field=models.CharField(blank=True, help_text='Only consume documents which entirely match this filename if specified. Wildcards such as *.pdf or *invoice* are allowed. Case insensitive.', max_length=256, null=True, verbose_name='filter attachment filename'),
+        ),
+    ]
diff --git a/src/paperless_mail/models.py b/src/paperless_mail/models.py
index ddfb4d43c..7e56b47a7 100644
--- a/src/paperless_mail/models.py
+++ b/src/paperless_mail/models.py
@@ -60,6 +60,15 @@ class MailRule(models.Model):
         verbose_name = _("mail rule")
         verbose_name_plural = _("mail rules")
 
+    ATTACHMENT_TYPE_ATTACHMENTS_ONLY = 1
+    ATTACHMENT_TYPE_EVERYTHING = 2
+
+    ATTACHMENT_TYPES = (
+        (ATTACHMENT_TYPE_ATTACHMENTS_ONLY, _("Only process attachments.")),
+        (ATTACHMENT_TYPE_EVERYTHING, _("Process all files, including 'inline' "
+                                       "attachments."))
+    )
+
     ACTION_DELETE = 1
     ACTION_MOVE = 2
     ACTION_MARK_READ = 3
@@ -125,11 +134,27 @@ class MailRule(models.Model):
         _("filter body"),
         max_length=256, null=True, blank=True)
 
+    filter_attachment_filename = models.CharField(
+        _("filter attachment filename"),
+        max_length=256, null=True, blank=True,
+        help_text=_("Only consume documents which entirely match this "
+                    "filename if specified. Wildcards such as *.pdf or "
+                    "*invoice* are allowed. Case insensitive.")
+    )
+
     maximum_age = models.PositiveIntegerField(
         _("maximum age"),
         default=30,
         help_text=_("Specified in days."))
 
+    attachment_type = models.PositiveIntegerField(
+        _("attachment type"),
+        choices=ATTACHMENT_TYPES,
+        default=ATTACHMENT_TYPE_ATTACHMENTS_ONLY,
+        help_text=_("Inline attachments include embedded images, so it's best "
+                    "to combine this option with a filename filter.")
+    )
+
     action = models.PositiveIntegerField(
         _("action"),
         choices=ACTIONS,
diff --git a/src/paperless_mail/tests/test_mail.py b/src/paperless_mail/tests/test_mail.py
index 9c0f52c53..77db096c4 100644
--- a/src/paperless_mail/tests/test_mail.py
+++ b/src/paperless_mail/tests/test_mail.py
@@ -273,6 +273,50 @@ class TestMail(TestCase):
         args, kwargs = self.async_task.call_args
         self.assertEqual(kwargs['override_filename'], "f2.pdf")
 
+    def test_handle_inline_files(self):
+        message = create_message()
+        message.attachments = [
+            create_attachment(filename="f1.pdf", content_disposition='inline'),
+            create_attachment(filename="f2.pdf", content_disposition='attachment')
+        ]
+
+        account = MailAccount()
+        rule = MailRule(assign_title_from=MailRule.TITLE_FROM_FILENAME, account=account, attachment_type=MailRule.ATTACHMENT_TYPE_EVERYTHING)
+
+        result = self.mail_account_handler.handle_message(message, rule)
+
+        self.assertEqual(result, 2)
+        self.assertEqual(self.async_task.call_count, 2)
+
+    def test_filename_filter(self):
+        message = create_message()
+        message.attachments = [
+            create_attachment(filename="f1.pdf"),
+            create_attachment(filename="f2.pdf"),
+            create_attachment(filename="f3.pdf"),
+            create_attachment(filename="f2.png"),
+        ]
+
+        tests = [
+            ("*.pdf", ["f1.pdf", "f2.pdf", "f3.pdf"]),
+            ("*.PDF", ["f1.pdf", "f2.pdf", "f3.pdf"]),
+            ("f1.pdf", ["f1.pdf"]),
+            ("f1", []),
+            ("*", ["f1.pdf", "f2.pdf", "f3.pdf", "f2.png"]),
+            ("*.png", ["f2.png"]),
+        ]
+
+        for (pattern, matches) in tests:
+            self.async_task.reset_mock()
+            account = MailAccount()
+            rule = MailRule(assign_title_from=MailRule.TITLE_FROM_FILENAME, account=account, filter_attachment_filename=pattern)
+
+            result = self.mail_account_handler.handle_message(message, rule)
+
+            self.assertEqual(result, len(matches))
+            filenames = [a[1]['override_filename'] for a in self.async_task.call_args_list]
+            self.assertCountEqual(filenames, matches)
+
     def test_handle_mail_account_mark_read(self):
 
         account = MailAccount.objects.create(name="test", imap_server="", username="admin", password="secret")