Merge remote-tracking branch 'paperless/dev' into feature-consume-eml

This commit is contained in:
phail
2022-10-23 20:37:22 +02:00
225 changed files with 19278 additions and 25141 deletions

View File

@@ -1,24 +1,26 @@
import os
import re
import tempfile
from datetime import date
from datetime import timedelta
from fnmatch import fnmatch
from imaplib import IMAP4
from typing import Dict
import magic
import pathvalidate
from django.conf import settings
from django.db import DatabaseError
from django_q.tasks import async_task
from documents.loggers import LoggingMixin
from documents.models import Correspondent
from documents.parsers import is_mime_type_supported
from documents.tasks import consume_file
from imap_tools import AND
from imap_tools import MailBox
from imap_tools import MailboxFolderSelectError
from imap_tools import MailBoxUnencrypted
from imap_tools import MailMessage
from imap_tools import MailMessageFlags
from imap_tools import NOT
from imap_tools.mailbox import MailBoxTls
from paperless_mail.models import MailAccount
from paperless_mail.models import MailRule
@@ -29,7 +31,7 @@ class MailError(Exception):
class BaseMailAction:
def get_criteria(self):
def get_criteria(self) -> Dict:
return {}
def post_consume(self, M, message_uids, parameter):
@@ -67,13 +69,17 @@ class TagMailAction(BaseMailAction):
self.keyword = parameter
def get_criteria(self):
return {"no_keyword": self.keyword}
return {"no_keyword": self.keyword, "gmail_label": self.keyword}
def post_consume(self, M: MailBox, message_uids, parameter):
M.flag(message_uids, [self.keyword], True)
if re.search(r"gmail\.com$|googlemail\.com$", M._host):
for uid in message_uids:
M.client.uid("STORE", uid, "X-GM-LABELS", self.keyword)
else:
M.flag(message_uids, [self.keyword], True)
def get_rule_action(rule):
def get_rule_action(rule) -> BaseMailAction:
if rule.action == MailRule.MailAction.FLAG:
return FlagMailAction()
elif rule.action == MailRule.MailAction.DELETE:
@@ -103,7 +109,7 @@ def make_criterias(rule):
return {**criterias, **get_rule_action(rule).get_criteria()}
def get_mailbox(server, port, security):
def get_mailbox(server, port, security) -> MailBox:
if security == MailAccount.ImapSecurity.NONE:
mailbox = MailBoxUnencrypted(server, port)
elif security == MailAccount.ImapSecurity.STARTTLS:
@@ -162,7 +168,7 @@ class MailAccountHandler(LoggingMixin):
"Unknown correspondent selector",
) # pragma: nocover
def handle_mail_account(self, account):
def handle_mail_account(self, account: MailAccount):
self.renew_logging_group()
@@ -176,33 +182,29 @@ class MailAccountHandler(LoggingMixin):
account.imap_security,
) as M:
supports_gmail_labels = "X-GM-EXT-1" in M.client.capabilities
supports_auth_plain = "AUTH=PLAIN" in M.client.capabilities
self.log("debug", f"GMAIL Label Support: {supports_gmail_labels}")
self.log("debug", f"AUTH=PLAIN Support: {supports_auth_plain}")
try:
M.login(account.username, account.password)
except UnicodeEncodeError:
self.log("debug", "Falling back to AUTH=PLAIN")
try:
# rfc2595 section 6 - PLAIN SASL mechanism
client: IMAP4 = M.client
encoded = (
b"\0"
+ account.username.encode("utf8")
+ b"\0"
+ account.password.encode("utf8")
)
# Assumption is the server supports AUTH=PLAIN capability
# Could check the list with client.capability(), but then what?
# We're failing anyway then
client.authenticate("PLAIN", lambda x: encoded)
# Need to transition out of AUTH state to SELECTED
M.folder.set("INBOX")
except Exception:
try:
M.login_utf8(account.username, account.password)
except Exception as err:
self.log(
"error",
"Unable to authenticate with mail server using AUTH=PLAIN",
)
raise MailError(f"Error while authenticating account {account}")
raise MailError(
f"Error while authenticating account {account}",
) from err
except Exception as e:
self.log(
"error",
@@ -221,7 +223,11 @@ class MailAccountHandler(LoggingMixin):
for rule in account.rules.order_by("order"):
try:
total_processed_files += self.handle_mail_rule(M, rule)
total_processed_files += self.handle_mail_rule(
M,
rule,
supports_gmail_labels,
)
except Exception as e:
self.log(
"error",
@@ -239,13 +245,18 @@ class MailAccountHandler(LoggingMixin):
return total_processed_files
def handle_mail_rule(self, M: MailBox, rule: MailRule):
def handle_mail_rule(
self,
M: MailBox,
rule: MailRule,
supports_gmail_labels: bool = False,
):
self.log("debug", f"Rule {rule}: Selecting folder {rule.folder}")
try:
M.folder.set(rule.folder)
except MailboxFolderSelectError:
except MailboxFolderSelectError as err:
self.log(
"error",
@@ -264,23 +275,38 @@ class MailAccountHandler(LoggingMixin):
raise MailError(
f"Rule {rule}: Folder {rule.folder} "
f"does not exist in account {rule.account}",
)
) from err
criterias = make_criterias(rule)
# Deal with the Gmail label extension
if "gmail_label" in criterias:
gmail_label = criterias["gmail_label"]
del criterias["gmail_label"]
if not supports_gmail_labels:
criterias_imap = AND(**criterias)
else:
criterias_imap = AND(NOT(gmail_label=gmail_label), **criterias)
else:
criterias_imap = AND(**criterias)
self.log(
"debug",
f"Rule {rule}: Searching folder with criteria " f"{str(AND(**criterias))}",
f"Rule {rule}: Searching folder with criteria " f"{str(criterias_imap)}",
)
try:
messages = M.fetch(
criteria=AND(**criterias),
criteria=criterias_imap,
mark_seen=False,
charset=rule.account.character_set,
)
except Exception:
raise MailError(f"Rule {rule}: Error while fetching folder {rule.folder}")
except Exception as err:
raise MailError(
f"Rule {rule}: Error while fetching folder {rule.folder}",
) from err
post_consume_messages = []
@@ -320,7 +346,7 @@ class MailAccountHandler(LoggingMixin):
except Exception as e:
raise MailError(
f"Rule {rule}: Error while processing post-consume actions: " f"{e}",
)
) from e
return total_processed_files
@@ -382,8 +408,7 @@ class MailAccountHandler(LoggingMixin):
f"{message.subject} from {message.from_}",
)
async_task(
"documents.tasks.consume_file",
consume_file.delay(
path=temp_filename,
override_filename=pathvalidate.sanitize_filename(
message.subject + ".eml",
@@ -447,8 +472,7 @@ class MailAccountHandler(LoggingMixin):
f"{message.subject} from {message.from_}",
)
async_task(
"documents.tasks.consume_file",
consume_file.delay(
path=temp_filename,
override_filename=pathvalidate.sanitize_filename(
att.filename,

View File

@@ -2,28 +2,12 @@
from django.db import migrations
from django.db.migrations import RunPython
from django_q.models import Schedule
from django_q.tasks import schedule
def add_schedules(apps, schema_editor):
schedule(
"paperless_mail.tasks.process_mail_accounts",
name="Check all e-mail accounts",
schedule_type=Schedule.MINUTES,
minutes=10,
)
def remove_schedules(apps, schema_editor):
Schedule.objects.filter(func="paperless_mail.tasks.process_mail_accounts").delete()
class Migration(migrations.Migration):
dependencies = [
("paperless_mail", "0001_initial"),
("django_q", "0013_task_attempt_count"),
]
operations = [RunPython(add_schedules, remove_schedules)]
operations = [RunPython(migrations.RunPython.noop, migrations.RunPython.noop)]

View File

@@ -1,13 +1,14 @@
import logging
from celery import shared_task
from paperless_mail.mail import MailAccountHandler
from paperless_mail.mail import MailError
from paperless_mail.models import MailAccount
logger = logging.getLogger("paperless.mail.tasks")
@shared_task
def process_mail_accounts():
total_new_documents = 0
for account in MailAccount.objects.all():
@@ -20,11 +21,3 @@ def process_mail_accounts():
return f"Added {total_new_documents} document(s)."
else:
return "No new documents were added."
def process_mail_account(name):
try:
account = MailAccount.objects.get(name=name)
MailAccountHandler().handle_mail_account(account)
except MailAccount.DoesNotExist:
logger.error(f"Unknown mail acccount: {name}")

View File

@@ -0,0 +1,70 @@
import os
import pytest
from django.test import TestCase
from paperless_mail.mail import MailAccountHandler
from paperless_mail.mail import MailError
from paperless_mail.models import MailAccount
from paperless_mail.models import MailRule
# Only run if the environment is setup
# And the environment is not empty (forks, I think)
@pytest.mark.skipif(
"PAPERLESS_MAIL_TEST_HOST" not in os.environ
or not len(os.environ["PAPERLESS_MAIL_TEST_HOST"]),
reason="Live server testing not enabled",
)
class TestMailLiveServer(TestCase):
def setUp(self) -> None:
self.mail_account_handler = MailAccountHandler()
self.account = MailAccount.objects.create(
name="test",
imap_server=os.environ["PAPERLESS_MAIL_TEST_HOST"],
username=os.environ["PAPERLESS_MAIL_TEST_USER"],
password=os.environ["PAPERLESS_MAIL_TEST_PASSWD"],
imap_port=993,
)
return super().setUp()
def tearDown(self) -> None:
self.account.delete()
return super().tearDown()
def test_process_non_gmail_server_flag(self):
try:
rule1 = MailRule.objects.create(
name="testrule",
account=self.account,
action=MailRule.MailAction.FLAG,
)
self.mail_account_handler.handle_mail_account(self.account)
rule1.delete()
except MailError as e:
self.fail(f"Failure: {e}")
except Exception as e:
pass
def test_process_non_gmail_server_tag(self):
try:
rule2 = MailRule.objects.create(
name="testrule",
account=self.account,
action=MailRule.MailAction.TAG,
)
self.mail_account_handler.handle_mail_account(self.account)
rule2.delete()
except MailError as e:
self.fail(f"Failure: {e}")
except Exception as e:
pass

View File

@@ -20,6 +20,7 @@ from imap_tools import MailboxFolderSelectError
from imap_tools import MailboxLoginError
from imap_tools import MailMessage
from imap_tools import MailMessageFlags
from imap_tools import NOT
from paperless_mail import tasks
from paperless_mail.mail import MailAccountHandler
from paperless_mail.mail import MailError
@@ -46,31 +47,66 @@ class BogusFolderManager:
class BogusClient:
def authenticate(self, mechanism, authobject):
# authobject must be a callable object
auth_bytes = authobject(None)
if auth_bytes != b"\x00admin\x00w57\xc3\xa4\xc3\xb6\xc3\xbcw4b6huwb6nhu":
raise MailboxLoginError("BAD", "OK")
def __init__(self, messages):
self.messages: List[MailMessage] = messages
self.capabilities: List[str] = []
class BogusMailBox(ContextManager):
def __enter__(self):
return self
def __exit__(self, exc_type, exc_val, exc_tb):
pass
def authenticate(self, mechanism, authobject):
# authobject must be a callable object
auth_bytes = authobject(None)
if auth_bytes != b"\x00admin\x00w57\xc3\xa4\xc3\xb6\xc3\xbcw4b6huwb6nhu":
raise MailboxLoginError("BAD", "OK")
def uid(self, command, *args):
if command == "STORE":
for message in self.messages:
if message.uid == args[0]:
flag = args[2]
if flag == "processed":
message._raw_flag_data.append(f"+FLAGS (processed)".encode())
MailMessage.flags.fget.cache_clear()
class BogusMailBox(ContextManager):
# Common values so tests don't need to remember an accepted login
USERNAME: str = "admin"
ASCII_PASSWORD: str = "secret"
# Note the non-ascii characters here
UTF_PASSWORD: str = "w57äöüw4b6huwb6nhu"
def __init__(self):
self.messages: List[MailMessage] = []
self.messages_spam: List[MailMessage] = []
self.folder = BogusFolderManager()
self.client = BogusClient()
self.client = BogusClient(self.messages)
self._host = ""
def __enter__(self):
return self
def __exit__(self, exc_type, exc_val, exc_tb):
pass
def updateClient(self):
self.client = BogusClient(self.messages)
def login(self, username, password):
# This will raise a UnicodeEncodeError if the password is not ASCII only
password.encode("ascii")
# Otherwise, check for correct values
if username != "admin" or password not in {"secret"}:
if username != self.USERNAME or password != self.ASCII_PASSWORD:
raise MailboxLoginError("BAD", "OK")
def login_utf8(self, username, password):
# Expected to only be called with the UTF-8 password
if username != self.USERNAME or password != self.UTF_PASSWORD:
raise MailboxLoginError("BAD", "OK")
def fetch(self, criteria, mark_seen, charset=""):
@@ -100,6 +136,9 @@ class BogusMailBox(ContextManager):
tag = criteria[criteria.index("UNKEYWORD") + 1].strip("'")
msg = filter(lambda m: "processed" not in m.flags, msg)
if "(X-GM-LABELS" in criteria: # ['NOT', '(X-GM-LABELS', '"processed"']
msg = filter(lambda m: "processed" not in m.flags, msg)
return list(msg)
def delete(self, uid_list):
@@ -209,7 +248,7 @@ class TestMail(DirectoriesMixin, TestCase):
m.return_value = self.bogus_mailbox
self.addCleanup(patcher.stop)
patcher = mock.patch("paperless_mail.mail.async_task")
patcher = mock.patch("paperless_mail.mail.consume_file.delay")
self.async_task = patcher.start()
self.addCleanup(patcher.stop)
@@ -247,6 +286,7 @@ class TestMail(DirectoriesMixin, TestCase):
seen=False,
),
)
self.bogus_mailbox.updateClient()
def test_get_correspondent(self):
message = namedtuple("MailMessage", [])
@@ -607,6 +647,33 @@ class TestMail(DirectoriesMixin, TestCase):
self.assertEqual(len(self.bogus_mailbox.fetch("UNKEYWORD processed", False)), 0)
self.assertEqual(len(self.bogus_mailbox.messages), 3)
def test_handle_mail_account_tag_gmail(self):
self.bogus_mailbox._host = "imap.gmail.com"
self.bogus_mailbox.client.capabilities = ["X-GM-EXT-1"]
account = MailAccount.objects.create(
name="test",
imap_server="",
username="admin",
password="secret",
)
_ = MailRule.objects.create(
name="testrule",
account=account,
action=MailRule.MailAction.TAG,
action_parameter="processed",
)
self.assertEqual(len(self.bogus_mailbox.messages), 3)
self.assertEqual(self.async_task.call_count, 0)
criteria = NOT(gmail_label="processed")
self.assertEqual(len(self.bogus_mailbox.fetch(criteria, False)), 2)
self.mail_account_handler.handle_mail_account(account)
self.assertEqual(self.async_task.call_count, 2)
self.assertEqual(len(self.bogus_mailbox.fetch(criteria, False)), 0)
self.assertEqual(len(self.bogus_mailbox.messages), 3)
def test_error_login(self):
account = MailAccount.objects.create(
name="test",
@@ -878,9 +945,9 @@ class TestMail(DirectoriesMixin, TestCase):
account = MailAccount.objects.create(
name="test",
imap_server="",
username="admin",
username=BogusMailBox.USERNAME,
# Note the non-ascii characters here
password="w57äöüw4b6huwb6nhu",
password=BogusMailBox.UTF_PASSWORD,
)
_ = MailRule.objects.create(
@@ -910,7 +977,7 @@ class TestMail(DirectoriesMixin, TestCase):
account = MailAccount.objects.create(
name="test",
imap_server="",
username="admin",
username=BogusMailBox.USERNAME,
# Note the non-ascii characters here
# Passes the check in login, not in authenticate
password="réception",
@@ -965,20 +1032,3 @@ class TestTasks(TestCase):
m.side_effect = lambda account: 0
result = tasks.process_mail_accounts()
self.assertIn("No new", result)
@mock.patch("paperless_mail.tasks.MailAccountHandler.handle_mail_account")
def test_single_accounts(self, m):
MailAccount.objects.create(
name="A",
imap_server="A",
username="A",
password="A",
)
tasks.process_mail_account("A")
m.assert_called_once()
m.reset_mock()
tasks.process_mail_account("B")
m.assert_not_called()