mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-08-07 19:08:32 -05:00
Merge remote-tracking branch 'paperless/dev' into feature-consume-eml
This commit is contained in:
@@ -1,24 +1,26 @@
|
||||
import os
|
||||
import re
|
||||
import tempfile
|
||||
from datetime import date
|
||||
from datetime import timedelta
|
||||
from fnmatch import fnmatch
|
||||
from imaplib import IMAP4
|
||||
from typing import Dict
|
||||
|
||||
import magic
|
||||
import pathvalidate
|
||||
from django.conf import settings
|
||||
from django.db import DatabaseError
|
||||
from django_q.tasks import async_task
|
||||
from documents.loggers import LoggingMixin
|
||||
from documents.models import Correspondent
|
||||
from documents.parsers import is_mime_type_supported
|
||||
from documents.tasks import consume_file
|
||||
from imap_tools import AND
|
||||
from imap_tools import MailBox
|
||||
from imap_tools import MailboxFolderSelectError
|
||||
from imap_tools import MailBoxUnencrypted
|
||||
from imap_tools import MailMessage
|
||||
from imap_tools import MailMessageFlags
|
||||
from imap_tools import NOT
|
||||
from imap_tools.mailbox import MailBoxTls
|
||||
from paperless_mail.models import MailAccount
|
||||
from paperless_mail.models import MailRule
|
||||
@@ -29,7 +31,7 @@ class MailError(Exception):
|
||||
|
||||
|
||||
class BaseMailAction:
|
||||
def get_criteria(self):
|
||||
def get_criteria(self) -> Dict:
|
||||
return {}
|
||||
|
||||
def post_consume(self, M, message_uids, parameter):
|
||||
@@ -67,13 +69,17 @@ class TagMailAction(BaseMailAction):
|
||||
self.keyword = parameter
|
||||
|
||||
def get_criteria(self):
|
||||
return {"no_keyword": self.keyword}
|
||||
return {"no_keyword": self.keyword, "gmail_label": self.keyword}
|
||||
|
||||
def post_consume(self, M: MailBox, message_uids, parameter):
    """Mark the consumed messages with this action's keyword.

    When the mailbox host ends in ``gmail.com`` or ``googlemail.com`` the
    keyword is applied as a Gmail label via the ``X-GM-LABELS`` STORE
    extension; on any other host it is set as an IMAP keyword flag.

    :param M: mailbox the messages were fetched from
    :param message_uids: UIDs of the messages to mark
    :param parameter: not used here; the keyword comes from ``self.keyword``
    """
    if re.search(r"gmail\.com$|googlemail\.com$", M._host):
        for uid in message_uids:
            # "+X-GM-LABELS" adds the label to the message. A bare
            # "X-GM-LABELS" would replace the whole label set, dropping
            # every label the message already carries.
            M.client.uid("STORE", uid, "+X-GM-LABELS", self.keyword)
    else:
        M.flag(message_uids, [self.keyword], True)
|
||||
|
||||
|
||||
def get_rule_action(rule):
|
||||
def get_rule_action(rule) -> BaseMailAction:
|
||||
if rule.action == MailRule.MailAction.FLAG:
|
||||
return FlagMailAction()
|
||||
elif rule.action == MailRule.MailAction.DELETE:
|
||||
@@ -103,7 +109,7 @@ def make_criterias(rule):
|
||||
return {**criterias, **get_rule_action(rule).get_criteria()}
|
||||
|
||||
|
||||
def get_mailbox(server, port, security):
|
||||
def get_mailbox(server, port, security) -> MailBox:
|
||||
if security == MailAccount.ImapSecurity.NONE:
|
||||
mailbox = MailBoxUnencrypted(server, port)
|
||||
elif security == MailAccount.ImapSecurity.STARTTLS:
|
||||
@@ -162,7 +168,7 @@ class MailAccountHandler(LoggingMixin):
|
||||
"Unknown correspondent selector",
|
||||
) # pragma: nocover
|
||||
|
||||
def handle_mail_account(self, account):
|
||||
def handle_mail_account(self, account: MailAccount):
|
||||
|
||||
self.renew_logging_group()
|
||||
|
||||
@@ -176,33 +182,29 @@ class MailAccountHandler(LoggingMixin):
|
||||
account.imap_security,
|
||||
) as M:
|
||||
|
||||
supports_gmail_labels = "X-GM-EXT-1" in M.client.capabilities
|
||||
supports_auth_plain = "AUTH=PLAIN" in M.client.capabilities
|
||||
|
||||
self.log("debug", f"GMAIL Label Support: {supports_gmail_labels}")
|
||||
self.log("debug", f"AUTH=PLAIN Support: {supports_auth_plain}")
|
||||
|
||||
try:
|
||||
|
||||
M.login(account.username, account.password)
|
||||
|
||||
except UnicodeEncodeError:
|
||||
self.log("debug", "Falling back to AUTH=PLAIN")
|
||||
try:
|
||||
# rfc2595 section 6 - PLAIN SASL mechanism
|
||||
client: IMAP4 = M.client
|
||||
encoded = (
|
||||
b"\0"
|
||||
+ account.username.encode("utf8")
|
||||
+ b"\0"
|
||||
+ account.password.encode("utf8")
|
||||
)
|
||||
# Assumption is the server supports AUTH=PLAIN capability
|
||||
# Could check the list with client.capability(), but then what?
|
||||
# We're failing anyway then
|
||||
client.authenticate("PLAIN", lambda x: encoded)
|
||||
|
||||
# Need to transition out of AUTH state to SELECTED
|
||||
M.folder.set("INBOX")
|
||||
except Exception:
|
||||
try:
|
||||
M.login_utf8(account.username, account.password)
|
||||
except Exception as err:
|
||||
self.log(
|
||||
"error",
|
||||
"Unable to authenticate with mail server using AUTH=PLAIN",
|
||||
)
|
||||
raise MailError(f"Error while authenticating account {account}")
|
||||
raise MailError(
|
||||
f"Error while authenticating account {account}",
|
||||
) from err
|
||||
except Exception as e:
|
||||
self.log(
|
||||
"error",
|
||||
@@ -221,7 +223,11 @@ class MailAccountHandler(LoggingMixin):
|
||||
|
||||
for rule in account.rules.order_by("order"):
|
||||
try:
|
||||
total_processed_files += self.handle_mail_rule(M, rule)
|
||||
total_processed_files += self.handle_mail_rule(
|
||||
M,
|
||||
rule,
|
||||
supports_gmail_labels,
|
||||
)
|
||||
except Exception as e:
|
||||
self.log(
|
||||
"error",
|
||||
@@ -239,13 +245,18 @@ class MailAccountHandler(LoggingMixin):
|
||||
|
||||
return total_processed_files
|
||||
|
||||
def handle_mail_rule(self, M: MailBox, rule: MailRule):
|
||||
def handle_mail_rule(
|
||||
self,
|
||||
M: MailBox,
|
||||
rule: MailRule,
|
||||
supports_gmail_labels: bool = False,
|
||||
):
|
||||
|
||||
self.log("debug", f"Rule {rule}: Selecting folder {rule.folder}")
|
||||
|
||||
try:
|
||||
M.folder.set(rule.folder)
|
||||
except MailboxFolderSelectError:
|
||||
except MailboxFolderSelectError as err:
|
||||
|
||||
self.log(
|
||||
"error",
|
||||
@@ -264,23 +275,38 @@ class MailAccountHandler(LoggingMixin):
|
||||
raise MailError(
|
||||
f"Rule {rule}: Folder {rule.folder} "
|
||||
f"does not exist in account {rule.account}",
|
||||
)
|
||||
) from err
|
||||
|
||||
criterias = make_criterias(rule)
|
||||
|
||||
# Deal with the Gmail label extension
|
||||
if "gmail_label" in criterias:
|
||||
|
||||
gmail_label = criterias["gmail_label"]
|
||||
del criterias["gmail_label"]
|
||||
|
||||
if not supports_gmail_labels:
|
||||
criterias_imap = AND(**criterias)
|
||||
else:
|
||||
criterias_imap = AND(NOT(gmail_label=gmail_label), **criterias)
|
||||
else:
|
||||
criterias_imap = AND(**criterias)
|
||||
|
||||
self.log(
|
||||
"debug",
|
||||
f"Rule {rule}: Searching folder with criteria " f"{str(AND(**criterias))}",
|
||||
f"Rule {rule}: Searching folder with criteria " f"{str(criterias_imap)}",
|
||||
)
|
||||
|
||||
try:
|
||||
messages = M.fetch(
|
||||
criteria=AND(**criterias),
|
||||
criteria=criterias_imap,
|
||||
mark_seen=False,
|
||||
charset=rule.account.character_set,
|
||||
)
|
||||
except Exception:
|
||||
raise MailError(f"Rule {rule}: Error while fetching folder {rule.folder}")
|
||||
except Exception as err:
|
||||
raise MailError(
|
||||
f"Rule {rule}: Error while fetching folder {rule.folder}",
|
||||
) from err
|
||||
|
||||
post_consume_messages = []
|
||||
|
||||
@@ -320,7 +346,7 @@ class MailAccountHandler(LoggingMixin):
|
||||
except Exception as e:
|
||||
raise MailError(
|
||||
f"Rule {rule}: Error while processing post-consume actions: " f"{e}",
|
||||
)
|
||||
) from e
|
||||
|
||||
return total_processed_files
|
||||
|
||||
@@ -382,8 +408,7 @@ class MailAccountHandler(LoggingMixin):
|
||||
f"{message.subject} from {message.from_}",
|
||||
)
|
||||
|
||||
async_task(
|
||||
"documents.tasks.consume_file",
|
||||
consume_file.delay(
|
||||
path=temp_filename,
|
||||
override_filename=pathvalidate.sanitize_filename(
|
||||
message.subject + ".eml",
|
||||
@@ -447,8 +472,7 @@ class MailAccountHandler(LoggingMixin):
|
||||
f"{message.subject} from {message.from_}",
|
||||
)
|
||||
|
||||
async_task(
|
||||
"documents.tasks.consume_file",
|
||||
consume_file.delay(
|
||||
path=temp_filename,
|
||||
override_filename=pathvalidate.sanitize_filename(
|
||||
att.filename,
|
||||
|
@@ -2,28 +2,12 @@
|
||||
|
||||
from django.db import migrations
|
||||
from django.db.migrations import RunPython
|
||||
from django_q.models import Schedule
|
||||
from django_q.tasks import schedule
|
||||
|
||||
|
||||
def add_schedules(apps, schema_editor):
    """Forward step of the migration: register the recurring mail check.

    Schedules ``paperless_mail.tasks.process_mail_accounts`` through
    django-q under the name "Check all e-mail accounts", repeating every
    10 minutes. ``apps`` and ``schema_editor`` are accepted but not used.
    """
    schedule_options = {
        "name": "Check all e-mail accounts",
        "schedule_type": Schedule.MINUTES,
        "minutes": 10,
    }
    schedule("paperless_mail.tasks.process_mail_accounts", **schedule_options)
|
||||
|
||||
|
||||
def remove_schedules(apps, schema_editor):
    """Reverse step of the migration: drop the recurring mail check.

    Deletes every django-q ``Schedule`` row whose ``func`` is
    ``paperless_mail.tasks.process_mail_accounts``. ``apps`` and
    ``schema_editor`` are accepted but not used.
    """
    mail_check_schedules = Schedule.objects.filter(
        func="paperless_mail.tasks.process_mail_accounts",
    )
    mail_check_schedules.delete()
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
("paperless_mail", "0001_initial"),
|
||||
("django_q", "0013_task_attempt_count"),
|
||||
]
|
||||
|
||||
operations = [RunPython(add_schedules, remove_schedules)]
|
||||
operations = [RunPython(migrations.RunPython.noop, migrations.RunPython.noop)]
|
||||
|
@@ -1,13 +1,14 @@
|
||||
import logging
|
||||
|
||||
from celery import shared_task
|
||||
from paperless_mail.mail import MailAccountHandler
|
||||
from paperless_mail.mail import MailError
|
||||
from paperless_mail.models import MailAccount
|
||||
|
||||
|
||||
logger = logging.getLogger("paperless.mail.tasks")
|
||||
|
||||
|
||||
@shared_task
|
||||
def process_mail_accounts():
|
||||
total_new_documents = 0
|
||||
for account in MailAccount.objects.all():
|
||||
@@ -20,11 +21,3 @@ def process_mail_accounts():
|
||||
return f"Added {total_new_documents} document(s)."
|
||||
else:
|
||||
return "No new documents were added."
|
||||
|
||||
|
||||
def process_mail_account(name):
    """Process the single mail account called ``name``.

    Looks the account up by its ``name`` field and passes it to
    ``MailAccountHandler.handle_mail_account``. An unknown name is logged
    as an error instead of being raised.

    :param name: value of ``MailAccount.name`` to look up
    """
    try:
        account = MailAccount.objects.get(name=name)
    except MailAccount.DoesNotExist:
        logger.error("Unknown mail account: %s", name)
    else:
        # Kept outside the try so that only the lookup failure is reported
        # as an unknown account.
        MailAccountHandler().handle_mail_account(account)
|
||||
|
70
src/paperless_mail/tests/test_live_mail.py
Normal file
70
src/paperless_mail/tests/test_live_mail.py
Normal file
@@ -0,0 +1,70 @@
|
||||
import os
|
||||
|
||||
import pytest
|
||||
from django.test import TestCase
|
||||
from paperless_mail.mail import MailAccountHandler
|
||||
from paperless_mail.mail import MailError
|
||||
from paperless_mail.models import MailAccount
|
||||
from paperless_mail.models import MailRule
|
||||
|
||||
# The live tests need all of these variables; setUp reads each one. They must
# be set and non-empty (the original note suggests they are empty on forks).
_LIVE_MAIL_ENV_VARS = (
    "PAPERLESS_MAIL_TEST_HOST",
    "PAPERLESS_MAIL_TEST_USER",
    "PAPERLESS_MAIL_TEST_PASSWD",
)


@pytest.mark.skipif(
    not all(os.environ.get(name) for name in _LIVE_MAIL_ENV_VARS),
    reason="Live server testing not enabled",
)
class TestMailLiveServer(TestCase):
    """Run the mail handler against a real IMAP server.

    The server, user and password come from the ``PAPERLESS_MAIL_TEST_*``
    environment variables. The whole class is skipped unless all of them are
    set to a non-empty value.
    """

    def setUp(self) -> None:
        """Create the handler and a mail account pointing at the live server."""
        self.mail_account_handler = MailAccountHandler()
        self.account = MailAccount.objects.create(
            name="test",
            imap_server=os.environ["PAPERLESS_MAIL_TEST_HOST"],
            username=os.environ["PAPERLESS_MAIL_TEST_USER"],
            password=os.environ["PAPERLESS_MAIL_TEST_PASSWD"],
            imap_port=993,
        )

        return super().setUp()

    def tearDown(self) -> None:
        """Remove the account created in setUp."""
        self.account.delete()
        return super().tearDown()

    def _run_account_with_rule(self, action) -> None:
        """Create one rule with ``action``, process the account, delete the rule.

        A ``MailError`` fails the calling test. Any other exception is
        tolerated, exactly as the two tests did before they shared this
        helper.
        """
        try:
            rule = MailRule.objects.create(
                name="testrule",
                account=self.account,
                action=action,
            )

            self.mail_account_handler.handle_mail_account(self.account)

            rule.delete()

        except MailError as e:
            self.fail(f"Failure: {e}")
        except Exception:
            # NOTE(review): every non-MailError exception is ignored here,
            # so the test still passes. Kept as-is because it looks
            # deliberate for a live server; confirm it is intended.
            pass

    def test_process_non_gmail_server_flag(self):
        """Processing the account with a FLAG rule must not raise MailError."""
        self._run_account_with_rule(MailRule.MailAction.FLAG)

    def test_process_non_gmail_server_tag(self):
        """Processing the account with a TAG rule must not raise MailError."""
        self._run_account_with_rule(MailRule.MailAction.TAG)
|
@@ -20,6 +20,7 @@ from imap_tools import MailboxFolderSelectError
|
||||
from imap_tools import MailboxLoginError
|
||||
from imap_tools import MailMessage
|
||||
from imap_tools import MailMessageFlags
|
||||
from imap_tools import NOT
|
||||
from paperless_mail import tasks
|
||||
from paperless_mail.mail import MailAccountHandler
|
||||
from paperless_mail.mail import MailError
|
||||
@@ -46,31 +47,66 @@ class BogusFolderManager:
|
||||
|
||||
|
||||
class BogusClient:
|
||||
def authenticate(self, mechanism, authobject):
|
||||
# authobject must be a callable object
|
||||
auth_bytes = authobject(None)
|
||||
if auth_bytes != b"\x00admin\x00w57\xc3\xa4\xc3\xb6\xc3\xbcw4b6huwb6nhu":
|
||||
raise MailboxLoginError("BAD", "OK")
|
||||
def __init__(self, messages):
|
||||
self.messages: List[MailMessage] = messages
|
||||
self.capabilities: List[str] = []
|
||||
|
||||
|
||||
class BogusMailBox(ContextManager):
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
pass
|
||||
|
||||
def authenticate(self, mechanism, authobject):
|
||||
# authobject must be a callable object
|
||||
auth_bytes = authobject(None)
|
||||
if auth_bytes != b"\x00admin\x00w57\xc3\xa4\xc3\xb6\xc3\xbcw4b6huwb6nhu":
|
||||
raise MailboxLoginError("BAD", "OK")
|
||||
|
||||
def uid(self, command, *args):
|
||||
if command == "STORE":
|
||||
for message in self.messages:
|
||||
if message.uid == args[0]:
|
||||
flag = args[2]
|
||||
if flag == "processed":
|
||||
message._raw_flag_data.append(f"+FLAGS (processed)".encode())
|
||||
MailMessage.flags.fget.cache_clear()
|
||||
|
||||
|
||||
class BogusMailBox(ContextManager):
|
||||
|
||||
# Common values so tests don't need to remember an accepted login
|
||||
USERNAME: str = "admin"
|
||||
ASCII_PASSWORD: str = "secret"
|
||||
# Note the non-ascii characters here
|
||||
UTF_PASSWORD: str = "w57äöüw4b6huwb6nhu"
|
||||
|
||||
def __init__(self):
|
||||
self.messages: List[MailMessage] = []
|
||||
self.messages_spam: List[MailMessage] = []
|
||||
self.folder = BogusFolderManager()
|
||||
self.client = BogusClient()
|
||||
self.client = BogusClient(self.messages)
|
||||
self._host = ""
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
pass
|
||||
|
||||
def updateClient(self):
    """Replace ``self.client`` with a new BogusClient built from ``self.messages``."""
    self.client = BogusClient(self.messages)
|
||||
|
||||
def login(self, username, password):
|
||||
# This will raise a UnicodeEncodeError if the password is not ASCII only
|
||||
password.encode("ascii")
|
||||
# Otherwise, check for correct values
|
||||
if username != "admin" or password not in {"secret"}:
|
||||
if username != self.USERNAME or password != self.ASCII_PASSWORD:
|
||||
raise MailboxLoginError("BAD", "OK")
|
||||
|
||||
def login_utf8(self, username, password):
    """Fake UTF-8 login that accepts only USERNAME with UTF_PASSWORD.

    Expected to be called only with the UTF-8 password.

    :raises MailboxLoginError: for any other username/password pair
    """
    credentials_match = (
        username == self.USERNAME and password == self.UTF_PASSWORD
    )
    if not credentials_match:
        raise MailboxLoginError("BAD", "OK")
|
||||
|
||||
def fetch(self, criteria, mark_seen, charset=""):
|
||||
@@ -100,6 +136,9 @@ class BogusMailBox(ContextManager):
|
||||
tag = criteria[criteria.index("UNKEYWORD") + 1].strip("'")
|
||||
msg = filter(lambda m: "processed" not in m.flags, msg)
|
||||
|
||||
if "(X-GM-LABELS" in criteria: # ['NOT', '(X-GM-LABELS', '"processed"']
|
||||
msg = filter(lambda m: "processed" not in m.flags, msg)
|
||||
|
||||
return list(msg)
|
||||
|
||||
def delete(self, uid_list):
|
||||
@@ -209,7 +248,7 @@ class TestMail(DirectoriesMixin, TestCase):
|
||||
m.return_value = self.bogus_mailbox
|
||||
self.addCleanup(patcher.stop)
|
||||
|
||||
patcher = mock.patch("paperless_mail.mail.async_task")
|
||||
patcher = mock.patch("paperless_mail.mail.consume_file.delay")
|
||||
self.async_task = patcher.start()
|
||||
self.addCleanup(patcher.stop)
|
||||
|
||||
@@ -247,6 +286,7 @@ class TestMail(DirectoriesMixin, TestCase):
|
||||
seen=False,
|
||||
),
|
||||
)
|
||||
self.bogus_mailbox.updateClient()
|
||||
|
||||
def test_get_correspondent(self):
|
||||
message = namedtuple("MailMessage", [])
|
||||
@@ -607,6 +647,33 @@ class TestMail(DirectoriesMixin, TestCase):
|
||||
self.assertEqual(len(self.bogus_mailbox.fetch("UNKEYWORD processed", False)), 0)
|
||||
self.assertEqual(len(self.bogus_mailbox.messages), 3)
|
||||
|
||||
def test_handle_mail_account_tag_gmail(self):
    """
    GIVEN:
        - A mailbox whose host is imap.gmail.com and whose client
          advertises the X-GM-EXT-1 capability
        - An account with a TAG rule using the keyword "processed"
    WHEN:
        - The account is handled
    THEN:
        - Two consume calls are made
        - No message matches NOT(gmail_label="processed") afterwards
        - All three messages are still in the mailbox
    """
    mailbox = self.bogus_mailbox
    mailbox._host = "imap.gmail.com"
    mailbox.client.capabilities = ["X-GM-EXT-1"]

    gmail_account = MailAccount.objects.create(
        name="test",
        imap_server="",
        username="admin",
        password="secret",
    )
    MailRule.objects.create(
        name="testrule",
        account=gmail_account,
        action=MailRule.MailAction.TAG,
        action_parameter="processed",
    )

    not_labelled = NOT(gmail_label="processed")

    def unlabelled_count():
        # Number of messages the fake mailbox returns for the criteria.
        return len(mailbox.fetch(not_labelled, False))

    # Before handling
    self.assertEqual(len(mailbox.messages), 3)
    self.assertEqual(self.async_task.call_count, 0)
    self.assertEqual(unlabelled_count(), 2)

    self.mail_account_handler.handle_mail_account(gmail_account)

    # After handling
    self.assertEqual(self.async_task.call_count, 2)
    self.assertEqual(unlabelled_count(), 0)
    self.assertEqual(len(mailbox.messages), 3)
|
||||
|
||||
def test_error_login(self):
|
||||
account = MailAccount.objects.create(
|
||||
name="test",
|
||||
@@ -878,9 +945,9 @@ class TestMail(DirectoriesMixin, TestCase):
|
||||
account = MailAccount.objects.create(
|
||||
name="test",
|
||||
imap_server="",
|
||||
username="admin",
|
||||
username=BogusMailBox.USERNAME,
|
||||
# Note the non-ascii characters here
|
||||
password="w57äöüw4b6huwb6nhu",
|
||||
password=BogusMailBox.UTF_PASSWORD,
|
||||
)
|
||||
|
||||
_ = MailRule.objects.create(
|
||||
@@ -910,7 +977,7 @@ class TestMail(DirectoriesMixin, TestCase):
|
||||
account = MailAccount.objects.create(
|
||||
name="test",
|
||||
imap_server="",
|
||||
username="admin",
|
||||
username=BogusMailBox.USERNAME,
|
||||
# Note the non-ascii characters here
|
||||
# Passes the check in login, not in authenticate
|
||||
password="réception",
|
||||
@@ -965,20 +1032,3 @@ class TestTasks(TestCase):
|
||||
m.side_effect = lambda account: 0
|
||||
result = tasks.process_mail_accounts()
|
||||
self.assertIn("No new", result)
|
||||
|
||||
@mock.patch("paperless_mail.tasks.MailAccountHandler.handle_mail_account")
def test_single_accounts(self, m):
    """
    GIVEN:
        - One mail account named "A"
    WHEN:
        - process_mail_account is called for "A" and then for "B"
    THEN:
        - The handler is called once for "A"
        - The handler is not called for the unknown name "B"
    """
    account_fields = {
        "name": "A",
        "imap_server": "A",
        "username": "A",
        "password": "A",
    }
    MailAccount.objects.create(**account_fields)

    # Existing account name
    tasks.process_mail_account("A")
    m.assert_called_once()

    # Name with no matching account
    m.reset_mock()
    tasks.process_mail_account("B")
    m.assert_not_called()
|
||||
|
Reference in New Issue
Block a user