From ce663398e6688ae4259be6a7b94e025395ada94e Mon Sep 17 00:00:00 2001 From: Daniel Bankmann <6322723+dbankmann@users.noreply.github.com> Date: Thu, 29 Aug 2024 02:22:44 +0200 Subject: [PATCH] Enhancement: mail message preprocessor for gpg encrypted mails (#7456) --------- Co-authored-by: shamoon <4887959+shamoon@users.noreply.github.com> --- Dockerfile | 2 + docs/advanced_usage.md | 54 ++ docs/configuration.md | 12 + src/paperless/settings.py | 9 + src/paperless_mail/mail.py | 30 ++ src/paperless_mail/preprocessor.py | 103 ++++ src/paperless_mail/tests/test_mail.py | 474 ++++++++++-------- src/paperless_mail/tests/test_preprocessor.py | 228 +++++++++ 8 files changed, 695 insertions(+), 217 deletions(-) create mode 100644 src/paperless_mail/preprocessor.py create mode 100644 src/paperless_mail/tests/test_preprocessor.py diff --git a/Dockerfile b/Dockerfile index 2a9d7b306..4ef558712 100644 --- a/Dockerfile +++ b/Dockerfile @@ -275,6 +275,8 @@ RUN set -eux \ && mkdir --parents --verbose /usr/src/paperless/media \ && mkdir --parents --verbose /usr/src/paperless/consume \ && mkdir --parents --verbose /usr/src/paperless/export \ + && echo "Creating gnupg directory" \ + && mkdir -m700 --verbose /usr/src/paperless/.gnupg \ && echo "Adjusting all permissions" \ && chown --from root:root --changes --recursive paperless:paperless /usr/src/paperless \ && echo "Collecting static files" \ diff --git a/docs/advanced_usage.md b/docs/advanced_usage.md index 5488659a2..fe7099e2d 100644 --- a/docs/advanced_usage.md +++ b/docs/advanced_usage.md @@ -690,3 +690,57 @@ More details about configuration option for various providers can be found in th Once external auth is set up, 'regular' login can be disabled with the [PAPERLESS_DISABLE_REGULAR_LOGIN](configuration.md#PAPERLESS_DISABLE_REGULAR_LOGIN) setting and / or users can be automatically redirected with the [PAPERLESS_REDIRECT_LOGIN_TO_SSO](configuration.md#PAPERLESS_REDIRECT_LOGIN_TO_SSO) setting. 
+ +## Decryption of encrypted emails before consumption {#gpg-decryptor} + +Paperless-ngx can be configured to decrypt gpg encrypted emails before consumption. + +### Requirements + +You need a recent version of `gpg-agent >= 2.1.1` installed on your host. +Your host needs to be set up for decrypting your emails via `gpg-agent`, see this [tutorial](https://www.digitalocean.com/community/tutorials/how-to-use-gpg-to-encrypt-and-sign-messages#encrypt-and-decrypt-messages-with-gpg) for instance. +Test your setup and make sure that you can encrypt and decrypt files using your key: + +``` +gpg --encrypt --armor -r person@email.com name_of_file +gpg --decrypt name_of_file.asc +``` + +### Setup + +First, enable the [PAPERLESS_ENABLE_GPG_DECRYPTOR environment variable](configuration.md#PAPERLESS_GPG_DECRYPTOR). + +Then determine your local `gpg-agent.extra` socket by invoking + +``` +gpgconf --list-dir agent-extra-socket +``` + +on your host. A possible output is `~/.gnupg/S.gpg-agent.extra`. +Also find the location of your public keyring. + +If using docker, you'll need to add the following volume mounts to your `docker-compose.yml` file: + +```yaml +webserver: + volumes: + - /home/user/.gnupg/pubring.gpg:/usr/src/paperless/.gnupg/pubring.gpg + - <path to gpg-agent.extra socket>:/usr/src/paperless/.gnupg/S.gpg-agent +``` + +For a 'bare-metal' installation no further configuration is necessary. If you +want to use a separate `GNUPG_HOME`, you can do so by configuring the [PAPERLESS_EMAIL_GNUPG_HOME environment variable](configuration.md#PAPERLESS_EMAIL_GNUPG_HOME). + +### Troubleshooting + +- Make sure that `gpg-agent` is running on your host machine. +- Make sure that encryption and decryption work from inside the container using the `gpg` commands from above. +- Check that all files in `/usr/src/paperless/.gnupg` have correct permissions: + +```shell +paperless@9da1865df327:~/.gnupg$ ls -al +drwx------ 1 paperless paperless 4096 Aug 18 17:52 . +drwxr-xr-x 1 paperless paperless 4096 Aug 18 17:52 .. 
+srw------- 1 paperless paperless 0 Aug 18 17:22 S.gpg-agent +-rw------- 1 paperless paperless 147940 Jul 24 10:23 pubring.gpg +``` diff --git a/docs/configuration.md b/docs/configuration.md index e719e043d..7172afcb3 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -1149,6 +1149,18 @@ within your documents. second, and year last order. Characters D, M, or Y can be shuffled to meet the required order. +#### [`PAPERLESS_ENABLE_GPG_DECRYPTOR=<bool>`](#PAPERLESS_GPG_DECRYPTOR) {#PAPERLESS_GPG_DECRYPTOR} + +: Enable or disable the GPG decryptor for encrypted emails. See [GPG Decryptor](advanced_usage.md#gpg-decryptor) for more information. + + Defaults to false. + +#### [`PAPERLESS_EMAIL_GNUPG_HOME=<str>`](#PAPERLESS_EMAIL_GNUPG_HOME) {#PAPERLESS_EMAIL_GNUPG_HOME} + +: Optional, sets the `GNUPG_HOME` path to use with GPG decryptor for encrypted emails. See [GPG Decryptor](advanced_usage.md#gpg-decryptor) for more information. If not set, defaults to the default `GNUPG_HOME` path. + + Defaults to <not set>. 
+ ### Polling {#polling} #### [`PAPERLESS_CONSUMER_POLLING=`](#PAPERLESS_CONSUMER_POLLING) {#PAPERLESS_CONSUMER_POLLING} diff --git a/src/paperless/settings.py b/src/paperless/settings.py index 000904aef..27c609e8d 100644 --- a/src/paperless/settings.py +++ b/src/paperless/settings.py @@ -1171,6 +1171,15 @@ if DEBUG: # pragma: no cover EMAIL_BACKEND = "django.core.mail.backends.filebased.EmailBackend" EMAIL_FILE_PATH = BASE_DIR / "sent_emails" +############################################################################### +# Email Preprocessors # +############################################################################### + +EMAIL_GNUPG_HOME: Final[Optional[str]] = os.getenv("PAPERLESS_EMAIL_GNUPG_HOME") +EMAIL_ENABLE_GPG_DECRYPTOR: Final[bool] = __get_boolean( + "PAPERLESS_ENABLE_GPG_DECRYPTOR", +) + ############################################################################### # Soft Delete diff --git a/src/paperless_mail/mail.py b/src/paperless_mail/mail.py index 83771dbf5..92c471845 100644 --- a/src/paperless_mail/mail.py +++ b/src/paperless_mail/mail.py @@ -43,6 +43,8 @@ from documents.tasks import consume_file from paperless_mail.models import MailAccount from paperless_mail.models import MailRule from paperless_mail.models import ProcessedMail +from paperless_mail.preprocessor import MailMessageDecryptor +from paperless_mail.preprocessor import MailMessagePreprocessor # Apple Mail sets multiple IMAP KEYWORD and the general "\Flagged" FLAG # imaplib => conn.fetch(b"", "FLAGS") @@ -426,9 +428,30 @@ class MailAccountHandler(LoggingMixin): logging_name = "paperless_mail" + _message_preprocessor_types: list[type[MailMessagePreprocessor]] = [ + MailMessageDecryptor, + ] + def __init__(self) -> None: super().__init__() self.renew_logging_group() + self._init_preprocessors() + + def _init_preprocessors(self): + self._message_preprocessors: list[MailMessagePreprocessor] = [] + for preprocessor_type in self._message_preprocessor_types: + 
self._init_preprocessor(preprocessor_type) + + def _init_preprocessor(self, preprocessor_type): + if preprocessor_type.able_to_run(): + try: + self._message_preprocessors.append(preprocessor_type()) + except Exception as e: + self.log.warning( + f"Error while initializing preprocessor {preprocessor_type.NAME}: {e}", + ) + else: + self.log.debug(f"Skipping mail preprocessor {preprocessor_type.NAME}") def _correspondent_from_name(self, name: str) -> Optional[Correspondent]: try: @@ -535,6 +558,11 @@ class MailAccountHandler(LoggingMixin): return total_processed_files + def _preprocess_message(self, message: MailMessage): + for preprocessor in self._message_preprocessors: + message = preprocessor.run(message) + return message + def _handle_mail_rule( self, M: MailBox, @@ -613,6 +641,8 @@ class MailAccountHandler(LoggingMixin): return total_processed_files def _handle_message(self, message, rule: MailRule) -> int: + message = self._preprocess_message(message) + processed_elements = 0 # Skip Message handling when only attachments are to be processed but diff --git a/src/paperless_mail/preprocessor.py b/src/paperless_mail/preprocessor.py new file mode 100644 index 000000000..7e0c76780 --- /dev/null +++ b/src/paperless_mail/preprocessor.py @@ -0,0 +1,103 @@ +import abc +import os +from email import message_from_bytes +from email import policy +from email.message import Message + +from django.conf import settings +from gnupg import GPG +from imap_tools import MailMessage + +from documents.loggers import LoggingMixin + + +class MailMessagePreprocessor(abc.ABC): + """ + Defines the interface for preprocessors that alter messages before they are handled in MailAccountHandler + """ + + NAME: str = "MailMessagePreprocessor" + + @staticmethod + @abc.abstractmethod + def able_to_run() -> bool: + """ + Return True if the conditions are met for the preprocessor to run, False otherwise + + If False, run(message) will not be called + """ + + @abc.abstractmethod + def run(self, 
message: MailMessage) -> MailMessage: + """ + Performs the actual preprocessing task + """ + + +class MailMessageDecryptor(MailMessagePreprocessor, LoggingMixin): + logging_name = "paperless_mail_message_decryptor" + + NAME = "MailMessageDecryptor" + + def __init__(self): + super().__init__() + self.renew_logging_group() + self._gpg = GPG(gnupghome=settings.EMAIL_GNUPG_HOME) + + @staticmethod + def able_to_run() -> bool: + if not settings.EMAIL_ENABLE_GPG_DECRYPTOR: + return False + if settings.EMAIL_GNUPG_HOME is None: + return True + return os.path.isdir(settings.EMAIL_GNUPG_HOME) + + def run(self, message: MailMessage) -> MailMessage: + if not hasattr(message, "obj"): + self.log.debug("Message does not have 'obj' attribute") + return message + if message.obj.get_content_type() != "multipart/encrypted": + self.log.debug("Message not encrypted. Keep unchanged") + return message + + self.log.debug("Message is encrypted.") + email_message = self._to_email_message(message) + decrypted_raw_message = self._gpg.decrypt(email_message.as_string()) + + if not decrypted_raw_message.ok: + self.log.debug( + f"Message decryption failed with status message " + f"{decrypted_raw_message.status}", + ) + raise Exception( + f"Decryption failed: {decrypted_raw_message.status}, {decrypted_raw_message.stderr}", + ) + self.log.debug("Message decrypted successfully.") + + decrypted_message = self._build_decrypted_message( + decrypted_raw_message, + email_message, + ) + + return MailMessage( + [(f"UID {message.uid}".encode(), decrypted_message.as_bytes())], + ) + + @staticmethod + def _to_email_message(message: MailMessage) -> Message: + email_message = message_from_bytes( + message.obj.as_bytes(), + policy=policy.default, + ) + return email_message + + @staticmethod + def _build_decrypted_message(decrypted_raw_message, email_message): + decrypted_message = message_from_bytes( + decrypted_raw_message.data, + policy=policy.default, + ) + for header, value in email_message.items(): + if not 
decrypted_message.get(header): + decrypted_message.add_header(header, value) + return decrypted_message diff --git a/src/paperless_mail/tests/test_mail.py b/src/paperless_mail/tests/test_mail.py index 0920f033c..d671021bf 100644 --- a/src/paperless_mail/tests/test_mail.py +++ b/src/paperless_mail/tests/test_mail.py @@ -193,31 +193,10 @@ def fake_magic_from_buffer(buffer, mime=False): return "Some verbose file description" -@mock.patch("paperless_mail.mail.magic.from_buffer", fake_magic_from_buffer) -class TestMail( - DirectoriesMixin, - FileSystemAssertsMixin, - TestCase, -): - def setUp(self): +class MessageBuilder: + def __init__(self): self._used_uids = set() - self.bogus_mailbox = BogusMailBox() - - patcher = mock.patch("paperless_mail.mail.MailBox") - m = patcher.start() - m.return_value = self.bogus_mailbox - self.addCleanup(patcher.stop) - - patcher = mock.patch("paperless_mail.mail.queue_consumption_tasks") - self._queue_consumption_tasks_mock = patcher.start() - self.addCleanup(patcher.stop) - - self.reset_bogus_mailbox() - - self.mail_account_handler = MailAccountHandler() - super().setUp() - def create_message( self, attachments: Union[int, list[_AttachmentDef]] = 1, @@ -283,39 +262,132 @@ class TestMail( return imap_msg - def reset_bogus_mailbox(self): - self.bogus_mailbox.messages = [] - self.bogus_mailbox.messages_spam = [] - self.bogus_mailbox.messages.append( - self.create_message( - subject="Invoice 1", - from_="amazon@amazon.de", - to=["me@myselfandi.com", "helpdesk@mydomain.com"], - body="cables", - seen=True, - flagged=False, - processed=False, - ), + +def reset_bogus_mailbox(bogus_mailbox: BogusMailBox, message_builder: MessageBuilder): + bogus_mailbox.messages = [] + bogus_mailbox.messages_spam = [] + bogus_mailbox.messages.append( + message_builder.create_message( + subject="Invoice 1", + from_="amazon@amazon.de", + to=["me@myselfandi.com", "helpdesk@mydomain.com"], + body="cables", + seen=True, + flagged=False, + processed=False, + ), + ) + 
bogus_mailbox.messages.append( + message_builder.create_message( + subject="Invoice 2", + body="from my favorite electronic store", + to=["invoices@mycompany.com"], + seen=False, + flagged=True, + processed=True, + ), + ) + bogus_mailbox.messages.append( + message_builder.create_message( + subject="Claim your $10M price now!", + from_="amazon@amazon-some-indian-site.org", + to=["special@me.me"], + seen=False, + ), + ) + bogus_mailbox.updateClient() + + +class MailMocker(DirectoriesMixin, FileSystemAssertsMixin, TestCase): + def setUp(self): + self.bogus_mailbox = BogusMailBox() + self.messageBuilder = MessageBuilder() + + reset_bogus_mailbox(self.bogus_mailbox, self.messageBuilder) + + patcher = mock.patch("paperless_mail.mail.MailBox") + m = patcher.start() + m.return_value = self.bogus_mailbox + self.addCleanup(patcher.stop) + + patcher = mock.patch("paperless_mail.mail.queue_consumption_tasks") + self._queue_consumption_tasks_mock = patcher.start() + self.addCleanup(patcher.stop) + + super().setUp() + + def assert_queue_consumption_tasks_call_args( + self, + expected_call_args: list[list[dict[str, str]]], + ): + """ + Verifies that queue_consumption_tasks has been called with the expected arguments. + + expected_call_args is the following format: + + * List of calls to queue_consumption_tasks, called once per mail, where each element is: + * List of signatures for the consume_file task, where each element is: + * dictionary containing arguments that need to be present in the consume_file signature. 
+ + """ + + # assert number of calls to queue_consumption_tasks match + self.assertEqual( + len(self._queue_consumption_tasks_mock.call_args_list), + len(expected_call_args), ) - self.bogus_mailbox.messages.append( - self.create_message( - subject="Invoice 2", - body="from my favorite electronic store", - to=["invoices@mycompany.com"], - seen=False, - flagged=True, - processed=True, - ), - ) - self.bogus_mailbox.messages.append( - self.create_message( - subject="Claim your $10M price now!", - from_="amazon@amazon-some-indian-site.org", - to="special@me.me", - seen=False, - ), - ) - self.bogus_mailbox.updateClient() + + for (mock_args, mock_kwargs), expected_signatures in zip( + self._queue_consumption_tasks_mock.call_args_list, + expected_call_args, + ): + consume_tasks = mock_kwargs["consume_tasks"] + + # assert number of consume_file tasks match + self.assertEqual(len(consume_tasks), len(expected_signatures)) + + for consume_task, expected_signature in zip( + consume_tasks, + expected_signatures, + ): + input_doc, overrides = consume_task.args + + # assert the file exists + self.assertIsFile(input_doc.original_file) + + # assert all expected arguments are present in the signature + for key, value in expected_signature.items(): + if key == "override_correspondent_id": + self.assertEqual(overrides.correspondent_id, value) + elif key == "override_filename": + self.assertEqual(overrides.filename, value) + elif key == "override_title": + self.assertEqual(overrides.title, value) + else: + self.fail("No match for expected arg") + + def apply_mail_actions(self): + """ + Applies pending actions to mails by inspecting calls to the queue_consumption_tasks method. 
+ """ + for args, kwargs in self._queue_consumption_tasks_mock.call_args_list: + message = kwargs["message"] + rule = kwargs["rule"] + apply_mail_action([], rule.pk, message.uid, message.subject, message.date) + + +@mock.patch("paperless_mail.mail.magic.from_buffer", fake_magic_from_buffer) +class TestMail( + DirectoriesMixin, + FileSystemAssertsMixin, + TestCase, +): + def setUp(self): + self.mailMocker = MailMocker() + self.mailMocker.setUp() + self.mail_account_handler = MailAccountHandler() + + super().setUp() def test_get_correspondent(self): message = namedtuple("MailMessage", []) @@ -399,7 +471,7 @@ class TestMail( self.assertEqual(handler._get_title(message, att, rule), None) def test_handle_message(self): - message = self.create_message( + message = self.mailMocker.messageBuilder.create_message( subject="the message title", from_="Myself", attachments=2, @@ -416,9 +488,9 @@ class TestMail( self.assertEqual(result, 2) - self._queue_consumption_tasks_mock.assert_called() + self.mailMocker._queue_consumption_tasks_mock.assert_called() - self.assert_queue_consumption_tasks_call_args( + self.mailMocker.assert_queue_consumption_tasks_call_args( [ [ {"override_title": "file_0", "override_filename": "file_0.pdf"}, @@ -435,11 +507,11 @@ class TestMail( result = self.mail_account_handler._handle_message(message, rule) - self._queue_consumption_tasks_mock.assert_not_called() + self.mailMocker._queue_consumption_tasks_mock.assert_not_called() self.assertEqual(result, 0) def test_handle_unknown_mime_type(self): - message = self.create_message( + message = self.mailMocker.messageBuilder.create_message( attachments=[ _AttachmentDef(filename="f1.pdf"), _AttachmentDef( @@ -459,7 +531,7 @@ class TestMail( result = self.mail_account_handler._handle_message(message, rule) self.assertEqual(result, 1) - self.assert_queue_consumption_tasks_call_args( + self.mailMocker.assert_queue_consumption_tasks_call_args( [ [ {"override_filename": "f1.pdf"}, @@ -468,7 +540,7 @@ class 
TestMail( ) def test_handle_disposition(self): - message = self.create_message( + message = self.mailMocker.messageBuilder.create_message( attachments=[ _AttachmentDef( filename="f1.pdf", @@ -487,7 +559,7 @@ class TestMail( result = self.mail_account_handler._handle_message(message, rule) self.assertEqual(result, 1) - self.assert_queue_consumption_tasks_call_args( + self.mailMocker.assert_queue_consumption_tasks_call_args( [ [ {"override_filename": "f2.pdf"}, @@ -496,7 +568,7 @@ class TestMail( ) def test_handle_inline_files(self): - message = self.create_message( + message = self.mailMocker.messageBuilder.create_message( attachments=[ _AttachmentDef( filename="f1.pdf", @@ -516,7 +588,7 @@ class TestMail( result = self.mail_account_handler._handle_message(message, rule) self.assertEqual(result, 2) - self.assert_queue_consumption_tasks_call_args( + self.mailMocker.assert_queue_consumption_tasks_call_args( [ [ {"override_filename": "f1.pdf"}, @@ -536,7 +608,7 @@ class TestMail( - Mail action should not be performed for files excluded - Mail action should be performed for files included """ - message = self.create_message( + message = self.mailMocker.messageBuilder.create_message( attachments=[ _AttachmentDef(filename="f1.pdf"), _AttachmentDef(filename="f2.pdf"), @@ -620,7 +692,7 @@ class TestMail( for test_case in tests: with self.subTest(msg=test_case.name): - self._queue_consumption_tasks_mock.reset_mock() + self.mailMocker._queue_consumption_tasks_mock.reset_mock() account = MailAccount(name=str(uuid.uuid4())) account.save() rule = MailRule( @@ -633,7 +705,7 @@ class TestMail( rule.save() self.mail_account_handler._handle_message(message, rule) - self.assert_queue_consumption_tasks_call_args( + self.mailMocker.assert_queue_consumption_tasks_call_args( [ [{"override_filename": m} for m in test_case.expected_matches], ], @@ -648,7 +720,7 @@ class TestMail( THEN: - Mail action should not be performed """ - message = self.create_message( + message = 
self.mailMocker.messageBuilder.create_message( attachments=[ _AttachmentDef( filename="test.png", @@ -656,7 +728,7 @@ class TestMail( ), ], ) - self.bogus_mailbox.messages.append(message) + self.mailMocker.bogus_mailbox.messages.append(message) account = MailAccount.objects.create( name="test", imap_server="", @@ -674,12 +746,12 @@ class TestMail( ) rule.save() - self.assertEqual(len(self.bogus_mailbox.messages), 4) + self.assertEqual(len(self.mailMocker.bogus_mailbox.messages), 4) self.mail_account_handler.handle_mail_account(account) - self.apply_mail_actions() + self.mailMocker.apply_mail_actions() - self.assertEqual(len(self.bogus_mailbox.messages), 1) + self.assertEqual(len(self.mailMocker.bogus_mailbox.messages), 1) def test_handle_mail_account_mark_read(self): account = MailAccount.objects.create( @@ -695,14 +767,14 @@ class TestMail( action=MailRule.MailAction.MARK_READ, ) - self.assertEqual(len(self.bogus_mailbox.messages), 3) - self.assertEqual(len(self.bogus_mailbox.fetch("UNSEEN", False)), 2) + self.assertEqual(len(self.mailMocker.bogus_mailbox.messages), 3) + self.assertEqual(len(self.mailMocker.bogus_mailbox.fetch("UNSEEN", False)), 2) self.mail_account_handler.handle_mail_account(account) - self.apply_mail_actions() + self.mailMocker.apply_mail_actions() - self.assertEqual(len(self.bogus_mailbox.fetch("UNSEEN", False)), 0) - self.assertEqual(len(self.bogus_mailbox.messages), 3) + self.assertEqual(len(self.mailMocker.bogus_mailbox.fetch("UNSEEN", False)), 0) + self.assertEqual(len(self.mailMocker.bogus_mailbox.messages), 3) def test_handle_mail_account_delete(self): account = MailAccount.objects.create( @@ -719,12 +791,12 @@ class TestMail( filter_subject="Invoice", ) - self.assertEqual(len(self.bogus_mailbox.messages), 3) + self.assertEqual(len(self.mailMocker.bogus_mailbox.messages), 3) self.mail_account_handler.handle_mail_account(account) - self.apply_mail_actions() + self.mailMocker.apply_mail_actions() - 
self.assertEqual(len(self.bogus_mailbox.messages), 1) + self.assertEqual(len(self.mailMocker.bogus_mailbox.messages), 1) def test_handle_mail_account_delete_no_filters(self): account = MailAccount.objects.create( @@ -741,12 +813,12 @@ class TestMail( maximum_age=0, ) - self.assertEqual(len(self.bogus_mailbox.messages), 3) + self.assertEqual(len(self.mailMocker.bogus_mailbox.messages), 3) self.mail_account_handler.handle_mail_account(account) - self.apply_mail_actions() + self.mailMocker.apply_mail_actions() - self.assertEqual(len(self.bogus_mailbox.messages), 0) + self.assertEqual(len(self.mailMocker.bogus_mailbox.messages), 0) def test_handle_mail_account_flag(self): account = MailAccount.objects.create( @@ -763,14 +835,20 @@ class TestMail( filter_subject="Invoice", ) - self.assertEqual(len(self.bogus_mailbox.messages), 3) - self.assertEqual(len(self.bogus_mailbox.fetch("UNFLAGGED", False)), 2) + self.assertEqual(len(self.mailMocker.bogus_mailbox.messages), 3) + self.assertEqual( + len(self.mailMocker.bogus_mailbox.fetch("UNFLAGGED", False)), + 2, + ) self.mail_account_handler.handle_mail_account(account) - self.apply_mail_actions() + self.mailMocker.apply_mail_actions() - self.assertEqual(len(self.bogus_mailbox.fetch("UNFLAGGED", False)), 1) - self.assertEqual(len(self.bogus_mailbox.messages), 3) + self.assertEqual( + len(self.mailMocker.bogus_mailbox.fetch("UNFLAGGED", False)), + 1, + ) + self.assertEqual(len(self.mailMocker.bogus_mailbox.messages), 3) @pytest.mark.flaky(reruns=4) def test_handle_mail_account_move(self): @@ -789,14 +867,14 @@ class TestMail( filter_subject="Claim", ) - self.assertEqual(len(self.bogus_mailbox.messages), 3) - self.assertEqual(len(self.bogus_mailbox.messages_spam), 0) + self.assertEqual(len(self.mailMocker.bogus_mailbox.messages), 3) + self.assertEqual(len(self.mailMocker.bogus_mailbox.messages_spam), 0) self.mail_account_handler.handle_mail_account(account) - self.apply_mail_actions() + self.mailMocker.apply_mail_actions() - 
self.assertEqual(len(self.bogus_mailbox.messages), 2) - self.assertEqual(len(self.bogus_mailbox.messages_spam), 1) + self.assertEqual(len(self.mailMocker.bogus_mailbox.messages), 2) + self.assertEqual(len(self.mailMocker.bogus_mailbox.messages_spam), 1) def test_handle_mail_account_move_no_filters(self): account = MailAccount.objects.create( @@ -814,14 +892,14 @@ class TestMail( maximum_age=0, ) - self.assertEqual(len(self.bogus_mailbox.messages), 3) - self.assertEqual(len(self.bogus_mailbox.messages_spam), 0) + self.assertEqual(len(self.mailMocker.bogus_mailbox.messages), 3) + self.assertEqual(len(self.mailMocker.bogus_mailbox.messages_spam), 0) self.mail_account_handler.handle_mail_account(account) - self.apply_mail_actions() + self.mailMocker.apply_mail_actions() - self.assertEqual(len(self.bogus_mailbox.messages), 0) - self.assertEqual(len(self.bogus_mailbox.messages_spam), 3) + self.assertEqual(len(self.mailMocker.bogus_mailbox.messages), 0) + self.assertEqual(len(self.mailMocker.bogus_mailbox.messages_spam), 3) def test_handle_mail_account_tag(self): account = MailAccount.objects.create( @@ -838,18 +916,24 @@ class TestMail( action_parameter="processed", ) - self.assertEqual(len(self.bogus_mailbox.messages), 3) - self.assertEqual(len(self.bogus_mailbox.fetch("UNKEYWORD processed", False)), 2) + self.assertEqual(len(self.mailMocker.bogus_mailbox.messages), 3) + self.assertEqual( + len(self.mailMocker.bogus_mailbox.fetch("UNKEYWORD processed", False)), + 2, + ) self.mail_account_handler.handle_mail_account(account) - self.apply_mail_actions() + self.mailMocker.apply_mail_actions() - self.assertEqual(len(self.bogus_mailbox.messages), 3) - self.assertEqual(len(self.bogus_mailbox.fetch("UNKEYWORD processed", False)), 0) + self.assertEqual(len(self.mailMocker.bogus_mailbox.messages), 3) + self.assertEqual( + len(self.mailMocker.bogus_mailbox.fetch("UNKEYWORD processed", False)), + 0, + ) def test_handle_mail_account_tag_gmail(self): - self.bogus_mailbox._host = 
"imap.gmail.com" - self.bogus_mailbox.client.capabilities = ["X-GM-EXT-1"] + self.mailMocker.bogus_mailbox._host = "imap.gmail.com" + self.mailMocker.bogus_mailbox.client.capabilities = ["X-GM-EXT-1"] account = MailAccount.objects.create( name="test", @@ -865,15 +949,15 @@ class TestMail( action_parameter="processed", ) - self.assertEqual(len(self.bogus_mailbox.messages), 3) + self.assertEqual(len(self.mailMocker.bogus_mailbox.messages), 3) criteria = NOT(gmail_label="processed") - self.assertEqual(len(self.bogus_mailbox.fetch(criteria, False)), 2) + self.assertEqual(len(self.mailMocker.bogus_mailbox.fetch(criteria, False)), 2) self.mail_account_handler.handle_mail_account(account) - self.apply_mail_actions() + self.mailMocker.apply_mail_actions() - self.assertEqual(len(self.bogus_mailbox.fetch(criteria, False)), 0) - self.assertEqual(len(self.bogus_mailbox.messages), 3) + self.assertEqual(len(self.mailMocker.bogus_mailbox.fetch(criteria, False)), 0) + self.assertEqual(len(self.mailMocker.bogus_mailbox.messages), 3) def test_tag_mail_action_applemail_wrong_input(self): self.assertRaises( @@ -900,14 +984,20 @@ class TestMail( action_parameter="apple:green", ) - self.assertEqual(len(self.bogus_mailbox.messages), 3) - self.assertEqual(len(self.bogus_mailbox.fetch("UNFLAGGED", False)), 2) + self.assertEqual(len(self.mailMocker.bogus_mailbox.messages), 3) + self.assertEqual( + len(self.mailMocker.bogus_mailbox.fetch("UNFLAGGED", False)), + 2, + ) self.mail_account_handler.handle_mail_account(account) - self.apply_mail_actions() + self.mailMocker.apply_mail_actions() - self.assertEqual(len(self.bogus_mailbox.fetch("UNFLAGGED", False)), 0) - self.assertEqual(len(self.bogus_mailbox.messages), 3) + self.assertEqual( + len(self.mailMocker.bogus_mailbox.fetch("UNFLAGGED", False)), + 0, + ) + self.assertEqual(len(self.mailMocker.bogus_mailbox.messages), 3) def test_error_login(self): """ @@ -955,10 +1045,10 @@ class TestMail( ) tasks.process_mail_accounts() - 
self.apply_mail_actions() + self.mailMocker.apply_mail_actions() - self.assertEqual(len(self.bogus_mailbox.messages), 2) - self.assertEqual(len(self.bogus_mailbox.messages_spam), 1) + self.assertEqual(len(self.mailMocker.bogus_mailbox.messages), 2) + self.assertEqual(len(self.mailMocker.bogus_mailbox.messages_spam), 1) def test_error_skip_rule(self): account = MailAccount.objects.create( @@ -986,10 +1076,10 @@ class TestMail( ) self.mail_account_handler.handle_mail_account(account) - self.apply_mail_actions() + self.mailMocker.apply_mail_actions() - self.assertEqual(len(self.bogus_mailbox.messages), 2) - self.assertEqual(len(self.bogus_mailbox.messages_spam), 1) + self.assertEqual(len(self.mailMocker.bogus_mailbox.messages), 2) + self.assertEqual(len(self.mailMocker.bogus_mailbox.messages_spam), 1) def test_error_folder_set(self): """ @@ -1015,14 +1105,14 @@ class TestMail( folder="uuuhhhh", # Invalid folder name ) - self.bogus_mailbox.folder.list = mock.Mock( + self.mailMocker.bogus_mailbox.folder.list = mock.Mock( return_value=[FolderInfo("SomeFoldername", "|", ())], ) self.mail_account_handler.handle_mail_account(account) - self.bogus_mailbox.folder.list.assert_called_once() - self._queue_consumption_tasks_mock.assert_not_called() + self.mailMocker.bogus_mailbox.folder.list.assert_called_once() + self.mailMocker._queue_consumption_tasks_mock.assert_not_called() def test_error_folder_set_error_listing(self): """ @@ -1048,14 +1138,14 @@ class TestMail( folder="uuuhhhh", # Invalid folder name ) - self.bogus_mailbox.folder.list = mock.Mock( + self.mailMocker.bogus_mailbox.folder.list = mock.Mock( side_effect=MailboxFolderSelectError(None, "uhm"), ) self.mail_account_handler.handle_mail_account(account) - self.bogus_mailbox.folder.list.assert_called_once() - self._queue_consumption_tasks_mock.assert_not_called() + self.mailMocker.bogus_mailbox.folder.list.assert_called_once() + self.mailMocker._queue_consumption_tasks_mock.assert_not_called() 
@mock.patch("paperless_mail.mail.MailAccountHandler._get_correspondent") def test_error_skip_mail(self, m): @@ -1081,14 +1171,17 @@ class TestMail( ) self.mail_account_handler.handle_mail_account(account) - self.apply_mail_actions() + self.mailMocker.apply_mail_actions() # test that we still consume mail even if some mails throw errors. - self.assertEqual(self._queue_consumption_tasks_mock.call_count, 2) + self.assertEqual(self.mailMocker._queue_consumption_tasks_mock.call_count, 2) # faulty mail still in inbox, untouched - self.assertEqual(len(self.bogus_mailbox.messages), 1) - self.assertEqual(self.bogus_mailbox.messages[0].from_, "amazon@amazon.de") + self.assertEqual(len(self.mailMocker.bogus_mailbox.messages), 1) + self.assertEqual( + self.mailMocker.bogus_mailbox.messages[0].from_, + "amazon@amazon.de", + ) def test_error_create_correspondent(self): account = MailAccount.objects.create( @@ -1108,10 +1201,10 @@ class TestMail( self.mail_account_handler.handle_mail_account(account) - self._queue_consumption_tasks_mock.assert_called_once() + self.mailMocker._queue_consumption_tasks_mock.assert_called_once() c = Correspondent.objects.get(name="amazon@amazon.de") - self.assert_queue_consumption_tasks_call_args( + self.mailMocker.assert_queue_consumption_tasks_call_args( [ [ {"override_correspondent_id": c.id}, @@ -1119,15 +1212,18 @@ class TestMail( ], ) - self._queue_consumption_tasks_mock.reset_mock() - self.reset_bogus_mailbox() + self.mailMocker._queue_consumption_tasks_mock.reset_mock() + reset_bogus_mailbox( + self.mailMocker.bogus_mailbox, + self.mailMocker.messageBuilder, + ) with mock.patch("paperless_mail.mail.Correspondent.objects.get_or_create") as m: m.side_effect = DatabaseError() self.mail_account_handler.handle_mail_account(account) - self.assert_queue_consumption_tasks_call_args( + self.mailMocker.assert_queue_consumption_tasks_call_args( [ [ {"override_correspondent_id": None}, @@ -1165,21 +1261,24 @@ class TestMail( filter_from=f_from, 
filter_to=f_to, ) - self.reset_bogus_mailbox() - self._queue_consumption_tasks_mock.reset_mock() + reset_bogus_mailbox( + self.mailMocker.bogus_mailbox, + self.mailMocker.messageBuilder, + ) + self.mailMocker._queue_consumption_tasks_mock.reset_mock() - self._queue_consumption_tasks_mock.assert_not_called() - self.assertEqual(len(self.bogus_mailbox.messages), 3) + self.mailMocker._queue_consumption_tasks_mock.assert_not_called() + self.assertEqual(len(self.mailMocker.bogus_mailbox.messages), 3) self.mail_account_handler.handle_mail_account(account) - self.apply_mail_actions() + self.mailMocker.apply_mail_actions() self.assertEqual( - len(self.bogus_mailbox.messages), + len(self.mailMocker.bogus_mailbox.messages), 3 - expected_mail_count, ) self.assertEqual( - self._queue_consumption_tasks_mock.call_count, + self.mailMocker._queue_consumption_tasks_mock.call_count, expected_mail_count, ) @@ -1206,16 +1305,16 @@ class TestMail( action=MailRule.MailAction.MARK_READ, ) - self.assertEqual(len(self.bogus_mailbox.messages), 3) - self._queue_consumption_tasks_mock.assert_not_called() - self.assertEqual(len(self.bogus_mailbox.fetch("UNSEEN", False)), 2) + self.assertEqual(len(self.mailMocker.bogus_mailbox.messages), 3) + self.mailMocker._queue_consumption_tasks_mock.assert_not_called() + self.assertEqual(len(self.mailMocker.bogus_mailbox.fetch("UNSEEN", False)), 2) self.mail_account_handler.handle_mail_account(account) - self.apply_mail_actions() + self.mailMocker.apply_mail_actions() - self.assertEqual(self._queue_consumption_tasks_mock.call_count, 2) - self.assertEqual(len(self.bogus_mailbox.fetch("UNSEEN", False)), 0) - self.assertEqual(len(self.bogus_mailbox.messages), 3) + self.assertEqual(self.mailMocker._queue_consumption_tasks_mock.call_count, 2) + self.assertEqual(len(self.mailMocker.bogus_mailbox.fetch("UNSEEN", False)), 0) + self.assertEqual(len(self.mailMocker.bogus_mailbox.messages), 3) def test_auth_plain_fallback_fails_still(self): """ @@ -1272,75 +1371,16 @@ 
class TestMail( action=MailRule.MailAction.MARK_READ, ) - self.assertEqual(len(self.bogus_mailbox.messages), 3) - self.assertEqual(self._queue_consumption_tasks_mock.call_count, 0) - self.assertEqual(len(self.bogus_mailbox.fetch("UNSEEN", False)), 2) + self.assertEqual(len(self.mailMocker.bogus_mailbox.messages), 3) + self.assertEqual(self.mailMocker._queue_consumption_tasks_mock.call_count, 0) + self.assertEqual(len(self.mailMocker.bogus_mailbox.fetch("UNSEEN", False)), 2) self.mail_account_handler.handle_mail_account(account) - self.apply_mail_actions() + self.mailMocker.apply_mail_actions() - self.assertEqual(self._queue_consumption_tasks_mock.call_count, 2) - self.assertEqual(len(self.bogus_mailbox.fetch("UNSEEN", False)), 0) - self.assertEqual(len(self.bogus_mailbox.messages), 3) - - def assert_queue_consumption_tasks_call_args( - self, - expected_call_args: list[list[dict[str, str]]], - ): - """ - Verifies that queue_consumption_tasks has been called with the expected arguments. - - expected_call_args is the following format: - - * List of calls to queue_consumption_tasks, called once per mail, where each element is: - * List of signatures for the consume_file task, where each element is: - * dictionary containing arguments that need to be present in the consume_file signature. 
- - """ - - # assert number of calls to queue_consumption_tasks match - self.assertEqual( - len(self._queue_consumption_tasks_mock.call_args_list), - len(expected_call_args), - ) - - for (mock_args, mock_kwargs), expected_signatures in zip( - self._queue_consumption_tasks_mock.call_args_list, - expected_call_args, - ): - consume_tasks = mock_kwargs["consume_tasks"] - - # assert number of consume_file tasks match - self.assertEqual(len(consume_tasks), len(expected_signatures)) - - for consume_task, expected_signature in zip( - consume_tasks, - expected_signatures, - ): - input_doc, overrides = consume_task.args - - # assert the file exists - self.assertIsFile(input_doc.original_file) - - # assert all expected arguments are present in the signature - for key, value in expected_signature.items(): - if key == "override_correspondent_id": - self.assertEqual(overrides.correspondent_id, value) - elif key == "override_filename": - self.assertEqual(overrides.filename, value) - elif key == "override_title": - self.assertEqual(overrides.title, value) - else: - self.fail("No match for expected arg") - - def apply_mail_actions(self): - """ - Applies pending actions to mails by inspecting calls to the queue_consumption_tasks method. 
- """ - for args, kwargs in self._queue_consumption_tasks_mock.call_args_list: - message = kwargs["message"] - rule = kwargs["rule"] - apply_mail_action([], rule.pk, message.uid, message.subject, message.date) + self.assertEqual(self.mailMocker._queue_consumption_tasks_mock.call_count, 2) + self.assertEqual(len(self.mailMocker.bogus_mailbox.fetch("UNSEEN", False)), 0) + self.assertEqual(len(self.mailMocker.bogus_mailbox.messages), 3) class TestManagementCommand(TestCase): diff --git a/src/paperless_mail/tests/test_preprocessor.py b/src/paperless_mail/tests/test_preprocessor.py new file mode 100644 index 000000000..90df77ba8 --- /dev/null +++ b/src/paperless_mail/tests/test_preprocessor.py @@ -0,0 +1,228 @@ +import email +import email.contentmanager +import tempfile +from email.message import Message +from email.mime.application import MIMEApplication +from email.mime.multipart import MIMEMultipart +from unittest import mock + +import gnupg +from django.test import override_settings +from imap_tools import MailMessage + +from paperless_mail.mail import MailAccountHandler +from paperless_mail.models import MailAccount +from paperless_mail.models import MailRule +from paperless_mail.preprocessor import MailMessageDecryptor +from paperless_mail.tests.test_mail import TestMail +from paperless_mail.tests.test_mail import _AttachmentDef + + +class MessageEncryptor: + def __init__(self): + self.gpg_home = tempfile.mkdtemp() + self.gpg = gnupg.GPG(gnupghome=self.gpg_home) + self._testUser = "testuser@example.com" + # Generate a new key + input_data = self.gpg.gen_key_input( + name_email=self._testUser, + passphrase=None, + key_type="RSA", + key_length=2048, + expire_date=0, + no_protection=True, + ) + self.gpg.gen_key(input_data) + + @staticmethod + def get_email_body_without_headers(email_message: Message) -> bytes: + """ + Filters some relevant headers from an EmailMessage and returns just the body. 
+ """ + message_copy = email.message_from_bytes(email_message.as_bytes()) + + message_copy._headers = [ + header + for header in message_copy._headers + if header[0].lower() not in ("from", "to", "subject") + ] + return message_copy.as_bytes() + + def encrypt(self, message): + original_email: email.message.Message = message.obj + encrypted_data = self.gpg.encrypt( + self.get_email_body_without_headers(original_email), + self._testUser, + armor=True, + ) + if not encrypted_data.ok: + raise Exception(f"Encryption failed: {encrypted_data.stderr}") + encrypted_email_content = encrypted_data.data + + new_email = MIMEMultipart("encrypted", protocol="application/pgp-encrypted") + new_email["From"] = original_email["From"] + new_email["To"] = original_email["To"] + new_email["Subject"] = original_email["Subject"] + + # Add the control part + control_part = MIMEApplication(_data=b"", _subtype="pgp-encrypted") + control_part.set_payload("Version: 1") + new_email.attach(control_part) + + # Add the encrypted data part + encrypted_part = MIMEApplication(_data=b"", _subtype="octet-stream") + encrypted_part.set_payload(encrypted_email_content.decode("ascii")) + encrypted_part.add_header( + "Content-Disposition", + 'attachment; filename="encrypted.asc"', + ) + new_email.attach(encrypted_part) + + encrypted_message: MailMessage = MailMessage( + [(f"UID {message.uid}".encode(), new_email.as_bytes())], + ) + return encrypted_message + + +class TestMailMessageGpgDecryptor(TestMail): + def setUp(self): + self.messageEncryptor = MessageEncryptor() + with override_settings( + EMAIL_GNUPG_HOME=self.messageEncryptor.gpg_home, + EMAIL_ENABLE_GPG_DECRYPTOR=True, + ): + super().setUp() + + def test_preprocessor_is_able_to_run(self): + with override_settings( + EMAIL_GNUPG_HOME=self.messageEncryptor.gpg_home, + EMAIL_ENABLE_GPG_DECRYPTOR=True, + ): + self.assertTrue(MailMessageDecryptor.able_to_run()) + + def test_preprocessor_is_able_to_run2(self): + with override_settings( + 
EMAIL_GNUPG_HOME=None, + EMAIL_ENABLE_GPG_DECRYPTOR=True, + ): + self.assertTrue(MailMessageDecryptor.able_to_run()) + + def test_is_not_able_to_run_disabled(self): + with override_settings( + EMAIL_ENABLE_GPG_DECRYPTOR=False, + ): + self.assertFalse(MailMessageDecryptor.able_to_run()) + + def test_is_not_able_to_run_bogus_path(self): + with override_settings( + EMAIL_ENABLE_GPG_DECRYPTOR=True, + EMAIL_GNUPG_HOME="_)@# notapath &%#$", + ): + self.assertFalse(MailMessageDecryptor.able_to_run()) + + def test_fails_at_initialization(self): + with ( + mock.patch("gnupg.GPG.__init__") as mock_run, + override_settings( + EMAIL_ENABLE_GPG_DECRYPTOR=True, + ), + ): + + def side_effect(*args, **kwargs): + raise OSError("Cannot find 'gpg' binary") + + mock_run.side_effect = side_effect + + handler = MailAccountHandler() + self.assertEqual(len(handler._message_preprocessors), 0) + + def test_decrypt_fails(self): + encrypted_message, _ = self.create_encrypted_unencrypted_message_pair() + empty_gpg_home = tempfile.mkdtemp() + with override_settings( + EMAIL_ENABLE_GPG_DECRYPTOR=True, + EMAIL_GNUPG_HOME=empty_gpg_home, + ): + message_decryptor = MailMessageDecryptor() + self.assertRaises(Exception, message_decryptor.run, encrypted_message) + + def test_decrypt_encrypted_mail(self): + """ + Creates a mail with attachments. Then encrypts it with a new key. + Verifies that this encrypted message can be decrypted with attachments intact. 
+ """ + encrypted_message, message = self.create_encrypted_unencrypted_message_pair() + headers = message.headers + text = message.text + + self.assertEqual(len(encrypted_message.attachments), 1) + self.assertEqual(encrypted_message.attachments[0].filename, "encrypted.asc") + self.assertEqual(encrypted_message.text, "") + + with override_settings( + EMAIL_ENABLE_GPG_DECRYPTOR=True, + EMAIL_GNUPG_HOME=self.messageEncryptor.gpg_home, + ): + message_decryptor = MailMessageDecryptor() + self.assertTrue(message_decryptor.able_to_run()) + decrypted_message = message_decryptor.run(encrypted_message) + + self.assertEqual(len(decrypted_message.attachments), 2) + self.assertEqual(decrypted_message.attachments[0].filename, "f1.pdf") + self.assertEqual(decrypted_message.attachments[1].filename, "f2.pdf") + self.assertEqual(decrypted_message.headers, headers) + self.assertEqual(decrypted_message.text, text) + self.assertEqual(decrypted_message.uid, message.uid) + + def create_encrypted_unencrypted_message_pair(self): + message = self.mailMocker.messageBuilder.create_message( + body="Test message with 2 attachments", + attachments=[ + _AttachmentDef( + filename="f1.pdf", + disposition="inline", + ), + _AttachmentDef(filename="f2.pdf"), + ], + ) + encrypted_message = self.messageEncryptor.encrypt(message) + return encrypted_message, message + + def test_handle_encrypted_message(self): + message = self.mailMocker.messageBuilder.create_message( + subject="the message title", + from_="Myself", + attachments=2, + body="Test mail", + ) + + encrypted_message = self.messageEncryptor.encrypt(message) + + account = MailAccount.objects.create() + rule = MailRule( + assign_title_from=MailRule.TitleSource.FROM_FILENAME, + consumption_scope=MailRule.ConsumptionScope.EVERYTHING, + account=account, + ) + rule.save() + + result = self.mail_account_handler._handle_message(encrypted_message, rule) + + self.assertEqual(result, 3) + + self.mailMocker._queue_consumption_tasks_mock.assert_called() + + 
self.mailMocker.assert_queue_consumption_tasks_call_args( + [ + [ + { + "override_title": message.subject, + "override_filename": f"{message.subject}.eml", + }, + ], + [ + {"override_title": "file_0", "override_filename": "file_0.pdf"}, + {"override_title": "file_1", "override_filename": "file_1.pdf"}, + ], + ], + )