mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-11-03 03:16:10 -06:00 
			
		
		
		
	Enhancement: add layout options for email conversion (#8907)
--------- Co-authored-by: shamoon <4887959+shamoon@users.noreply.github.com>
This commit is contained in:
		@@ -48,6 +48,7 @@ from documents.templating.workflows import parse_w_workflow_placeholders
 | 
			
		||||
from documents.utils import copy_basic_file_stats
 | 
			
		||||
from documents.utils import copy_file_with_basic_stats
 | 
			
		||||
from documents.utils import run_subprocess
 | 
			
		||||
from paperless_mail.parsers import MailDocumentParser
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class WorkflowTriggerPlugin(
 | 
			
		||||
@@ -479,7 +480,18 @@ class ConsumerPlugin(
 | 
			
		||||
                ConsumerStatusShortMessage.PARSING_DOCUMENT,
 | 
			
		||||
            )
 | 
			
		||||
            self.log.debug(f"Parsing {self.filename}...")
 | 
			
		||||
            document_parser.parse(self.working_copy, mime_type, self.filename)
 | 
			
		||||
            if (
 | 
			
		||||
                isinstance(document_parser, MailDocumentParser)
 | 
			
		||||
                and self.input_doc.mailrule_id
 | 
			
		||||
            ):
 | 
			
		||||
                document_parser.parse(
 | 
			
		||||
                    self.working_copy,
 | 
			
		||||
                    mime_type,
 | 
			
		||||
                    self.filename,
 | 
			
		||||
                    self.input_doc.mailrule_id,
 | 
			
		||||
                )
 | 
			
		||||
            else:
 | 
			
		||||
                document_parser.parse(self.working_copy, mime_type, self.filename)
 | 
			
		||||
 | 
			
		||||
            self.log.debug(f"Generating thumbnail for {self.filename}...")
 | 
			
		||||
            self._send_progress(
 | 
			
		||||
 
 | 
			
		||||
@@ -21,6 +21,7 @@ from guardian.core import ObjectPermissionChecker
 | 
			
		||||
 | 
			
		||||
from documents.consumer import ConsumerError
 | 
			
		||||
from documents.data_models import DocumentMetadataOverrides
 | 
			
		||||
from documents.data_models import DocumentSource
 | 
			
		||||
from documents.models import Correspondent
 | 
			
		||||
from documents.models import CustomField
 | 
			
		||||
from documents.models import Document
 | 
			
		||||
@@ -35,6 +36,8 @@ from documents.tasks import sanity_check
 | 
			
		||||
from documents.tests.utils import DirectoriesMixin
 | 
			
		||||
from documents.tests.utils import FileSystemAssertsMixin
 | 
			
		||||
from documents.tests.utils import GetConsumerMixin
 | 
			
		||||
from paperless_mail.models import MailRule
 | 
			
		||||
from paperless_mail.parsers import MailDocumentParser
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class TestAttributes(UnittestTestCase):
 | 
			
		||||
@@ -243,6 +246,8 @@ def fake_magic_from_file(file, *, mime=False):
 | 
			
		||||
            return "image/png"
 | 
			
		||||
        elif os.path.splitext(file)[1] == ".webp":
 | 
			
		||||
            return "image/webp"
 | 
			
		||||
        elif os.path.splitext(file)[1] == ".eml":
 | 
			
		||||
            return "message/rfc822"
 | 
			
		||||
        else:
 | 
			
		||||
            return "unknown"
 | 
			
		||||
    else:
 | 
			
		||||
@@ -975,6 +980,59 @@ class TestConsumer(
 | 
			
		||||
            self.assertEqual(command[0], "qpdf")
 | 
			
		||||
            self.assertEqual(command[1], "--replace-input")
 | 
			
		||||
 | 
			
		||||
    @mock.patch("paperless_mail.models.MailRule.objects.get")
    @mock.patch("paperless_mail.parsers.MailDocumentParser.parse")
    @mock.patch("documents.parsers.document_consumer_declaration.send")
    def test_mail_parser_receives_mailrule(
        self,
        mock_consumer_declaration_send: mock.Mock,
        mock_mail_parser_parse: mock.Mock,
        mock_mailrule_get: mock.Mock,
    ):
        """
        GIVEN:
            - A mail document from a mail rule
        WHEN:
            - The consumer is run
        THEN:
            - The mail parser should receive the id of the mail rule
        """
        rule_id = 1

        # Offer the mail parser as the only parser for .eml files
        mock_consumer_declaration_send.return_value = [
            (
                None,
                {
                    "parser": MailDocumentParser,
                    "mime_types": {"message/rfc822": ".eml"},
                    "weight": 0,
                },
            ),
        ]
        mock_mailrule_get.return_value = mock.Mock(
            pdf_layout=MailRule.PdfLayout.HTML_ONLY,
        )
        with self.get_consumer(
            filepath=(
                Path(__file__).parent.parent.parent
                / Path("paperless_mail")
                / Path("tests")
                / Path("samples")
            ).resolve()
            / "html.eml",
            source=DocumentSource.MailFetch,
            mailrule_id=rule_id,
        ) as consumer:
            # fails because no gotenberg
            with self.assertRaises(
                ConsumerError,
            ):
                consumer.run()

            # The check has to live outside the assertRaises block: anything
            # placed after the raising call inside that block is never executed.
            # The consumer hands the rule id to the parser as the fourth
            # positional argument, so the expectation is positional as well.
            mock_mail_parser_parse.assert_called_once_with(
                consumer.working_copy,
                "message/rfc822",
                consumer.filename,
                rule_id,
            )
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@mock.patch("documents.consumer.magic.from_file", fake_magic_from_file)
 | 
			
		||||
class TestConsumerCreatedDate(DirectoriesMixin, GetConsumerMixin, TestCase):
 | 
			
		||||
 
 | 
			
		||||
@@ -8,7 +8,7 @@ class TestMigrateWorkflow(TestMigrations):
 | 
			
		||||
    dependencies = (
 | 
			
		||||
        (
 | 
			
		||||
            "paperless_mail",
 | 
			
		||||
            "0028_alter_mailaccount_password_and_more",
 | 
			
		||||
            "0029_mailrule_pdf_layout",
 | 
			
		||||
        ),
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -340,11 +340,16 @@ class GetConsumerMixin:
 | 
			
		||||
        filepath: Path,
 | 
			
		||||
        overrides: DocumentMetadataOverrides | None = None,
 | 
			
		||||
        source: DocumentSource = DocumentSource.ConsumeFolder,
 | 
			
		||||
        mailrule_id: int | None = None,
 | 
			
		||||
    ) -> Generator[ConsumerPlugin, None, None]:
 | 
			
		||||
        # Store this for verification
 | 
			
		||||
        self.status = DummyProgressManager(filepath.name, None)
 | 
			
		||||
        reader = ConsumerPlugin(
 | 
			
		||||
            ConsumableDocument(source, original_file=filepath),
 | 
			
		||||
            ConsumableDocument(
 | 
			
		||||
                source,
 | 
			
		||||
                original_file=filepath,
 | 
			
		||||
                mailrule_id=mailrule_id or None,
 | 
			
		||||
            ),
 | 
			
		||||
            overrides or DocumentMetadataOverrides(),
 | 
			
		||||
            self.status,  # type: ignore
 | 
			
		||||
            self.dirs.scratch_dir,
 | 
			
		||||
 
 | 
			
		||||
@@ -2,7 +2,7 @@ msgid ""
 | 
			
		||||
msgstr ""
 | 
			
		||||
"Project-Id-Version: paperless-ngx\n"
 | 
			
		||||
"Report-Msgid-Bugs-To: \n"
 | 
			
		||||
"POT-Creation-Date: 2025-01-27 08:19-0800\n"
 | 
			
		||||
"POT-Creation-Date: 2025-01-28 12:17-0800\n"
 | 
			
		||||
"PO-Revision-Date: 2022-02-17 04:17\n"
 | 
			
		||||
"Last-Translator: \n"
 | 
			
		||||
"Language-Team: English\n"
 | 
			
		||||
@@ -90,7 +90,7 @@ msgid "Automatic"
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
#: documents/models.py:67 documents/models.py:433 documents/models.py:1493
 | 
			
		||||
#: paperless_mail/models.py:23 paperless_mail/models.py:136
 | 
			
		||||
#: paperless_mail/models.py:23 paperless_mail/models.py:143
 | 
			
		||||
msgid "name"
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
@@ -276,7 +276,7 @@ msgstr ""
 | 
			
		||||
msgid "warning"
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
#: documents/models.py:387 paperless_mail/models.py:350
 | 
			
		||||
#: documents/models.py:387 paperless_mail/models.py:363
 | 
			
		||||
msgid "error"
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
@@ -818,7 +818,7 @@ msgstr ""
 | 
			
		||||
msgid "filter filename"
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
#: documents/models.py:1066 paperless_mail/models.py:193
 | 
			
		||||
#: documents/models.py:1066 paperless_mail/models.py:200
 | 
			
		||||
msgid ""
 | 
			
		||||
"Only consume documents which entirely match this filename if specified. "
 | 
			
		||||
"Wildcards such as *.pdf or *invoice* are allowed. Case insensitive."
 | 
			
		||||
@@ -988,15 +988,15 @@ msgid ""
 | 
			
		||||
"Assign a document title, can include some placeholders, see documentation."
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
#: documents/models.py:1287 paperless_mail/models.py:261
 | 
			
		||||
#: documents/models.py:1287 paperless_mail/models.py:274
 | 
			
		||||
msgid "assign this tag"
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
#: documents/models.py:1296 paperless_mail/models.py:269
 | 
			
		||||
#: documents/models.py:1296 paperless_mail/models.py:282
 | 
			
		||||
msgid "assign this document type"
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
#: documents/models.py:1305 paperless_mail/models.py:283
 | 
			
		||||
#: documents/models.py:1305 paperless_mail/models.py:296
 | 
			
		||||
msgid "assign this correspondent"
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
@@ -1112,7 +1112,7 @@ msgstr ""
 | 
			
		||||
msgid "workflow actions"
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
#: documents/models.py:1495 paperless_mail/models.py:138
 | 
			
		||||
#: documents/models.py:1495 paperless_mail/models.py:145
 | 
			
		||||
msgid "order"
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
@@ -1124,7 +1124,7 @@ msgstr ""
 | 
			
		||||
msgid "actions"
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
#: documents/models.py:1511 paperless_mail/models.py:147
 | 
			
		||||
#: documents/models.py:1511 paperless_mail/models.py:154
 | 
			
		||||
msgid "enabled"
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
@@ -1838,161 +1838,185 @@ msgid "Process all files, including 'inline' attachments."
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
#: paperless_mail/models.py:119
 | 
			
		||||
msgid "Delete"
 | 
			
		||||
msgid "System default"
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
#: paperless_mail/models.py:120
 | 
			
		||||
msgid "Move to specified folder"
 | 
			
		||||
msgid "Text, then HTML"
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
#: paperless_mail/models.py:121
 | 
			
		||||
msgid "Mark as read, don't process read mails"
 | 
			
		||||
msgid "HTML, then text"
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
#: paperless_mail/models.py:122
 | 
			
		||||
msgid "Flag the mail, don't process flagged mails"
 | 
			
		||||
msgid "HTML only"
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
#: paperless_mail/models.py:123
 | 
			
		||||
msgid "Tag the mail with specified tag, don't process tagged mails"
 | 
			
		||||
msgid "Text only"
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
#: paperless_mail/models.py:126
 | 
			
		||||
msgid "Use subject as title"
 | 
			
		||||
msgid "Delete"
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
#: paperless_mail/models.py:127
 | 
			
		||||
msgid "Use attachment filename as title"
 | 
			
		||||
msgid "Move to specified folder"
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
#: paperless_mail/models.py:128
 | 
			
		||||
msgid "Do not assign title from rule"
 | 
			
		||||
msgid "Mark as read, don't process read mails"
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
#: paperless_mail/models.py:131
 | 
			
		||||
msgid "Do not assign a correspondent"
 | 
			
		||||
#: paperless_mail/models.py:129
 | 
			
		||||
msgid "Flag the mail, don't process flagged mails"
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
#: paperless_mail/models.py:132
 | 
			
		||||
msgid "Use mail address"
 | 
			
		||||
#: paperless_mail/models.py:130
 | 
			
		||||
msgid "Tag the mail with specified tag, don't process tagged mails"
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
#: paperless_mail/models.py:133
 | 
			
		||||
msgid "Use name (or mail address if not available)"
 | 
			
		||||
msgid "Use subject as title"
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
#: paperless_mail/models.py:134
 | 
			
		||||
msgid "Use attachment filename as title"
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
#: paperless_mail/models.py:135
 | 
			
		||||
msgid "Do not assign title from rule"
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
#: paperless_mail/models.py:138
 | 
			
		||||
msgid "Do not assign a correspondent"
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
#: paperless_mail/models.py:139
 | 
			
		||||
msgid "Use mail address"
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
#: paperless_mail/models.py:140
 | 
			
		||||
msgid "Use name (or mail address if not available)"
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
#: paperless_mail/models.py:141
 | 
			
		||||
msgid "Use correspondent selected below"
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
#: paperless_mail/models.py:144
 | 
			
		||||
#: paperless_mail/models.py:151
 | 
			
		||||
msgid "account"
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
#: paperless_mail/models.py:150 paperless_mail/models.py:305
 | 
			
		||||
#: paperless_mail/models.py:157 paperless_mail/models.py:318
 | 
			
		||||
msgid "folder"
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
#: paperless_mail/models.py:154
 | 
			
		||||
#: paperless_mail/models.py:161
 | 
			
		||||
msgid ""
 | 
			
		||||
"Subfolders must be separated by a delimiter, often a dot ('.') or slash "
 | 
			
		||||
"('/'), but it varies by mail server."
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
#: paperless_mail/models.py:160
 | 
			
		||||
#: paperless_mail/models.py:167
 | 
			
		||||
msgid "filter from"
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
#: paperless_mail/models.py:167
 | 
			
		||||
#: paperless_mail/models.py:174
 | 
			
		||||
msgid "filter to"
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
#: paperless_mail/models.py:174
 | 
			
		||||
#: paperless_mail/models.py:181
 | 
			
		||||
msgid "filter subject"
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
#: paperless_mail/models.py:181
 | 
			
		||||
#: paperless_mail/models.py:188
 | 
			
		||||
msgid "filter body"
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
#: paperless_mail/models.py:188
 | 
			
		||||
#: paperless_mail/models.py:195
 | 
			
		||||
msgid "filter attachment filename inclusive"
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
#: paperless_mail/models.py:200
 | 
			
		||||
#: paperless_mail/models.py:207
 | 
			
		||||
msgid "filter attachment filename exclusive"
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
#: paperless_mail/models.py:205
 | 
			
		||||
#: paperless_mail/models.py:212
 | 
			
		||||
msgid ""
 | 
			
		||||
"Do not consume documents which entirely match this filename if specified. "
 | 
			
		||||
"Wildcards such as *.pdf or *invoice* are allowed. Case insensitive."
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
#: paperless_mail/models.py:212
 | 
			
		||||
#: paperless_mail/models.py:219
 | 
			
		||||
msgid "maximum age"
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
#: paperless_mail/models.py:214
 | 
			
		||||
#: paperless_mail/models.py:221
 | 
			
		||||
msgid "Specified in days."
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
#: paperless_mail/models.py:218
 | 
			
		||||
#: paperless_mail/models.py:225
 | 
			
		||||
msgid "attachment type"
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
#: paperless_mail/models.py:222
 | 
			
		||||
#: paperless_mail/models.py:229
 | 
			
		||||
msgid ""
 | 
			
		||||
"Inline attachments include embedded images, so it's best to combine this "
 | 
			
		||||
"option with a filename filter."
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
#: paperless_mail/models.py:228
 | 
			
		||||
#: paperless_mail/models.py:235
 | 
			
		||||
msgid "consumption scope"
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
#: paperless_mail/models.py:234
 | 
			
		||||
#: paperless_mail/models.py:241
 | 
			
		||||
msgid "pdf layout"
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
#: paperless_mail/models.py:247
 | 
			
		||||
msgid "action"
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
#: paperless_mail/models.py:240
 | 
			
		||||
#: paperless_mail/models.py:253
 | 
			
		||||
msgid "action parameter"
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
#: paperless_mail/models.py:245
 | 
			
		||||
#: paperless_mail/models.py:258
 | 
			
		||||
msgid ""
 | 
			
		||||
"Additional parameter for the action selected above, i.e., the target folder "
 | 
			
		||||
"of the move to folder action. Subfolders must be separated by dots."
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
#: paperless_mail/models.py:253
 | 
			
		||||
#: paperless_mail/models.py:266
 | 
			
		||||
msgid "assign title from"
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
#: paperless_mail/models.py:273
 | 
			
		||||
#: paperless_mail/models.py:286
 | 
			
		||||
msgid "assign correspondent from"
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
#: paperless_mail/models.py:287
 | 
			
		||||
#: paperless_mail/models.py:300
 | 
			
		||||
msgid "Assign the rule owner to documents"
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
#: paperless_mail/models.py:313
 | 
			
		||||
#: paperless_mail/models.py:326
 | 
			
		||||
msgid "uid"
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
#: paperless_mail/models.py:321
 | 
			
		||||
#: paperless_mail/models.py:334
 | 
			
		||||
msgid "subject"
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
#: paperless_mail/models.py:329
 | 
			
		||||
#: paperless_mail/models.py:342
 | 
			
		||||
msgid "received"
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
#: paperless_mail/models.py:336
 | 
			
		||||
#: paperless_mail/models.py:349
 | 
			
		||||
msgid "processed"
 | 
			
		||||
msgstr ""
 | 
			
		||||
 | 
			
		||||
#: paperless_mail/models.py:342
 | 
			
		||||
#: paperless_mail/models.py:355
 | 
			
		||||
msgid "status"
 | 
			
		||||
msgstr ""
 | 
			
		||||
 
 | 
			
		||||
@@ -1030,6 +1030,11 @@ CONVERT_MEMORY_LIMIT = os.getenv("PAPERLESS_CONVERT_MEMORY_LIMIT")
 | 
			
		||||
 | 
			
		||||
GS_BINARY = os.getenv("PAPERLESS_GS_BINARY", "gs")
 | 
			
		||||
 | 
			
		||||
# Fallback layout for .eml consumption, used by the mail parser when no layout
# is given or the given layout is 0 ("System default").
# The integer mirrors MailRule.PdfLayout: 1 = text then HTML, 2 = HTML then
# text, 3 = HTML only, 4 = text only.
EMAIL_PARSE_DEFAULT_LAYOUT = __get_int(
    "PAPERLESS_EMAIL_PARSE_DEFAULT_LAYOUT",
    1,  # MailRule.PdfLayout.TEXT_HTML but that can't be imported here
)
 | 
			
		||||
 | 
			
		||||
# Pre-2.x versions of Paperless stored your documents locally with GPG
 | 
			
		||||
# encryption, but that is no longer the default.  This behaviour is still
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										28
									
								
								src/paperless_mail/migrations/0029_mailrule_pdf_layout.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										28
									
								
								src/paperless_mail/migrations/0029_mailrule_pdf_layout.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,28 @@
 | 
			
		||||
# Generated by Django 5.1.3 on 2024-11-24 12:39
 | 
			
		||||
 | 
			
		||||
from django.db import migrations
 | 
			
		||||
from django.db import models
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class Migration(migrations.Migration):
    """Add the ``pdf_layout`` field to the ``mailrule`` model."""

    # Builds on the previous paperless_mail migration
    dependencies = [
        ("paperless_mail", "0028_alter_mailaccount_password_and_more"),
    ]

    operations = [
        migrations.AddField(
            model_name="mailrule",
            name="pdf_layout",
            field=models.PositiveIntegerField(
                # Choice values and labels recorded at the time of this migration
                choices=[
                    (0, "System default"),
                    (1, "Text, then HTML"),
                    (2, "HTML, then text"),
                    (3, "HTML only"),
                    (4, "Text only"),
                ],
                # 0 is the "System default" choice
                default=0,
                verbose_name="pdf layout",
            ),
        ),
    ]
 | 
			
		||||
@@ -115,6 +115,13 @@ class MailRule(document_models.ModelWithOwner):
 | 
			
		||||
        ATTACHMENTS_ONLY = 1, _("Only process attachments.")
 | 
			
		||||
        EVERYTHING = 2, _("Process all files, including 'inline' attachments.")
 | 
			
		||||
 | 
			
		||||
    class PdfLayout(models.IntegerChoices):
        """
        Which parts of an email end up in the generated PDF, and in what order.

        "Text" is the PDF rendered from the mail message itself, "HTML" is the
        PDF rendered from the mail's HTML content.
        """

        # Falsy on purpose: the parser then falls back to
        # settings.EMAIL_PARSE_DEFAULT_LAYOUT
        DEFAULT = 0, _("System default")
        # Message PDF first, followed by the HTML PDF
        TEXT_HTML = 1, _("Text, then HTML")
        # HTML PDF first, followed by the message PDF
        HTML_TEXT = 2, _("HTML, then text")
        # Only the HTML PDF
        HTML_ONLY = 3, _("HTML only")
        # Only the message PDF
        TEXT_ONLY = 4, _("Text only")
 | 
			
		||||
 | 
			
		||||
    class MailAction(models.IntegerChoices):
 | 
			
		||||
        DELETE = 1, _("Delete")
 | 
			
		||||
        MOVE = 2, _("Move to specified folder")
 | 
			
		||||
@@ -230,6 +237,12 @@ class MailRule(document_models.ModelWithOwner):
 | 
			
		||||
        default=ConsumptionScope.ATTACHMENTS_ONLY,
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
    pdf_layout = models.PositiveIntegerField(
 | 
			
		||||
        _("pdf layout"),
 | 
			
		||||
        choices=PdfLayout.choices,
 | 
			
		||||
        default=PdfLayout.DEFAULT,
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
    action = models.PositiveIntegerField(
 | 
			
		||||
        _("action"),
 | 
			
		||||
        choices=MailAction.choices,
 | 
			
		||||
 
 | 
			
		||||
@@ -22,6 +22,7 @@ from documents.parsers import DocumentParser
 | 
			
		||||
from documents.parsers import ParseError
 | 
			
		||||
from documents.parsers import make_thumbnail_from_pdf
 | 
			
		||||
from paperless.models import OutputTypeChoices
 | 
			
		||||
from paperless_mail.models import MailRule
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class MailDocumentParser(DocumentParser):
 | 
			
		||||
@@ -121,7 +122,13 @@ class MailDocumentParser(DocumentParser):
 | 
			
		||||
        result.sort(key=lambda item: (item["prefix"], item["key"]))
 | 
			
		||||
        return result
 | 
			
		||||
 | 
			
		||||
    def parse(self, document_path: Path, mime_type: str, file_name=None):
 | 
			
		||||
    def parse(
 | 
			
		||||
        self,
 | 
			
		||||
        document_path: Path,
 | 
			
		||||
        mime_type: str,
 | 
			
		||||
        file_name=None,
 | 
			
		||||
        mailrule_id: int | None = None,
 | 
			
		||||
    ):
 | 
			
		||||
        """
 | 
			
		||||
        Parses the given .eml into formatted text, based on the decoded email.
 | 
			
		||||
 | 
			
		||||
@@ -180,7 +187,11 @@ class MailDocumentParser(DocumentParser):
 | 
			
		||||
            self.date = mail.date
 | 
			
		||||
 | 
			
		||||
        self.log.debug("Creating a PDF from the email")
 | 
			
		||||
        self.archive_path = self.generate_pdf(mail)
 | 
			
		||||
        if mailrule_id:
 | 
			
		||||
            rule = MailRule.objects.get(pk=mailrule_id)
 | 
			
		||||
            self.archive_path = self.generate_pdf(mail, rule.pdf_layout)
 | 
			
		||||
        else:
 | 
			
		||||
            self.archive_path = self.generate_pdf(mail)
 | 
			
		||||
 | 
			
		||||
    @staticmethod
 | 
			
		||||
    def parse_file_to_message(filepath: Path) -> MailMessage:
 | 
			
		||||
@@ -217,11 +228,19 @@ class MailDocumentParser(DocumentParser):
 | 
			
		||||
                f"{settings.TIKA_ENDPOINT}: {err}",
 | 
			
		||||
            ) from err
 | 
			
		||||
 | 
			
		||||
    def generate_pdf(self, mail_message: MailMessage) -> Path:
 | 
			
		||||
    def generate_pdf(
 | 
			
		||||
        self,
 | 
			
		||||
        mail_message: MailMessage,
 | 
			
		||||
        pdf_layout: MailRule.PdfLayout | None = None,
 | 
			
		||||
    ) -> Path:
 | 
			
		||||
        archive_path = Path(self.tempdir) / "merged.pdf"
 | 
			
		||||
 | 
			
		||||
        mail_pdf_file = self.generate_pdf_from_mail(mail_message)
 | 
			
		||||
 | 
			
		||||
        pdf_layout = (
 | 
			
		||||
            pdf_layout or settings.EMAIL_PARSE_DEFAULT_LAYOUT
 | 
			
		||||
        )  # EMAIL_PARSE_DEFAULT_LAYOUT is a MailRule.PdfLayout
 | 
			
		||||
 | 
			
		||||
        # If no HTML content, create the PDF from the message
 | 
			
		||||
        # Otherwise, create 2 PDFs and merge them with Gotenberg
 | 
			
		||||
        if not mail_message.html:
 | 
			
		||||
@@ -246,7 +265,15 @@ class MailDocumentParser(DocumentParser):
 | 
			
		||||
                if pdf_a_format is not None:
 | 
			
		||||
                    route.pdf_format(pdf_a_format)
 | 
			
		||||
 | 
			
		||||
                route.merge([mail_pdf_file, pdf_of_html_content])
 | 
			
		||||
                match pdf_layout:
 | 
			
		||||
                    case MailRule.PdfLayout.HTML_TEXT:
 | 
			
		||||
                        route.merge([pdf_of_html_content, mail_pdf_file])
 | 
			
		||||
                    case MailRule.PdfLayout.HTML_ONLY:
 | 
			
		||||
                        route.merge([pdf_of_html_content])
 | 
			
		||||
                    case MailRule.PdfLayout.TEXT_ONLY:
 | 
			
		||||
                        route.merge([mail_pdf_file])
 | 
			
		||||
                    case MailRule.PdfLayout.TEXT_HTML | _:
 | 
			
		||||
                        route.merge([mail_pdf_file, pdf_of_html_content])
 | 
			
		||||
 | 
			
		||||
                try:
 | 
			
		||||
                    response = route.run()
 | 
			
		||||
 
 | 
			
		||||
@@ -96,6 +96,7 @@ class MailRuleSerializer(OwnedObjectSerializer):
 | 
			
		||||
            "order",
 | 
			
		||||
            "attachment_type",
 | 
			
		||||
            "consumption_scope",
 | 
			
		||||
            "pdf_layout",
 | 
			
		||||
            "owner",
 | 
			
		||||
            "user_can_change",
 | 
			
		||||
            "permissions",
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,7 @@
 | 
			
		||||
import datetime
 | 
			
		||||
import logging
 | 
			
		||||
from pathlib import Path
 | 
			
		||||
from unittest import mock
 | 
			
		||||
 | 
			
		||||
import httpx
 | 
			
		||||
import pytest
 | 
			
		||||
@@ -662,3 +663,67 @@ class TestParser:
 | 
			
		||||
        request = httpx_mock.get_request()
 | 
			
		||||
 | 
			
		||||
        assert str(request.url) == "http://localhost:3000/forms/chromium/convert/html"
 | 
			
		||||
 | 
			
		||||
    @pytest.mark.httpx_mock(can_send_already_matched_responses=True)
    @mock.patch("gotenberg_client._merge.MergeRoute.merge")
    @mock.patch("paperless_mail.models.MailRule.objects.get")
    def test_generate_pdf_layout_options(
        self,
        mock_mailrule_get: mock.Mock,
        mock_merge_route: mock.Mock,
        httpx_mock: HTTPXMock,
        mail_parser: MailDocumentParser,
        html_email_file: Path,
        html_email_pdf_file: Path,
    ):
        """
        GIVEN:
            - Email message
        WHEN:
            - Email is parsed once per layout option
        THEN:
            - The PDFs handed to the Gotenberg merge route match the layout,
              both in number and in order
        """
        # Canned responses for every service the parser talks to; they are
        # reused for each layout because already matched responses may repeat
        httpx_mock.add_response(
            url="http://localhost:9998/tika/text",
            method="PUT",
            json={
                "Content-Type": "text/html",
                "X-TIKA:Parsed-By": [],
                "X-TIKA:content": "This is some Tika HTML text",
            },
        )
        httpx_mock.add_response(
            url="http://localhost:3000/forms/chromium/convert/html",
            method="POST",
            content=html_email_pdf_file.read_bytes(),
        )
        httpx_mock.add_response(
            url="http://localhost:3000/forms/pdfengines/merge",
            method="POST",
            content=b"Pretend merged PDF content",
        )

        # (layout value, number of merged PDFs, expected PDF names in order)
        layout_cases = (
            # 1 = MailRule.PdfLayout.TEXT_HTML
            (1, 2, ["email_as_pdf.pdf", "html.pdf"]),
            # 2 = MailRule.PdfLayout.HTML_TEXT
            (2, 2, ["html.pdf", "email_as_pdf.pdf"]),
            # 3 = MailRule.PdfLayout.HTML_ONLY
            (3, 1, ["html.pdf"]),
            # 4 = MailRule.PdfLayout.TEXT_ONLY
            (4, 1, ["email_as_pdf.pdf"]),
        )

        for layout_value, expected_count, expected_names in layout_cases:
            mock_mailrule_get.return_value = mock.Mock(pdf_layout=layout_value)
            mail_parser.parse(
                document_path=html_email_file,
                mime_type="message/rfc822",
                mailrule_id=1,
            )
            # call_args holds the most recent merge call, i.e. this layout's
            merged_files = mock_merge_route.call_args.args[0]
            assert len(merged_files) == expected_count
            for position, expected_name in enumerate(expected_names):
                assert merged_files[position].name == expected_name
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user