diff --git a/docs/configuration.md b/docs/configuration.md index 359a51482..b81c10b0d 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -198,6 +198,18 @@ Docker, this may be the `environment` key of the webserver or a containing the configuration parameters. Be sure to use the correct format and watch out for indentation if editing the YAML file. +### Email Parsing + +#### [`PAPERLESS_EMAIL_PARSE_DEFAULT_LAYOUT=`](#PAPERLESS_EMAIL_PARSE_DEFAULT_LAYOUT) {#PAPERLESS_EMAIL_PARSE_DEFAULT_LAYOUT} + +: The default layout to use for emails that are consumed as documents. Must be one of the integer choices below. Note that mail +rules can specify this setting, thus this fallback is used for the default selection and for .eml files consumed by other means. + + - `1` = Text, then HTML + - `2` = HTML, then text + - `3` = HTML only + - `4` = Text only + ## Paths and folders #### [`PAPERLESS_CONSUMPTION_DIR=`](#PAPERLESS_CONSUMPTION_DIR) {#PAPERLESS_CONSUMPTION_DIR} diff --git a/src-ui/messages.xlf b/src-ui/messages.xlf index 1072e89c6..34959e4a0 100644 --- a/src-ui/messages.xlf +++ b/src-ui/messages.xlf @@ -569,7 +569,7 @@ src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html - 75 + 76 src/app/components/common/edit-dialog/storage-path-edit-dialog/storage-path-edit-dialog.component.html @@ -1453,7 +1453,7 @@ src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html - 74 + 75 src/app/components/common/edit-dialog/storage-path-edit-dialog/storage-path-edit-dialog.component.html @@ -2062,7 +2062,7 @@ src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts - 64 + 88 src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.html @@ -3989,71 +3989,78 @@ 43 + + PDF layout + + src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html + 44 + + Include only files matching 
src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html - 46 + 47 Optional. Wildcards e.g. *.pdf or *invoice* allowed. Can be comma-separated list. Case insensitive. src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html - 46 + 47 src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html - 47 + 48 Exclude files matching src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html - 47 + 48 Action src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html - 53 + 54 Only performed if the mail is processed. src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html - 53 + 54 Action parameter src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html - 55 + 56 Assign title from src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html - 57 + 58 Assign owner from rule src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html - 58 + 59 Assign document type src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html - 62 + 63 src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.html @@ -4064,14 +4071,14 @@ Assign correspondent from src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html - 63 + 64 Assign correspondent src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html - 65 + 66 src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.html @@ -4082,7 +4089,7 @@ Error src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html - 72 + 73 
src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.html @@ -4097,123 +4104,158 @@ Only process attachments src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts - 38 + 39 src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts - 49 + 50 Process all files, including 'inline' attachments src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts - 42 + 43 Process message as .eml src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts - 53 + 54 Process message as .eml and attachments separately src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts - 57 + 58 + + + + System default + + src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts + 65 + + + + Text, then HTML + + src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts + 69 + + + + HTML, then text + + src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts + 73 + + + + HTML only + + src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts + 77 + + + + Text only + + src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts + 81 Move to specified folder src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts - 68 + 92 Mark as read, don't process read mails src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts - 72 + 96 Flag the mail, don't process flagged mails src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts - 76 + 100 Tag the mail with specified tag, don't process tagged mails 
src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts - 80 + 104 Use subject as title src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts - 87 + 111 Use attachment filename as title src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts - 91 + 115 Do not assign title from this rule src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts - 95 + 119 Do not assign a correspondent src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts - 102 + 126 Use mail address src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts - 106 + 130 Use name (or mail address if not available) src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts - 110 + 134 Use correspondent selected below src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts - 114 + 138 Create new mail rule src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts - 166 + 190 Edit mail rule src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts - 170 + 194 diff --git a/src-ui/src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html b/src-ui/src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html index a9ad3040b..afe6c2ab9 100644 --- a/src-ui/src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html +++ b/src-ui/src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html @@ -41,6 +41,7 @@
+
diff --git a/src-ui/src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts b/src-ui/src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts index f2d8236bc..3d4924c0b 100644 --- a/src-ui/src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts +++ b/src-ui/src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts @@ -18,6 +18,7 @@ import { MailMetadataTitleOption, MailRule, MailRuleConsumptionScope, + MailRulePdfLayout, } from 'src/app/data/mail-rule' import { CorrespondentService } from 'src/app/services/rest/correspondent.service' import { DocumentTypeService } from 'src/app/services/rest/document-type.service' @@ -58,6 +59,29 @@ const CONSUMPTION_SCOPE_OPTIONS = [ }, ] +const PDF_LAYOUT_OPTIONS = [ + { + id: MailRulePdfLayout.Default, + name: $localize`System default`, + }, + { + id: MailRulePdfLayout.TextHtml, + name: $localize`Text, then HTML`, + }, + { + id: MailRulePdfLayout.HtmlText, + name: $localize`HTML, then text`, + }, + { + id: MailRulePdfLayout.HtmlOnly, + name: $localize`HTML only`, + }, + { + id: MailRulePdfLayout.TextOnly, + name: $localize`Text only`, + }, +] + const ACTION_OPTIONS = [ { id: MailAction.Delete, @@ -184,6 +208,7 @@ export class MailRuleEditDialogComponent extends EditDialogComponent { filter_attachment_filename_exclude: new FormControl(null), maximum_age: new FormControl(null), attachment_type: new FormControl(MailFilterAttachmentType.Attachments), + pdf_layout: new FormControl(MailRulePdfLayout.Default), consumption_scope: new FormControl(MailRuleConsumptionScope.Attachments), order: new FormControl(null), action: new FormControl(MailAction.MarkRead), @@ -232,4 +257,8 @@ export class MailRuleEditDialogComponent extends EditDialogComponent { get consumptionScopeOptions() { return CONSUMPTION_SCOPE_OPTIONS } + + get pdfLayoutOptions() { + return PDF_LAYOUT_OPTIONS + } } diff --git 
a/src-ui/src/app/data/mail-rule.ts b/src-ui/src/app/data/mail-rule.ts index 6e2c468a2..4c47b6500 100644 --- a/src-ui/src/app/data/mail-rule.ts +++ b/src-ui/src/app/data/mail-rule.ts @@ -11,6 +11,14 @@ export enum MailRuleConsumptionScope { Everything = 3, } +export enum MailRulePdfLayout { + Default = 0, + TextHtml = 1, + HtmlText = 2, + HtmlOnly = 3, + TextOnly = 4, +} + export enum MailAction { Delete = 1, Move = 2, @@ -59,6 +67,8 @@ export interface MailRule extends ObjectWithPermissions { attachment_type: MailFilterAttachmentType + pdf_layout: MailRulePdfLayout + action: MailAction action_parameter?: string diff --git a/src/documents/consumer.py b/src/documents/consumer.py index 35c18ac7b..81739fa7a 100644 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -48,6 +48,7 @@ from documents.templating.workflows import parse_w_workflow_placeholders from documents.utils import copy_basic_file_stats from documents.utils import copy_file_with_basic_stats from documents.utils import run_subprocess +from paperless_mail.parsers import MailDocumentParser class WorkflowTriggerPlugin( @@ -479,7 +480,18 @@ class ConsumerPlugin( ConsumerStatusShortMessage.PARSING_DOCUMENT, ) self.log.debug(f"Parsing {self.filename}...") - document_parser.parse(self.working_copy, mime_type, self.filename) + if ( + isinstance(document_parser, MailDocumentParser) + and self.input_doc.mailrule_id + ): + document_parser.parse( + self.working_copy, + mime_type, + self.filename, + self.input_doc.mailrule_id, + ) + else: + document_parser.parse(self.working_copy, mime_type, self.filename) self.log.debug(f"Generating thumbnail for {self.filename}...") self._send_progress( diff --git a/src/documents/tests/test_consumer.py b/src/documents/tests/test_consumer.py index a862d7fa0..6f576ab24 100644 --- a/src/documents/tests/test_consumer.py +++ b/src/documents/tests/test_consumer.py @@ -21,6 +21,7 @@ from guardian.core import ObjectPermissionChecker from documents.consumer import ConsumerError 
from documents.data_models import DocumentMetadataOverrides +from documents.data_models import DocumentSource from documents.models import Correspondent from documents.models import CustomField from documents.models import Document @@ -35,6 +36,8 @@ from documents.tasks import sanity_check from documents.tests.utils import DirectoriesMixin from documents.tests.utils import FileSystemAssertsMixin from documents.tests.utils import GetConsumerMixin +from paperless_mail.models import MailRule +from paperless_mail.parsers import MailDocumentParser class TestAttributes(UnittestTestCase): @@ -243,6 +246,8 @@ def fake_magic_from_file(file, *, mime=False): return "image/png" elif os.path.splitext(file)[1] == ".webp": return "image/webp" + elif os.path.splitext(file)[1] == ".eml": + return "message/rfc822" else: return "unknown" else: @@ -975,6 +980,59 @@ class TestConsumer( self.assertEqual(command[0], "qpdf") self.assertEqual(command[1], "--replace-input") + @mock.patch("paperless_mail.models.MailRule.objects.get") + @mock.patch("paperless_mail.parsers.MailDocumentParser.parse") + @mock.patch("documents.parsers.document_consumer_declaration.send") + def test_mail_parser_receives_mailrule( + self, + mock_consumer_declaration_send: mock.Mock, + mock_mail_parser_parse: mock.Mock, + mock_mailrule_get: mock.Mock, + ): + """ + GIVEN: + - A mail document from a mail rule + WHEN: + - The consumer is run + THEN: + - The mail parser should receive the mail rule + """ + mock_consumer_declaration_send.return_value = [ + ( + None, + { + "parser": MailDocumentParser, + "mime_types": {"message/rfc822": ".eml"}, + "weight": 0, + }, + ), + ] + mock_mailrule_get.return_value = mock.Mock( + pdf_layout=MailRule.PdfLayout.HTML_ONLY, + ) + with self.get_consumer( + filepath=( + Path(__file__).parent.parent.parent + / Path("paperless_mail") + / Path("tests") + / Path("samples") + ).resolve() + / "html.eml", + source=DocumentSource.MailFetch, + mailrule_id=1, + ) as consumer: + # fails because no 
gotenberg + with self.assertRaises( + ConsumerError, + ): + consumer.run() + mock_mail_parser_parse.assert_called_once_with( + consumer.working_copy, + "message/rfc822", + file_name="sample.pdf", + mailrule=mock_mailrule_get.return_value, + ) + @mock.patch("documents.consumer.magic.from_file", fake_magic_from_file) class TestConsumerCreatedDate(DirectoriesMixin, GetConsumerMixin, TestCase): diff --git a/src/documents/tests/test_migration_workflows.py b/src/documents/tests/test_migration_workflows.py index 69f5ed5fb..989518818 100644 --- a/src/documents/tests/test_migration_workflows.py +++ b/src/documents/tests/test_migration_workflows.py @@ -8,7 +8,7 @@ class TestMigrateWorkflow(TestMigrations): dependencies = ( ( "paperless_mail", - "0028_alter_mailaccount_password_and_more", + "0029_mailrule_pdf_layout", ), ) diff --git a/src/documents/tests/utils.py b/src/documents/tests/utils.py index 739433bb6..fc50b3948 100644 --- a/src/documents/tests/utils.py +++ b/src/documents/tests/utils.py @@ -340,11 +340,16 @@ class GetConsumerMixin: filepath: Path, overrides: DocumentMetadataOverrides | None = None, source: DocumentSource = DocumentSource.ConsumeFolder, + mailrule_id: int | None = None, ) -> Generator[ConsumerPlugin, None, None]: # Store this for verification self.status = DummyProgressManager(filepath.name, None) reader = ConsumerPlugin( - ConsumableDocument(source, original_file=filepath), + ConsumableDocument( + source, + original_file=filepath, + mailrule_id=mailrule_id or None, + ), overrides or DocumentMetadataOverrides(), self.status, # type: ignore self.dirs.scratch_dir, diff --git a/src/locale/en_US/LC_MESSAGES/django.po b/src/locale/en_US/LC_MESSAGES/django.po index 0ef4c1dc8..57494a5c2 100644 --- a/src/locale/en_US/LC_MESSAGES/django.po +++ b/src/locale/en_US/LC_MESSAGES/django.po @@ -2,7 +2,7 @@ msgid "" msgstr "" "Project-Id-Version: paperless-ngx\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2025-01-27 08:19-0800\n" +"POT-Creation-Date: 2025-01-28 
12:17-0800\n" "PO-Revision-Date: 2022-02-17 04:17\n" "Last-Translator: \n" "Language-Team: English\n" @@ -90,7 +90,7 @@ msgid "Automatic" msgstr "" #: documents/models.py:67 documents/models.py:433 documents/models.py:1493 -#: paperless_mail/models.py:23 paperless_mail/models.py:136 +#: paperless_mail/models.py:23 paperless_mail/models.py:143 msgid "name" msgstr "" @@ -276,7 +276,7 @@ msgstr "" msgid "warning" msgstr "" -#: documents/models.py:387 paperless_mail/models.py:350 +#: documents/models.py:387 paperless_mail/models.py:363 msgid "error" msgstr "" @@ -818,7 +818,7 @@ msgstr "" msgid "filter filename" msgstr "" -#: documents/models.py:1066 paperless_mail/models.py:193 +#: documents/models.py:1066 paperless_mail/models.py:200 msgid "" "Only consume documents which entirely match this filename if specified. " "Wildcards such as *.pdf or *invoice* are allowed. Case insensitive." @@ -988,15 +988,15 @@ msgid "" "Assign a document title, can include some placeholders, see documentation." msgstr "" -#: documents/models.py:1287 paperless_mail/models.py:261 +#: documents/models.py:1287 paperless_mail/models.py:274 msgid "assign this tag" msgstr "" -#: documents/models.py:1296 paperless_mail/models.py:269 +#: documents/models.py:1296 paperless_mail/models.py:282 msgid "assign this document type" msgstr "" -#: documents/models.py:1305 paperless_mail/models.py:283 +#: documents/models.py:1305 paperless_mail/models.py:296 msgid "assign this correspondent" msgstr "" @@ -1112,7 +1112,7 @@ msgstr "" msgid "workflow actions" msgstr "" -#: documents/models.py:1495 paperless_mail/models.py:138 +#: documents/models.py:1495 paperless_mail/models.py:145 msgid "order" msgstr "" @@ -1124,7 +1124,7 @@ msgstr "" msgid "actions" msgstr "" -#: documents/models.py:1511 paperless_mail/models.py:147 +#: documents/models.py:1511 paperless_mail/models.py:154 msgid "enabled" msgstr "" @@ -1838,161 +1838,185 @@ msgid "Process all files, including 'inline' attachments." 
msgstr "" #: paperless_mail/models.py:119 -msgid "Delete" +msgid "System default" msgstr "" #: paperless_mail/models.py:120 -msgid "Move to specified folder" +msgid "Text, then HTML" msgstr "" #: paperless_mail/models.py:121 -msgid "Mark as read, don't process read mails" +msgid "HTML, then text" msgstr "" #: paperless_mail/models.py:122 -msgid "Flag the mail, don't process flagged mails" +msgid "HTML only" msgstr "" #: paperless_mail/models.py:123 -msgid "Tag the mail with specified tag, don't process tagged mails" +msgid "Text only" msgstr "" #: paperless_mail/models.py:126 -msgid "Use subject as title" +msgid "Delete" msgstr "" #: paperless_mail/models.py:127 -msgid "Use attachment filename as title" +msgid "Move to specified folder" msgstr "" #: paperless_mail/models.py:128 -msgid "Do not assign title from rule" +msgid "Mark as read, don't process read mails" msgstr "" -#: paperless_mail/models.py:131 -msgid "Do not assign a correspondent" +#: paperless_mail/models.py:129 +msgid "Flag the mail, don't process flagged mails" msgstr "" -#: paperless_mail/models.py:132 -msgid "Use mail address" +#: paperless_mail/models.py:130 +msgid "Tag the mail with specified tag, don't process tagged mails" msgstr "" #: paperless_mail/models.py:133 -msgid "Use name (or mail address if not available)" +msgid "Use subject as title" msgstr "" #: paperless_mail/models.py:134 +msgid "Use attachment filename as title" +msgstr "" + +#: paperless_mail/models.py:135 +msgid "Do not assign title from rule" +msgstr "" + +#: paperless_mail/models.py:138 +msgid "Do not assign a correspondent" +msgstr "" + +#: paperless_mail/models.py:139 +msgid "Use mail address" +msgstr "" + +#: paperless_mail/models.py:140 +msgid "Use name (or mail address if not available)" +msgstr "" + +#: paperless_mail/models.py:141 msgid "Use correspondent selected below" msgstr "" -#: paperless_mail/models.py:144 +#: paperless_mail/models.py:151 msgid "account" msgstr "" -#: paperless_mail/models.py:150 
paperless_mail/models.py:305 +#: paperless_mail/models.py:157 paperless_mail/models.py:318 msgid "folder" msgstr "" -#: paperless_mail/models.py:154 +#: paperless_mail/models.py:161 msgid "" "Subfolders must be separated by a delimiter, often a dot ('.') or slash " "('/'), but it varies by mail server." msgstr "" -#: paperless_mail/models.py:160 +#: paperless_mail/models.py:167 msgid "filter from" msgstr "" -#: paperless_mail/models.py:167 +#: paperless_mail/models.py:174 msgid "filter to" msgstr "" -#: paperless_mail/models.py:174 +#: paperless_mail/models.py:181 msgid "filter subject" msgstr "" -#: paperless_mail/models.py:181 +#: paperless_mail/models.py:188 msgid "filter body" msgstr "" -#: paperless_mail/models.py:188 +#: paperless_mail/models.py:195 msgid "filter attachment filename inclusive" msgstr "" -#: paperless_mail/models.py:200 +#: paperless_mail/models.py:207 msgid "filter attachment filename exclusive" msgstr "" -#: paperless_mail/models.py:205 +#: paperless_mail/models.py:212 msgid "" "Do not consume documents which entirely match this filename if specified. " "Wildcards such as *.pdf or *invoice* are allowed. Case insensitive." msgstr "" -#: paperless_mail/models.py:212 +#: paperless_mail/models.py:219 msgid "maximum age" msgstr "" -#: paperless_mail/models.py:214 +#: paperless_mail/models.py:221 msgid "Specified in days." msgstr "" -#: paperless_mail/models.py:218 +#: paperless_mail/models.py:225 msgid "attachment type" msgstr "" -#: paperless_mail/models.py:222 +#: paperless_mail/models.py:229 msgid "" "Inline attachments include embedded images, so it's best to combine this " "option with a filename filter." 
msgstr "" -#: paperless_mail/models.py:228 +#: paperless_mail/models.py:235 msgid "consumption scope" msgstr "" -#: paperless_mail/models.py:234 +#: paperless_mail/models.py:241 +msgid "pdf layout" +msgstr "" + +#: paperless_mail/models.py:247 msgid "action" msgstr "" -#: paperless_mail/models.py:240 +#: paperless_mail/models.py:253 msgid "action parameter" msgstr "" -#: paperless_mail/models.py:245 +#: paperless_mail/models.py:258 msgid "" "Additional parameter for the action selected above, i.e., the target folder " "of the move to folder action. Subfolders must be separated by dots." msgstr "" -#: paperless_mail/models.py:253 +#: paperless_mail/models.py:266 msgid "assign title from" msgstr "" -#: paperless_mail/models.py:273 +#: paperless_mail/models.py:286 msgid "assign correspondent from" msgstr "" -#: paperless_mail/models.py:287 +#: paperless_mail/models.py:300 msgid "Assign the rule owner to documents" msgstr "" -#: paperless_mail/models.py:313 +#: paperless_mail/models.py:326 msgid "uid" msgstr "" -#: paperless_mail/models.py:321 +#: paperless_mail/models.py:334 msgid "subject" msgstr "" -#: paperless_mail/models.py:329 +#: paperless_mail/models.py:342 msgid "received" msgstr "" -#: paperless_mail/models.py:336 +#: paperless_mail/models.py:349 msgid "processed" msgstr "" -#: paperless_mail/models.py:342 +#: paperless_mail/models.py:355 msgid "status" msgstr "" diff --git a/src/paperless/settings.py b/src/paperless/settings.py index a817abd70..846b9e0ee 100644 --- a/src/paperless/settings.py +++ b/src/paperless/settings.py @@ -1030,6 +1030,11 @@ CONVERT_MEMORY_LIMIT = os.getenv("PAPERLESS_CONVERT_MEMORY_LIMIT") GS_BINARY = os.getenv("PAPERLESS_GS_BINARY", "gs") +# Fallback layout for .eml consumption +EMAIL_PARSE_DEFAULT_LAYOUT = __get_int( + "PAPERLESS_EMAIL_PARSE_DEFAULT_LAYOUT", + 1, # MailRule.PdfLayout.TEXT_HTML but that can't be imported here +) # Pre-2.x versions of Paperless stored your documents locally with GPG # encryption, but that is no longer 
the default. This behaviour is still diff --git a/src/paperless_mail/migrations/0029_mailrule_pdf_layout.py b/src/paperless_mail/migrations/0029_mailrule_pdf_layout.py new file mode 100644 index 000000000..fe7a93b71 --- /dev/null +++ b/src/paperless_mail/migrations/0029_mailrule_pdf_layout.py @@ -0,0 +1,28 @@ +# Generated by Django 5.1.3 on 2024-11-24 12:39 + +from django.db import migrations +from django.db import models + + +class Migration(migrations.Migration): + dependencies = [ + ("paperless_mail", "0028_alter_mailaccount_password_and_more"), + ] + + operations = [ + migrations.AddField( + model_name="mailrule", + name="pdf_layout", + field=models.PositiveIntegerField( + choices=[ + (0, "System default"), + (1, "Text, then HTML"), + (2, "HTML, then text"), + (3, "HTML only"), + (4, "Text only"), + ], + default=0, + verbose_name="pdf layout", + ), + ), + ] diff --git a/src/paperless_mail/models.py b/src/paperless_mail/models.py index 46b9db1ff..cf33a056b 100644 --- a/src/paperless_mail/models.py +++ b/src/paperless_mail/models.py @@ -115,6 +115,13 @@ class MailRule(document_models.ModelWithOwner): ATTACHMENTS_ONLY = 1, _("Only process attachments.") EVERYTHING = 2, _("Process all files, including 'inline' attachments.") + class PdfLayout(models.IntegerChoices): + DEFAULT = 0, _("System default") + TEXT_HTML = 1, _("Text, then HTML") + HTML_TEXT = 2, _("HTML, then text") + HTML_ONLY = 3, _("HTML only") + TEXT_ONLY = 4, _("Text only") + class MailAction(models.IntegerChoices): DELETE = 1, _("Delete") MOVE = 2, _("Move to specified folder") @@ -230,6 +237,12 @@ class MailRule(document_models.ModelWithOwner): default=ConsumptionScope.ATTACHMENTS_ONLY, ) + pdf_layout = models.PositiveIntegerField( + _("pdf layout"), + choices=PdfLayout.choices, + default=PdfLayout.DEFAULT, + ) + action = models.PositiveIntegerField( _("action"), choices=MailAction.choices, diff --git a/src/paperless_mail/parsers.py b/src/paperless_mail/parsers.py index d98fb7238..44032a2e9 100644 
--- a/src/paperless_mail/parsers.py +++ b/src/paperless_mail/parsers.py @@ -22,6 +22,7 @@ from documents.parsers import DocumentParser from documents.parsers import ParseError from documents.parsers import make_thumbnail_from_pdf from paperless.models import OutputTypeChoices +from paperless_mail.models import MailRule class MailDocumentParser(DocumentParser): @@ -121,7 +122,13 @@ class MailDocumentParser(DocumentParser): result.sort(key=lambda item: (item["prefix"], item["key"])) return result - def parse(self, document_path: Path, mime_type: str, file_name=None): + def parse( + self, + document_path: Path, + mime_type: str, + file_name=None, + mailrule_id: int | None = None, + ): """ Parses the given .eml into formatted text, based on the decoded email. @@ -180,7 +187,11 @@ class MailDocumentParser(DocumentParser): self.date = mail.date self.log.debug("Creating a PDF from the email") - self.archive_path = self.generate_pdf(mail) + if mailrule_id: + rule = MailRule.objects.get(pk=mailrule_id) + self.archive_path = self.generate_pdf(mail, rule.pdf_layout) + else: + self.archive_path = self.generate_pdf(mail) @staticmethod def parse_file_to_message(filepath: Path) -> MailMessage: @@ -217,11 +228,19 @@ class MailDocumentParser(DocumentParser): f"{settings.TIKA_ENDPOINT}: {err}", ) from err - def generate_pdf(self, mail_message: MailMessage) -> Path: + def generate_pdf( + self, + mail_message: MailMessage, + pdf_layout: MailRule.PdfLayout | None = None, + ) -> Path: archive_path = Path(self.tempdir) / "merged.pdf" mail_pdf_file = self.generate_pdf_from_mail(mail_message) + pdf_layout = ( + pdf_layout or settings.EMAIL_PARSE_DEFAULT_LAYOUT + ) # EMAIL_PARSE_DEFAULT_LAYOUT is a MailRule.PdfLayout + # If no HTML content, create the PDF from the message # Otherwise, create 2 PDFs and merge them with Gotenberg if not mail_message.html: @@ -246,7 +265,15 @@ class MailDocumentParser(DocumentParser): if pdf_a_format is not None: route.pdf_format(pdf_a_format) - 
route.merge([mail_pdf_file, pdf_of_html_content]) + match pdf_layout: + case MailRule.PdfLayout.HTML_TEXT: + route.merge([pdf_of_html_content, mail_pdf_file]) + case MailRule.PdfLayout.HTML_ONLY: + route.merge([pdf_of_html_content]) + case MailRule.PdfLayout.TEXT_ONLY: + route.merge([mail_pdf_file]) + case MailRule.PdfLayout.TEXT_HTML | _: + route.merge([mail_pdf_file, pdf_of_html_content]) try: response = route.run() diff --git a/src/paperless_mail/serialisers.py b/src/paperless_mail/serialisers.py index 5623f62c3..e9836b421 100644 --- a/src/paperless_mail/serialisers.py +++ b/src/paperless_mail/serialisers.py @@ -96,6 +96,7 @@ class MailRuleSerializer(OwnedObjectSerializer): "order", "attachment_type", "consumption_scope", + "pdf_layout", "owner", "user_can_change", "permissions", diff --git a/src/paperless_mail/tests/test_parsers.py b/src/paperless_mail/tests/test_parsers.py index e8186ea0f..dbd2c82cd 100644 --- a/src/paperless_mail/tests/test_parsers.py +++ b/src/paperless_mail/tests/test_parsers.py @@ -1,6 +1,7 @@ import datetime import logging from pathlib import Path +from unittest import mock import httpx import pytest @@ -662,3 +663,67 @@ class TestParser: request = httpx_mock.get_request() assert str(request.url) == "http://localhost:3000/forms/chromium/convert/html" + + @pytest.mark.httpx_mock(can_send_already_matched_responses=True) + @mock.patch("gotenberg_client._merge.MergeRoute.merge") + @mock.patch("paperless_mail.models.MailRule.objects.get") + def test_generate_pdf_layout_options( + self, + mock_mailrule_get: mock.Mock, + mock_merge_route: mock.Mock, + httpx_mock: HTTPXMock, + mail_parser: MailDocumentParser, + html_email_file: Path, + html_email_pdf_file: Path, + ): + """ + GIVEN: + - Email message + WHEN: + - Email is parsed with different layout options + THEN: + - Gotenberg is called with the correct layout option + """ + httpx_mock.add_response( + url="http://localhost:9998/tika/text", + method="PUT", + json={ + "Content-Type": "text/html", 
+ "X-TIKA:Parsed-By": [], + "X-TIKA:content": "This is some Tika HTML text", + }, + ) + httpx_mock.add_response( + url="http://localhost:3000/forms/chromium/convert/html", + method="POST", + content=html_email_pdf_file.read_bytes(), + ) + httpx_mock.add_response( + url="http://localhost:3000/forms/pdfengines/merge", + method="POST", + content=b"Pretend merged PDF content", + ) + + def test_layout_option(layout_option, expected_calls, expected_pdf_names): + mock_mailrule_get.return_value = mock.Mock(pdf_layout=layout_option) + mail_parser.parse( + document_path=html_email_file, + mime_type="message/rfc822", + mailrule_id=1, + ) + args, _ = mock_merge_route.call_args + assert len(args[0]) == expected_calls + for i, pdf in enumerate(expected_pdf_names): + assert args[0][i].name == pdf + + # 1 = MailRule.PdfLayout.TEXT_HTML + test_layout_option(1, 2, ["email_as_pdf.pdf", "html.pdf"]) + + # 2 = MailRule.PdfLayout.HTML_TEXT + test_layout_option(2, 2, ["html.pdf", "email_as_pdf.pdf"]) + + # 3 = MailRule.PdfLayout.HTML_ONLY + test_layout_option(3, 1, ["html.pdf"]) + + # 4 = MailRule.PdfLayout.TEXT_ONLY + test_layout_option(4, 1, ["email_as_pdf.pdf"])