diff --git a/src-ui/src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html b/src-ui/src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html
index a9ad3040b..afe6c2ab9 100644
--- a/src-ui/src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html
+++ b/src-ui/src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html
@@ -41,6 +41,7 @@
diff --git a/src-ui/src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts b/src-ui/src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts
index f2d8236bc..624a4f676 100644
--- a/src-ui/src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts
+++ b/src-ui/src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts
@@ -18,6 +18,7 @@ import {
MailMetadataTitleOption,
MailRule,
MailRuleConsumptionScope,
+ MailRulePdfLayout,
} from 'src/app/data/mail-rule'
import { CorrespondentService } from 'src/app/services/rest/correspondent.service'
import { DocumentTypeService } from 'src/app/services/rest/document-type.service'
@@ -58,6 +59,25 @@ const CONSUMPTION_SCOPE_OPTIONS = [
},
]
+const PDF_LAYOUT_OPTIONS = [ // one { id, name } entry per MailRulePdfLayout member
+ {
+ id: MailRulePdfLayout.Text_Html,
+ name: $localize`Text, then HTML`,
+ },
+ {
+ id: MailRulePdfLayout.Html_Text,
+ name: $localize`HTML, then text`,
+ },
+ {
+ id: MailRulePdfLayout.Html_only,
+ name: $localize`HTML only`,
+ },
+ {
+ id: MailRulePdfLayout.Text_only,
+ name: $localize`Text only`,
+ },
+]
+
const ACTION_OPTIONS = [
{
id: MailAction.Delete,
@@ -184,6 +204,7 @@ export class MailRuleEditDialogComponent extends EditDialogComponent
{
filter_attachment_filename_exclude: new FormControl(null),
maximum_age: new FormControl(null),
attachment_type: new FormControl(MailFilterAttachmentType.Attachments),
+ pdf_layout: new FormControl(MailRulePdfLayout.Text_Html),
consumption_scope: new FormControl(MailRuleConsumptionScope.Attachments),
order: new FormControl(null),
action: new FormControl(MailAction.MarkRead),
@@ -232,4 +253,8 @@ export class MailRuleEditDialogComponent extends EditDialogComponent {
get consumptionScopeOptions() {
return CONSUMPTION_SCOPE_OPTIONS
}
+
+ get pdfLayoutOptions() {
+ return PDF_LAYOUT_OPTIONS // module-level list of PDF layout choices
+ }
}
diff --git a/src-ui/src/app/data/mail-rule.ts b/src-ui/src/app/data/mail-rule.ts
index 6e2c468a2..c93dfdfa1 100644
--- a/src-ui/src/app/data/mail-rule.ts
+++ b/src-ui/src/app/data/mail-rule.ts
@@ -11,6 +11,13 @@ export enum MailRuleConsumptionScope {
Everything = 3,
}
+export enum MailRulePdfLayout {
+ Text_Html = 1, // numeric values match the backend MailRule.PdfLayout choices (1-4)
+ Html_Text = 2,
+ Html_only = 3,
+ Text_only = 4,
+}
+
export enum MailAction {
Delete = 1,
Move = 2,
@@ -59,6 +66,8 @@ export interface MailRule extends ObjectWithPermissions {
attachment_type: MailFilterAttachmentType
+ pdf_layout: MailRulePdfLayout
+
action: MailAction
action_parameter?: string
diff --git a/src/documents/consumer.py b/src/documents/consumer.py
index ec92ddba8..524f5fd77 100644
--- a/src/documents/consumer.py
+++ b/src/documents/consumer.py
@@ -47,6 +47,7 @@ from documents.templating.workflows import parse_w_workflow_placeholders
from documents.utils import copy_basic_file_stats
from documents.utils import copy_file_with_basic_stats
from documents.utils import run_subprocess
+from paperless_mail.parsers import MailDocumentParser
class WorkflowTriggerPlugin(
@@ -474,7 +475,18 @@ class ConsumerPlugin(
ConsumerStatusShortMessage.PARSING_DOCUMENT,
)
self.log.debug(f"Parsing {self.filename}...")
- document_parser.parse(self.working_copy, mime_type, self.filename)
+ # Hand the originating mail rule id to the mail parser only, and only
+ # when this document actually came from a mail rule; every other parser
+ # is still called with the usual three arguments.
+ parse_kwargs = {}
+ if (
+     isinstance(document_parser, MailDocumentParser)
+     and self.input_doc.mailrule_id
+ ):
+     parse_kwargs["mailrule"] = self.input_doc.mailrule_id
+ document_parser.parse(
+     self.working_copy, mime_type, self.filename, **parse_kwargs
+ )
self.log.debug(f"Generating thumbnail for {self.filename}...")
self._send_progress(
diff --git a/src/paperless_mail/migrations/0029_mailrule_pdf_layout.py b/src/paperless_mail/migrations/0029_mailrule_pdf_layout.py
new file mode 100644
index 000000000..ae0afacc0
--- /dev/null
+++ b/src/paperless_mail/migrations/0029_mailrule_pdf_layout.py
@@ -0,0 +1,27 @@
+# Generated by Django 5.1.3 on 2024-11-24 12:39
+
+from django.db import migrations
+from django.db import models
+
+
+class Migration(migrations.Migration):  # adds the pdf_layout field to the mailrule model
+ dependencies = [
+ ("paperless_mail", "0028_alter_mailaccount_password_and_more"),
+ ]
+
+ operations = [
+ migrations.AddField(
+ model_name="mailrule",
+ name="pdf_layout",
+ field=models.PositiveIntegerField(
+ choices=[  # same values and labels as MailRule.PdfLayout
+ (1, "Text, then HTML"),
+ (2, "HTML, then text"),
+ (3, "HTML only"),
+ (4, "Text only"),
+ ],
+ default=1,  # 1 = "Text, then HTML"
+ verbose_name="pdf layout",
+ ),
+ ),
+ ]
diff --git a/src/paperless_mail/models.py b/src/paperless_mail/models.py
index 46b9db1ff..2f7640325 100644
--- a/src/paperless_mail/models.py
+++ b/src/paperless_mail/models.py
@@ -115,6 +115,12 @@ class MailRule(document_models.ModelWithOwner):
ATTACHMENTS_ONLY = 1, _("Only process attachments.")
EVERYTHING = 2, _("Process all files, including 'inline' attachments.")
+ class PdfLayout(models.IntegerChoices):
+ TEXT_HTML = 1, _("Text, then HTML")  # default of MailRule.pdf_layout
+ HTML_TEXT = 2, _("HTML, then text")
+ HTML_ONLY = 3, _("HTML only")
+ TEXT_ONLY = 4, _("Text only")
+
class MailAction(models.IntegerChoices):
DELETE = 1, _("Delete")
MOVE = 2, _("Move to specified folder")
@@ -230,6 +236,12 @@ class MailRule(document_models.ModelWithOwner):
default=ConsumptionScope.ATTACHMENTS_ONLY,
)
+ pdf_layout = models.PositiveIntegerField(
+ _("pdf layout"),
+ choices=PdfLayout.choices,
+ default=PdfLayout.TEXT_HTML,
+ )
+
action = models.PositiveIntegerField(
_("action"),
choices=MailAction.choices,
diff --git a/src/paperless_mail/parsers.py b/src/paperless_mail/parsers.py
index d98fb7238..682f8285c 100644
--- a/src/paperless_mail/parsers.py
+++ b/src/paperless_mail/parsers.py
@@ -22,6 +22,7 @@ from documents.parsers import DocumentParser
from documents.parsers import ParseError
from documents.parsers import make_thumbnail_from_pdf
from paperless.models import OutputTypeChoices
+from paperless_mail.models import MailRule
class MailDocumentParser(DocumentParser):
@@ -121,7 +122,13 @@ class MailDocumentParser(DocumentParser):
result.sort(key=lambda item: (item["prefix"], item["key"]))
return result
- def parse(self, document_path: Path, mime_type: str, file_name=None):
+ def parse(
+ self,
+ document_path: Path,
+ mime_type: str,
+ file_name=None,
+ mailrule: int | None = None,
+ ):
"""
Parses the given .eml into formatted text, based on the decoded email.
@@ -180,7 +187,11 @@ class MailDocumentParser(DocumentParser):
self.date = mail.date
self.log.debug("Creating a PDF from the email")
- self.archive_path = self.generate_pdf(mail)
+ # A rule id may no longer resolve (e.g. rule deleted after queueing): fall
+ # back to the default layout rather than raising MailRule.DoesNotExist.
+ rule = MailRule.objects.filter(pk=mailrule).first() if mailrule else None
+ layout = MailRule.PdfLayout.TEXT_HTML if rule is None else rule.pdf_layout
+ self.archive_path = self.generate_pdf(mail, layout)
@staticmethod
def parse_file_to_message(filepath: Path) -> MailMessage:
@@ -217,7 +228,11 @@ class MailDocumentParser(DocumentParser):
f"{settings.TIKA_ENDPOINT}: {err}",
) from err
- def generate_pdf(self, mail_message: MailMessage) -> Path:
+ def generate_pdf(
+ self,
+ mail_message: MailMessage,
+ pdf_layout: MailRule.PdfLayout = MailRule.PdfLayout.TEXT_HTML,
+ ) -> Path:
archive_path = Path(self.tempdir) / "merged.pdf"
mail_pdf_file = self.generate_pdf_from_mail(mail_message)
@@ -246,7 +261,17 @@ class MailDocumentParser(DocumentParser):
if pdf_a_format is not None:
route.pdf_format(pdf_a_format)
- route.merge([mail_pdf_file, pdf_of_html_content])
+ # Choose which rendered parts to merge, and in what order.
+ match pdf_layout:
+     case MailRule.PdfLayout.HTML_TEXT:
+         merge_parts = [pdf_of_html_content, mail_pdf_file]
+     case MailRule.PdfLayout.HTML_ONLY:
+         merge_parts = [pdf_of_html_content]
+     case MailRule.PdfLayout.TEXT_ONLY:
+         merge_parts = [mail_pdf_file]
+     case _:  # TEXT_HTML or any unrecognised value: text part, then HTML
+         merge_parts = [mail_pdf_file, pdf_of_html_content]
+ route.merge(merge_parts)
try:
response = route.run()
diff --git a/src/paperless_mail/serialisers.py b/src/paperless_mail/serialisers.py
index 5623f62c3..e9836b421 100644
--- a/src/paperless_mail/serialisers.py
+++ b/src/paperless_mail/serialisers.py
@@ -96,6 +96,7 @@ class MailRuleSerializer(OwnedObjectSerializer):
"order",
"attachment_type",
"consumption_scope",
+ "pdf_layout",
"owner",
"user_can_change",
"permissions",