diff --git a/src-ui/src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html b/src-ui/src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html index a9ad3040b..afe6c2ab9 100644 --- a/src-ui/src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html +++ b/src-ui/src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html @@ -41,6 +41,7 @@
+
diff --git a/src-ui/src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts b/src-ui/src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts index f2d8236bc..624a4f676 100644 --- a/src-ui/src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts +++ b/src-ui/src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts @@ -18,6 +18,7 @@ import { MailMetadataTitleOption, MailRule, MailRuleConsumptionScope, + MailRulePdfLayout, } from 'src/app/data/mail-rule' import { CorrespondentService } from 'src/app/services/rest/correspondent.service' import { DocumentTypeService } from 'src/app/services/rest/document-type.service' @@ -58,6 +59,25 @@ const CONSUMPTION_SCOPE_OPTIONS = [ }, ] +const PDF_LAYOUT_OPTIONS = [ + { + id: MailRulePdfLayout.Text_Html, + name: $localize`Text, then HTML`, + }, + { + id: MailRulePdfLayout.Html_Text, + name: $localize`HTML, then text`, + }, + { + id: MailRulePdfLayout.Html_only, + name: $localize`HTML only`, + }, + { + id: MailRulePdfLayout.Text_only, + name: $localize`Text only`, + }, +] + const ACTION_OPTIONS = [ { id: MailAction.Delete, @@ -184,6 +204,7 @@ export class MailRuleEditDialogComponent extends EditDialogComponent { filter_attachment_filename_exclude: new FormControl(null), maximum_age: new FormControl(null), attachment_type: new FormControl(MailFilterAttachmentType.Attachments), + pdf_layout: new FormControl(MailRulePdfLayout.Text_Html), consumption_scope: new FormControl(MailRuleConsumptionScope.Attachments), order: new FormControl(null), action: new FormControl(MailAction.MarkRead), @@ -232,4 +253,8 @@ export class MailRuleEditDialogComponent extends EditDialogComponent { get consumptionScopeOptions() { return CONSUMPTION_SCOPE_OPTIONS } + + get pdfLayoutOptions() { + return PDF_LAYOUT_OPTIONS + } } diff --git a/src-ui/src/app/data/mail-rule.ts b/src-ui/src/app/data/mail-rule.ts index 6e2c468a2..c93dfdfa1 100644 --- a/src-ui/src/app/data/mail-rule.ts +++ b/src-ui/src/app/data/mail-rule.ts @@ -11,6 +11,13 @@ export enum MailRuleConsumptionScope { Everything = 3, } +export enum MailRulePdfLayout { + Text_Html = 1, + Html_Text = 2, + Html_only = 3, + Text_only = 4, +} + export enum MailAction { Delete = 1, Move = 2, @@ -59,6 +66,8 @@ export interface MailRule extends ObjectWithPermissions { attachment_type: MailFilterAttachmentType + pdf_layout: MailRulePdfLayout + action: MailAction action_parameter?: string diff --git a/src/documents/consumer.py b/src/documents/consumer.py index ec92ddba8..524f5fd77 100644 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -47,6 +47,7 @@ from documents.templating.workflows import parse_w_workflow_placeholders from documents.utils import copy_basic_file_stats from documents.utils import copy_file_with_basic_stats from documents.utils import run_subprocess +from paperless_mail.parsers import MailDocumentParser class WorkflowTriggerPlugin( @@ -474,7 +475,18 @@ class ConsumerPlugin( ConsumerStatusShortMessage.PARSING_DOCUMENT, ) self.log.debug(f"Parsing {self.filename}...") - document_parser.parse(self.working_copy, mime_type, self.filename) + if ( + isinstance(document_parser, MailDocumentParser) + and self.input_doc.mailrule_id + ): + document_parser.parse( + self.working_copy, + mime_type, + self.filename, + self.input_doc.mailrule_id, + ) + else: + document_parser.parse(self.working_copy, mime_type, self.filename) self.log.debug(f"Generating thumbnail for {self.filename}...") self._send_progress( diff --git a/src/paperless_mail/migrations/0029_mailrule_pdf_layout.py b/src/paperless_mail/migrations/0029_mailrule_pdf_layout.py new file mode 100644 index 000000000..ae0afacc0 --- /dev/null +++ b/src/paperless_mail/migrations/0029_mailrule_pdf_layout.py @@ -0,0 +1,27 @@ +# Generated by Django 5.1.3 on 2024-11-24 12:39 + +from django.db import migrations +from django.db import models + + +class Migration(migrations.Migration): + dependencies = [ + ("paperless_mail", "0028_alter_mailaccount_password_and_more"), + ] + + operations = [ + migrations.AddField( + model_name="mailrule", + name="pdf_layout", + field=models.PositiveIntegerField( + choices=[ + (1, "Text, then HTML"), + (2, "HTML, then text"), + (3, "HTML only"), + (4, "Text only"), + ], + default=1, + verbose_name="pdf layout", + ), + ), + ] diff --git a/src/paperless_mail/models.py b/src/paperless_mail/models.py index 46b9db1ff..2f7640325 100644 --- a/src/paperless_mail/models.py +++ b/src/paperless_mail/models.py @@ -115,6 +115,12 @@ class MailRule(document_models.ModelWithOwner): ATTACHMENTS_ONLY = 1, _("Only process attachments.") EVERYTHING = 2, _("Process all files, including 'inline' attachments.") + class PdfLayout(models.IntegerChoices): + TEXT_HTML = 1, _("Text, then HTML") + HTML_TEXT = 2, _("HTML, then text") + HTML_ONLY = 3, _("HTML only") + TEXT_ONLY = 4, _("Text only") + class MailAction(models.IntegerChoices): DELETE = 1, _("Delete") MOVE = 2, _("Move to specified folder") @@ -230,6 +236,12 @@ class MailRule(document_models.ModelWithOwner): default=ConsumptionScope.ATTACHMENTS_ONLY, ) + pdf_layout = models.PositiveIntegerField( + _("pdf layout"), + choices=PdfLayout.choices, + default=PdfLayout.TEXT_HTML, + ) + action = models.PositiveIntegerField( _("action"), choices=MailAction.choices, diff --git a/src/paperless_mail/parsers.py b/src/paperless_mail/parsers.py index d98fb7238..682f8285c 100644 --- a/src/paperless_mail/parsers.py +++ b/src/paperless_mail/parsers.py @@ -22,6 +22,7 @@ from documents.parsers import DocumentParser from documents.parsers import ParseError from documents.parsers import make_thumbnail_from_pdf from paperless.models import OutputTypeChoices +from paperless_mail.models import MailRule class MailDocumentParser(DocumentParser): @@ -121,7 +122,13 @@ class MailDocumentParser(DocumentParser): result.sort(key=lambda item: (item["prefix"], item["key"])) return result - def parse(self, document_path: Path, mime_type: str, file_name=None): + def parse( + self, + document_path: Path, + mime_type: str, + file_name=None, + mailrule: int | None = None, + ): """ Parses the given .eml into formatted text, based on the decoded email. @@ -180,7 +187,11 @@ class MailDocumentParser(DocumentParser): self.date = mail.date self.log.debug("Creating a PDF from the email") - self.archive_path = self.generate_pdf(mail) + if mailrule: + rule = MailRule.objects.get(pk=mailrule) + self.archive_path = self.generate_pdf(mail, rule.pdf_layout) + else: + self.archive_path = self.generate_pdf(mail) @staticmethod def parse_file_to_message(filepath: Path) -> MailMessage: @@ -217,7 +228,11 @@ class MailDocumentParser(DocumentParser): f"{settings.TIKA_ENDPOINT}: {err}", ) from err - def generate_pdf(self, mail_message: MailMessage) -> Path: + def generate_pdf( + self, + mail_message: MailMessage, + pdf_layout: MailRule.PdfLayout = MailRule.PdfLayout.TEXT_HTML, + ) -> Path: archive_path = Path(self.tempdir) / "merged.pdf" mail_pdf_file = self.generate_pdf_from_mail(mail_message) @@ -246,7 +261,17 @@ class MailDocumentParser(DocumentParser): if pdf_a_format is not None: route.pdf_format(pdf_a_format) - route.merge([mail_pdf_file, pdf_of_html_content]) + match pdf_layout: + case MailRule.PdfLayout.TEXT_HTML: + route.merge([mail_pdf_file, pdf_of_html_content]) + case MailRule.PdfLayout.HTML_TEXT: + route.merge([pdf_of_html_content, mail_pdf_file]) + case MailRule.PdfLayout.HTML_ONLY: + route.merge([pdf_of_html_content]) + case MailRule.PdfLayout.TEXT_ONLY: + route.merge([mail_pdf_file]) + case _: + route.merge([mail_pdf_file, pdf_of_html_content]) try: response = route.run() diff --git a/src/paperless_mail/serialisers.py b/src/paperless_mail/serialisers.py index 5623f62c3..e9836b421 100644 --- a/src/paperless_mail/serialisers.py +++ b/src/paperless_mail/serialisers.py @@ -96,6 +96,7 @@ class MailRuleSerializer(OwnedObjectSerializer): "order", "attachment_type", "consumption_scope", + "pdf_layout", "owner", "user_can_change", "permissions",