Feat: Add pdf layout switch for email

This commit is contained in:
Silvia Bigler
2025-01-25 20:00:44 +01:00
committed by shamoon
parent e4e906ce2b
commit 218f7ad876
8 changed files with 117 additions and 5 deletions

View File

@@ -0,0 +1,27 @@
# Generated by Django 5.1.3 on 2024-11-24 12:39
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
    """Add the ``pdf_layout`` field to the ``mailrule`` model."""

    # This migration is ordered after migration 0028 of the same app.
    dependencies = [
        ("paperless_mail", "0028_alter_mailaccount_password_and_more"),
    ]

    operations = [
        migrations.AddField(
            model_name="mailrule",
            name="pdf_layout",
            field=models.PositiveIntegerField(
                verbose_name="pdf layout",
                # 1 is the "Text, then HTML" entry in the choices below.
                default=1,
                choices=[
                    (1, "Text, then HTML"),
                    (2, "HTML, then text"),
                    (3, "HTML only"),
                    (4, "Text only"),
                ],
            ),
        ),
    ]

View File

@@ -115,6 +115,12 @@ class MailRule(document_models.ModelWithOwner):
ATTACHMENTS_ONLY = 1, _("Only process attachments.")
EVERYTHING = 2, _("Process all files, including 'inline' attachments.")
class PdfLayout(models.IntegerChoices):
    """Available layouts for the PDF generated from an email.

    Each member pairs the stored integer value with a translatable label
    naming which parts (text, HTML) are included and in which order.
    """

    TEXT_HTML = (1, _("Text, then HTML"))
    HTML_TEXT = (2, _("HTML, then text"))
    HTML_ONLY = (3, _("HTML only"))
    TEXT_ONLY = (4, _("Text only"))
class MailAction(models.IntegerChoices):
DELETE = 1, _("Delete")
MOVE = 2, _("Move to specified folder")
@@ -230,6 +236,12 @@ class MailRule(document_models.ModelWithOwner):
default=ConsumptionScope.ATTACHMENTS_ONLY,
)
# Layout used for the PDF generated from a mail; one of the PdfLayout
# values, with "Text, then HTML" as the default.
pdf_layout = models.PositiveIntegerField(
    verbose_name=_("pdf layout"),
    default=PdfLayout.TEXT_HTML,
    choices=PdfLayout.choices,
)
action = models.PositiveIntegerField(
_("action"),
choices=MailAction.choices,

View File

@@ -22,6 +22,7 @@ from documents.parsers import DocumentParser
from documents.parsers import ParseError
from documents.parsers import make_thumbnail_from_pdf
from paperless.models import OutputTypeChoices
from paperless_mail.models import MailRule
class MailDocumentParser(DocumentParser):
@@ -121,7 +122,13 @@ class MailDocumentParser(DocumentParser):
result.sort(key=lambda item: (item["prefix"], item["key"]))
return result
def parse(self, document_path: Path, mime_type: str, file_name=None):
def parse(
self,
document_path: Path,
mime_type: str,
file_name=None,
mailrule: int | None = None,
):
"""
Parses the given .eml into formatted text, based on the decoded email.
@@ -180,7 +187,11 @@ class MailDocumentParser(DocumentParser):
self.date = mail.date
self.log.debug("Creating a PDF from the email")
self.archive_path = self.generate_pdf(mail)
if mailrule:
rule = MailRule.objects.get(pk=mailrule)
self.archive_path = self.generate_pdf(mail, rule.pdf_layout)
else:
self.archive_path = self.generate_pdf(mail)
@staticmethod
def parse_file_to_message(filepath: Path) -> MailMessage:
@@ -217,7 +228,11 @@ class MailDocumentParser(DocumentParser):
f"{settings.TIKA_ENDPOINT}: {err}",
) from err
def generate_pdf(self, mail_message: MailMessage) -> Path:
def generate_pdf(
self,
mail_message: MailMessage,
pdf_layout: MailRule.PdfLayout = MailRule.PdfLayout.TEXT_HTML,
) -> Path:
archive_path = Path(self.tempdir) / "merged.pdf"
mail_pdf_file = self.generate_pdf_from_mail(mail_message)
@@ -246,7 +261,17 @@ class MailDocumentParser(DocumentParser):
if pdf_a_format is not None:
route.pdf_format(pdf_a_format)
route.merge([mail_pdf_file, pdf_of_html_content])
match pdf_layout:
case MailRule.PdfLayout.TEXT_HTML:
route.merge([mail_pdf_file, pdf_of_html_content])
case MailRule.PdfLayout.HTML_TEXT:
route.merge([pdf_of_html_content, mail_pdf_file])
case MailRule.PdfLayout.HTML_ONLY:
route.merge([pdf_of_html_content])
case MailRule.PdfLayout.TEXT_ONLY:
route.merge([mail_pdf_file])
case _:
route.merge([mail_pdf_file, pdf_of_html_content])
try:
response = route.run()

View File

@@ -96,6 +96,7 @@ class MailRuleSerializer(OwnedObjectSerializer):
"order",
"attachment_type",
"consumption_scope",
"pdf_layout",
"owner",
"user_can_change",
"permissions",