Enhancement: add layout options for email conversion (#8907)

---------

Co-authored-by: shamoon <4887959+shamoon@users.noreply.github.com>
This commit is contained in:
Silvia Bigler 2025-02-07 19:32:35 +01:00 committed by GitHub
parent 7f36163c3b
commit 71472a6a82
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
16 changed files with 421 additions and 89 deletions

View File

@ -198,6 +198,18 @@ Docker, this may be the `environment` key of the webserver or a
containing the configuration parameters. Be sure to use the correct format
and watch out for indentation if editing the YAML file.
### Email Parsing
#### [`PAPERLESS_EMAIL_PARSE_DEFAULT_LAYOUT=<int>`](#PAPERLESS_EMAIL_PARSE_DEFAULT_LAYOUT) {#PAPERLESS_EMAIL_PARSE_DEFAULT_LAYOUT}
: The default layout to use for emails that are consumed as documents. Must be one of the integer choices below. Note that mail
rules can specify this setting, thus this fallback is used for the default selection and for .eml files consumed by other means.
- `1` = Text, then HTML
- `2` = HTML, then text
- `3` = HTML only
- `4` = Text only
## Paths and folders
#### [`PAPERLESS_CONSUMPTION_DIR=<path>`](#PAPERLESS_CONSUMPTION_DIR) {#PAPERLESS_CONSUMPTION_DIR}

View File

@ -569,7 +569,7 @@
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html</context>
<context context-type="linenumber">75</context>
<context context-type="linenumber">76</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/edit-dialog/storage-path-edit-dialog/storage-path-edit-dialog.component.html</context>
@ -1453,7 +1453,7 @@
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html</context>
<context context-type="linenumber">74</context>
<context context-type="linenumber">75</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/edit-dialog/storage-path-edit-dialog/storage-path-edit-dialog.component.html</context>
@ -2062,7 +2062,7 @@
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts</context>
<context context-type="linenumber">64</context>
<context context-type="linenumber">88</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.html</context>
@ -3989,71 +3989,78 @@
<context context-type="linenumber">43</context>
</context-group>
</trans-unit>
<trans-unit id="3842519365862452117" datatype="html">
<source>PDF layout</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html</context>
<context context-type="linenumber">44</context>
</context-group>
</trans-unit>
<trans-unit id="2873939123535615966" datatype="html">
<source>Include only files matching</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html</context>
<context context-type="linenumber">46</context>
<context context-type="linenumber">47</context>
</context-group>
</trans-unit>
<trans-unit id="7233407036155150477" datatype="html">
<source>Optional. Wildcards e.g. *.pdf or *invoice* allowed. Can be comma-separated list. Case insensitive.</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html</context>
<context context-type="linenumber">46</context>
<context context-type="linenumber">47</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html</context>
<context context-type="linenumber">47</context>
<context context-type="linenumber">48</context>
</context-group>
</trans-unit>
<trans-unit id="1546332577833742677" datatype="html">
<source>Exclude files matching</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html</context>
<context context-type="linenumber">47</context>
<context context-type="linenumber">48</context>
</context-group>
</trans-unit>
<trans-unit id="9216117865911519658" datatype="html">
<source>Action</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html</context>
<context context-type="linenumber">53</context>
<context context-type="linenumber">54</context>
</context-group>
</trans-unit>
<trans-unit id="7841986067387421166" datatype="html">
<source>Only performed if the mail is processed.</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html</context>
<context context-type="linenumber">53</context>
<context context-type="linenumber">54</context>
</context-group>
</trans-unit>
<trans-unit id="1261794314435932203" datatype="html">
<source>Action parameter</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html</context>
<context context-type="linenumber">55</context>
<context context-type="linenumber">56</context>
</context-group>
</trans-unit>
<trans-unit id="6093797930511670257" datatype="html">
<source>Assign title from</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html</context>
<context context-type="linenumber">57</context>
<context context-type="linenumber">58</context>
</context-group>
</trans-unit>
<trans-unit id="5232720756589450549" datatype="html">
<source>Assign owner from rule</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html</context>
<context context-type="linenumber">58</context>
<context context-type="linenumber">59</context>
</context-group>
</trans-unit>
<trans-unit id="6695990587380209737" datatype="html">
<source>Assign document type</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html</context>
<context context-type="linenumber">62</context>
<context context-type="linenumber">63</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.html</context>
@ -4064,14 +4071,14 @@
<source>Assign correspondent from</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html</context>
<context context-type="linenumber">63</context>
<context context-type="linenumber">64</context>
</context-group>
</trans-unit>
<trans-unit id="4875491778188965469" datatype="html">
<source>Assign correspondent</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html</context>
<context context-type="linenumber">65</context>
<context context-type="linenumber">66</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.html</context>
@ -4082,7 +4089,7 @@
<source>Error</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html</context>
<context context-type="linenumber">72</context>
<context context-type="linenumber">73</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.html</context>
@ -4097,123 +4104,158 @@
<source>Only process attachments</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts</context>
<context context-type="linenumber">38</context>
<context context-type="linenumber">39</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts</context>
<context context-type="linenumber">49</context>
<context context-type="linenumber">50</context>
</context-group>
</trans-unit>
<trans-unit id="936923743212522897" datatype="html">
<source>Process all files, including &apos;inline&apos; attachments</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts</context>
<context context-type="linenumber">42</context>
<context context-type="linenumber">43</context>
</context-group>
</trans-unit>
<trans-unit id="9025522236384167767" datatype="html">
<source>Process message as .eml</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts</context>
<context context-type="linenumber">53</context>
<context context-type="linenumber">54</context>
</context-group>
</trans-unit>
<trans-unit id="7411485377918318115" datatype="html">
<source>Process message as .eml and attachments separately</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts</context>
<context context-type="linenumber">57</context>
<context context-type="linenumber">58</context>
</context-group>
</trans-unit>
<trans-unit id="8776300244268604360" datatype="html">
<source>System default</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts</context>
<context context-type="linenumber">65</context>
</context-group>
</trans-unit>
<trans-unit id="4812910224365219000" datatype="html">
<source>Text, then HTML</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts</context>
<context context-type="linenumber">69</context>
</context-group>
</trans-unit>
<trans-unit id="3181744476823286470" datatype="html">
<source>HTML, then text</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts</context>
<context context-type="linenumber">73</context>
</context-group>
</trans-unit>
<trans-unit id="9048933760263399623" datatype="html">
<source>HTML only</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts</context>
<context context-type="linenumber">77</context>
</context-group>
</trans-unit>
<trans-unit id="3835211125655594627" datatype="html">
<source>Text only</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts</context>
<context context-type="linenumber">81</context>
</context-group>
</trans-unit>
<trans-unit id="2784260611081866636" datatype="html">
<source>Move to specified folder</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts</context>
<context context-type="linenumber">68</context>
<context context-type="linenumber">92</context>
</context-group>
</trans-unit>
<trans-unit id="4593278936733161020" datatype="html">
<source>Mark as read, don&apos;t process read mails</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts</context>
<context context-type="linenumber">72</context>
<context context-type="linenumber">96</context>
</context-group>
</trans-unit>
<trans-unit id="2378921144019636516" datatype="html">
<source>Flag the mail, don&apos;t process flagged mails</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts</context>
<context context-type="linenumber">76</context>
<context context-type="linenumber">100</context>
</context-group>
</trans-unit>
<trans-unit id="6457024618858980302" datatype="html">
<source>Tag the mail with specified tag, don&apos;t process tagged mails</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts</context>
<context context-type="linenumber">80</context>
<context context-type="linenumber">104</context>
</context-group>
</trans-unit>
<trans-unit id="4673329664686432878" datatype="html">
<source>Use subject as title</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts</context>
<context context-type="linenumber">87</context>
<context context-type="linenumber">111</context>
</context-group>
</trans-unit>
<trans-unit id="8645471396972938185" datatype="html">
<source>Use attachment filename as title</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts</context>
<context context-type="linenumber">91</context>
<context context-type="linenumber">115</context>
</context-group>
</trans-unit>
<trans-unit id="2881879110886196973" datatype="html">
<source>Do not assign title from this rule</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts</context>
<context context-type="linenumber">95</context>
<context context-type="linenumber">119</context>
</context-group>
</trans-unit>
<trans-unit id="1568902914205618549" datatype="html">
<source>Do not assign a correspondent</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts</context>
<context context-type="linenumber">102</context>
<context context-type="linenumber">126</context>
</context-group>
</trans-unit>
<trans-unit id="3567746385454588269" datatype="html">
<source>Use mail address</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts</context>
<context context-type="linenumber">106</context>
<context context-type="linenumber">130</context>
</context-group>
</trans-unit>
<trans-unit id="445154175758965852" datatype="html">
<source>Use name (or mail address if not available)</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts</context>
<context context-type="linenumber">110</context>
<context context-type="linenumber">134</context>
</context-group>
</trans-unit>
<trans-unit id="1258862217749148424" datatype="html">
<source>Use correspondent selected below</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts</context>
<context context-type="linenumber">114</context>
<context context-type="linenumber">138</context>
</context-group>
</trans-unit>
<trans-unit id="3147349817770432927" datatype="html">
<source>Create new mail rule</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts</context>
<context context-type="linenumber">166</context>
<context context-type="linenumber">190</context>
</context-group>
</trans-unit>
<trans-unit id="3374331029704382439" datatype="html">
<source>Edit mail rule</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.ts</context>
<context context-type="linenumber">170</context>
<context context-type="linenumber">194</context>
</context-group>
</trans-unit>
<trans-unit id="8911059720204770105" datatype="html">

View File

@ -41,6 +41,7 @@
<div class="col-md-6">
<pngx-input-select [horizontal]="true" i18n-title title="Consumption scope" [items]="consumptionScopeOptions" formControlName="consumption_scope" i18n-hint hint="See docs for .eml processing requirements"></pngx-input-select>
<pngx-input-select [horizontal]="true" i18n-title title="Attachment type" [items]="attachmentTypeOptions" formControlName="attachment_type"></pngx-input-select>
<pngx-input-select [horizontal]="true" i18n-title title="PDF layout" [items]="pdfLayoutOptions" formControlName="pdf_layout"></pngx-input-select>
</div>
<div class="col-md-6">
<pngx-input-text [horizontal]="true" i18n-title title="Include only files matching" formControlName="filter_attachment_filename_include" i18n-hint hint="Optional. Wildcards e.g. *.pdf or *invoice* allowed. Can be comma-separated list. Case insensitive." [error]="error?.filter_attachment_filename_include"></pngx-input-text>

View File

@ -18,6 +18,7 @@ import {
MailMetadataTitleOption,
MailRule,
MailRuleConsumptionScope,
MailRulePdfLayout,
} from 'src/app/data/mail-rule'
import { CorrespondentService } from 'src/app/services/rest/correspondent.service'
import { DocumentTypeService } from 'src/app/services/rest/document-type.service'
@ -58,6 +59,29 @@ const CONSUMPTION_SCOPE_OPTIONS = [
},
]
// Choices offered for a mail rule's PDF layout.
// Each entry pairs a MailRulePdfLayout member (id) with its label (name).
// The labels are $localize tagged templates and are kept as literals so that
// they can be picked up for translation.
const PDF_LAYOUT_OPTIONS = [
{
// Defer to the system-wide default layout
id: MailRulePdfLayout.Default,
name: $localize`System default`,
},
{
id: MailRulePdfLayout.TextHtml,
name: $localize`Text, then HTML`,
},
{
id: MailRulePdfLayout.HtmlText,
name: $localize`HTML, then text`,
},
{
id: MailRulePdfLayout.HtmlOnly,
name: $localize`HTML only`,
},
{
id: MailRulePdfLayout.TextOnly,
name: $localize`Text only`,
},
]
const ACTION_OPTIONS = [
{
id: MailAction.Delete,
@ -184,6 +208,7 @@ export class MailRuleEditDialogComponent extends EditDialogComponent<MailRule> {
filter_attachment_filename_exclude: new FormControl(null),
maximum_age: new FormControl(null),
attachment_type: new FormControl(MailFilterAttachmentType.Attachments),
pdf_layout: new FormControl(MailRulePdfLayout.Default),
consumption_scope: new FormControl(MailRuleConsumptionScope.Attachments),
order: new FormControl(null),
action: new FormControl(MailAction.MarkRead),
@ -232,4 +257,8 @@ export class MailRuleEditDialogComponent extends EditDialogComponent<MailRule> {
get consumptionScopeOptions() {
return CONSUMPTION_SCOPE_OPTIONS
}
// Exposes the module-level PDF_LAYOUT_OPTIONS list as a component property.
get pdfLayoutOptions() {
return PDF_LAYOUT_OPTIONS
}
}

View File

@ -11,6 +11,14 @@ export enum MailRuleConsumptionScope {
Everything = 3,
}
// PDF layout selectable on a mail rule.
// The numeric values are sent to / received from the API as `pdf_layout`
// and must stay in sync with the backend `MailRule.PdfLayout` choices
// (0 = system default, 1 = text then HTML, 2 = HTML then text,
// 3 = HTML only, 4 = text only).
export enum MailRulePdfLayout {
Default = 0,
TextHtml = 1,
HtmlText = 2,
HtmlOnly = 3,
TextOnly = 4,
}
export enum MailAction {
Delete = 1,
Move = 2,
@ -59,6 +67,8 @@ export interface MailRule extends ObjectWithPermissions {
attachment_type: MailFilterAttachmentType
pdf_layout: MailRulePdfLayout
action: MailAction
action_parameter?: string

View File

@ -48,6 +48,7 @@ from documents.templating.workflows import parse_w_workflow_placeholders
from documents.utils import copy_basic_file_stats
from documents.utils import copy_file_with_basic_stats
from documents.utils import run_subprocess
from paperless_mail.parsers import MailDocumentParser
class WorkflowTriggerPlugin(
@ -479,7 +480,18 @@ class ConsumerPlugin(
ConsumerStatusShortMessage.PARSING_DOCUMENT,
)
self.log.debug(f"Parsing {self.filename}...")
document_parser.parse(self.working_copy, mime_type, self.filename)
if (
isinstance(document_parser, MailDocumentParser)
and self.input_doc.mailrule_id
):
document_parser.parse(
self.working_copy,
mime_type,
self.filename,
self.input_doc.mailrule_id,
)
else:
document_parser.parse(self.working_copy, mime_type, self.filename)
self.log.debug(f"Generating thumbnail for {self.filename}...")
self._send_progress(

View File

@ -21,6 +21,7 @@ from guardian.core import ObjectPermissionChecker
from documents.consumer import ConsumerError
from documents.data_models import DocumentMetadataOverrides
from documents.data_models import DocumentSource
from documents.models import Correspondent
from documents.models import CustomField
from documents.models import Document
@ -35,6 +36,8 @@ from documents.tasks import sanity_check
from documents.tests.utils import DirectoriesMixin
from documents.tests.utils import FileSystemAssertsMixin
from documents.tests.utils import GetConsumerMixin
from paperless_mail.models import MailRule
from paperless_mail.parsers import MailDocumentParser
class TestAttributes(UnittestTestCase):
@ -243,6 +246,8 @@ def fake_magic_from_file(file, *, mime=False):
return "image/png"
elif os.path.splitext(file)[1] == ".webp":
return "image/webp"
elif os.path.splitext(file)[1] == ".eml":
return "message/rfc822"
else:
return "unknown"
else:
@ -975,6 +980,59 @@ class TestConsumer(
self.assertEqual(command[0], "qpdf")
self.assertEqual(command[1], "--replace-input")
@mock.patch("paperless_mail.models.MailRule.objects.get")
@mock.patch("paperless_mail.parsers.MailDocumentParser.parse")
@mock.patch("documents.parsers.document_consumer_declaration.send")
def test_mail_parser_receives_mailrule(
    self,
    mock_consumer_declaration_send: mock.Mock,
    mock_mail_parser_parse: mock.Mock,
    mock_mailrule_get: mock.Mock,
):
    """
    GIVEN:
        - A mail document from a mail rule
    WHEN:
        - The consumer is run
    THEN:
        - The mail parser should receive the id of the mail rule
    """
    # Only offer the mail parser so it is the one selected for .eml files.
    mock_consumer_declaration_send.return_value = [
        (
            None,
            {
                "parser": MailDocumentParser,
                "mime_types": {"message/rfc822": ".eml"},
                "weight": 0,
            },
        ),
    ]
    mock_mailrule_get.return_value = mock.Mock(
        pdf_layout=MailRule.PdfLayout.HTML_ONLY,
    )
    with self.get_consumer(
        filepath=(
            Path(__file__).parent.parent.parent
            / Path("paperless_mail")
            / Path("tests")
            / Path("samples")
        ).resolve()
        / "html.eml",
        source=DocumentSource.MailFetch,
        mailrule_id=1,
    ) as consumer:
        # fails because no gotenberg
        with self.assertRaises(
            ConsumerError,
        ):
            consumer.run()

        # Checked after the assertRaises block: statements placed after
        # run() inside that block would be skipped once run() raises.
        # The consumer hands the parser the file name and the mail rule id
        # as positional arguments, so the expectation mirrors that call.
        mock_mail_parser_parse.assert_called_once_with(
            consumer.working_copy,
            "message/rfc822",
            consumer.filename,
            1,
        )
@mock.patch("documents.consumer.magic.from_file", fake_magic_from_file)
class TestConsumerCreatedDate(DirectoriesMixin, GetConsumerMixin, TestCase):

View File

@ -8,7 +8,7 @@ class TestMigrateWorkflow(TestMigrations):
dependencies = (
(
"paperless_mail",
"0028_alter_mailaccount_password_and_more",
"0029_mailrule_pdf_layout",
),
)

View File

@ -340,11 +340,16 @@ class GetConsumerMixin:
filepath: Path,
overrides: DocumentMetadataOverrides | None = None,
source: DocumentSource = DocumentSource.ConsumeFolder,
mailrule_id: int | None = None,
) -> Generator[ConsumerPlugin, None, None]:
# Store this for verification
self.status = DummyProgressManager(filepath.name, None)
reader = ConsumerPlugin(
ConsumableDocument(source, original_file=filepath),
ConsumableDocument(
source,
original_file=filepath,
mailrule_id=mailrule_id or None,
),
overrides or DocumentMetadataOverrides(),
self.status, # type: ignore
self.dirs.scratch_dir,

View File

@ -2,7 +2,7 @@ msgid ""
msgstr ""
"Project-Id-Version: paperless-ngx\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-01-27 08:19-0800\n"
"POT-Creation-Date: 2025-01-28 12:17-0800\n"
"PO-Revision-Date: 2022-02-17 04:17\n"
"Last-Translator: \n"
"Language-Team: English\n"
@ -90,7 +90,7 @@ msgid "Automatic"
msgstr ""
#: documents/models.py:67 documents/models.py:433 documents/models.py:1493
#: paperless_mail/models.py:23 paperless_mail/models.py:136
#: paperless_mail/models.py:23 paperless_mail/models.py:143
msgid "name"
msgstr ""
@ -276,7 +276,7 @@ msgstr ""
msgid "warning"
msgstr ""
#: documents/models.py:387 paperless_mail/models.py:350
#: documents/models.py:387 paperless_mail/models.py:363
msgid "error"
msgstr ""
@ -818,7 +818,7 @@ msgstr ""
msgid "filter filename"
msgstr ""
#: documents/models.py:1066 paperless_mail/models.py:193
#: documents/models.py:1066 paperless_mail/models.py:200
msgid ""
"Only consume documents which entirely match this filename if specified. "
"Wildcards such as *.pdf or *invoice* are allowed. Case insensitive."
@ -988,15 +988,15 @@ msgid ""
"Assign a document title, can include some placeholders, see documentation."
msgstr ""
#: documents/models.py:1287 paperless_mail/models.py:261
#: documents/models.py:1287 paperless_mail/models.py:274
msgid "assign this tag"
msgstr ""
#: documents/models.py:1296 paperless_mail/models.py:269
#: documents/models.py:1296 paperless_mail/models.py:282
msgid "assign this document type"
msgstr ""
#: documents/models.py:1305 paperless_mail/models.py:283
#: documents/models.py:1305 paperless_mail/models.py:296
msgid "assign this correspondent"
msgstr ""
@ -1112,7 +1112,7 @@ msgstr ""
msgid "workflow actions"
msgstr ""
#: documents/models.py:1495 paperless_mail/models.py:138
#: documents/models.py:1495 paperless_mail/models.py:145
msgid "order"
msgstr ""
@ -1124,7 +1124,7 @@ msgstr ""
msgid "actions"
msgstr ""
#: documents/models.py:1511 paperless_mail/models.py:147
#: documents/models.py:1511 paperless_mail/models.py:154
msgid "enabled"
msgstr ""
@ -1838,161 +1838,185 @@ msgid "Process all files, including 'inline' attachments."
msgstr ""
#: paperless_mail/models.py:119
msgid "Delete"
msgid "System default"
msgstr ""
#: paperless_mail/models.py:120
msgid "Move to specified folder"
msgid "Text, then HTML"
msgstr ""
#: paperless_mail/models.py:121
msgid "Mark as read, don't process read mails"
msgid "HTML, then text"
msgstr ""
#: paperless_mail/models.py:122
msgid "Flag the mail, don't process flagged mails"
msgid "HTML only"
msgstr ""
#: paperless_mail/models.py:123
msgid "Tag the mail with specified tag, don't process tagged mails"
msgid "Text only"
msgstr ""
#: paperless_mail/models.py:126
msgid "Use subject as title"
msgid "Delete"
msgstr ""
#: paperless_mail/models.py:127
msgid "Use attachment filename as title"
msgid "Move to specified folder"
msgstr ""
#: paperless_mail/models.py:128
msgid "Do not assign title from rule"
msgid "Mark as read, don't process read mails"
msgstr ""
#: paperless_mail/models.py:131
msgid "Do not assign a correspondent"
#: paperless_mail/models.py:129
msgid "Flag the mail, don't process flagged mails"
msgstr ""
#: paperless_mail/models.py:132
msgid "Use mail address"
#: paperless_mail/models.py:130
msgid "Tag the mail with specified tag, don't process tagged mails"
msgstr ""
#: paperless_mail/models.py:133
msgid "Use name (or mail address if not available)"
msgid "Use subject as title"
msgstr ""
#: paperless_mail/models.py:134
msgid "Use attachment filename as title"
msgstr ""
#: paperless_mail/models.py:135
msgid "Do not assign title from rule"
msgstr ""
#: paperless_mail/models.py:138
msgid "Do not assign a correspondent"
msgstr ""
#: paperless_mail/models.py:139
msgid "Use mail address"
msgstr ""
#: paperless_mail/models.py:140
msgid "Use name (or mail address if not available)"
msgstr ""
#: paperless_mail/models.py:141
msgid "Use correspondent selected below"
msgstr ""
#: paperless_mail/models.py:144
#: paperless_mail/models.py:151
msgid "account"
msgstr ""
#: paperless_mail/models.py:150 paperless_mail/models.py:305
#: paperless_mail/models.py:157 paperless_mail/models.py:318
msgid "folder"
msgstr ""
#: paperless_mail/models.py:154
#: paperless_mail/models.py:161
msgid ""
"Subfolders must be separated by a delimiter, often a dot ('.') or slash "
"('/'), but it varies by mail server."
msgstr ""
#: paperless_mail/models.py:160
#: paperless_mail/models.py:167
msgid "filter from"
msgstr ""
#: paperless_mail/models.py:167
#: paperless_mail/models.py:174
msgid "filter to"
msgstr ""
#: paperless_mail/models.py:174
#: paperless_mail/models.py:181
msgid "filter subject"
msgstr ""
#: paperless_mail/models.py:181
#: paperless_mail/models.py:188
msgid "filter body"
msgstr ""
#: paperless_mail/models.py:188
#: paperless_mail/models.py:195
msgid "filter attachment filename inclusive"
msgstr ""
#: paperless_mail/models.py:200
#: paperless_mail/models.py:207
msgid "filter attachment filename exclusive"
msgstr ""
#: paperless_mail/models.py:205
#: paperless_mail/models.py:212
msgid ""
"Do not consume documents which entirely match this filename if specified. "
"Wildcards such as *.pdf or *invoice* are allowed. Case insensitive."
msgstr ""
#: paperless_mail/models.py:212
#: paperless_mail/models.py:219
msgid "maximum age"
msgstr ""
#: paperless_mail/models.py:214
#: paperless_mail/models.py:221
msgid "Specified in days."
msgstr ""
#: paperless_mail/models.py:218
#: paperless_mail/models.py:225
msgid "attachment type"
msgstr ""
#: paperless_mail/models.py:222
#: paperless_mail/models.py:229
msgid ""
"Inline attachments include embedded images, so it's best to combine this "
"option with a filename filter."
msgstr ""
#: paperless_mail/models.py:228
#: paperless_mail/models.py:235
msgid "consumption scope"
msgstr ""
#: paperless_mail/models.py:234
#: paperless_mail/models.py:241
msgid "pdf layout"
msgstr ""
#: paperless_mail/models.py:247
msgid "action"
msgstr ""
#: paperless_mail/models.py:240
#: paperless_mail/models.py:253
msgid "action parameter"
msgstr ""
#: paperless_mail/models.py:245
#: paperless_mail/models.py:258
msgid ""
"Additional parameter for the action selected above, i.e., the target folder "
"of the move to folder action. Subfolders must be separated by dots."
msgstr ""
#: paperless_mail/models.py:253
#: paperless_mail/models.py:266
msgid "assign title from"
msgstr ""
#: paperless_mail/models.py:273
#: paperless_mail/models.py:286
msgid "assign correspondent from"
msgstr ""
#: paperless_mail/models.py:287
#: paperless_mail/models.py:300
msgid "Assign the rule owner to documents"
msgstr ""
#: paperless_mail/models.py:313
#: paperless_mail/models.py:326
msgid "uid"
msgstr ""
#: paperless_mail/models.py:321
#: paperless_mail/models.py:334
msgid "subject"
msgstr ""
#: paperless_mail/models.py:329
#: paperless_mail/models.py:342
msgid "received"
msgstr ""
#: paperless_mail/models.py:336
#: paperless_mail/models.py:349
msgid "processed"
msgstr ""
#: paperless_mail/models.py:342
#: paperless_mail/models.py:355
msgid "status"
msgstr ""

View File

@ -1030,6 +1030,11 @@ CONVERT_MEMORY_LIMIT = os.getenv("PAPERLESS_CONVERT_MEMORY_LIMIT")
GS_BINARY = os.getenv("PAPERLESS_GS_BINARY", "gs")
# Fallback PDF layout for .eml consumption. The mail parser falls back to this
# value when it is given no layout, or a falsy one such as 0 ("System default").
EMAIL_PARSE_DEFAULT_LAYOUT = __get_int(
    "PAPERLESS_EMAIL_PARSE_DEFAULT_LAYOUT",
    1,  # == MailRule.PdfLayout.TEXT_HTML; the enum itself can't be imported here
)
# Pre-2.x versions of Paperless stored your documents locally with GPG
# encryption, but that is no longer the default. This behaviour is still

View File

@ -0,0 +1,28 @@
# Generated by Django 5.1.3 on 2024-11-24 12:39
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
    """Add the ``pdf_layout`` choice field to the ``mailrule`` model."""

    dependencies = [
        ("paperless_mail", "0028_alter_mailaccount_password_and_more"),
    ]

    operations = [
        migrations.AddField(
            model_name="mailrule",
            name="pdf_layout",
            field=models.PositiveIntegerField(
                # Integer value / human-readable label pairs for the layout.
                choices=[
                    (0, "System default"),
                    (1, "Text, then HTML"),
                    (2, "HTML, then text"),
                    (3, "HTML only"),
                    (4, "Text only"),
                ],
                # The field defaults to 0, i.e. "System default".
                default=0,
                verbose_name="pdf layout",
            ),
        ),
    ]

View File

@ -115,6 +115,13 @@ class MailRule(document_models.ModelWithOwner):
ATTACHMENTS_ONLY = 1, _("Only process attachments.")
EVERYTHING = 2, _("Process all files, including 'inline' attachments.")
class PdfLayout(models.IntegerChoices):
    """
    Layout options for the PDF that is generated from an email.

    Each label names which parts (the text rendering of the mail and/or the
    rendering of its HTML content) end up in the PDF, and in which order.
    """

    # 0 is falsy, so a `pdf_layout or <fallback>` expression resolves this
    # choice to the configured system-wide default layout.
    DEFAULT = 0, _("System default")
    TEXT_HTML = 1, _("Text, then HTML")
    HTML_TEXT = 2, _("HTML, then text")
    HTML_ONLY = 3, _("HTML only")
    TEXT_ONLY = 4, _("Text only")
class MailAction(models.IntegerChoices):
DELETE = 1, _("Delete")
MOVE = 2, _("Move to specified folder")
@ -230,6 +237,12 @@ class MailRule(document_models.ModelWithOwner):
default=ConsumptionScope.ATTACHMENTS_ONLY,
)
# Layout used when an email consumed by this rule is converted to a PDF.
# Defaults to PdfLayout.DEFAULT ("System default").
pdf_layout = models.PositiveIntegerField(
    _("pdf layout"),
    choices=PdfLayout.choices,
    default=PdfLayout.DEFAULT,
)
action = models.PositiveIntegerField(
_("action"),
choices=MailAction.choices,

View File

@ -22,6 +22,7 @@ from documents.parsers import DocumentParser
from documents.parsers import ParseError
from documents.parsers import make_thumbnail_from_pdf
from paperless.models import OutputTypeChoices
from paperless_mail.models import MailRule
class MailDocumentParser(DocumentParser):
@ -121,7 +122,13 @@ class MailDocumentParser(DocumentParser):
result.sort(key=lambda item: (item["prefix"], item["key"]))
return result
def parse(self, document_path: Path, mime_type: str, file_name=None):
def parse(
self,
document_path: Path,
mime_type: str,
file_name=None,
mailrule_id: int | None = None,
):
"""
Parses the given .eml into formatted text, based on the decoded email.
@ -180,7 +187,11 @@ class MailDocumentParser(DocumentParser):
self.date = mail.date
self.log.debug("Creating a PDF from the email")
self.archive_path = self.generate_pdf(mail)
if mailrule_id:
rule = MailRule.objects.get(pk=mailrule_id)
self.archive_path = self.generate_pdf(mail, rule.pdf_layout)
else:
self.archive_path = self.generate_pdf(mail)
@staticmethod
def parse_file_to_message(filepath: Path) -> MailMessage:
@ -217,11 +228,19 @@ class MailDocumentParser(DocumentParser):
f"{settings.TIKA_ENDPOINT}: {err}",
) from err
def generate_pdf(self, mail_message: MailMessage) -> Path:
def generate_pdf(
self,
mail_message: MailMessage,
pdf_layout: MailRule.PdfLayout | None = None,
) -> Path:
archive_path = Path(self.tempdir) / "merged.pdf"
mail_pdf_file = self.generate_pdf_from_mail(mail_message)
pdf_layout = (
pdf_layout or settings.EMAIL_PARSE_DEFAULT_LAYOUT
) # EMAIL_PARSE_DEFAULT_LAYOUT is a MailRule.PdfLayout
# If no HTML content, create the PDF from the message
# Otherwise, create 2 PDFs and merge them with Gotenberg
if not mail_message.html:
@ -246,7 +265,15 @@ class MailDocumentParser(DocumentParser):
if pdf_a_format is not None:
route.pdf_format(pdf_a_format)
route.merge([mail_pdf_file, pdf_of_html_content])
match pdf_layout:
case MailRule.PdfLayout.HTML_TEXT:
route.merge([pdf_of_html_content, mail_pdf_file])
case MailRule.PdfLayout.HTML_ONLY:
route.merge([pdf_of_html_content])
case MailRule.PdfLayout.TEXT_ONLY:
route.merge([mail_pdf_file])
case MailRule.PdfLayout.TEXT_HTML | _:
route.merge([mail_pdf_file, pdf_of_html_content])
try:
response = route.run()

View File

@ -96,6 +96,7 @@ class MailRuleSerializer(OwnedObjectSerializer):
"order",
"attachment_type",
"consumption_scope",
"pdf_layout",
"owner",
"user_can_change",
"permissions",

View File

@ -1,6 +1,7 @@
import datetime
import logging
from pathlib import Path
from unittest import mock
import httpx
import pytest
@ -662,3 +663,67 @@ class TestParser:
request = httpx_mock.get_request()
assert str(request.url) == "http://localhost:3000/forms/chromium/convert/html"
@pytest.mark.httpx_mock(can_send_already_matched_responses=True)
@mock.patch("gotenberg_client._merge.MergeRoute.merge")
@mock.patch("paperless_mail.models.MailRule.objects.get")
def test_generate_pdf_layout_options(
    self,
    mock_mailrule_get: mock.Mock,
    mock_merge_route: mock.Mock,
    httpx_mock: HTTPXMock,
    mail_parser: MailDocumentParser,
    html_email_file: Path,
    html_email_pdf_file: Path,
):
    """
    GIVEN:
        - An HTML email and a mail rule that carries a PDF layout
    WHEN:
        - The email is parsed once for every layout option
    THEN:
        - The PDFs handed to the Gotenberg merge route match the layout,
          both in number and in order
    """
    # Canned responses for the three services the parser talks to; the
    # httpx_mock marker above lets them be matched again on every pass.
    tika_payload = {
        "Content-Type": "text/html",
        "X-TIKA:Parsed-By": [],
        "X-TIKA:content": "This is some Tika HTML text",
    }
    httpx_mock.add_response(
        url="http://localhost:9998/tika/text",
        method="PUT",
        json=tika_payload,
    )
    httpx_mock.add_response(
        url="http://localhost:3000/forms/chromium/convert/html",
        method="POST",
        content=html_email_pdf_file.read_bytes(),
    )
    httpx_mock.add_response(
        url="http://localhost:3000/forms/pdfengines/merge",
        method="POST",
        content=b"Pretend merged PDF content",
    )

    # (layout value, expected number of merged PDFs, expected PDF file names)
    layout_cases = (
        # 1 = MailRule.PdfLayout.TEXT_HTML
        (1, 2, ["email_as_pdf.pdf", "html.pdf"]),
        # 2 = MailRule.PdfLayout.HTML_TEXT
        (2, 2, ["html.pdf", "email_as_pdf.pdf"]),
        # 3 = MailRule.PdfLayout.HTML_ONLY
        (3, 1, ["html.pdf"]),
        # 4 = MailRule.PdfLayout.TEXT_ONLY
        (4, 1, ["email_as_pdf.pdf"]),
    )

    for layout_value, expected_count, expected_names in layout_cases:
        # The patched lookup returns a stand-in rule exposing only pdf_layout.
        mock_mailrule_get.return_value = mock.Mock(pdf_layout=layout_value)

        mail_parser.parse(
            document_path=html_email_file,
            mime_type="message/rfc822",
            mailrule_id=1,
        )

        # call_args always reflects the most recent merge() invocation.
        positional, _kwargs = mock_merge_route.call_args
        merged_files = positional[0]

        assert len(merged_files) == expected_count
        for position, expected_name in enumerate(expected_names):
            assert merged_files[position].name == expected_name