mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-10-30 03:56:23 -05:00 
			
		
		
		
	Enhancement: add layout options for email conversion (#8907)
--------- Co-authored-by: shamoon <4887959+shamoon@users.noreply.github.com>
This commit is contained in:
		| @@ -48,6 +48,7 @@ from documents.templating.workflows import parse_w_workflow_placeholders | ||||
| from documents.utils import copy_basic_file_stats | ||||
| from documents.utils import copy_file_with_basic_stats | ||||
| from documents.utils import run_subprocess | ||||
| from paperless_mail.parsers import MailDocumentParser | ||||
|  | ||||
|  | ||||
| class WorkflowTriggerPlugin( | ||||
| @@ -479,7 +480,18 @@ class ConsumerPlugin( | ||||
|                 ConsumerStatusShortMessage.PARSING_DOCUMENT, | ||||
|             ) | ||||
|             self.log.debug(f"Parsing {self.filename}...") | ||||
|             document_parser.parse(self.working_copy, mime_type, self.filename) | ||||
|             if ( | ||||
|                 isinstance(document_parser, MailDocumentParser) | ||||
|                 and self.input_doc.mailrule_id | ||||
|             ): | ||||
|                 document_parser.parse( | ||||
|                     self.working_copy, | ||||
|                     mime_type, | ||||
|                     self.filename, | ||||
|                     self.input_doc.mailrule_id, | ||||
|                 ) | ||||
|             else: | ||||
|                 document_parser.parse(self.working_copy, mime_type, self.filename) | ||||
|  | ||||
|             self.log.debug(f"Generating thumbnail for {self.filename}...") | ||||
|             self._send_progress( | ||||
|   | ||||
| @@ -21,6 +21,7 @@ from guardian.core import ObjectPermissionChecker | ||||
|  | ||||
| from documents.consumer import ConsumerError | ||||
| from documents.data_models import DocumentMetadataOverrides | ||||
| from documents.data_models import DocumentSource | ||||
| from documents.models import Correspondent | ||||
| from documents.models import CustomField | ||||
| from documents.models import Document | ||||
| @@ -35,6 +36,8 @@ from documents.tasks import sanity_check | ||||
| from documents.tests.utils import DirectoriesMixin | ||||
| from documents.tests.utils import FileSystemAssertsMixin | ||||
| from documents.tests.utils import GetConsumerMixin | ||||
| from paperless_mail.models import MailRule | ||||
| from paperless_mail.parsers import MailDocumentParser | ||||
|  | ||||
|  | ||||
| class TestAttributes(UnittestTestCase): | ||||
| @@ -243,6 +246,8 @@ def fake_magic_from_file(file, *, mime=False): | ||||
|             return "image/png" | ||||
|         elif os.path.splitext(file)[1] == ".webp": | ||||
|             return "image/webp" | ||||
|         elif os.path.splitext(file)[1] == ".eml": | ||||
|             return "message/rfc822" | ||||
|         else: | ||||
|             return "unknown" | ||||
|     else: | ||||
| @@ -975,6 +980,59 @@ class TestConsumer( | ||||
|             self.assertEqual(command[0], "qpdf") | ||||
|             self.assertEqual(command[1], "--replace-input") | ||||
|  | ||||
|     @mock.patch("paperless_mail.models.MailRule.objects.get") | ||||
|     @mock.patch("paperless_mail.parsers.MailDocumentParser.parse") | ||||
|     @mock.patch("documents.parsers.document_consumer_declaration.send") | ||||
|     def test_mail_parser_receives_mailrule( | ||||
|         self, | ||||
|         mock_consumer_declaration_send: mock.Mock, | ||||
|         mock_mail_parser_parse: mock.Mock, | ||||
|         mock_mailrule_get: mock.Mock, | ||||
|     ): | ||||
|         """ | ||||
|         GIVEN: | ||||
|             - A mail document from a mail rule | ||||
|         WHEN: | ||||
|             - The consumer is run | ||||
|         THEN: | ||||
|             - The mail parser should receive the mail rule | ||||
|         """ | ||||
|         mock_consumer_declaration_send.return_value = [ | ||||
|             ( | ||||
|                 None, | ||||
|                 { | ||||
|                     "parser": MailDocumentParser, | ||||
|                     "mime_types": {"message/rfc822": ".eml"}, | ||||
|                     "weight": 0, | ||||
|                 }, | ||||
|             ), | ||||
|         ] | ||||
|         mock_mailrule_get.return_value = mock.Mock( | ||||
|             pdf_layout=MailRule.PdfLayout.HTML_ONLY, | ||||
|         ) | ||||
|         with self.get_consumer( | ||||
|             filepath=( | ||||
|                 Path(__file__).parent.parent.parent | ||||
|                 / Path("paperless_mail") | ||||
|                 / Path("tests") | ||||
|                 / Path("samples") | ||||
|             ).resolve() | ||||
|             / "html.eml", | ||||
|             source=DocumentSource.MailFetch, | ||||
|             mailrule_id=1, | ||||
|         ) as consumer: | ||||
|             # fails because no gotenberg | ||||
|             with self.assertRaises( | ||||
|                 ConsumerError, | ||||
|             ): | ||||
|                 consumer.run() | ||||
|                 mock_mail_parser_parse.assert_called_once_with( | ||||
|                     consumer.working_copy, | ||||
|                     "message/rfc822", | ||||
|                     file_name="sample.pdf", | ||||
|                     mailrule=mock_mailrule_get.return_value, | ||||
|                 ) | ||||
|  | ||||
|  | ||||
| @mock.patch("documents.consumer.magic.from_file", fake_magic_from_file) | ||||
| class TestConsumerCreatedDate(DirectoriesMixin, GetConsumerMixin, TestCase): | ||||
|   | ||||
| @@ -8,7 +8,7 @@ class TestMigrateWorkflow(TestMigrations): | ||||
|     dependencies = ( | ||||
|         ( | ||||
|             "paperless_mail", | ||||
|             "0028_alter_mailaccount_password_and_more", | ||||
|             "0029_mailrule_pdf_layout", | ||||
|         ), | ||||
|     ) | ||||
|  | ||||
|   | ||||
| @@ -340,11 +340,16 @@ class GetConsumerMixin: | ||||
|         filepath: Path, | ||||
|         overrides: DocumentMetadataOverrides | None = None, | ||||
|         source: DocumentSource = DocumentSource.ConsumeFolder, | ||||
|         mailrule_id: int | None = None, | ||||
|     ) -> Generator[ConsumerPlugin, None, None]: | ||||
|         # Store this for verification | ||||
|         self.status = DummyProgressManager(filepath.name, None) | ||||
|         reader = ConsumerPlugin( | ||||
|             ConsumableDocument(source, original_file=filepath), | ||||
|             ConsumableDocument( | ||||
|                 source, | ||||
|                 original_file=filepath, | ||||
|                 mailrule_id=mailrule_id or None, | ||||
|             ), | ||||
|             overrides or DocumentMetadataOverrides(), | ||||
|             self.status,  # type: ignore | ||||
|             self.dirs.scratch_dir, | ||||
|   | ||||
| @@ -2,7 +2,7 @@ msgid "" | ||||
| msgstr "" | ||||
| "Project-Id-Version: paperless-ngx\n" | ||||
| "Report-Msgid-Bugs-To: \n" | ||||
| "POT-Creation-Date: 2025-01-27 08:19-0800\n" | ||||
| "POT-Creation-Date: 2025-01-28 12:17-0800\n" | ||||
| "PO-Revision-Date: 2022-02-17 04:17\n" | ||||
| "Last-Translator: \n" | ||||
| "Language-Team: English\n" | ||||
| @@ -90,7 +90,7 @@ msgid "Automatic" | ||||
| msgstr "" | ||||
|  | ||||
| #: documents/models.py:67 documents/models.py:433 documents/models.py:1493 | ||||
| #: paperless_mail/models.py:23 paperless_mail/models.py:136 | ||||
| #: paperless_mail/models.py:23 paperless_mail/models.py:143 | ||||
| msgid "name" | ||||
| msgstr "" | ||||
|  | ||||
| @@ -276,7 +276,7 @@ msgstr "" | ||||
| msgid "warning" | ||||
| msgstr "" | ||||
|  | ||||
| #: documents/models.py:387 paperless_mail/models.py:350 | ||||
| #: documents/models.py:387 paperless_mail/models.py:363 | ||||
| msgid "error" | ||||
| msgstr "" | ||||
|  | ||||
| @@ -818,7 +818,7 @@ msgstr "" | ||||
| msgid "filter filename" | ||||
| msgstr "" | ||||
|  | ||||
| #: documents/models.py:1066 paperless_mail/models.py:193 | ||||
| #: documents/models.py:1066 paperless_mail/models.py:200 | ||||
| msgid "" | ||||
| "Only consume documents which entirely match this filename if specified. " | ||||
| "Wildcards such as *.pdf or *invoice* are allowed. Case insensitive." | ||||
| @@ -988,15 +988,15 @@ msgid "" | ||||
| "Assign a document title, can include some placeholders, see documentation." | ||||
| msgstr "" | ||||
|  | ||||
| #: documents/models.py:1287 paperless_mail/models.py:261 | ||||
| #: documents/models.py:1287 paperless_mail/models.py:274 | ||||
| msgid "assign this tag" | ||||
| msgstr "" | ||||
|  | ||||
| #: documents/models.py:1296 paperless_mail/models.py:269 | ||||
| #: documents/models.py:1296 paperless_mail/models.py:282 | ||||
| msgid "assign this document type" | ||||
| msgstr "" | ||||
|  | ||||
| #: documents/models.py:1305 paperless_mail/models.py:283 | ||||
| #: documents/models.py:1305 paperless_mail/models.py:296 | ||||
| msgid "assign this correspondent" | ||||
| msgstr "" | ||||
|  | ||||
| @@ -1112,7 +1112,7 @@ msgstr "" | ||||
| msgid "workflow actions" | ||||
| msgstr "" | ||||
|  | ||||
| #: documents/models.py:1495 paperless_mail/models.py:138 | ||||
| #: documents/models.py:1495 paperless_mail/models.py:145 | ||||
| msgid "order" | ||||
| msgstr "" | ||||
|  | ||||
| @@ -1124,7 +1124,7 @@ msgstr "" | ||||
| msgid "actions" | ||||
| msgstr "" | ||||
|  | ||||
| #: documents/models.py:1511 paperless_mail/models.py:147 | ||||
| #: documents/models.py:1511 paperless_mail/models.py:154 | ||||
| msgid "enabled" | ||||
| msgstr "" | ||||
|  | ||||
| @@ -1838,161 +1838,185 @@ msgid "Process all files, including 'inline' attachments." | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless_mail/models.py:119 | ||||
| msgid "Delete" | ||||
| msgid "System default" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless_mail/models.py:120 | ||||
| msgid "Move to specified folder" | ||||
| msgid "Text, then HTML" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless_mail/models.py:121 | ||||
| msgid "Mark as read, don't process read mails" | ||||
| msgid "HTML, then text" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless_mail/models.py:122 | ||||
| msgid "Flag the mail, don't process flagged mails" | ||||
| msgid "HTML only" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless_mail/models.py:123 | ||||
| msgid "Tag the mail with specified tag, don't process tagged mails" | ||||
| msgid "Text only" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless_mail/models.py:126 | ||||
| msgid "Use subject as title" | ||||
| msgid "Delete" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless_mail/models.py:127 | ||||
| msgid "Use attachment filename as title" | ||||
| msgid "Move to specified folder" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless_mail/models.py:128 | ||||
| msgid "Do not assign title from rule" | ||||
| msgid "Mark as read, don't process read mails" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless_mail/models.py:131 | ||||
| msgid "Do not assign a correspondent" | ||||
| #: paperless_mail/models.py:129 | ||||
| msgid "Flag the mail, don't process flagged mails" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless_mail/models.py:132 | ||||
| msgid "Use mail address" | ||||
| #: paperless_mail/models.py:130 | ||||
| msgid "Tag the mail with specified tag, don't process tagged mails" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless_mail/models.py:133 | ||||
| msgid "Use name (or mail address if not available)" | ||||
| msgid "Use subject as title" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless_mail/models.py:134 | ||||
| msgid "Use attachment filename as title" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless_mail/models.py:135 | ||||
| msgid "Do not assign title from rule" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless_mail/models.py:138 | ||||
| msgid "Do not assign a correspondent" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless_mail/models.py:139 | ||||
| msgid "Use mail address" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless_mail/models.py:140 | ||||
| msgid "Use name (or mail address if not available)" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless_mail/models.py:141 | ||||
| msgid "Use correspondent selected below" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless_mail/models.py:144 | ||||
| #: paperless_mail/models.py:151 | ||||
| msgid "account" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless_mail/models.py:150 paperless_mail/models.py:305 | ||||
| #: paperless_mail/models.py:157 paperless_mail/models.py:318 | ||||
| msgid "folder" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless_mail/models.py:154 | ||||
| #: paperless_mail/models.py:161 | ||||
| msgid "" | ||||
| "Subfolders must be separated by a delimiter, often a dot ('.') or slash " | ||||
| "('/'), but it varies by mail server." | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless_mail/models.py:160 | ||||
| #: paperless_mail/models.py:167 | ||||
| msgid "filter from" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless_mail/models.py:167 | ||||
| #: paperless_mail/models.py:174 | ||||
| msgid "filter to" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless_mail/models.py:174 | ||||
| #: paperless_mail/models.py:181 | ||||
| msgid "filter subject" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless_mail/models.py:181 | ||||
| #: paperless_mail/models.py:188 | ||||
| msgid "filter body" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless_mail/models.py:188 | ||||
| #: paperless_mail/models.py:195 | ||||
| msgid "filter attachment filename inclusive" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless_mail/models.py:200 | ||||
| #: paperless_mail/models.py:207 | ||||
| msgid "filter attachment filename exclusive" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless_mail/models.py:205 | ||||
| #: paperless_mail/models.py:212 | ||||
| msgid "" | ||||
| "Do not consume documents which entirely match this filename if specified. " | ||||
| "Wildcards such as *.pdf or *invoice* are allowed. Case insensitive." | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless_mail/models.py:212 | ||||
| #: paperless_mail/models.py:219 | ||||
| msgid "maximum age" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless_mail/models.py:214 | ||||
| #: paperless_mail/models.py:221 | ||||
| msgid "Specified in days." | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless_mail/models.py:218 | ||||
| #: paperless_mail/models.py:225 | ||||
| msgid "attachment type" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless_mail/models.py:222 | ||||
| #: paperless_mail/models.py:229 | ||||
| msgid "" | ||||
| "Inline attachments include embedded images, so it's best to combine this " | ||||
| "option with a filename filter." | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless_mail/models.py:228 | ||||
| #: paperless_mail/models.py:235 | ||||
| msgid "consumption scope" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless_mail/models.py:234 | ||||
| #: paperless_mail/models.py:241 | ||||
| msgid "pdf layout" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless_mail/models.py:247 | ||||
| msgid "action" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless_mail/models.py:240 | ||||
| #: paperless_mail/models.py:253 | ||||
| msgid "action parameter" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless_mail/models.py:245 | ||||
| #: paperless_mail/models.py:258 | ||||
| msgid "" | ||||
| "Additional parameter for the action selected above, i.e., the target folder " | ||||
| "of the move to folder action. Subfolders must be separated by dots." | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless_mail/models.py:253 | ||||
| #: paperless_mail/models.py:266 | ||||
| msgid "assign title from" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless_mail/models.py:273 | ||||
| #: paperless_mail/models.py:286 | ||||
| msgid "assign correspondent from" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless_mail/models.py:287 | ||||
| #: paperless_mail/models.py:300 | ||||
| msgid "Assign the rule owner to documents" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless_mail/models.py:313 | ||||
| #: paperless_mail/models.py:326 | ||||
| msgid "uid" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless_mail/models.py:321 | ||||
| #: paperless_mail/models.py:334 | ||||
| msgid "subject" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless_mail/models.py:329 | ||||
| #: paperless_mail/models.py:342 | ||||
| msgid "received" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless_mail/models.py:336 | ||||
| #: paperless_mail/models.py:349 | ||||
| msgid "processed" | ||||
| msgstr "" | ||||
|  | ||||
| #: paperless_mail/models.py:342 | ||||
| #: paperless_mail/models.py:355 | ||||
| msgid "status" | ||||
| msgstr "" | ||||
|   | ||||
| @@ -1030,6 +1030,11 @@ CONVERT_MEMORY_LIMIT = os.getenv("PAPERLESS_CONVERT_MEMORY_LIMIT") | ||||
|  | ||||
| GS_BINARY = os.getenv("PAPERLESS_GS_BINARY", "gs") | ||||
|  | ||||
| # Fallback PDF layout for .eml consumption, used when no mail rule layout is set | ||||
| EMAIL_PARSE_DEFAULT_LAYOUT = __get_int( | ||||
|     "PAPERLESS_EMAIL_PARSE_DEFAULT_LAYOUT", | ||||
|     1,  # value of MailRule.PdfLayout.TEXT_HTML; the enum can't be imported here | ||||
| ) | ||||
|  | ||||
| # Pre-2.x versions of Paperless stored your documents locally with GPG | ||||
| # encryption, but that is no longer the default.  This behaviour is still | ||||
|   | ||||
							
								
								
									
										28
									
								
								src/paperless_mail/migrations/0029_mailrule_pdf_layout.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										28
									
								
								src/paperless_mail/migrations/0029_mailrule_pdf_layout.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,28 @@ | ||||
| # Generated by Django 5.1.3 on 2024-11-24 12:39 | ||||
|  | ||||
| from django.db import migrations | ||||
| from django.db import models | ||||
|  | ||||
|  | ||||
| class Migration(migrations.Migration): | ||||
|     dependencies = [ | ||||
|         ("paperless_mail", "0028_alter_mailaccount_password_and_more"), | ||||
|     ] | ||||
|  | ||||
|     operations = [ | ||||
|         migrations.AddField( | ||||
|             model_name="mailrule", | ||||
|             name="pdf_layout", | ||||
|             field=models.PositiveIntegerField( | ||||
|                 choices=[ | ||||
|                     (0, "System default"), | ||||
|                     (1, "Text, then HTML"), | ||||
|                     (2, "HTML, then text"), | ||||
|                     (3, "HTML only"), | ||||
|                     (4, "Text only"), | ||||
|                 ], | ||||
|                 default=0, | ||||
|                 verbose_name="pdf layout", | ||||
|             ), | ||||
|         ), | ||||
|     ] | ||||
| @@ -115,6 +115,13 @@ class MailRule(document_models.ModelWithOwner): | ||||
|         ATTACHMENTS_ONLY = 1, _("Only process attachments.") | ||||
|         EVERYTHING = 2, _("Process all files, including 'inline' attachments.") | ||||
|  | ||||
|     class PdfLayout(models.IntegerChoices): | ||||
|         DEFAULT = 0, _("System default") | ||||
|         TEXT_HTML = 1, _("Text, then HTML") | ||||
|         HTML_TEXT = 2, _("HTML, then text") | ||||
|         HTML_ONLY = 3, _("HTML only") | ||||
|         TEXT_ONLY = 4, _("Text only") | ||||
|  | ||||
|     class MailAction(models.IntegerChoices): | ||||
|         DELETE = 1, _("Delete") | ||||
|         MOVE = 2, _("Move to specified folder") | ||||
| @@ -230,6 +237,12 @@ class MailRule(document_models.ModelWithOwner): | ||||
|         default=ConsumptionScope.ATTACHMENTS_ONLY, | ||||
|     ) | ||||
|  | ||||
|     pdf_layout = models.PositiveIntegerField( | ||||
|         _("pdf layout"), | ||||
|         choices=PdfLayout.choices, | ||||
|         default=PdfLayout.DEFAULT, | ||||
|     ) | ||||
|  | ||||
|     action = models.PositiveIntegerField( | ||||
|         _("action"), | ||||
|         choices=MailAction.choices, | ||||
|   | ||||
| @@ -22,6 +22,7 @@ from documents.parsers import DocumentParser | ||||
| from documents.parsers import ParseError | ||||
| from documents.parsers import make_thumbnail_from_pdf | ||||
| from paperless.models import OutputTypeChoices | ||||
| from paperless_mail.models import MailRule | ||||
|  | ||||
|  | ||||
| class MailDocumentParser(DocumentParser): | ||||
| @@ -121,7 +122,13 @@ class MailDocumentParser(DocumentParser): | ||||
|         result.sort(key=lambda item: (item["prefix"], item["key"])) | ||||
|         return result | ||||
|  | ||||
|     def parse(self, document_path: Path, mime_type: str, file_name=None): | ||||
|     def parse( | ||||
|         self, | ||||
|         document_path: Path, | ||||
|         mime_type: str, | ||||
|         file_name=None, | ||||
|         mailrule_id: int | None = None, | ||||
|     ): | ||||
|         """ | ||||
|         Parses the given .eml into formatted text, based on the decoded email. | ||||
|  | ||||
| @@ -180,7 +187,11 @@ class MailDocumentParser(DocumentParser): | ||||
|             self.date = mail.date | ||||
|  | ||||
|         self.log.debug("Creating a PDF from the email") | ||||
|         self.archive_path = self.generate_pdf(mail) | ||||
|         if mailrule_id: | ||||
|             rule = MailRule.objects.get(pk=mailrule_id) | ||||
|             self.archive_path = self.generate_pdf(mail, rule.pdf_layout) | ||||
|         else: | ||||
|             self.archive_path = self.generate_pdf(mail) | ||||
|  | ||||
|     @staticmethod | ||||
|     def parse_file_to_message(filepath: Path) -> MailMessage: | ||||
| @@ -217,11 +228,19 @@ class MailDocumentParser(DocumentParser): | ||||
|                 f"{settings.TIKA_ENDPOINT}: {err}", | ||||
|             ) from err | ||||
|  | ||||
|     def generate_pdf(self, mail_message: MailMessage) -> Path: | ||||
|     def generate_pdf( | ||||
|         self, | ||||
|         mail_message: MailMessage, | ||||
|         pdf_layout: MailRule.PdfLayout | None = None, | ||||
|     ) -> Path: | ||||
|         archive_path = Path(self.tempdir) / "merged.pdf" | ||||
|  | ||||
|         mail_pdf_file = self.generate_pdf_from_mail(mail_message) | ||||
|  | ||||
|         pdf_layout = ( | ||||
|             pdf_layout or settings.EMAIL_PARSE_DEFAULT_LAYOUT | ||||
|         )  # EMAIL_PARSE_DEFAULT_LAYOUT is a MailRule.PdfLayout | ||||
|  | ||||
|         # If no HTML content, create the PDF from the message | ||||
|         # Otherwise, create 2 PDFs and merge them with Gotenberg | ||||
|         if not mail_message.html: | ||||
| @@ -246,7 +265,15 @@ class MailDocumentParser(DocumentParser): | ||||
|                 if pdf_a_format is not None: | ||||
|                     route.pdf_format(pdf_a_format) | ||||
|  | ||||
|                 route.merge([mail_pdf_file, pdf_of_html_content]) | ||||
|                 match pdf_layout: | ||||
|                     case MailRule.PdfLayout.HTML_TEXT: | ||||
|                         route.merge([pdf_of_html_content, mail_pdf_file]) | ||||
|                     case MailRule.PdfLayout.HTML_ONLY: | ||||
|                         route.merge([pdf_of_html_content]) | ||||
|                     case MailRule.PdfLayout.TEXT_ONLY: | ||||
|                         route.merge([mail_pdf_file]) | ||||
|                     case MailRule.PdfLayout.TEXT_HTML | _: | ||||
|                         route.merge([mail_pdf_file, pdf_of_html_content]) | ||||
|  | ||||
|                 try: | ||||
|                     response = route.run() | ||||
|   | ||||
| @@ -96,6 +96,7 @@ class MailRuleSerializer(OwnedObjectSerializer): | ||||
|             "order", | ||||
|             "attachment_type", | ||||
|             "consumption_scope", | ||||
|             "pdf_layout", | ||||
|             "owner", | ||||
|             "user_can_change", | ||||
|             "permissions", | ||||
|   | ||||
| @@ -1,6 +1,7 @@ | ||||
| import datetime | ||||
| import logging | ||||
| from pathlib import Path | ||||
| from unittest import mock | ||||
|  | ||||
| import httpx | ||||
| import pytest | ||||
| @@ -662,3 +663,67 @@ class TestParser: | ||||
|         request = httpx_mock.get_request() | ||||
|  | ||||
|         assert str(request.url) == "http://localhost:3000/forms/chromium/convert/html" | ||||
|  | ||||
|     @pytest.mark.httpx_mock(can_send_already_matched_responses=True) | ||||
|     @mock.patch("gotenberg_client._merge.MergeRoute.merge") | ||||
|     @mock.patch("paperless_mail.models.MailRule.objects.get") | ||||
|     def test_generate_pdf_layout_options( | ||||
|         self, | ||||
|         mock_mailrule_get: mock.Mock, | ||||
|         mock_merge_route: mock.Mock, | ||||
|         httpx_mock: HTTPXMock, | ||||
|         mail_parser: MailDocumentParser, | ||||
|         html_email_file: Path, | ||||
|         html_email_pdf_file: Path, | ||||
|     ): | ||||
|         """ | ||||
|         GIVEN: | ||||
|             - An HTML email message and a mocked mail rule lookup | ||||
|         WHEN: | ||||
|             - The email is parsed once for each PDF layout option (1 through 4) | ||||
|         THEN: | ||||
|             - The (mocked) Gotenberg merge call receives the expected PDFs, | ||||
|               in the expected order, for each layout option | ||||
|         """ | ||||
|         httpx_mock.add_response( | ||||
|             url="http://localhost:9998/tika/text", | ||||
|             method="PUT", | ||||
|             json={ | ||||
|                 "Content-Type": "text/html", | ||||
|                 "X-TIKA:Parsed-By": [], | ||||
|                 "X-TIKA:content": "This is some Tika HTML text", | ||||
|             }, | ||||
|         ) | ||||
|         httpx_mock.add_response( | ||||
|             url="http://localhost:3000/forms/chromium/convert/html", | ||||
|             method="POST", | ||||
|             content=html_email_pdf_file.read_bytes(), | ||||
|         ) | ||||
|         httpx_mock.add_response( | ||||
|             url="http://localhost:3000/forms/pdfengines/merge", | ||||
|             method="POST", | ||||
|             content=b"Pretend merged PDF content", | ||||
|         ) | ||||
|  | ||||
|         def test_layout_option(layout_option, expected_calls, expected_pdf_names): | ||||
|             mock_mailrule_get.return_value = mock.Mock(pdf_layout=layout_option) | ||||
|             mail_parser.parse( | ||||
|                 document_path=html_email_file, | ||||
|                 mime_type="message/rfc822", | ||||
|                 mailrule_id=1, | ||||
|             ) | ||||
|             args, _ = mock_merge_route.call_args | ||||
|             assert len(args[0]) == expected_calls | ||||
|             for i, pdf in enumerate(expected_pdf_names): | ||||
|                 assert args[0][i].name == pdf | ||||
|  | ||||
|         # 1 = MailRule.PdfLayout.TEXT_HTML: two PDFs, email_as_pdf.pdf before html.pdf | ||||
|         test_layout_option(1, 2, ["email_as_pdf.pdf", "html.pdf"]) | ||||
|  | ||||
|         # 2 = MailRule.PdfLayout.HTML_TEXT: two PDFs, html.pdf before email_as_pdf.pdf | ||||
|         test_layout_option(2, 2, ["html.pdf", "email_as_pdf.pdf"]) | ||||
|  | ||||
|         # 3 = MailRule.PdfLayout.HTML_ONLY: only html.pdf is merged | ||||
|         test_layout_option(3, 1, ["html.pdf"]) | ||||
|  | ||||
|         # 4 = MailRule.PdfLayout.TEXT_ONLY: only email_as_pdf.pdf is merged | ||||
|         test_layout_option(4, 1, ["email_as_pdf.pdf"]) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Silvia Bigler
					Silvia Bigler