From d9796e5003b9a5bba80f4f1167d07524edb6aa3a Mon Sep 17 00:00:00 2001 From: phail Date: Sun, 20 Nov 2022 20:24:36 +0100 Subject: [PATCH] change order of elements in parsed Texts --- src/paperless_mail/parsers.py | 5 +++-- src/paperless_mail/tests/test_parsers.py | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/paperless_mail/parsers.py b/src/paperless_mail/parsers.py index 8f8dd2d37..d50217f2e 100644 --- a/src/paperless_mail/parsers.py +++ b/src/paperless_mail/parsers.py @@ -112,8 +112,7 @@ class MailDocumentParser(DocumentParser): mail = self.get_parsed(document_path) - self.text = f"{strip_text(mail.text)}\n\n" - self.text += f"Subject: {mail.subject}\n\n" + self.text = f"Subject: {mail.subject}\n\n" self.text += f"From: {mail.from_values.full}\n\n" self.text += f"To: {', '.join(address.full for address in mail.to_values)}\n\n" if len(mail.cc_values) >= 1: @@ -134,6 +133,8 @@ class MailDocumentParser(DocumentParser): if mail.html != "": self.text += "HTML content: " + strip_text(self.tika_parse(mail.html)) + self.text += f"\n\n{strip_text(mail.text)}" + self.date = mail.date self.archive_path = self.generate_pdf(document_path) diff --git a/src/paperless_mail/tests/test_parsers.py b/src/paperless_mail/tests/test_parsers.py index 315e82a1a..6e47c70ed 100644 --- a/src/paperless_mail/tests/test_parsers.py +++ b/src/paperless_mail/tests/test_parsers.py @@ -231,7 +231,7 @@ class TestParser(TestCase): @mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf") def test_parse_html_eml(self, n, mock_tika_parse: mock.MagicMock): # Validate parsing returns the expected results - text_expected = "Some Text and an embedded image.\n\nSubject: HTML Message\n\nFrom: Name \n\nTo: someone@example.de\n\nAttachments: IntM6gnXFm00FEV5.png (6.89 KiB), 600+kbfile.txt (600.24 KiB)\n\nHTML content: tika return" + text_expected = "Subject: HTML Message\n\nFrom: Name \n\nTo: someone@example.de\n\nAttachments: IntM6gnXFm00FEV5.png (6.89 KiB), 600+kbfile.txt (600.24 KiB)\n\nHTML content: tika return\n\nSome Text and an embedded image." mock_tika_parse.return_value = "tika return" self.parser.parse(os.path.join(self.SAMPLE_FILES, "html.eml"), "message/rfc822") @@ -258,7 +258,7 @@ class TestParser(TestCase): os.path.join(self.SAMPLE_FILES, "simple_text.eml"), "message/rfc822", ) - text_expected = "This is just a simple Text Mail.\n\nSubject: Simple Text Mail\n\nFrom: Some One \n\nTo: some@one.de\n\nCC: asdasd@æsdasd.de, asdadasdasdasda.asdasd@æsdasd.de\n\nBCC: fdf@fvf.de\n\n" + text_expected = "Subject: Simple Text Mail\n\nFrom: Some One \n\nTo: some@one.de\n\nCC: asdasd@æsdasd.de, asdadasdasdasda.asdasd@æsdasd.de\n\nBCC: fdf@fvf.de\n\n\n\nThis is just a simple Text Mail." self.assertEqual(text_expected, self.parser.text) self.assertEqual( datetime.datetime(