Change the order of elements in the parsed mail text

This commit is contained in:
phail 2022-11-20 20:24:36 +01:00
parent 3599bb52c0
commit d9796e5003
2 changed files with 5 additions and 4 deletions

View File

@ -112,8 +112,7 @@ class MailDocumentParser(DocumentParser):
mail = self.get_parsed(document_path)
self.text = f"{strip_text(mail.text)}\n\n"
self.text += f"Subject: {mail.subject}\n\n"
self.text = f"Subject: {mail.subject}\n\n"
self.text += f"From: {mail.from_values.full}\n\n"
self.text += f"To: {', '.join(address.full for address in mail.to_values)}\n\n"
if len(mail.cc_values) >= 1:
@ -134,6 +133,8 @@ class MailDocumentParser(DocumentParser):
if mail.html != "":
self.text += "HTML content: " + strip_text(self.tika_parse(mail.html))
self.text += f"\n\n{strip_text(mail.text)}"
self.date = mail.date
self.archive_path = self.generate_pdf(document_path)

View File

@ -231,7 +231,7 @@ class TestParser(TestCase):
@mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf")
def test_parse_html_eml(self, n, mock_tika_parse: mock.MagicMock):
# Validate parsing returns the expected results
text_expected = "Some Text and an embedded image.\n\nSubject: HTML Message\n\nFrom: Name <someone@example.de>\n\nTo: someone@example.de\n\nAttachments: IntM6gnXFm00FEV5.png (6.89 KiB), 600+kbfile.txt (600.24 KiB)\n\nHTML content: tika return"
text_expected = "Subject: HTML Message\n\nFrom: Name <someone@example.de>\n\nTo: someone@example.de\n\nAttachments: IntM6gnXFm00FEV5.png (6.89 KiB), 600+kbfile.txt (600.24 KiB)\n\nHTML content: tika return\n\nSome Text and an embedded image."
mock_tika_parse.return_value = "tika return"
self.parser.parse(os.path.join(self.SAMPLE_FILES, "html.eml"), "message/rfc822")
@ -258,7 +258,7 @@ class TestParser(TestCase):
os.path.join(self.SAMPLE_FILES, "simple_text.eml"),
"message/rfc822",
)
text_expected = "This is just a simple Text Mail.\n\nSubject: Simple Text Mail\n\nFrom: Some One <mail@someserver.de>\n\nTo: some@one.de\n\nCC: asdasd@æsdasd.de, asdadasdasdasda.asdasd@æsdasd.de\n\nBCC: fdf@fvf.de\n\n"
text_expected = "Subject: Simple Text Mail\n\nFrom: Some One <mail@someserver.de>\n\nTo: some@one.de\n\nCC: asdasd@æsdasd.de, asdadasdasdasda.asdasd@æsdasd.de\n\nBCC: fdf@fvf.de\n\n\n\nThis is just a simple Text Mail."
self.assertEqual(text_expected, self.parser.text)
self.assertEqual(
datetime.datetime(