diff --git a/src/paperless_mail/parsers.py b/src/paperless_mail/parsers.py
index da0cf96b9..c4ecaf861 100644
--- a/src/paperless_mail/parsers.py
+++ b/src/paperless_mail/parsers.py
@@ -1,5 +1,6 @@
 import os
 import re
+from html import escape
 from io import BytesIO
 from io import StringIO
 
@@ -198,12 +199,7 @@ class MailDocumentParser(DocumentParser):
                 text = "\n".join([str(e) for e in text])
             if type(text) != str:
                 text = str(text)
-            text = text.replace("&", "&amp;")
-            text = text.replace("<", "&lt;")
-            text = text.replace(">", "&gt;")
-            text = text.replace("  ", "&nbsp;&nbsp;")
-            text = text.replace("'", "&apos;")
-            text = text.replace('"', "&quot;")
+            text = escape(text)
             text = clean(text)
             text = linkify(text, parse_email=True)
             text = text.replace("\n", "<br>")
diff --git a/src/paperless_mail/tests/test_parsers.py b/src/paperless_mail/tests/test_parsers.py
index 4123e1cc8..1a348b472 100644
--- a/src/paperless_mail/tests/test_parsers.py
+++ b/src/paperless_mail/tests/test_parsers.py
@@ -364,11 +364,13 @@ class TestParser(TestCase):
     def test_mail_to_html(self):
         mail = self.parser.get_parsed(os.path.join(self.SAMPLE_FILES, "html.eml"))
         html_handle = self.parser.mail_to_html(mail)
+        html_received = html_handle.read()
 
         with open(
             os.path.join(self.SAMPLE_FILES, "html.eml.html"),
         ) as html_expected_handle:
-            self.assertHTMLEqual(html_expected_handle.read(), html_handle.read())
+            html_expected = html_expected_handle.read()
+            self.assertHTMLEqual(html_expected, html_received)
 
     @mock.patch("paperless_mail.parsers.requests.post")
     @mock.patch("paperless_mail.parsers.MailDocumentParser.mail_to_html")