diff --git a/src/paperless_mail/parsers.py b/src/paperless_mail/parsers.py index 654372666..f4c73276d 100644 --- a/src/paperless_mail/parsers.py +++ b/src/paperless_mail/parsers.py @@ -191,7 +191,7 @@ class MailDocumentParser(DocumentParser): return pdf_path @staticmethod - def mail_to_html(mail): + def mail_to_html(mail) -> StringIO: data = {} def clean_html(text: str): diff --git a/src/paperless_mail/tests/samples/html.eml.html b/src/paperless_mail/tests/samples/html.eml.html new file mode 100644 index 000000000..6b5e0438f --- /dev/null +++ b/src/paperless_mail/tests/samples/html.eml.html @@ -0,0 +1,45 @@ + + + + + + + + + + +
+ +
+ +
2022-10-15 09:23
+ +
From
+ + +
Subject
+
HTML Message
+ +
To
+ + +
+
+ +
+
+ +
Attachments
+
IntM6gnXFm00FEV5.png (6.89 KiB)
+
+ + +
+ + +
Some Text

and an embedded image.
+
+ + + + diff --git a/src/paperless_mail/tests/test_eml.py b/src/paperless_mail/tests/test_eml.py index d922167f1..e545067a4 100644 --- a/src/paperless_mail/tests/test_eml.py +++ b/src/paperless_mail/tests/test_eml.py @@ -303,6 +303,52 @@ class TestParser(TestCase): f"PDF looks different. Check if {converted} looks weird.", ) + def test_mail_to_html(self): + parser = MailDocumentParser(None) + mail = parser.get_parsed(os.path.join(self.SAMPLE_FILES, "html.eml")) + html_handle = parser.mail_to_html(mail) + + with open( + os.path.join(self.SAMPLE_FILES, "html.eml.html"), + ) as html_expected_handle: + self.assertHTMLEqual(html_expected_handle.read(), html_handle.read()) + + @mock.patch("documents.loggers.LoggingMixin.log") # Disable log output + def test_generate_pdf_from_mail(self, m): + parser = MailDocumentParser(None) + mail = parser.get_parsed(os.path.join(self.SAMPLE_FILES, "html.eml")) + + pdf_path = os.path.join(parser.tempdir, "test_generate_pdf_from_mail.pdf") + + with open(pdf_path, "wb") as file: + file.write(parser.generate_pdf_from_mail(mail)) + file.close() + + converted = os.path.join(parser.tempdir, "test_generate_pdf_from_mail.webp") + run_convert( + density=300, + scale="500x5000>", + alpha="remove", + strip=True, + trim=False, + auto_orient=True, + input_file=f"{pdf_path}", # Do not define an index to convert all pages. + output_file=converted, + logging_group=None, + ) + self.assertTrue(os.path.isfile(converted)) + thumb_hash = self.hashfile(converted) + + # The created pdf is not reproducible. But the converted image should always look the same. + expected_hash = ( + "635bda532707faf69f06b040660445b656abcc7d622cc29c24a5c7fd2c713c5f" + ) + self.assertEqual( + thumb_hash, + expected_hash, + f"PDF looks different. Check if {converted} looks weird.", + ) + def test_transform_inline_html(self): class MailAttachmentMock: def __init__(self, payload, content_id):