add tests for mail_to_html and generate_pdf_from_mail

This commit is contained in:
phail 2022-10-23 17:18:10 +02:00
parent 567e89d1c7
commit 30372b0e85
3 changed files with 92 additions and 1 deletions

View File

@ -191,7 +191,7 @@ class MailDocumentParser(DocumentParser):
return pdf_path
@staticmethod
def mail_to_html(mail):
def mail_to_html(mail) -> StringIO:
data = {}
def clean_html(text: str):

View File

@ -0,0 +1,45 @@
<!doctype html>
<html>
<!--
  Rendered HTML view of a single e-mail: a header grid (date, sender, subject,
  recipients, attachments), a separator line, and the message content.
  NOTE(review): this appears to be the expected-output fixture that
  test_mail_to_html compares against the result of mail_to_html for
  "html.eml"; if so, any markup change here must match the generating
  template. Confirm before editing.
-->
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<link href="output.css" rel="stylesheet">
</head>
<body class="bg-white w-screen flex flex-col items-center">
<div class="container max-w-4xl">
<!-- Header: one grid row per mail field (label in column 1, value from column 2 on) -->
<div class="grid gap-x-2 bg-slate-200 p-4">
<div class="col-start-9 col-span-4 row-start-1 text-right">2022-10-15 09:23</div>
<div class="col-start-1 row-start-1 text-slate-400 text-right">From</div>
<div class="col-start-2 col-span-7 row-start-1">Name &lt;<a href="mailto:someone@example.de">someone@example.de</a>&gt;</div>
<div class="col-start-1 row-start-2 text-slate-400 text-right">Subject</div>
<div class=" col-start-2 col-span-10 row-start-2 font-bold">HTML Message</div>
<div class="col-start-1 row-start-3 text-slate-400 text-right">To</div>
<div class="col-start-2 col-span-10 row-start-3 text-sm my-0.5"><a href="mailto:someone@example.de">someone@example.de</a></div>
<!-- Rows 4 and 5 have an empty label and an empty value in this sample. -->
<div class="col-start-1 row-start-4 text-slate-400 text-right"></div>
<div class="col-start-2 col-span-10 row-start-4 text-sm my-0.5"></div>
<div class="col-start-1 row-start-5 text-slate-400 text-right"></div>
<!--
  NOTE(review): in the next element the class attribute closes before
  "text-sm my-0.5", so those two tokens are parsed as separate attributes
  instead of CSS classes (compare with the row-start-4 value cell above).
  Presumably this mirrors the generating template; verify and fix both
  together rather than changing only this file.
-->
<div class="col-start-2 col-span-10 row-start-5" text-sm my-0.5></div>
<div class="col-start-1 row-start-6 text-slate-400 text-right">Attachments</div>
<div class="col-start-2 col-span-10 row-start-6">IntM6gnXFm00FEV5.png (6.89 KiB)</div>
</div>
<!-- Separator: 1px-high bordered bar between the header grid and the content -->
<div class="border-t border-solid border-b w-full h-[1px] box-content border-black mb-5 bg-slate-200"></div>
<!-- Content: the message text, with line breaks rendered as <br> -->
<div class="w-full break-words">Some Text<br><br>and an embedded image.</div>
</div>
</body>
</html>

View File

@ -303,6 +303,52 @@ class TestParser(TestCase):
f"PDF looks different. Check if {converted} looks weird.",
)
def test_mail_to_html(self):
    """Check that ``mail_to_html`` renders ``html.eml`` as expected.

    Parses the sample mail ``html.eml``, converts it with
    ``MailDocumentParser.mail_to_html`` and compares the result with the
    stored reference ``html.eml.html`` using ``assertHTMLEqual``.
    """
    parser = MailDocumentParser(None)
    mail = parser.get_parsed(os.path.join(self.SAMPLE_FILES, "html.eml"))
    html_handle = parser.mail_to_html(mail)

    # Decode the reference file explicitly as UTF-8 so the comparison does
    # not depend on the platform's default text encoding.
    with open(
        os.path.join(self.SAMPLE_FILES, "html.eml.html"),
        encoding="utf-8",
    ) as html_expected_handle:
        self.assertHTMLEqual(html_expected_handle.read(), html_handle.read())
@mock.patch("documents.loggers.LoggingMixin.log")  # Disable log output
def test_generate_pdf_from_mail(self, m):
    """Check that the PDF generated from ``html.eml`` looks as expected.

    The PDF bytes returned by ``generate_pdf_from_mail`` are written to the
    parser's temporary directory, converted to a WebP image with
    ``run_convert``, and the SHA-256-style hex digest from ``self.hashfile``
    of that image is compared with a known value.

    Args:
        m: Mock injected by ``mock.patch`` in place of
            ``LoggingMixin.log``; not used directly.
    """
    parser = MailDocumentParser(None)
    mail = parser.get_parsed(os.path.join(self.SAMPLE_FILES, "html.eml"))

    pdf_path = os.path.join(parser.tempdir, "test_generate_pdf_from_mail.pdf")
    # The context manager closes the file on exit, so no explicit close()
    # is needed before the converter reads it.
    with open(pdf_path, "wb") as file:
        file.write(parser.generate_pdf_from_mail(mail))

    converted = os.path.join(parser.tempdir, "test_generate_pdf_from_mail.webp")
    run_convert(
        density=300,
        scale="500x5000>",
        alpha="remove",
        strip=True,
        trim=False,
        auto_orient=True,
        input_file=pdf_path,  # Do not define an index to convert all pages.
        output_file=converted,
        logging_group=None,
    )
    self.assertTrue(os.path.isfile(converted))

    thumb_hash = self.hashfile(converted)
    # The created pdf is not reproducible. But the converted image should
    # always look the same.
    expected_hash = (
        "635bda532707faf69f06b040660445b656abcc7d622cc29c24a5c7fd2c713c5f"
    )
    self.assertEqual(
        thumb_hash,
        expected_hash,
        f"PDF looks different. Check if {converted} looks weird.",
    )
def test_transform_inline_html(self):
class MailAttachmentMock:
def __init__(self, payload, content_id):