test for broken eml, add test_generate_pdf

This commit is contained in:
phail
2022-10-22 02:25:23 +02:00
parent f1f5227ccd
commit 567e89d1c7
4 changed files with 68 additions and 44 deletions

View File

@@ -35,6 +35,11 @@ class MailDocumentParser(DocumentParser):
raise ParseError(
f"Could not parse {document_path}: {err}",
)
if not self._parsed.from_values:
self._parsed = None
raise ParseError(
f"Could not parse {document_path}: Missing 'from'",
)
return self._parsed
@@ -185,7 +190,8 @@ class MailDocumentParser(DocumentParser):
return pdf_path
def mail_to_html(self, mail):
@staticmethod
def mail_to_html(mail):
data = {}
def clean_html(text: str):
@@ -230,15 +236,6 @@ class MailDocumentParser(DocumentParser):
if data["attachments"] != "":
data["attachments_label"] = "Attachments"
if len(mail.attachments) >= 1:
att = []
for a in mail.attachments:
if a.size >= 1024 * 600:
att.append(f"{a.filename} ({(a.size / 1024 / 1024):.2f} MiB)")
else:
att.append(f"{a.filename} ({(a.size / 1024):.2f} KiB)")
self.text += f"Attachments: {', '.join(att)}\n\n"
data["date"] = clean_html(mail.date.astimezone().strftime("%Y-%m-%d %H:%M"))
data["content"] = clean_html(mail.text.strip())