diff --git a/src/paperless_mail/parsers.py b/src/paperless_mail/parsers.py index ccfdfe3a3..654372666 100644 --- a/src/paperless_mail/parsers.py +++ b/src/paperless_mail/parsers.py @@ -35,6 +35,11 @@ class MailDocumentParser(DocumentParser): raise ParseError( f"Could not parse {document_path}: {err}", ) + if not self._parsed.from_values: + self._parsed = None + raise ParseError( + f"Could not parse {document_path}: Missing 'from'", + ) return self._parsed @@ -185,7 +190,8 @@ class MailDocumentParser(DocumentParser): return pdf_path - def mail_to_html(self, mail): + @staticmethod + def mail_to_html(mail): data = {} def clean_html(text: str): @@ -230,15 +236,6 @@ class MailDocumentParser(DocumentParser): if data["attachments"] != "": data["attachments_label"] = "Attachments" - if len(mail.attachments) >= 1: - att = [] - for a in mail.attachments: - if a.size >= 1024 * 600: - att.append(f"{a.filename} ({(a.size / 1024 / 1024):.2f} MiB)") - else: - att.append(f"{a.filename} ({(a.size / 1024):.2f} KiB)") - self.text += f"Attachments: {', '.join(att)}\n\n" - data["date"] = clean_html(mail.date.astimezone().strftime("%Y-%m-%d %H:%M")) data["content"] = clean_html(mail.text.strip()) diff --git a/src/paperless_mail/tests/samples/broken.eml b/src/paperless_mail/tests/samples/broken.eml new file mode 100644 index 000000000..3e03caf0a --- /dev/null +++ b/src/paperless_mail/tests/samples/broken.eml @@ -0,0 +1 @@ +This is not a valid eml. diff --git a/src/paperless_mail/tests/samples/html.eml b/src/paperless_mail/tests/samples/html.eml index d6ee7c350..09af2e1e9 100644 --- a/src/paperless_mail/tests/samples/html.eml +++ b/src/paperless_mail/tests/samples/html.eml @@ -47,12 +47,20 @@ Content-Transfer-Encoding: 7bit -
-Some Text
-and an embedded image.
-
Some Text
+
+
+
+
and an embedded image.
+
Paragraph unchanged.
+ +