diff --git a/src/paperless_mail/parsers.py b/src/paperless_mail/parsers.py
index a845c3157..da0cf96b9 100644
--- a/src/paperless_mail/parsers.py
+++ b/src/paperless_mail/parsers.py
@@ -10,6 +10,7 @@ from django.conf import settings
from documents.parsers import DocumentParser
from documents.parsers import make_thumbnail_from_pdf
from documents.parsers import ParseError
+from humanfriendly import format_size
from imap_tools import MailMessage
from tika import parser
@@ -125,10 +126,8 @@ class MailDocumentParser(DocumentParser):
if len(mail.attachments) >= 1:
att = []
for a in mail.attachments:
- if a.size >= 1024 * 600:
- att.append(f"{a.filename} ({(a.size / 1024 / 1024):.2f} MiB)")
- else:
- att.append(f"{a.filename} ({(a.size / 1024):.2f} KiB)")
+ att.append(f"{a.filename} ({format_size(a.size, binary=True)})")
+
self.text += f"Attachments: {', '.join(att)}\n\n"
if mail.html != "":
@@ -191,7 +190,7 @@ class MailDocumentParser(DocumentParser):
return pdf_path
@staticmethod
- def mail_to_html(mail) -> StringIO:
+ def mail_to_html(mail: MailMessage) -> StringIO:
data = {}
def clean_html(text: str):
@@ -228,10 +227,7 @@ class MailDocumentParser(DocumentParser):
att = []
for a in mail.attachments:
- if a.size >= 1024 * 600:
- att.append(f"{a.filename} ({(a.size / 1024 / 1024):.2f} MiB)")
- else:
- att.append(f"{a.filename} ({(a.size / 1024):.2f} KiB)")
+ att.append(f"{a.filename} ({format_size(a.size, binary=True)})")
data["attachments"] = clean_html(", ".join(att))
if data["attachments"] != "":
data["attachments_label"] = "Attachments"
diff --git a/src/paperless_mail/tests/samples/html.eml.html b/src/paperless_mail/tests/samples/html.eml.html
index fbc4f9460..a73be6f95 100644
--- a/src/paperless_mail/tests/samples/html.eml.html
+++ b/src/paperless_mail/tests/samples/html.eml.html
@@ -30,7 +30,7 @@
Attachments
- IntM6gnXFm00FEV5.png (6.89 KiB), 600+kbfile.txt (0.59 MiB)
+ IntM6gnXFm00FEV5.png (6.89 KiB), 600+kbfile.txt (600.24 KiB)
diff --git a/src/paperless_mail/tests/samples/html.eml.pdf b/src/paperless_mail/tests/samples/html.eml.pdf
index 058988f66..de4aeb038 100644
Binary files a/src/paperless_mail/tests/samples/html.eml.pdf and b/src/paperless_mail/tests/samples/html.eml.pdf differ
diff --git a/src/paperless_mail/tests/samples/html.eml.pdf.webp b/src/paperless_mail/tests/samples/html.eml.pdf.webp
index b4481efd9..ab7cd8535 100644
Binary files a/src/paperless_mail/tests/samples/html.eml.pdf.webp and b/src/paperless_mail/tests/samples/html.eml.pdf.webp differ
diff --git a/src/paperless_mail/tests/test_parsers.py b/src/paperless_mail/tests/test_parsers.py
index 3da54e364..4123e1cc8 100644
--- a/src/paperless_mail/tests/test_parsers.py
+++ b/src/paperless_mail/tests/test_parsers.py
@@ -19,12 +19,18 @@ class TestParser(TestCase):
def test_get_parsed(self):
# Check if exception is raised when parsing fails.
- with pytest.raises(ParseError):
- self.parser.get_parsed(os.path.join(self.SAMPLE_FILES, "na"))
+ self.assertRaises(
+ ParseError,
+ self.parser.get_parsed,
+ os.path.join(self.SAMPLE_FILES, "na"),
+ )
# Check if exception is raised when the mail is faulty.
- with pytest.raises(ParseError):
- self.parser.get_parsed(os.path.join(self.SAMPLE_FILES, "broken.eml"))
+ self.assertRaises(
+ ParseError,
+ self.parser.get_parsed,
+ os.path.join(self.SAMPLE_FILES, "broken.eml"),
+ )
# Parse Test file and check relevant content
parsed1 = self.parser.get_parsed(
@@ -210,18 +216,18 @@ class TestParser(TestCase):
def test_parse_na(self):
# Check if exception is raised when parsing fails.
- with pytest.raises(ParseError):
- self.parser.parse(
- os.path.join(os.path.join(self.SAMPLE_FILES, "na")),
- "message/rfc822",
- )
+ self.assertRaises(
+ ParseError,
+ self.parser.parse,
+ os.path.join(self.SAMPLE_FILES, "na"),
+ "message/rfc822",
+ )
@mock.patch("paperless_mail.parsers.MailDocumentParser.tika_parse")
@mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf")
- @mock.patch("documents.loggers.LoggingMixin.log") # Disable log output
- def test_parse_html_eml(self, m, n, mock_tika_parse: mock.MagicMock):
+ def test_parse_html_eml(self, n, mock_tika_parse: mock.MagicMock):
# Validate parsing returns the expected results
- text_expected = "Some Text\nand an embedded image.\n\nSubject: HTML Message\n\nFrom: Name \n\nTo: someone@example.de\n\nAttachments: IntM6gnXFm00FEV5.png (6.89 KiB), 600+kbfile.txt (0.59 MiB)\n\nHTML content: tika return"
+ text_expected = "Some Text\nand an embedded image.\n\nSubject: HTML Message\n\nFrom: Name \n\nTo: someone@example.de\n\nAttachments: IntM6gnXFm00FEV5.png (6.89 KiB), 600+kbfile.txt (600.24 KiB)\n\nHTML content: tika return"
mock_tika_parse.return_value = "tika return"
self.parser.parse(os.path.join(self.SAMPLE_FILES, "html.eml"), "message/rfc822")
@@ -241,8 +247,7 @@ class TestParser(TestCase):
)
@mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf")
- @mock.patch("documents.loggers.LoggingMixin.log") # Disable log output
- def test_parse_simple_eml(self, m, n):
+ def test_parse_simple_eml(self, n):
# Validate parsing returns the expected results
self.parser.parse(
@@ -268,8 +273,7 @@ class TestParser(TestCase):
self.assertTrue(os.path.isfile(self.parser.archive_path))
@mock.patch("paperless_mail.parsers.parser.from_buffer")
- @mock.patch("documents.loggers.LoggingMixin.log") # Disable log output
- def test_tika_parse(self, m, mock_from_buffer: mock.MagicMock):
+ def test_tika_parse(self, mock_from_buffer: mock.MagicMock):
html = 'Some Text
'
expected_text = "Some Text"
mock_from_buffer.return_value = {"content": expected_text}
@@ -300,8 +304,11 @@ class TestParser(TestCase):
# Check if exception is raised when the pdf can not be created.
self.parser.gotenberg_server = ""
- with pytest.raises(ParseError):
- self.parser.generate_pdf(os.path.join(self.SAMPLE_FILES, "html.eml"))
+ self.assertRaises(
+ ParseError,
+ self.parser.generate_pdf,
+ os.path.join(self.SAMPLE_FILES, "html.eml"),
+ )
@mock.patch("paperless_mail.parsers.requests.post")
@mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf_from_mail")
@@ -313,8 +320,11 @@ class TestParser(TestCase):
mock_post: mock.MagicMock,
):
# Check if exception is raised when the mail can not be parsed.
- with pytest.raises(ParseError):
- self.parser.generate_pdf(os.path.join(self.SAMPLE_FILES, "broken.eml"))
+ self.assertRaises(
+ ParseError,
+ self.parser.generate_pdf,
+ os.path.join(self.SAMPLE_FILES, "broken.eml"),
+ )
mock_generate_pdf_from_mail.return_value = b"Mail Return"
mock_generate_pdf_from_html.return_value = b"HTML Return"
@@ -430,8 +440,7 @@ class TestParser(TestCase):
self.assertFalse("