remove log mocking, replace pytest raises, use humanfriendly

2026-02-09 23:49:29 -06:00 · 2022-11-20 12:06:35 +01:00
parent b68906b14e
commit e3c1bde793
6 changed files with 51 additions and 55 deletions
--- a/src/paperless_mail/parsers.py
+++ b/src/paperless_mail/parsers.py
@@ -10,6 +10,7 @@ from django.conf import settings
 from documents.parsers import DocumentParser
 from documents.parsers import make_thumbnail_from_pdf
 from documents.parsers import ParseError
+from humanfriendly import format_size
 from imap_tools import MailMessage
 from tika import parser

@@ -125,10 +126,8 @@ class MailDocumentParser(DocumentParser):
        if len(mail.attachments) >= 1:
            att = []
            for a in mail.attachments:
-                if a.size >= 1024 * 600:
-                    att.append(f"{a.filename} ({(a.size / 1024 / 1024):.2f} MiB)")
-                else:
-                    att.append(f"{a.filename} ({(a.size / 1024):.2f} KiB)")
+                att.append(f"{a.filename} ({format_size(a.size, binary=True)})")
+
            self.text += f"Attachments: {', '.join(att)}\n\n"

        if mail.html != "":
@@ -191,7 +190,7 @@ class MailDocumentParser(DocumentParser):
        return pdf_path

    @staticmethod
-    def mail_to_html(mail) -> StringIO:
+    def mail_to_html(mail: MailMessage) -> StringIO:
        data = {}

        def clean_html(text: str):
@@ -228,10 +227,7 @@ class MailDocumentParser(DocumentParser):

        att = []
        for a in mail.attachments:
-            if a.size >= 1024 * 600:
-                att.append(f"{a.filename} ({(a.size / 1024 / 1024):.2f} MiB)")
-            else:
-                att.append(f"{a.filename} ({(a.size / 1024):.2f} KiB)")
+            att.append(f"{a.filename} ({format_size(a.size, binary=True)})")
        data["attachments"] = clean_html(", ".join(att))
        if data["attachments"] != "":
            data["attachments_label"] = "Attachments"
--- a/src/paperless_mail/tests/samples/html.eml.html
+++ b/src/paperless_mail/tests/samples/html.eml.html
@@ -30,7 +30,7 @@
      <div class="col-start-2 col-span-10 row-start-5" text-sm my-0.5></div>

 			<div class="col-start-1 row-start-6	 text-slate-400 text-right">Attachments</div>
-      <div class="col-start-2 col-span-10 row-start-6">IntM6gnXFm00FEV5.png (6.89 KiB), 600+kbfile.txt (0.59 MiB)</div>
+      <div class="col-start-2 col-span-10 row-start-6">IntM6gnXFm00FEV5.png (6.89 KiB), 600+kbfile.txt (600.24 KiB)</div>
    </div>

    <!-- Separator-->
--- a/src/paperless_mail/tests/samples/html.eml.pdf
+++ b/src/paperless_mail/tests/samples/html.eml.pdf
--- a/src/paperless_mail/tests/samples/html.eml.pdf.webp
+++ b/src/paperless_mail/tests/samples/html.eml.pdf.webp
--- a/src/paperless_mail/tests/test_parsers.py
+++ b/src/paperless_mail/tests/test_parsers.py
@@ -19,12 +19,18 @@ class TestParser(TestCase):

    def test_get_parsed(self):
        # Check if exception is raised when parsing fails.
-        with pytest.raises(ParseError):
-            self.parser.get_parsed(os.path.join(self.SAMPLE_FILES, "na"))
+        self.assertRaises(
+            ParseError,
+            self.parser.get_parsed,
+            os.path.join(self.SAMPLE_FILES, "na"),
+        )

        # Check if exception is raised when the mail is faulty.
-        with pytest.raises(ParseError):
-            self.parser.get_parsed(os.path.join(self.SAMPLE_FILES, "broken.eml"))
+        self.assertRaises(
+            ParseError,
+            self.parser.get_parsed,
+            os.path.join(self.SAMPLE_FILES, "broken.eml"),
+        )

        # Parse Test file and check relevant content
        parsed1 = self.parser.get_parsed(
@@ -210,18 +216,18 @@ class TestParser(TestCase):

    def test_parse_na(self):
        # Check if exception is raised when parsing fails.
-        with pytest.raises(ParseError):
-            self.parser.parse(
-                os.path.join(os.path.join(self.SAMPLE_FILES, "na")),
-                "message/rfc822",
-            )
+        self.assertRaises(
+            ParseError,
+            self.parser.parse,
+            os.path.join(self.SAMPLE_FILES, "na"),
+            "message/rfc822",
+        )

    @mock.patch("paperless_mail.parsers.MailDocumentParser.tika_parse")
    @mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf")
-    @mock.patch("documents.loggers.LoggingMixin.log")  # Disable log output
-    def test_parse_html_eml(self, m, n, mock_tika_parse: mock.MagicMock):
+    def test_parse_html_eml(self, n, mock_tika_parse: mock.MagicMock):
        # Validate parsing returns the expected results
-        text_expected = "Some Text\nand an embedded image.\n\nSubject: HTML Message\n\nFrom: Name <someone@example.de>\n\nTo: someone@example.de\n\nAttachments: IntM6gnXFm00FEV5.png (6.89 KiB), 600+kbfile.txt (0.59 MiB)\n\nHTML content: tika return"
+        text_expected = "Some Text\nand an embedded image.\n\nSubject: HTML Message\n\nFrom: Name <someone@example.de>\n\nTo: someone@example.de\n\nAttachments: IntM6gnXFm00FEV5.png (6.89 KiB), 600+kbfile.txt (600.24 KiB)\n\nHTML content: tika return"
        mock_tika_parse.return_value = "tika return"

        self.parser.parse(os.path.join(self.SAMPLE_FILES, "html.eml"), "message/rfc822")
@@ -241,8 +247,7 @@ class TestParser(TestCase):
        )

    @mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf")
-    @mock.patch("documents.loggers.LoggingMixin.log")  # Disable log output
-    def test_parse_simple_eml(self, m, n):
+    def test_parse_simple_eml(self, n):
        # Validate parsing returns the expected results

        self.parser.parse(
@@ -268,8 +273,7 @@ class TestParser(TestCase):
        self.assertTrue(os.path.isfile(self.parser.archive_path))

    @mock.patch("paperless_mail.parsers.parser.from_buffer")
-    @mock.patch("documents.loggers.LoggingMixin.log")  # Disable log output
-    def test_tika_parse(self, m, mock_from_buffer: mock.MagicMock):
+    def test_tika_parse(self, mock_from_buffer: mock.MagicMock):
        html = '<html><head><meta http-equiv="content-type" content="text/html; charset=UTF-8"></head><body><p>Some Text</p></body></html>'
        expected_text = "Some Text"
        mock_from_buffer.return_value = {"content": expected_text}
@@ -300,8 +304,11 @@ class TestParser(TestCase):

        # Check if exception is raised when the pdf can not be created.
        self.parser.gotenberg_server = ""
-        with pytest.raises(ParseError):
-            self.parser.generate_pdf(os.path.join(self.SAMPLE_FILES, "html.eml"))
+        self.assertRaises(
+            ParseError,
+            self.parser.generate_pdf,
+            os.path.join(self.SAMPLE_FILES, "html.eml"),
+        )

    @mock.patch("paperless_mail.parsers.requests.post")
    @mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf_from_mail")
@@ -313,8 +320,11 @@ class TestParser(TestCase):
        mock_post: mock.MagicMock,
    ):
        # Check if exception is raised when the mail can not be parsed.
-        with pytest.raises(ParseError):
-            self.parser.generate_pdf(os.path.join(self.SAMPLE_FILES, "broken.eml"))
+        self.assertRaises(
+            ParseError,
+            self.parser.generate_pdf,
+            os.path.join(self.SAMPLE_FILES, "broken.eml"),
+        )

        mock_generate_pdf_from_mail.return_value = b"Mail Return"
        mock_generate_pdf_from_html.return_value = b"HTML Return"
@@ -430,8 +440,7 @@ class TestParser(TestCase):
        self.assertFalse("<script" in resulting_html.lower())

    @mock.patch("paperless_mail.parsers.requests.post")
-    @mock.patch("documents.loggers.LoggingMixin.log")  # Disable log output
-    def test_generate_pdf_from_html(self, m, mock_post: mock.MagicMock):
+    def test_generate_pdf_from_html(self, mock_post: mock.MagicMock):
        class MailAttachmentMock:
            def __init__(self, payload, content_id):
                self.payload = payload
--- a/src/paperless_mail/tests/test_parsers_live.py
+++ b/src/paperless_mail/tests/test_parsers_live.py
@@ -39,8 +39,7 @@ class TestParserLive(TestCase):
        reason="PAPERLESS_TEST_SKIP_CONVERT set, skipping Test",
    )
    @mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf")
-    @mock.patch("documents.loggers.LoggingMixin.log")  # Disable log output
-    def test_get_thumbnail(self, m, mock_generate_pdf: mock.MagicMock):
+    def test_get_thumbnail(self, mock_generate_pdf: mock.MagicMock):
        mock_generate_pdf.return_value = os.path.join(
            self.SAMPLE_FILES,
            "simple_text.eml.pdf",
@@ -63,17 +62,15 @@ class TestParserLive(TestCase):
        "TIKA_LIVE" not in os.environ,
        reason="No tika server",
    )
-    @mock.patch("documents.loggers.LoggingMixin.log")  # Disable log output
-    def test_tika_parse(self, m):
+    def test_tika_parse(self):
        html = '<html><head><meta http-equiv="content-type" content="text/html; charset=UTF-8"></head><body><p>Some Text</p></body></html>'
        expected_text = "Some Text"

        tika_server_original = self.parser.tika_server

        # Check if exception is raised when Tika cannot be reached.
-        with pytest.raises(ParseError):
-            self.parser.tika_server = ""
-            self.parser.tika_parse(html)
+        self.parser.tika_server = ""
+        self.assertRaises(ParseError, self.parser.tika_parse, html)

        # Check unsuccessful parsing
        self.parser.tika_server = tika_server_original
@@ -116,15 +113,13 @@ class TestParserLive(TestCase):
        "GOTENBERG_LIVE" not in os.environ,
        reason="No gotenberg server",
    )
-    @mock.patch("documents.loggers.LoggingMixin.log")  # Disable log output
-    def test_generate_pdf_from_mail_no_convert(self, m):
+    def test_generate_pdf_from_mail_no_convert(self):
        mail = self.parser.get_parsed(os.path.join(self.SAMPLE_FILES, "html.eml"))

        pdf_path = os.path.join(self.parser.tempdir, "html.eml.pdf")

        with open(pdf_path, "wb") as file:
            file.write(self.parser.generate_pdf_from_mail(mail))
-            file.close()

        extracted = extract_text(pdf_path)
        expected = extract_text(os.path.join(self.SAMPLE_FILES, "html.eml.pdf"))
@@ -139,15 +134,13 @@ class TestParserLive(TestCase):
        "PAPERLESS_TEST_SKIP_CONVERT" in os.environ,
        reason="PAPERLESS_TEST_SKIP_CONVERT set, skipping Test",
    )
-    @mock.patch("documents.loggers.LoggingMixin.log")  # Disable log output
-    def test_generate_pdf_from_mail(self, m):
+    def test_generate_pdf_from_mail(self):
        mail = self.parser.get_parsed(os.path.join(self.SAMPLE_FILES, "html.eml"))

        pdf_path = os.path.join(self.parser.tempdir, "html.eml.pdf")

        with open(pdf_path, "wb") as file:
            file.write(self.parser.generate_pdf_from_mail(mail))
-            file.close()

        converted = os.path.join(
            self.parser.tempdir,
@@ -181,8 +174,7 @@ class TestParserLive(TestCase):
        "GOTENBERG_LIVE" not in os.environ,
        reason="No gotenberg server",
    )
-    @mock.patch("documents.loggers.LoggingMixin.log")  # Disable log output
-    def test_generate_pdf_from_html_no_convert(self, m):
+    def test_generate_pdf_from_html_no_convert(self):
        class MailAttachmentMock:
            def __init__(self, payload, content_id):
                self.payload = payload
@@ -203,7 +195,6 @@ class TestParserLive(TestCase):

        with open(pdf_path, "wb") as file:
            file.write(result)
-            file.close()

        extracted = extract_text(pdf_path)
        expected = extract_text(os.path.join(self.SAMPLE_FILES, "sample.html.pdf"))
@@ -218,8 +209,7 @@ class TestParserLive(TestCase):
        "PAPERLESS_TEST_SKIP_CONVERT" in os.environ,
        reason="PAPERLESS_TEST_SKIP_CONVERT set, skipping Test",
    )
-    @mock.patch("documents.loggers.LoggingMixin.log")  # Disable log output
-    def test_generate_pdf_from_html(self, m):
+    def test_generate_pdf_from_html(self):
        class MailAttachmentMock:
            def __init__(self, payload, content_id):
                self.payload = payload
@@ -240,7 +230,6 @@ class TestParserLive(TestCase):

        with open(pdf_path, "wb") as file:
            file.write(result)
-            file.close()

        converted = os.path.join(self.parser.tempdir, "sample.html.pdf.webp")
        run_convert(
@@ -269,20 +258,22 @@ class TestParserLive(TestCase):
            f"If Rick Astley is shown, Gotenberg loads from web which is bad for Mail content.",
        )

-    @staticmethod
    @pytest.mark.skipif(
        "GOTENBERG_LIVE" not in os.environ,
        reason="No gotenberg server",
    )
-    def test_is_online_image_still_available():
+    def test_is_online_image_still_available(self):
        """
        A public image is used in the html sample file. We have no control
        whether this image stays online forever, so here we check if it is still there
        """

        # Start by Testing if nonexistent URL really throws an Exception
-        with pytest.raises(HTTPError):
-            urlopen("https://upload.wikimedia.org/wikipedia/en/f/f7/nonexistent.png")
+        self.assertRaises(
+            HTTPError,
+            urlopen,
+            "https://upload.wikimedia.org/wikipedia/en/f/f7/nonexistent.png",
+        )

        # Now check the URL used in samples/sample.html
        urlopen("https://upload.wikimedia.org/wikipedia/en/f/f7/RickRoll.png")