From fda844f64cce6247f484dd8d14038ea36db9f934 Mon Sep 17 00:00:00 2001
From: phail <phail@hacknology.de>
Date: Sat, 15 Oct 2022 15:41:43 +0200
Subject: [PATCH] add unittest for parse

---
 src/paperless_mail/tests/test_eml.py | 83 +++++++++++++++++++++++-----
 1 file changed, 70 insertions(+), 13 deletions(-)

diff --git a/src/paperless_mail/tests/test_eml.py b/src/paperless_mail/tests/test_eml.py
index e365e4d54..a0f4c0b77 100644
--- a/src/paperless_mail/tests/test_eml.py
+++ b/src/paperless_mail/tests/test_eml.py
@@ -1,3 +1,4 @@
+import datetime
 import hashlib
 import os
 from unittest import mock
@@ -5,8 +6,8 @@ from unittest import mock
 import pytest
 from django.test import TestCase
 from documents.parsers import ParseError
+from documents.parsers import run_convert
 from paperless_mail.parsers import MailDocumentParser
-from paperless_mail.parsers import settings
 
 
 class TestParser(TestCase):
@@ -38,25 +39,26 @@ class TestParser(TestCase):
         parsed2 = parser.get_parsed(os.path.join(os.path.join(self.SAMPLE_FILES, "na")))
         self.assertEqual(parsed1, parsed2)
 
-    def test_get_thumbnail(self):
-        def hashfile(file):
-            buf_size = 65536  # An arbitrary (but fixed) buffer
-            sha256 = hashlib.sha256()
-            with open(file, "rb") as f:
-                while True:
-                    data = f.read(buf_size)
-                    if not data:
-                        break
-                    sha256.update(data)
-            return sha256.hexdigest()
+    @staticmethod
+    def hashfile(file):
+        buf_size = 65536  # An arbitrary (but fixed) buffer
+        sha256 = hashlib.sha256()
+        with open(file, "rb") as f:
+            while True:
+                data = f.read(buf_size)
+                if not data:
+                    break
+                sha256.update(data)
+        return sha256.hexdigest()
 
+    def test_get_thumbnail(self):
         parser = MailDocumentParser(None)
         thumb = parser.get_thumbnail(
             os.path.join(self.SAMPLE_FILES, "simple_text.eml"),
             "message/rfc822",
         )
         self.assertTrue(os.path.isfile(thumb))
-        thumb_hash = hashfile(thumb)
+        thumb_hash = self.hashfile(thumb)
 
         # The created intermediary pdf is not reproducible. But the thumbnail image should always look the same.
         expected_hash = (
@@ -203,6 +205,61 @@ class TestParser(TestCase):
             in metadata,
         )
 
+    @mock.patch("documents.loggers.LoggingMixin.log")  # Disable log output
+    def test_parse(self, m):
+        parser = MailDocumentParser(None)
+
+        # Check if exception is raised when parsing fails.
+        with pytest.raises(ParseError):
+            parser.parse(
+                os.path.join(os.path.join(self.SAMPLE_FILES, "na")),
+                "message/rfc822",
+            )
+
+        # Validate parsing returns the expected results
+        parser.parse(os.path.join(self.SAMPLE_FILES, "html.eml"), "message/rfc822")
+
+        text_expected = "Some Text\nand an embedded image.\n\nSubject: HTML Message\n\nFrom: Name <someone@example.de>\n\nTo: someone@example.de\n\nAttachments: IntM6gnXFm00FEV5.png (6.89 KiB)\n\nHTML content: Some Text\nand an embedded image.Attachments: IntM6gnXFm00FEV5.png (6.89 KiB)\n\n"
+        self.assertEqual(text_expected, parser.text)
+        self.assertEqual(
+            datetime.datetime(
+                2022,
+                10,
+                15,
+                11,
+                23,
+                19,
+                tzinfo=datetime.timezone(datetime.timedelta(seconds=7200)),
+            ),
+            parser.date,
+        )
+        self.assertTrue(os.path.isfile(parser.archive_path))
+
+        converted = os.path.join(parser.tempdir, "converted.webp")
+        run_convert(
+            density=300,
+            scale="500x5000>",
+            alpha="remove",
+            strip=True,
+            trim=False,
+            auto_orient=True,
+            input_file=f"{parser.archive_path}",  # Do net define an index to convert all pages.
+            output_file=converted,
+            logging_group=None,
+        )
+        self.assertTrue(os.path.isfile(converted))
+        thumb_hash = self.hashfile(converted)
+
+        # The created pdf is not reproducible. But the converted image should always look the same.
+        expected_hash = (
+            "174f9c81f9aeda63b64375fa2fe675fd542677c1ba7a32fc19e09ffc4d461e12"
+        )
+        self.assertEqual(
+            thumb_hash,
+            expected_hash,
+            "PDF looks look different.",
+        )
+
     @mock.patch("documents.loggers.LoggingMixin.log")  # Disable log output
     def test_tika_parse(self, m):
         html = '<html><head><meta http-equiv="content-type" content="text/html; charset=UTF-8"></head><body><p>Some Text</p></body></html>'