From 3d37e49c1a41f117518d73165311710d79045d8f Mon Sep 17 00:00:00 2001 From: phail Date: Fri, 14 Oct 2022 15:43:43 +0200 Subject: [PATCH] add 2 more tests --- src/paperless_mail/tests/test_eml.py | 166 ++++++++++++++++++++++++++- 1 file changed, 165 insertions(+), 1 deletion(-) diff --git a/src/paperless_mail/tests/test_eml.py b/src/paperless_mail/tests/test_eml.py index e1aebf351..3fe05e8a3 100644 --- a/src/paperless_mail/tests/test_eml.py +++ b/src/paperless_mail/tests/test_eml.py @@ -1,14 +1,43 @@ import hashlib import os +from unittest import mock +import pytest from django.test import TestCase +from documents.parsers import ParseError from paperless_mail.parsers import MailDocumentParser class TestParser(TestCase): SAMPLE_FILES = os.path.join(os.path.dirname(__file__), "samples") - def test_thumbnail(self): + def test_get_parsed(self): + parser = MailDocumentParser(None) + + # Check if exception is raised when parsing fails. + with pytest.raises(ParseError): + parser.get_parsed(os.path.join(os.path.join(self.SAMPLE_FILES, "na"))) + + # Parse Test file and check relevant content + parsed1 = parser.get_parsed(os.path.join(self.SAMPLE_FILES, "simple_text.eml")) + + self.assertEqual(parsed1.date.year, 2022) + self.assertEqual(parsed1.date.month, 10) + self.assertEqual(parsed1.date.day, 12) + self.assertEqual(parsed1.date.hour, 21) + self.assertEqual(parsed1.date.minute, 40) + self.assertEqual(parsed1.date.second, 43) + self.assertEqual(parsed1.date.tzname(), "UTC+02:00") + self.assertEqual(parsed1.from_, "mail@someserver.de") + self.assertEqual(parsed1.subject, "Simple Text Mail") + self.assertEqual(parsed1.text, "This is just a simple Text Mail.\n") + self.assertEqual(parsed1.to, ("some@one.de",)) + + # Check if same parsed object as before is returned, even if another file is given. + parsed2 = parser.get_parsed(os.path.join(os.path.join(self.SAMPLE_FILES, "na"))) + self.assertEqual(parsed1, parsed2) + + def test_get_thumbnail(self): def hashfile(file): buf_size = 65536 # An arbitrary (but fixed) buffer sha256 = hashlib.sha256() @@ -37,3 +66,138 @@ class TestParser(TestCase): expected_hash, "Thumbnail file hash not as expected.", ) + + @mock.patch("documents.loggers.LoggingMixin.log") + def test_extract_metadata(self, m: mock.MagicMock): + parser = MailDocumentParser(None) + + # Validate if warning is logged when parsing fails + self.assertEqual([], parser.extract_metadata("na", "message/rfc822")) + self.assertEqual("warning", m.call_args[0][0]) + + # Validate Metadata parsing returns the expected results + metadata = parser.extract_metadata( + os.path.join(self.SAMPLE_FILES, "simple_text.eml"), + "message/rfc822", + ) + + self.assertTrue( + {"namespace": "", "prefix": "", "key": "attachments", "value": ""} + in metadata, + ) + self.assertTrue( + { + "namespace": "", + "prefix": "", + "key": "date", + "value": "2022-10-12 21:40:43 UTC+02:00", + } + in metadata, + ) + self.assertTrue( + { + "namespace": "", + "prefix": "header", + "key": "content-language", + "value": "en-US", + } + in metadata, + ) + self.assertTrue( + { + "namespace": "", + "prefix": "header", + "key": "content-type", + "value": "text/plain; charset=UTF-8; format=flowed", + } + in metadata, + ) + self.assertTrue( + { + "namespace": "", + "prefix": "header", + "key": "date", + "value": "Wed, 12 Oct 2022 21:40:43 +0200", + } + in metadata, + ) + self.assertTrue( + { + "namespace": "", + "prefix": "header", + "key": "delivered-to", + "value": "mail@someserver.de", + } + in metadata, + ) + self.assertTrue( + { + "namespace": "", + "prefix": "header", + "key": "from", + "value": "Some One ", + } + in metadata, + ) + self.assertTrue( + { + "namespace": "", + "prefix": "header", + "key": "message-id", + "value": "<6e99e34d-e20a-80c4-ea61-d8234b612be9@someserver.de>", + } + in metadata, + ) + self.assertTrue( + {"namespace": "", "prefix": "header", "key": "mime-version", "value": "1.0"} + in metadata, + ) + self.assertTrue( + { + "namespace": "", + "prefix": "header", + "key": "received", + "value": "from mail.someserver.org ([::1])\n\tby e1acdba3bd07 with LMTP\n\tid KBKZGD2YR2NTCgQAjubtDA\n\t(envelope-from )\n\tfor ; Wed, 10 Oct 2022 11:40:46 +0200, from [127.0.0.1] (localhost [127.0.0.1]) by localhost (Mailerdaemon) with ESMTPSA id 2BC9064C1616\n\tfor ; Wed, 12 Oct 2022 21:40:46 +0200 (CEST)", + } + in metadata, + ) + self.assertTrue( + { + "namespace": "", + "prefix": "header", + "key": "return-path", + "value": "", + } + in metadata, + ) + self.assertTrue( + { + "namespace": "", + "prefix": "header", + "key": "subject", + "value": "Simple Text Mail", + } + in metadata, + ) + self.assertTrue( + {"namespace": "", "prefix": "header", "key": "to", "value": "some@one.de"} + in metadata, + ) + self.assertTrue( + { + "namespace": "", + "prefix": "header", + "key": "user-agent", + "value": "Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Gecko/20100101\n Thunderbird/102.3.1", + } + in metadata, + ) + self.assertTrue( + { + "namespace": "", + "prefix": "header", + "key": "x-last-tls-session-version", + "value": "TLSv1.3", + } + in metadata, + )