From 00f39d8b581c358f2484680275222f6ad909758c Mon Sep 17 00:00:00 2001 From: phail Date: Sun, 20 Nov 2022 22:49:42 +0100 Subject: [PATCH] add test comments --- src/paperless_mail/tests/test_parsers.py | 206 +++++++++++++++++++++-- 1 file changed, 191 insertions(+), 15 deletions(-) diff --git a/src/paperless_mail/tests/test_parsers.py b/src/paperless_mail/tests/test_parsers.py index 6e47c70ed..a2aab941e 100644 --- a/src/paperless_mail/tests/test_parsers.py +++ b/src/paperless_mail/tests/test_parsers.py @@ -16,7 +16,15 @@ class TestParser(TestCase): def tearDown(self) -> None: self.parser.cleanup() - def test_get_parsed(self): + def test_get_parsed_missing_file(self): + """ + GIVEN: + - Fresh parser + WHEN: + - A nonexistent file should be parsed + THEN: + - An Exception is thrown + """ # Check if exception is raised when parsing fails. self.assertRaises( ParseError, @@ -24,6 +32,15 @@ class TestParser(TestCase): os.path.join(self.SAMPLE_FILES, "na"), ) + def test_get_parsed_broken_file(self): + """ + GIVEN: + - Fresh parser + WHEN: + - A faulty file should be parsed + THEN: + - An Exception is thrown + """ # Check if exception is raised when the mail is faulty. self.assertRaises( ParseError, @@ -31,6 +48,15 @@ class TestParser(TestCase): os.path.join(self.SAMPLE_FILES, "broken.eml"), ) + def test_get_parsed_simple_text_mail(self): + """ + GIVEN: + - Fresh parser + WHEN: + - A .eml file should be parsed + THEN: + - The content of the mail should be available in the parse result. + """ # Parse Test file and check relevant content parsed1 = self.parser.get_parsed( os.path.join(self.SAMPLE_FILES, "simple_text.eml"), @@ -48,9 +74,22 @@ class TestParser(TestCase): self.assertEqual(parsed1.text, "This is just a simple Text Mail.\n") self.assertEqual(parsed1.to, ("some@one.de",)) + def test_get_parsed_reparse(self): + """ + GIVEN: + - An E-Mail was parsed + WHEN: + - Another .eml file should be parsed + THEN: + - The parser should not retry to parse and return the old results + """ + # Parse Test file and check relevant content + parsed1 = self.parser.get_parsed( + os.path.join(self.SAMPLE_FILES, "simple_text.eml"), + ) # Check if same parsed object as before is returned, even if another file is given. parsed2 = self.parser.get_parsed( - os.path.join(os.path.join(self.SAMPLE_FILES, "na")), + os.path.join(os.path.join(self.SAMPLE_FILES, "html.eml")), ) self.assertEqual(parsed1, parsed2) @@ -61,6 +100,14 @@ class TestParser(TestCase): mock_make_thumbnail_from_pdf: mock.MagicMock, mock_generate_pdf: mock.MagicMock, ): + """ + GIVEN: + - An E-Mail was parsed + WHEN: + - The Thumbnail is requested + THEN: + - The parser should call the functions which generate the thumbnail + """ mocked_return = "Passing the return value through.." mock_make_thumbnail_from_pdf.return_value = mocked_return @@ -81,11 +128,28 @@ class TestParser(TestCase): self.assertEqual(mocked_return, thumb) @mock.patch("documents.loggers.LoggingMixin.log") - def test_extract_metadata(self, m: mock.MagicMock): + def test_extract_metadata_fail(self, m: mock.MagicMock): + """ + GIVEN: + - Fresh start + WHEN: + - Metadata extraction is triggered for nonexistent file + THEN: + - A log warning should be generated + """ # Validate if warning is logged when parsing fails self.assertEqual([], self.parser.extract_metadata("na", "message/rfc822")) self.assertEqual("warning", m.call_args[0][0]) + def test_extract_metadata(self): + """ + GIVEN: + - Fresh start + WHEN: + - Metadata extraction is triggered + THEN: + - metadata is returned + """ # Validate Metadata parsing returns the expected results metadata = self.parser.extract_metadata( os.path.join(self.SAMPLE_FILES, "simple_text.eml"), @@ -219,6 +283,14 @@ class TestParser(TestCase): ) def test_parse_na(self): + """ + GIVEN: + - Fresh start + WHEN: + - parsing is attempted with nonexistent file + THEN: + - Exception is thrown + """ # Check if exception is raised when parsing fails. self.assertRaises( ParseError, @@ -230,6 +302,14 @@ class TestParser(TestCase): @mock.patch("paperless_mail.parsers.MailDocumentParser.tika_parse") @mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf") def test_parse_html_eml(self, n, mock_tika_parse: mock.MagicMock): + """ + GIVEN: + - Fresh start + WHEN: + - parsing is done with html mail + THEN: + - Tika is called, parsed information from non html parts is available + """ # Validate parsing returns the expected results text_expected = "Subject: HTML Message\n\nFrom: Name \n\nTo: someone@example.de\n\nAttachments: IntM6gnXFm00FEV5.png (6.89 KiB), 600+kbfile.txt (600.24 KiB)\n\nHTML content: tika return\n\nSome Text and an embedded image." mock_tika_parse.return_value = "tika return" @@ -252,6 +332,14 @@ class TestParser(TestCase): @mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf") def test_parse_simple_eml(self, n): + """ + GIVEN: + - Fresh start + WHEN: + - parsing is done with non html mail + THEN: + - parsed information is available + """ # Validate parsing returns the expected results self.parser.parse( @@ -277,22 +365,51 @@ class TestParser(TestCase): self.assertTrue(os.path.isfile(self.parser.archive_path)) @mock.patch("paperless_mail.parsers.parser.from_buffer") - def test_tika_parse(self, mock_from_buffer: mock.MagicMock): - html = '

Some Text

' - expected_text = "Some Text" - mock_from_buffer.return_value = {"content": expected_text} - + def test_tika_parse_unsuccessful(self, mock_from_buffer: mock.MagicMock): + """ + GIVEN: + - Fresh start + WHEN: + - tika parsing fails + THEN: + - the parser should return an empty string + """ # Check unsuccessful parsing mock_from_buffer.return_value = {"content": None} parsed = self.parser.tika_parse(None) self.assertEqual("", parsed) + @mock.patch("paperless_mail.parsers.parser.from_buffer") + def test_tika_parse(self, mock_from_buffer: mock.MagicMock): + """ + GIVEN: + - Fresh start + WHEN: + - tika parsing is called + THEN: + - a web request to tika shall be done and the reply es returned + """ + html = '

Some Text

' + expected_text = "Some Text" + # Check successful parsing mock_from_buffer.return_value = {"content": expected_text} parsed = self.parser.tika_parse(html) self.assertEqual(expected_text, parsed.strip()) mock_from_buffer.assert_called_with(html, self.parser.tika_server) + @mock.patch("paperless_mail.parsers.parser.from_buffer") + def test_tika_parse_exception(self, mock_from_buffer: mock.MagicMock): + """ + GIVEN: + - Fresh start + WHEN: + - tika parsing is called and an exception is thrown on the request + THEN: + - a ParseError Exception is thrown + """ + html = '

Some Text

' + # Check ParseError def my_side_effect(): raise Exception("Test") @@ -303,6 +420,14 @@ class TestParser(TestCase): @mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf_from_mail") @mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf_from_html") def test_generate_pdf_parse_error(self, m: mock.MagicMock, n: mock.MagicMock): + """ + GIVEN: + - Fresh start + WHEN: + - pdf generation is requested but gotenberg can not be reached + THEN: + - a ParseError Exception is thrown + """ m.return_value = b"" n.return_value = b"" @@ -314,6 +439,22 @@ class TestParser(TestCase): os.path.join(self.SAMPLE_FILES, "html.eml"), ) + def test_generate_pdf_exception(self): + """ + GIVEN: + - Fresh start + WHEN: + - pdf generation is requested but parsing throws an exception + THEN: + - a ParseError Exception is thrown + """ + # Check if exception is raised when the mail can not be parsed. + self.assertRaises( + ParseError, + self.parser.generate_pdf, + os.path.join(self.SAMPLE_FILES, "broken.eml"), + ) + @mock.patch("paperless_mail.parsers.requests.post") @mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf_from_mail") @mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf_from_html") @@ -323,13 +464,14 @@ class TestParser(TestCase): mock_generate_pdf_from_mail: mock.MagicMock, mock_post: mock.MagicMock, ): - # Check if exception is raised when the mail can not be parsed. - self.assertRaises( - ParseError, - self.parser.generate_pdf, - os.path.join(self.SAMPLE_FILES, "broken.eml"), - ) - + """ + GIVEN: + - Fresh start + WHEN: + - pdf generation is requested + THEN: + - gotenberg is called and the resulting file is returned + """ mock_generate_pdf_from_mail.return_value = b"Mail Return" mock_generate_pdf_from_html.return_value = b"HTML Return" @@ -366,6 +508,14 @@ class TestParser(TestCase): self.assertEqual(b"Content", file.read()) def test_mail_to_html(self): + """ + GIVEN: + - Fresh start + WHEN: + - conversion from eml to html is requested + THEN: + - html should be returned + """ mail = self.parser.get_parsed(os.path.join(self.SAMPLE_FILES, "html.eml")) html_handle = self.parser.mail_to_html(mail) html_received = html_handle.read() @@ -384,6 +534,14 @@ class TestParser(TestCase): mock_mail_to_html: mock.MagicMock, mock_post: mock.MagicMock, ): + """ + GIVEN: + - Fresh start + WHEN: + - conversion of PDF from .eml is requested + THEN: + - gotenberg should be called with valid intermediary html files, the resulting pdf is returned + """ mock_response = mock.MagicMock() mock_response.content = b"Content" mock_post.return_value = mock_response @@ -425,6 +583,15 @@ class TestParser(TestCase): mock_response.raise_for_status.assert_called_once() def test_transform_inline_html(self): + """ + GIVEN: + - Fresh start + WHEN: + - transforming of html content from an email with an inline image attachment is requested + THEN: + - html is returned and sanitized + """ + class MailAttachmentMock: def __init__(self, payload, content_id): self.payload = payload @@ -448,6 +615,15 @@ class TestParser(TestCase): @mock.patch("paperless_mail.parsers.requests.post") def test_generate_pdf_from_html(self, mock_post: mock.MagicMock): + """ + GIVEN: + - Fresh start + WHEN: + - generating pdf from html with inline attachments is attempted + THEN: + - gotenberg is called with the correct parameters and the resulting pdf is returned + """ + class MailAttachmentMock: def __init__(self, payload, content_id): self.payload = payload