Some Text
' expected_text = "Some Text" - - tika_server_original = self.parser.tika_server - - # Check if exception is raised when Tika cannot be reached. - with pytest.raises(ParseError): - self.parser.tika_server = "" - self.parser.tika_parse(html) + mock_from_buffer.return_value = {"content": expected_text} # Check unsuccessful parsing - self.parser.tika_server = tika_server_original - + mock_from_buffer.return_value = {"content": None} parsed = self.parser.tika_parse(None) self.assertEqual("", parsed) # Check successful parsing + mock_from_buffer.return_value = {"content": expected_text} parsed = self.parser.tika_parse(html) self.assertEqual(expected_text, parsed.strip()) + mock_from_buffer.assert_called_with(html, self.parser.tika_server) + + # Check ParseError + def my_side_effect(): + raise Exception("Test") + + mock_from_buffer.side_effect = my_side_effect + self.assertRaises(ParseError, self.parser.tika_parse, html) @mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf_from_mail") @mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf_from_html") @@ -373,25 +378,31 @@ class TestParser(TestCase): retval = self.parser.generate_pdf_from_mail(mail) self.assertEqual(b"Content", retval) - mock_generate_pdf_from_mail.assert_called_once_with( - self.parser.get_parsed(None), - ) - mock_generate_pdf_from_html.assert_called_once_with( - self.parser.get_parsed(None).html, - self.parser.get_parsed(None).attachments, - ) + mock_mail_to_html.assert_called_once_with(mail) self.assertEqual( - self.parser.gotenberg_server + "/forms/pdfengines/merge", + self.parser.gotenberg_server + "/forms/chromium/convert/html", mock_post.call_args.args[0], ) self.assertEqual({}, mock_post.call_args.kwargs["headers"]) self.assertEqual( - b"Mail Return", - mock_post.call_args.kwargs["files"]["1_mail.pdf"][1].read(), + { + "marginTop": "0.1", + "marginBottom": "0.1", + "marginLeft": "0.1", + "marginRight": "0.1", + "paperWidth": "8.27", + "paperHeight": "11.7", + "scale": "1.0", + }, + mock_post.call_args.kwargs["data"], ) self.assertEqual( - b"HTML Return", - mock_post.call_args.kwargs["files"]["2_html.pdf"][1].read(), + "Testresponse", + mock_post.call_args.kwargs["files"]["html"][1], + ) + self.assertEqual( + "output.css", + mock_post.call_args.kwargs["files"]["css"][0], ) mock_response.raise_for_status.assert_called_once() diff --git a/src/paperless_mail/tests/test_parsers_live.py b/src/paperless_mail/tests/test_parsers_live.py index a0fa1f54d..653388300 100644 --- a/src/paperless_mail/tests/test_parsers_live.py +++ b/src/paperless_mail/tests/test_parsers_live.py @@ -59,6 +59,10 @@ class TestParserLive(TestCase): f"Created Thumbnail {thumb} differs from expected file {expected}", ) + @pytest.mark.skipif( + "TIKA_LIVE" not in os.environ, + reason="No tika server", + ) @mock.patch("documents.loggers.LoggingMixin.log") # Disable log output def test_tika_parse(self, m): html = 'Some Text
' @@ -108,6 +112,28 @@ class TestParserLive(TestCase): ) self.assertEqual(expected, extracted) + @pytest.mark.skipif( + "GOTENBERG_LIVE" not in os.environ, + reason="No gotenberg server", + ) + @mock.patch("documents.loggers.LoggingMixin.log") # Disable log output + def test_generate_pdf_from_mail_no_convert(self, m): + mail = self.parser.get_parsed(os.path.join(self.SAMPLE_FILES, "html.eml")) + + pdf_path = os.path.join(self.parser.tempdir, "html.eml.pdf") + + with open(pdf_path, "wb") as file: + file.write(self.parser.generate_pdf_from_mail(mail)) + file.close() + + extracted = extract_text(pdf_path) + expected = extract_text(os.path.join(self.SAMPLE_FILES, "html.eml.pdf")) + self.assertEqual(expected, extracted) + + @pytest.mark.skipif( + "GOTENBERG_LIVE" not in os.environ, + reason="No gotenberg server", + ) # Only run if convert is available @pytest.mark.skipif( "PAPERLESS_TEST_SKIP_CONVERT" in os.environ, @@ -115,10 +141,9 @@ class TestParserLive(TestCase): ) @mock.patch("documents.loggers.LoggingMixin.log") # Disable log output def test_generate_pdf_from_mail(self, m): - # TODO mail = self.parser.get_parsed(os.path.join(self.SAMPLE_FILES, "html.eml")) - pdf_path = os.path.join(self.parser.tempdir, "test_generate_pdf_from_mail.pdf") + pdf_path = os.path.join(self.parser.tempdir, "html.eml.pdf") with open(pdf_path, "wb") as file: file.write(self.parser.generate_pdf_from_mail(mail)) @@ -126,7 +151,7 @@ class TestParserLive(TestCase): converted = os.path.join( self.parser.tempdir, - "test_generate_pdf_from_mail.webp", + "html.eml.pdf.webp", ) run_convert( density=300, @@ -143,8 +168,8 @@ class TestParserLive(TestCase): thumb_hash = self.hashfile(converted) # The created pdf is not reproducible. But the converted image should always look the same. - expected_hash = ( - "8734a3f0a567979343824e468cd737bf29c02086bbfd8773e94feb986968ad32" + expected_hash = self.hashfile( + os.path.join(self.SAMPLE_FILES, "html.eml.pdf.webp"), ) self.assertEqual( thumb_hash, @@ -174,14 +199,14 @@ class TestParserLive(TestCase): ] result = self.parser.generate_pdf_from_html(html, attachments) - pdf_path = os.path.join(self.parser.tempdir, "test_generate_pdf_from_html.pdf") + pdf_path = os.path.join(self.parser.tempdir, "sample.html.pdf") with open(pdf_path, "wb") as file: file.write(result) file.close() extracted = extract_text(pdf_path) - expected = "Some Text\n\n This image should not be shown.\n\nand an embedded image.\n\nParagraph unchanged.\n\n\x0c" + expected = extract_text(os.path.join(self.SAMPLE_FILES, "sample.html.pdf")) self.assertEqual(expected, extracted) @pytest.mark.skipif(