mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-10-30 03:56:23 -05:00 
			
		
		
		
	Remove log mocking, replace pytest.raises with assertRaises, and use humanfriendly to format attachment sizes
This commit is contained in:
		| @@ -10,6 +10,7 @@ from django.conf import settings | ||||
| from documents.parsers import DocumentParser | ||||
| from documents.parsers import make_thumbnail_from_pdf | ||||
| from documents.parsers import ParseError | ||||
| from humanfriendly import format_size | ||||
| from imap_tools import MailMessage | ||||
| from tika import parser | ||||
|  | ||||
| @@ -125,10 +126,8 @@ class MailDocumentParser(DocumentParser): | ||||
|         if len(mail.attachments) >= 1: | ||||
|             att = [] | ||||
|             for a in mail.attachments: | ||||
|                 if a.size >= 1024 * 600: | ||||
|                     att.append(f"{a.filename} ({(a.size / 1024 / 1024):.2f} MiB)") | ||||
|                 else: | ||||
|                     att.append(f"{a.filename} ({(a.size / 1024):.2f} KiB)") | ||||
|                 att.append(f"{a.filename} ({format_size(a.size, binary=True)})") | ||||
|  | ||||
|             self.text += f"Attachments: {', '.join(att)}\n\n" | ||||
|  | ||||
|         if mail.html != "": | ||||
| @@ -191,7 +190,7 @@ class MailDocumentParser(DocumentParser): | ||||
|         return pdf_path | ||||
|  | ||||
|     @staticmethod | ||||
|     def mail_to_html(mail) -> StringIO: | ||||
|     def mail_to_html(mail: MailMessage) -> StringIO: | ||||
|         data = {} | ||||
|  | ||||
|         def clean_html(text: str): | ||||
| @@ -228,10 +227,7 @@ class MailDocumentParser(DocumentParser): | ||||
|  | ||||
|         att = [] | ||||
|         for a in mail.attachments: | ||||
|             if a.size >= 1024 * 600: | ||||
|                 att.append(f"{a.filename} ({(a.size / 1024 / 1024):.2f} MiB)") | ||||
|             else: | ||||
|                 att.append(f"{a.filename} ({(a.size / 1024):.2f} KiB)") | ||||
|             att.append(f"{a.filename} ({format_size(a.size, binary=True)})") | ||||
|         data["attachments"] = clean_html(", ".join(att)) | ||||
|         if data["attachments"] != "": | ||||
|             data["attachments_label"] = "Attachments" | ||||
|   | ||||
| @@ -30,7 +30,7 @@ | ||||
|       <div class="col-start-2 col-span-10 row-start-5" text-sm my-0.5></div> | ||||
|  | ||||
| 			<div class="col-start-1 row-start-6	 text-slate-400 text-right">Attachments</div> | ||||
|       <div class="col-start-2 col-span-10 row-start-6">IntM6gnXFm00FEV5.png (6.89 KiB), 600+kbfile.txt (0.59 MiB)</div> | ||||
|       <div class="col-start-2 col-span-10 row-start-6">IntM6gnXFm00FEV5.png (6.89 KiB), 600+kbfile.txt (600.24 KiB)</div> | ||||
|     </div> | ||||
|  | ||||
|     <!-- Separator--> | ||||
|   | ||||
										
											Binary file not shown.
										
									
								
							
										
											Binary file not shown.
										
									
								
							| Before Width: | Height: | Size: 5.9 KiB After Width: | Height: | Size: 6.0 KiB | 
| @@ -19,12 +19,18 @@ class TestParser(TestCase): | ||||
|  | ||||
|     def test_get_parsed(self): | ||||
|         # Check if exception is raised when parsing fails. | ||||
|         with pytest.raises(ParseError): | ||||
|             self.parser.get_parsed(os.path.join(self.SAMPLE_FILES, "na")) | ||||
|         self.assertRaises( | ||||
|             ParseError, | ||||
|             self.parser.get_parsed, | ||||
|             os.path.join(self.SAMPLE_FILES, "na"), | ||||
|         ) | ||||
|  | ||||
|         # Check if exception is raised when the mail is faulty. | ||||
|         with pytest.raises(ParseError): | ||||
|             self.parser.get_parsed(os.path.join(self.SAMPLE_FILES, "broken.eml")) | ||||
|         self.assertRaises( | ||||
|             ParseError, | ||||
|             self.parser.get_parsed, | ||||
|             os.path.join(self.SAMPLE_FILES, "broken.eml"), | ||||
|         ) | ||||
|  | ||||
|         # Parse Test file and check relevant content | ||||
|         parsed1 = self.parser.get_parsed( | ||||
| @@ -210,18 +216,18 @@ class TestParser(TestCase): | ||||
|  | ||||
|     def test_parse_na(self): | ||||
|         # Check if exception is raised when parsing fails. | ||||
|         with pytest.raises(ParseError): | ||||
|             self.parser.parse( | ||||
|                 os.path.join(os.path.join(self.SAMPLE_FILES, "na")), | ||||
|                 "message/rfc822", | ||||
|             ) | ||||
|         self.assertRaises( | ||||
|             ParseError, | ||||
|             self.parser.parse, | ||||
|             os.path.join(self.SAMPLE_FILES, "na"), | ||||
|             "message/rfc822", | ||||
|         ) | ||||
|  | ||||
|     @mock.patch("paperless_mail.parsers.MailDocumentParser.tika_parse") | ||||
|     @mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf") | ||||
|     @mock.patch("documents.loggers.LoggingMixin.log")  # Disable log output | ||||
|     def test_parse_html_eml(self, m, n, mock_tika_parse: mock.MagicMock): | ||||
|     def test_parse_html_eml(self, n, mock_tika_parse: mock.MagicMock): | ||||
|         # Validate parsing returns the expected results | ||||
|         text_expected = "Some Text\nand an embedded image.\n\nSubject: HTML Message\n\nFrom: Name <someone@example.de>\n\nTo: someone@example.de\n\nAttachments: IntM6gnXFm00FEV5.png (6.89 KiB), 600+kbfile.txt (0.59 MiB)\n\nHTML content: tika return" | ||||
|         text_expected = "Some Text\nand an embedded image.\n\nSubject: HTML Message\n\nFrom: Name <someone@example.de>\n\nTo: someone@example.de\n\nAttachments: IntM6gnXFm00FEV5.png (6.89 KiB), 600+kbfile.txt (600.24 KiB)\n\nHTML content: tika return" | ||||
|         mock_tika_parse.return_value = "tika return" | ||||
|  | ||||
|         self.parser.parse(os.path.join(self.SAMPLE_FILES, "html.eml"), "message/rfc822") | ||||
| @@ -241,8 +247,7 @@ class TestParser(TestCase): | ||||
|         ) | ||||
|  | ||||
|     @mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf") | ||||
|     @mock.patch("documents.loggers.LoggingMixin.log")  # Disable log output | ||||
|     def test_parse_simple_eml(self, m, n): | ||||
|     def test_parse_simple_eml(self, n): | ||||
|         # Validate parsing returns the expected results | ||||
|  | ||||
|         self.parser.parse( | ||||
| @@ -268,8 +273,7 @@ class TestParser(TestCase): | ||||
|         self.assertTrue(os.path.isfile(self.parser.archive_path)) | ||||
|  | ||||
|     @mock.patch("paperless_mail.parsers.parser.from_buffer") | ||||
|     @mock.patch("documents.loggers.LoggingMixin.log")  # Disable log output | ||||
|     def test_tika_parse(self, m, mock_from_buffer: mock.MagicMock): | ||||
|     def test_tika_parse(self, mock_from_buffer: mock.MagicMock): | ||||
|         html = '<html><head><meta http-equiv="content-type" content="text/html; charset=UTF-8"></head><body><p>Some Text</p></body></html>' | ||||
|         expected_text = "Some Text" | ||||
|         mock_from_buffer.return_value = {"content": expected_text} | ||||
| @@ -300,8 +304,11 @@ class TestParser(TestCase): | ||||
|  | ||||
|         # Check if exception is raised when the pdf can not be created. | ||||
|         self.parser.gotenberg_server = "" | ||||
|         with pytest.raises(ParseError): | ||||
|             self.parser.generate_pdf(os.path.join(self.SAMPLE_FILES, "html.eml")) | ||||
|         self.assertRaises( | ||||
|             ParseError, | ||||
|             self.parser.generate_pdf, | ||||
|             os.path.join(self.SAMPLE_FILES, "html.eml"), | ||||
|         ) | ||||
|  | ||||
|     @mock.patch("paperless_mail.parsers.requests.post") | ||||
|     @mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf_from_mail") | ||||
| @@ -313,8 +320,11 @@ class TestParser(TestCase): | ||||
|         mock_post: mock.MagicMock, | ||||
|     ): | ||||
|         # Check if exception is raised when the mail can not be parsed. | ||||
|         with pytest.raises(ParseError): | ||||
|             self.parser.generate_pdf(os.path.join(self.SAMPLE_FILES, "broken.eml")) | ||||
|         self.assertRaises( | ||||
|             ParseError, | ||||
|             self.parser.generate_pdf, | ||||
|             os.path.join(self.SAMPLE_FILES, "broken.eml"), | ||||
|         ) | ||||
|  | ||||
|         mock_generate_pdf_from_mail.return_value = b"Mail Return" | ||||
|         mock_generate_pdf_from_html.return_value = b"HTML Return" | ||||
| @@ -430,8 +440,7 @@ class TestParser(TestCase): | ||||
|         self.assertFalse("<script" in resulting_html.lower()) | ||||
|  | ||||
|     @mock.patch("paperless_mail.parsers.requests.post") | ||||
|     @mock.patch("documents.loggers.LoggingMixin.log")  # Disable log output | ||||
|     def test_generate_pdf_from_html(self, m, mock_post: mock.MagicMock): | ||||
|     def test_generate_pdf_from_html(self, mock_post: mock.MagicMock): | ||||
|         class MailAttachmentMock: | ||||
|             def __init__(self, payload, content_id): | ||||
|                 self.payload = payload | ||||
|   | ||||
| @@ -39,8 +39,7 @@ class TestParserLive(TestCase): | ||||
|         reason="PAPERLESS_TEST_SKIP_CONVERT set, skipping Test", | ||||
|     ) | ||||
|     @mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf") | ||||
|     @mock.patch("documents.loggers.LoggingMixin.log")  # Disable log output | ||||
|     def test_get_thumbnail(self, m, mock_generate_pdf: mock.MagicMock): | ||||
|     def test_get_thumbnail(self, mock_generate_pdf: mock.MagicMock): | ||||
|         mock_generate_pdf.return_value = os.path.join( | ||||
|             self.SAMPLE_FILES, | ||||
|             "simple_text.eml.pdf", | ||||
| @@ -63,17 +62,15 @@ class TestParserLive(TestCase): | ||||
|         "TIKA_LIVE" not in os.environ, | ||||
|         reason="No tika server", | ||||
|     ) | ||||
|     @mock.patch("documents.loggers.LoggingMixin.log")  # Disable log output | ||||
|     def test_tika_parse(self, m): | ||||
|     def test_tika_parse(self): | ||||
|         html = '<html><head><meta http-equiv="content-type" content="text/html; charset=UTF-8"></head><body><p>Some Text</p></body></html>' | ||||
|         expected_text = "Some Text" | ||||
|  | ||||
|         tika_server_original = self.parser.tika_server | ||||
|  | ||||
|         # Check if exception is raised when Tika cannot be reached. | ||||
|         with pytest.raises(ParseError): | ||||
|             self.parser.tika_server = "" | ||||
|             self.parser.tika_parse(html) | ||||
|         self.parser.tika_server = "" | ||||
|         self.assertRaises(ParseError, self.parser.tika_parse, html) | ||||
|  | ||||
|         # Check unsuccessful parsing | ||||
|         self.parser.tika_server = tika_server_original | ||||
| @@ -116,15 +113,13 @@ class TestParserLive(TestCase): | ||||
|         "GOTENBERG_LIVE" not in os.environ, | ||||
|         reason="No gotenberg server", | ||||
|     ) | ||||
|     @mock.patch("documents.loggers.LoggingMixin.log")  # Disable log output | ||||
|     def test_generate_pdf_from_mail_no_convert(self, m): | ||||
|     def test_generate_pdf_from_mail_no_convert(self): | ||||
|         mail = self.parser.get_parsed(os.path.join(self.SAMPLE_FILES, "html.eml")) | ||||
|  | ||||
|         pdf_path = os.path.join(self.parser.tempdir, "html.eml.pdf") | ||||
|  | ||||
|         with open(pdf_path, "wb") as file: | ||||
|             file.write(self.parser.generate_pdf_from_mail(mail)) | ||||
|             file.close() | ||||
|  | ||||
|         extracted = extract_text(pdf_path) | ||||
|         expected = extract_text(os.path.join(self.SAMPLE_FILES, "html.eml.pdf")) | ||||
| @@ -139,15 +134,13 @@ class TestParserLive(TestCase): | ||||
|         "PAPERLESS_TEST_SKIP_CONVERT" in os.environ, | ||||
|         reason="PAPERLESS_TEST_SKIP_CONVERT set, skipping Test", | ||||
|     ) | ||||
|     @mock.patch("documents.loggers.LoggingMixin.log")  # Disable log output | ||||
|     def test_generate_pdf_from_mail(self, m): | ||||
|     def test_generate_pdf_from_mail(self): | ||||
|         mail = self.parser.get_parsed(os.path.join(self.SAMPLE_FILES, "html.eml")) | ||||
|  | ||||
|         pdf_path = os.path.join(self.parser.tempdir, "html.eml.pdf") | ||||
|  | ||||
|         with open(pdf_path, "wb") as file: | ||||
|             file.write(self.parser.generate_pdf_from_mail(mail)) | ||||
|             file.close() | ||||
|  | ||||
|         converted = os.path.join( | ||||
|             self.parser.tempdir, | ||||
| @@ -181,8 +174,7 @@ class TestParserLive(TestCase): | ||||
|         "GOTENBERG_LIVE" not in os.environ, | ||||
|         reason="No gotenberg server", | ||||
|     ) | ||||
|     @mock.patch("documents.loggers.LoggingMixin.log")  # Disable log output | ||||
|     def test_generate_pdf_from_html_no_convert(self, m): | ||||
|     def test_generate_pdf_from_html_no_convert(self): | ||||
|         class MailAttachmentMock: | ||||
|             def __init__(self, payload, content_id): | ||||
|                 self.payload = payload | ||||
| @@ -203,7 +195,6 @@ class TestParserLive(TestCase): | ||||
|  | ||||
|         with open(pdf_path, "wb") as file: | ||||
|             file.write(result) | ||||
|             file.close() | ||||
|  | ||||
|         extracted = extract_text(pdf_path) | ||||
|         expected = extract_text(os.path.join(self.SAMPLE_FILES, "sample.html.pdf")) | ||||
| @@ -218,8 +209,7 @@ class TestParserLive(TestCase): | ||||
|         "PAPERLESS_TEST_SKIP_CONVERT" in os.environ, | ||||
|         reason="PAPERLESS_TEST_SKIP_CONVERT set, skipping Test", | ||||
|     ) | ||||
|     @mock.patch("documents.loggers.LoggingMixin.log")  # Disable log output | ||||
|     def test_generate_pdf_from_html(self, m): | ||||
|     def test_generate_pdf_from_html(self): | ||||
|         class MailAttachmentMock: | ||||
|             def __init__(self, payload, content_id): | ||||
|                 self.payload = payload | ||||
| @@ -240,7 +230,6 @@ class TestParserLive(TestCase): | ||||
|  | ||||
|         with open(pdf_path, "wb") as file: | ||||
|             file.write(result) | ||||
|             file.close() | ||||
|  | ||||
|         converted = os.path.join(self.parser.tempdir, "sample.html.pdf.webp") | ||||
|         run_convert( | ||||
| @@ -269,20 +258,22 @@ class TestParserLive(TestCase): | ||||
|             f"If Rick Astley is shown, Gotenberg loads from web which is bad for Mail content.", | ||||
|         ) | ||||
|  | ||||
|     @staticmethod | ||||
|     @pytest.mark.skipif( | ||||
|         "GOTENBERG_LIVE" not in os.environ, | ||||
|         reason="No gotenberg server", | ||||
|     ) | ||||
|     def test_is_online_image_still_available(): | ||||
|     def test_is_online_image_still_available(self): | ||||
|         """ | ||||
|         A public image is used in the html sample file. We have no control | ||||
|         whether this image stays online forever, so here we check if it is still there | ||||
|         """ | ||||
|  | ||||
|         # Start by Testing if nonexistent URL really throws an Exception | ||||
|         with pytest.raises(HTTPError): | ||||
|             urlopen("https://upload.wikimedia.org/wikipedia/en/f/f7/nonexistent.png") | ||||
|         self.assertRaises( | ||||
|             HTTPError, | ||||
|             urlopen, | ||||
|             "https://upload.wikimedia.org/wikipedia/en/f/f7/nonexistent.png", | ||||
|         ) | ||||
|  | ||||
|         # Now check the URL used in samples/sample.html | ||||
|         urlopen("https://upload.wikimedia.org/wikipedia/en/f/f7/RickRoll.png") | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 phail
					phail