remove log mocking, replace pytest raises, use humanfriendly

This commit is contained in:
phail 2022-11-20 12:06:35 +01:00
parent b68906b14e
commit e3c1bde793
6 changed files with 51 additions and 55 deletions

View File

@ -10,6 +10,7 @@ from django.conf import settings
from documents.parsers import DocumentParser from documents.parsers import DocumentParser
from documents.parsers import make_thumbnail_from_pdf from documents.parsers import make_thumbnail_from_pdf
from documents.parsers import ParseError from documents.parsers import ParseError
from humanfriendly import format_size
from imap_tools import MailMessage from imap_tools import MailMessage
from tika import parser from tika import parser
@ -125,10 +126,8 @@ class MailDocumentParser(DocumentParser):
if len(mail.attachments) >= 1: if len(mail.attachments) >= 1:
att = [] att = []
for a in mail.attachments: for a in mail.attachments:
if a.size >= 1024 * 600: att.append(f"{a.filename} ({format_size(a.size, binary=True)})")
att.append(f"{a.filename} ({(a.size / 1024 / 1024):.2f} MiB)")
else:
att.append(f"{a.filename} ({(a.size / 1024):.2f} KiB)")
self.text += f"Attachments: {', '.join(att)}\n\n" self.text += f"Attachments: {', '.join(att)}\n\n"
if mail.html != "": if mail.html != "":
@ -191,7 +190,7 @@ class MailDocumentParser(DocumentParser):
return pdf_path return pdf_path
@staticmethod @staticmethod
def mail_to_html(mail) -> StringIO: def mail_to_html(mail: MailMessage) -> StringIO:
data = {} data = {}
def clean_html(text: str): def clean_html(text: str):
@ -228,10 +227,7 @@ class MailDocumentParser(DocumentParser):
att = [] att = []
for a in mail.attachments: for a in mail.attachments:
if a.size >= 1024 * 600: att.append(f"{a.filename} ({format_size(a.size, binary=True)})")
att.append(f"{a.filename} ({(a.size / 1024 / 1024):.2f} MiB)")
else:
att.append(f"{a.filename} ({(a.size / 1024):.2f} KiB)")
data["attachments"] = clean_html(", ".join(att)) data["attachments"] = clean_html(", ".join(att))
if data["attachments"] != "": if data["attachments"] != "":
data["attachments_label"] = "Attachments" data["attachments_label"] = "Attachments"

View File

@ -30,7 +30,7 @@
<div class="col-start-2 col-span-10 row-start-5" text-sm my-0.5></div> <div class="col-start-2 col-span-10 row-start-5" text-sm my-0.5></div>
<div class="col-start-1 row-start-6 text-slate-400 text-right">Attachments</div> <div class="col-start-1 row-start-6 text-slate-400 text-right">Attachments</div>
<div class="col-start-2 col-span-10 row-start-6">IntM6gnXFm00FEV5.png (6.89 KiB), 600+kbfile.txt (0.59 MiB)</div> <div class="col-start-2 col-span-10 row-start-6">IntM6gnXFm00FEV5.png (6.89 KiB), 600+kbfile.txt (600.24 KiB)</div>
</div> </div>
<!-- Separator--> <!-- Separator-->

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.9 KiB

After

Width:  |  Height:  |  Size: 6.0 KiB

View File

@ -19,12 +19,18 @@ class TestParser(TestCase):
def test_get_parsed(self): def test_get_parsed(self):
# Check if exception is raised when parsing fails. # Check if exception is raised when parsing fails.
with pytest.raises(ParseError): self.assertRaises(
self.parser.get_parsed(os.path.join(self.SAMPLE_FILES, "na")) ParseError,
self.parser.get_parsed,
os.path.join(self.SAMPLE_FILES, "na"),
)
# Check if exception is raised when the mail is faulty. # Check if exception is raised when the mail is faulty.
with pytest.raises(ParseError): self.assertRaises(
self.parser.get_parsed(os.path.join(self.SAMPLE_FILES, "broken.eml")) ParseError,
self.parser.get_parsed,
os.path.join(self.SAMPLE_FILES, "broken.eml"),
)
# Parse Test file and check relevant content # Parse Test file and check relevant content
parsed1 = self.parser.get_parsed( parsed1 = self.parser.get_parsed(
@ -210,18 +216,18 @@ class TestParser(TestCase):
def test_parse_na(self): def test_parse_na(self):
# Check if exception is raised when parsing fails. # Check if exception is raised when parsing fails.
with pytest.raises(ParseError): self.assertRaises(
self.parser.parse( ParseError,
os.path.join(os.path.join(self.SAMPLE_FILES, "na")), self.parser.parse,
"message/rfc822", os.path.join(self.SAMPLE_FILES, "na"),
) "message/rfc822",
)
@mock.patch("paperless_mail.parsers.MailDocumentParser.tika_parse") @mock.patch("paperless_mail.parsers.MailDocumentParser.tika_parse")
@mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf") @mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf")
@mock.patch("documents.loggers.LoggingMixin.log") # Disable log output def test_parse_html_eml(self, n, mock_tika_parse: mock.MagicMock):
def test_parse_html_eml(self, m, n, mock_tika_parse: mock.MagicMock):
# Validate parsing returns the expected results # Validate parsing returns the expected results
text_expected = "Some Text\nand an embedded image.\n\nSubject: HTML Message\n\nFrom: Name <someone@example.de>\n\nTo: someone@example.de\n\nAttachments: IntM6gnXFm00FEV5.png (6.89 KiB), 600+kbfile.txt (0.59 MiB)\n\nHTML content: tika return" text_expected = "Some Text\nand an embedded image.\n\nSubject: HTML Message\n\nFrom: Name <someone@example.de>\n\nTo: someone@example.de\n\nAttachments: IntM6gnXFm00FEV5.png (6.89 KiB), 600+kbfile.txt (600.24 KiB)\n\nHTML content: tika return"
mock_tika_parse.return_value = "tika return" mock_tika_parse.return_value = "tika return"
self.parser.parse(os.path.join(self.SAMPLE_FILES, "html.eml"), "message/rfc822") self.parser.parse(os.path.join(self.SAMPLE_FILES, "html.eml"), "message/rfc822")
@ -241,8 +247,7 @@ class TestParser(TestCase):
) )
@mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf") @mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf")
@mock.patch("documents.loggers.LoggingMixin.log") # Disable log output def test_parse_simple_eml(self, n):
def test_parse_simple_eml(self, m, n):
# Validate parsing returns the expected results # Validate parsing returns the expected results
self.parser.parse( self.parser.parse(
@ -268,8 +273,7 @@ class TestParser(TestCase):
self.assertTrue(os.path.isfile(self.parser.archive_path)) self.assertTrue(os.path.isfile(self.parser.archive_path))
@mock.patch("paperless_mail.parsers.parser.from_buffer") @mock.patch("paperless_mail.parsers.parser.from_buffer")
@mock.patch("documents.loggers.LoggingMixin.log") # Disable log output def test_tika_parse(self, mock_from_buffer: mock.MagicMock):
def test_tika_parse(self, m, mock_from_buffer: mock.MagicMock):
html = '<html><head><meta http-equiv="content-type" content="text/html; charset=UTF-8"></head><body><p>Some Text</p></body></html>' html = '<html><head><meta http-equiv="content-type" content="text/html; charset=UTF-8"></head><body><p>Some Text</p></body></html>'
expected_text = "Some Text" expected_text = "Some Text"
mock_from_buffer.return_value = {"content": expected_text} mock_from_buffer.return_value = {"content": expected_text}
@ -300,8 +304,11 @@ class TestParser(TestCase):
# Check if exception is raised when the pdf can not be created. # Check if exception is raised when the pdf can not be created.
self.parser.gotenberg_server = "" self.parser.gotenberg_server = ""
with pytest.raises(ParseError): self.assertRaises(
self.parser.generate_pdf(os.path.join(self.SAMPLE_FILES, "html.eml")) ParseError,
self.parser.generate_pdf,
os.path.join(self.SAMPLE_FILES, "html.eml"),
)
@mock.patch("paperless_mail.parsers.requests.post") @mock.patch("paperless_mail.parsers.requests.post")
@mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf_from_mail") @mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf_from_mail")
@ -313,8 +320,11 @@ class TestParser(TestCase):
mock_post: mock.MagicMock, mock_post: mock.MagicMock,
): ):
# Check if exception is raised when the mail can not be parsed. # Check if exception is raised when the mail can not be parsed.
with pytest.raises(ParseError): self.assertRaises(
self.parser.generate_pdf(os.path.join(self.SAMPLE_FILES, "broken.eml")) ParseError,
self.parser.generate_pdf,
os.path.join(self.SAMPLE_FILES, "broken.eml"),
)
mock_generate_pdf_from_mail.return_value = b"Mail Return" mock_generate_pdf_from_mail.return_value = b"Mail Return"
mock_generate_pdf_from_html.return_value = b"HTML Return" mock_generate_pdf_from_html.return_value = b"HTML Return"
@ -430,8 +440,7 @@ class TestParser(TestCase):
self.assertFalse("<script" in resulting_html.lower()) self.assertFalse("<script" in resulting_html.lower())
@mock.patch("paperless_mail.parsers.requests.post") @mock.patch("paperless_mail.parsers.requests.post")
@mock.patch("documents.loggers.LoggingMixin.log") # Disable log output def test_generate_pdf_from_html(self, mock_post: mock.MagicMock):
def test_generate_pdf_from_html(self, m, mock_post: mock.MagicMock):
class MailAttachmentMock: class MailAttachmentMock:
def __init__(self, payload, content_id): def __init__(self, payload, content_id):
self.payload = payload self.payload = payload

View File

@ -39,8 +39,7 @@ class TestParserLive(TestCase):
reason="PAPERLESS_TEST_SKIP_CONVERT set, skipping Test", reason="PAPERLESS_TEST_SKIP_CONVERT set, skipping Test",
) )
@mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf") @mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf")
@mock.patch("documents.loggers.LoggingMixin.log") # Disable log output def test_get_thumbnail(self, mock_generate_pdf: mock.MagicMock):
def test_get_thumbnail(self, m, mock_generate_pdf: mock.MagicMock):
mock_generate_pdf.return_value = os.path.join( mock_generate_pdf.return_value = os.path.join(
self.SAMPLE_FILES, self.SAMPLE_FILES,
"simple_text.eml.pdf", "simple_text.eml.pdf",
@ -63,17 +62,15 @@ class TestParserLive(TestCase):
"TIKA_LIVE" not in os.environ, "TIKA_LIVE" not in os.environ,
reason="No tika server", reason="No tika server",
) )
@mock.patch("documents.loggers.LoggingMixin.log") # Disable log output def test_tika_parse(self):
def test_tika_parse(self, m):
html = '<html><head><meta http-equiv="content-type" content="text/html; charset=UTF-8"></head><body><p>Some Text</p></body></html>' html = '<html><head><meta http-equiv="content-type" content="text/html; charset=UTF-8"></head><body><p>Some Text</p></body></html>'
expected_text = "Some Text" expected_text = "Some Text"
tika_server_original = self.parser.tika_server tika_server_original = self.parser.tika_server
# Check if exception is raised when Tika cannot be reached. # Check if exception is raised when Tika cannot be reached.
with pytest.raises(ParseError): self.parser.tika_server = ""
self.parser.tika_server = "" self.assertRaises(ParseError, self.parser.tika_parse, html)
self.parser.tika_parse(html)
# Check unsuccessful parsing # Check unsuccessful parsing
self.parser.tika_server = tika_server_original self.parser.tika_server = tika_server_original
@ -116,15 +113,13 @@ class TestParserLive(TestCase):
"GOTENBERG_LIVE" not in os.environ, "GOTENBERG_LIVE" not in os.environ,
reason="No gotenberg server", reason="No gotenberg server",
) )
@mock.patch("documents.loggers.LoggingMixin.log") # Disable log output def test_generate_pdf_from_mail_no_convert(self):
def test_generate_pdf_from_mail_no_convert(self, m):
mail = self.parser.get_parsed(os.path.join(self.SAMPLE_FILES, "html.eml")) mail = self.parser.get_parsed(os.path.join(self.SAMPLE_FILES, "html.eml"))
pdf_path = os.path.join(self.parser.tempdir, "html.eml.pdf") pdf_path = os.path.join(self.parser.tempdir, "html.eml.pdf")
with open(pdf_path, "wb") as file: with open(pdf_path, "wb") as file:
file.write(self.parser.generate_pdf_from_mail(mail)) file.write(self.parser.generate_pdf_from_mail(mail))
file.close()
extracted = extract_text(pdf_path) extracted = extract_text(pdf_path)
expected = extract_text(os.path.join(self.SAMPLE_FILES, "html.eml.pdf")) expected = extract_text(os.path.join(self.SAMPLE_FILES, "html.eml.pdf"))
@ -139,15 +134,13 @@ class TestParserLive(TestCase):
"PAPERLESS_TEST_SKIP_CONVERT" in os.environ, "PAPERLESS_TEST_SKIP_CONVERT" in os.environ,
reason="PAPERLESS_TEST_SKIP_CONVERT set, skipping Test", reason="PAPERLESS_TEST_SKIP_CONVERT set, skipping Test",
) )
@mock.patch("documents.loggers.LoggingMixin.log") # Disable log output def test_generate_pdf_from_mail(self):
def test_generate_pdf_from_mail(self, m):
mail = self.parser.get_parsed(os.path.join(self.SAMPLE_FILES, "html.eml")) mail = self.parser.get_parsed(os.path.join(self.SAMPLE_FILES, "html.eml"))
pdf_path = os.path.join(self.parser.tempdir, "html.eml.pdf") pdf_path = os.path.join(self.parser.tempdir, "html.eml.pdf")
with open(pdf_path, "wb") as file: with open(pdf_path, "wb") as file:
file.write(self.parser.generate_pdf_from_mail(mail)) file.write(self.parser.generate_pdf_from_mail(mail))
file.close()
converted = os.path.join( converted = os.path.join(
self.parser.tempdir, self.parser.tempdir,
@ -181,8 +174,7 @@ class TestParserLive(TestCase):
"GOTENBERG_LIVE" not in os.environ, "GOTENBERG_LIVE" not in os.environ,
reason="No gotenberg server", reason="No gotenberg server",
) )
@mock.patch("documents.loggers.LoggingMixin.log") # Disable log output def test_generate_pdf_from_html_no_convert(self):
def test_generate_pdf_from_html_no_convert(self, m):
class MailAttachmentMock: class MailAttachmentMock:
def __init__(self, payload, content_id): def __init__(self, payload, content_id):
self.payload = payload self.payload = payload
@ -203,7 +195,6 @@ class TestParserLive(TestCase):
with open(pdf_path, "wb") as file: with open(pdf_path, "wb") as file:
file.write(result) file.write(result)
file.close()
extracted = extract_text(pdf_path) extracted = extract_text(pdf_path)
expected = extract_text(os.path.join(self.SAMPLE_FILES, "sample.html.pdf")) expected = extract_text(os.path.join(self.SAMPLE_FILES, "sample.html.pdf"))
@ -218,8 +209,7 @@ class TestParserLive(TestCase):
"PAPERLESS_TEST_SKIP_CONVERT" in os.environ, "PAPERLESS_TEST_SKIP_CONVERT" in os.environ,
reason="PAPERLESS_TEST_SKIP_CONVERT set, skipping Test", reason="PAPERLESS_TEST_SKIP_CONVERT set, skipping Test",
) )
@mock.patch("documents.loggers.LoggingMixin.log") # Disable log output def test_generate_pdf_from_html(self):
def test_generate_pdf_from_html(self, m):
class MailAttachmentMock: class MailAttachmentMock:
def __init__(self, payload, content_id): def __init__(self, payload, content_id):
self.payload = payload self.payload = payload
@ -240,7 +230,6 @@ class TestParserLive(TestCase):
with open(pdf_path, "wb") as file: with open(pdf_path, "wb") as file:
file.write(result) file.write(result)
file.close()
converted = os.path.join(self.parser.tempdir, "sample.html.pdf.webp") converted = os.path.join(self.parser.tempdir, "sample.html.pdf.webp")
run_convert( run_convert(
@ -269,20 +258,22 @@ class TestParserLive(TestCase):
f"If Rick Astley is shown, Gotenberg loads from web which is bad for Mail content.", f"If Rick Astley is shown, Gotenberg loads from web which is bad for Mail content.",
) )
@staticmethod
@pytest.mark.skipif( @pytest.mark.skipif(
"GOTENBERG_LIVE" not in os.environ, "GOTENBERG_LIVE" not in os.environ,
reason="No gotenberg server", reason="No gotenberg server",
) )
def test_is_online_image_still_available(): def test_is_online_image_still_available(self):
""" """
A public image is used in the html sample file. We have no control A public image is used in the html sample file. We have no control
whether this image stays online forever, so here we check if it is still there whether this image stays online forever, so here we check if it is still there
""" """
# Start by Testing if nonexistent URL really throws an Exception # Start by Testing if nonexistent URL really throws an Exception
with pytest.raises(HTTPError): self.assertRaises(
urlopen("https://upload.wikimedia.org/wikipedia/en/f/f7/nonexistent.png") HTTPError,
urlopen,
"https://upload.wikimedia.org/wikipedia/en/f/f7/nonexistent.png",
)
# Now check the URL used in samples/sample.html # Now check the URL used in samples/sample.html
urlopen("https://upload.wikimedia.org/wikipedia/en/f/f7/RickRoll.png") urlopen("https://upload.wikimedia.org/wikipedia/en/f/f7/RickRoll.png")