mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
remove log mocking, replace pytest raises, use humanfriendly
This commit is contained in:
parent
b68906b14e
commit
e3c1bde793
@ -10,6 +10,7 @@ from django.conf import settings
|
|||||||
from documents.parsers import DocumentParser
|
from documents.parsers import DocumentParser
|
||||||
from documents.parsers import make_thumbnail_from_pdf
|
from documents.parsers import make_thumbnail_from_pdf
|
||||||
from documents.parsers import ParseError
|
from documents.parsers import ParseError
|
||||||
|
from humanfriendly import format_size
|
||||||
from imap_tools import MailMessage
|
from imap_tools import MailMessage
|
||||||
from tika import parser
|
from tika import parser
|
||||||
|
|
||||||
@ -125,10 +126,8 @@ class MailDocumentParser(DocumentParser):
|
|||||||
if len(mail.attachments) >= 1:
|
if len(mail.attachments) >= 1:
|
||||||
att = []
|
att = []
|
||||||
for a in mail.attachments:
|
for a in mail.attachments:
|
||||||
if a.size >= 1024 * 600:
|
att.append(f"{a.filename} ({format_size(a.size, binary=True)})")
|
||||||
att.append(f"{a.filename} ({(a.size / 1024 / 1024):.2f} MiB)")
|
|
||||||
else:
|
|
||||||
att.append(f"{a.filename} ({(a.size / 1024):.2f} KiB)")
|
|
||||||
self.text += f"Attachments: {', '.join(att)}\n\n"
|
self.text += f"Attachments: {', '.join(att)}\n\n"
|
||||||
|
|
||||||
if mail.html != "":
|
if mail.html != "":
|
||||||
@ -191,7 +190,7 @@ class MailDocumentParser(DocumentParser):
|
|||||||
return pdf_path
|
return pdf_path
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def mail_to_html(mail) -> StringIO:
|
def mail_to_html(mail: MailMessage) -> StringIO:
|
||||||
data = {}
|
data = {}
|
||||||
|
|
||||||
def clean_html(text: str):
|
def clean_html(text: str):
|
||||||
@ -228,10 +227,7 @@ class MailDocumentParser(DocumentParser):
|
|||||||
|
|
||||||
att = []
|
att = []
|
||||||
for a in mail.attachments:
|
for a in mail.attachments:
|
||||||
if a.size >= 1024 * 600:
|
att.append(f"{a.filename} ({format_size(a.size, binary=True)})")
|
||||||
att.append(f"{a.filename} ({(a.size / 1024 / 1024):.2f} MiB)")
|
|
||||||
else:
|
|
||||||
att.append(f"{a.filename} ({(a.size / 1024):.2f} KiB)")
|
|
||||||
data["attachments"] = clean_html(", ".join(att))
|
data["attachments"] = clean_html(", ".join(att))
|
||||||
if data["attachments"] != "":
|
if data["attachments"] != "":
|
||||||
data["attachments_label"] = "Attachments"
|
data["attachments_label"] = "Attachments"
|
||||||
|
@ -30,7 +30,7 @@
|
|||||||
<div class="col-start-2 col-span-10 row-start-5" text-sm my-0.5></div>
|
<div class="col-start-2 col-span-10 row-start-5" text-sm my-0.5></div>
|
||||||
|
|
||||||
<div class="col-start-1 row-start-6 text-slate-400 text-right">Attachments</div>
|
<div class="col-start-1 row-start-6 text-slate-400 text-right">Attachments</div>
|
||||||
<div class="col-start-2 col-span-10 row-start-6">IntM6gnXFm00FEV5.png (6.89 KiB), 600+kbfile.txt (0.59 MiB)</div>
|
<div class="col-start-2 col-span-10 row-start-6">IntM6gnXFm00FEV5.png (6.89 KiB), 600+kbfile.txt (600.24 KiB)</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<!-- Separator-->
|
<!-- Separator-->
|
||||||
|
Binary file not shown.
Binary file not shown.
Before Width: | Height: | Size: 5.9 KiB After Width: | Height: | Size: 6.0 KiB |
@ -19,12 +19,18 @@ class TestParser(TestCase):
|
|||||||
|
|
||||||
def test_get_parsed(self):
|
def test_get_parsed(self):
|
||||||
# Check if exception is raised when parsing fails.
|
# Check if exception is raised when parsing fails.
|
||||||
with pytest.raises(ParseError):
|
self.assertRaises(
|
||||||
self.parser.get_parsed(os.path.join(self.SAMPLE_FILES, "na"))
|
ParseError,
|
||||||
|
self.parser.get_parsed,
|
||||||
|
os.path.join(self.SAMPLE_FILES, "na"),
|
||||||
|
)
|
||||||
|
|
||||||
# Check if exception is raised when the mail is faulty.
|
# Check if exception is raised when the mail is faulty.
|
||||||
with pytest.raises(ParseError):
|
self.assertRaises(
|
||||||
self.parser.get_parsed(os.path.join(self.SAMPLE_FILES, "broken.eml"))
|
ParseError,
|
||||||
|
self.parser.get_parsed,
|
||||||
|
os.path.join(self.SAMPLE_FILES, "broken.eml"),
|
||||||
|
)
|
||||||
|
|
||||||
# Parse Test file and check relevant content
|
# Parse Test file and check relevant content
|
||||||
parsed1 = self.parser.get_parsed(
|
parsed1 = self.parser.get_parsed(
|
||||||
@ -210,18 +216,18 @@ class TestParser(TestCase):
|
|||||||
|
|
||||||
def test_parse_na(self):
|
def test_parse_na(self):
|
||||||
# Check if exception is raised when parsing fails.
|
# Check if exception is raised when parsing fails.
|
||||||
with pytest.raises(ParseError):
|
self.assertRaises(
|
||||||
self.parser.parse(
|
ParseError,
|
||||||
os.path.join(os.path.join(self.SAMPLE_FILES, "na")),
|
self.parser.parse,
|
||||||
"message/rfc822",
|
os.path.join(self.SAMPLE_FILES, "na"),
|
||||||
)
|
"message/rfc822",
|
||||||
|
)
|
||||||
|
|
||||||
@mock.patch("paperless_mail.parsers.MailDocumentParser.tika_parse")
|
@mock.patch("paperless_mail.parsers.MailDocumentParser.tika_parse")
|
||||||
@mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf")
|
@mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf")
|
||||||
@mock.patch("documents.loggers.LoggingMixin.log") # Disable log output
|
def test_parse_html_eml(self, n, mock_tika_parse: mock.MagicMock):
|
||||||
def test_parse_html_eml(self, m, n, mock_tika_parse: mock.MagicMock):
|
|
||||||
# Validate parsing returns the expected results
|
# Validate parsing returns the expected results
|
||||||
text_expected = "Some Text\nand an embedded image.\n\nSubject: HTML Message\n\nFrom: Name <someone@example.de>\n\nTo: someone@example.de\n\nAttachments: IntM6gnXFm00FEV5.png (6.89 KiB), 600+kbfile.txt (0.59 MiB)\n\nHTML content: tika return"
|
text_expected = "Some Text\nand an embedded image.\n\nSubject: HTML Message\n\nFrom: Name <someone@example.de>\n\nTo: someone@example.de\n\nAttachments: IntM6gnXFm00FEV5.png (6.89 KiB), 600+kbfile.txt (600.24 KiB)\n\nHTML content: tika return"
|
||||||
mock_tika_parse.return_value = "tika return"
|
mock_tika_parse.return_value = "tika return"
|
||||||
|
|
||||||
self.parser.parse(os.path.join(self.SAMPLE_FILES, "html.eml"), "message/rfc822")
|
self.parser.parse(os.path.join(self.SAMPLE_FILES, "html.eml"), "message/rfc822")
|
||||||
@ -241,8 +247,7 @@ class TestParser(TestCase):
|
|||||||
)
|
)
|
||||||
|
|
||||||
@mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf")
|
@mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf")
|
||||||
@mock.patch("documents.loggers.LoggingMixin.log") # Disable log output
|
def test_parse_simple_eml(self, n):
|
||||||
def test_parse_simple_eml(self, m, n):
|
|
||||||
# Validate parsing returns the expected results
|
# Validate parsing returns the expected results
|
||||||
|
|
||||||
self.parser.parse(
|
self.parser.parse(
|
||||||
@ -268,8 +273,7 @@ class TestParser(TestCase):
|
|||||||
self.assertTrue(os.path.isfile(self.parser.archive_path))
|
self.assertTrue(os.path.isfile(self.parser.archive_path))
|
||||||
|
|
||||||
@mock.patch("paperless_mail.parsers.parser.from_buffer")
|
@mock.patch("paperless_mail.parsers.parser.from_buffer")
|
||||||
@mock.patch("documents.loggers.LoggingMixin.log") # Disable log output
|
def test_tika_parse(self, mock_from_buffer: mock.MagicMock):
|
||||||
def test_tika_parse(self, m, mock_from_buffer: mock.MagicMock):
|
|
||||||
html = '<html><head><meta http-equiv="content-type" content="text/html; charset=UTF-8"></head><body><p>Some Text</p></body></html>'
|
html = '<html><head><meta http-equiv="content-type" content="text/html; charset=UTF-8"></head><body><p>Some Text</p></body></html>'
|
||||||
expected_text = "Some Text"
|
expected_text = "Some Text"
|
||||||
mock_from_buffer.return_value = {"content": expected_text}
|
mock_from_buffer.return_value = {"content": expected_text}
|
||||||
@ -300,8 +304,11 @@ class TestParser(TestCase):
|
|||||||
|
|
||||||
# Check if exception is raised when the pdf can not be created.
|
# Check if exception is raised when the pdf can not be created.
|
||||||
self.parser.gotenberg_server = ""
|
self.parser.gotenberg_server = ""
|
||||||
with pytest.raises(ParseError):
|
self.assertRaises(
|
||||||
self.parser.generate_pdf(os.path.join(self.SAMPLE_FILES, "html.eml"))
|
ParseError,
|
||||||
|
self.parser.generate_pdf,
|
||||||
|
os.path.join(self.SAMPLE_FILES, "html.eml"),
|
||||||
|
)
|
||||||
|
|
||||||
@mock.patch("paperless_mail.parsers.requests.post")
|
@mock.patch("paperless_mail.parsers.requests.post")
|
||||||
@mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf_from_mail")
|
@mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf_from_mail")
|
||||||
@ -313,8 +320,11 @@ class TestParser(TestCase):
|
|||||||
mock_post: mock.MagicMock,
|
mock_post: mock.MagicMock,
|
||||||
):
|
):
|
||||||
# Check if exception is raised when the mail can not be parsed.
|
# Check if exception is raised when the mail can not be parsed.
|
||||||
with pytest.raises(ParseError):
|
self.assertRaises(
|
||||||
self.parser.generate_pdf(os.path.join(self.SAMPLE_FILES, "broken.eml"))
|
ParseError,
|
||||||
|
self.parser.generate_pdf,
|
||||||
|
os.path.join(self.SAMPLE_FILES, "broken.eml"),
|
||||||
|
)
|
||||||
|
|
||||||
mock_generate_pdf_from_mail.return_value = b"Mail Return"
|
mock_generate_pdf_from_mail.return_value = b"Mail Return"
|
||||||
mock_generate_pdf_from_html.return_value = b"HTML Return"
|
mock_generate_pdf_from_html.return_value = b"HTML Return"
|
||||||
@ -430,8 +440,7 @@ class TestParser(TestCase):
|
|||||||
self.assertFalse("<script" in resulting_html.lower())
|
self.assertFalse("<script" in resulting_html.lower())
|
||||||
|
|
||||||
@mock.patch("paperless_mail.parsers.requests.post")
|
@mock.patch("paperless_mail.parsers.requests.post")
|
||||||
@mock.patch("documents.loggers.LoggingMixin.log") # Disable log output
|
def test_generate_pdf_from_html(self, mock_post: mock.MagicMock):
|
||||||
def test_generate_pdf_from_html(self, m, mock_post: mock.MagicMock):
|
|
||||||
class MailAttachmentMock:
|
class MailAttachmentMock:
|
||||||
def __init__(self, payload, content_id):
|
def __init__(self, payload, content_id):
|
||||||
self.payload = payload
|
self.payload = payload
|
||||||
|
@ -39,8 +39,7 @@ class TestParserLive(TestCase):
|
|||||||
reason="PAPERLESS_TEST_SKIP_CONVERT set, skipping Test",
|
reason="PAPERLESS_TEST_SKIP_CONVERT set, skipping Test",
|
||||||
)
|
)
|
||||||
@mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf")
|
@mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf")
|
||||||
@mock.patch("documents.loggers.LoggingMixin.log") # Disable log output
|
def test_get_thumbnail(self, mock_generate_pdf: mock.MagicMock):
|
||||||
def test_get_thumbnail(self, m, mock_generate_pdf: mock.MagicMock):
|
|
||||||
mock_generate_pdf.return_value = os.path.join(
|
mock_generate_pdf.return_value = os.path.join(
|
||||||
self.SAMPLE_FILES,
|
self.SAMPLE_FILES,
|
||||||
"simple_text.eml.pdf",
|
"simple_text.eml.pdf",
|
||||||
@ -63,17 +62,15 @@ class TestParserLive(TestCase):
|
|||||||
"TIKA_LIVE" not in os.environ,
|
"TIKA_LIVE" not in os.environ,
|
||||||
reason="No tika server",
|
reason="No tika server",
|
||||||
)
|
)
|
||||||
@mock.patch("documents.loggers.LoggingMixin.log") # Disable log output
|
def test_tika_parse(self):
|
||||||
def test_tika_parse(self, m):
|
|
||||||
html = '<html><head><meta http-equiv="content-type" content="text/html; charset=UTF-8"></head><body><p>Some Text</p></body></html>'
|
html = '<html><head><meta http-equiv="content-type" content="text/html; charset=UTF-8"></head><body><p>Some Text</p></body></html>'
|
||||||
expected_text = "Some Text"
|
expected_text = "Some Text"
|
||||||
|
|
||||||
tika_server_original = self.parser.tika_server
|
tika_server_original = self.parser.tika_server
|
||||||
|
|
||||||
# Check if exception is raised when Tika cannot be reached.
|
# Check if exception is raised when Tika cannot be reached.
|
||||||
with pytest.raises(ParseError):
|
self.parser.tika_server = ""
|
||||||
self.parser.tika_server = ""
|
self.assertRaises(ParseError, self.parser.tika_parse, html)
|
||||||
self.parser.tika_parse(html)
|
|
||||||
|
|
||||||
# Check unsuccessful parsing
|
# Check unsuccessful parsing
|
||||||
self.parser.tika_server = tika_server_original
|
self.parser.tika_server = tika_server_original
|
||||||
@ -116,15 +113,13 @@ class TestParserLive(TestCase):
|
|||||||
"GOTENBERG_LIVE" not in os.environ,
|
"GOTENBERG_LIVE" not in os.environ,
|
||||||
reason="No gotenberg server",
|
reason="No gotenberg server",
|
||||||
)
|
)
|
||||||
@mock.patch("documents.loggers.LoggingMixin.log") # Disable log output
|
def test_generate_pdf_from_mail_no_convert(self):
|
||||||
def test_generate_pdf_from_mail_no_convert(self, m):
|
|
||||||
mail = self.parser.get_parsed(os.path.join(self.SAMPLE_FILES, "html.eml"))
|
mail = self.parser.get_parsed(os.path.join(self.SAMPLE_FILES, "html.eml"))
|
||||||
|
|
||||||
pdf_path = os.path.join(self.parser.tempdir, "html.eml.pdf")
|
pdf_path = os.path.join(self.parser.tempdir, "html.eml.pdf")
|
||||||
|
|
||||||
with open(pdf_path, "wb") as file:
|
with open(pdf_path, "wb") as file:
|
||||||
file.write(self.parser.generate_pdf_from_mail(mail))
|
file.write(self.parser.generate_pdf_from_mail(mail))
|
||||||
file.close()
|
|
||||||
|
|
||||||
extracted = extract_text(pdf_path)
|
extracted = extract_text(pdf_path)
|
||||||
expected = extract_text(os.path.join(self.SAMPLE_FILES, "html.eml.pdf"))
|
expected = extract_text(os.path.join(self.SAMPLE_FILES, "html.eml.pdf"))
|
||||||
@ -139,15 +134,13 @@ class TestParserLive(TestCase):
|
|||||||
"PAPERLESS_TEST_SKIP_CONVERT" in os.environ,
|
"PAPERLESS_TEST_SKIP_CONVERT" in os.environ,
|
||||||
reason="PAPERLESS_TEST_SKIP_CONVERT set, skipping Test",
|
reason="PAPERLESS_TEST_SKIP_CONVERT set, skipping Test",
|
||||||
)
|
)
|
||||||
@mock.patch("documents.loggers.LoggingMixin.log") # Disable log output
|
def test_generate_pdf_from_mail(self):
|
||||||
def test_generate_pdf_from_mail(self, m):
|
|
||||||
mail = self.parser.get_parsed(os.path.join(self.SAMPLE_FILES, "html.eml"))
|
mail = self.parser.get_parsed(os.path.join(self.SAMPLE_FILES, "html.eml"))
|
||||||
|
|
||||||
pdf_path = os.path.join(self.parser.tempdir, "html.eml.pdf")
|
pdf_path = os.path.join(self.parser.tempdir, "html.eml.pdf")
|
||||||
|
|
||||||
with open(pdf_path, "wb") as file:
|
with open(pdf_path, "wb") as file:
|
||||||
file.write(self.parser.generate_pdf_from_mail(mail))
|
file.write(self.parser.generate_pdf_from_mail(mail))
|
||||||
file.close()
|
|
||||||
|
|
||||||
converted = os.path.join(
|
converted = os.path.join(
|
||||||
self.parser.tempdir,
|
self.parser.tempdir,
|
||||||
@ -181,8 +174,7 @@ class TestParserLive(TestCase):
|
|||||||
"GOTENBERG_LIVE" not in os.environ,
|
"GOTENBERG_LIVE" not in os.environ,
|
||||||
reason="No gotenberg server",
|
reason="No gotenberg server",
|
||||||
)
|
)
|
||||||
@mock.patch("documents.loggers.LoggingMixin.log") # Disable log output
|
def test_generate_pdf_from_html_no_convert(self):
|
||||||
def test_generate_pdf_from_html_no_convert(self, m):
|
|
||||||
class MailAttachmentMock:
|
class MailAttachmentMock:
|
||||||
def __init__(self, payload, content_id):
|
def __init__(self, payload, content_id):
|
||||||
self.payload = payload
|
self.payload = payload
|
||||||
@ -203,7 +195,6 @@ class TestParserLive(TestCase):
|
|||||||
|
|
||||||
with open(pdf_path, "wb") as file:
|
with open(pdf_path, "wb") as file:
|
||||||
file.write(result)
|
file.write(result)
|
||||||
file.close()
|
|
||||||
|
|
||||||
extracted = extract_text(pdf_path)
|
extracted = extract_text(pdf_path)
|
||||||
expected = extract_text(os.path.join(self.SAMPLE_FILES, "sample.html.pdf"))
|
expected = extract_text(os.path.join(self.SAMPLE_FILES, "sample.html.pdf"))
|
||||||
@ -218,8 +209,7 @@ class TestParserLive(TestCase):
|
|||||||
"PAPERLESS_TEST_SKIP_CONVERT" in os.environ,
|
"PAPERLESS_TEST_SKIP_CONVERT" in os.environ,
|
||||||
reason="PAPERLESS_TEST_SKIP_CONVERT set, skipping Test",
|
reason="PAPERLESS_TEST_SKIP_CONVERT set, skipping Test",
|
||||||
)
|
)
|
||||||
@mock.patch("documents.loggers.LoggingMixin.log") # Disable log output
|
def test_generate_pdf_from_html(self):
|
||||||
def test_generate_pdf_from_html(self, m):
|
|
||||||
class MailAttachmentMock:
|
class MailAttachmentMock:
|
||||||
def __init__(self, payload, content_id):
|
def __init__(self, payload, content_id):
|
||||||
self.payload = payload
|
self.payload = payload
|
||||||
@ -240,7 +230,6 @@ class TestParserLive(TestCase):
|
|||||||
|
|
||||||
with open(pdf_path, "wb") as file:
|
with open(pdf_path, "wb") as file:
|
||||||
file.write(result)
|
file.write(result)
|
||||||
file.close()
|
|
||||||
|
|
||||||
converted = os.path.join(self.parser.tempdir, "sample.html.pdf.webp")
|
converted = os.path.join(self.parser.tempdir, "sample.html.pdf.webp")
|
||||||
run_convert(
|
run_convert(
|
||||||
@ -269,20 +258,22 @@ class TestParserLive(TestCase):
|
|||||||
f"If Rick Astley is shown, Gotenberg loads from web which is bad for Mail content.",
|
f"If Rick Astley is shown, Gotenberg loads from web which is bad for Mail content.",
|
||||||
)
|
)
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
@pytest.mark.skipif(
|
@pytest.mark.skipif(
|
||||||
"GOTENBERG_LIVE" not in os.environ,
|
"GOTENBERG_LIVE" not in os.environ,
|
||||||
reason="No gotenberg server",
|
reason="No gotenberg server",
|
||||||
)
|
)
|
||||||
def test_is_online_image_still_available():
|
def test_is_online_image_still_available(self):
|
||||||
"""
|
"""
|
||||||
A public image is used in the html sample file. We have no control
|
A public image is used in the html sample file. We have no control
|
||||||
whether this image stays online forever, so here we check if it is still there
|
whether this image stays online forever, so here we check if it is still there
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Start by Testing if nonexistent URL really throws an Exception
|
# Start by Testing if nonexistent URL really throws an Exception
|
||||||
with pytest.raises(HTTPError):
|
self.assertRaises(
|
||||||
urlopen("https://upload.wikimedia.org/wikipedia/en/f/f7/nonexistent.png")
|
HTTPError,
|
||||||
|
urlopen,
|
||||||
|
"https://upload.wikimedia.org/wikipedia/en/f/f7/nonexistent.png",
|
||||||
|
)
|
||||||
|
|
||||||
# Now check the URL used in samples/sample.html
|
# Now check the URL used in samples/sample.html
|
||||||
urlopen("https://upload.wikimedia.org/wikipedia/en/f/f7/RickRoll.png")
|
urlopen("https://upload.wikimedia.org/wikipedia/en/f/f7/RickRoll.png")
|
||||||
|
Loading…
x
Reference in New Issue
Block a user