Cleans up and improves parser discovery testing, simplifies the determination of supported or not supported extensions and mime types

This commit is contained in:
Trenton H
2023-01-04 10:18:31 -08:00
parent 096e3320f5
commit 8504b6f7da
2 changed files with 109 additions and 49 deletions

View File

@@ -1,14 +1,8 @@
import os
import shutil
import tempfile
from tempfile import TemporaryDirectory
from unittest import mock
from django.test import override_settings
from django.test import TestCase
from documents.parsers import DocumentParser
from documents.parsers import get_default_file_extension
from documents.parsers import get_parser_class
from documents.parsers import get_parser_class_for_mime_type
from documents.parsers import get_supported_file_extensions
from documents.parsers import is_file_ext_supported
@@ -16,21 +10,18 @@ from paperless_tesseract.parsers import RasterisedDocumentParser
from paperless_text.parsers import TextDocumentParser
def fake_magic_from_file(file, mime=False):
if mime:
if os.path.splitext(file)[1] == ".pdf":
return "application/pdf"
else:
return "unknown"
else:
return "A verbose string that describes the contents of the file"
@mock.patch("documents.parsers.magic.from_file", fake_magic_from_file)
class TestParserDiscovery(TestCase):
@mock.patch("documents.parsers.document_consumer_declaration.send")
def test__get_parser_class_1_parser(self, m, *args):
def test_get_parser_class_1_parser(self, m, *args):
"""
GIVEN:
- Parser declared for a given mimetype
WHEN:
- Attempt to get parser for the mimetype
THEN:
- Declared parser class is returned
"""
class DummyParser:
pass
@@ -45,10 +36,20 @@ class TestParserDiscovery(TestCase):
),
)
self.assertEqual(get_parser_class("doc.pdf"), DummyParser)
self.assertEqual(get_parser_class_for_mime_type("application/pdf"), DummyParser)
@mock.patch("documents.parsers.document_consumer_declaration.send")
def test__get_parser_class_n_parsers(self, m, *args):
def test_get_parser_class_n_parsers(self, m, *args):
"""
GIVEN:
- Two parsers declared for a given mimetype
- Second parser has a higher weight
WHEN:
- Attempt to get parser for the mimetype
THEN:
- Second parser class is returned
"""
class DummyParser1:
pass
@@ -74,30 +75,77 @@ class TestParserDiscovery(TestCase):
),
)
self.assertEqual(get_parser_class("doc.pdf"), DummyParser2)
self.assertEqual(
get_parser_class_for_mime_type("application/pdf"),
DummyParser2,
)
@mock.patch("documents.parsers.document_consumer_declaration.send")
def test__get_parser_class_0_parsers(self, m, *args):
def test_get_parser_class_0_parsers(self, m, *args):
"""
GIVEN:
- No parsers are declared
WHEN:
- Attempt to get parser for the mimetype
THEN:
- No parser class is returned
"""
m.return_value = []
with TemporaryDirectory() as tmpdir:
self.assertIsNone(get_parser_class("doc.pdf"))
self.assertIsNone(get_parser_class_for_mime_type("application/pdf"))
@mock.patch("documents.parsers.document_consumer_declaration.send")
def test_get_parser_class_no_valid_parser(self, m, *args):
"""
GIVEN:
- No parser declared for a given mimetype
- Parser declared for a different mimetype
WHEN:
- Attempt to get parser for the given mimetype
THEN:
- No parser class is returned
"""
def fake_get_thumbnail(self, path, mimetype, file_name):
return os.path.join(os.path.dirname(__file__), "examples", "no-text.png")
class DummyParser:
pass
m.return_value = (
(
None,
{
"weight": 0,
"parser": DummyParser,
"mime_types": {"application/pdf": ".pdf"},
},
),
)
self.assertIsNone(get_parser_class_for_mime_type("image/tiff"))
class TestParserAvailability(TestCase):
def test_file_extensions(self):
for ext in [".pdf", ".jpe", ".jpg", ".jpeg", ".txt", ".csv"]:
self.assertIn(ext, get_supported_file_extensions())
self.assertEqual(get_default_file_extension("application/pdf"), ".pdf")
self.assertEqual(get_default_file_extension("image/png"), ".png")
self.assertEqual(get_default_file_extension("image/jpeg"), ".jpg")
self.assertEqual(get_default_file_extension("text/plain"), ".txt")
self.assertEqual(get_default_file_extension("text/csv"), ".csv")
supported_mimes_and_exts = [
("application/pdf", ".pdf"),
("image/png", ".png"),
("image/jpeg", ".jpg"),
("image/tiff", ".tif"),
("image/webp", ".webp"),
("text/plain", ".txt"),
("text/csv", ".csv"),
]
supported_exts = get_supported_file_extensions()
for mime_type, ext in supported_mimes_and_exts:
self.assertIn(ext, supported_exts)
self.assertEqual(get_default_file_extension(mime_type), ext)
# Test no parser declared still returns a an extension
self.assertEqual(get_default_file_extension("application/zip"), ".zip")
# Test invalid mimetype returns no extension
self.assertEqual(get_default_file_extension("aasdasd/dgfgf"), "")
self.assertIsInstance(
@@ -108,7 +156,7 @@ class TestParserAvailability(TestCase):
get_parser_class_for_mime_type("text/plain")(logging_group=None),
TextDocumentParser,
)
self.assertEqual(get_parser_class_for_mime_type("text/sdgsdf"), None)
self.assertIsNone(get_parser_class_for_mime_type("text/sdgsdf"))
self.assertTrue(is_file_ext_supported(".pdf"))
self.assertFalse(is_file_ext_supported(".hsdfh"))