# File listing metadata (not code): 85 lines, 2.9 KiB, Python

import os
from tempfile import TemporaryDirectory
from unittest import mock
from django.test import TestCase
from documents.parsers import get_parser_class, get_supported_file_extensions, get_default_file_extension, \
get_parser_class_for_mime_type
from paperless_tesseract.parsers import RasterisedDocumentParser
from paperless_text.parsers import TextDocumentParser
def fake_magic_from_file(file, mime=False):
    """Stand-in for ``magic.from_file`` that only looks at the file name.

    When ``mime`` is truthy, return ``"application/pdf"`` for names whose
    extension is exactly ``.pdf`` and ``"unknown"`` for everything else.
    Otherwise return a fixed placeholder description string.
    """
    if not mime:
        return "A verbose string that describes the contents of the file"

    _, extension = os.path.splitext(file)
    return "application/pdf" if extension == ".pdf" else "unknown"
@mock.patch("documents.parsers.magic.from_file", fake_magic_from_file)
class TestParserDiscovery(TestCase):
    """Check which parser class ``get_parser_class`` selects for a file name.

    ``documents.parsers.magic.from_file`` is replaced by
    ``fake_magic_from_file`` for the test methods of this class, and each test
    mocks ``document_consumer_declaration.send`` so that the set of declared
    parsers is fully controlled by the test.
    """

    @mock.patch("documents.parsers.document_consumer_declaration.send")
    def test__get_parser_class_1_parser(self, m, *args):
        # A single parser declared for application/pdf must be returned.
        class DummyParser:
            pass

        m.return_value = (
            (None, {"weight": 0, "parser": DummyParser, "mime_types": {"application/pdf": ".pdf"}}),
        )

        self.assertEqual(get_parser_class("doc.pdf"), DummyParser)

    @mock.patch("documents.parsers.document_consumer_declaration.send")
    def test__get_parser_class_n_parsers(self, m, *args):
        # Two parsers declare the same MIME type; the test expects the one
        # with the larger "weight" value to be selected.
        class DummyParser1:
            pass

        class DummyParser2:
            pass

        m.return_value = (
            (None, {"weight": 0, "parser": DummyParser1, "mime_types": {"application/pdf": ".pdf"}}),
            (None, {"weight": 1, "parser": DummyParser2, "mime_types": {"application/pdf": ".pdf"}}),
        )

        self.assertEqual(get_parser_class("doc.pdf"), DummyParser2)

    @mock.patch("documents.parsers.document_consumer_declaration.send")
    def test__get_parser_class_0_parsers(self, m, *args):
        # With no parsers declared at all, no parser class can be found.
        m.return_value = []

        self.assertIsNone(get_parser_class("doc.pdf"))
class TestParserAvailability(TestCase):
    """Check extension and MIME-type lookups without mocking parser discovery.

    Nothing is patched in this class, so the results come from whichever
    parsers are registered in the test environment.
    """

    def test_file_extensions(self):
        # The supported set does not change between iterations; fetch it once.
        supported_extensions = get_supported_file_extensions()
        for ext in [".pdf", ".jpe", ".jpg", ".jpeg", ".txt", ".csv"]:
            self.assertIn(ext, supported_extensions)

        # Default extension for known MIME types; None for an unknown one.
        self.assertEqual(get_default_file_extension('application/pdf'), ".pdf")
        self.assertEqual(get_default_file_extension('image/png'), ".png")
        self.assertEqual(get_default_file_extension('image/jpeg'), ".jpg")
        self.assertEqual(get_default_file_extension('text/plain'), ".txt")
        self.assertEqual(get_default_file_extension('text/csv'), ".csv")
        self.assertIsNone(get_default_file_extension('aasdasd/dgfgf'))

        # Parser class per MIME type; None when no parser handles the type.
        self.assertEqual(
            get_parser_class_for_mime_type('application/pdf'),
            RasterisedDocumentParser,
        )
        self.assertEqual(
            get_parser_class_for_mime_type('text/plain'),
            TextDocumentParser,
        )
        self.assertIsNone(get_parser_class_for_mime_type('text/sdgsdf'))