mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
85 lines
2.9 KiB
Python
85 lines
2.9 KiB
Python
import os
|
|
from tempfile import TemporaryDirectory
|
|
from unittest import mock
|
|
|
|
from django.test import TestCase
|
|
|
|
from documents.parsers import get_parser_class, get_supported_file_extensions, get_default_file_extension, \
|
|
get_parser_class_for_mime_type
|
|
from paperless_tesseract.parsers import RasterisedDocumentParser
|
|
from paperless_text.parsers import TextDocumentParser
|
|
|
|
|
|
def fake_magic_from_file(file, mime=False):
    """Deterministic replacement for ``magic.from_file`` used by these tests.

    The answer is derived from the file name alone, so the file does not
    have to exist.

    :param file: path (or bare name) of the file to "inspect".
    :param mime: when truthy, return a MIME type; otherwise return a
        free-form description string.
    :return: ``"application/pdf"`` for names ending in exactly ``.pdf``
        and ``"unknown"`` for anything else when ``mime`` is truthy;
        a fixed description string when it is not.
    """
    if not mime:
        return "A verbose string that describes the contents of the file"

    # Only the final extension matters, and the comparison is case-sensitive.
    _, extension = os.path.splitext(file)
    return "application/pdf" if extension == ".pdf" else "unknown"
|
|
|
|
|
|
@mock.patch("documents.parsers.magic.from_file", fake_magic_from_file)
class TestParserDiscovery(TestCase):
    """Tests for ``get_parser_class`` with the parser declarations mocked.

    ``magic.from_file`` is patched for the whole class so MIME detection
    depends only on the file name. Each test additionally patches
    ``document_consumer_declaration.send`` (passed in as ``m``) to control
    which parsers are declared.
    """

    @mock.patch("documents.parsers.document_consumer_declaration.send")
    def test__get_parser_class_1_parser(self, m, *args):
        # A single declared parser that handles PDFs must be the one returned.
        class DummyParser:
            pass

        # Each entry is a 2-tuple whose second item is the parser declaration.
        # NOTE(review): the first item looks like the signal receiver slot,
        # which these tests leave as None - confirm against documents.parsers.
        m.return_value = (
            (None, {"weight": 0, "parser": DummyParser, "mime_types": {"application/pdf": ".pdf"}}),
        )

        self.assertEqual(
            get_parser_class("doc.pdf"),
            DummyParser
        )

    @mock.patch("documents.parsers.document_consumer_declaration.send")
    def test__get_parser_class_n_parsers(self, m, *args):
        # With several parsers declared for the same MIME type, the test
        # expects the one declared with the larger "weight" to be returned.
        class DummyParser1:
            pass

        class DummyParser2:
            pass

        m.return_value = (
            (None, {"weight": 0, "parser": DummyParser1, "mime_types": {"application/pdf": ".pdf"}}),
            (None, {"weight": 1, "parser": DummyParser2, "mime_types": {"application/pdf": ".pdf"}}),
        )

        self.assertEqual(
            get_parser_class("doc.pdf"),
            DummyParser2
        )

    @mock.patch("documents.parsers.document_consumer_declaration.send")
    def test__get_parser_class_0_parsers(self, m, *args):
        # With no parsers declared at all, the lookup must yield None.
        # No file on disk is needed because magic.from_file is patched.
        m.return_value = []

        self.assertIsNone(
            get_parser_class("doc.pdf")
        )
|
|
|
|
|
|
class TestParserAvailability(TestCase):
    """Tests the parser lookup helpers without patching the parser declarations."""

    def test_file_extensions(self):
        # Every extension listed here must be reported as supported.
        # subTest keeps going after a failure so all missing extensions
        # are reported in one run, not just the first.
        supported_extensions = get_supported_file_extensions()
        for ext in [".pdf", ".jpe", ".jpg", ".jpeg", ".txt", ".csv"]:
            with self.subTest(ext=ext):
                self.assertIn(ext, supported_extensions)

        # MIME type -> default file extension.
        self.assertEqual(get_default_file_extension('application/pdf'), ".pdf")
        self.assertEqual(get_default_file_extension('image/png'), ".png")
        self.assertEqual(get_default_file_extension('image/jpeg'), ".jpg")
        self.assertEqual(get_default_file_extension('text/plain'), ".txt")
        self.assertEqual(get_default_file_extension('text/csv'), ".csv")
        # An unknown MIME type has no default extension.
        self.assertIsNone(get_default_file_extension('aasdasd/dgfgf'))

        # MIME type -> parser class.
        self.assertEqual(get_parser_class_for_mime_type('application/pdf'), RasterisedDocumentParser)
        self.assertEqual(get_parser_class_for_mime_type('text/plain'), TextDocumentParser)
        # An unknown MIME type has no parser.
        self.assertIsNone(get_parser_class_for_mime_type('text/sdgsdf'))
|