lazy loading for parsers

This commit is contained in:
jonaswinkler 2021-02-04 13:17:24 +01:00
parent 883a6b26a4
commit 44ec3a3d9c
5 changed files with 20 additions and 8 deletions

View File

@ -114,8 +114,8 @@ class TestParserAvailability(TestCase):
self.assertEqual(get_default_file_extension('application/zip'), ".zip")
self.assertEqual(get_default_file_extension('aasdasd/dgfgf'), "")
self.assertEqual(get_parser_class_for_mime_type('application/pdf'), RasterisedDocumentParser)
self.assertEqual(get_parser_class_for_mime_type('text/plain'), TextDocumentParser)
self.assertIsInstance(get_parser_class_for_mime_type('application/pdf')(logging_group=None), RasterisedDocumentParser)
self.assertIsInstance(get_parser_class_for_mime_type('text/plain')(logging_group=None), TextDocumentParser)
self.assertEqual(get_parser_class_for_mime_type('text/sdgsdf'), None)
self.assertTrue(is_file_ext_supported('.pdf'))

View File

@ -1,9 +1,13 @@
def get_parser(*args, **kwargs):
    """Create a RasterisedDocumentParser from the given arguments.

    All positional and keyword arguments are forwarded unchanged to the
    parser's constructor, and the new instance is returned.
    """
    # The import lives inside the function on purpose: it is executed when
    # the factory is called rather than when this module is loaded.
    from .parsers import RasterisedDocumentParser as parser_cls

    parser = parser_cls(*args, **kwargs)
    return parser
def tesseract_consumer_declaration(sender, **kwargs):
return {
"parser": RasterisedDocumentParser,
"parser": get_parser,
"weight": 0,
"mime_types": {
"application/pdf": ".pdf",

View File

@ -1,9 +1,13 @@
def get_parser(*args, **kwargs):
    """Create a TextDocumentParser from the given arguments.

    All positional and keyword arguments are forwarded unchanged to the
    parser's constructor, and the new instance is returned.
    """
    # The import lives inside the function on purpose: it is executed when
    # the factory is called rather than when this module is loaded.
    from .parsers import TextDocumentParser as parser_cls

    parser = parser_cls(*args, **kwargs)
    return parser
def text_consumer_declaration(sender, **kwargs):
return {
"parser": TextDocumentParser,
"parser": get_parser,
"weight": 10,
"mime_types": {
"text/plain": ".txt",

View File

View File

@ -1,9 +1,13 @@
def get_parser(*args, **kwargs):
    """Create a TikaDocumentParser from the given arguments.

    All positional and keyword arguments are forwarded unchanged to the
    parser's constructor, and the new instance is returned.
    """
    # The import lives inside the function on purpose: it is executed when
    # the factory is called rather than when this module is loaded.
    from .parsers import TikaDocumentParser as parser_cls

    parser = parser_cls(*args, **kwargs)
    return parser
def tika_consumer_declaration(sender, **kwargs):
return {
"parser": TikaDocumentParser,
"parser": get_parser,
"weight": 10,
"mime_types": {
"application/msword": ".doc",