mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-07-28 18:24:38 -05:00
Changed the way parsers are discovered. This also prepares for upcoming changes regarding content types and file types: parsers should declare what they support, and actual file extensions should not be hardcoded everywhere.
This commit is contained in:
@@ -41,15 +41,16 @@ def get_parser_class(doc):
|
||||
Determine the appropriate parser class based on the file
|
||||
"""
|
||||
|
||||
parsers = []
|
||||
for response in document_consumer_declaration.send(None):
|
||||
parsers.append(response[1])
|
||||
|
||||
options = []
|
||||
for parser in parsers:
|
||||
result = parser(doc)
|
||||
if result:
|
||||
options.append(result)
|
||||
|
||||
# Sein letzter Befehl war: KOMMT! Und sie kamen. Alle. Sogar die Parser.
|
||||
|
||||
for response in document_consumer_declaration.send(None):
|
||||
parser_declaration = response[1]
|
||||
parser_test = parser_declaration["test"]
|
||||
|
||||
if parser_test(doc):
|
||||
options.append(parser_declaration)
|
||||
|
||||
if not options:
|
||||
return None
|
||||
|
@@ -14,7 +14,7 @@ class TestParserDiscovery(TestCase):
|
||||
pass
|
||||
|
||||
m.return_value = (
|
||||
(None, lambda _: {"weight": 0, "parser": DummyParser}),
|
||||
(None, {"weight": 0, "parser": DummyParser, "test": lambda _: True}),
|
||||
)
|
||||
|
||||
self.assertEqual(
|
||||
@@ -32,8 +32,8 @@ class TestParserDiscovery(TestCase):
|
||||
pass
|
||||
|
||||
m.return_value = (
|
||||
(None, lambda _: {"weight": 0, "parser": DummyParser1}),
|
||||
(None, lambda _: {"weight": 1, "parser": DummyParser2}),
|
||||
(None, {"weight": 0, "parser": DummyParser1, "test": lambda _: True}),
|
||||
(None, {"weight": 1, "parser": DummyParser2, "test": lambda _: True}),
|
||||
)
|
||||
|
||||
self.assertEqual(
|
||||
@@ -43,7 +43,7 @@ class TestParserDiscovery(TestCase):
|
||||
|
||||
@mock.patch("documents.parsers.document_consumer_declaration.send")
|
||||
def test__get_parser_class_0_parsers(self, m, *args):
|
||||
m.return_value = ((None, lambda _: None),)
|
||||
m.return_value = []
|
||||
with TemporaryDirectory() as tmpdir:
|
||||
self.assertIsNone(
|
||||
get_parser_class("doc.pdf")
|
||||
|
Reference in New Issue
Block a user