Changed the way parsers are discovered. This also prepares for upcoming changes regarding content types and file types: parsers should declare what they support, and actual file extensions should not be hardcoded everywhere.

This commit is contained in:
Jonas Winkler
2020-11-16 23:53:12 +01:00
parent 70d8e8bc56
commit d2e22e3f27
7 changed files with 42 additions and 51 deletions

View File

@@ -1,5 +1,7 @@
from django.apps import AppConfig
from paperless_text.signals import text_consumer_declaration
class PaperlessTextConfig(AppConfig):
@@ -9,8 +11,6 @@ class PaperlessTextConfig(AppConfig):
from documents.signals import document_consumer_declaration
from .signals import ConsumerDeclaration
document_consumer_declaration.connect(ConsumerDeclaration.handle)
document_consumer_declaration.connect(text_consumer_declaration)
AppConfig.ready(self)

View File

@@ -3,21 +3,16 @@ import re
from .parsers import TextDocumentParser
class ConsumerDeclaration:
def text_consumer_declaration(sender, **kwargs):
    """Describe the plain-text parser to whoever sends the signal.

    The ``sender`` argument and any extra keyword arguments are accepted
    but not used.

    Returns a dict with three entries: ``"parser"`` (the
    ``TextDocumentParser`` class), ``"weight"`` (the integer 10) and
    ``"test"`` (the ``text_consumer_test`` callable).
    """
    declaration = dict(
        parser=TextDocumentParser,
        weight=10,
        test=text_consumer_test,
    )
    return declaration
MATCHING_FILES = re.compile(r"^.*\.(te?xt|md|csv)$")
@classmethod
def handle(cls, sender, **kwargs):
    """Hand back the class-level ``test`` callable.

    ``sender`` and any extra keyword arguments are accepted but ignored.
    """
    tester = cls.test
    return tester
MATCHING_FILES = re.compile(r"^.*\.(te?xt|md|csv)$")
@classmethod
def test(cls, doc):
    """Check whether ``doc`` is matched by ``cls.MATCHING_FILES``.

    ``doc`` is lower-cased before matching. Returns a dict with
    ``"parser"`` (``TextDocumentParser``) and ``"weight"`` (10) when the
    pattern matches, otherwise ``None``.
    """
    lowered = doc.lower()
    # Guard clause: anything the pattern does not match is not ours.
    if not cls.MATCHING_FILES.match(lowered):
        return None
    return {"parser": TextDocumentParser, "weight": 10}
def text_consumer_test(doc):
    """Match the lower-cased ``doc`` against ``MATCHING_FILES``.

    Returns whatever ``MATCHING_FILES.match`` returns for the lower-cased
    string, so callers get a truthy value on a match and a falsy one
    otherwise.
    """
    lowered = doc.lower()
    return MATCHING_FILES.match(lowered)