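Changed the way parsers are discovered: the consumer-declaration signal handler now returns a dict with the parser class, its weight, and a test callable, rather than returning only the test callable. This also prepares for upcoming changes regarding content types and file types: parsers should declare what they support, and actual file extensions should not be hardcoded everywhere.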

2026-02-09 23:49:29 -06:00 · 2020-11-16 23:53:12 +01:00
parent 70d8e8bc56
commit d2e22e3f27
7 changed files with 42 additions and 51 deletions
--- a/src/paperless_text/apps.py
+++ b/src/paperless_text/apps.py
@@ -1,5 +1,7 @@
 from django.apps import AppConfig

+from paperless_text.signals import text_consumer_declaration
+

 class PaperlessTextConfig(AppConfig):

@@ -9,8 +11,6 @@ class PaperlessTextConfig(AppConfig):

        from documents.signals import document_consumer_declaration

-        from .signals import ConsumerDeclaration
-
-        document_consumer_declaration.connect(ConsumerDeclaration.handle)
+        document_consumer_declaration.connect(text_consumer_declaration)

        AppConfig.ready(self)
--- a/src/paperless_text/signals.py
+++ b/src/paperless_text/signals.py
@@ -3,21 +3,16 @@ import re
 from .parsers import TextDocumentParser


-class ConsumerDeclaration:
+def text_consumer_declaration(sender, **kwargs):
+    return {
+        "parser": TextDocumentParser,
+        "weight": 10,
+        "test": text_consumer_test
+    }

-    MATCHING_FILES = re.compile(r"^.*\.(te?xt|md|csv)$")

-    @classmethod
-    def handle(cls, sender, **kwargs):
-        return cls.test
+MATCHING_FILES = re.compile(r"^.*\.(te?xt|md|csv)$")

-    @classmethod
-    def test(cls, doc):

-        if cls.MATCHING_FILES.match(doc.lower()):
-            return {
-                "parser": TextDocumentParser,
-                "weight": 10
-            }
-
-        return None
+def text_consumer_test(doc):
+    return MATCHING_FILES.match(doc.lower())