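Changed the way parsers are discovered: a parser's consumer-declaration signal handler now returns a declaration dict directly (the parser class, its weight, and a callable that tests whether a file name is supported), instead of returning a test function that yields the parser and weight only when the file name matches. This also prepares for upcoming changes regarding content types and file types: parsers should declare what they support, and actual file extensions should not be hardcoded everywhere.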

2025-12-22 01:55:49 -06:00 · 2020-11-16 23:53:12 +01:00
parent e30f0b274b
commit 9a48d6c577
7 changed files with 42 additions and 51 deletions
--- a/src/paperless_text/signals.py
+++ b/src/paperless_text/signals.py
@@ -3,21 +3,16 @@ import re
 from .parsers import TextDocumentParser


-class ConsumerDeclaration:
+def text_consumer_declaration(sender, **kwargs):
+    return {
+        "parser": TextDocumentParser,
+        "weight": 10,
+        "test": text_consumer_test
+    }

-    MATCHING_FILES = re.compile(r"^.*\.(te?xt|md|csv)$")

-    @classmethod
-    def handle(cls, sender, **kwargs):
-        return cls.test
+MATCHING_FILES = re.compile(r"^.*\.(te?xt|md|csv)$")

-    @classmethod
-    def test(cls, doc):

-        if cls.MATCHING_FILES.match(doc.lower()):
-            return {
-                "parser": TextDocumentParser,
-                "weight": 10
-            }
-
-        return None
+def text_consumer_test(doc):
+    return MATCHING_FILES.match(doc.lower())