mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-07-30 18:27:45 -05:00
Changed the way parsers are discovered. This also prepares for upcoming changes regarding content types and file types: parsers should declare what they support, and actual file extensions should not be hardcoded everywhere.
This commit is contained in:
@@ -1,5 +1,7 @@
|
||||
from django.apps import AppConfig
|
||||
|
||||
from paperless_tesseract.signals import tesseract_consumer_declaration
|
||||
|
||||
|
||||
class PaperlessTesseractConfig(AppConfig):
|
||||
|
||||
@@ -9,8 +11,6 @@ class PaperlessTesseractConfig(AppConfig):
|
||||
|
||||
from documents.signals import document_consumer_declaration
|
||||
|
||||
from .signals import ConsumerDeclaration
|
||||
|
||||
document_consumer_declaration.connect(ConsumerDeclaration.handle)
|
||||
document_consumer_declaration.connect(tesseract_consumer_declaration)
|
||||
|
||||
AppConfig.ready(self)
|
||||
|
@@ -3,21 +3,16 @@ import re
|
||||
from .parsers import RasterisedDocumentParser
|
||||
|
||||
|
||||
class ConsumerDeclaration:
|
||||
def tesseract_consumer_declaration(sender, **kwargs):
|
||||
return {
|
||||
"parser": RasterisedDocumentParser,
|
||||
"weight": 0,
|
||||
"test": tesseract_consumer_test
|
||||
}
|
||||
|
||||
MATCHING_FILES = re.compile(r"^.*\.(pdf|jpe?g|gif|png|tiff?|pnm|bmp)$")
|
||||
|
||||
@classmethod
|
||||
def handle(cls, sender, **kwargs):
|
||||
return cls.test
|
||||
MATCHING_FILES = re.compile(r"^.*\.(pdf|jpe?g|gif|png|tiff?|pnm|bmp)$")
|
||||
|
||||
@classmethod
|
||||
def test(cls, doc):
|
||||
|
||||
if cls.MATCHING_FILES.match(doc.lower()):
|
||||
return {
|
||||
"parser": RasterisedDocumentParser,
|
||||
"weight": 0
|
||||
}
|
||||
|
||||
return None
|
||||
def tesseract_consumer_test(doc):
|
||||
return MATCHING_FILES.match(doc.lower())
|
||||
|
@@ -1,6 +1,6 @@
|
||||
from django.test import TestCase
|
||||
|
||||
from ..signals import ConsumerDeclaration
|
||||
from paperless_tesseract.signals import tesseract_consumer_test
|
||||
|
||||
|
||||
class SignalsTestCase(TestCase):
|
||||
@@ -20,7 +20,7 @@ class SignalsTestCase(TestCase):
|
||||
for prefix in prefixes:
|
||||
for suffix in suffixes:
|
||||
name = "{}.{}".format(prefix, suffix)
|
||||
self.assertTrue(ConsumerDeclaration.test(name))
|
||||
self.assertTrue(tesseract_consumer_test(name))
|
||||
|
||||
def test_test_handles_various_file_names_false(self):
|
||||
|
||||
@@ -30,7 +30,7 @@ class SignalsTestCase(TestCase):
|
||||
for prefix in prefixes:
|
||||
for suffix in suffixes:
|
||||
name = "{}.{}".format(prefix, suffix)
|
||||
self.assertFalse(ConsumerDeclaration.test(name))
|
||||
self.assertFalse(tesseract_consumer_test(name))
|
||||
|
||||
self.assertFalse(ConsumerDeclaration.test(""))
|
||||
self.assertFalse(ConsumerDeclaration.test("doc"))
|
||||
self.assertFalse(tesseract_consumer_test(""))
|
||||
self.assertFalse(tesseract_consumer_test("doc"))
|
||||
|
Reference in New Issue
Block a user