mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-11-03 03:16:10 -06:00 
			
		
		
		
	fix: allow for caps in file name suffixes #206
@schinkelg ran afoul of this one, and I took the opportunity to add a test to catch this sort of thing next time.
This commit is contained in:
		@@ -102,7 +102,7 @@ class Consumer(object):
 | 
			
		||||
            parser_class = self._get_parser_class(doc)
 | 
			
		||||
            if not parser_class:
 | 
			
		||||
                self.log(
 | 
			
		||||
                    "info", "No parsers could be found for {}".format(doc))
 | 
			
		||||
                    "error", "No parsers could be found for {}".format(doc))
 | 
			
		||||
                self._ignore.append(doc)
 | 
			
		||||
                continue
 | 
			
		||||
 | 
			
		||||
@@ -160,6 +160,16 @@ class Consumer(object):
 | 
			
		||||
            if result:
 | 
			
		||||
                options.append(result)
 | 
			
		||||
 | 
			
		||||
        self.log(
 | 
			
		||||
            "info",
 | 
			
		||||
            "Parsers available: {}".format(
 | 
			
		||||
                ", ".join([str(o["parser"].__name__) for o in options])
 | 
			
		||||
            )
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
        if not options:
 | 
			
		||||
            return None
 | 
			
		||||
 | 
			
		||||
        # Return the parser with the highest weight.
 | 
			
		||||
        return sorted(
 | 
			
		||||
            options, key=lambda _: _["weight"], reverse=True)[0]["parser"]
 | 
			
		||||
 
 | 
			
		||||
@@ -1,8 +1,56 @@
 | 
			
		||||
from django.test import TestCase
 | 
			
		||||
from unittest import mock
 | 
			
		||||
 | 
			
		||||
from ..consumer import Consumer
 | 
			
		||||
from ..models import FileInfo
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class TestConsumer(TestCase):
    """Exercise ``Consumer._get_parser_class`` with mocked declarations.

    Every ``Consumer`` built here is constructed while ``os.makedirs``,
    ``os.path.exists`` (forced to ``True``) and
    ``document_consumer_declaration.send`` are patched inside
    ``documents.consumer``.
    """

    class DummyParser(object):
        pass

    @mock.patch("documents.consumer.os.makedirs")
    @mock.patch("documents.consumer.os.path.exists", return_value=True)
    @mock.patch("documents.consumer.document_consumer_declaration.send")
    def _get_consumer(self, m, *args):
        # Helper: build a Consumer while the patched ``send`` returns a
        # single (receiver, declaration) pair that offers DummyParser.
        only_declaration = (
            None,
            lambda _: {"weight": 0, "parser": self.DummyParser},
        )
        m.return_value = (only_declaration,)
        return Consumer()

    def test__get_parser_class_1_parser(self):
        # With one declared parser, that parser is the one handed back.
        consumer = self._get_consumer()
        chosen = consumer._get_parser_class("doc.pdf")
        self.assertEqual(chosen, self.DummyParser)

    @mock.patch("documents.consumer.os.makedirs")
    @mock.patch("documents.consumer.os.path.exists", return_value=True)
    @mock.patch("documents.consumer.document_consumer_declaration.send")
    def test__get_parser_class_n_parsers(self, m, *args):
        # Two declarations with different weights: the weight-1 parser
        # (DummyParser2) is expected to win over the weight-0 one.

        class DummyParser1(object):
            pass

        class DummyParser2(object):
            pass

        light = (None, lambda _: {"weight": 0, "parser": DummyParser1})
        heavy = (None, lambda _: {"weight": 1, "parser": DummyParser2})
        m.return_value = (light, heavy)

        chosen = Consumer()._get_parser_class("doc.pdf")
        self.assertEqual(chosen, DummyParser2)

    @mock.patch("documents.consumer.os.makedirs")
    @mock.patch("documents.consumer.os.path.exists", return_value=True)
    @mock.patch("documents.consumer.document_consumer_declaration.send")
    def test__get_parser_class_0_parsers(self, m, *args):
        # The only declaration answers None for the document, so no
        # parser class should be returned.
        declines = (None, lambda _: None)
        m.return_value = (declines,)

        chosen = Consumer()._get_parser_class("doc.pdf")
        self.assertIsNone(chosen)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class TestAttributes(TestCase):
 | 
			
		||||
 | 
			
		||||
    TAGS = ("tag1", "tag2", "tag3")
 | 
			
		||||
 
 | 
			
		||||
@@ -5,7 +5,7 @@ from .parsers import RasterisedDocumentParser
 | 
			
		||||
 | 
			
		||||
class ConsumerDeclaration(object):
 | 
			
		||||
 | 
			
		||||
    MATCHING_FILES = re.compile("^.*\.(pdf|jpg|gif|png|tiff|pnm|bmp)$")
 | 
			
		||||
    MATCHING_FILES = re.compile("^.*\.(pdf|jpg|gif|png|tiff?|pnm|bmp)$")
 | 
			
		||||
 | 
			
		||||
    @classmethod
 | 
			
		||||
    def handle(cls, sender, **kwargs):
 | 
			
		||||
@@ -14,7 +14,7 @@ class ConsumerDeclaration(object):
 | 
			
		||||
    @classmethod
 | 
			
		||||
    def test(cls, doc):
 | 
			
		||||
 | 
			
		||||
        if cls.MATCHING_FILES.match(doc):
 | 
			
		||||
        if cls.MATCHING_FILES.match(doc.lower()):
 | 
			
		||||
            return {
 | 
			
		||||
                "parser": RasterisedDocumentParser,
 | 
			
		||||
                "weight": 0
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										36
									
								
								src/paperless_tesseract/tests/test_signals.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										36
									
								
								src/paperless_tesseract/tests/test_signals.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,36 @@
 | 
			
		||||
from django.test import TestCase
 | 
			
		||||
 | 
			
		||||
from ..signals import ConsumerDeclaration
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class SignalsTestCase(TestCase):
    """Check which file names ``ConsumerDeclaration.test`` accepts.

    Each assertion carries the generated file name as its failure message,
    so a regression reports exactly which name was handled incorrectly
    instead of a bare "None is not true".
    """

    def test_test_handles_various_file_names_true(self):
        # Every supported suffix must be accepted in lower, upper and
        # mixed case, combined with a range of awkward prefixes.

        prefixes = (
            "doc", "My Document", "Μυ Γρεεκ Δοψθμεντ", "Doc -with - tags",
            "A document with a . in it", "Doc with -- in it"
        )
        suffixes = (
            "pdf", "jpg", "gif", "png", "tiff", "tif", "pnm", "bmp",
            "PDF", "JPG", "GIF", "PNG", "TIFF", "TIF", "PNM", "BMP",
            "pDf", "jPg", "gIf", "pNg", "tIff", "tIf", "pNm", "bMp",
        )

        for prefix in prefixes:
            for suffix in suffixes:
                name = "{}.{}".format(prefix, suffix)
                self.assertTrue(
                    ConsumerDeclaration.test(name),
                    "{!r} should have been accepted".format(name)
                )

    def test_test_handles_various_file_names_false(self):
        # Unsupported or missing suffixes must be rejected.

        prefixes = ("doc",)
        suffixes = ("txt", "markdown", "",)

        for prefix in prefixes:
            for suffix in suffixes:
                name = "{}.{}".format(prefix, suffix)
                self.assertFalse(
                    ConsumerDeclaration.test(name),
                    "{!r} should have been rejected".format(name)
                )

        # Names with no "." at all, including the empty string.
        for name in ("", "doc"):
            self.assertFalse(
                ConsumerDeclaration.test(name),
                "{!r} should have been rejected".format(name)
            )
 | 
			
		||||
		Reference in New Issue
	
	Block a user