mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
fix: allow for caps in file name suffixes #206
@schinkelg ran afoul of this one, and I took the opportunity to add a test to catch this sort of thing next time.
This commit is contained in:
parent
5b88ebf0e7
commit
fa4924d5ba
@ -1,6 +1,10 @@
|
||||
Changelog
|
||||
#########
|
||||
|
||||
* 0.4.1
|
||||
* Fix for `#206`_ wherein the pluggable parser didn't recognise files with
|
||||
all-caps suffixes like ``.PDF``
|
||||
|
||||
* 0.4.0
|
||||
* Introducing reminders. See `#199`_ for more information, but the short
|
||||
explanation is that you can now attach simple notes & times to documents
|
||||
@ -211,3 +215,4 @@ Changelog
|
||||
.. _#179: https://github.com/danielquinn/paperless/pull/179
|
||||
.. _#199: https://github.com/danielquinn/paperless/issues/199
|
||||
.. _#200: https://github.com/danielquinn/paperless/issues/200
|
||||
.. _#206: https://github.com/danielquinn/paperless/issues/206
|
||||
|
@ -102,7 +102,7 @@ class Consumer(object):
|
||||
parser_class = self._get_parser_class(doc)
|
||||
if not parser_class:
|
||||
self.log(
|
||||
"info", "No parsers could be found for {}".format(doc))
|
||||
"error", "No parsers could be found for {}".format(doc))
|
||||
self._ignore.append(doc)
|
||||
continue
|
||||
|
||||
@ -160,6 +160,16 @@ class Consumer(object):
|
||||
if result:
|
||||
options.append(result)
|
||||
|
||||
self.log(
|
||||
"info",
|
||||
"Parsers available: {}".format(
|
||||
", ".join([str(o["parser"].__name__) for o in options])
|
||||
)
|
||||
)
|
||||
|
||||
if not options:
|
||||
return None
|
||||
|
||||
# Return the parser with the highest weight.
|
||||
return sorted(
|
||||
options, key=lambda _: _["weight"], reverse=True)[0]["parser"]
|
||||
|
@ -1,8 +1,56 @@
|
||||
from django.test import TestCase
|
||||
from unittest import mock
|
||||
|
||||
from ..consumer import Consumer
|
||||
from ..models import FileInfo
|
||||
|
||||
|
||||
class TestConsumer(TestCase):
    """Tests for ``Consumer._get_parser_class``.

    Each scenario patches ``document_consumer_declaration.send`` so that it
    returns a tuple of ``(receiver, callable)`` pairs; each callable maps a
    document name to either ``None`` or a ``{"weight", "parser"}`` dict.
    ``os.makedirs`` and ``os.path.exists`` are patched as well, presumably so
    that building a ``Consumer`` never touches the real filesystem (verify
    against ``Consumer.__init__``).
    """

    class DummyParser(object):
        pass

    def test__get_parser_class_1_parser(self):
        # With exactly one declared parser, that parser must be selected.
        consumer = self._get_consumer()
        chosen = consumer._get_parser_class("doc.pdf")
        self.assertEqual(chosen, self.DummyParser)

    @mock.patch("documents.consumer.os.makedirs")
    @mock.patch("documents.consumer.os.path.exists", return_value=True)
    @mock.patch("documents.consumer.document_consumer_declaration.send")
    def test__get_parser_class_n_parsers(self, m, *args):
        # ``m`` is the mock for ``send`` (innermost decorator); the other two
        # mocks arrive through ``*args`` and are not inspected.

        class DummyParser1(object):
            pass

        class DummyParser2(object):
            pass

        light = {"weight": 0, "parser": DummyParser1}
        heavy = {"weight": 1, "parser": DummyParser2}
        m.return_value = (
            (None, lambda _: light),
            (None, lambda _: heavy),
        )

        # The declaration carrying the larger weight is expected to win.
        consumer = Consumer()
        self.assertEqual(consumer._get_parser_class("doc.pdf"), DummyParser2)

    @mock.patch("documents.consumer.os.makedirs")
    @mock.patch("documents.consumer.os.path.exists", return_value=True)
    @mock.patch("documents.consumer.document_consumer_declaration.send")
    def test__get_parser_class_0_parsers(self, m, *args):
        # The only declared callable rejects every document, so no parser
        # class can be chosen.
        declarations = ((None, lambda _: None),)
        m.return_value = declarations

        consumer = Consumer()
        self.assertIsNone(consumer._get_parser_class("doc.pdf"))

    @mock.patch("documents.consumer.os.makedirs")
    @mock.patch("documents.consumer.os.path.exists", return_value=True)
    @mock.patch("documents.consumer.document_consumer_declaration.send")
    def _get_consumer(self, m, *args):
        # Helper: build a Consumer while ``send`` is patched to declare
        # ``DummyParser`` (weight 0) as the one and only parser.
        only_choice = {"weight": 0, "parser": self.DummyParser}
        m.return_value = (
            (None, lambda _: only_choice),
        )
        return Consumer()
|
||||
|
||||
|
||||
class TestAttributes(TestCase):
|
||||
|
||||
TAGS = ("tag1", "tag2", "tag3")
|
||||
|
@ -5,7 +5,7 @@ from .parsers import RasterisedDocumentParser
|
||||
|
||||
class ConsumerDeclaration(object):
|
||||
|
||||
MATCHING_FILES = re.compile("^.*\.(pdf|jpg|gif|png|tiff|pnm|bmp)$")
|
||||
MATCHING_FILES = re.compile("^.*\.(pdf|jpg|gif|png|tiff?|pnm|bmp)$")
|
||||
|
||||
@classmethod
|
||||
def handle(cls, sender, **kwargs):
|
||||
@ -14,7 +14,7 @@ class ConsumerDeclaration(object):
|
||||
@classmethod
|
||||
def test(cls, doc):
|
||||
|
||||
if cls.MATCHING_FILES.match(doc):
|
||||
if cls.MATCHING_FILES.match(doc.lower()):
|
||||
return {
|
||||
"parser": RasterisedDocumentParser,
|
||||
"weight": 0
|
||||
|
36
src/paperless_tesseract/tests/test_signals.py
Normal file
36
src/paperless_tesseract/tests/test_signals.py
Normal file
@ -0,0 +1,36 @@
|
||||
from django.test import TestCase
|
||||
|
||||
from ..signals import ConsumerDeclaration
|
||||
|
||||
|
||||
class SignalsTestCase(TestCase):
    """Check which file names ``ConsumerDeclaration.test`` accepts.

    Every generated name is checked inside its own ``subTest`` so that a
    failure report names the offending file and the remaining combinations
    are still exercised instead of being hidden behind the first failure.
    """

    def test_test_handles_various_file_names_true(self):
        # Supported suffixes must be recognised regardless of their case and
        # regardless of spaces, dots, dashes or non-ASCII text in the prefix.

        prefixes = (
            "doc", "My Document", "Μυ Γρεεκ Δοψθμεντ", "Doc -with - tags",
            "A document with a . in it", "Doc with -- in it"
        )
        suffixes = (
            "pdf", "jpg", "gif", "png", "tiff", "tif", "pnm", "bmp",
            "PDF", "JPG", "GIF", "PNG", "TIFF", "TIF", "PNM", "BMP",
            "pDf", "jPg", "gIf", "pNg", "tIff", "tIf", "pNm", "bMp",
        )

        for prefix in prefixes:
            for suffix in suffixes:
                name = "{}.{}".format(prefix, suffix)
                with self.subTest(name=name):
                    self.assertTrue(ConsumerDeclaration.test(name))

    def test_test_handles_various_file_names_false(self):
        # Unsupported or missing suffixes must be rejected.

        prefixes = ("doc",)
        suffixes = ("txt", "markdown", "",)

        for prefix in prefixes:
            for suffix in suffixes:
                name = "{}.{}".format(prefix, suffix)
                with self.subTest(name=name):
                    self.assertFalse(ConsumerDeclaration.test(name))

        # Names with no dot at all, including the empty string.
        for name in ("", "doc"):
            with self.subTest(name=name):
                self.assertFalse(ConsumerDeclaration.test(name))
|
Loading…
x
Reference in New Issue
Block a user