mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-19 10:19:27 -05:00
fix: allow for caps in file name suffixes #206
@schinkelg ran afoul of this one, and I took the opportunity to add a test to catch this sort of thing next time.
This commit is contained in:
parent
5b88ebf0e7
commit
fa4924d5ba
@ -1,6 +1,10 @@
|
|||||||
Changelog
|
Changelog
|
||||||
#########
|
#########
|
||||||
|
|
||||||
|
* 0.4.1
|
||||||
|
* Fix for `#206`_ wherein the pluggable parser didn't recognise files with
|
||||||
|
all-caps suffixes like ``.PDF``
|
||||||
|
|
||||||
* 0.4.0
|
* 0.4.0
|
||||||
* Introducing reminders. See `#199`_ for more information, but the short
|
* Introducing reminders. See `#199`_ for more information, but the short
|
||||||
explanation is that you can now attach simple notes & times to documents
|
explanation is that you can now attach simple notes & times to documents
|
||||||
@ -211,3 +215,4 @@ Changelog
|
|||||||
.. _#179: https://github.com/danielquinn/paperless/pull/179
|
.. _#179: https://github.com/danielquinn/paperless/pull/179
|
||||||
.. _#199: https://github.com/danielquinn/paperless/issues/199
|
.. _#199: https://github.com/danielquinn/paperless/issues/199
|
||||||
.. _#200: https://github.com/danielquinn/paperless/issues/200
|
.. _#200: https://github.com/danielquinn/paperless/issues/200
|
||||||
|
.. _#206: https://github.com/danielquinn/paperless/issues/206
|
||||||
|
@ -102,7 +102,7 @@ class Consumer(object):
|
|||||||
parser_class = self._get_parser_class(doc)
|
parser_class = self._get_parser_class(doc)
|
||||||
if not parser_class:
|
if not parser_class:
|
||||||
self.log(
|
self.log(
|
||||||
"info", "No parsers could be found for {}".format(doc))
|
"error", "No parsers could be found for {}".format(doc))
|
||||||
self._ignore.append(doc)
|
self._ignore.append(doc)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
@ -160,6 +160,16 @@ class Consumer(object):
|
|||||||
if result:
|
if result:
|
||||||
options.append(result)
|
options.append(result)
|
||||||
|
|
||||||
|
self.log(
|
||||||
|
"info",
|
||||||
|
"Parsers available: {}".format(
|
||||||
|
", ".join([str(o["parser"].__name__) for o in options])
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
if not options:
|
||||||
|
return None
|
||||||
|
|
||||||
# Return the parser with the highest weight.
|
# Return the parser with the highest weight.
|
||||||
return sorted(
|
return sorted(
|
||||||
options, key=lambda _: _["weight"], reverse=True)[0]["parser"]
|
options, key=lambda _: _["weight"], reverse=True)[0]["parser"]
|
||||||
|
@ -1,8 +1,56 @@
|
|||||||
from django.test import TestCase
|
from django.test import TestCase
|
||||||
|
from unittest import mock
|
||||||
|
|
||||||
|
from ..consumer import Consumer
|
||||||
from ..models import FileInfo
|
from ..models import FileInfo
|
||||||
|
|
||||||
|
|
||||||
|
class TestConsumer(TestCase):
    """Tests for how ``Consumer._get_parser_class`` picks a parser.

    The ``document_consumer_declaration.send`` signal call is mocked so each
    test controls exactly which parser declarations the consumer sees, and
    the filesystem checks in ``documents.consumer`` are patched out.
    """

    class DummyParser(object):
        # Stand-in parser class; only its identity matters to the tests.
        pass

    def test__get_parser_class_1_parser(self):
        """With a single declared parser, that parser is returned."""
        consumer = self._get_consumer()
        chosen = consumer._get_parser_class("doc.pdf")
        self.assertEqual(chosen, self.DummyParser)

    @mock.patch("documents.consumer.os.makedirs")
    @mock.patch("documents.consumer.os.path.exists", return_value=True)
    @mock.patch("documents.consumer.document_consumer_declaration.send")
    def test__get_parser_class_n_parsers(self, mock_send, *args):
        """With several declared parsers, the highest weight wins."""

        class DummyParser1(object):
            pass

        class DummyParser2(object):
            pass

        def declare_light(_):
            return {"weight": 0, "parser": DummyParser1}

        def declare_heavy(_):
            return {"weight": 1, "parser": DummyParser2}

        # Each entry mimics a (receiver, response) pair from a signal send.
        mock_send.return_value = (
            (None, declare_light),
            (None, declare_heavy),
        )

        chosen = Consumer()._get_parser_class("doc.pdf")
        self.assertEqual(chosen, DummyParser2)

    @mock.patch("documents.consumer.os.makedirs")
    @mock.patch("documents.consumer.os.path.exists", return_value=True)
    @mock.patch("documents.consumer.document_consumer_declaration.send")
    def test__get_parser_class_0_parsers(self, mock_send, *args):
        """When no declaration claims the file, ``None`` is returned."""

        def declare_nothing(_):
            return None

        mock_send.return_value = ((None, declare_nothing),)

        chosen = Consumer()._get_parser_class("doc.pdf")
        self.assertIsNone(chosen)

    @mock.patch("documents.consumer.os.makedirs")
    @mock.patch("documents.consumer.os.path.exists", return_value=True)
    @mock.patch("documents.consumer.document_consumer_declaration.send")
    def _get_consumer(self, mock_send, *args):
        """Build a ``Consumer`` whose only declared parser is ``DummyParser``.

        The patches are supplied by the decorators, so callers invoke this
        helper without arguments.
        """

        def declare_dummy(_):
            return {"weight": 0, "parser": self.DummyParser}

        mock_send.return_value = ((None, declare_dummy),)
        return Consumer()
|
||||||
|
|
||||||
|
|
||||||
class TestAttributes(TestCase):
|
class TestAttributes(TestCase):
|
||||||
|
|
||||||
TAGS = ("tag1", "tag2", "tag3")
|
TAGS = ("tag1", "tag2", "tag3")
|
||||||
|
@ -5,7 +5,7 @@ from .parsers import RasterisedDocumentParser
|
|||||||
|
|
||||||
class ConsumerDeclaration(object):
|
class ConsumerDeclaration(object):
|
||||||
|
|
||||||
MATCHING_FILES = re.compile("^.*\.(pdf|jpg|gif|png|tiff|pnm|bmp)$")
|
MATCHING_FILES = re.compile("^.*\.(pdf|jpg|gif|png|tiff?|pnm|bmp)$")
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def handle(cls, sender, **kwargs):
|
def handle(cls, sender, **kwargs):
|
||||||
@ -14,7 +14,7 @@ class ConsumerDeclaration(object):
|
|||||||
@classmethod
|
@classmethod
|
||||||
def test(cls, doc):
|
def test(cls, doc):
|
||||||
|
|
||||||
if cls.MATCHING_FILES.match(doc):
|
if cls.MATCHING_FILES.match(doc.lower()):
|
||||||
return {
|
return {
|
||||||
"parser": RasterisedDocumentParser,
|
"parser": RasterisedDocumentParser,
|
||||||
"weight": 0
|
"weight": 0
|
||||||
|
36
src/paperless_tesseract/tests/test_signals.py
Normal file
36
src/paperless_tesseract/tests/test_signals.py
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
from django.test import TestCase
|
||||||
|
|
||||||
|
from ..signals import ConsumerDeclaration
|
||||||
|
|
||||||
|
|
||||||
|
class SignalsTestCase(TestCase):
    """Check which file names ``ConsumerDeclaration.test`` accepts."""

    def test_test_handles_various_file_names_true(self):
        """Supported suffixes are accepted in lower, upper and mixed case."""

        prefixes = (
            "doc", "My Document", "Μυ Γρεεκ Δοψθμεντ", "Doc -with - tags",
            "A document with a . in it", "Doc with -- in it"
        )
        suffixes = (
            "pdf", "jpg", "gif", "png", "tiff", "tif", "pnm", "bmp",
            "PDF", "JPG", "GIF", "PNG", "TIFF", "TIF", "PNM", "BMP",
            "pDf", "jPg", "gIf", "pNg", "tIff", "tIf", "pNm", "bMp",
        )

        for prefix in prefixes:
            for suffix in suffixes:
                name = "{}.{}".format(prefix, suffix)
                # Report the offending file name on failure; without a
                # message the report does not say which combination broke.
                self.assertTrue(
                    ConsumerDeclaration.test(name), msg=repr(name))

    def test_test_handles_various_file_names_false(self):
        """Unsupported, empty and missing suffixes are rejected."""

        prefixes = ("doc",)
        suffixes = ("txt", "markdown", "",)

        for prefix in prefixes:
            for suffix in suffixes:
                name = "{}.{}".format(prefix, suffix)
                self.assertFalse(
                    ConsumerDeclaration.test(name), msg=repr(name))

        # Names with no "." at all, including the empty string.
        for name in ("", "doc"):
            self.assertFalse(ConsumerDeclaration.test(name), msg=repr(name))
|
Loading…
x
Reference in New Issue
Block a user