diff --git a/src/documents/consumer.py b/src/documents/consumer.py index 3294c4792..643ac4f66 100644 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -16,6 +16,7 @@ from django.template.defaultfilters import slugify from paperless.db import GnuPG +from .mixins import Renderable from .models import Sender, Tag, Document from .languages import ISO639 @@ -28,7 +29,7 @@ class ConsumerError(Exception): pass -class Consumer(object): +class Consumer(Renderable): """ Loop over every file found in CONSUMPTION_DIR and: 1. Convert it to a greyscale png @@ -50,11 +51,11 @@ class Consumer(object): flags=re.IGNORECASE ) REGEX_SENDER_TITLE = re.compile( - r"^[^/]*/(.+) - ([^/]+)\.(pdf|jpe?g|png|gif|tiff)$", + r"^.*/(.+) - ([^/]+)\.(pdf|jpe?g|png|gif|tiff)$", flags=re.IGNORECASE ) REGEX_SENDER_TITLE_TAGS = re.compile( - r"^.*/([^/]+) - ([^/]+) - ([a-z\-,]+)\.(pdf|jpe?g|png|gif|tiff)$", + r"^.*/(.*) - (.*) - ([a-z0-9\-,]*)\.(pdf|jpe?g|png|gif|tiff)$", flags=re.IGNORECASE ) @@ -208,7 +209,7 @@ class Consumer(object): for t in tags.split(","): r.append( Tag.objects.get_or_create(slug=t, defaults={"name": t})[0]) - return r + return tuple(r) # First attempt: "<sender> - <title> - <tags>.<suffix>" m = re.match(self.REGEX_SENDER_TITLE_TAGS, parseable) @@ -223,11 +224,11 @@ class Consumer(object): # Second attempt: "<sender> - <title>.<suffix>" m = re.match(self.REGEX_SENDER_TITLE, parseable) if m: - return get_sender(m.group(1)), m.group(2), [], m.group(3) + return get_sender(m.group(1)), m.group(2), (), m.group(3) # That didn't work, so we assume sender and tags are None m = re.match(self.REGEX_TITLE, parseable) - return None, m.group(1), [], m.group(2) + return None, m.group(1), (), m.group(2) def _store(self, text, doc): @@ -273,10 +274,6 @@ class Consumer(object): self._render("", 2) - def _render(self, text, verbosity): - if self.verbosity >= verbosity: - print(text) - def _is_ready(self, doc): """ Detect whether `doc` is ready to consume or if it's still being written diff --git 
a/src/documents/tests/test_consumer.py b/src/documents/tests/test_consumer.py index a729c7c48..7cee524c3 100644 --- a/src/documents/tests/test_consumer.py +++ b/src/documents/tests/test_consumer.py @@ -4,73 +4,83 @@ from ..consumer import Consumer class TestAttachment(TestCase): + + TAGS = ("tag1", "tag2", "tag3") + CONSUMER = Consumer() + + def _test_guess_attributes_from_name(self, path, sender, title, tags): + for suffix in ("pdf", "png", "jpg", "jpeg", "gif"): + f = path.format(suffix) + results = self.CONSUMER._guess_attributes_from_name(f) + self.assertEqual(results[0].name, sender, f) + self.assertEqual(results[1], title, f) + self.assertEqual(tuple([t.slug for t in results[2]]), tags, f) + self.assertEqual(results[3], suffix, f) - def test_guess_attributes_from_name(self): - consumer = Consumer() - suffixes = ("pdf", "png", "jpg", "jpeg", "gif") - tests = ( - { - "path": "/path/to/Sender - Title - tag1,tag2,tag3.{}", - "result": { - "sender": "Sender", - "title": "Title", - "tags": ("tag1", "tag2", "tag3") - }, - }, - { - "path": "/path/to/Spaced Sender - Title - tag1,tag2,tag3.{}", - "result": { - "sender": "Spaced Sender", - "title": "Title", - "tags": ("tag1", "tag2", "tag3") - }, - }, - { - "path": "/path/to/Sender - Spaced Title - tag1,tag2,tag3.{}", - "result": { - "sender": "Sender", - "title": "Spaced Title", - "tags": ("tag1", "tag2", "tag3") - }, - }, - { - "path": "/path/to/Spaced Sender - Spaced Title - tag1,tag2.{}", - "result": { - "sender": "Spaced Sender", - "title": "Spaced Title", - "tags": ("tag1", "tag2") - }, - }, - { - "path": "/path/to/Dash-Sender - Title - tag1,tag2.{}", - "result": { - "sender": "Dash-Sender", - "title": "Title", - "tags": ("tag1", "tag2") - }, - }, - { - "path": "/path/to/Sender - Dash-Title - tag1,tag2.{}", - "result": { - "sender": "Sender", - "title": "Dash-Title", - "tags": ("tag1", "tag2") - }, - }, - { - "path": "/path/to/Dash-Sender - Dash-Title - tag1,tag2.{}", - "result": { - "sender": "Dash-Sender", - 
"title": "Dash-Title", - "tags": ("tag1", "tag2") - }, - }, + def test_guess_attributes_from_name0(self): + self._test_guess_attributes_from_name( + "/path/to/Sender - Title.{}", "Sender", "Title", ()) + + def test_guess_attributes_from_name1(self): + self._test_guess_attributes_from_name( + "/path/to/Spaced Sender - Title.{}", "Spaced Sender", "Title", ()) + + def test_guess_attributes_from_name2(self): + self._test_guess_attributes_from_name( + "/path/to/Sender - Spaced Title.{}", "Sender", "Spaced Title", ()) + + def test_guess_attributes_from_name3(self): + self._test_guess_attributes_from_name( + "/path/to/Dashed-Sender - Title.{}", "Dashed-Sender", "Title", ()) + + def test_guess_attributes_from_name4(self): + self._test_guess_attributes_from_name( + "/path/to/Sender - Dashed-Title.{}", "Sender", "Dashed-Title", ()) + + def test_guess_attributes_from_name5(self): + self._test_guess_attributes_from_name( + "/path/to/Sender - Title - tag1,tag2,tag3.{}", + "Sender", + "Title", + self.TAGS + ) + + def test_guess_attributes_from_name6(self): + self._test_guess_attributes_from_name( + "/path/to/Spaced Sender - Title - tag1,tag2,tag3.{}", + "Spaced Sender", + "Title", + self.TAGS + ) + + def test_guess_attributes_from_name7(self): + self._test_guess_attributes_from_name( + "/path/to/Sender - Spaced Title - tag1,tag2,tag3.{}", + "Sender", + "Spaced Title", + self.TAGS + ) + + def test_guess_attributes_from_name8(self): + self._test_guess_attributes_from_name( + "/path/to/Dashed-Sender - Title - tag1,tag2,tag3.{}", + "Dashed-Sender", + "Title", + self.TAGS + ) + + def test_guess_attributes_from_name9(self): + self._test_guess_attributes_from_name( + "/path/to/Sender - Dashed-Title - tag1,tag2,tag3.{}", + "Sender", + "Dashed-Title", + self.TAGS + ) + + def test_guess_attributes_from_name10(self): + self._test_guess_attributes_from_name( + "/path/to/Σενδερ - Τιτλε - tag1,tag2,tag3.{}", + "Σενδερ", + "Τιτλε", + self.TAGS ) - for test in tests: - for suffix in suffixes: - 
f = test["path"].format(suffix) - sender, title, tags, s = consumer._guess_attributes_from_name(f) - self.assertEqual(sender.name, test["result"]["sender"]) - self.assertEqual(title, test["result"]["title"]) - self.assertEqual(tags, test["result"]["tags"]) - self.assertEqual(s, suffix)