Fixed the auto-naming regexes

This commit is contained in:
Daniel Quinn 2016-02-11 22:05:55 +00:00
parent 7aadab23cc
commit a022fcb8f1
2 changed files with 85 additions and 78 deletions

View File

@ -16,6 +16,7 @@ from django.template.defaultfilters import slugify
from paperless.db import GnuPG
from .mixins import Renderable
from .models import Sender, Tag, Document
from .languages import ISO639
@ -28,7 +29,7 @@ class ConsumerError(Exception):
pass
class Consumer(object):
class Consumer(Renderable):
"""
Loop over every file found in CONSUMPTION_DIR and:
1. Convert it to a greyscale png
@ -50,11 +51,11 @@ class Consumer(object):
flags=re.IGNORECASE
)
REGEX_SENDER_TITLE = re.compile(
r"^[^/]*/(.+) - ([^/]+)\.(pdf|jpe?g|png|gif|tiff)$",
r"^.*/(.+) - ([^/]+)\.(pdf|jpe?g|png|gif|tiff)$",
flags=re.IGNORECASE
)
REGEX_SENDER_TITLE_TAGS = re.compile(
r"^.*/([^/]+) - ([^/]+) - ([a-z\-,]+)\.(pdf|jpe?g|png|gif|tiff)$",
r"^.*/(.*) - (.*) - ([a-z0-9\-,]*)\.(pdf|jpe?g|png|gif|tiff)$",
flags=re.IGNORECASE
)
@ -208,7 +209,7 @@ class Consumer(object):
for t in tags.split(","):
r.append(
Tag.objects.get_or_create(slug=t, defaults={"name": t})[0])
return r
return tuple(r)
# First attempt: "<sender> - <title> - <tags>.<suffix>"
m = re.match(self.REGEX_SENDER_TITLE_TAGS, parseable)
@ -223,11 +224,11 @@ class Consumer(object):
# Second attempt: "<sender> - <title>.<suffix>"
m = re.match(self.REGEX_SENDER_TITLE, parseable)
if m:
return get_sender(m.group(1)), m.group(2), [], m.group(3)
return get_sender(m.group(1)), m.group(2), (), m.group(3)
# That didn't work, so we assume sender and tags are None
m = re.match(self.REGEX_TITLE, parseable)
return None, m.group(1), [], m.group(2)
return None, m.group(1), (), m.group(2)
def _store(self, text, doc):
@ -273,10 +274,6 @@ class Consumer(object):
self._render("", 2)
def _render(self, text, verbosity):
    """
    Print `text` when this object's `verbosity` attribute is greater than
    or equal to the `verbosity` threshold passed in; otherwise do nothing.
    """
    loud_enough = self.verbosity >= verbosity
    if loud_enough:
        print(text)
def _is_ready(self, doc):
"""
Detect whether `doc` is ready to consume or if it's still being written

View File

@ -4,73 +4,83 @@ from ..consumer import Consumer
class TestAttachment(TestCase):
TAGS = ("tag1", "tag2", "tag3")
CONSUMER = Consumer()
def _test_guess_attributes_from_name(self, path, sender, title, tags):
    """
    Shared assertion helper for the filename-guessing tests.

    `path` is a template containing one `{}` placeholder for the file
    suffix.  For each suffix tried, the formatted path is handed to
    `CONSUMER._guess_attributes_from_name` and the four-part result is
    compared against the expected `sender` name, `title`, tuple of tag
    slugs (`tags`) and the suffix itself.  The formatted path is passed as
    the assertion message so a failure shows which filename was involved.
    """
    for extension in ("pdf", "png", "jpg", "jpeg", "gif"):
        filename = path.format(extension)
        guessed = self.CONSUMER._guess_attributes_from_name(filename)
        # guessed[0] is expected to expose `.name`; guessed[2] is an
        # iterable of objects exposing `.slug`.
        self.assertEqual(guessed[0].name, sender, filename)
        self.assertEqual(guessed[1], title, filename)
        self.assertEqual(
            tuple(tag.slug for tag in guessed[2]), tags, filename)
        self.assertEqual(guessed[3], extension, filename)
def test_guess_attributes_from_name(self):
consumer = Consumer()
suffixes = ("pdf", "png", "jpg", "jpeg", "gif")
tests = (
{
"path": "/path/to/Sender - Title - tag1,tag2,tag3.{}",
"result": {
"sender": "Sender",
"title": "Title",
"tags": ("tag1", "tag2", "tag3")
},
},
{
"path": "/path/to/Spaced Sender - Title - tag1,tag2,tag3.{}",
"result": {
"sender": "Spaced Sender",
"title": "Title",
"tags": ("tag1", "tag2", "tag3")
},
},
{
"path": "/path/to/Sender - Spaced Title - tag1,tag2,tag3.{}",
"result": {
"sender": "Sender",
"title": "Spaced Title",
"tags": ("tag1", "tag2", "tag3")
},
},
{
"path": "/path/to/Spaced Sender - Spaced Title - tag1,tag2.{}",
"result": {
"sender": "Spaced Sender",
"title": "Spaced Title",
"tags": ("tag1", "tag2")
},
},
{
"path": "/path/to/Dash-Sender - Title - tag1,tag2.{}",
"result": {
"sender": "Dash-Sender",
"title": "Title",
"tags": ("tag1", "tag2")
},
},
{
"path": "/path/to/Sender - Dash-Title - tag1,tag2.{}",
"result": {
"sender": "Sender",
"title": "Dash-Title",
"tags": ("tag1", "tag2")
},
},
{
"path": "/path/to/Dash-Sender - Dash-Title - tag1,tag2.{}",
"result": {
"sender": "Dash-Sender",
"title": "Dash-Title",
"tags": ("tag1", "tag2")
},
},
def test_guess_attributes_from_name0(self):
    """Single-word sender and title, no tags segment in the filename."""
    self._test_guess_attributes_from_name(
        path="/path/to/Sender - Title.{}",
        sender="Sender",
        title="Title",
        tags=(),
    )
def test_guess_attributes_from_name1(self):
    """Sender containing a space, single-word title, no tags segment."""
    self._test_guess_attributes_from_name(
        path="/path/to/Spaced Sender - Title.{}",
        sender="Spaced Sender",
        title="Title",
        tags=(),
    )
def test_guess_attributes_from_name2(self):
    """Single-word sender, title containing a space, no tags segment."""
    self._test_guess_attributes_from_name(
        path="/path/to/Sender - Spaced Title.{}",
        sender="Sender",
        title="Spaced Title",
        tags=(),
    )
def test_guess_attributes_from_name3(self):
    """Sender containing a dash, single-word title, no tags segment."""
    self._test_guess_attributes_from_name(
        path="/path/to/Dashed-Sender - Title.{}",
        sender="Dashed-Sender",
        title="Title",
        tags=(),
    )
def test_guess_attributes_from_name4(self):
    """Single-word sender, title containing a dash, no tags segment."""
    self._test_guess_attributes_from_name(
        path="/path/to/Sender - Dashed-Title.{}",
        sender="Sender",
        title="Dashed-Title",
        tags=(),
    )
def test_guess_attributes_from_name5(self):
    """Single-word sender and title followed by a three-tag segment."""
    self._test_guess_attributes_from_name(
        path="/path/to/Sender - Title - tag1,tag2,tag3.{}",
        sender="Sender",
        title="Title",
        tags=self.TAGS,
    )
def test_guess_attributes_from_name6(self):
    """Sender containing a space, plain title, three-tag segment."""
    self._test_guess_attributes_from_name(
        path="/path/to/Spaced Sender - Title - tag1,tag2,tag3.{}",
        sender="Spaced Sender",
        title="Title",
        tags=self.TAGS,
    )
def test_guess_attributes_from_name7(self):
    """Plain sender, title containing a space, three-tag segment."""
    self._test_guess_attributes_from_name(
        path="/path/to/Sender - Spaced Title - tag1,tag2,tag3.{}",
        sender="Sender",
        title="Spaced Title",
        tags=self.TAGS,
    )
def test_guess_attributes_from_name8(self):
    """Sender containing a dash, plain title, three-tag segment."""
    self._test_guess_attributes_from_name(
        path="/path/to/Dashed-Sender - Title - tag1,tag2,tag3.{}",
        sender="Dashed-Sender",
        title="Title",
        tags=self.TAGS,
    )
def test_guess_attributes_from_name9(self):
    """Plain sender, title containing a dash, three-tag segment."""
    self._test_guess_attributes_from_name(
        path="/path/to/Sender - Dashed-Title - tag1,tag2,tag3.{}",
        sender="Sender",
        title="Dashed-Title",
        tags=self.TAGS,
    )
def test_guess_attributes_from_name10(self):
    """Non-ASCII (Greek-letter) sender and title with a three-tag segment."""
    self._test_guess_attributes_from_name(
        path="/path/to/Σενδερ - Τιτλε - tag1,tag2,tag3.{}",
        sender="Σενδερ",
        title="Τιτλε",
        tags=self.TAGS,
    )
for test in tests:
for suffix in suffixes:
f = test["path"].format(suffix)
sender, title, tags, s = consumer._guess_attributes_from_name(f)
self.assertEqual(sender.name, test["result"]["sender"])
self.assertEqual(title, test["result"]["title"])
self.assertEqual(tags, test["result"]["tags"])
self.assertEqual(s, suffix)