mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Fixed the auto-naming regexes
This commit is contained in:
parent
7aadab23cc
commit
a022fcb8f1
@ -16,6 +16,7 @@ from django.template.defaultfilters import slugify
|
||||
|
||||
from paperless.db import GnuPG
|
||||
|
||||
from .mixins import Renderable
|
||||
from .models import Sender, Tag, Document
|
||||
from .languages import ISO639
|
||||
|
||||
@ -28,7 +29,7 @@ class ConsumerError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class Consumer(object):
|
||||
class Consumer(Renderable):
|
||||
"""
|
||||
Loop over every file found in CONSUMPTION_DIR and:
|
||||
1. Convert it to a greyscale png
|
||||
@ -50,11 +51,11 @@ class Consumer(object):
|
||||
flags=re.IGNORECASE
|
||||
)
|
||||
REGEX_SENDER_TITLE = re.compile(
|
||||
r"^[^/]*/(.+) - ([^/]+)\.(pdf|jpe?g|png|gif|tiff)$",
|
||||
r"^.*/(.+) - ([^/]+)\.(pdf|jpe?g|png|gif|tiff)$",
|
||||
flags=re.IGNORECASE
|
||||
)
|
||||
REGEX_SENDER_TITLE_TAGS = re.compile(
|
||||
r"^.*/([^/]+) - ([^/]+) - ([a-z\-,]+)\.(pdf|jpe?g|png|gif|tiff)$",
|
||||
r"^.*/(.*) - (.*) - ([a-z0-9\-,]*)\.(pdf|jpe?g|png|gif|tiff)$",
|
||||
flags=re.IGNORECASE
|
||||
)
|
||||
|
||||
@ -208,7 +209,7 @@ class Consumer(object):
|
||||
for t in tags.split(","):
|
||||
r.append(
|
||||
Tag.objects.get_or_create(slug=t, defaults={"name": t})[0])
|
||||
return r
|
||||
return tuple(r)
|
||||
|
||||
# First attempt: "<sender> - <title> - <tags>.<suffix>"
|
||||
m = re.match(self.REGEX_SENDER_TITLE_TAGS, parseable)
|
||||
@ -223,11 +224,11 @@ class Consumer(object):
|
||||
# Second attempt: "<sender> - <title>.<suffix>"
|
||||
m = re.match(self.REGEX_SENDER_TITLE, parseable)
|
||||
if m:
|
||||
return get_sender(m.group(1)), m.group(2), [], m.group(3)
|
||||
return get_sender(m.group(1)), m.group(2), (), m.group(3)
|
||||
|
||||
# That didn't work, so we assume sender and tags are None
|
||||
m = re.match(self.REGEX_TITLE, parseable)
|
||||
return None, m.group(1), [], m.group(2)
|
||||
return None, m.group(1), (), m.group(2)
|
||||
|
||||
def _store(self, text, doc):
|
||||
|
||||
@ -273,10 +274,6 @@ class Consumer(object):
|
||||
|
||||
self._render("", 2)
|
||||
|
||||
def _render(self, text, verbosity):
|
||||
if self.verbosity >= verbosity:
|
||||
print(text)
|
||||
|
||||
def _is_ready(self, doc):
|
||||
"""
|
||||
Detect whether `doc` is ready to consume or if it's still being written
|
||||
|
@ -4,73 +4,83 @@ from ..consumer import Consumer
|
||||
|
||||
|
||||
class TestAttachment(TestCase):
|
||||
|
||||
TAGS = ("tag1", "tag2", "tag3")
|
||||
CONSUMER = Consumer()
|
||||
|
||||
def _test_guess_attributes_from_name(self, path, sender, title, tags):
|
||||
for suffix in ("pdf", "png", "jpg", "jpeg", "gif"):
|
||||
f = path.format(suffix)
|
||||
results = self.CONSUMER._guess_attributes_from_name(f)
|
||||
self.assertEqual(results[0].name, sender, f)
|
||||
self.assertEqual(results[1], title, f)
|
||||
self.assertEqual(tuple([t.slug for t in results[2]]), tags, f)
|
||||
self.assertEqual(results[3], suffix, f)
|
||||
|
||||
def test_guess_attributes_from_name(self):
|
||||
consumer = Consumer()
|
||||
suffixes = ("pdf", "png", "jpg", "jpeg", "gif")
|
||||
tests = (
|
||||
{
|
||||
"path": "/path/to/Sender - Title - tag1,tag2,tag3.{}",
|
||||
"result": {
|
||||
"sender": "Sender",
|
||||
"title": "Title",
|
||||
"tags": ("tag1", "tag2", "tag3")
|
||||
},
|
||||
},
|
||||
{
|
||||
"path": "/path/to/Spaced Sender - Title - tag1,tag2,tag3.{}",
|
||||
"result": {
|
||||
"sender": "Spaced Sender",
|
||||
"title": "Title",
|
||||
"tags": ("tag1", "tag2", "tag3")
|
||||
},
|
||||
},
|
||||
{
|
||||
"path": "/path/to/Sender - Spaced Title - tag1,tag2,tag3.{}",
|
||||
"result": {
|
||||
"sender": "Sender",
|
||||
"title": "Spaced Title",
|
||||
"tags": ("tag1", "tag2", "tag3")
|
||||
},
|
||||
},
|
||||
{
|
||||
"path": "/path/to/Spaced Sender - Spaced Title - tag1,tag2.{}",
|
||||
"result": {
|
||||
"sender": "Spaced Sender",
|
||||
"title": "Spaced Title",
|
||||
"tags": ("tag1", "tag2")
|
||||
},
|
||||
},
|
||||
{
|
||||
"path": "/path/to/Dash-Sender - Title - tag1,tag2.{}",
|
||||
"result": {
|
||||
"sender": "Dash-Sender",
|
||||
"title": "Title",
|
||||
"tags": ("tag1", "tag2")
|
||||
},
|
||||
},
|
||||
{
|
||||
"path": "/path/to/Sender - Dash-Title - tag1,tag2.{}",
|
||||
"result": {
|
||||
"sender": "Sender",
|
||||
"title": "Dash-Title",
|
||||
"tags": ("tag1", "tag2")
|
||||
},
|
||||
},
|
||||
{
|
||||
"path": "/path/to/Dash-Sender - Dash-Title - tag1,tag2.{}",
|
||||
"result": {
|
||||
"sender": "Dash-Sender",
|
||||
"title": "Dash-Title",
|
||||
"tags": ("tag1", "tag2")
|
||||
},
|
||||
},
|
||||
def test_guess_attributes_from_name0(self):
|
||||
self._test_guess_attributes_from_name(
|
||||
"/path/to/Sender - Title.{}", "Sender", "Title", ())
|
||||
|
||||
def test_guess_attributes_from_name1(self):
|
||||
self._test_guess_attributes_from_name(
|
||||
"/path/to/Spaced Sender - Title.{}", "Spaced Sender", "Title", ())
|
||||
|
||||
def test_guess_attributes_from_name2(self):
|
||||
self._test_guess_attributes_from_name(
|
||||
"/path/to/Sender - Spaced Title.{}", "Sender", "Spaced Title", ())
|
||||
|
||||
def test_guess_attributes_from_name3(self):
|
||||
self._test_guess_attributes_from_name(
|
||||
"/path/to/Dashed-Sender - Title.{}", "Dashed-Sender", "Title", ())
|
||||
|
||||
def test_guess_attributes_from_name4(self):
|
||||
self._test_guess_attributes_from_name(
|
||||
"/path/to/Sender - Dashed-Title.{}", "Sender", "Dashed-Title", ())
|
||||
|
||||
def test_guess_attributes_from_name5(self):
|
||||
self._test_guess_attributes_from_name(
|
||||
"/path/to/Sender - Title - tag1,tag2,tag3.{}",
|
||||
"Sender",
|
||||
"Title",
|
||||
self.TAGS
|
||||
)
|
||||
|
||||
def test_guess_attributes_from_name6(self):
|
||||
self._test_guess_attributes_from_name(
|
||||
"/path/to/Spaced Sender - Title - tag1,tag2,tag3.{}",
|
||||
"Spaced Sender",
|
||||
"Title",
|
||||
self.TAGS
|
||||
)
|
||||
|
||||
def test_guess_attributes_from_name7(self):
|
||||
self._test_guess_attributes_from_name(
|
||||
"/path/to/Sender - Spaced Title - tag1,tag2,tag3.{}",
|
||||
"Sender",
|
||||
"Spaced Title",
|
||||
self.TAGS
|
||||
)
|
||||
|
||||
def test_guess_attributes_from_name8(self):
|
||||
self._test_guess_attributes_from_name(
|
||||
"/path/to/Dashed-Sender - Title - tag1,tag2,tag3.{}",
|
||||
"Dashed-Sender",
|
||||
"Title",
|
||||
self.TAGS
|
||||
)
|
||||
|
||||
def test_guess_attributes_from_name9(self):
|
||||
self._test_guess_attributes_from_name(
|
||||
"/path/to/Sender - Dashed-Title - tag1,tag2,tag3.{}",
|
||||
"Sender",
|
||||
"Dashed-Title",
|
||||
self.TAGS
|
||||
)
|
||||
|
||||
def test_guess_attributes_from_name10(self):
|
||||
self._test_guess_attributes_from_name(
|
||||
"/path/to/Σενδερ - Τιτλε - tag1,tag2,tag3.{}",
|
||||
"Σενδερ",
|
||||
"Τιτλε",
|
||||
self.TAGS
|
||||
)
|
||||
for test in tests:
|
||||
for suffix in suffixes:
|
||||
f = test["path"].format(suffix)
|
||||
sender, title, tags, s = consumer._guess_attributes_from_name(f)
|
||||
self.assertEqual(sender.name, test["result"]["sender"])
|
||||
self.assertEqual(title, test["result"]["title"])
|
||||
self.assertEqual(tags, test["result"]["tags"])
|
||||
self.assertEqual(s, suffix)
|
||||
|
Loading…
x
Reference in New Issue
Block a user