mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-19 10:19:27 -05:00
Fixed the auto-naming regexes
This commit is contained in:
parent
7aadab23cc
commit
a022fcb8f1
@ -16,6 +16,7 @@ from django.template.defaultfilters import slugify
|
|||||||
|
|
||||||
from paperless.db import GnuPG
|
from paperless.db import GnuPG
|
||||||
|
|
||||||
|
from .mixins import Renderable
|
||||||
from .models import Sender, Tag, Document
|
from .models import Sender, Tag, Document
|
||||||
from .languages import ISO639
|
from .languages import ISO639
|
||||||
|
|
||||||
@ -28,7 +29,7 @@ class ConsumerError(Exception):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class Consumer(object):
|
class Consumer(Renderable):
|
||||||
"""
|
"""
|
||||||
Loop over every file found in CONSUMPTION_DIR and:
|
Loop over every file found in CONSUMPTION_DIR and:
|
||||||
1. Convert it to a greyscale png
|
1. Convert it to a greyscale png
|
||||||
@ -50,11 +51,11 @@ class Consumer(object):
|
|||||||
flags=re.IGNORECASE
|
flags=re.IGNORECASE
|
||||||
)
|
)
|
||||||
REGEX_SENDER_TITLE = re.compile(
|
REGEX_SENDER_TITLE = re.compile(
|
||||||
r"^[^/]*/(.+) - ([^/]+)\.(pdf|jpe?g|png|gif|tiff)$",
|
r"^.*/(.+) - ([^/]+)\.(pdf|jpe?g|png|gif|tiff)$",
|
||||||
flags=re.IGNORECASE
|
flags=re.IGNORECASE
|
||||||
)
|
)
|
||||||
REGEX_SENDER_TITLE_TAGS = re.compile(
|
REGEX_SENDER_TITLE_TAGS = re.compile(
|
||||||
r"^.*/([^/]+) - ([^/]+) - ([a-z\-,]+)\.(pdf|jpe?g|png|gif|tiff)$",
|
r"^.*/(.*) - (.*) - ([a-z0-9\-,]*)\.(pdf|jpe?g|png|gif|tiff)$",
|
||||||
flags=re.IGNORECASE
|
flags=re.IGNORECASE
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -208,7 +209,7 @@ class Consumer(object):
|
|||||||
for t in tags.split(","):
|
for t in tags.split(","):
|
||||||
r.append(
|
r.append(
|
||||||
Tag.objects.get_or_create(slug=t, defaults={"name": t})[0])
|
Tag.objects.get_or_create(slug=t, defaults={"name": t})[0])
|
||||||
return r
|
return tuple(r)
|
||||||
|
|
||||||
# First attempt: "<sender> - <title> - <tags>.<suffix>"
|
# First attempt: "<sender> - <title> - <tags>.<suffix>"
|
||||||
m = re.match(self.REGEX_SENDER_TITLE_TAGS, parseable)
|
m = re.match(self.REGEX_SENDER_TITLE_TAGS, parseable)
|
||||||
@ -223,11 +224,11 @@ class Consumer(object):
|
|||||||
# Second attempt: "<sender> - <title>.<suffix>"
|
# Second attempt: "<sender> - <title>.<suffix>"
|
||||||
m = re.match(self.REGEX_SENDER_TITLE, parseable)
|
m = re.match(self.REGEX_SENDER_TITLE, parseable)
|
||||||
if m:
|
if m:
|
||||||
return get_sender(m.group(1)), m.group(2), [], m.group(3)
|
return get_sender(m.group(1)), m.group(2), (), m.group(3)
|
||||||
|
|
||||||
# That didn't work, so we assume sender and tags are None
|
# That didn't work, so we assume sender and tags are None
|
||||||
m = re.match(self.REGEX_TITLE, parseable)
|
m = re.match(self.REGEX_TITLE, parseable)
|
||||||
return None, m.group(1), [], m.group(2)
|
return None, m.group(1), (), m.group(2)
|
||||||
|
|
||||||
def _store(self, text, doc):
|
def _store(self, text, doc):
|
||||||
|
|
||||||
@ -273,10 +274,6 @@ class Consumer(object):
|
|||||||
|
|
||||||
self._render("", 2)
|
self._render("", 2)
|
||||||
|
|
||||||
def _render(self, text, verbosity):
|
|
||||||
if self.verbosity >= verbosity:
|
|
||||||
print(text)
|
|
||||||
|
|
||||||
def _is_ready(self, doc):
|
def _is_ready(self, doc):
|
||||||
"""
|
"""
|
||||||
Detect whether `doc` is ready to consume or if it's still being written
|
Detect whether `doc` is ready to consume or if it's still being written
|
||||||
|
@ -5,72 +5,82 @@ from ..consumer import Consumer
|
|||||||
|
|
||||||
class TestAttachment(TestCase):
|
class TestAttachment(TestCase):
|
||||||
|
|
||||||
def test_guess_attributes_from_name(self):
|
TAGS = ("tag1", "tag2", "tag3")
|
||||||
consumer = Consumer()
|
CONSUMER = Consumer()
|
||||||
suffixes = ("pdf", "png", "jpg", "jpeg", "gif")
|
|
||||||
tests = (
|
def _test_guess_attributes_from_name(self, path, sender, title, tags):
|
||||||
{
|
for suffix in ("pdf", "png", "jpg", "jpeg", "gif"):
|
||||||
"path": "/path/to/Sender - Title - tag1,tag2,tag3.{}",
|
f = path.format(suffix)
|
||||||
"result": {
|
results = self.CONSUMER._guess_attributes_from_name(f)
|
||||||
"sender": "Sender",
|
self.assertEqual(results[0].name, sender, f)
|
||||||
"title": "Title",
|
self.assertEqual(results[1], title, f)
|
||||||
"tags": ("tag1", "tag2", "tag3")
|
self.assertEqual(tuple([t.slug for t in results[2]]), tags, f)
|
||||||
},
|
self.assertEqual(results[3], suffix, f)
|
||||||
},
|
|
||||||
{
|
def test_guess_attributes_from_name0(self):
|
||||||
"path": "/path/to/Spaced Sender - Title - tag1,tag2,tag3.{}",
|
self._test_guess_attributes_from_name(
|
||||||
"result": {
|
"/path/to/Sender - Title.{}", "Sender", "Title", ())
|
||||||
"sender": "Spaced Sender",
|
|
||||||
"title": "Title",
|
def test_guess_attributes_from_name1(self):
|
||||||
"tags": ("tag1", "tag2", "tag3")
|
self._test_guess_attributes_from_name(
|
||||||
},
|
"/path/to/Spaced Sender - Title.{}", "Spaced Sender", "Title", ())
|
||||||
},
|
|
||||||
{
|
def test_guess_attributes_from_name2(self):
|
||||||
"path": "/path/to/Sender - Spaced Title - tag1,tag2,tag3.{}",
|
self._test_guess_attributes_from_name(
|
||||||
"result": {
|
"/path/to/Sender - Spaced Title.{}", "Sender", "Spaced Title", ())
|
||||||
"sender": "Sender",
|
|
||||||
"title": "Spaced Title",
|
def test_guess_attributes_from_name3(self):
|
||||||
"tags": ("tag1", "tag2", "tag3")
|
self._test_guess_attributes_from_name(
|
||||||
},
|
"/path/to/Dashed-Sender - Title.{}", "Dashed-Sender", "Title", ())
|
||||||
},
|
|
||||||
{
|
def test_guess_attributes_from_name4(self):
|
||||||
"path": "/path/to/Spaced Sender - Spaced Title - tag1,tag2.{}",
|
self._test_guess_attributes_from_name(
|
||||||
"result": {
|
"/path/to/Sender - Dashed-Title.{}", "Sender", "Dashed-Title", ())
|
||||||
"sender": "Spaced Sender",
|
|
||||||
"title": "Spaced Title",
|
def test_guess_attributes_from_name5(self):
|
||||||
"tags": ("tag1", "tag2")
|
self._test_guess_attributes_from_name(
|
||||||
},
|
"/path/to/Sender - Title - tag1,tag2,tag3.{}",
|
||||||
},
|
"Sender",
|
||||||
{
|
"Title",
|
||||||
"path": "/path/to/Dash-Sender - Title - tag1,tag2.{}",
|
self.TAGS
|
||||||
"result": {
|
)
|
||||||
"sender": "Dash-Sender",
|
|
||||||
"title": "Title",
|
def test_guess_attributes_from_name6(self):
|
||||||
"tags": ("tag1", "tag2")
|
self._test_guess_attributes_from_name(
|
||||||
},
|
"/path/to/Spaced Sender - Title - tag1,tag2,tag3.{}",
|
||||||
},
|
"Spaced Sender",
|
||||||
{
|
"Title",
|
||||||
"path": "/path/to/Sender - Dash-Title - tag1,tag2.{}",
|
self.TAGS
|
||||||
"result": {
|
)
|
||||||
"sender": "Sender",
|
|
||||||
"title": "Dash-Title",
|
def test_guess_attributes_from_name7(self):
|
||||||
"tags": ("tag1", "tag2")
|
self._test_guess_attributes_from_name(
|
||||||
},
|
"/path/to/Sender - Spaced Title - tag1,tag2,tag3.{}",
|
||||||
},
|
"Sender",
|
||||||
{
|
"Spaced Title",
|
||||||
"path": "/path/to/Dash-Sender - Dash-Title - tag1,tag2.{}",
|
self.TAGS
|
||||||
"result": {
|
)
|
||||||
"sender": "Dash-Sender",
|
|
||||||
"title": "Dash-Title",
|
def test_guess_attributes_from_name8(self):
|
||||||
"tags": ("tag1", "tag2")
|
self._test_guess_attributes_from_name(
|
||||||
},
|
"/path/to/Dashed-Sender - Title - tag1,tag2,tag3.{}",
|
||||||
},
|
"Dashed-Sender",
|
||||||
|
"Title",
|
||||||
|
self.TAGS
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_guess_attributes_from_name9(self):
|
||||||
|
self._test_guess_attributes_from_name(
|
||||||
|
"/path/to/Sender - Dashed-Title - tag1,tag2,tag3.{}",
|
||||||
|
"Sender",
|
||||||
|
"Dashed-Title",
|
||||||
|
self.TAGS
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_guess_attributes_from_name10(self):
|
||||||
|
self._test_guess_attributes_from_name(
|
||||||
|
"/path/to/Σενδερ - Τιτλε - tag1,tag2,tag3.{}",
|
||||||
|
"Σενδερ",
|
||||||
|
"Τιτλε",
|
||||||
|
self.TAGS
|
||||||
)
|
)
|
||||||
for test in tests:
|
|
||||||
for suffix in suffixes:
|
|
||||||
f = test["path"].format(suffix)
|
|
||||||
sender, title, tags, s = consumer._guess_attributes_from_name(f)
|
|
||||||
self.assertEqual(sender.name, test["result"]["sender"])
|
|
||||||
self.assertEqual(title, test["result"]["title"])
|
|
||||||
self.assertEqual(tags, test["result"]["tags"])
|
|
||||||
self.assertEqual(s, suffix)
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user