mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-10-30 03:56:23 -05:00 
			
		
		
		
	Fixed the auto-naming regexes
This commit is contained in:
		| @@ -16,6 +16,7 @@ from django.template.defaultfilters import slugify | ||||
|  | ||||
| from paperless.db import GnuPG | ||||
|  | ||||
| from .mixins import Renderable | ||||
| from .models import Sender, Tag, Document | ||||
| from .languages import ISO639 | ||||
|  | ||||
| @@ -28,7 +29,7 @@ class ConsumerError(Exception): | ||||
|     pass | ||||
|  | ||||
|  | ||||
| class Consumer(object): | ||||
| class Consumer(Renderable): | ||||
|     """ | ||||
|     Loop over every file found in CONSUMPTION_DIR and: | ||||
|       1. Convert it to a greyscale png | ||||
| @@ -50,11 +51,11 @@ class Consumer(object): | ||||
|         flags=re.IGNORECASE | ||||
|     ) | ||||
|     REGEX_SENDER_TITLE = re.compile( | ||||
|         r"^[^/]*/(.+) - ([^/]+)\.(pdf|jpe?g|png|gif|tiff)$", | ||||
|         r"^.*/(.+) - ([^/]+)\.(pdf|jpe?g|png|gif|tiff)$", | ||||
|         flags=re.IGNORECASE | ||||
|     ) | ||||
|     REGEX_SENDER_TITLE_TAGS = re.compile( | ||||
|         r"^.*/([^/]+) - ([^/]+) - ([a-z\-,]+)\.(pdf|jpe?g|png|gif|tiff)$", | ||||
|         r"^.*/(.*) - (.*) - ([a-z0-9\-,]*)\.(pdf|jpe?g|png|gif|tiff)$", | ||||
|         flags=re.IGNORECASE | ||||
|     ) | ||||
|  | ||||
| @@ -208,7 +209,7 @@ class Consumer(object): | ||||
|             for t in tags.split(","): | ||||
|                 r.append( | ||||
|                     Tag.objects.get_or_create(slug=t, defaults={"name": t})[0]) | ||||
|             return r | ||||
|             return tuple(r) | ||||
|  | ||||
|         # First attempt: "<sender> - <title> - <tags>.<suffix>" | ||||
|         m = re.match(self.REGEX_SENDER_TITLE_TAGS, parseable) | ||||
| @@ -223,11 +224,11 @@ class Consumer(object): | ||||
|         # Second attempt: "<sender> - <title>.<suffix>" | ||||
|         m = re.match(self.REGEX_SENDER_TITLE, parseable) | ||||
|         if m: | ||||
|             return get_sender(m.group(1)), m.group(2), [], m.group(3) | ||||
|             return get_sender(m.group(1)), m.group(2), (), m.group(3) | ||||
|  | ||||
|         # That didn't work, so we assume sender and tags are None | ||||
|         m = re.match(self.REGEX_TITLE, parseable) | ||||
|         return None, m.group(1), [], m.group(2) | ||||
|         return None, m.group(1), (), m.group(2) | ||||
|  | ||||
|     def _store(self, text, doc): | ||||
|  | ||||
| @@ -273,10 +274,6 @@ class Consumer(object): | ||||
|  | ||||
|         self._render("", 2) | ||||
|  | ||||
|     def _render(self, text, verbosity): | ||||
|         if self.verbosity >= verbosity: | ||||
|             print(text) | ||||
|  | ||||
|     def _is_ready(self, doc): | ||||
|         """ | ||||
|         Detect whether `doc` is ready to consume or if it's still being written | ||||
|   | ||||
| @@ -4,73 +4,83 @@ from ..consumer import Consumer | ||||
|  | ||||
|  | ||||
| class TestAttachment(TestCase): | ||||
|      | ||||
|     TAGS = ("tag1", "tag2", "tag3") | ||||
|     CONSUMER = Consumer() | ||||
|      | ||||
|     def _test_guess_attributes_from_name(self, path, sender, title, tags): | ||||
|         for suffix in ("pdf", "png", "jpg", "jpeg", "gif"): | ||||
|             f = path.format(suffix) | ||||
|             results = self.CONSUMER._guess_attributes_from_name(f) | ||||
|             self.assertEqual(results[0].name, sender, f) | ||||
|             self.assertEqual(results[1], title, f) | ||||
|             self.assertEqual(tuple([t.slug for t in results[2]]), tags, f) | ||||
|             self.assertEqual(results[3], suffix, f) | ||||
|  | ||||
|     def test_guess_attributes_from_name(self): | ||||
|         consumer = Consumer() | ||||
|         suffixes = ("pdf", "png", "jpg", "jpeg", "gif") | ||||
|         tests = ( | ||||
|             { | ||||
|                 "path": "/path/to/Sender - Title - tag1,tag2,tag3.{}", | ||||
|                 "result": { | ||||
|                     "sender": "Sender", | ||||
|                     "title": "Title", | ||||
|                     "tags": ("tag1", "tag2", "tag3") | ||||
|                 }, | ||||
|             }, | ||||
|             { | ||||
|                 "path": "/path/to/Spaced Sender - Title - tag1,tag2,tag3.{}", | ||||
|                 "result": { | ||||
|                     "sender": "Spaced Sender", | ||||
|                     "title": "Title", | ||||
|                     "tags": ("tag1", "tag2", "tag3") | ||||
|                 }, | ||||
|             }, | ||||
|             { | ||||
|                 "path": "/path/to/Sender - Spaced Title - tag1,tag2,tag3.{}", | ||||
|                 "result": { | ||||
|                     "sender": "Sender", | ||||
|                     "title": "Spaced Title", | ||||
|                     "tags": ("tag1", "tag2", "tag3") | ||||
|                 }, | ||||
|             }, | ||||
|             { | ||||
|                 "path": "/path/to/Spaced Sender - Spaced Title - tag1,tag2.{}", | ||||
|                 "result": { | ||||
|                     "sender": "Spaced Sender", | ||||
|                     "title": "Spaced Title", | ||||
|                     "tags": ("tag1", "tag2") | ||||
|                 }, | ||||
|             }, | ||||
|             { | ||||
|                 "path": "/path/to/Dash-Sender - Title - tag1,tag2.{}", | ||||
|                 "result": { | ||||
|                     "sender": "Dash-Sender", | ||||
|                     "title": "Title", | ||||
|                     "tags": ("tag1", "tag2") | ||||
|                 }, | ||||
|             }, | ||||
|             { | ||||
|                 "path": "/path/to/Sender - Dash-Title - tag1,tag2.{}", | ||||
|                 "result": { | ||||
|                     "sender": "Sender", | ||||
|                     "title": "Dash-Title", | ||||
|                     "tags": ("tag1", "tag2") | ||||
|                 }, | ||||
|             }, | ||||
|             { | ||||
|                 "path": "/path/to/Dash-Sender - Dash-Title - tag1,tag2.{}", | ||||
|                 "result": { | ||||
|                     "sender": "Dash-Sender", | ||||
|                     "title": "Dash-Title", | ||||
|                     "tags": ("tag1", "tag2") | ||||
|                 }, | ||||
|             }, | ||||
|     def test_guess_attributes_from_name0(self): | ||||
|         self._test_guess_attributes_from_name( | ||||
|             "/path/to/Sender - Title.{}", "Sender", "Title", ()) | ||||
|  | ||||
|     def test_guess_attributes_from_name1(self): | ||||
|         self._test_guess_attributes_from_name( | ||||
|             "/path/to/Spaced Sender - Title.{}", "Spaced Sender", "Title", ()) | ||||
|  | ||||
|     def test_guess_attributes_from_name2(self): | ||||
|         self._test_guess_attributes_from_name( | ||||
|             "/path/to/Sender - Spaced Title.{}", "Sender", "Spaced Title", ()) | ||||
|  | ||||
|     def test_guess_attributes_from_name3(self): | ||||
|         self._test_guess_attributes_from_name( | ||||
|             "/path/to/Dashed-Sender - Title.{}", "Dashed-Sender", "Title", ()) | ||||
|  | ||||
|     def test_guess_attributes_from_name4(self): | ||||
|         self._test_guess_attributes_from_name( | ||||
|             "/path/to/Sender - Dashed-Title.{}", "Sender", "Dashed-Title", ()) | ||||
|  | ||||
|     def test_guess_attributes_from_name5(self): | ||||
|         self._test_guess_attributes_from_name( | ||||
|             "/path/to/Sender - Title - tag1,tag2,tag3.{}", | ||||
|             "Sender", | ||||
|             "Title", | ||||
|             self.TAGS | ||||
|         ) | ||||
|  | ||||
|     def test_guess_attributes_from_name6(self): | ||||
|         self._test_guess_attributes_from_name( | ||||
|             "/path/to/Spaced Sender - Title - tag1,tag2,tag3.{}", | ||||
|             "Spaced Sender", | ||||
|             "Title", | ||||
|             self.TAGS | ||||
|         ) | ||||
|  | ||||
|     def test_guess_attributes_from_name7(self): | ||||
|         self._test_guess_attributes_from_name( | ||||
|             "/path/to/Sender - Spaced Title - tag1,tag2,tag3.{}", | ||||
|             "Sender", | ||||
|             "Spaced Title", | ||||
|             self.TAGS | ||||
|         ) | ||||
|  | ||||
|     def test_guess_attributes_from_name8(self): | ||||
|         self._test_guess_attributes_from_name( | ||||
|             "/path/to/Dashed-Sender - Title - tag1,tag2,tag3.{}", | ||||
|             "Dashed-Sender", | ||||
|             "Title", | ||||
|             self.TAGS | ||||
|         ) | ||||
|  | ||||
|     def test_guess_attributes_from_name9(self): | ||||
|         self._test_guess_attributes_from_name( | ||||
|             "/path/to/Sender - Dashed-Title - tag1,tag2,tag3.{}", | ||||
|             "Sender", | ||||
|             "Dashed-Title", | ||||
|             self.TAGS | ||||
|         ) | ||||
|  | ||||
|     def test_guess_attributes_from_name10(self): | ||||
|         self._test_guess_attributes_from_name( | ||||
|             "/path/to/Σενδερ - Τιτλε - tag1,tag2,tag3.{}", | ||||
|             "Σενδερ", | ||||
|             "Τιτλε", | ||||
|             self.TAGS | ||||
|         ) | ||||
|         for test in tests: | ||||
|             for suffix in suffixes: | ||||
|                 f = test["path"].format(suffix) | ||||
|                 sender, title, tags, s = consumer._guess_attributes_from_name(f) | ||||
|                 self.assertEqual(sender.name, test["result"]["sender"]) | ||||
|                 self.assertEqual(title, test["result"]["title"]) | ||||
|                 self.assertEqual(tags, test["result"]["tags"]) | ||||
|                 self.assertEqual(s, suffix) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Daniel Quinn
					Daniel Quinn