From 11701391279b17995bd2becd7cc742f8ba3ff3aa Mon Sep 17 00:00:00 2001 From: Daniel Quinn Date: Mon, 14 Mar 2016 21:20:44 +0000 Subject: [PATCH 1/4] Added a consume-start and consume-finish signal --- src/documents/consumer.py | 19 +++++++++++++++++-- src/documents/signals.py | 4 ++++ 2 files changed, 21 insertions(+), 2 deletions(-) create mode 100644 src/documents/signals.py diff --git a/src/documents/consumer.py b/src/documents/consumer.py index fbdbbc276..244383211 100644 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -26,6 +26,8 @@ from paperless.db import GnuPG from .models import Correspondent, Tag, Document, Log from .languages import ISO639 +from .signals import ( + document_consumption_started, document_consumption_finished) class OCRError(Exception): @@ -118,22 +120,33 @@ class Consumer(object): self.log("info", "Consuming {}".format(doc)) + document_consumption_started.send( + sender=self.__class__, filename=doc) + tempdir = tempfile.mkdtemp(prefix="paperless", dir=self.SCRATCH) imgs = self._get_greyscale(tempdir, doc) thumbnail = self._get_thumbnail(tempdir, doc) try: - text = self._get_ocr(imgs) - self._store(text, doc, thumbnail) + + document = self._store(self._get_ocr(imgs), doc, thumbnail) + except OCRError as e: + self._ignore.append(doc) self.log("error", "OCR FAILURE for {}: {}".format(doc, e)) self._cleanup_tempdir(tempdir) + continue + else: + self._cleanup_tempdir(tempdir) self._cleanup_doc(doc) + document_consumption_finished.send( + sender=self.__class__, filename=document) + def _get_greyscale(self, tempdir, doc): """ Greyscale images are easier for Tesseract to OCR @@ -360,6 +373,8 @@ class Consumer(object): self.log("info", "Completed") + return document + def _cleanup_tempdir(self, d): self.log("debug", "Deleting directory {}".format(d)) shutil.rmtree(d) diff --git a/src/documents/signals.py b/src/documents/signals.py new file mode 100644 index 000000000..257a20d46 --- /dev/null +++ b/src/documents/signals.py @@ -0,0 +1,4 @@ +from django.dispatch import Signal + +document_consumption_started = Signal(providing_args=["filename"]) +document_consumption_finished = Signal(providing_args=["document"]) From b92e007e1567f5efe8e3fb32a05c64116ae4c082 Mon Sep 17 00:00:00 2001 From: Daniel Quinn Date: Mon, 28 Mar 2016 11:11:15 +0100 Subject: [PATCH 2/4] Removed log components and introduced signals for tags & correspondents --- src/documents/admin.py | 13 ++- src/documents/apps.py | 13 ++- src/documents/consumer.py | 15 ++- src/documents/loggers.py | 9 +- src/documents/mail.py | 3 +- .../management/commands/document_consumer.py | 5 +- .../migrations/0013_auto_20160325_2111.py | 35 ++++++ src/documents/models.py | 101 ++++++++---------- .../{signals.py => signals/__init__.py} | 0 src/documents/signals/handlers.py | 53 +++++++++ src/documents/tests/test_logger.py | 66 +----------- src/paperless/settings.py | 2 +- 12 files changed, 175 insertions(+), 140 deletions(-) create mode 100644 src/documents/migrations/0013_auto_20160325_2111.py rename src/documents/{signals.py => signals/__init__.py} (100%) create mode 100644 src/documents/signals/handlers.py diff --git a/src/documents/admin.py b/src/documents/admin.py index a5b523492..86a0bd1f8 100644 --- a/src/documents/admin.py +++ b/src/documents/admin.py @@ -31,6 +31,13 @@ class MonthListFilter(admin.SimpleListFilter): return queryset.filter(created__year=year, created__month=month) +class CorrespondentAdmin(admin.ModelAdmin): + + list_display = ("name", "match", "matching_algorithm") + list_filter = ("matching_algorithm",) + list_editable = ("match", "matching_algorithm") + + class TagAdmin(admin.ModelAdmin): list_display = ("name", "colour", "match", "matching_algorithm") @@ -103,11 +110,11 @@ class DocumentAdmin(admin.ModelAdmin): class LogAdmin(admin.ModelAdmin): - list_display = ("message", "level", "component") - list_filter = ("level", "component",) + list_display = ("message", "level",) + list_filter = ("level",) -admin.site.register(Correspondent) +admin.site.register(Correspondent, CorrespondentAdmin) admin.site.register(Tag, TagAdmin) admin.site.register(Document, DocumentAdmin) admin.site.register(Log, LogAdmin) diff --git a/src/documents/apps.py b/src/documents/apps.py index 93ca7550a..e58c736c0 100644 --- a/src/documents/apps.py +++ b/src/documents/apps.py @@ -2,4 +2,15 @@ from django.apps import AppConfig class DocumentsConfig(AppConfig): - name = 'documents' + + name = "documents" + + def ready(self): + + from .signals import document_consumption_finished + from .signals.handlers import set_correspondent, set_tags + + document_consumption_finished.connect(set_tags) + document_consumption_finished.connect(set_correspondent) + + AppConfig.ready(self) diff --git a/src/documents/consumer.py b/src/documents/consumer.py index 08ed98fd0..4044d3dff 100644 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -80,8 +80,7 @@ class Consumer(object): def log(self, level, message): getattr(self.logger, level)(message, extra={ - "group": self.logging_group, - "component": Log.COMPONENT_CONSUMER + "group": self.logging_group }) def consume(self): @@ -107,7 +106,10 @@ class Consumer(object): self.log("info", "Consuming {}".format(doc)) document_consumption_started.send( - sender=self.__class__, filename=doc) + sender=self.__class__, + filename=doc, + logging_group=self.logging_group + ) tempdir = tempfile.mkdtemp(prefix="paperless", dir=self.SCRATCH) imgs = self._get_greyscale(tempdir, doc) @@ -131,7 +133,10 @@ class Consumer(object): self._cleanup_doc(doc) document_consumption_finished.send( - sender=self.__class__, filename=document) + sender=self.__class__, + document=document, + logging_group=self.logging_group + ) def _get_greyscale(self, tempdir, doc): """ @@ -271,7 +276,6 @@ class Consumer(object): def _store(self, text, doc, thumbnail): file_info = FileInfo.from_path(doc) - relevant_tags = set(list(Tag.match_all(text)) + list(file_info.tags)) stats = os.stat(doc) @@ -288,6 +292,7 @@ class Consumer(object): datetime.datetime.fromtimestamp(stats.st_mtime)) ) + relevant_tags = set(list(Tag.match_all(text)) + list(file_info.tags)) if relevant_tags: tag_names = ", ".join([t.slug for t in relevant_tags]) self.log("debug", "Tagging with {}".format(tag_names)) diff --git a/src/documents/loggers.py b/src/documents/loggers.py index 3464478cc..a35841299 100644 --- a/src/documents/loggers.py +++ b/src/documents/loggers.py @@ -11,18 +11,11 @@ class PaperlessLogger(logging.StreamHandler): logging.StreamHandler.emit(self, record) - if not hasattr(record, "component"): - return - # We have to do the import here or Django will barf when it tries to # load this because the apps aren't loaded at that point from .models import Log - kwargs = { - "message": record.msg, - "component": record.component, - "level": record.levelno, - } + kwargs = {"message": record.msg, "level": record.levelno} if hasattr(record, "group"): kwargs["group"] = record.group diff --git a/src/documents/mail.py b/src/documents/mail.py index 5bacb5b5f..6d368987a 100644 --- a/src/documents/mail.py +++ b/src/documents/mail.py @@ -33,8 +33,7 @@ class Loggable(object): def log(self, level, message): getattr(self.logger, level)(message, extra={ - "group": self.logging_group, - "component": Log.COMPONENT_MAIL + "group": self.logging_group }) diff --git a/src/documents/management/commands/document_consumer.py b/src/documents/management/commands/document_consumer.py index 8116303b5..0acdaeeb0 100644 --- a/src/documents/management/commands/document_consumer.py +++ b/src/documents/management/commands/document_consumer.py @@ -47,10 +47,7 @@ class Command(BaseCommand): pass logging.getLogger(__name__).info( - "Starting document consumer at {}".format( - settings.CONSUMPTION_DIR - ), - extra={"component": Log.COMPONENT_CONSUMER} + "Starting document consumer at {}".format(settings.CONSUMPTION_DIR) ) try: diff --git a/src/documents/migrations/0013_auto_20160325_2111.py b/src/documents/migrations/0013_auto_20160325_2111.py new file mode 100644 index 000000000..c57ddc03e --- /dev/null +++ b/src/documents/migrations/0013_auto_20160325_2111.py @@ -0,0 +1,35 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.9.4 on 2016-03-25 21:11 +from __future__ import unicode_literals + +from django.db import migrations, models +import django.utils.timezone + + +class Migration(migrations.Migration): + + dependencies = [ + ('documents', '0012_auto_20160305_0040'), + ] + + operations = [ + migrations.AddField( + model_name='correspondent', + name='match', + field=models.CharField(blank=True, max_length=256), + ), + migrations.AddField( + model_name='correspondent', + name='matching_algorithm', + field=models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. If you don\'t know what a regex is, you probably don\'t want this option.'), + ), + migrations.AlterField( + model_name='document', + name='created', + field=models.DateTimeField(default=django.utils.timezone.now), + ), + migrations.RemoveField( + model_name='log', + name='component', + ), + ] diff --git a/src/documents/models.py b/src/documents/models.py index cf32fabe3..425ca4c77 100644 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -15,50 +15,7 @@ from django.utils import timezone from .managers import LogManager -class SluggedModel(models.Model): - - name = models.CharField(max_length=128, unique=True) - slug = models.SlugField(blank=True) - - class Meta(object): - abstract = True - - def save(self, *args, **kwargs): - if not self.slug: - self.slug = slugify(self.name) - models.Model.save(self, *args, **kwargs) - - def __str__(self): - return self.name - - -class Correspondent(SluggedModel): - - # This regex is probably more restrictive than it needs to be, but it's - # better safe than sorry. - SAFE_REGEX = re.compile(r"^[\w\- ,.']+$") - - class Meta(object): - ordering = ("name",) - - -class Tag(SluggedModel): - - COLOURS = ( - (1, "#a6cee3"), - (2, "#1f78b4"), - (3, "#b2df8a"), - (4, "#33a02c"), - (5, "#fb9a99"), - (6, "#e31a1c"), - (7, "#fdbf6f"), - (8, "#ff7f00"), - (9, "#cab2d6"), - (10, "#6a3d9a"), - (11, "#b15928"), - (12, "#000000"), - (13, "#cccccc") - ) +class MatchingModel(models.Model): MATCH_ANY = 1 MATCH_ALL = 2 @@ -71,7 +28,9 @@ class Tag(SluggedModel): (MATCH_REGEX, "Regular Expression"), ) - colour = models.PositiveIntegerField(choices=COLOURS, default=1) + name = models.CharField(max_length=128, unique=True) + slug = models.SlugField(blank=True) + match = models.CharField(max_length=256, blank=True) matching_algorithm = models.PositiveIntegerField( choices=MATCHING_ALGORITHMS, @@ -88,6 +47,12 @@ class Tag(SluggedModel): ) ) + class Meta(object): + abstract = True + + def __str__(self): + return self.name + @property def conditions(self): return "{}: \"{}\" ({})".format( @@ -131,8 +96,44 @@ class Tag(SluggedModel): raise NotImplementedError("Unsupported matching algorithm") def save(self, *args, **kwargs): + self.match = self.match.lower() - SluggedModel.save(self, *args, **kwargs) + + if not self.slug: + self.slug = slugify(self.name) + + models.Model.save(self, *args, **kwargs) + + +class Correspondent(MatchingModel): + + # This regex is probably more restrictive than it needs to be, but it's + # better safe than sorry. + SAFE_REGEX = re.compile(r"^[\w\- ,.']+$") + + class Meta(object): + ordering = ("name",) + + +class Tag(MatchingModel): + + COLOURS = ( + (1, "#a6cee3"), + (2, "#1f78b4"), + (3, "#b2df8a"), + (4, "#33a02c"), + (5, "#fb9a99"), + (6, "#e31a1c"), + (7, "#fdbf6f"), + (8, "#ff7f00"), + (9, "#cab2d6"), + (10, "#6a3d9a"), + (11, "#b15928"), + (12, "#000000"), + (13, "#cccccc") + ) + + colour = models.PositiveIntegerField(choices=COLOURS, default=1) class Document(models.Model): @@ -219,17 +220,9 @@ class Log(models.Model): (logging.CRITICAL, "Critical"), ) - COMPONENT_CONSUMER = 1 - COMPONENT_MAIL = 2 - COMPONENTS = ( - (COMPONENT_CONSUMER, "Consumer"), - (COMPONENT_MAIL, "Mail Fetcher") - ) - group = models.UUIDField(blank=True) message = models.TextField() level = models.PositiveIntegerField(choices=LEVELS, default=logging.INFO) - component = models.PositiveIntegerField(choices=COMPONENTS) created = models.DateTimeField(auto_now_add=True) modified = models.DateTimeField(auto_now=True) diff --git a/src/documents/signals.py b/src/documents/signals/__init__.py similarity index 100% rename from src/documents/signals.py rename to src/documents/signals/__init__.py diff --git a/src/documents/signals/handlers.py b/src/documents/signals/handlers.py new file mode 100644 index 000000000..566fd0f76 --- /dev/null +++ b/src/documents/signals/handlers.py @@ -0,0 +1,53 @@ +import logging + +from ..models import Correspondent, Tag + + +def logger(message, group): + logging.getLogger(__name__).debug(message, extra={"group": group}) + + +def set_correspondent(sender, document=None, logging_group=None, **kwargs): + + # No sense in assigning a correspondent when one is already set. + if document.correspondent: + return + + # No matching correspondents, so no need to continue + potential_correspondents = Correspondent.match_all(document.content) + if not potential_correspondents: + return + + potential_count = len(potential_correspondents) + selected = potential_correspondents[0] + if potential_count > 1: + message = "Detected {} potential correspondents, so we've opted for {}" + logger( + message.format(potential_count, selected), + logging_group + ) + + logger( + 'Assigning correspondent "{}" to "{}" '.format(selected, document), + logging_group + ) + + document.correspondent = selected + document.save(update_fields="correspondent") + + +def set_tags(sender, document=None, logging_group=None, **kwargs): + + current_tags = set(document.tags.all()) + relevant_tags = set(Tag.match_all(document.content)) - current_tags + + if not relevant_tags: + return + + message = 'Tagging "{}" with "{}"' + logger( + message.format(document, ", ".join([t.slug for t in relevant_tags])), + logging_group + ) + + document.tags.add(*relevant_tags) diff --git a/src/documents/tests/test_logger.py b/src/documents/tests/test_logger.py index 23cea13e7..9b14a3902 100644 --- a/src/documents/tests/test_logger.py +++ b/src/documents/tests/test_logger.py @@ -15,21 +15,9 @@ class TestPaperlessLog(TestCase): self.logger = logging.getLogger( "documents.management.commands.document_consumer") - def test_ignored(self): - with mock.patch("logging.StreamHandler.emit") as __: - self.assertEqual(Log.objects.all().count(), 0) - self.logger.info("This is an informational message") - self.logger.warning("This is an informational message") - self.logger.error("This is an informational message") - self.logger.critical("This is an informational message") - self.assertEqual(Log.objects.all().count(), 0) - def test_that_it_saves_at_all(self): - kw = { - "group": uuid.uuid4(), - "component": Log.COMPONENT_MAIL - } + kw = {"group": uuid.uuid4()} self.assertEqual(Log.objects.all().count(), 0) @@ -53,14 +41,8 @@ class TestPaperlessLog(TestCase): def test_groups(self): - kw1 = { - "group": uuid.uuid4(), - "component": Log.COMPONENT_MAIL - } - kw2 = { - "group": uuid.uuid4(), - "component": Log.COMPONENT_MAIL - } + kw1 = {"group": uuid.uuid4()} + kw2 = {"group": uuid.uuid4()} self.assertEqual(Log.objects.all().count(), 0) @@ -86,49 +68,9 @@ class TestPaperlessLog(TestCase): self.assertEqual(Log.objects.all().count(), 4) self.assertEqual(Log.objects.filter(group=kw1["group"]).count(), 2) - def test_components(self): - - c1 = Log.COMPONENT_CONSUMER - c2 = Log.COMPONENT_MAIL - kw1 = { - "group": uuid.uuid4(), - "component": c1 - } - kw2 = { - "group": kw1["group"], - "component": c2 - } - - self.assertEqual(Log.objects.all().count(), 0) - - with mock.patch("logging.StreamHandler.emit") as __: - - # Debug messages are ignored by default - self.logger.debug("This is a debugging message", extra=kw1) - self.assertEqual(Log.objects.all().count(), 0) - - self.logger.info("This is an informational message", extra=kw2) - self.assertEqual(Log.objects.all().count(), 1) - self.assertEqual(Log.objects.filter(component=c2).count(), 1) - - self.logger.warning("This is an warning message", extra=kw1) - self.assertEqual(Log.objects.all().count(), 2) - self.assertEqual(Log.objects.filter(component=c1).count(), 1) - - self.logger.error("This is an error message", extra=kw2) - self.assertEqual(Log.objects.all().count(), 3) - self.assertEqual(Log.objects.filter(component=c2).count(), 2) - - self.logger.critical("This is a critical message", extra=kw1) - self.assertEqual(Log.objects.all().count(), 4) - self.assertEqual(Log.objects.filter(component=c1).count(), 2) - def test_groupped_query(self): - kw = { - "group": uuid.uuid4(), - "component": Log.COMPONENT_MAIL - } + kw = {"group": uuid.uuid4()} with mock.patch("logging.StreamHandler.emit") as __: self.logger.info("Message 0", extra=kw) self.logger.info("Message 1", extra=kw) diff --git a/src/paperless/settings.py b/src/paperless/settings.py index bb1ba363b..209a7dedf 100644 --- a/src/paperless/settings.py +++ b/src/paperless/settings.py @@ -43,7 +43,7 @@ INSTALLED_APPS = [ "django_extensions", - "documents", + "documents.apps.DocumentsConfig", "rest_framework", "crispy_forms", From b4e648e1e395babd7799cdd23a8c21a09d47fed4 Mon Sep 17 00:00:00 2001 From: Daniel Quinn Date: Mon, 28 Mar 2016 14:16:26 +0100 Subject: [PATCH 3/4] Test All The Things --- src/documents/consumer.py | 2 +- src/documents/signals/handlers.py | 4 +- src/documents/tests/test_matchables.py | 196 +++++++++++++++++++++++++ src/documents/tests/test_tags.py | 119 --------------- 4 files changed, 199 insertions(+), 122 deletions(-) create mode 100644 src/documents/tests/test_matchables.py delete mode 100644 src/documents/tests/test_tags.py diff --git a/src/documents/consumer.py b/src/documents/consumer.py index 4044d3dff..dcf81252f 100644 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -23,7 +23,7 @@ from pyocr.tesseract import TesseractError from paperless.db import GnuPG -from .models import Tag, Document, Log, FileInfo +from .models import Tag, Document, FileInfo from .languages import ISO639 from .signals import ( document_consumption_started, document_consumption_finished) diff --git a/src/documents/signals/handlers.py b/src/documents/signals/handlers.py index 566fd0f76..4e0599ccd 100644 --- a/src/documents/signals/handlers.py +++ b/src/documents/signals/handlers.py @@ -14,7 +14,7 @@ def set_correspondent(sender, document=None, logging_group=None, **kwargs): return # No matching correspondents, so no need to continue - potential_correspondents = Correspondent.match_all(document.content) + potential_correspondents = list(Correspondent.match_all(document.content)) if not potential_correspondents: return @@ -33,7 +33,7 @@ def set_correspondent(sender, document=None, logging_group=None, **kwargs): ) document.correspondent = selected - document.save(update_fields="correspondent") + document.save(update_fields=("correspondent",)) def set_tags(sender, document=None, logging_group=None, **kwargs): diff --git a/src/documents/tests/test_matchables.py b/src/documents/tests/test_matchables.py new file mode 100644 index 000000000..637a34c1c --- /dev/null +++ b/src/documents/tests/test_matchables.py @@ -0,0 +1,196 @@ +from random import randint + +from django.test import TestCase + +from ..models import Correspondent, Document, Tag +from ..signals import document_consumption_finished + + +class TestMatching(TestCase): + + def _truefalse(self, text, algorithm, true, false): + for klass in (Tag, Correspondent): + instance = klass.objects.create( + name=str(randint(10000, 99999)), + match=text, + matching_algorithm=getattr(klass, algorithm) + ) + for string in true: + self.assertTrue(instance.matches(string)) + for string in false: + self.assertFalse(instance.matches(string)) + + def test_match_all(self): + + self._truefalse( + "alpha charlie gamma", + "MATCH_ALL", + ("I have alpha, charlie, and gamma in me",), + ( + "I have alpha in me", + "I have charlie in me", + "I have gamma in me", + "I have alpha and charlie in me", + "I have alphas, charlie, and gamma in me", + "I have alphas in me", + "I have bravo in me", + ) + ) + + self._truefalse( + "12 34 56", + "MATCH_ALL", + ( + "I have 12 34, and 56 in me", + ), + ( + "I have 12 in me", + "I have 34 in me", + "I have 56 in me", + "I have 12 and 34 in me", + "I have 120, 34, and 56 in me", + "I have 123456 in me", + "I have 01234567 in me", + ) + ) + + def test_match_any(self): + + self._truefalse( + "alpha charlie gamma", + "MATCH_ANY", + ( + "I have alpha in me", + "I have charlie in me", + "I have gamma in me", + "I have alpha, charlie, and gamma in me", + "I have alpha and charlie in me", + ), + ( + "I have alphas in me", + "I have bravo in me", + ) + ) + + self._truefalse( + "12 34 56", + "MATCH_ANY", + ( + "I have 12 in me", + "I have 34 in me", + "I have 56 in me", + "I have 12 and 34 in me", + "I have 12, 34, and 56 in me", + "I have 120, 34, and 56 in me", + ), + ( + "I have 123456 in me", + "I have 01234567 in me", + ) + ) + + def test_match_literal(self): + + self._truefalse( + "alpha charlie gamma", + "MATCH_LITERAL", + ( + "I have 'alpha charlie gamma' in me", + ), + ( + "I have alpha in me", + "I have charlie in me", + "I have gamma in me", + "I have alpha and charlie in me", + "I have alpha, charlie, and gamma in me", + "I have alphas, charlie, and gamma in me", + "I have alphas in me", + "I have bravo in me", + ) + ) + + self._truefalse( + "12 34 56", + "MATCH_LITERAL", + ( + "I have 12 34 56 in me", + ), + ( + "I have 12 in me", + "I have 34 in me", + "I have 56 in me", + "I have 12 and 34 in me", + "I have 12 34, and 56 in me", + "I have 120, 34, and 560 in me", + "I have 120, 340, and 560 in me", + "I have 123456 in me", + "I have 01234567 in me", + ) + ) + + def test_match_regex(self): + + self._truefalse( + "alpha\w+gamma", + "MATCH_REGEX", + ( + "I have alpha_and_gamma in me", + "I have alphas_and_gamma in me", + ), + ( + "I have alpha in me", + "I have gamma in me", + "I have alpha and charlie in me", + "I have alpha,and,gamma in me", + "I have alpha and gamma in me", + "I have alpha, charlie, and gamma in me", + "I have alphas, charlie, and gamma in me", + "I have alphas in me", + ) + ) + + +class TestApplications(TestCase): + """ + We make use of document_consumption_finished, so we should test that it's + doing what we expect wrt to tag & correspondent matching. + """ + + def setUp(self): + TestCase.setUp(self) + self.doc_contains = Document.objects.create( + content="I contain the keyword.", file_type="pdf") + + def test_tag_applied_any(self): + t1 = Tag.objects.create( + name="test", match="keyword", matching_algorithm=Tag.MATCH_ANY) + document_consumption_finished.send( + sender=self.__class__, document=self.doc_contains) + self.assertTrue(list(self.doc_contains.tags.all()) == [t1]) + + def test_tag_not_applied(self): + Tag.objects.create( + name="test", match="no-match", matching_algorithm=Tag.MATCH_ANY) + document_consumption_finished.send( + sender=self.__class__, document=self.doc_contains) + self.assertTrue(list(self.doc_contains.tags.all()) == []) + + def test_correspondent_applied(self): + correspondent = Correspondent.objects.create( + name="test", + match="keyword", + matching_algorithm=Correspondent.MATCH_ANY + ) + document_consumption_finished.send( + sender=self.__class__, document=self.doc_contains) + self.assertTrue(self.doc_contains.correspondent == correspondent) + + def test_correspondent_not_applied(self): + Tag.objects.create( + name="test", + match="no-match", + matching_algorithm=Correspondent.MATCH_ANY + ) + document_consumption_finished.send( + sender=self.__class__, document=self.doc_contains) + self.assertEqual(self.doc_contains.correspondent, None) diff --git a/src/documents/tests/test_tags.py b/src/documents/tests/test_tags.py deleted file mode 100644 index e0ab43244..000000000 --- a/src/documents/tests/test_tags.py +++ /dev/null @@ -1,119 +0,0 @@ -from django.test import TestCase - -from ..models import Tag - - -class TestTagMatching(TestCase): - - def test_match_all(self): - - t = Tag.objects.create( - name="Test 0", - match="alpha charlie gamma", - matching_algorithm=Tag.MATCH_ALL - ) - self.assertFalse(t.matches("I have alpha in me")) - self.assertFalse(t.matches("I have charlie in me")) - self.assertFalse(t.matches("I have gamma in me")) - self.assertFalse(t.matches("I have alpha and charlie in me")) - self.assertTrue(t.matches("I have alpha, charlie, and gamma in me")) - self.assertFalse(t.matches("I have alphas, charlie, and gamma in me")) - self.assertFalse(t.matches("I have alphas in me")) - self.assertFalse(t.matches("I have bravo in me")) - - t = Tag.objects.create( - name="Test 1", - match="12 34 56", - matching_algorithm=Tag.MATCH_ALL - ) - self.assertFalse(t.matches("I have 12 in me")) - self.assertFalse(t.matches("I have 34 in me")) - self.assertFalse(t.matches("I have 56 in me")) - self.assertFalse(t.matches("I have 12 and 34 in me")) - self.assertTrue(t.matches("I have 12 34, and 56 in me")) - self.assertFalse(t.matches("I have 120, 34, and 56 in me")) - self.assertFalse(t.matches("I have 123456 in me")) - self.assertFalse(t.matches("I have 01234567 in me")) - - def test_match_any(self): - - t = Tag.objects.create( - name="Test 0", - match="alpha charlie gamma", - matching_algorithm=Tag.MATCH_ANY - ) - - self.assertTrue(t.matches("I have alpha in me")) - self.assertTrue(t.matches("I have charlie in me")) - self.assertTrue(t.matches("I have gamma in me")) - self.assertTrue(t.matches("I have alpha and charlie in me")) - self.assertFalse(t.matches("I have alphas in me")) - self.assertFalse(t.matches("I have bravo in me")) - - t = Tag.objects.create( - name="Test 1", - match="12 34 56", - matching_algorithm=Tag.MATCH_ANY - ) - self.assertTrue(t.matches("I have 12 in me")) - self.assertTrue(t.matches("I have 34 in me")) - self.assertTrue(t.matches("I have 56 in me")) - self.assertTrue(t.matches("I have 12 and 34 in me")) - self.assertTrue(t.matches("I have 12 34, and 56 in me")) - self.assertTrue(t.matches("I have 120, 34, and 560 in me")) - self.assertFalse(t.matches("I have 120, 340, and 560 in me")) - self.assertFalse(t.matches("I have 123456 in me")) - self.assertFalse(t.matches("I have 01234567 in me")) - - def test_match_literal(self): - - t = Tag.objects.create( - name="Test 0", - match="alpha charlie gamma", - matching_algorithm=Tag.MATCH_LITERAL - ) - - self.assertFalse(t.matches("I have alpha in me")) - self.assertFalse(t.matches("I have charlie in me")) - self.assertFalse(t.matches("I have gamma in me")) - self.assertFalse(t.matches("I have alpha and charlie in me")) - self.assertFalse(t.matches("I have alpha, charlie, and gamma in me")) - self.assertFalse(t.matches("I have alphas, charlie, and gamma in me")) - self.assertTrue(t.matches("I have 'alpha charlie gamma' in me")) - self.assertFalse(t.matches("I have alphas in me")) - self.assertFalse(t.matches("I have bravo in me")) - - t = Tag.objects.create( - name="Test 1", - match="12 34 56", - matching_algorithm=Tag.MATCH_LITERAL - ) - self.assertFalse(t.matches("I have 12 in me")) - self.assertFalse(t.matches("I have 34 in me")) - self.assertFalse(t.matches("I have 56 in me")) - self.assertFalse(t.matches("I have 12 and 34 in me")) - self.assertFalse(t.matches("I have 12 34, and 56 in me")) - self.assertFalse(t.matches("I have 120, 34, and 560 in me")) - self.assertFalse(t.matches("I have 120, 340, and 560 in me")) - self.assertFalse(t.matches("I have 123456 in me")) - self.assertFalse(t.matches("I have 01234567 in me")) - self.assertTrue(t.matches("I have 12 34 56 in me")) - - def test_match_regex(self): - - t = Tag.objects.create( - name="Test 0", - match="alpha\w+gamma", - matching_algorithm=Tag.MATCH_REGEX - ) - - self.assertFalse(t.matches("I have alpha in me")) - self.assertFalse(t.matches("I have gamma in me")) - self.assertFalse(t.matches("I have alpha and charlie in me")) - self.assertTrue(t.matches("I have alpha_and_gamma in me")) - self.assertTrue(t.matches("I have alphas_and_gamma in me")) - self.assertFalse(t.matches("I have alpha,and,gamma in me")) - self.assertFalse(t.matches("I have alpha and gamma in me")) - self.assertFalse(t.matches("I have alpha, charlie, and gamma in me")) - self.assertFalse(t.matches("I have alphas, charlie, and gamma in me")) - self.assertFalse(t.matches("I have alphas in me")) From aea9ea50e5febae55503a9921b10b93ef9dcb7e5 Mon Sep 17 00:00:00 2001 From: Daniel Quinn Date: Mon, 28 Mar 2016 14:18:57 +0100 Subject: [PATCH 4/4] Better naming --- src/documents/tests/test_matchables.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/documents/tests/test_matchables.py b/src/documents/tests/test_matchables.py index 637a34c1c..bcd377cf0 100644 --- a/src/documents/tests/test_matchables.py +++ b/src/documents/tests/test_matchables.py @@ -8,7 +8,7 @@ from ..signals import document_consumption_finished class TestMatching(TestCase): - def _truefalse(self, text, algorithm, true, false): + def _test_matching(self, text, algorithm, true, false): for klass in (Tag, Correspondent): instance = klass.objects.create( name=str(randint(10000, 99999)), @@ -22,7 +22,7 @@ class TestMatching(TestCase): def test_match_all(self): - self._truefalse( + self._test_matching( "alpha charlie gamma", "MATCH_ALL", ("I have alpha, charlie, and gamma in me",), @@ -37,7 +37,7 @@ class TestMatching(TestCase): ) ) - self._truefalse( + self._test_matching( "12 34 56", "MATCH_ALL", ( @@ -56,7 +56,7 @@ class TestMatching(TestCase): def test_match_any(self): - self._truefalse( + self._test_matching( "alpha charlie gamma", "MATCH_ANY", ( @@ -72,7 +72,7 @@ class TestMatching(TestCase): ) ) - self._truefalse( + self._test_matching( "12 34 56", "MATCH_ANY", ( @@ -91,7 +91,7 @@ class TestMatching(TestCase): def test_match_literal(self): - self._truefalse( + self._test_matching( "alpha charlie gamma", "MATCH_LITERAL", ( @@ -109,7 +109,7 @@ class TestMatching(TestCase): ) ) - self._truefalse( + self._test_matching( "12 34 56", "MATCH_LITERAL", ( @@ -130,7 +130,7 @@ class TestMatching(TestCase): def test_match_regex(self): - self._truefalse( + self._test_matching( "alpha\w+gamma", "MATCH_REGEX", (