From b4e648e1e395babd7799cdd23a8c21a09d47fed4 Mon Sep 17 00:00:00 2001 From: Daniel Quinn Date: Mon, 28 Mar 2016 14:16:26 +0100 Subject: [PATCH] Test All The Things --- src/documents/consumer.py | 2 +- src/documents/signals/handlers.py | 4 +- src/documents/tests/test_matchables.py | 196 +++++++++++++++++++++++++ src/documents/tests/test_tags.py | 119 --------------- 4 files changed, 199 insertions(+), 122 deletions(-) create mode 100644 src/documents/tests/test_matchables.py delete mode 100644 src/documents/tests/test_tags.py diff --git a/src/documents/consumer.py b/src/documents/consumer.py index 4044d3dff..dcf81252f 100644 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -23,7 +23,7 @@ from pyocr.tesseract import TesseractError from paperless.db import GnuPG -from .models import Tag, Document, Log, FileInfo +from .models import Tag, Document, FileInfo from .languages import ISO639 from .signals import ( document_consumption_started, document_consumption_finished) diff --git a/src/documents/signals/handlers.py b/src/documents/signals/handlers.py index 566fd0f76..4e0599ccd 100644 --- a/src/documents/signals/handlers.py +++ b/src/documents/signals/handlers.py @@ -14,7 +14,7 @@ def set_correspondent(sender, document=None, logging_group=None, **kwargs): return # No matching correspondents, so no need to continue - potential_correspondents = Correspondent.match_all(document.content) + potential_correspondents = list(Correspondent.match_all(document.content)) if not potential_correspondents: return @@ -33,7 +33,7 @@ def set_correspondent(sender, document=None, logging_group=None, **kwargs): ) document.correspondent = selected - document.save(update_fields="correspondent") + document.save(update_fields=("correspondent",)) def set_tags(sender, document=None, logging_group=None, **kwargs): diff --git a/src/documents/tests/test_matchables.py b/src/documents/tests/test_matchables.py new file mode 100644 index 000000000..637a34c1c --- /dev/null +++ 
b/src/documents/tests/test_matchables.py @@ -0,0 +1,196 @@ +from random import randint + +from django.test import TestCase + +from ..models import Correspondent, Document, Tag +from ..signals import document_consumption_finished + + +class TestMatching(TestCase): + + def _truefalse(self, text, algorithm, true, false): + for klass in (Tag, Correspondent): + instance = klass.objects.create( + name=str(randint(10000, 99999)), + match=text, + matching_algorithm=getattr(klass, algorithm) + ) + for string in true: + self.assertTrue(instance.matches(string)) + for string in false: + self.assertFalse(instance.matches(string)) + + def test_match_all(self): + + self._truefalse( + "alpha charlie gamma", + "MATCH_ALL", + ("I have alpha, charlie, and gamma in me",), + ( + "I have alpha in me", + "I have charlie in me", + "I have gamma in me", + "I have alpha and charlie in me", + "I have alphas, charlie, and gamma in me", + "I have alphas in me", + "I have bravo in me", + ) + ) + + self._truefalse( + "12 34 56", + "MATCH_ALL", + ( + "I have 12 34, and 56 in me", + ), + ( + "I have 12 in me", + "I have 34 in me", + "I have 56 in me", + "I have 12 and 34 in me", + "I have 120, 34, and 56 in me", + "I have 123456 in me", + "I have 01234567 in me", + ) + ) + + def test_match_any(self): + + self._truefalse( + "alpha charlie gamma", + "MATCH_ANY", + ( + "I have alpha in me", + "I have charlie in me", + "I have gamma in me", + "I have alpha, charlie, and gamma in me", + "I have alpha and charlie in me", + ), + ( + "I have alphas in me", + "I have bravo in me", + ) + ) + + self._truefalse( + "12 34 56", + "MATCH_ANY", + ( + "I have 12 in me", + "I have 34 in me", + "I have 56 in me", + "I have 12 and 34 in me", + "I have 12, 34, and 56 in me", + "I have 120, 34, and 56 in me", + ), + ( + "I have 123456 in me", + "I have 01234567 in me", + ) + ) + + def test_match_literal(self): + + self._truefalse( + "alpha charlie gamma", + "MATCH_LITERAL", + ( + "I have 'alpha charlie gamma' in me", 
+            ),
+            (
+                "I have alpha in me",
+                "I have charlie in me",
+                "I have gamma in me",
+                "I have alpha and charlie in me",
+                "I have alpha, charlie, and gamma in me",
+                "I have alphas, charlie, and gamma in me",
+                "I have alphas in me",
+                "I have bravo in me",
+            )
+        )
+
+        self._truefalse(
+            "12 34 56",
+            "MATCH_LITERAL",
+            (
+                "I have 12 34 56 in me",
+            ),
+            (
+                "I have 12 in me",
+                "I have 34 in me",
+                "I have 56 in me",
+                "I have 12 and 34 in me",
+                "I have 12 34, and 56 in me",
+                "I have 120, 34, and 560 in me",
+                "I have 120, 340, and 560 in me",
+                "I have 123456 in me",
+                "I have 01234567 in me",
+            )
+        )
+
+    def test_match_regex(self):
+        # Only word characters may join "alpha" and "gamma" for a match.
+        self._truefalse(
+            r"alpha\w+gamma",  # raw: keep \w intact for the regex engine
+            "MATCH_REGEX",
+            (
+                "I have alpha_and_gamma in me",
+                "I have alphas_and_gamma in me",
+            ),
+            (
+                "I have alpha in me",
+                "I have gamma in me",
+                "I have alpha and charlie in me",
+                "I have alpha,and,gamma in me",
+                "I have alpha and gamma in me",
+                "I have alpha, charlie, and gamma in me",
+                "I have alphas, charlie, and gamma in me",
+                "I have alphas in me",
+            )
+        )
+
+
+class TestApplications(TestCase):
+    """
+    We make use of document_consumption_finished, so we should test that it's
+    doing what we expect wrt tag & correspondent matching.
+    """
+
+    def setUp(self):  # one document whose content contains "keyword"
+        TestCase.setUp(self)
+        self.doc_contains = Document.objects.create(
+            content="I contain the keyword.", file_type="pdf")
+
+    def test_tag_applied_any(self):
+        t1 = Tag.objects.create(
+            name="test", match="keyword", matching_algorithm=Tag.MATCH_ANY)
+        document_consumption_finished.send(
+            sender=self.__class__, document=self.doc_contains)
+        self.assertEqual(list(self.doc_contains.tags.all()), [t1])
+
+    def test_tag_not_applied(self):
+        Tag.objects.create(
+            name="test", match="no-match", matching_algorithm=Tag.MATCH_ANY)
+        document_consumption_finished.send(
+            sender=self.__class__, document=self.doc_contains)
+        self.assertEqual(list(self.doc_contains.tags.all()), [])
+
+    def test_correspondent_applied(self):
+        correspondent = Correspondent.objects.create(
+            name="test",
+            match="keyword",
+            matching_algorithm=Correspondent.MATCH_ANY
+        )
+        document_consumption_finished.send(
+            sender=self.__class__, document=self.doc_contains)
+        self.assertEqual(self.doc_contains.correspondent, correspondent)
+
+    def test_correspondent_not_applied(self):
+        Correspondent.objects.create(  # must be a Correspondent, not a Tag
+            name="test",
+            match="no-match",
+            matching_algorithm=Correspondent.MATCH_ANY
+        )
+        document_consumption_finished.send(
+            sender=self.__class__, document=self.doc_contains)
+        self.assertIsNone(self.doc_contains.correspondent)
diff --git a/src/documents/tests/test_tags.py b/src/documents/tests/test_tags.py
deleted file mode 100644
index e0ab43244..000000000
--- a/src/documents/tests/test_tags.py
+++ /dev/null
@@ -1,119 +0,0 @@
-from django.test import TestCase
-
-from ..models import Tag
-
-
-class TestTagMatching(TestCase):
-
-    def test_match_all(self):
-
-        t = Tag.objects.create(
-            name="Test 0",
-            match="alpha charlie gamma",
-            matching_algorithm=Tag.MATCH_ALL
-        )
-        self.assertFalse(t.matches("I have alpha in me"))
-        self.assertFalse(t.matches("I have charlie in me"))
-        self.assertFalse(t.matches("I have gamma in me"))
-        self.assertFalse(t.matches("I have alpha and 
charlie in me")) - self.assertTrue(t.matches("I have alpha, charlie, and gamma in me")) - self.assertFalse(t.matches("I have alphas, charlie, and gamma in me")) - self.assertFalse(t.matches("I have alphas in me")) - self.assertFalse(t.matches("I have bravo in me")) - - t = Tag.objects.create( - name="Test 1", - match="12 34 56", - matching_algorithm=Tag.MATCH_ALL - ) - self.assertFalse(t.matches("I have 12 in me")) - self.assertFalse(t.matches("I have 34 in me")) - self.assertFalse(t.matches("I have 56 in me")) - self.assertFalse(t.matches("I have 12 and 34 in me")) - self.assertTrue(t.matches("I have 12 34, and 56 in me")) - self.assertFalse(t.matches("I have 120, 34, and 56 in me")) - self.assertFalse(t.matches("I have 123456 in me")) - self.assertFalse(t.matches("I have 01234567 in me")) - - def test_match_any(self): - - t = Tag.objects.create( - name="Test 0", - match="alpha charlie gamma", - matching_algorithm=Tag.MATCH_ANY - ) - - self.assertTrue(t.matches("I have alpha in me")) - self.assertTrue(t.matches("I have charlie in me")) - self.assertTrue(t.matches("I have gamma in me")) - self.assertTrue(t.matches("I have alpha and charlie in me")) - self.assertFalse(t.matches("I have alphas in me")) - self.assertFalse(t.matches("I have bravo in me")) - - t = Tag.objects.create( - name="Test 1", - match="12 34 56", - matching_algorithm=Tag.MATCH_ANY - ) - self.assertTrue(t.matches("I have 12 in me")) - self.assertTrue(t.matches("I have 34 in me")) - self.assertTrue(t.matches("I have 56 in me")) - self.assertTrue(t.matches("I have 12 and 34 in me")) - self.assertTrue(t.matches("I have 12 34, and 56 in me")) - self.assertTrue(t.matches("I have 120, 34, and 560 in me")) - self.assertFalse(t.matches("I have 120, 340, and 560 in me")) - self.assertFalse(t.matches("I have 123456 in me")) - self.assertFalse(t.matches("I have 01234567 in me")) - - def test_match_literal(self): - - t = Tag.objects.create( - name="Test 0", - match="alpha charlie gamma", - 
matching_algorithm=Tag.MATCH_LITERAL - ) - - self.assertFalse(t.matches("I have alpha in me")) - self.assertFalse(t.matches("I have charlie in me")) - self.assertFalse(t.matches("I have gamma in me")) - self.assertFalse(t.matches("I have alpha and charlie in me")) - self.assertFalse(t.matches("I have alpha, charlie, and gamma in me")) - self.assertFalse(t.matches("I have alphas, charlie, and gamma in me")) - self.assertTrue(t.matches("I have 'alpha charlie gamma' in me")) - self.assertFalse(t.matches("I have alphas in me")) - self.assertFalse(t.matches("I have bravo in me")) - - t = Tag.objects.create( - name="Test 1", - match="12 34 56", - matching_algorithm=Tag.MATCH_LITERAL - ) - self.assertFalse(t.matches("I have 12 in me")) - self.assertFalse(t.matches("I have 34 in me")) - self.assertFalse(t.matches("I have 56 in me")) - self.assertFalse(t.matches("I have 12 and 34 in me")) - self.assertFalse(t.matches("I have 12 34, and 56 in me")) - self.assertFalse(t.matches("I have 120, 34, and 560 in me")) - self.assertFalse(t.matches("I have 120, 340, and 560 in me")) - self.assertFalse(t.matches("I have 123456 in me")) - self.assertFalse(t.matches("I have 01234567 in me")) - self.assertTrue(t.matches("I have 12 34 56 in me")) - - def test_match_regex(self): - - t = Tag.objects.create( - name="Test 0", - match="alpha\w+gamma", - matching_algorithm=Tag.MATCH_REGEX - ) - - self.assertFalse(t.matches("I have alpha in me")) - self.assertFalse(t.matches("I have gamma in me")) - self.assertFalse(t.matches("I have alpha and charlie in me")) - self.assertTrue(t.matches("I have alpha_and_gamma in me")) - self.assertTrue(t.matches("I have alphas_and_gamma in me")) - self.assertFalse(t.matches("I have alpha,and,gamma in me")) - self.assertFalse(t.matches("I have alpha and gamma in me")) - self.assertFalse(t.matches("I have alpha, charlie, and gamma in me")) - self.assertFalse(t.matches("I have alphas, charlie, and gamma in me")) - self.assertFalse(t.matches("I have alphas in me"))