mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-07-28 18:24:38 -05:00
unified document matching, legacy and automatching work alongside now
This commit is contained in:
@@ -1,17 +1,18 @@
|
||||
import factory
|
||||
from factory import Faker
|
||||
from factory.django import DjangoModelFactory
|
||||
|
||||
from ..models import Document, Correspondent
|
||||
|
||||
|
||||
class CorrespondentFactory(factory.DjangoModelFactory):
|
||||
class CorrespondentFactory(DjangoModelFactory):
|
||||
|
||||
class Meta:
|
||||
model = Correspondent
|
||||
|
||||
name = factory.Faker("name")
|
||||
name = Faker("name")
|
||||
|
||||
|
||||
class DocumentFactory(factory.DjangoModelFactory):
|
||||
class DocumentFactory(DjangoModelFactory):
|
||||
|
||||
class Meta:
|
||||
model = Document
|
||||
|
256
src/documents/tests/test_matchables.py
Normal file
256
src/documents/tests/test_matchables.py
Normal file
@@ -0,0 +1,256 @@
|
||||
from random import randint
|
||||
|
||||
from django.contrib.admin.models import LogEntry
|
||||
from django.contrib.auth.models import User
|
||||
from django.test import TestCase, override_settings
|
||||
|
||||
from .. import matching
|
||||
from ..models import Correspondent, Document, Tag, DocumentType
|
||||
from ..signals import document_consumption_finished
|
||||
|
||||
|
||||
class TestMatching(TestCase):
|
||||
|
||||
def _test_matching(self, text, algorithm, true, false):
|
||||
for klass in (Tag, Correspondent, DocumentType):
|
||||
instance = klass.objects.create(
|
||||
name=str(randint(10000, 99999)),
|
||||
match=text,
|
||||
matching_algorithm=getattr(klass, algorithm)
|
||||
)
|
||||
for string in true:
|
||||
self.assertTrue(
|
||||
matching.matches(instance, string),
|
||||
'"%s" should match "%s" but it does not' % (text, string)
|
||||
)
|
||||
for string in false:
|
||||
self.assertFalse(
|
||||
matching.matches(instance, string),
|
||||
'"%s" should not match "%s" but it does' % (text, string)
|
||||
)
|
||||
|
||||
def test_match_all(self):
|
||||
|
||||
self._test_matching(
|
||||
"alpha charlie gamma",
|
||||
"MATCH_ALL",
|
||||
("I have alpha, charlie, and gamma in me",),
|
||||
(
|
||||
"I have alpha in me",
|
||||
"I have charlie in me",
|
||||
"I have gamma in me",
|
||||
"I have alpha and charlie in me",
|
||||
"I have alphas, charlie, and gamma in me",
|
||||
"I have alphas in me",
|
||||
"I have bravo in me",
|
||||
)
|
||||
)
|
||||
|
||||
self._test_matching(
|
||||
"12 34 56",
|
||||
"MATCH_ALL",
|
||||
(
|
||||
"I have 12 34, and 56 in me",
|
||||
),
|
||||
(
|
||||
"I have 12 in me",
|
||||
"I have 34 in me",
|
||||
"I have 56 in me",
|
||||
"I have 12 and 34 in me",
|
||||
"I have 120, 34, and 56 in me",
|
||||
"I have 123456 in me",
|
||||
"I have 01234567 in me",
|
||||
)
|
||||
)
|
||||
|
||||
self._test_matching(
|
||||
'brown fox "lazy dogs"',
|
||||
"MATCH_ALL",
|
||||
(
|
||||
"the quick brown fox jumped over the lazy dogs",
|
||||
"the quick brown fox jumped over the lazy dogs",
|
||||
),
|
||||
(
|
||||
"the quick fox jumped over the lazy dogs",
|
||||
"the quick brown wolf jumped over the lazy dogs",
|
||||
"the quick brown fox jumped over the fat dogs",
|
||||
"the quick brown fox jumped over the lazy... dogs",
|
||||
)
|
||||
)
|
||||
|
||||
def test_match_any(self):
|
||||
|
||||
self._test_matching(
|
||||
"alpha charlie gamma",
|
||||
"MATCH_ANY",
|
||||
(
|
||||
"I have alpha in me",
|
||||
"I have charlie in me",
|
||||
"I have gamma in me",
|
||||
"I have alpha, charlie, and gamma in me",
|
||||
"I have alpha and charlie in me",
|
||||
),
|
||||
(
|
||||
"I have alphas in me",
|
||||
"I have bravo in me",
|
||||
)
|
||||
)
|
||||
|
||||
self._test_matching(
|
||||
"12 34 56",
|
||||
"MATCH_ANY",
|
||||
(
|
||||
"I have 12 in me",
|
||||
"I have 34 in me",
|
||||
"I have 56 in me",
|
||||
"I have 12 and 34 in me",
|
||||
"I have 12, 34, and 56 in me",
|
||||
"I have 120, 34, and 56 in me",
|
||||
),
|
||||
(
|
||||
"I have 123456 in me",
|
||||
"I have 01234567 in me",
|
||||
)
|
||||
)
|
||||
|
||||
self._test_matching(
|
||||
'"brown fox" " lazy dogs "',
|
||||
"MATCH_ANY",
|
||||
(
|
||||
"the quick brown fox",
|
||||
"jumped over the lazy dogs.",
|
||||
),
|
||||
(
|
||||
"the lazy fox jumped over the brown dogs",
|
||||
)
|
||||
)
|
||||
|
||||
def test_match_literal(self):
|
||||
|
||||
self._test_matching(
|
||||
"alpha charlie gamma",
|
||||
"MATCH_LITERAL",
|
||||
(
|
||||
"I have 'alpha charlie gamma' in me",
|
||||
),
|
||||
(
|
||||
"I have alpha in me",
|
||||
"I have charlie in me",
|
||||
"I have gamma in me",
|
||||
"I have alpha and charlie in me",
|
||||
"I have alpha, charlie, and gamma in me",
|
||||
"I have alphas, charlie, and gamma in me",
|
||||
"I have alphas in me",
|
||||
"I have bravo in me",
|
||||
)
|
||||
)
|
||||
|
||||
self._test_matching(
|
||||
"12 34 56",
|
||||
"MATCH_LITERAL",
|
||||
(
|
||||
"I have 12 34 56 in me",
|
||||
),
|
||||
(
|
||||
"I have 12 in me",
|
||||
"I have 34 in me",
|
||||
"I have 56 in me",
|
||||
"I have 12 and 34 in me",
|
||||
"I have 12 34, and 56 in me",
|
||||
"I have 120, 34, and 560 in me",
|
||||
"I have 120, 340, and 560 in me",
|
||||
"I have 123456 in me",
|
||||
"I have 01234567 in me",
|
||||
)
|
||||
)
|
||||
|
||||
def test_match_regex(self):
|
||||
|
||||
self._test_matching(
|
||||
r"alpha\w+gamma",
|
||||
"MATCH_REGEX",
|
||||
(
|
||||
"I have alpha_and_gamma in me",
|
||||
"I have alphas_and_gamma in me",
|
||||
),
|
||||
(
|
||||
"I have alpha in me",
|
||||
"I have gamma in me",
|
||||
"I have alpha and charlie in me",
|
||||
"I have alpha,and,gamma in me",
|
||||
"I have alpha and gamma in me",
|
||||
"I have alpha, charlie, and gamma in me",
|
||||
"I have alphas, charlie, and gamma in me",
|
||||
"I have alphas in me",
|
||||
)
|
||||
)
|
||||
|
||||
def test_match_fuzzy(self):
|
||||
|
||||
self._test_matching(
|
||||
"Springfield, Miss.",
|
||||
"MATCH_FUZZY",
|
||||
(
|
||||
"1220 Main Street, Springf eld, Miss.",
|
||||
"1220 Main Street, Spring field, Miss.",
|
||||
"1220 Main Street, Springfeld, Miss.",
|
||||
"1220 Main Street Springfield Miss",
|
||||
),
|
||||
(
|
||||
"1220 Main Street, Springfield, Mich.",
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
@override_settings(POST_CONSUME_SCRIPT=None)
|
||||
class TestDocumentConsumptionFinishedSignal(TestCase):
|
||||
"""
|
||||
We make use of document_consumption_finished, so we should test that it's
|
||||
doing what we expect wrt to tag & correspondent matching.
|
||||
"""
|
||||
|
||||
def setUp(self):
|
||||
TestCase.setUp(self)
|
||||
User.objects.create_user(username='test_consumer', password='12345')
|
||||
self.doc_contains = Document.objects.create(
|
||||
content="I contain the keyword.", file_type="pdf")
|
||||
|
||||
def test_tag_applied_any(self):
|
||||
t1 = Tag.objects.create(
|
||||
name="test", match="keyword", matching_algorithm=Tag.MATCH_ANY)
|
||||
document_consumption_finished.send(
|
||||
sender=self.__class__, document=self.doc_contains)
|
||||
self.assertTrue(list(self.doc_contains.tags.all()) == [t1])
|
||||
|
||||
def test_tag_not_applied(self):
|
||||
Tag.objects.create(
|
||||
name="test", match="no-match", matching_algorithm=Tag.MATCH_ANY)
|
||||
document_consumption_finished.send(
|
||||
sender=self.__class__, document=self.doc_contains)
|
||||
self.assertTrue(list(self.doc_contains.tags.all()) == [])
|
||||
|
||||
def test_correspondent_applied(self):
|
||||
correspondent = Correspondent.objects.create(
|
||||
name="test",
|
||||
match="keyword",
|
||||
matching_algorithm=Correspondent.MATCH_ANY
|
||||
)
|
||||
document_consumption_finished.send(
|
||||
sender=self.__class__, document=self.doc_contains)
|
||||
self.assertTrue(self.doc_contains.correspondent == correspondent)
|
||||
|
||||
def test_correspondent_not_applied(self):
|
||||
Tag.objects.create(
|
||||
name="test",
|
||||
match="no-match",
|
||||
matching_algorithm=Correspondent.MATCH_ANY
|
||||
)
|
||||
document_consumption_finished.send(
|
||||
sender=self.__class__, document=self.doc_contains)
|
||||
self.assertEqual(self.doc_contains.correspondent, None)
|
||||
|
||||
def test_logentry_created(self):
|
||||
document_consumption_finished.send(
|
||||
sender=self.__class__, document=self.doc_contains)
|
||||
|
||||
self.assertEqual(LogEntry.objects.count(), 1)
|
Reference in New Issue
Block a user