mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
264 lines
8.5 KiB
Python
264 lines
8.5 KiB
Python
import shutil
|
|
import tempfile
|
|
from random import randint
|
|
|
|
from django.contrib.admin.models import LogEntry
|
|
from django.contrib.auth.models import User
|
|
from django.test import TestCase, override_settings
|
|
|
|
from .. import matching
|
|
from ..models import Correspondent, Document, Tag, DocumentType
|
|
from ..signals import document_consumption_finished
|
|
|
|
|
|
class TestMatching(TestCase):
|
|
def _test_matching(self, text, algorithm, true, false):
|
|
for klass in (Tag, Correspondent, DocumentType):
|
|
instance = klass.objects.create(
|
|
name=str(randint(10000, 99999)),
|
|
match=text,
|
|
matching_algorithm=getattr(klass, algorithm),
|
|
)
|
|
for string in true:
|
|
doc = Document(content=string)
|
|
self.assertTrue(
|
|
matching.matches(instance, doc),
|
|
'"%s" should match "%s" but it does not' % (text, string),
|
|
)
|
|
for string in false:
|
|
doc = Document(content=string)
|
|
self.assertFalse(
|
|
matching.matches(instance, doc),
|
|
'"%s" should not match "%s" but it does' % (text, string),
|
|
)
|
|
|
|
def test_match_all(self):
|
|
|
|
self._test_matching(
|
|
"alpha charlie gamma",
|
|
"MATCH_ALL",
|
|
("I have alpha, charlie, and gamma in me",),
|
|
(
|
|
"I have alpha in me",
|
|
"I have charlie in me",
|
|
"I have gamma in me",
|
|
"I have alpha and charlie in me",
|
|
"I have alphas, charlie, and gamma in me",
|
|
"I have alphas in me",
|
|
"I have bravo in me",
|
|
),
|
|
)
|
|
|
|
self._test_matching(
|
|
"12 34 56",
|
|
"MATCH_ALL",
|
|
("I have 12 34, and 56 in me",),
|
|
(
|
|
"I have 12 in me",
|
|
"I have 34 in me",
|
|
"I have 56 in me",
|
|
"I have 12 and 34 in me",
|
|
"I have 120, 34, and 56 in me",
|
|
"I have 123456 in me",
|
|
"I have 01234567 in me",
|
|
),
|
|
)
|
|
|
|
self._test_matching(
|
|
'brown fox "lazy dogs"',
|
|
"MATCH_ALL",
|
|
(
|
|
"the quick brown fox jumped over the lazy dogs",
|
|
"the quick brown fox jumped over the lazy dogs",
|
|
),
|
|
(
|
|
"the quick fox jumped over the lazy dogs",
|
|
"the quick brown wolf jumped over the lazy dogs",
|
|
"the quick brown fox jumped over the fat dogs",
|
|
"the quick brown fox jumped over the lazy... dogs",
|
|
),
|
|
)
|
|
|
|
def test_match_any(self):
|
|
|
|
self._test_matching(
|
|
"alpha charlie gamma",
|
|
"MATCH_ANY",
|
|
(
|
|
"I have alpha in me",
|
|
"I have charlie in me",
|
|
"I have gamma in me",
|
|
"I have alpha, charlie, and gamma in me",
|
|
"I have alpha and charlie in me",
|
|
),
|
|
(
|
|
"I have alphas in me",
|
|
"I have bravo in me",
|
|
),
|
|
)
|
|
|
|
self._test_matching(
|
|
"12 34 56",
|
|
"MATCH_ANY",
|
|
(
|
|
"I have 12 in me",
|
|
"I have 34 in me",
|
|
"I have 56 in me",
|
|
"I have 12 and 34 in me",
|
|
"I have 12, 34, and 56 in me",
|
|
"I have 120, 34, and 56 in me",
|
|
),
|
|
(
|
|
"I have 123456 in me",
|
|
"I have 01234567 in me",
|
|
),
|
|
)
|
|
|
|
self._test_matching(
|
|
'"brown fox" " lazy dogs "',
|
|
"MATCH_ANY",
|
|
(
|
|
"the quick brown fox",
|
|
"jumped over the lazy dogs.",
|
|
),
|
|
("the lazy fox jumped over the brown dogs",),
|
|
)
|
|
|
|
def test_match_literal(self):
|
|
|
|
self._test_matching(
|
|
"alpha charlie gamma",
|
|
"MATCH_LITERAL",
|
|
("I have 'alpha charlie gamma' in me",),
|
|
(
|
|
"I have alpha in me",
|
|
"I have charlie in me",
|
|
"I have gamma in me",
|
|
"I have alpha and charlie in me",
|
|
"I have alpha, charlie, and gamma in me",
|
|
"I have alphas, charlie, and gamma in me",
|
|
"I have alphas in me",
|
|
"I have bravo in me",
|
|
),
|
|
)
|
|
|
|
self._test_matching(
|
|
"12 34 56",
|
|
"MATCH_LITERAL",
|
|
("I have 12 34 56 in me",),
|
|
(
|
|
"I have 12 in me",
|
|
"I have 34 in me",
|
|
"I have 56 in me",
|
|
"I have 12 and 34 in me",
|
|
"I have 12 34, and 56 in me",
|
|
"I have 120, 34, and 560 in me",
|
|
"I have 120, 340, and 560 in me",
|
|
"I have 123456 in me",
|
|
"I have 01234567 in me",
|
|
),
|
|
)
|
|
|
|
def test_match_regex(self):
|
|
|
|
self._test_matching(
|
|
"alpha\w+gamma",
|
|
"MATCH_REGEX",
|
|
(
|
|
"I have alpha_and_gamma in me",
|
|
"I have alphas_and_gamma in me",
|
|
),
|
|
(
|
|
"I have alpha in me",
|
|
"I have gamma in me",
|
|
"I have alpha and charlie in me",
|
|
"I have alpha,and,gamma in me",
|
|
"I have alpha and gamma in me",
|
|
"I have alpha, charlie, and gamma in me",
|
|
"I have alphas, charlie, and gamma in me",
|
|
"I have alphas in me",
|
|
),
|
|
)
|
|
|
|
def test_tach_invalid_regex(self):
|
|
self._test_matching("[[", "MATCH_REGEX", [], ["Don't match this"])
|
|
|
|
def test_match_fuzzy(self):
|
|
|
|
self._test_matching(
|
|
"Springfield, Miss.",
|
|
"MATCH_FUZZY",
|
|
(
|
|
"1220 Main Street, Springf eld, Miss.",
|
|
"1220 Main Street, Spring field, Miss.",
|
|
"1220 Main Street, Springfeld, Miss.",
|
|
"1220 Main Street Springfield Miss",
|
|
),
|
|
("1220 Main Street, Springfield, Mich.",),
|
|
)
|
|
|
|
|
|
@override_settings(POST_CONSUME_SCRIPT=None)
|
|
class TestDocumentConsumptionFinishedSignal(TestCase):
|
|
"""
|
|
We make use of document_consumption_finished, so we should test that it's
|
|
doing what we expect wrt to tag & correspondent matching.
|
|
"""
|
|
|
|
def setUp(self):
|
|
TestCase.setUp(self)
|
|
User.objects.create_user(username="test_consumer", password="12345")
|
|
self.doc_contains = Document.objects.create(
|
|
content="I contain the keyword.", mime_type="application/pdf"
|
|
)
|
|
|
|
self.index_dir = tempfile.mkdtemp()
|
|
# TODO: we should not need the index here.
|
|
override_settings(INDEX_DIR=self.index_dir).enable()
|
|
|
|
def tearDown(self) -> None:
|
|
shutil.rmtree(self.index_dir, ignore_errors=True)
|
|
|
|
def test_tag_applied_any(self):
|
|
t1 = Tag.objects.create(
|
|
name="test", match="keyword", matching_algorithm=Tag.MATCH_ANY
|
|
)
|
|
document_consumption_finished.send(
|
|
sender=self.__class__, document=self.doc_contains
|
|
)
|
|
self.assertTrue(list(self.doc_contains.tags.all()) == [t1])
|
|
|
|
def test_tag_not_applied(self):
|
|
Tag.objects.create(
|
|
name="test", match="no-match", matching_algorithm=Tag.MATCH_ANY
|
|
)
|
|
document_consumption_finished.send(
|
|
sender=self.__class__, document=self.doc_contains
|
|
)
|
|
self.assertTrue(list(self.doc_contains.tags.all()) == [])
|
|
|
|
def test_correspondent_applied(self):
|
|
correspondent = Correspondent.objects.create(
|
|
name="test", match="keyword", matching_algorithm=Correspondent.MATCH_ANY
|
|
)
|
|
document_consumption_finished.send(
|
|
sender=self.__class__, document=self.doc_contains
|
|
)
|
|
self.assertTrue(self.doc_contains.correspondent == correspondent)
|
|
|
|
def test_correspondent_not_applied(self):
|
|
Tag.objects.create(
|
|
name="test", match="no-match", matching_algorithm=Correspondent.MATCH_ANY
|
|
)
|
|
document_consumption_finished.send(
|
|
sender=self.__class__, document=self.doc_contains
|
|
)
|
|
self.assertEqual(self.doc_contains.correspondent, None)
|
|
|
|
def test_logentry_created(self):
|
|
document_consumption_finished.send(
|
|
sender=self.__class__, document=self.doc_contains
|
|
)
|
|
|
|
self.assertEqual(LogEntry.objects.count(), 1)
|