diff --git a/src/documents/index.py b/src/documents/index.py index 10de04245..f0f2b4047 100644 --- a/src/documents/index.py +++ b/src/documents/index.py @@ -17,6 +17,8 @@ from guardian.shortcuts import get_users_with_perms from whoosh import classify from whoosh import highlight from whoosh import query +from whoosh.analysis import CharsetFilter +from whoosh.analysis import StemmingAnalyzer from whoosh.fields import BOOLEAN from whoosh.fields import DATETIME from whoosh.fields import KEYWORD @@ -36,6 +38,7 @@ from whoosh.qparser.dateparse import DateParserPlugin from whoosh.qparser.dateparse import English from whoosh.qparser.plugins import FieldsPlugin from whoosh.scoring import TF_IDF +from whoosh.support.charset import accent_map from whoosh.util.times import timespan from whoosh.writing import AsyncWriter @@ -54,10 +57,13 @@ logger = logging.getLogger("paperless.index") def get_schema() -> Schema: + # add accent-folding filter to a stemming analyzer: + af_analyzer = StemmingAnalyzer() | CharsetFilter(accent_map) + return Schema( id=NUMERIC(stored=True, unique=True), - title=TEXT(sortable=True), - content=TEXT(), + title=TEXT(sortable=True, analyzer=af_analyzer), + content=TEXT(analyzer=af_analyzer), asn=NUMERIC(sortable=True, signed=False), correspondent=TEXT(sortable=True), correspondent_id=NUMERIC(), diff --git a/src/documents/tests/test_api_search.py b/src/documents/tests/test_api_search.py index 8f316c145..65f497d67 100644 --- a/src/documents/tests/test_api_search.py +++ b/src/documents/tests/test_api_search.py @@ -557,7 +557,7 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase): response = self.client.get("/api/search/autocomplete/?term=app") self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertEqual(response.data, [b"apples", b"applebaum", b"appletini"]) + self.assertEqual(response.data, [b"appl", b"applebaum", b"appletini"]) d3.owner = u2 @@ -566,7 +566,7 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase): response = self.client.get("/api/search/autocomplete/?term=app") self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertEqual(response.data, [b"apples", b"applebaum"]) + self.assertEqual(response.data, [b"appl", b"applebaum"]) assign_perm("view_document", u1, d3) @@ -575,7 +575,7 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase): response = self.client.get("/api/search/autocomplete/?term=app") self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertEqual(response.data, [b"apples", b"applebaum", b"appletini"]) + self.assertEqual(response.data, [b"appl", b"applebaum", b"appletini"]) def test_search_autocomplete_field_name_match(self): """