Document index accent folding

This commit is contained in:
shamoon 2024-04-14 21:16:52 -07:00
parent f009d9868e
commit 9cf9f239ed
2 changed files with 11 additions and 5 deletions

View File

@@ -15,6 +15,8 @@ from guardian.shortcuts import get_users_with_perms
from whoosh import classify
from whoosh import highlight
from whoosh import query
from whoosh.analysis import CharsetFilter
from whoosh.analysis import StemmingAnalyzer
from whoosh.fields import BOOLEAN
from whoosh.fields import DATETIME
from whoosh.fields import KEYWORD
@@ -34,6 +36,7 @@ from whoosh.qparser.plugins import FieldsPlugin
from whoosh.scoring import TF_IDF
from whoosh.searching import ResultsPage
from whoosh.searching import Searcher
from whoosh.support.charset import accent_map
from whoosh.util.times import timespan
from whoosh.writing import AsyncWriter
@@ -46,10 +49,13 @@ logger = logging.getLogger("paperless.index")
def get_schema():
# add accent-folding filter to a stemming analyzer:
af_analyzer = StemmingAnalyzer() | CharsetFilter(accent_map)
return Schema(
id=NUMERIC(stored=True, unique=True),
title=TEXT(sortable=True),
content=TEXT(),
title=TEXT(sortable=True, analyzer=af_analyzer),
content=TEXT(analyzer=af_analyzer),
asn=NUMERIC(sortable=True, signed=False),
correspondent=TEXT(sortable=True),
correspondent_id=NUMERIC(),

View File

@@ -552,7 +552,7 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
response = self.client.get("/api/search/autocomplete/?term=app")
self.assertEqual(response.status_code, status.HTTP_200_OK)
self.assertEqual(response.data, [b"apples", b"applebaum", b"appletini"])
self.assertEqual(response.data, [b"appl", b"applebaum", b"appletini"])
d3.owner = u2
@@ -561,7 +561,7 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
response = self.client.get("/api/search/autocomplete/?term=app")
self.assertEqual(response.status_code, status.HTTP_200_OK)
self.assertEqual(response.data, [b"apples", b"applebaum"])
self.assertEqual(response.data, [b"appl", b"applebaum"])
assign_perm("view_document", u1, d3)
@@ -570,7 +570,7 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
response = self.client.get("/api/search/autocomplete/?term=app")
self.assertEqual(response.status_code, status.HTTP_200_OK)
self.assertEqual(response.data, [b"apples", b"applebaum", b"appletini"])
self.assertEqual(response.data, [b"appl", b"applebaum", b"appletini"])
def test_search_autocomplete_field_name_match(self):
"""