paginated search results

This commit is contained in:
Jonas Winkler 2020-11-02 12:23:50 +01:00
parent 5e3d05322e
commit 2ff5487510
2 changed files with 39 additions and 26 deletions

View File

@@ -1,12 +1,8 @@
from collections import Iterable
from django.db import models from django.db import models
from django.dispatch import receiver from django.dispatch import receiver
from whoosh.fields import Schema, TEXT, NUMERIC, DATETIME, KEYWORD from whoosh.fields import Schema, TEXT, NUMERIC
from whoosh.highlight import Formatter, get_text from whoosh.highlight import Formatter, get_text
from whoosh.index import create_in, exists_in, open_dir from whoosh.index import create_in, exists_in, open_dir
from whoosh.qparser import QueryParser
from whoosh.query import terms
from whoosh.writing import AsyncWriter from whoosh.writing import AsyncWriter
from documents.models import Document from documents.models import Document
@@ -57,7 +53,7 @@ def get_schema():
return Schema( return Schema(
id=NUMERIC(stored=True, unique=True, numtype=int), id=NUMERIC(stored=True, unique=True, numtype=int),
title=TEXT(stored=True), title=TEXT(stored=True),
content=TEXT(stored=True) content=TEXT()
) )
@@ -90,21 +86,6 @@ def remove_document_from_index(sender, instance, **kwargs):
writer.delete_by_term('id', instance.id) writer.delete_by_term('id', instance.id)
def query_index(ix, querystr):
    """Search the index ``ix`` for ``querystr`` and return the hits as dicts.

    The query string is parsed against the ``content`` field with
    ``terms.FuzzyTerm`` as the term class. Each returned dict carries the
    keys ``id``, ``highlights``, ``score`` and ``title``.
    """
    with ix.searcher() as searcher:
        parser = QueryParser("content", ix.schema, termclass=terms.FuzzyTerm)
        hits = searcher.search(parser.parse(querystr))

        # Highlight rendering: project formatter, 50 as the fragmenter's
        # ``surround`` setting.
        hits.formatter = JsonFormatter()
        hits.fragmenter.surround = 50

        # Build the list while the searcher is still open; the hits are
        # read inside the ``with`` block, exactly as before.
        found = []
        for hit in hits:
            found.append({
                'id': hit['id'],
                'highlights': hit.highlights("content"),
                'score': hit.score,
                'title': hit['title'],
            })
        return found
def autocomplete(ix, term, limit=10): def autocomplete(ix, term, limit=10):
with ix.reader() as reader: with ix.reader() as reader:
terms = [] terms = []

View File

@@ -6,6 +6,9 @@ from django_filters.rest_framework import DjangoFilterBackend
from rest_framework.decorators import action from rest_framework.decorators import action
from rest_framework.response import Response from rest_framework.response import Response
from rest_framework.views import APIView from rest_framework.views import APIView
from whoosh import highlight
from whoosh.qparser import QueryParser
from whoosh.query import terms
from paperless.db import GnuPG from paperless.db import GnuPG
from paperless.views import StandardPagination from paperless.views import StandardPagination
@@ -164,16 +167,45 @@ class SearchView(APIView):
ix = index.open_index() ix = index.open_index()
def add_infos_to_hit(self, r):
    """Turn a single search hit ``r`` into the dict sent to the client.

    Looks up the ``Document`` whose id matches the hit and returns a dict
    with the keys ``id``, ``highlights``, ``score``, ``rank``, ``document``
    (the serialized document) and ``title``.
    """
    hit_id = r['id']
    document = Document.objects.get(id=hit_id)

    # The document text is handed to highlights() explicitly instead of
    # being read from the hit.
    # NOTE(review): presumably because the index does not store the
    # content field - confirm against the schema.
    snippets = r.highlights("content", text=document.content)

    return {
        'id': hit_id,
        'highlights': snippets,
        'score': r.score,
        'rank': r.rank,
        'document': DocumentSerializer(document).data,
        'title': r['title'],
    }
def get(self, request, format=None):
    """Answer a search request with one page of results.

    Query parameters read from ``request.query_params``:

    * ``query`` - the search string. When it is absent, an empty result
      structure (all counters zero, no results) is returned.
    * ``page`` - the 1-based page number. Missing, non-numeric or
      smaller-than-one values fall back to the first page.

    The response body is a dict with the keys ``count``, ``page``,
    ``page_count`` and ``results``; every entry of ``results`` is built
    by ``add_infos_to_hit``.
    """
    if 'query' not in request.query_params:
        return Response({
            'count': 0,
            'page': 0,
            'page_count': 0,
            'results': []})

    query = request.query_params['query']

    try:
        page = int(request.query_params.get('page', 1))
    except (ValueError, TypeError):
        page = 1
    # search_page() rejects page numbers below 1, so e.g. "?page=0" or a
    # negative page is clamped to the first page instead of failing.
    page = max(page, 1)

    with self.ix.searcher() as searcher:
        parsed_query = QueryParser(
            "content", self.ix.schema,
            termclass=terms.FuzzyTerm).parse(query)
        result_page = searcher.search_page(parsed_query, page)

        # NOTE(review): the ContextFragmenter assigned here is replaced
        # by the PinpointFragmenter on the next statement, so only the
        # latter takes effect. Both are kept to leave behaviour
        # unchanged - decide which one is intended and drop the other.
        result_page.results.fragmenter = highlight.ContextFragmenter(
            surround=50)
        result_page.results.fragmenter = highlight.PinpointFragmenter()
        result_page.results.formatter = index.JsonFormatter()

        # The hits are converted inside the ``with`` block, while the
        # searcher is still open.
        return Response(
            {'count': len(result_page),
             'page': result_page.pagenum,
             'page_count': result_page.pagecount,
             'results': list(map(self.add_infos_to_hit, result_page))})
class SearchAutoCompleteView(APIView): class SearchAutoCompleteView(APIView):