paginated search results

This commit is contained in:
Jonas Winkler 2020-11-02 12:23:50 +01:00
parent 5e3d05322e
commit 2ff5487510
2 changed files with 39 additions and 26 deletions

View File

@ -1,12 +1,8 @@
from collections import Iterable
from django.db import models
from django.dispatch import receiver
from whoosh.fields import Schema, TEXT, NUMERIC, DATETIME, KEYWORD
from whoosh.fields import Schema, TEXT, NUMERIC
from whoosh.highlight import Formatter, get_text
from whoosh.index import create_in, exists_in, open_dir
from whoosh.qparser import QueryParser
from whoosh.query import terms
from whoosh.writing import AsyncWriter
from documents.models import Document
@ -57,7 +53,7 @@ def get_schema():
return Schema(
id=NUMERIC(stored=True, unique=True, numtype=int),
title=TEXT(stored=True),
content=TEXT(stored=True)
content=TEXT()
)
@ -90,21 +86,6 @@ def remove_document_from_index(sender, instance, **kwargs):
writer.delete_by_term('id', instance.id)
def query_index(ix, querystr):
    """Search the ``content`` field of the index *ix* for *querystr*.

    The query string is parsed with ``FuzzyTerm`` as the term class.
    Highlights are produced with a ``JsonFormatter`` and a fragmenter
    ``surround`` of 50.

    Returns a list with one dict per hit, holding the keys ``id``,
    ``highlights``, ``score`` and ``title``.
    """
    with ix.searcher() as searcher:
        parser = QueryParser("content", ix.schema, termclass=terms.FuzzyTerm)
        hits = searcher.search(parser.parse(querystr))
        hits.formatter = JsonFormatter()
        hits.fragmenter.surround = 50

        # Build the plain-dict result while the searcher is still open,
        # since the highlights are computed from the live hits.
        found = []
        for hit in hits:
            found.append({
                'id': hit['id'],
                'highlights': hit.highlights("content"),
                'score': hit.score,
                'title': hit['title'],
            })
        return found
def autocomplete(ix, term, limit=10):
with ix.reader() as reader:
terms = []

View File

@ -6,6 +6,9 @@ from django_filters.rest_framework import DjangoFilterBackend
from rest_framework.decorators import action
from rest_framework.response import Response
from rest_framework.views import APIView
from whoosh import highlight
from whoosh.qparser import QueryParser
from whoosh.query import terms
from paperless.db import GnuPG
from paperless.views import StandardPagination
@ -164,16 +167,45 @@ class SearchView(APIView):
ix = index.open_index()
def add_infos_to_hit(self, r):
    """Turn the search hit *r* into the dict sent back to the client.

    Looks up the ``Document`` whose id is stored on the hit and returns a
    dict with the keys ``id``, ``highlights``, ``score``, ``rank``,
    ``document`` (the serialized document) and ``title``.
    """
    doc = Document.objects.get(id=r['id'])

    info = {'id': r['id']}
    # The text to highlight is taken from the database document and handed
    # to the hit explicitly.
    info['highlights'] = r.highlights("content", text=doc.content)
    info['score'] = r.score
    info['rank'] = r.rank
    info['document'] = DocumentSerializer(doc).data
    info['title'] = r['title']
    return info
def get(self, request, format=None):
# Handles a search request: reads the 'query' and optional 'page' query
# parameters and responds with one page of hits.
# NOTE(review): this listing looks like a commit diff with removed and added
# lines interleaved and indentation lost — confirm statement order against the
# actual file before relying on it.
if 'query' in request.query_params:
query = request.query_params['query']
query_results = index.query_index(self.ix, query)
for r in query_results:
r['document'] = DocumentSerializer(Document.objects.get(id=r['id'])).data
# Fall back to page 1 when 'page' is missing or is not an integer.
# NOTE(review): a value below 1 is passed through unchanged to search_page —
# confirm that search_page accepts it.
try:
page = int(request.query_params.get('page', 1))
except (ValueError, TypeError):
page = 1
with self.ix.searcher() as searcher:
# Despite its name, query_parser holds the parsed query (the result of
# .parse()), not the parser object.
query_parser = QueryParser("content", self.ix.schema,
termclass=terms.FuzzyTerm).parse(query)
result_page = searcher.search_page(query_parser, page)
result_page.results.fragmenter = highlight.ContextFragmenter(
surround=50)
# NOTE(review): this assignment overwrites the ContextFragmenter set just
# above — confirm which fragmenter is intended.
result_page.results.fragmenter = highlight.PinpointFragmenter()
result_page.results.formatter = index.JsonFormatter()
# Each hit on the page is expanded via add_infos_to_hit before the response
# is built.
return Response(
{'count': len(result_page),
'page': result_page.pagenum,
'page_count': result_page.pagecount,
'results': list(map(self.add_infos_to_hit, result_page))})
return Response(query_results)
else:
return Response([])
# Empty result with the same keys as the paginated response above.
return Response({
'count': 0,
'page': 0,
'page_count': 0,
'results': []})
class SearchAutoCompleteView(APIView):