paginated search results

2026-02-22 00:49:35 -06:00 · 2020-11-02 12:23:50 +01:00
parent 5e3d05322e
commit 2ff5487510
2 changed files with 39 additions and 26 deletions
--- a/src/documents/index.py
+++ b/src/documents/index.py
@@ -1,12 +1,8 @@
-from collections import Iterable
-
 from django.db import models
 from django.dispatch import receiver
-from whoosh.fields import Schema, TEXT, NUMERIC, DATETIME, KEYWORD
+from whoosh.fields import Schema, TEXT, NUMERIC
 from whoosh.highlight import Formatter, get_text
 from whoosh.index import create_in, exists_in, open_dir
-from whoosh.qparser import QueryParser
-from whoosh.query import terms
 from whoosh.writing import AsyncWriter

 from documents.models import Document
@@ -57,7 +53,7 @@ def get_schema():
    return Schema(
        id=NUMERIC(stored=True, unique=True, numtype=int),
        title=TEXT(stored=True),
-        content=TEXT(stored=True)
+        content=TEXT()
    )


@@ -90,21 +86,6 @@ def remove_document_from_index(sender, instance, **kwargs):
        writer.delete_by_term('id', instance.id)


-def query_index(ix, querystr):
-    with ix.searcher() as searcher:
-        query = QueryParser("content", ix.schema, termclass=terms.FuzzyTerm).parse(querystr)
-        results = searcher.search(query)
-        results.formatter = JsonFormatter()
-        results.fragmenter.surround = 50
-
-        return [
-            {'id': r['id'],
-             'highlights': r.highlights("content"),
-             'score': r.score,
-             'title': r['title']
-             } for r in results]
-
-
 def autocomplete(ix, term, limit=10):
    with ix.reader() as reader:
        terms = []
--- a/src/documents/views.py
+++ b/src/documents/views.py
@@ -6,6 +6,9 @@ from django_filters.rest_framework import DjangoFilterBackend
 from rest_framework.decorators import action
 from rest_framework.response import Response
 from rest_framework.views import APIView
+from whoosh import highlight
+from whoosh.qparser import QueryParser
+from whoosh.query import terms

 from paperless.db import GnuPG
 from paperless.views import StandardPagination
@@ -164,16 +167,45 @@ class SearchView(APIView):

    ix = index.open_index()

+    def add_infos_to_hit(self, r):
+        doc = Document.objects.get(id=r['id'])
+        return {'id': r['id'],
+                'highlights': r.highlights("content", text=doc.content),
+                'score': r.score,
+                'rank': r.rank,
+                'document': DocumentSerializer(doc).data,
+                'title': r['title']
+                }
+
    def get(self, request, format=None):
        if 'query' in request.query_params:
            query = request.query_params['query']
-            query_results = index.query_index(self.ix, query)
-            for r in query_results:
-                r['document'] = DocumentSerializer(Document.objects.get(id=r['id'])).data
+            try:
+                page = int(request.query_params.get('page', 1))
+            except (ValueError, TypeError):
+                page = 1
+
+            with self.ix.searcher() as searcher:
+                query_parser = QueryParser("content", self.ix.schema,
+                                    termclass=terms.FuzzyTerm).parse(query)
+                result_page = searcher.search_page(query_parser, page)
+                result_page.results.fragmenter = highlight.ContextFragmenter(
+                    surround=50)
+                result_page.results.fragmenter = highlight.PinpointFragmenter()
+                result_page.results.formatter = index.JsonFormatter()
+
+                return Response(
+                    {'count': len(result_page),
+                     'page': result_page.pagenum,
+                     'page_count': result_page.pagecount,
+                     'results': list(map(self.add_infos_to_hit, result_page))})

-            return Response(query_results)
        else:
-            return Response([])
+            return Response({
+                'count': 0,
+                'page': 0,
+                'page_count': 0,
+                'results': []})


 class SearchAutoCompleteView(APIView):