Mirror of https://github.com/paperless-ngx/paperless-ngx.git (synced 2025-10-30 03:56:23 -05:00)
			
		
		
		
	paginated search results
This commit is contained in:
		| @@ -1,12 +1,8 @@ | |||||||
| from collections import Iterable |  | ||||||
|  |  | ||||||
| from django.db import models | from django.db import models | ||||||
| from django.dispatch import receiver | from django.dispatch import receiver | ||||||
| from whoosh.fields import Schema, TEXT, NUMERIC, DATETIME, KEYWORD | from whoosh.fields import Schema, TEXT, NUMERIC | ||||||
| from whoosh.highlight import Formatter, get_text | from whoosh.highlight import Formatter, get_text | ||||||
| from whoosh.index import create_in, exists_in, open_dir | from whoosh.index import create_in, exists_in, open_dir | ||||||
| from whoosh.qparser import QueryParser |  | ||||||
| from whoosh.query import terms |  | ||||||
| from whoosh.writing import AsyncWriter | from whoosh.writing import AsyncWriter | ||||||
|  |  | ||||||
| from documents.models import Document | from documents.models import Document | ||||||
| @@ -57,7 +53,7 @@ def get_schema(): | |||||||
|     return Schema( |     return Schema( | ||||||
|         id=NUMERIC(stored=True, unique=True, numtype=int), |         id=NUMERIC(stored=True, unique=True, numtype=int), | ||||||
|         title=TEXT(stored=True), |         title=TEXT(stored=True), | ||||||
|         content=TEXT(stored=True) |         content=TEXT() | ||||||
|     ) |     ) | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -90,21 +86,6 @@ def remove_document_from_index(sender, instance, **kwargs): | |||||||
|         writer.delete_by_term('id', instance.id) |         writer.delete_by_term('id', instance.id) | ||||||
|  |  | ||||||
|  |  | ||||||
| def query_index(ix, querystr): |  | ||||||
|     with ix.searcher() as searcher: |  | ||||||
|         query = QueryParser("content", ix.schema, termclass=terms.FuzzyTerm).parse(querystr) |  | ||||||
|         results = searcher.search(query) |  | ||||||
|         results.formatter = JsonFormatter() |  | ||||||
|         results.fragmenter.surround = 50 |  | ||||||
|  |  | ||||||
|         return [ |  | ||||||
|             {'id': r['id'], |  | ||||||
|              'highlights': r.highlights("content"), |  | ||||||
|              'score': r.score, |  | ||||||
|              'title': r['title'] |  | ||||||
|              } for r in results] |  | ||||||
|  |  | ||||||
|  |  | ||||||
| def autocomplete(ix, term, limit=10): | def autocomplete(ix, term, limit=10): | ||||||
|     with ix.reader() as reader: |     with ix.reader() as reader: | ||||||
|         terms = [] |         terms = [] | ||||||
|   | |||||||
| @@ -6,6 +6,9 @@ from django_filters.rest_framework import DjangoFilterBackend | |||||||
| from rest_framework.decorators import action | from rest_framework.decorators import action | ||||||
| from rest_framework.response import Response | from rest_framework.response import Response | ||||||
| from rest_framework.views import APIView | from rest_framework.views import APIView | ||||||
|  | from whoosh import highlight | ||||||
|  | from whoosh.qparser import QueryParser | ||||||
|  | from whoosh.query import terms | ||||||
|  |  | ||||||
| from paperless.db import GnuPG | from paperless.db import GnuPG | ||||||
| from paperless.views import StandardPagination | from paperless.views import StandardPagination | ||||||
| @@ -164,16 +167,45 @@ class SearchView(APIView): | |||||||
|  |  | ||||||
|     ix = index.open_index() |     ix = index.open_index() | ||||||
|  |  | ||||||
|  |     def add_infos_to_hit(self, r): | ||||||
|  |         doc = Document.objects.get(id=r['id']) | ||||||
|  |         return {'id': r['id'], | ||||||
|  |                 'highlights': r.highlights("content", text=doc.content), | ||||||
|  |                 'score': r.score, | ||||||
|  |                 'rank': r.rank, | ||||||
|  |                 'document': DocumentSerializer(doc).data, | ||||||
|  |                 'title': r['title'] | ||||||
|  |                 } | ||||||
|  |  | ||||||
|     def get(self, request, format=None): |     def get(self, request, format=None): | ||||||
|         if 'query' in request.query_params: |         if 'query' in request.query_params: | ||||||
|             query = request.query_params['query'] |             query = request.query_params['query'] | ||||||
|             query_results = index.query_index(self.ix, query) |             try: | ||||||
|             for r in query_results: |                 page = int(request.query_params.get('page', 1)) | ||||||
|                 r['document'] = DocumentSerializer(Document.objects.get(id=r['id'])).data |             except (ValueError, TypeError): | ||||||
|  |                 page = 1 | ||||||
|  |  | ||||||
|  |             with self.ix.searcher() as searcher: | ||||||
|  |                 query_parser = QueryParser("content", self.ix.schema, | ||||||
|  |                                     termclass=terms.FuzzyTerm).parse(query) | ||||||
|  |                 result_page = searcher.search_page(query_parser, page) | ||||||
|  |                 result_page.results.fragmenter = highlight.ContextFragmenter( | ||||||
|  |                     surround=50) | ||||||
|  |                 result_page.results.fragmenter = highlight.PinpointFragmenter() | ||||||
|  |                 result_page.results.formatter = index.JsonFormatter() | ||||||
|  |  | ||||||
|  |                 return Response( | ||||||
|  |                     {'count': len(result_page), | ||||||
|  |                      'page': result_page.pagenum, | ||||||
|  |                      'page_count': result_page.pagecount, | ||||||
|  |                      'results': list(map(self.add_infos_to_hit, result_page))}) | ||||||
|  |  | ||||||
|             return Response(query_results) |  | ||||||
|         else: |         else: | ||||||
|             return Response([]) |             return Response({ | ||||||
|  |                 'count': 0, | ||||||
|  |                 'page': 0, | ||||||
|  |                 'page_count': 0, | ||||||
|  |                 'results': []}) | ||||||
|  |  | ||||||
|  |  | ||||||
| class SearchAutoCompleteView(APIView): | class SearchAutoCompleteView(APIView): | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Jonas Winkler
					Jonas Winkler