mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-10-30 03:56:23 -05:00 
			
		
		
		
	searching for tags, spelling corrections fixes #74
This commit is contained in:
		| @@ -274,6 +274,7 @@ management command: | |||||||
|  |  | ||||||
| This command takes no arguments. | This command takes no arguments. | ||||||
|  |  | ||||||
|  | .. _`administration-index`: | ||||||
|  |  | ||||||
| Managing the document search index | Managing the document search index | ||||||
| ================================== | ================================== | ||||||
|   | |||||||
| @@ -8,6 +8,15 @@ Changelog | |||||||
| paperless-ng 0.9.4 | paperless-ng 0.9.4 | ||||||
| ################## | ################## | ||||||
|  |  | ||||||
|  | * Searching: | ||||||
|  |  | ||||||
|  |   * Paperless now supports searching by tags. In order to have this applied to your | ||||||
|  |     existing documents, you need to perform a ``document_index reindex`` management command | ||||||
|  |     (see :ref:`administration-index`) | ||||||
|  |     that adds tags to your search index. Paperless keeps your index updated after that whenever | ||||||
|  |     something changes. | ||||||
|  |   * Paperless now has spelling corrections ("Did you mean") for mistyped queries. | ||||||
|  |  | ||||||
| * Front end: | * Front end: | ||||||
|  |  | ||||||
|   * Clickable tags, correspondents and types allow quick filtering for related documents. |   * Clickable tags, correspondents and types allow quick filtering for related documents. | ||||||
|   | |||||||
| @@ -1,7 +1,13 @@ | |||||||
| <app-page-header title="Search results"> | <app-page-header title="Search results"> | ||||||
| </app-page-header> | </app-page-header> | ||||||
|  |  | ||||||
| <p>Search string: <i>{{query}}</i></p> | <p> | ||||||
|  |     Search string: <i>{{query}}</i> | ||||||
|  |     <ng-container *ngIf="correctedQuery"> | ||||||
|  |         - Did you mean "<a [routerLink]="" (click)="searchCorrectedQuery()">{{correctedQuery}}</a>"? | ||||||
|  |     </ng-container> | ||||||
|  |  | ||||||
|  | </p> | ||||||
|  |  | ||||||
| <div [class.result-content-searching]="searching" infiniteScroll (scrolled)="onScroll()"> | <div [class.result-content-searching]="searching" infiniteScroll (scrolled)="onScroll()"> | ||||||
|     <p>{{resultCount}} result(s)</p> |     <p>{{resultCount}} result(s)</p> | ||||||
|   | |||||||
| @@ -1,5 +1,5 @@ | |||||||
| import { Component, OnInit } from '@angular/core'; | import { Component, OnInit } from '@angular/core'; | ||||||
| import { ActivatedRoute } from '@angular/router'; | import { ActivatedRoute, Router } from '@angular/router'; | ||||||
| import { SearchHit } from 'src/app/data/search-result'; | import { SearchHit } from 'src/app/data/search-result'; | ||||||
| import { SearchService } from 'src/app/services/rest/search.service'; | import { SearchService } from 'src/app/services/rest/search.service'; | ||||||
|  |  | ||||||
| @@ -22,7 +22,9 @@ export class SearchComponent implements OnInit { | |||||||
|  |  | ||||||
|   resultCount |   resultCount | ||||||
|  |  | ||||||
|   constructor(private searchService: SearchService, private route: ActivatedRoute) { } |   correctedQuery: string = null | ||||||
|  |  | ||||||
|  |   constructor(private searchService: SearchService, private route: ActivatedRoute, private router: Router) { } | ||||||
|  |  | ||||||
|   ngOnInit(): void { |   ngOnInit(): void { | ||||||
|     this.route.queryParamMap.subscribe(paramMap => { |     this.route.queryParamMap.subscribe(paramMap => { | ||||||
| @@ -34,6 +36,11 @@ export class SearchComponent implements OnInit { | |||||||
|  |  | ||||||
|   } |   } | ||||||
|  |  | ||||||
|  |   searchCorrectedQuery() { | ||||||
|  |     this.router.navigate(["search"], {queryParams: {query: this.correctedQuery}}) | ||||||
|  |     this.correctedQuery = null | ||||||
|  |   } | ||||||
|  |  | ||||||
|   loadPage(append: boolean = false) { |   loadPage(append: boolean = false) { | ||||||
|     this.searchService.search(this.query, this.currentPage).subscribe(result => { |     this.searchService.search(this.query, this.currentPage).subscribe(result => { | ||||||
|       if (append) { |       if (append) { | ||||||
| @@ -44,12 +51,11 @@ export class SearchComponent implements OnInit { | |||||||
|       this.pageCount = result.page_count |       this.pageCount = result.page_count | ||||||
|       this.searching = false |       this.searching = false | ||||||
|       this.resultCount = result.count |       this.resultCount = result.count | ||||||
|  |       this.correctedQuery = result.corrected_query | ||||||
|     }) |     }) | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   onScroll() { |   onScroll() { | ||||||
|     console.log(this.currentPage) |  | ||||||
|     console.log(this.pageCount) |  | ||||||
|     if (this.currentPage < this.pageCount) { |     if (this.currentPage < this.pageCount) { | ||||||
|       this.currentPage += 1 |       this.currentPage += 1 | ||||||
|       this.loadPage(true) |       this.loadPage(true) | ||||||
|   | |||||||
| @@ -21,6 +21,8 @@ export interface SearchResult { | |||||||
|   page?: number |   page?: number | ||||||
|   page_count?: number |   page_count?: number | ||||||
|  |  | ||||||
|  |   corrected_query?: string | ||||||
|  |  | ||||||
|   results?: SearchHit[] |   results?: SearchHit[] | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
| @@ -4,7 +4,7 @@ from contextlib import contextmanager | |||||||
|  |  | ||||||
| from django.conf import settings | from django.conf import settings | ||||||
| from whoosh import highlight | from whoosh import highlight | ||||||
| from whoosh.fields import Schema, TEXT, NUMERIC | from whoosh.fields import Schema, TEXT, NUMERIC, KEYWORD | ||||||
| from whoosh.highlight import Formatter, get_text | from whoosh.highlight import Formatter, get_text | ||||||
| from whoosh.index import create_in, exists_in, open_dir | from whoosh.index import create_in, exists_in, open_dir | ||||||
| from whoosh.qparser import MultifieldParser | from whoosh.qparser import MultifieldParser | ||||||
| @@ -59,14 +59,15 @@ def get_schema(): | |||||||
|         id=NUMERIC(stored=True, unique=True, numtype=int), |         id=NUMERIC(stored=True, unique=True, numtype=int), | ||||||
|         title=TEXT(stored=True), |         title=TEXT(stored=True), | ||||||
|         content=TEXT(), |         content=TEXT(), | ||||||
|         correspondent=TEXT(stored=True) |         correspondent=TEXT(stored=True), | ||||||
|  |         tag=KEYWORD(stored=True, commas=True, scorable=True, lowercase=True) | ||||||
|     ) |     ) | ||||||
|  |  | ||||||
|  |  | ||||||
| def open_index(recreate=False): | def open_index(recreate=False): | ||||||
|     try: |     try: | ||||||
|         if exists_in(settings.INDEX_DIR) and not recreate: |         if exists_in(settings.INDEX_DIR) and not recreate: | ||||||
|             return open_dir(settings.INDEX_DIR) |             return open_dir(settings.INDEX_DIR, schema=get_schema()) | ||||||
|     except Exception as e: |     except Exception as e: | ||||||
|         logger.error(f"Error while opening the index: {e}, recreating.") |         logger.error(f"Error while opening the index: {e}, recreating.") | ||||||
|  |  | ||||||
| @@ -77,11 +78,13 @@ def open_index(recreate=False): | |||||||
|  |  | ||||||
| def update_document(writer, doc): | def update_document(writer, doc): | ||||||
|     logger.debug("Indexing {}...".format(doc)) |     logger.debug("Indexing {}...".format(doc)) | ||||||
|  |     tags = ",".join([t.name for t in doc.tags.all()]) | ||||||
|     writer.update_document( |     writer.update_document( | ||||||
|         id=doc.pk, |         id=doc.pk, | ||||||
|         title=doc.title, |         title=doc.title, | ||||||
|         content=doc.content, |         content=doc.content, | ||||||
|         correspondent=doc.correspondent.name if doc.correspondent else None |         correspondent=doc.correspondent.name if doc.correspondent else None, | ||||||
|  |         tag=tags if tags else None | ||||||
|     ) |     ) | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -106,13 +109,21 @@ def remove_document_from_index(document): | |||||||
| def query_page(ix, query, page): | def query_page(ix, query, page): | ||||||
|     searcher = ix.searcher() |     searcher = ix.searcher() | ||||||
|     try: |     try: | ||||||
|         query_parser = MultifieldParser(["content", "title", "correspondent"], |         query_parser = MultifieldParser( | ||||||
|                                         ix.schema).parse(query) |             ["content", "title", "correspondent", "tag"], | ||||||
|  |             ix.schema).parse(query) | ||||||
|         result_page = searcher.search_page(query_parser, page) |         result_page = searcher.search_page(query_parser, page) | ||||||
|         result_page.results.fragmenter = highlight.ContextFragmenter( |         result_page.results.fragmenter = highlight.ContextFragmenter( | ||||||
|             surround=50) |             surround=50) | ||||||
|         result_page.results.formatter = JsonFormatter() |         result_page.results.formatter = JsonFormatter() | ||||||
|         yield result_page |  | ||||||
|  |         corrected = searcher.correct_query(query_parser, query) | ||||||
|  |         if corrected.query != query_parser: | ||||||
|  |             corrected_query = corrected.string | ||||||
|  |         else: | ||||||
|  |             corrected_query = None | ||||||
|  |  | ||||||
|  |         yield result_page, corrected_query | ||||||
|     finally: |     finally: | ||||||
|         searcher.close() |         searcher.close() | ||||||
|  |  | ||||||
|   | |||||||
| @@ -289,6 +289,22 @@ class DocumentApiTest(DirectoriesMixin, APITestCase): | |||||||
|         self.assertEqual(response.status_code, 200) |         self.assertEqual(response.status_code, 200) | ||||||
|         self.assertEqual(len(response.data), 10) |         self.assertEqual(len(response.data), 10) | ||||||
|  |  | ||||||
|  |     def test_search_spelling_correction(self): | ||||||
|  |         with AsyncWriter(index.open_index()) as writer: | ||||||
|  |             for i in range(55): | ||||||
|  |                 doc = Document.objects.create(checksum=str(i), pk=i+1, title=f"Document {i+1}", content=f"Things document {i+1}") | ||||||
|  |                 index.update_document(writer, doc) | ||||||
|  |  | ||||||
|  |         response = self.client.get("/api/search/?query=thing") | ||||||
|  |         correction = response.data['corrected_query'] | ||||||
|  |  | ||||||
|  |         self.assertEqual(correction, "things") | ||||||
|  |  | ||||||
|  |         response = self.client.get("/api/search/?query=things") | ||||||
|  |         correction = response.data['corrected_query'] | ||||||
|  |  | ||||||
|  |         self.assertEqual(correction, None) | ||||||
|  |  | ||||||
|     def test_statistics(self): |     def test_statistics(self): | ||||||
|  |  | ||||||
|         doc1 = Document.objects.create(title="none1", checksum="A") |         doc1 = Document.objects.create(title="none1", checksum="A") | ||||||
|   | |||||||
| @@ -227,11 +227,13 @@ class SearchView(APIView): | |||||||
|             if page < 1: |             if page < 1: | ||||||
|                 page = 1 |                 page = 1 | ||||||
|  |  | ||||||
|             with index.query_page(self.ix, query, page) as result_page: |             with index.query_page(self.ix, query, page) as (result_page, | ||||||
|  |                                                             corrected_query): | ||||||
|                 return Response( |                 return Response( | ||||||
|                     {'count': len(result_page), |                     {'count': len(result_page), | ||||||
|                      'page': result_page.pagenum, |                      'page': result_page.pagenum, | ||||||
|                      'page_count': result_page.pagecount, |                      'page_count': result_page.pagecount, | ||||||
|  |                      'corrected_query': corrected_query, | ||||||
|                      'results': list(map(self.add_infos_to_hit, result_page))}) |                      'results': list(map(self.add_infos_to_hit, result_page))}) | ||||||
|  |  | ||||||
|         else: |         else: | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 jonaswinkler
					jonaswinkler