diff --git a/docs/administration.rst b/docs/administration.rst index 610d2c9d3..3284f7141 100644 --- a/docs/administration.rst +++ b/docs/administration.rst @@ -274,6 +274,7 @@ management command: This command takes no arguments. +.. _`administration-index`: Managing the document search index ================================== diff --git a/docs/changelog.rst b/docs/changelog.rst index 45817aa1a..f326b95ce 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -8,6 +8,15 @@ Changelog paperless-ng 0.9.4 ################## +* Searching: + + * Paperless now supports searching by tags. In order to have this applied to your + existing documents, you need to perform a ``document_index reindex`` management command + (see :ref:`administration-index`) + that adds tags to your search index. Paperless keeps your index updated after that whenever + something changes. + * Paperless now has spelling corrections ("Did you mean") for mistyped queries. + * Front end: * Clickable tags, correspondents and types allow quick filtering for related documents. diff --git a/src-ui/src/app/components/search/search.component.html b/src-ui/src/app/components/search/search.component.html index 59c24fa04..cb5c1a8e8 100644 --- a/src-ui/src/app/components/search/search.component.html +++ b/src-ui/src/app/components/search/search.component.html @@ -1,7 +1,13 @@ -

Search string: {{query}}

+

+ Search string: {{query}} + + - Did you mean "{{correctedQuery}}"? + + +

{{resultCount}} result(s)

@@ -10,4 +16,4 @@ [details]="result.highlights"> -
\ No newline at end of file + diff --git a/src-ui/src/app/components/search/search.component.ts b/src-ui/src/app/components/search/search.component.ts index f8c5d6cdc..8320ac545 100644 --- a/src-ui/src/app/components/search/search.component.ts +++ b/src-ui/src/app/components/search/search.component.ts @@ -1,5 +1,5 @@ import { Component, OnInit } from '@angular/core'; -import { ActivatedRoute } from '@angular/router'; +import { ActivatedRoute, Router } from '@angular/router'; import { SearchHit } from 'src/app/data/search-result'; import { SearchService } from 'src/app/services/rest/search.service'; @@ -9,7 +9,7 @@ import { SearchService } from 'src/app/services/rest/search.service'; styleUrls: ['./search.component.scss'] }) export class SearchComponent implements OnInit { - + results: SearchHit[] = [] query: string = "" @@ -22,7 +22,9 @@ export class SearchComponent implements OnInit { resultCount - constructor(private searchService: SearchService, private route: ActivatedRoute) { } + correctedQuery: string = null + + constructor(private searchService: SearchService, private route: ActivatedRoute, private router: Router) { } ngOnInit(): void { this.route.queryParamMap.subscribe(paramMap => { @@ -31,7 +33,12 @@ export class SearchComponent implements OnInit { this.currentPage = 1 this.loadPage() }) - + + } + + searchCorrectedQuery() { + this.router.navigate(["search"], {queryParams: {query: this.correctedQuery}}) + this.correctedQuery = null } loadPage(append: boolean = false) { @@ -44,12 +51,11 @@ export class SearchComponent implements OnInit { this.pageCount = result.page_count this.searching = false this.resultCount = result.count + this.correctedQuery = result.corrected_query }) } onScroll() { - console.log(this.currentPage) - console.log(this.pageCount) if (this.currentPage < this.pageCount) { this.currentPage += 1 this.loadPage(true) diff --git a/src-ui/src/app/data/search-result.ts b/src-ui/src/app/data/search-result.ts index b22dc64af..a769a8351 100644 --- 
a/src-ui/src/app/data/search-result.ts +++ b/src-ui/src/app/data/search-result.ts @@ -21,7 +21,9 @@ export interface SearchResult { page?: number page_count?: number + corrected_query?: string + results?: SearchHit[] -} \ No newline at end of file +} diff --git a/src/documents/index.py b/src/documents/index.py index ffa3e688f..822ac2e8a 100644 --- a/src/documents/index.py +++ b/src/documents/index.py @@ -4,7 +4,7 @@ from contextlib import contextmanager from django.conf import settings from whoosh import highlight -from whoosh.fields import Schema, TEXT, NUMERIC +from whoosh.fields import Schema, TEXT, NUMERIC, KEYWORD from whoosh.highlight import Formatter, get_text from whoosh.index import create_in, exists_in, open_dir from whoosh.qparser import MultifieldParser @@ -59,14 +59,15 @@ def get_schema(): id=NUMERIC(stored=True, unique=True, numtype=int), title=TEXT(stored=True), content=TEXT(), - correspondent=TEXT(stored=True) + correspondent=TEXT(stored=True), + tag=KEYWORD(stored=True, commas=True, scorable=True, lowercase=True) ) def open_index(recreate=False): try: if exists_in(settings.INDEX_DIR) and not recreate: - return open_dir(settings.INDEX_DIR) + return open_dir(settings.INDEX_DIR, schema=get_schema()) except Exception as e: logger.error(f"Error while opening the index: {e}, recreating.") @@ -77,11 +78,13 @@ def open_index(recreate=False): def update_document(writer, doc): logger.debug("Indexing {}...".format(doc)) + tags = ",".join([t.name for t in doc.tags.all()]) writer.update_document( id=doc.pk, title=doc.title, content=doc.content, - correspondent=doc.correspondent.name if doc.correspondent else None + correspondent=doc.correspondent.name if doc.correspondent else None, + tag=tags if tags else None ) @@ -106,13 +109,21 @@ def remove_document_from_index(document): def query_page(ix, query, page): searcher = ix.searcher() try: - query_parser = MultifieldParser(["content", "title", "correspondent"], - ix.schema).parse(query) + query_parser = 
MultifieldParser( + ["content", "title", "correspondent", "tag"], + ix.schema).parse(query) result_page = searcher.search_page(query_parser, page) result_page.results.fragmenter = highlight.ContextFragmenter( surround=50) result_page.results.formatter = JsonFormatter() - yield result_page + + corrected = searcher.correct_query(query_parser, query) + if corrected.query != query_parser: + corrected_query = corrected.string + else: + corrected_query = None + + yield result_page, corrected_query finally: searcher.close() diff --git a/src/documents/tests/test_api.py b/src/documents/tests/test_api.py index dabae6d82..b9f3dcfba 100644 --- a/src/documents/tests/test_api.py +++ b/src/documents/tests/test_api.py @@ -289,6 +289,22 @@ class DocumentApiTest(DirectoriesMixin, APITestCase): self.assertEqual(response.status_code, 200) self.assertEqual(len(response.data), 10) + def test_search_spelling_correction(self): + with AsyncWriter(index.open_index()) as writer: + for i in range(55): + doc = Document.objects.create(checksum=str(i), pk=i+1, title=f"Document {i+1}", content=f"Things document {i+1}") + index.update_document(writer, doc) + + response = self.client.get("/api/search/?query=thing") + correction = response.data['corrected_query'] + + self.assertEqual(correction, "things") + + response = self.client.get("/api/search/?query=things") + correction = response.data['corrected_query'] + + self.assertEqual(correction, None) + def test_statistics(self): doc1 = Document.objects.create(title="none1", checksum="A") diff --git a/src/documents/views.py b/src/documents/views.py index 84f4a3999..0ac232436 100755 --- a/src/documents/views.py +++ b/src/documents/views.py @@ -227,11 +227,13 @@ class SearchView(APIView): if page < 1: page = 1 - with index.query_page(self.ix, query, page) as result_page: + with index.query_page(self.ix, query, page) as (result_page, + corrected_query): return Response( {'count': len(result_page), 'page': result_page.pagenum, 'page_count': 
result_page.pagecount, + 'corrected_query': corrected_query, 'results': list(map(self.add_infos_to_hit, result_page))}) else: