diff --git a/docs/administration.rst b/docs/administration.rst
index 610d2c9d3..3284f7141 100644
--- a/docs/administration.rst
+++ b/docs/administration.rst
@@ -274,6 +274,7 @@ management command:
This command takes no arguments.
+.. _`administration-index`:
Managing the document search index
==================================
diff --git a/docs/changelog.rst b/docs/changelog.rst
index 45817aa1a..f326b95ce 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -8,6 +8,15 @@ Changelog
paperless-ng 0.9.4
##################
+* Searching:
+
+ * Paperless now supports searching by tags. To apply this to your
+ existing documents, you need to run the ``document_index reindex`` management command
+ (see :ref:`administration-index`),
+ which adds tags to your search index. Paperless keeps your index updated after that whenever
+ something changes.
+ * Paperless now has spelling corrections ("Did you mean") for mistyped queries.
+
* Front end:
* Clickable tags, correspondents and types allow quick filtering for related documents.
diff --git a/src-ui/src/app/components/search/search.component.html b/src-ui/src/app/components/search/search.component.html
index 59c24fa04..cb5c1a8e8 100644
--- a/src-ui/src/app/components/search/search.component.html
+++ b/src-ui/src/app/components/search/search.component.html
@@ -1,7 +1,13 @@
-
Search string: {{query}}
+
+ Search string: {{query}}
+
+ - Did you mean "{{correctedQuery}}"?
+
+
+
{{resultCount}} result(s)
@@ -10,4 +16,4 @@
[details]="result.highlights">
-
\ No newline at end of file
+
diff --git a/src-ui/src/app/components/search/search.component.ts b/src-ui/src/app/components/search/search.component.ts
index f8c5d6cdc..8320ac545 100644
--- a/src-ui/src/app/components/search/search.component.ts
+++ b/src-ui/src/app/components/search/search.component.ts
@@ -1,5 +1,5 @@
import { Component, OnInit } from '@angular/core';
-import { ActivatedRoute } from '@angular/router';
+import { ActivatedRoute, Router } from '@angular/router';
import { SearchHit } from 'src/app/data/search-result';
import { SearchService } from 'src/app/services/rest/search.service';
@@ -9,7 +9,7 @@ import { SearchService } from 'src/app/services/rest/search.service';
styleUrls: ['./search.component.scss']
})
export class SearchComponent implements OnInit {
-
+
results: SearchHit[] = []
query: string = ""
@@ -22,7 +22,9 @@ export class SearchComponent implements OnInit {
resultCount
- constructor(private searchService: SearchService, private route: ActivatedRoute) { }
+ correctedQuery: string = null
+
+ constructor(private searchService: SearchService, private route: ActivatedRoute, private router: Router) { }
ngOnInit(): void {
this.route.queryParamMap.subscribe(paramMap => {
@@ -31,7 +33,12 @@ export class SearchComponent implements OnInit {
this.currentPage = 1
this.loadPage()
})
-
+
+ }
+
+ searchCorrectedQuery() {
+ this.router.navigate(["search"], {queryParams: {query: this.correctedQuery}})
+ this.correctedQuery = null
}
loadPage(append: boolean = false) {
@@ -44,12 +51,11 @@ export class SearchComponent implements OnInit {
this.pageCount = result.page_count
this.searching = false
this.resultCount = result.count
+ this.correctedQuery = result.corrected_query
})
}
onScroll() {
- console.log(this.currentPage)
- console.log(this.pageCount)
if (this.currentPage < this.pageCount) {
this.currentPage += 1
this.loadPage(true)
diff --git a/src-ui/src/app/data/search-result.ts b/src-ui/src/app/data/search-result.ts
index b22dc64af..a769a8351 100644
--- a/src-ui/src/app/data/search-result.ts
+++ b/src-ui/src/app/data/search-result.ts
@@ -21,7 +21,9 @@ export interface SearchResult {
page?: number
page_count?: number
+ corrected_query?: string
+
results?: SearchHit[]
-}
\ No newline at end of file
+}
diff --git a/src/documents/index.py b/src/documents/index.py
index ffa3e688f..822ac2e8a 100644
--- a/src/documents/index.py
+++ b/src/documents/index.py
@@ -4,7 +4,7 @@ from contextlib import contextmanager
from django.conf import settings
from whoosh import highlight
-from whoosh.fields import Schema, TEXT, NUMERIC
+from whoosh.fields import Schema, TEXT, NUMERIC, KEYWORD
from whoosh.highlight import Formatter, get_text
from whoosh.index import create_in, exists_in, open_dir
from whoosh.qparser import MultifieldParser
@@ -59,14 +59,15 @@ def get_schema():
id=NUMERIC(stored=True, unique=True, numtype=int),
title=TEXT(stored=True),
content=TEXT(),
- correspondent=TEXT(stored=True)
+ correspondent=TEXT(stored=True),
+ tag=KEYWORD(stored=True, commas=True, scorable=True, lowercase=True)
)
def open_index(recreate=False):
try:
if exists_in(settings.INDEX_DIR) and not recreate:
- return open_dir(settings.INDEX_DIR)
+ return open_dir(settings.INDEX_DIR, schema=get_schema())
except Exception as e:
logger.error(f"Error while opening the index: {e}, recreating.")
@@ -77,11 +78,13 @@ def open_index(recreate=False):
def update_document(writer, doc):
logger.debug("Indexing {}...".format(doc))
+ tags = ",".join([t.name for t in doc.tags.all()])
writer.update_document(
id=doc.pk,
title=doc.title,
content=doc.content,
- correspondent=doc.correspondent.name if doc.correspondent else None
+ correspondent=doc.correspondent.name if doc.correspondent else None,
+ tag=tags if tags else None
)
@@ -106,13 +109,21 @@ def remove_document_from_index(document):
def query_page(ix, query, page):
searcher = ix.searcher()
try:
- query_parser = MultifieldParser(["content", "title", "correspondent"],
- ix.schema).parse(query)
+ query_parser = MultifieldParser(
+ ["content", "title", "correspondent", "tag"],
+ ix.schema).parse(query)
result_page = searcher.search_page(query_parser, page)
result_page.results.fragmenter = highlight.ContextFragmenter(
surround=50)
result_page.results.formatter = JsonFormatter()
- yield result_page
+
+ corrected = searcher.correct_query(query_parser, query)
+ if corrected.query != query_parser:
+ corrected_query = corrected.string
+ else:
+ corrected_query = None
+
+ yield result_page, corrected_query
finally:
searcher.close()
diff --git a/src/documents/tests/test_api.py b/src/documents/tests/test_api.py
index dabae6d82..b9f3dcfba 100644
--- a/src/documents/tests/test_api.py
+++ b/src/documents/tests/test_api.py
@@ -289,6 +289,22 @@ class DocumentApiTest(DirectoriesMixin, APITestCase):
self.assertEqual(response.status_code, 200)
self.assertEqual(len(response.data), 10)
+ def test_search_spelling_correction(self):
+ with AsyncWriter(index.open_index()) as writer:
+ for i in range(55):
+ doc = Document.objects.create(checksum=str(i), pk=i+1, title=f"Document {i+1}", content=f"Things document {i+1}")
+ index.update_document(writer, doc)
+
+ response = self.client.get("/api/search/?query=thing")
+ correction = response.data['corrected_query']
+
+ self.assertEqual(correction, "things")
+
+ response = self.client.get("/api/search/?query=things")
+ correction = response.data['corrected_query']
+
+ self.assertEqual(correction, None)
+
def test_statistics(self):
doc1 = Document.objects.create(title="none1", checksum="A")
diff --git a/src/documents/views.py b/src/documents/views.py
index 84f4a3999..0ac232436 100755
--- a/src/documents/views.py
+++ b/src/documents/views.py
@@ -227,11 +227,13 @@ class SearchView(APIView):
if page < 1:
page = 1
- with index.query_page(self.ix, query, page) as result_page:
+ with index.query_page(self.ix, query, page) as (result_page,
+ corrected_query):
return Response(
{'count': len(result_page),
'page': result_page.pagenum,
'page_count': result_page.pagecount,
+ 'corrected_query': corrected_query,
'results': list(map(self.add_infos_to_hit, result_page))})
else: