mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-05-01 11:19:32 -05:00
searching for tags, spelling corrections fixes #74
This commit is contained in:
parent
0d8688515c
commit
1ef12d2cbc
docs
src-ui/src/app
src/documents
@ -274,6 +274,7 @@ management command:
|
|||||||
|
|
||||||
This command takes no arguments.
|
This command takes no arguments.
|
||||||
|
|
||||||
|
.. _`administration-index`:
|
||||||
|
|
||||||
Managing the document search index
|
Managing the document search index
|
||||||
==================================
|
==================================
|
||||||
|
@ -8,6 +8,15 @@ Changelog
|
|||||||
paperless-ng 0.9.4
|
paperless-ng 0.9.4
|
||||||
##################
|
##################
|
||||||
|
|
||||||
|
* Searching:
|
||||||
|
|
||||||
|
* Paperless now supports searching by tags. In order to have this applied to your
|
||||||
|
existing documents, you need to run the ``document_index reindex`` management command
|
||||||
|
(see :ref:`administration-index`)
|
||||||
|
that adds tags to your search index. Paperless keeps your index updated after that whenever
|
||||||
|
something changes.
|
||||||
|
* Paperless now has spelling corrections ("Did you mean") for mistyped queries.
|
||||||
|
|
||||||
* Front end:
|
* Front end:
|
||||||
|
|
||||||
* Clickable tags, correspondents and types allow quick filtering for related documents.
|
* Clickable tags, correspondents and types allow quick filtering for related documents.
|
||||||
|
@ -1,7 +1,13 @@
|
|||||||
<app-page-header title="Search results">
|
<app-page-header title="Search results">
|
||||||
</app-page-header>
|
</app-page-header>
|
||||||
|
|
||||||
<p>Search string: <i>{{query}}</i></p>
|
<p>
|
||||||
|
Search string: <i>{{query}}</i>
|
||||||
|
<ng-container *ngIf="correctedQuery">
|
||||||
|
- Did you mean "<a [routerLink]="" (click)="searchCorrectedQuery()">{{correctedQuery}}</a>"?
|
||||||
|
</ng-container>
|
||||||
|
|
||||||
|
</p>
|
||||||
|
|
||||||
<div [class.result-content-searching]="searching" infiniteScroll (scrolled)="onScroll()">
|
<div [class.result-content-searching]="searching" infiniteScroll (scrolled)="onScroll()">
|
||||||
<p>{{resultCount}} result(s)</p>
|
<p>{{resultCount}} result(s)</p>
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
import { Component, OnInit } from '@angular/core';
|
import { Component, OnInit } from '@angular/core';
|
||||||
import { ActivatedRoute } from '@angular/router';
|
import { ActivatedRoute, Router } from '@angular/router';
|
||||||
import { SearchHit } from 'src/app/data/search-result';
|
import { SearchHit } from 'src/app/data/search-result';
|
||||||
import { SearchService } from 'src/app/services/rest/search.service';
|
import { SearchService } from 'src/app/services/rest/search.service';
|
||||||
|
|
||||||
@ -22,7 +22,9 @@ export class SearchComponent implements OnInit {
|
|||||||
|
|
||||||
resultCount
|
resultCount
|
||||||
|
|
||||||
constructor(private searchService: SearchService, private route: ActivatedRoute) { }
|
correctedQuery: string = null
|
||||||
|
|
||||||
|
constructor(private searchService: SearchService, private route: ActivatedRoute, private router: Router) { }
|
||||||
|
|
||||||
ngOnInit(): void {
|
ngOnInit(): void {
|
||||||
this.route.queryParamMap.subscribe(paramMap => {
|
this.route.queryParamMap.subscribe(paramMap => {
|
||||||
@ -34,6 +36,11 @@ export class SearchComponent implements OnInit {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
searchCorrectedQuery() {
|
||||||
|
this.router.navigate(["search"], {queryParams: {query: this.correctedQuery}})
|
||||||
|
this.correctedQuery = null
|
||||||
|
}
|
||||||
|
|
||||||
loadPage(append: boolean = false) {
|
loadPage(append: boolean = false) {
|
||||||
this.searchService.search(this.query, this.currentPage).subscribe(result => {
|
this.searchService.search(this.query, this.currentPage).subscribe(result => {
|
||||||
if (append) {
|
if (append) {
|
||||||
@ -44,12 +51,11 @@ export class SearchComponent implements OnInit {
|
|||||||
this.pageCount = result.page_count
|
this.pageCount = result.page_count
|
||||||
this.searching = false
|
this.searching = false
|
||||||
this.resultCount = result.count
|
this.resultCount = result.count
|
||||||
|
this.correctedQuery = result.corrected_query
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
onScroll() {
|
onScroll() {
|
||||||
console.log(this.currentPage)
|
|
||||||
console.log(this.pageCount)
|
|
||||||
if (this.currentPage < this.pageCount) {
|
if (this.currentPage < this.pageCount) {
|
||||||
this.currentPage += 1
|
this.currentPage += 1
|
||||||
this.loadPage(true)
|
this.loadPage(true)
|
||||||
|
@ -21,6 +21,8 @@ export interface SearchResult {
|
|||||||
page?: number
|
page?: number
|
||||||
page_count?: number
|
page_count?: number
|
||||||
|
|
||||||
|
corrected_query?: string
|
||||||
|
|
||||||
results?: SearchHit[]
|
results?: SearchHit[]
|
||||||
|
|
||||||
|
|
||||||
|
@ -4,7 +4,7 @@ from contextlib import contextmanager
|
|||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from whoosh import highlight
|
from whoosh import highlight
|
||||||
from whoosh.fields import Schema, TEXT, NUMERIC
|
from whoosh.fields import Schema, TEXT, NUMERIC, KEYWORD
|
||||||
from whoosh.highlight import Formatter, get_text
|
from whoosh.highlight import Formatter, get_text
|
||||||
from whoosh.index import create_in, exists_in, open_dir
|
from whoosh.index import create_in, exists_in, open_dir
|
||||||
from whoosh.qparser import MultifieldParser
|
from whoosh.qparser import MultifieldParser
|
||||||
@ -59,14 +59,15 @@ def get_schema():
|
|||||||
id=NUMERIC(stored=True, unique=True, numtype=int),
|
id=NUMERIC(stored=True, unique=True, numtype=int),
|
||||||
title=TEXT(stored=True),
|
title=TEXT(stored=True),
|
||||||
content=TEXT(),
|
content=TEXT(),
|
||||||
correspondent=TEXT(stored=True)
|
correspondent=TEXT(stored=True),
|
||||||
|
tag=KEYWORD(stored=True, commas=True, scorable=True, lowercase=True)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def open_index(recreate=False):
|
def open_index(recreate=False):
|
||||||
try:
|
try:
|
||||||
if exists_in(settings.INDEX_DIR) and not recreate:
|
if exists_in(settings.INDEX_DIR) and not recreate:
|
||||||
return open_dir(settings.INDEX_DIR)
|
return open_dir(settings.INDEX_DIR, schema=get_schema())
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error while opening the index: {e}, recreating.")
|
logger.error(f"Error while opening the index: {e}, recreating.")
|
||||||
|
|
||||||
@ -77,11 +78,13 @@ def open_index(recreate=False):
|
|||||||
|
|
||||||
def update_document(writer, doc):
|
def update_document(writer, doc):
|
||||||
logger.debug("Indexing {}...".format(doc))
|
logger.debug("Indexing {}...".format(doc))
|
||||||
|
tags = ",".join([t.name for t in doc.tags.all()])
|
||||||
writer.update_document(
|
writer.update_document(
|
||||||
id=doc.pk,
|
id=doc.pk,
|
||||||
title=doc.title,
|
title=doc.title,
|
||||||
content=doc.content,
|
content=doc.content,
|
||||||
correspondent=doc.correspondent.name if doc.correspondent else None
|
correspondent=doc.correspondent.name if doc.correspondent else None,
|
||||||
|
tag=tags if tags else None
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -106,13 +109,21 @@ def remove_document_from_index(document):
|
|||||||
def query_page(ix, query, page):
|
def query_page(ix, query, page):
|
||||||
searcher = ix.searcher()
|
searcher = ix.searcher()
|
||||||
try:
|
try:
|
||||||
query_parser = MultifieldParser(["content", "title", "correspondent"],
|
query_parser = MultifieldParser(
|
||||||
|
["content", "title", "correspondent", "tag"],
|
||||||
ix.schema).parse(query)
|
ix.schema).parse(query)
|
||||||
result_page = searcher.search_page(query_parser, page)
|
result_page = searcher.search_page(query_parser, page)
|
||||||
result_page.results.fragmenter = highlight.ContextFragmenter(
|
result_page.results.fragmenter = highlight.ContextFragmenter(
|
||||||
surround=50)
|
surround=50)
|
||||||
result_page.results.formatter = JsonFormatter()
|
result_page.results.formatter = JsonFormatter()
|
||||||
yield result_page
|
|
||||||
|
corrected = searcher.correct_query(query_parser, query)
|
||||||
|
if corrected.query != query_parser:
|
||||||
|
corrected_query = corrected.string
|
||||||
|
else:
|
||||||
|
corrected_query = None
|
||||||
|
|
||||||
|
yield result_page, corrected_query
|
||||||
finally:
|
finally:
|
||||||
searcher.close()
|
searcher.close()
|
||||||
|
|
||||||
|
@ -289,6 +289,22 @@ class DocumentApiTest(DirectoriesMixin, APITestCase):
|
|||||||
self.assertEqual(response.status_code, 200)
|
self.assertEqual(response.status_code, 200)
|
||||||
self.assertEqual(len(response.data), 10)
|
self.assertEqual(len(response.data), 10)
|
||||||
|
|
||||||
|
def test_search_spelling_correction(self):
|
||||||
|
with AsyncWriter(index.open_index()) as writer:
|
||||||
|
for i in range(55):
|
||||||
|
doc = Document.objects.create(checksum=str(i), pk=i+1, title=f"Document {i+1}", content=f"Things document {i+1}")
|
||||||
|
index.update_document(writer, doc)
|
||||||
|
|
||||||
|
response = self.client.get("/api/search/?query=thing")
|
||||||
|
correction = response.data['corrected_query']
|
||||||
|
|
||||||
|
self.assertEqual(correction, "things")
|
||||||
|
|
||||||
|
response = self.client.get("/api/search/?query=things")
|
||||||
|
correction = response.data['corrected_query']
|
||||||
|
|
||||||
|
self.assertEqual(correction, None)
|
||||||
|
|
||||||
def test_statistics(self):
|
def test_statistics(self):
|
||||||
|
|
||||||
doc1 = Document.objects.create(title="none1", checksum="A")
|
doc1 = Document.objects.create(title="none1", checksum="A")
|
||||||
|
@ -227,11 +227,13 @@ class SearchView(APIView):
|
|||||||
if page < 1:
|
if page < 1:
|
||||||
page = 1
|
page = 1
|
||||||
|
|
||||||
with index.query_page(self.ix, query, page) as result_page:
|
with index.query_page(self.ix, query, page) as (result_page,
|
||||||
|
corrected_query):
|
||||||
return Response(
|
return Response(
|
||||||
{'count': len(result_page),
|
{'count': len(result_page),
|
||||||
'page': result_page.pagenum,
|
'page': result_page.pagenum,
|
||||||
'page_count': result_page.pagecount,
|
'page_count': result_page.pagecount,
|
||||||
|
'corrected_query': corrected_query,
|
||||||
'results': list(map(self.add_infos_to_hit, result_page))})
|
'results': list(map(self.add_infos_to_hit, result_page))})
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user