searching for types and dates, error catching, documentation and changelog.

This commit is contained in:
jonaswinkler 2020-11-30 16:13:35 +01:00
parent 1ef12d2cbc
commit b03d4c7646
6 changed files with 112 additions and 29 deletions

View File

@ -10,12 +10,14 @@ paperless-ng 0.9.4
* Searching: * Searching:
* Paperless now supports searching by tags. In order to have this applied to your * Paperless now supports searching by tags, types and dates. In order to have this applied to your
existing documents, you need to perform a ``document_index reindex`` management command existing documents, you need to perform a ``document_index reindex`` management command
(see :ref:`administration-index`) (see :ref:`administration-index`)
that adds tags to your search index. Paperless keeps your index updated after that whenever that adds tags to your search index. You only need to do this once, so that paperless can find
your documents by tags, types and dates. Paperless keeps your index updated after that whenever
something changes. something changes.
* Paperless now has spelling corrections ("Did you mean") for mistyped queries. * Paperless now has spelling corrections ("Did you mean") for mistyped queries.
* The documentation contains :ref:`information about the query syntax <basic-searching>`.
* Front end: * Front end:

View File

@ -156,6 +156,62 @@ REST API
You can also submit a document using the REST API, see :ref:`api-file_uploads` for details. You can also submit a document using the REST API, see :ref:`api-file_uploads` for details.
.. _basic-searching:
Searching
#########
Paperless offers an extensive searching mechanism that is designed to allow you to quickly
find a document you're looking for (for example, that thing that just broke and that you bought
a couple of months ago, or that contract you signed 8 years ago).
When you search paperless for a document, it tries to match this query against your documents.
Paperless will look for matching documents by inspecting their content, title, correspondent,
type and tags. Paperless returns a scored list of results, so that documents matching your query
better will appear further up in the search results.
By default, paperless returns only documents which contain all words typed in the search bar.
However, paperless also offers advanced search syntax if you want to drill down the results
further.
Matching documents with logical expressions:
.. code:: none
shopname AND (product1 OR product2)
Matching specific tags, correspondents or types:
.. code:: none
type:invoice tag:unpaid
correspondent:university certificate
Matching dates:
.. code:: none
created:[2005 to 2009]
added:yesterday
modified:today
Matching inexact words:
.. code:: none
produ*name
.. note::
Inexact terms are hard for search indexes. These queries might take a while to execute. That's why paperless offers
autocomplete and query correction.
All of these constructs can be combined as you see fit.
Paperless uses Whoosh's default query language. If you want to learn more about it,
head over to `Whoosh query language <https://whoosh.readthedocs.io/en/latest/querylang.html>`_.
For details on what date parsing utilities are available, see
`Date parsing <https://whoosh.readthedocs.io/en/latest/dates.html#parsing-date-queries>`_.
.. _usage-recommended_workflow: .. _usage-recommended_workflow:

View File

@ -1,6 +1,8 @@
<app-page-header title="Search results"> <app-page-header title="Search results">
</app-page-header> </app-page-header>
<div *ngIf="errorMessage" class="alert alert-danger">Invalid search query: {{errorMessage}}</div>
<p> <p>
Search string: <i>{{query}}</i> Search string: <i>{{query}}</i>
<ng-container *ngIf="correctedQuery"> <ng-container *ngIf="correctedQuery">
@ -9,7 +11,7 @@
</p> </p>
<div [class.result-content-searching]="searching" infiniteScroll (scrolled)="onScroll()"> <div *ngIf="!errorMessage" [class.result-content-searching]="searching" infiniteScroll (scrolled)="onScroll()">
<p>{{resultCount}} result(s)</p> <p>{{resultCount}} result(s)</p>
<app-document-card-large *ngFor="let result of results" <app-document-card-large *ngFor="let result of results"
[document]="result.document" [document]="result.document"

View File

@ -24,6 +24,8 @@ export class SearchComponent implements OnInit {
correctedQuery: string = null correctedQuery: string = null
errorMessage: string
constructor(private searchService: SearchService, private route: ActivatedRoute, private router: Router) { } constructor(private searchService: SearchService, private route: ActivatedRoute, private router: Router) { }
ngOnInit(): void { ngOnInit(): void {
@ -38,10 +40,11 @@ export class SearchComponent implements OnInit {
searchCorrectedQuery() { searchCorrectedQuery() {
this.router.navigate(["search"], {queryParams: {query: this.correctedQuery}}) this.router.navigate(["search"], {queryParams: {query: this.correctedQuery}})
this.correctedQuery = null
} }
loadPage(append: boolean = false) { loadPage(append: boolean = false) {
this.errorMessage = null
this.correctedQuery = null
this.searchService.search(this.query, this.currentPage).subscribe(result => { this.searchService.search(this.query, this.currentPage).subscribe(result => {
if (append) { if (append) {
this.results.push(...result.results) this.results.push(...result.results)
@ -52,6 +55,12 @@ export class SearchComponent implements OnInit {
this.searching = false this.searching = false
this.resultCount = result.count this.resultCount = result.count
this.correctedQuery = result.corrected_query this.correctedQuery = result.corrected_query
}, error => {
this.searching = false
this.resultCount = 1
this.page_count = 1
this.results = []
this.errorMessage = error.error
}) })
} }

View File

@ -4,10 +4,11 @@ from contextlib import contextmanager
from django.conf import settings from django.conf import settings
from whoosh import highlight from whoosh import highlight
from whoosh.fields import Schema, TEXT, NUMERIC, KEYWORD from whoosh.fields import Schema, TEXT, NUMERIC, KEYWORD, DATETIME
from whoosh.highlight import Formatter, get_text from whoosh.highlight import Formatter, get_text
from whoosh.index import create_in, exists_in, open_dir from whoosh.index import create_in, exists_in, open_dir
from whoosh.qparser import MultifieldParser from whoosh.qparser import MultifieldParser
from whoosh.qparser.dateparse import DateParserPlugin
from whoosh.writing import AsyncWriter from whoosh.writing import AsyncWriter
@ -60,7 +61,11 @@ def get_schema():
title=TEXT(stored=True), title=TEXT(stored=True),
content=TEXT(), content=TEXT(),
correspondent=TEXT(stored=True), correspondent=TEXT(stored=True),
tag=KEYWORD(stored=True, commas=True, scorable=True, lowercase=True) tag=KEYWORD(stored=True, commas=True, scorable=True, lowercase=True),
type=TEXT(stored=True),
created=DATETIME(stored=True, sortable=True),
modified=DATETIME(stored=True, sortable=True),
added=DATETIME(stored=True, sortable=True),
) )
@ -84,7 +89,11 @@ def update_document(writer, doc):
title=doc.title, title=doc.title,
content=doc.content, content=doc.content,
correspondent=doc.correspondent.name if doc.correspondent else None, correspondent=doc.correspondent.name if doc.correspondent else None,
tag=tags if tags else None tag=tags if tags else None,
type=doc.document_type.name if doc.document_type else None,
created=doc.created,
added=doc.added,
modified=doc.modified,
) )
@ -106,19 +115,22 @@ def remove_document_from_index(document):
@contextmanager @contextmanager
def query_page(ix, query, page): def query_page(ix, querystring, page):
searcher = ix.searcher() searcher = ix.searcher()
try: try:
query_parser = MultifieldParser( qp = MultifieldParser(
["content", "title", "correspondent", "tag"], ["content", "title", "correspondent", "tag", "type"],
ix.schema).parse(query) ix.schema)
result_page = searcher.search_page(query_parser, page) qp.add_plugin(DateParserPlugin())
q = qp.parse(querystring)
result_page = searcher.search_page(q, page)
result_page.results.fragmenter = highlight.ContextFragmenter( result_page.results.fragmenter = highlight.ContextFragmenter(
surround=50) surround=50)
result_page.results.formatter = JsonFormatter() result_page.results.formatter = JsonFormatter()
corrected = searcher.correct_query(query_parser, query) corrected = searcher.correct_query(q, querystring)
if corrected.query != query_parser: if corrected.query != q:
corrected_query = corrected.string corrected_query = corrected.string
else: else:
corrected_query = None corrected_query = None

View File

@ -217,7 +217,13 @@ class SearchView(APIView):
} }
def get(self, request, format=None): def get(self, request, format=None):
if 'query' in request.query_params: if not 'query' in request.query_params:
return Response({
'count': 0,
'page': 0,
'page_count': 0,
'results': []})
query = request.query_params['query'] query = request.query_params['query']
try: try:
page = int(request.query_params.get('page', 1)) page = int(request.query_params.get('page', 1))
@ -227,6 +233,7 @@ class SearchView(APIView):
if page < 1: if page < 1:
page = 1 page = 1
try:
with index.query_page(self.ix, query, page) as (result_page, with index.query_page(self.ix, query, page) as (result_page,
corrected_query): corrected_query):
return Response( return Response(
@ -235,13 +242,8 @@ class SearchView(APIView):
'page_count': result_page.pagecount, 'page_count': result_page.pagecount,
'corrected_query': corrected_query, 'corrected_query': corrected_query,
'results': list(map(self.add_infos_to_hit, result_page))}) 'results': list(map(self.add_infos_to_hit, result_page))})
except Exception as e:
else: return HttpResponseBadRequest(str(e))
return Response({
'count': 0,
'page': 0,
'page_count': 0,
'results': []})
class SearchAutoCompleteView(APIView): class SearchAutoCompleteView(APIView):