searching for types and dates, error catching, documentation and changelog.

This commit is contained in:
jonaswinkler 2020-11-30 16:13:35 +01:00
parent 1ef12d2cbc
commit b03d4c7646
6 changed files with 112 additions and 29 deletions

View File

@ -10,12 +10,14 @@ paperless-ng 0.9.4
* Searching:
* Paperless now supports searching by tags. In order to have this applied to your
* Paperless now supports searching by tags, types and dates. In order to have this applied to your
existing documents, you need to perform a ``document_index reindex`` management command
(see :ref:`administration-index`)
that adds tags to your search index. Paperless keeps your index updated after that whenever
that adds tags to your search index. You only need to do this once, so that paperless can find
your documents by tags, types and dates. Paperless keeps your index updated after that whenever
something changes.
* Paperless now has spelling corrections ("Did you mean") for mistyped queries.
* The documentation contains :ref:`information about the query syntax <basic-searching>`.
* Front end:

View File

@ -156,6 +156,62 @@ REST API
You can also submit a document using the REST API, see :ref:`api-file_uploads` for details.
.. _basic-searching:
Searching
#########
Paperless offers an extensive searching mechanism that is designed to allow you to quickly
find a document you're looking for (for example, that thing that just broke and you bought
a couple of months ago, or that contract you signed 8 years ago).
When you search paperless for a document, it tries to match this query against your documents.
Paperless will look for matching documents by inspecting their content, title, correspondent,
type and tags. Paperless returns a scored list of results, so that documents matching your query
better will appear further up in the search results.
By default, paperless returns only documents which contain all words typed in the search bar.
However, paperless also offers advanced search syntax if you want to drill down the results
further.
Matching documents with logical expressions:
.. code:: none
shopname AND (product1 OR product2)
Matching specific tags, correspondents or types:
.. code:: none
type:invoice tag:unpaid
correspondent:university certificate
Matching dates:
.. code:: none
created:[2005 to 2009]
added:yesterday
modified:today
Matching inexact words:
.. code:: none
produ*name
.. note::
Inexact terms are hard for search indexes. These queries might take a while to execute. That's why paperless offers
autocompletion and query correction.
All of these constructs can be combined as you see fit.
Paperless uses Whoosh's default query language. If you want to learn more about it,
head over to `Whoosh query language <https://whoosh.readthedocs.io/en/latest/querylang.html>`_.
For details on what date parsing utilities are available, see
`Date parsing <https://whoosh.readthedocs.io/en/latest/dates.html#parsing-date-queries>`_.
.. _usage-recommended_workflow:

View File

@ -1,6 +1,8 @@
<app-page-header title="Search results">
</app-page-header>
<div *ngIf="errorMessage" class="alert alert-danger">Invalid search query: {{errorMessage}}</div>
<p>
Search string: <i>{{query}}</i>
<ng-container *ngIf="correctedQuery">
@ -9,7 +11,7 @@
</p>
<div [class.result-content-searching]="searching" infiniteScroll (scrolled)="onScroll()">
<div *ngIf="!errorMessage" [class.result-content-searching]="searching" infiniteScroll (scrolled)="onScroll()">
<p>{{resultCount}} result(s)</p>
<app-document-card-large *ngFor="let result of results"
[document]="result.document"

View File

@ -24,6 +24,8 @@ export class SearchComponent implements OnInit {
correctedQuery: string = null
errorMessage: string
constructor(private searchService: SearchService, private route: ActivatedRoute, private router: Router) { }
ngOnInit(): void {
@ -38,10 +40,11 @@ export class SearchComponent implements OnInit {
searchCorrectedQuery() {
this.router.navigate(["search"], {queryParams: {query: this.correctedQuery}})
this.correctedQuery = null
}
loadPage(append: boolean = false) {
this.errorMessage = null
this.correctedQuery = null
this.searchService.search(this.query, this.currentPage).subscribe(result => {
if (append) {
this.results.push(...result.results)
@ -52,6 +55,12 @@ export class SearchComponent implements OnInit {
this.searching = false
this.resultCount = result.count
this.correctedQuery = result.corrected_query
}, error => {
this.searching = false
this.resultCount = 1
this.page_count = 1
this.results = []
this.errorMessage = error.error
})
}

View File

@ -4,10 +4,11 @@ from contextlib import contextmanager
from django.conf import settings
from whoosh import highlight
from whoosh.fields import Schema, TEXT, NUMERIC, KEYWORD
from whoosh.fields import Schema, TEXT, NUMERIC, KEYWORD, DATETIME
from whoosh.highlight import Formatter, get_text
from whoosh.index import create_in, exists_in, open_dir
from whoosh.qparser import MultifieldParser
from whoosh.qparser.dateparse import DateParserPlugin
from whoosh.writing import AsyncWriter
@ -60,7 +61,11 @@ def get_schema():
title=TEXT(stored=True),
content=TEXT(),
correspondent=TEXT(stored=True),
tag=KEYWORD(stored=True, commas=True, scorable=True, lowercase=True)
tag=KEYWORD(stored=True, commas=True, scorable=True, lowercase=True),
type=TEXT(stored=True),
created=DATETIME(stored=True, sortable=True),
modified=DATETIME(stored=True, sortable=True),
added=DATETIME(stored=True, sortable=True),
)
@ -84,7 +89,11 @@ def update_document(writer, doc):
title=doc.title,
content=doc.content,
correspondent=doc.correspondent.name if doc.correspondent else None,
tag=tags if tags else None
tag=tags if tags else None,
type=doc.document_type.name if doc.document_type else None,
created=doc.created,
added=doc.added,
modified=doc.modified,
)
@ -106,19 +115,22 @@ def remove_document_from_index(document):
@contextmanager
def query_page(ix, query, page):
def query_page(ix, querystring, page):
searcher = ix.searcher()
try:
query_parser = MultifieldParser(
["content", "title", "correspondent", "tag"],
ix.schema).parse(query)
result_page = searcher.search_page(query_parser, page)
qp = MultifieldParser(
["content", "title", "correspondent", "tag", "type"],
ix.schema)
qp.add_plugin(DateParserPlugin())
q = qp.parse(querystring)
result_page = searcher.search_page(q, page)
result_page.results.fragmenter = highlight.ContextFragmenter(
surround=50)
result_page.results.formatter = JsonFormatter()
corrected = searcher.correct_query(query_parser, query)
if corrected.query != query_parser:
corrected = searcher.correct_query(q, querystring)
if corrected.query != q:
corrected_query = corrected.string
else:
corrected_query = None

View File

@ -217,16 +217,23 @@ class SearchView(APIView):
}
def get(self, request, format=None):
if 'query' in request.query_params:
query = request.query_params['query']
try:
page = int(request.query_params.get('page', 1))
except (ValueError, TypeError):
page = 1
if not 'query' in request.query_params:
return Response({
'count': 0,
'page': 0,
'page_count': 0,
'results': []})
if page < 1:
page = 1
query = request.query_params['query']
try:
page = int(request.query_params.get('page', 1))
except (ValueError, TypeError):
page = 1
if page < 1:
page = 1
try:
with index.query_page(self.ix, query, page) as (result_page,
corrected_query):
return Response(
@ -235,13 +242,8 @@ class SearchView(APIView):
'page_count': result_page.pagecount,
'corrected_query': corrected_query,
'results': list(map(self.add_infos_to_hit, result_page))})
else:
return Response({
'count': 0,
'page': 0,
'page_count': 0,
'results': []})
except Exception as e:
return HttpResponseBadRequest(str(e))
class SearchAutoCompleteView(APIView):