From b03d4c7646d870a261665aefa563062dac8fc246 Mon Sep 17 00:00:00 2001 From: jonaswinkler <jonas.winkler@jpwinkler.de> Date: Mon, 30 Nov 2020 16:13:35 +0100 Subject: [PATCH] searching for types and dates, error catching, documentation and changelog. --- docs/changelog.rst | 6 +- docs/usage_overview.rst | 56 +++++++++++++++++++ .../components/search/search.component.html | 4 +- .../app/components/search/search.component.ts | 11 +++- src/documents/index.py | 32 +++++++---- src/documents/views.py | 32 ++++++----- 6 files changed, 112 insertions(+), 29 deletions(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index f326b95ce..806d09fe0 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -10,12 +10,14 @@ paperless-ng 0.9.4 * Searching: - * Paperless now supports searching by tags. In order to have this applied to your + * Paperless now supports searching by tags, types and dates. In order to have this applied to your existing documents, you need to perform a ``document_index reindex`` management command (see :ref:`administration-index`) - that adds tags to your search index. Paperless keeps your index updated after that whenever + that adds tags to your search index. You only need to do this once, so that paperless can find + your documents by tags, types and dates. Paperless keeps your index updated after that whenever something changes. * Paperless now has spelling corrections ("Did you mean") for misstyped queries. + * The documentation contains :ref:`information about the query syntax <basic-searching>`. * Front end: diff --git a/docs/usage_overview.rst b/docs/usage_overview.rst index 0e50dafc2..4ce7f9b7a 100644 --- a/docs/usage_overview.rst +++ b/docs/usage_overview.rst @@ -156,6 +156,62 @@ REST API You can also submit a document using the REST API, see :ref:`api-file_uploads` for details. +.. 
_basic-searching: + +Searching +######### + +Paperless offers an extensive searching mechanism that is designed to allow you to quickly +find a document you're looking for (for example, that thing that just broke and you bought +a couple of months ago, or that contract you signed 8 years ago). + +When you search paperless for a document, it tries to match your query against your documents. +Paperless will look for matching documents by inspecting their content, title, correspondent, +type and tags. Paperless returns a scored list of results, so that documents matching your query +better will appear further up in the search results. + +By default, paperless returns only documents which contain all words typed in the search bar. +However, paperless also offers advanced search syntax if you want to drill down the results +further. + +Matching documents with logical expressions: + +.. code:: none + + shopname AND (product1 OR product2) + +Matching specific tags, correspondents or types: + +.. code:: none + + type:invoice tag:unpaid + correspondent:university certificate + +Matching dates: + +.. code:: none + + created:[2005 to 2009] + added:yesterday + modified:today + +Matching inexact words: + +.. code:: none + + produ*name + +.. note:: + + Inexact terms are hard for search indexes. These queries might take a while to execute. That's why paperless offers + autocomplete and query correction. + +All of these constructs can be combined as you see fit. +Paperless uses Whoosh's default query language. If you want to learn more about it, +head over to `Whoosh query language <https://whoosh.readthedocs.io/en/latest/querylang.html>`_. +For details on what date parsing utilities are available, see +`Date parsing <https://whoosh.readthedocs.io/en/latest/dates.html#parsing-date-queries>`_. + .. 
_usage-recommended_workflow: diff --git a/src-ui/src/app/components/search/search.component.html b/src-ui/src/app/components/search/search.component.html index cb5c1a8e8..55fcee900 100644 --- a/src-ui/src/app/components/search/search.component.html +++ b/src-ui/src/app/components/search/search.component.html @@ -1,6 +1,8 @@ <app-page-header title="Search results"> </app-page-header> +<div *ngIf="errorMessage" class="alert alert-danger">Invalid search query: {{errorMessage}}</div> + <p> Search string: <i>{{query}}</i> <ng-container *ngIf="correctedQuery"> @@ -9,7 +11,7 @@ </p> -<div [class.result-content-searching]="searching" infiniteScroll (scrolled)="onScroll()"> +<div *ngIf="!errorMessage" [class.result-content-searching]="searching" infiniteScroll (scrolled)="onScroll()"> <p>{{resultCount}} result(s)</p> <app-document-card-large *ngFor="let result of results" [document]="result.document" diff --git a/src-ui/src/app/components/search/search.component.ts b/src-ui/src/app/components/search/search.component.ts index 8320ac545..f3635e31e 100644 --- a/src-ui/src/app/components/search/search.component.ts +++ b/src-ui/src/app/components/search/search.component.ts @@ -24,6 +24,8 @@ export class SearchComponent implements OnInit { correctedQuery: string = null + errorMessage: string + constructor(private searchService: SearchService, private route: ActivatedRoute, private router: Router) { } ngOnInit(): void { @@ -38,10 +40,11 @@ export class SearchComponent implements OnInit { searchCorrectedQuery() { this.router.navigate(["search"], {queryParams: {query: this.correctedQuery}}) - this.correctedQuery = null } loadPage(append: boolean = false) { + this.errorMessage = null + this.correctedQuery = null this.searchService.search(this.query, this.currentPage).subscribe(result => { if (append) { this.results.push(...result.results) @@ -52,6 +55,12 @@ export class SearchComponent implements OnInit { this.searching = false this.resultCount = result.count this.correctedQuery = 
result.corrected_query + }, error => { + this.searching = false + this.resultCount = 1 + this.page_count = 1 + this.results = [] + this.errorMessage = error.error }) } diff --git a/src/documents/index.py b/src/documents/index.py index 822ac2e8a..b4d6e1c51 100644 --- a/src/documents/index.py +++ b/src/documents/index.py @@ -4,10 +4,11 @@ from contextlib import contextmanager from django.conf import settings from whoosh import highlight -from whoosh.fields import Schema, TEXT, NUMERIC, KEYWORD +from whoosh.fields import Schema, TEXT, NUMERIC, KEYWORD, DATETIME from whoosh.highlight import Formatter, get_text from whoosh.index import create_in, exists_in, open_dir from whoosh.qparser import MultifieldParser +from whoosh.qparser.dateparse import DateParserPlugin from whoosh.writing import AsyncWriter @@ -60,7 +61,11 @@ def get_schema(): title=TEXT(stored=True), content=TEXT(), correspondent=TEXT(stored=True), - tag=KEYWORD(stored=True, commas=True, scorable=True, lowercase=True) + tag=KEYWORD(stored=True, commas=True, scorable=True, lowercase=True), + type=TEXT(stored=True), + created=DATETIME(stored=True, sortable=True), + modified=DATETIME(stored=True, sortable=True), + added=DATETIME(stored=True, sortable=True), ) @@ -84,7 +89,11 @@ def update_document(writer, doc): title=doc.title, content=doc.content, correspondent=doc.correspondent.name if doc.correspondent else None, - tag=tags if tags else None + tag=tags if tags else None, + type=doc.document_type.name if doc.document_type else None, + created=doc.created, + added=doc.added, + modified=doc.modified, ) @@ -106,19 +115,22 @@ def remove_document_from_index(document): @contextmanager -def query_page(ix, query, page): +def query_page(ix, querystring, page): searcher = ix.searcher() try: - query_parser = MultifieldParser( - ["content", "title", "correspondent", "tag"], - ix.schema).parse(query) - result_page = searcher.search_page(query_parser, page) + qp = MultifieldParser( + ["content", "title", "correspondent", 
"tag", "type"], + ix.schema) + qp.add_plugin(DateParserPlugin()) + + q = qp.parse(querystring) + result_page = searcher.search_page(q, page) result_page.results.fragmenter = highlight.ContextFragmenter( surround=50) result_page.results.formatter = JsonFormatter() - corrected = searcher.correct_query(query_parser, query) - if corrected.query != query_parser: + corrected = searcher.correct_query(q, querystring) + if corrected.query != q: corrected_query = corrected.string else: corrected_query = None diff --git a/src/documents/views.py b/src/documents/views.py index 0ac232436..332bdfe8f 100755 --- a/src/documents/views.py +++ b/src/documents/views.py @@ -217,16 +217,23 @@ class SearchView(APIView): } def get(self, request, format=None): - if 'query' in request.query_params: - query = request.query_params['query'] - try: - page = int(request.query_params.get('page', 1)) - except (ValueError, TypeError): - page = 1 + if not 'query' in request.query_params: + return Response({ + 'count': 0, + 'page': 0, + 'page_count': 0, + 'results': []}) - if page < 1: - page = 1 + query = request.query_params['query'] + try: + page = int(request.query_params.get('page', 1)) + except (ValueError, TypeError): + page = 1 + if page < 1: + page = 1 + + try: with index.query_page(self.ix, query, page) as (result_page, corrected_query): return Response( @@ -235,13 +242,8 @@ class SearchView(APIView): 'page_count': result_page.pagecount, 'corrected_query': corrected_query, 'results': list(map(self.add_infos_to_hit, result_page))}) - - else: - return Response({ - 'count': 0, - 'page': 0, - 'page_count': 0, - 'results': []}) + except Exception as e: + return HttpResponseBadRequest(str(e)) class SearchAutoCompleteView(APIView):