diff --git a/docs/changelog.rst b/docs/changelog.rst index f326b95ce..806d09fe0 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -10,12 +10,14 @@ paperless-ng 0.9.4 * Searching: - * Paperless now supports searching by tags. In order to have this applied to your + * Paperless now supports searching by tags, types and dates. In order to have this applied to your existing documents, you need to perform a ``document_index reindex`` management command (see :ref:`administration-index`) - that adds tags to your search index. Paperless keeps your index updated after that whenever + that adds tags to your search index. You only need to do this once, so that paperless can find + your documents by tags, types and dates. Paperless keeps your index updated after that whenever something changes. * Paperless now has spelling corrections ("Did you mean") for mistyped queries. + * The documentation contains :ref:`information about the query syntax <basic-searching>`. * Front end: diff --git a/docs/usage_overview.rst b/docs/usage_overview.rst index 0e50dafc2..4ce7f9b7a 100644 --- a/docs/usage_overview.rst +++ b/docs/usage_overview.rst @@ -156,6 +156,62 @@ REST API You can also submit a document using the REST API, see :ref:`api-file_uploads` for details. +.. _basic-searching: + +Searching +######### + +Paperless offers an extensive searching mechanism that is designed to allow you to quickly +find a document you're looking for (for example, that thing that just broke and you bought +a couple months ago, that contract you signed 8 years ago). + +When you search paperless for a document, it tries to match this query against your documents. +Paperless will look for matching documents by inspecting their content, title, correspondent, +type and tags. Paperless returns a scored list of results, so that documents matching your query +better will appear further up in the search results. + +By default, paperless returns only documents which contain all words typed in the search bar. 
+However, paperless also offers advanced search syntax if you want to drill down the results +further. + +Matching documents with logical expressions: + +.. code:: none + + shopname AND (product1 OR product2) + +Matching specific tags, correspondents or types: + +.. code:: none + + type:invoice tag:unpaid + correspondent:university certificate + +Matching dates: + +.. code:: none + + created:[2005 to 2009] + added:yesterday + modified:today + +Matching inexact words: + +.. code:: none + + produ*name + +.. note:: + + Inexact terms are hard for search indexes. These queries might take a while to execute. That's why paperless offers + auto complete and query correction. + +All of these constructs can be combined as you see fit. +If you want to learn more about the query language used by paperless, paperless uses Whoosh's default query language. +Head over to `Whoosh query language <https://whoosh.readthedocs.io/en/latest/querylang.html>`_. +For details on what date parsing utilities are available, see +`Date parsing <https://whoosh.readthedocs.io/en/latest/dates.html#parsing-date-queries>`_. + .. _usage-recommended_workflow: diff --git a/src-ui/src/app/components/search/search.component.html b/src-ui/src/app/components/search/search.component.html index cb5c1a8e8..55fcee900 100644 --- a/src-ui/src/app/components/search/search.component.html +++ b/src-ui/src/app/components/search/search.component.html @@ -1,6 +1,8 @@ +
Invalid search query: {{errorMessage}}
+

Search string: {{query}} @@ -9,7 +11,7 @@

-
+

{{resultCount}} result(s)

{ if (append) { this.results.push(...result.results) @@ -52,6 +55,12 @@ export class SearchComponent implements OnInit { this.searching = false this.resultCount = result.count this.correctedQuery = result.corrected_query + }, error => { + this.searching = false + this.resultCount = 1 + this.page_count = 1 + this.results = [] + this.errorMessage = error.error }) } diff --git a/src/documents/index.py b/src/documents/index.py index 822ac2e8a..b4d6e1c51 100644 --- a/src/documents/index.py +++ b/src/documents/index.py @@ -4,10 +4,11 @@ from contextlib import contextmanager from django.conf import settings from whoosh import highlight -from whoosh.fields import Schema, TEXT, NUMERIC, KEYWORD +from whoosh.fields import Schema, TEXT, NUMERIC, KEYWORD, DATETIME from whoosh.highlight import Formatter, get_text from whoosh.index import create_in, exists_in, open_dir from whoosh.qparser import MultifieldParser +from whoosh.qparser.dateparse import DateParserPlugin from whoosh.writing import AsyncWriter @@ -60,7 +61,11 @@ def get_schema(): title=TEXT(stored=True), content=TEXT(), correspondent=TEXT(stored=True), - tag=KEYWORD(stored=True, commas=True, scorable=True, lowercase=True) + tag=KEYWORD(stored=True, commas=True, scorable=True, lowercase=True), + type=TEXT(stored=True), + created=DATETIME(stored=True, sortable=True), + modified=DATETIME(stored=True, sortable=True), + added=DATETIME(stored=True, sortable=True), ) @@ -84,7 +89,11 @@ def update_document(writer, doc): title=doc.title, content=doc.content, correspondent=doc.correspondent.name if doc.correspondent else None, - tag=tags if tags else None + tag=tags if tags else None, + type=doc.document_type.name if doc.document_type else None, + created=doc.created, + added=doc.added, + modified=doc.modified, ) @@ -106,19 +115,22 @@ def remove_document_from_index(document): @contextmanager -def query_page(ix, query, page): +def query_page(ix, querystring, page): searcher = ix.searcher() try: - query_parser = MultifieldParser( 
- ["content", "title", "correspondent", "tag"], - ix.schema).parse(query) - result_page = searcher.search_page(query_parser, page) + qp = MultifieldParser( + ["content", "title", "correspondent", "tag", "type"], + ix.schema) + qp.add_plugin(DateParserPlugin()) + + q = qp.parse(querystring) + result_page = searcher.search_page(q, page) result_page.results.fragmenter = highlight.ContextFragmenter( surround=50) result_page.results.formatter = JsonFormatter() - corrected = searcher.correct_query(query_parser, query) - if corrected.query != query_parser: + corrected = searcher.correct_query(q, querystring) + if corrected.query != q: corrected_query = corrected.string else: corrected_query = None diff --git a/src/documents/views.py b/src/documents/views.py index 0ac232436..332bdfe8f 100755 --- a/src/documents/views.py +++ b/src/documents/views.py @@ -217,16 +217,23 @@ class SearchView(APIView): } def get(self, request, format=None): - if 'query' in request.query_params: - query = request.query_params['query'] - try: - page = int(request.query_params.get('page', 1)) - except (ValueError, TypeError): - page = 1 + if not 'query' in request.query_params: + return Response({ + 'count': 0, + 'page': 0, + 'page_count': 0, + 'results': []}) - if page < 1: - page = 1 + query = request.query_params['query'] + try: + page = int(request.query_params.get('page', 1)) + except (ValueError, TypeError): + page = 1 + if page < 1: + page = 1 + + try: with index.query_page(self.ix, query, page) as (result_page, corrected_query): return Response( @@ -235,13 +242,8 @@ class SearchView(APIView): 'page_count': result_page.pagecount, 'corrected_query': corrected_query, 'results': list(map(self.add_infos_to_hit, result_page))}) - - else: - return Response({ - 'count': 0, - 'page': 0, - 'page_count': 0, - 'results': []}) + except Exception as e: + return HttpResponseBadRequest(str(e)) class SearchAutoCompleteView(APIView):