From b03d4c7646d870a261665aefa563062dac8fc246 Mon Sep 17 00:00:00 2001
From: jonaswinkler <jonas.winkler@jpwinkler.de>
Date: Mon, 30 Nov 2020 16:13:35 +0100
Subject: [PATCH] searching for types and dates, error catching, documentation
 and changelog.

---
 docs/changelog.rst                            |  6 +-
 docs/usage_overview.rst                       | 56 +++++++++++++++++++
 .../components/search/search.component.html   |  4 +-
 .../app/components/search/search.component.ts | 11 +++-
 src/documents/index.py                        | 32 +++++++----
 src/documents/views.py                        | 32 ++++++-----
 6 files changed, 112 insertions(+), 29 deletions(-)

diff --git a/docs/changelog.rst b/docs/changelog.rst
index f326b95ce..806d09fe0 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -10,12 +10,14 @@ paperless-ng 0.9.4
 
 * Searching:
 
-  * Paperless now supports searching by tags. In order to have this applied to your
+  * Paperless now supports searching by tags, types and dates. In order to have this applied to your
     existing documents, you need to perform a ``document_index reindex`` management command
     (see :ref:`administration-index`)
-    that adds tags to your search index. Paperless keeps your index updated after that whenever
+    that adds tags to your search index. You only need to do this once, so that paperless can find
+    your documents by tags, types and dates. Paperless keeps your index updated after that whenever
     something changes.
   * Paperless now has spelling corrections ("Did you mean") for misstyped queries.
+  * The documentation contains :ref:`information about the query syntax <basic-searching>`.
 
 * Front end:
 
diff --git a/docs/usage_overview.rst b/docs/usage_overview.rst
index 0e50dafc2..4ce7f9b7a 100644
--- a/docs/usage_overview.rst
+++ b/docs/usage_overview.rst
@@ -156,6 +156,62 @@ REST API
 
 You can also submit a document using the REST API, see :ref:`api-file_uploads` for details.
 
+.. _basic-searching:
+
+Searching
+#########
+
+Paperless offers an extensive searching mechanism that is designed to allow you to quickly
+find a document you're looking for (for example, the receipt for that thing you bought a couple
+of months ago that just broke, or that contract you signed 8 years ago).
+
+When you search paperless for a document, it tries to match this query against your documents.
+Paperless will look for matching documents by inspecting their content, title, correspondent,
+type and tags. Paperless returns a scored list of results, so that documents matching your query
+better will appear further up in the search results.
+
+By default, paperless returns only documents which contain all words typed in the search bar.
+However, paperless also offers advanced search syntax if you want to drill down the results
+further.
+
+Matching documents with logical expressions:
+
+.. code:: none
+
+  shopname AND (product1 OR product2)
+
+Matching specific tags, correspondents or types:
+
+.. code:: none
+
+  type:invoice tag:unpaid
+  correspondent:university certificate
+
+Matching dates:
+
+.. code:: none
+  
+  created:[2005 to 2009]
+  added:yesterday
+  modified:today
+
+Matching inexact words:
+
+.. code:: none
+
+  produ*name
+
+.. note::
+
+  Inexact terms are hard for search indexes, so these queries might take a while to execute. That's why paperless offers
+  autocomplete and query correction.
+
+All of these constructs can be combined as you see fit.
+Paperless uses Whoosh's default query language. If you want to learn more about it,
+head over to `Whoosh query language <https://whoosh.readthedocs.io/en/latest/querylang.html>`_.
+For details on what date parsing utilities are available, see
+`Date parsing <https://whoosh.readthedocs.io/en/latest/dates.html#parsing-date-queries>`_.
+ 
 
 .. _usage-recommended_workflow:
 
diff --git a/src-ui/src/app/components/search/search.component.html b/src-ui/src/app/components/search/search.component.html
index cb5c1a8e8..55fcee900 100644
--- a/src-ui/src/app/components/search/search.component.html
+++ b/src-ui/src/app/components/search/search.component.html
@@ -1,6 +1,8 @@
 <app-page-header title="Search results">
 </app-page-header>
 
+<div *ngIf="errorMessage" class="alert alert-danger">Invalid search query: {{errorMessage}}</div>
+
 <p>
     Search string: <i>{{query}}</i>
     <ng-container *ngIf="correctedQuery">
@@ -9,7 +11,7 @@
 
 </p>
 
-<div [class.result-content-searching]="searching" infiniteScroll (scrolled)="onScroll()">
+<div *ngIf="!errorMessage" [class.result-content-searching]="searching" infiniteScroll (scrolled)="onScroll()">
     <p>{{resultCount}} result(s)</p>
     <app-document-card-large *ngFor="let result of results"
         [document]="result.document"
diff --git a/src-ui/src/app/components/search/search.component.ts b/src-ui/src/app/components/search/search.component.ts
index 8320ac545..f3635e31e 100644
--- a/src-ui/src/app/components/search/search.component.ts
+++ b/src-ui/src/app/components/search/search.component.ts
@@ -24,6 +24,8 @@ export class SearchComponent implements OnInit {
 
   correctedQuery: string = null
 
+  errorMessage: string
+
   constructor(private searchService: SearchService, private route: ActivatedRoute, private router: Router) { }
 
   ngOnInit(): void {
@@ -38,10 +40,11 @@ export class SearchComponent implements OnInit {
 
   searchCorrectedQuery() {
     this.router.navigate(["search"], {queryParams: {query: this.correctedQuery}})
-    this.correctedQuery = null
   }
 
   loadPage(append: boolean = false) {
+    this.errorMessage = null
+    this.correctedQuery = null
     this.searchService.search(this.query, this.currentPage).subscribe(result => {
       if (append) {
         this.results.push(...result.results)
@@ -52,6 +55,12 @@ export class SearchComponent implements OnInit {
       this.searching = false
       this.resultCount = result.count
       this.correctedQuery = result.corrected_query
+    }, error => {
+      this.searching = false
+      this.resultCount = 1
+      this.page_count = 1
+      this.results = []
+      this.errorMessage = error.error
     })
   }
 
diff --git a/src/documents/index.py b/src/documents/index.py
index 822ac2e8a..b4d6e1c51 100644
--- a/src/documents/index.py
+++ b/src/documents/index.py
@@ -4,10 +4,11 @@ from contextlib import contextmanager
 
 from django.conf import settings
 from whoosh import highlight
-from whoosh.fields import Schema, TEXT, NUMERIC, KEYWORD
+from whoosh.fields import Schema, TEXT, NUMERIC, KEYWORD, DATETIME
 from whoosh.highlight import Formatter, get_text
 from whoosh.index import create_in, exists_in, open_dir
 from whoosh.qparser import MultifieldParser
+from whoosh.qparser.dateparse import DateParserPlugin
 from whoosh.writing import AsyncWriter
 
 
@@ -60,7 +61,11 @@ def get_schema():
         title=TEXT(stored=True),
         content=TEXT(),
         correspondent=TEXT(stored=True),
-        tag=KEYWORD(stored=True, commas=True, scorable=True, lowercase=True)
+        tag=KEYWORD(stored=True, commas=True, scorable=True, lowercase=True),
+        type=TEXT(stored=True),
+        created=DATETIME(stored=True, sortable=True),
+        modified=DATETIME(stored=True, sortable=True),
+        added=DATETIME(stored=True, sortable=True),
     )
 
 
@@ -84,7 +89,11 @@ def update_document(writer, doc):
         title=doc.title,
         content=doc.content,
         correspondent=doc.correspondent.name if doc.correspondent else None,
-        tag=tags if tags else None
+        tag=tags if tags else None,
+        type=doc.document_type.name if doc.document_type else None,
+        created=doc.created,
+        added=doc.added,
+        modified=doc.modified,
     )
 
 
@@ -106,19 +115,22 @@ def remove_document_from_index(document):
 
 
 @contextmanager
-def query_page(ix, query, page):
+def query_page(ix, querystring, page):
     searcher = ix.searcher()
     try:
-        query_parser = MultifieldParser(
-            ["content", "title", "correspondent", "tag"],
-            ix.schema).parse(query)
-        result_page = searcher.search_page(query_parser, page)
+        qp = MultifieldParser(
+            ["content", "title", "correspondent", "tag", "type"],
+            ix.schema)
+        qp.add_plugin(DateParserPlugin())
+
+        q = qp.parse(querystring)
+        result_page = searcher.search_page(q, page)
         result_page.results.fragmenter = highlight.ContextFragmenter(
             surround=50)
         result_page.results.formatter = JsonFormatter()
 
-        corrected = searcher.correct_query(query_parser, query)
-        if corrected.query != query_parser:
+        corrected = searcher.correct_query(q, querystring)
+        if corrected.query != q:
             corrected_query = corrected.string
         else:
             corrected_query = None
diff --git a/src/documents/views.py b/src/documents/views.py
index 0ac232436..332bdfe8f 100755
--- a/src/documents/views.py
+++ b/src/documents/views.py
@@ -217,16 +217,23 @@ class SearchView(APIView):
                 }
 
     def get(self, request, format=None):
-        if 'query' in request.query_params:
-            query = request.query_params['query']
-            try:
-                page = int(request.query_params.get('page', 1))
-            except (ValueError, TypeError):
-                page = 1
+        if not 'query' in request.query_params:
+            return Response({
+                'count': 0,
+                'page': 0,
+                'page_count': 0,
+                'results': []})
 
-            if page < 1:
-                page = 1
+        query = request.query_params['query']
+        try:
+            page = int(request.query_params.get('page', 1))
+        except (ValueError, TypeError):
+            page = 1
 
+        if page < 1:
+            page = 1
+
+        try:
             with index.query_page(self.ix, query, page) as (result_page,
                                                             corrected_query):
                 return Response(
@@ -235,13 +242,8 @@ class SearchView(APIView):
                      'page_count': result_page.pagecount,
                      'corrected_query': corrected_query,
                      'results': list(map(self.add_infos_to_hit, result_page))})
-
-        else:
-            return Response({
-                'count': 0,
-                'page': 0,
-                'page_count': 0,
-                'results': []})
+        except Exception as e:
+            return HttpResponseBadRequest(str(e))
 
 
 class SearchAutoCompleteView(APIView):