searching for types and dates, error catching, documentation and changelog.

2025-12-24 02:05:48 -06:00 · 2020-11-30 16:13:35 +01:00
parent 1ef12d2cbc
commit b03d4c7646
6 changed files with 112 additions and 29 deletions
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -10,12 +10,14 @@ paperless-ng 0.9.4

 * Searching:

-  * Paperless now supports searching by tags. In order to have this applied to your
+  * Paperless now supports searching by tags, types and dates. In order to have this applied to your
    existing documents, you need to perform a ``document_index reindex`` management command
    (see :ref:`administration-index`)
-    that adds tags to your search index. Paperless keeps your index updated after that whenever
+    that adds tags, types and dates to your search index. You only need to do this once, so that paperless can find
+    your documents by tags, types and dates. Paperless keeps your index updated after that whenever
    something changes.
  * Paperless now has spelling corrections ("Did you mean") for misstyped queries.
+  * The documentation contains :ref:`information about the query syntax <basic-searching>`.

 * Front end:

--- a/docs/usage_overview.rst
+++ b/docs/usage_overview.rst
@@ -156,6 +156,62 @@ REST API

 You can also submit a document using the REST API, see :ref:`api-file_uploads` for details.

+.. _basic-searching:
+
+Searching
+#########
+
+Paperless offers an extensive searching mechanism that is designed to allow you to quickly
+find the document you're looking for (for example, that thing you bought a couple of months
+ago that just broke, or that contract you signed 8 years ago).
+
+When you search paperless for a document, it tries to match this query against your documents.
+Paperless will look for matching documents by inspecting their content, title, correspondent,
+type and tags. Paperless returns a scored list of results, so that documents that match your query
+better appear higher up in the search results.
+
+By default, paperless returns only documents which contain all words typed in the search bar.
+However, paperless also offers advanced search syntax if you want to drill down the results
+further.
+
+Matching documents with logical expressions:
+
+.. code:: none
+
+  shopname AND (product1 OR product2)
+
+Matching specific tags, correspondents or types:
+
+.. code:: none
+
+  type:invoice tag:unpaid
+  correspondent:university certificate
+
+Matching dates:
+
+.. code:: none
+  
+  created:[2005 to 2009]
+  added:yesterday
+  modified:today
+
+Matching inexact words:
+
+.. code:: none
+
+  produ*name
+
+.. note::
+
+  Inexact terms are hard for search indexes. These queries might take a while to execute. That's why paperless offers
+  autocompletion and query correction.
+
+All of these constructs can be combined as you see fit.
+Paperless uses Whoosh's default query language. If you want to learn more about it,
+head over to `Whoosh query language <https://whoosh.readthedocs.io/en/latest/querylang.html>`_.
+For details on what date parsing utilities are available, see
+`Date parsing <https://whoosh.readthedocs.io/en/latest/dates.html#parsing-date-queries>`_.
+ 

 .. _usage-recommended_workflow:

--- a/src-ui/src/app/components/search/search.component.html
+++ b/src-ui/src/app/components/search/search.component.html
@@ -1,6 +1,8 @@
 <app-page-header title="Search results">
 </app-page-header>

+<div *ngIf="errorMessage" class="alert alert-danger">Invalid search query: {{errorMessage}}</div>
+
 <p>
    Search string: <i>{{query}}</i>
    <ng-container *ngIf="correctedQuery">
@@ -9,7 +11,7 @@

 </p>

-<div [class.result-content-searching]="searching" infiniteScroll (scrolled)="onScroll()">
+<div *ngIf="!errorMessage" [class.result-content-searching]="searching" infiniteScroll (scrolled)="onScroll()">
    <p>{{resultCount}} result(s)</p>
    <app-document-card-large *ngFor="let result of results"
        [document]="result.document"
--- a/src-ui/src/app/components/search/search.component.ts
+++ b/src-ui/src/app/components/search/search.component.ts
@@ -24,6 +24,8 @@ export class SearchComponent implements OnInit {

  correctedQuery: string = null

+  errorMessage: string
+
  constructor(private searchService: SearchService, private route: ActivatedRoute, private router: Router) { }

  ngOnInit(): void {
@@ -38,10 +40,11 @@ export class SearchComponent implements OnInit {

  searchCorrectedQuery() {
    this.router.navigate(["search"], {queryParams: {query: this.correctedQuery}})
-    this.correctedQuery = null
  }

  loadPage(append: boolean = false) {
+    this.errorMessage = null
+    this.correctedQuery = null
    this.searchService.search(this.query, this.currentPage).subscribe(result => {
      if (append) {
        this.results.push(...result.results)
@@ -52,6 +55,12 @@ export class SearchComponent implements OnInit {
      this.searching = false
      this.resultCount = result.count
      this.correctedQuery = result.corrected_query
+    }, error => {
+      this.searching = false
+      this.resultCount = 1
+      this.page_count = 1
+      this.results = []
+      this.errorMessage = error.error
    })
  }

--- a/src/documents/index.py
+++ b/src/documents/index.py
@@ -4,10 +4,11 @@ from contextlib import contextmanager

 from django.conf import settings
 from whoosh import highlight
-from whoosh.fields import Schema, TEXT, NUMERIC, KEYWORD
+from whoosh.fields import Schema, TEXT, NUMERIC, KEYWORD, DATETIME
 from whoosh.highlight import Formatter, get_text
 from whoosh.index import create_in, exists_in, open_dir
 from whoosh.qparser import MultifieldParser
+from whoosh.qparser.dateparse import DateParserPlugin
 from whoosh.writing import AsyncWriter


@@ -60,7 +61,11 @@ def get_schema():
        title=TEXT(stored=True),
        content=TEXT(),
        correspondent=TEXT(stored=True),
-        tag=KEYWORD(stored=True, commas=True, scorable=True, lowercase=True)
+        tag=KEYWORD(stored=True, commas=True, scorable=True, lowercase=True),
+        type=TEXT(stored=True),
+        created=DATETIME(stored=True, sortable=True),
+        modified=DATETIME(stored=True, sortable=True),
+        added=DATETIME(stored=True, sortable=True),
    )


@@ -84,7 +89,11 @@ def update_document(writer, doc):
        title=doc.title,
        content=doc.content,
        correspondent=doc.correspondent.name if doc.correspondent else None,
-        tag=tags if tags else None
+        tag=tags if tags else None,
+        type=doc.document_type.name if doc.document_type else None,
+        created=doc.created,
+        added=doc.added,
+        modified=doc.modified,
    )


@@ -106,19 +115,22 @@ def remove_document_from_index(document):


@contextmanager
-def query_page(ix, query, page):
+def query_page(ix, querystring, page):
    searcher = ix.searcher()
    try:
-        query_parser = MultifieldParser(
-            ["content", "title", "correspondent", "tag"],
-            ix.schema).parse(query)
-        result_page = searcher.search_page(query_parser, page)
+        qp = MultifieldParser(
+            ["content", "title", "correspondent", "tag", "type"],
+            ix.schema)
+        qp.add_plugin(DateParserPlugin())
+
+        q = qp.parse(querystring)
+        result_page = searcher.search_page(q, page)
        result_page.results.fragmenter = highlight.ContextFragmenter(
            surround=50)
        result_page.results.formatter = JsonFormatter()

-        corrected = searcher.correct_query(query_parser, query)
-        if corrected.query != query_parser:
+        corrected = searcher.correct_query(q, querystring)
+        if corrected.query != q:
            corrected_query = corrected.string
        else:
            corrected_query = None
--- a/src/documents/views.py
+++ b/src/documents/views.py
@@ -217,16 +217,23 @@ class SearchView(APIView):
                }

    def get(self, request, format=None):
-        if 'query' in request.query_params:
-            query = request.query_params['query']
-            try:
-                page = int(request.query_params.get('page', 1))
-            except (ValueError, TypeError):
-                page = 1
+        if not 'query' in request.query_params:
+            return Response({
+                'count': 0,
+                'page': 0,
+                'page_count': 0,
+                'results': []})

-            if page < 1:
-                page = 1
+        query = request.query_params['query']
+        try:
+            page = int(request.query_params.get('page', 1))
+        except (ValueError, TypeError):
+            page = 1

+        if page < 1:
+            page = 1
+
+        try:
            with index.query_page(self.ix, query, page) as (result_page,
                                                            corrected_query):
                return Response(
@@ -235,13 +242,8 @@ class SearchView(APIView):
                     'page_count': result_page.pagecount,
                     'corrected_query': corrected_query,
                     'results': list(map(self.add_infos_to_hit, result_page))})
-
-        else:
-            return Response({
-                'count': 0,
-                'page': 0,
-                'page_count': 0,
-                'results': []})
+        except Exception as e:
+            return HttpResponseBadRequest(str(e))


 class SearchAutoCompleteView(APIView):