mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-11-03 03:16:10 -06:00 
			
		
		
		
	searching for tags, spelling corrections fixes #74
This commit is contained in:
		@@ -274,6 +274,7 @@ management command:
 | 
			
		||||
 | 
			
		||||
This command takes no arguments.
 | 
			
		||||
 | 
			
		||||
.. _`administration-index`:
 | 
			
		||||
 | 
			
		||||
Managing the document search index
 | 
			
		||||
==================================
 | 
			
		||||
 
 | 
			
		||||
@@ -8,6 +8,15 @@ Changelog
 | 
			
		||||
paperless-ng 0.9.4
 | 
			
		||||
##################
 | 
			
		||||
 | 
			
		||||
* Searching:
 | 
			
		||||
 | 
			
		||||
  * Paperless now supports searching by tags. In order to have this applied to your
 | 
			
		||||
    existing documents, you need to run the ``document_index reindex`` management command
 | 
			
		||||
    (see :ref:`administration-index`)
 | 
			
		||||
    that adds tags to your search index. Paperless keeps your index updated after that whenever
 | 
			
		||||
    something changes.
 | 
			
		||||
  * Paperless now has spelling corrections ("Did you mean") for mistyped queries.
 | 
			
		||||
 | 
			
		||||
* Front end:
 | 
			
		||||
 | 
			
		||||
  * Clickable tags, correspondents and types allow quick filtering for related documents.
 | 
			
		||||
 
 | 
			
		||||
@@ -1,7 +1,13 @@
 | 
			
		||||
<app-page-header title="Search results">
 | 
			
		||||
</app-page-header>
 | 
			
		||||
 | 
			
		||||
<p>Search string: <i>{{query}}</i></p>
 | 
			
		||||
<p>
 | 
			
		||||
    Search string: <i>{{query}}</i>
 | 
			
		||||
    <ng-container *ngIf="correctedQuery">
 | 
			
		||||
        - Did you mean "<a [routerLink]="" (click)="searchCorrectedQuery()">{{correctedQuery}}</a>"?
 | 
			
		||||
    </ng-container>
 | 
			
		||||
 | 
			
		||||
</p>
 | 
			
		||||
 | 
			
		||||
<div [class.result-content-searching]="searching" infiniteScroll (scrolled)="onScroll()">
 | 
			
		||||
    <p>{{resultCount}} result(s)</p>
 | 
			
		||||
@@ -10,4 +16,4 @@
 | 
			
		||||
        [details]="result.highlights">
 | 
			
		||||
 | 
			
		||||
</app-document-card-large>
 | 
			
		||||
</div>
 | 
			
		||||
</div>
 | 
			
		||||
 
 | 
			
		||||
@@ -1,5 +1,5 @@
 | 
			
		||||
import { Component, OnInit } from '@angular/core';
 | 
			
		||||
import { ActivatedRoute } from '@angular/router';
 | 
			
		||||
import { ActivatedRoute, Router } from '@angular/router';
 | 
			
		||||
import { SearchHit } from 'src/app/data/search-result';
 | 
			
		||||
import { SearchService } from 'src/app/services/rest/search.service';
 | 
			
		||||
 | 
			
		||||
@@ -9,7 +9,7 @@ import { SearchService } from 'src/app/services/rest/search.service';
 | 
			
		||||
  styleUrls: ['./search.component.scss']
 | 
			
		||||
})
 | 
			
		||||
export class SearchComponent implements OnInit {
 | 
			
		||||
  
 | 
			
		||||
 | 
			
		||||
  results: SearchHit[] = []
 | 
			
		||||
 | 
			
		||||
  query: string = ""
 | 
			
		||||
@@ -22,7 +22,9 @@ export class SearchComponent implements OnInit {
 | 
			
		||||
 | 
			
		||||
  resultCount
 | 
			
		||||
 | 
			
		||||
  constructor(private searchService: SearchService, private route: ActivatedRoute) { }
 | 
			
		||||
  correctedQuery: string = null
 | 
			
		||||
 | 
			
		||||
  constructor(private searchService: SearchService, private route: ActivatedRoute, private router: Router) { }
 | 
			
		||||
 | 
			
		||||
  ngOnInit(): void {
 | 
			
		||||
    this.route.queryParamMap.subscribe(paramMap => {
 | 
			
		||||
@@ -31,7 +33,12 @@ export class SearchComponent implements OnInit {
 | 
			
		||||
      this.currentPage = 1
 | 
			
		||||
      this.loadPage()
 | 
			
		||||
    })
 | 
			
		||||
    
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  searchCorrectedQuery() {
 | 
			
		||||
    this.router.navigate(["search"], {queryParams: {query: this.correctedQuery}})
 | 
			
		||||
    this.correctedQuery = null
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  loadPage(append: boolean = false) {
 | 
			
		||||
@@ -44,12 +51,11 @@ export class SearchComponent implements OnInit {
 | 
			
		||||
      this.pageCount = result.page_count
 | 
			
		||||
      this.searching = false
 | 
			
		||||
      this.resultCount = result.count
 | 
			
		||||
      this.correctedQuery = result.corrected_query
 | 
			
		||||
    })
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  onScroll() {
 | 
			
		||||
    console.log(this.currentPage)
 | 
			
		||||
    console.log(this.pageCount)
 | 
			
		||||
    if (this.currentPage < this.pageCount) {
 | 
			
		||||
      this.currentPage += 1
 | 
			
		||||
      this.loadPage(true)
 | 
			
		||||
 
 | 
			
		||||
@@ -21,7 +21,9 @@ export interface SearchResult {
 | 
			
		||||
  page?: number
 | 
			
		||||
  page_count?: number
 | 
			
		||||
 | 
			
		||||
  corrected_query?: string
 | 
			
		||||
 | 
			
		||||
  results?: SearchHit[]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -4,7 +4,7 @@ from contextlib import contextmanager
 | 
			
		||||
 | 
			
		||||
from django.conf import settings
 | 
			
		||||
from whoosh import highlight
 | 
			
		||||
from whoosh.fields import Schema, TEXT, NUMERIC
 | 
			
		||||
from whoosh.fields import Schema, TEXT, NUMERIC, KEYWORD
 | 
			
		||||
from whoosh.highlight import Formatter, get_text
 | 
			
		||||
from whoosh.index import create_in, exists_in, open_dir
 | 
			
		||||
from whoosh.qparser import MultifieldParser
 | 
			
		||||
@@ -59,14 +59,15 @@ def get_schema():
 | 
			
		||||
        id=NUMERIC(stored=True, unique=True, numtype=int),
 | 
			
		||||
        title=TEXT(stored=True),
 | 
			
		||||
        content=TEXT(),
 | 
			
		||||
        correspondent=TEXT(stored=True)
 | 
			
		||||
        correspondent=TEXT(stored=True),
 | 
			
		||||
        tag=KEYWORD(stored=True, commas=True, scorable=True, lowercase=True)
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def open_index(recreate=False):
 | 
			
		||||
    try:
 | 
			
		||||
        if exists_in(settings.INDEX_DIR) and not recreate:
 | 
			
		||||
            return open_dir(settings.INDEX_DIR)
 | 
			
		||||
            return open_dir(settings.INDEX_DIR, schema=get_schema())
 | 
			
		||||
    except Exception as e:
 | 
			
		||||
        logger.error(f"Error while opening the index: {e}, recreating.")
 | 
			
		||||
 | 
			
		||||
@@ -77,11 +78,13 @@ def open_index(recreate=False):
 | 
			
		||||
 | 
			
		||||
def update_document(writer, doc):
 | 
			
		||||
    logger.debug("Indexing {}...".format(doc))
 | 
			
		||||
    tags = ",".join([t.name for t in doc.tags.all()])
 | 
			
		||||
    writer.update_document(
 | 
			
		||||
        id=doc.pk,
 | 
			
		||||
        title=doc.title,
 | 
			
		||||
        content=doc.content,
 | 
			
		||||
        correspondent=doc.correspondent.name if doc.correspondent else None
 | 
			
		||||
        correspondent=doc.correspondent.name if doc.correspondent else None,
 | 
			
		||||
        tag=tags if tags else None
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -106,13 +109,21 @@ def remove_document_from_index(document):
 | 
			
		||||
def query_page(ix, query, page):
 | 
			
		||||
    searcher = ix.searcher()
 | 
			
		||||
    try:
 | 
			
		||||
        query_parser = MultifieldParser(["content", "title", "correspondent"],
 | 
			
		||||
                                        ix.schema).parse(query)
 | 
			
		||||
        query_parser = MultifieldParser(
 | 
			
		||||
            ["content", "title", "correspondent", "tag"],
 | 
			
		||||
            ix.schema).parse(query)
 | 
			
		||||
        result_page = searcher.search_page(query_parser, page)
 | 
			
		||||
        result_page.results.fragmenter = highlight.ContextFragmenter(
 | 
			
		||||
            surround=50)
 | 
			
		||||
        result_page.results.formatter = JsonFormatter()
 | 
			
		||||
        yield result_page
 | 
			
		||||
 | 
			
		||||
        corrected = searcher.correct_query(query_parser, query)
 | 
			
		||||
        if corrected.query != query_parser:
 | 
			
		||||
            corrected_query = corrected.string
 | 
			
		||||
        else:
 | 
			
		||||
            corrected_query = None
 | 
			
		||||
 | 
			
		||||
        yield result_page, corrected_query
 | 
			
		||||
    finally:
 | 
			
		||||
        searcher.close()
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -289,6 +289,22 @@ class DocumentApiTest(DirectoriesMixin, APITestCase):
 | 
			
		||||
        self.assertEqual(response.status_code, 200)
 | 
			
		||||
        self.assertEqual(len(response.data), 10)
 | 
			
		||||
 | 
			
		||||
    def test_search_spelling_correction(self):
 | 
			
		||||
        with AsyncWriter(index.open_index()) as writer:
 | 
			
		||||
            for i in range(55):
 | 
			
		||||
                doc = Document.objects.create(checksum=str(i), pk=i+1, title=f"Document {i+1}", content=f"Things document {i+1}")
 | 
			
		||||
                index.update_document(writer, doc)
 | 
			
		||||
 | 
			
		||||
        response = self.client.get("/api/search/?query=thing")
 | 
			
		||||
        correction = response.data['corrected_query']
 | 
			
		||||
 | 
			
		||||
        self.assertEqual(correction, "things")
 | 
			
		||||
 | 
			
		||||
        response = self.client.get("/api/search/?query=things")
 | 
			
		||||
        correction = response.data['corrected_query']
 | 
			
		||||
 | 
			
		||||
        self.assertEqual(correction, None)
 | 
			
		||||
 | 
			
		||||
    def test_statistics(self):
 | 
			
		||||
 | 
			
		||||
        doc1 = Document.objects.create(title="none1", checksum="A")
 | 
			
		||||
 
 | 
			
		||||
@@ -227,11 +227,13 @@ class SearchView(APIView):
 | 
			
		||||
            if page < 1:
 | 
			
		||||
                page = 1
 | 
			
		||||
 | 
			
		||||
            with index.query_page(self.ix, query, page) as result_page:
 | 
			
		||||
            with index.query_page(self.ix, query, page) as (result_page,
 | 
			
		||||
                                                            corrected_query):
 | 
			
		||||
                return Response(
 | 
			
		||||
                    {'count': len(result_page),
 | 
			
		||||
                     'page': result_page.pagenum,
 | 
			
		||||
                     'page_count': result_page.pagecount,
 | 
			
		||||
                     'corrected_query': corrected_query,
 | 
			
		||||
                     'results': list(map(self.add_infos_to_hit, result_page))})
 | 
			
		||||
 | 
			
		||||
        else:
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user