-
+
Score:
-
+
diff --git a/src-ui/src/app/components/document-list/document-list.component.ts b/src-ui/src/app/components/document-list/document-list.component.ts
index cf7afb845..c0ad354ba 100644
--- a/src-ui/src/app/components/document-list/document-list.component.ts
+++ b/src-ui/src/app/components/document-list/document-list.component.ts
@@ -207,6 +207,13 @@ export class DocumentListComponent implements OnInit, OnDestroy {
})
}
+ // Presumably invoked when the user asks for documents similar to `documentID`
+ // (name-based assumption — confirm against the template).
+ // Clears the current selection, then schedules a follow-up step with setTimeout.
+ // NOTE(review): the scheduled callback is empty — its only statement is commented
+ // out and refers to `doc`, which is not defined in this method. As written,
+ // `documentID` is unused and no "more like" filter is applied. TODO: wire this up.
+ clickMoreLike(documentID: number) {
+ this.list.selectNone()
+ setTimeout(() => {
+ //this.filterEditor.moreLikeThis(doc)
+ })
+ }
+
trackByDocumentId(index, item: PaperlessDocument) {
return item.id
}
diff --git a/src-ui/src/app/components/document-list/filter-editor/filter-editor.component.html b/src-ui/src/app/components/document-list/filter-editor/filter-editor.component.html
index 7290354eb..490eed95d 100644
--- a/src-ui/src/app/components/document-list/filter-editor/filter-editor.component.html
+++ b/src-ui/src/app/components/document-list/filter-editor/filter-editor.component.html
@@ -1,7 +1,6 @@
diff --git a/src-ui/src/app/components/document-list/filter-editor/filter-editor.component.ts b/src-ui/src/app/components/document-list/filter-editor/filter-editor.component.ts
index 43387c08f..3b645ec97 100644
--- a/src-ui/src/app/components/document-list/filter-editor/filter-editor.component.ts
+++ b/src-ui/src/app/components/document-list/filter-editor/filter-editor.component.ts
@@ -8,13 +8,17 @@ import { DocumentTypeService } from 'src/app/services/rest/document-type.service
import { TagService } from 'src/app/services/rest/tag.service';
import { CorrespondentService } from 'src/app/services/rest/correspondent.service';
import { FilterRule } from 'src/app/data/filter-rule';
-import { FILTER_ADDED_AFTER, FILTER_ADDED_BEFORE, FILTER_ASN, FILTER_CORRESPONDENT, FILTER_CREATED_AFTER, FILTER_CREATED_BEFORE, FILTER_DOCUMENT_TYPE, FILTER_HAS_ANY_TAG, FILTER_HAS_TAG, FILTER_TITLE, FILTER_TITLE_CONTENT } from 'src/app/data/filter-rule-type';
+import { FILTER_ADDED_AFTER, FILTER_ADDED_BEFORE, FILTER_ASN, FILTER_CORRESPONDENT, FILTER_CREATED_AFTER, FILTER_CREATED_BEFORE, FILTER_DOCUMENT_TYPE, FILTER_FULLTEXT_MORELIKE, FILTER_FULLTEXT_QUERY, FILTER_HAS_ANY_TAG, FILTER_HAS_TAG, FILTER_TITLE, FILTER_TITLE_CONTENT } from 'src/app/data/filter-rule-type';
import { FilterableDropdownSelectionModel } from '../../common/filterable-dropdown/filterable-dropdown.component';
import { ToggleableItemState } from '../../common/filterable-dropdown/toggleable-dropdown-button/toggleable-dropdown-button.component';
+import { DocumentService } from 'src/app/services/rest/document.service';
+import { PaperlessDocument } from 'src/app/data/paperless-document';
const TEXT_FILTER_TARGET_TITLE = "title"
const TEXT_FILTER_TARGET_TITLE_CONTENT = "title-content"
const TEXT_FILTER_TARGET_ASN = "asn"
+const TEXT_FILTER_TARGET_FULLTEXT_QUERY = "fulltext-query"
+const TEXT_FILTER_TARGET_FULLTEXT_MORELIKE = "fulltext-morelike"
@Component({
selector: 'app-filter-editor',
@@ -64,7 +68,8 @@ export class FilterEditorComponent implements OnInit, OnDestroy {
constructor(
private documentTypeService: DocumentTypeService,
private tagService: TagService,
- private correspondentService: CorrespondentService
+ private correspondentService: CorrespondentService,
+ private documentService: DocumentService
) { }
tags: PaperlessTag[] = []
@@ -72,12 +77,21 @@ export class FilterEditorComponent implements OnInit, OnDestroy {
documentTypes: PaperlessDocumentType[] = []
_textFilter = ""
+ _moreLikeId: number
+ _moreLikeDoc: PaperlessDocument
- textFilterTargets = [
- {id: TEXT_FILTER_TARGET_TITLE, name: $localize`Title`},
- {id: TEXT_FILTER_TARGET_TITLE_CONTENT, name: $localize`Title & content`},
- {id: TEXT_FILTER_TARGET_ASN, name: $localize`ASN`}
- ]
+  /**
+   * Options for the text filter target selector.
+   *
+   * Title, title & content, ASN and fulltext search are always listed. The
+   * "More like" entry is appended only while it is the active target, so it
+   * cannot be picked manually from the list.
+   */
+  get textFilterTargets() {
+    const moreLikeActive = this.textFilterTarget === TEXT_FILTER_TARGET_FULLTEXT_MORELIKE
+    return [
+      {id: TEXT_FILTER_TARGET_TITLE, name: $localize`Title`},
+      {id: TEXT_FILTER_TARGET_TITLE_CONTENT, name: $localize`Title & content`},
+      {id: TEXT_FILTER_TARGET_ASN, name: $localize`ASN`},
+      {id: TEXT_FILTER_TARGET_FULLTEXT_QUERY, name: $localize`Fulltext search`},
+      ...(moreLikeActive ? [{id: TEXT_FILTER_TARGET_FULLTEXT_MORELIKE, name: $localize`More like`}] : [])
+    ]
+  }
textFilterTarget = TEXT_FILTER_TARGET_TITLE_CONTENT
@@ -101,6 +115,7 @@ export class FilterEditorComponent implements OnInit, OnDestroy {
this.tagSelectionModel.clear(false)
this.correspondentSelectionModel.clear(false)
this._textFilter = null
+ this._moreLikeId = null
this.dateAddedBefore = null
this.dateAddedAfter = null
this.dateCreatedBefore = null
@@ -120,6 +135,17 @@ export class FilterEditorComponent implements OnInit, OnDestroy {
this._textFilter = rule.value
this.textFilterTarget = TEXT_FILTER_TARGET_ASN
break
+ case FILTER_FULLTEXT_QUERY:
+ this._textFilter = rule.value
+ this.textFilterTarget = TEXT_FILTER_TARGET_FULLTEXT_QUERY
+ break
+ case FILTER_FULLTEXT_MORELIKE:
+ this._moreLikeId = +rule.value
+ this.textFilterTarget = TEXT_FILTER_TARGET_FULLTEXT_MORELIKE
+ this.documentService.get(this._moreLikeId).subscribe(result => {
+ this._moreLikeDoc = result
+ })
+ break
case FILTER_CREATED_AFTER:
this.dateCreatedAfter = rule.value
break
@@ -159,6 +185,12 @@ export class FilterEditorComponent implements OnInit, OnDestroy {
if (this._textFilter && this.textFilterTarget == TEXT_FILTER_TARGET_ASN) {
filterRules.push({rule_type: FILTER_ASN, value: this._textFilter})
}
+ if (this._textFilter && this.textFilterTarget == TEXT_FILTER_TARGET_FULLTEXT_QUERY) {
+ filterRules.push({rule_type: FILTER_FULLTEXT_QUERY, value: this._textFilter})
+ }
+ if (this._moreLikeId && this.textFilterTarget == TEXT_FILTER_TARGET_FULLTEXT_MORELIKE) {
+ filterRules.push({rule_type: FILTER_FULLTEXT_MORELIKE, value: this._moreLikeId?.toString()})
+ }
if (this.tagSelectionModel.isNoneSelected()) {
filterRules.push({rule_type: FILTER_HAS_ANY_TAG, value: "false"})
} else {
@@ -232,6 +264,7 @@ export class FilterEditorComponent implements OnInit, OnDestroy {
}
resetSelected() {
+ // Switch back to the target the component starts with (title & content)
+ // before emitting the reset event.
+ this.textFilterTarget = TEXT_FILTER_TARGET_TITLE_CONTENT
this.reset.next()
}
diff --git a/src-ui/src/app/components/search/result-highlight/result-highlight.component.html b/src-ui/src/app/components/search/result-highlight/result-highlight.component.html
deleted file mode 100644
index 5dc5baa94..000000000
--- a/src-ui/src/app/components/search/result-highlight/result-highlight.component.html
+++ /dev/null
@@ -1,3 +0,0 @@
-...
- {{token.text}} ...
-
\ No newline at end of file
diff --git a/src-ui/src/app/components/search/result-highlight/result-highlight.component.scss b/src-ui/src/app/components/search/result-highlight/result-highlight.component.scss
deleted file mode 100644
index e04dd13b2..000000000
--- a/src-ui/src/app/components/search/result-highlight/result-highlight.component.scss
+++ /dev/null
@@ -1,4 +0,0 @@
-.match {
- color: black;
- background-color: rgb(255, 211, 66);
-}
\ No newline at end of file
diff --git a/src-ui/src/app/components/search/result-highlight/result-highlight.component.spec.ts b/src-ui/src/app/components/search/result-highlight/result-highlight.component.spec.ts
deleted file mode 100644
index 8e00a9d0b..000000000
--- a/src-ui/src/app/components/search/result-highlight/result-highlight.component.spec.ts
+++ /dev/null
@@ -1,25 +0,0 @@
-import { ComponentFixture, TestBed } from '@angular/core/testing';
-
-import { ResultHighlightComponent } from './result-highlight.component';
-
-describe('ResultHighlightComponent', () => {
- let component: ResultHighlightComponent;
- let fixture: ComponentFixture
;
-
- beforeEach(async () => {
- await TestBed.configureTestingModule({
- declarations: [ ResultHighlightComponent ]
- })
- .compileComponents();
- });
-
- beforeEach(() => {
- fixture = TestBed.createComponent(ResultHighlightComponent);
- component = fixture.componentInstance;
- fixture.detectChanges();
- });
-
- it('should create', () => {
- expect(component).toBeTruthy();
- });
-});
diff --git a/src-ui/src/app/components/search/result-highlight/result-highlight.component.ts b/src-ui/src/app/components/search/result-highlight/result-highlight.component.ts
deleted file mode 100644
index d9a1a50b1..000000000
--- a/src-ui/src/app/components/search/result-highlight/result-highlight.component.ts
+++ /dev/null
@@ -1,19 +0,0 @@
-import { Component, Input, OnInit } from '@angular/core';
-import { SearchHitHighlight } from 'src/app/data/search-result';
-
-@Component({
- selector: 'app-result-highlight',
- templateUrl: './result-highlight.component.html',
- styleUrls: ['./result-highlight.component.scss']
-})
-export class ResultHighlightComponent implements OnInit {
-
- constructor() { }
-
- @Input()
- highlights: SearchHitHighlight[][]
-
- ngOnInit(): void {
- }
-
-}
diff --git a/src-ui/src/app/components/search/search.component.html b/src-ui/src/app/components/search/search.component.html
deleted file mode 100644
index f794a0feb..000000000
--- a/src-ui/src/app/components/search/search.component.html
+++ /dev/null
@@ -1,26 +0,0 @@
-
-
-
-Invalid search query: {{errorMessage}}
-
-Showing documents similar to {{more_like_doc?.original_file_name}}
-
-
- Search query: {{query}}
-
- - Did you mean "{{correctedQuery}}"?
-
-
-
-
-
{resultCount, plural, =0 {No results} =1 {One result} other {{{resultCount}} results}}
-
-
-
-
-
-
diff --git a/src-ui/src/app/components/search/search.component.scss b/src-ui/src/app/components/search/search.component.scss
deleted file mode 100644
index 40ca79a61..000000000
--- a/src-ui/src/app/components/search/search.component.scss
+++ /dev/null
@@ -1,15 +0,0 @@
-.result-content {
- color: darkgray;
-}
-
-.doc-img {
- object-fit: cover;
- object-position: top;
- height: 100%;
- position: absolute;
-
-}
-
-.result-content-searching {
- opacity: 0.3;
-}
\ No newline at end of file
diff --git a/src-ui/src/app/components/search/search.component.spec.ts b/src-ui/src/app/components/search/search.component.spec.ts
deleted file mode 100644
index 918ce7071..000000000
--- a/src-ui/src/app/components/search/search.component.spec.ts
+++ /dev/null
@@ -1,25 +0,0 @@
-import { ComponentFixture, TestBed } from '@angular/core/testing';
-
-import { SearchComponent } from './search.component';
-
-describe('SearchComponent', () => {
- let component: SearchComponent;
- let fixture: ComponentFixture;
-
- beforeEach(async () => {
- await TestBed.configureTestingModule({
- declarations: [ SearchComponent ]
- })
- .compileComponents();
- });
-
- beforeEach(() => {
- fixture = TestBed.createComponent(SearchComponent);
- component = fixture.componentInstance;
- fixture.detectChanges();
- });
-
- it('should create', () => {
- expect(component).toBeTruthy();
- });
-});
diff --git a/src-ui/src/app/components/search/search.component.ts b/src-ui/src/app/components/search/search.component.ts
deleted file mode 100644
index 4570ac3fa..000000000
--- a/src-ui/src/app/components/search/search.component.ts
+++ /dev/null
@@ -1,95 +0,0 @@
-import { Component, OnInit } from '@angular/core';
-import { ActivatedRoute, Router } from '@angular/router';
-import { PaperlessDocument } from 'src/app/data/paperless-document';
-import { PaperlessDocumentType } from 'src/app/data/paperless-document-type';
-import { SearchHit } from 'src/app/data/search-result';
-import { DocumentService } from 'src/app/services/rest/document.service';
-import { SearchService } from 'src/app/services/rest/search.service';
-
-@Component({
- selector: 'app-search',
- templateUrl: './search.component.html',
- styleUrls: ['./search.component.scss']
-})
-export class SearchComponent implements OnInit {
-
- results: SearchHit[] = []
-
- query: string = ""
-
- more_like: number
-
- more_like_doc: PaperlessDocument
-
- searching = false
-
- currentPage = 1
-
- pageCount = 1
-
- resultCount
-
- correctedQuery: string = null
-
- errorMessage: string
-
- get maxScore() {
- return this.results?.length > 0 ? this.results[0].score : 100
- }
-
- constructor(private searchService: SearchService, private route: ActivatedRoute, private router: Router, private documentService: DocumentService) { }
-
- ngOnInit(): void {
- this.route.queryParamMap.subscribe(paramMap => {
- window.scrollTo(0, 0)
- this.query = paramMap.get('query')
- this.more_like = paramMap.has('more_like') ? +paramMap.get('more_like') : null
- if (this.more_like) {
- this.documentService.get(this.more_like).subscribe(r => {
- this.more_like_doc = r
- })
- } else {
- this.more_like_doc = null
- }
- this.searching = true
- this.currentPage = 1
- this.loadPage()
- })
-
- }
-
- searchCorrectedQuery() {
- this.router.navigate(["search"], {queryParams: {query: this.correctedQuery, more_like: this.more_like}})
- }
-
- loadPage(append: boolean = false) {
- this.errorMessage = null
- this.correctedQuery = null
-
- this.searchService.search(this.query, this.currentPage, this.more_like).subscribe(result => {
- if (append) {
- this.results.push(...result.results)
- } else {
- this.results = result.results
- }
- this.pageCount = result.page_count
- this.searching = false
- this.resultCount = result.count
- this.correctedQuery = result.corrected_query
- }, error => {
- this.searching = false
- this.resultCount = 1
- this.pageCount = 1
- this.results = []
- this.errorMessage = error.error
- })
- }
-
- onScroll() {
- if (this.currentPage < this.pageCount) {
- this.currentPage += 1
- this.loadPage(true)
- }
- }
-
-}
diff --git a/src-ui/src/app/data/filter-rule-type.ts b/src-ui/src/app/data/filter-rule-type.ts
index 2c9f8a373..c215be84e 100644
--- a/src-ui/src/app/data/filter-rule-type.ts
+++ b/src-ui/src/app/data/filter-rule-type.ts
@@ -22,6 +22,9 @@ export const FILTER_ASN_ISNULL = 18
export const FILTER_TITLE_CONTENT = 19
+export const FILTER_FULLTEXT_QUERY = 20
+export const FILTER_FULLTEXT_MORELIKE = 21
+
export const FILTER_RULE_TYPES: FilterRuleType[] = [
{id: FILTER_TITLE, filtervar: "title__icontains", datatype: "string", multi: false, default: ""},
@@ -51,7 +54,11 @@ export const FILTER_RULE_TYPES: FilterRuleType[] = [
{id: FILTER_MODIFIED_AFTER, filtervar: "modified__date__gt", datatype: "date", multi: false},
{id: FILTER_ASN_ISNULL, filtervar: "archive_serial_number__isnull", datatype: "boolean", multi: false},
- {id: FILTER_TITLE_CONTENT, filtervar: "title_content", datatype: "string", multi: false}
+ {id: FILTER_TITLE_CONTENT, filtervar: "title_content", datatype: "string", multi: false},
+
+ {id: FILTER_FULLTEXT_QUERY, filtervar: "query", datatype: "string", multi: false},
+
+ {id: FILTER_FULLTEXT_MORELIKE, filtervar: "more_like_id", datatype: "number", multi: false},
]
export interface FilterRuleType {
diff --git a/src-ui/src/app/data/paperless-document.ts b/src-ui/src/app/data/paperless-document.ts
index 9d0aeda88..e7412278b 100644
--- a/src-ui/src/app/data/paperless-document.ts
+++ b/src-ui/src/app/data/paperless-document.ts
@@ -4,6 +4,15 @@ import { PaperlessTag } from './paperless-tag'
import { PaperlessDocumentType } from './paperless-document-type'
import { Observable } from 'rxjs'
+/**
+ * Search metadata attached to a document that was returned by a search request
+ * (carried in the `__search_hit__` field of PaperlessDocument).
+ */
+export interface SearchHit {
+
+ // Relevance score of this hit as reported by the search index.
+ score?: number
+ // Rank of this hit as reported by the search index — presumably its position
+ // in the result list; TODO confirm whether it is zero-based.
+ rank?: number
+
+ // Excerpt(s) of the document content with matches marked up by the server.
+ // NOTE(review): the backend formats these with an HTML formatter (matches
+ // wrapped in <span>), so this string contains markup — confirm how it is rendered.
+ highlights?: string
+
+}
+
export interface PaperlessDocument extends ObjectWithId {
correspondent$?: Observable
@@ -40,4 +49,6 @@ export interface PaperlessDocument extends ObjectWithId {
archive_serial_number?: number
+ __search_hit__?: SearchHit
+
}
diff --git a/src-ui/src/app/data/search-result.ts b/src-ui/src/app/data/search-result.ts
deleted file mode 100644
index a769a8351..000000000
--- a/src-ui/src/app/data/search-result.ts
+++ /dev/null
@@ -1,29 +0,0 @@
-import { PaperlessDocument } from './paperless-document'
-
-export class SearchHitHighlight {
- text?: string
- term?: number
-}
-
-export interface SearchHit {
- id?: number
- title?: string
- score?: number
- rank?: number
-
- highlights?: SearchHitHighlight[][]
- document?: PaperlessDocument
-}
-
-export interface SearchResult {
-
- count?: number
- page?: number
- page_count?: number
-
- corrected_query?: string
-
- results?: SearchHit[]
-
-
-}
diff --git a/src-ui/src/app/services/document-list-view.service.ts b/src-ui/src/app/services/document-list-view.service.ts
index 334706a3c..d844323f9 100644
--- a/src-ui/src/app/services/document-list-view.service.ts
+++ b/src-ui/src/app/services/document-list-view.service.ts
@@ -1,7 +1,9 @@
+import { Route } from '@angular/compiler/src/core';
import { Injectable } from '@angular/core';
-import { Router } from '@angular/router';
+import { ActivatedRoute, Router } from '@angular/router';
import { Observable } from 'rxjs';
import { cloneFilterRules, FilterRule } from '../data/filter-rule';
+import { FILTER_FULLTEXT_MORELIKE, FILTER_FULLTEXT_QUERY } from '../data/filter-rule-type';
import { PaperlessDocument } from '../data/paperless-document';
import { PaperlessSavedView } from '../data/paperless-saved-view';
import { DOCUMENT_LIST_SERVICE } from '../data/storage-keys';
@@ -207,7 +209,11 @@ export class DocumentListViewService {
this.activeListViewState.currentPage = 1
this.reduceSelectionToFilter()
this.saveDocumentListView()
- this.router.navigate(["documents"])
+ if (this.router.url == "/documents") {
+ this.reload()
+ } else {
+ this.router.navigate(["documents"])
+ }
}
getLastPage(): number {
@@ -317,7 +323,7 @@ export class DocumentListViewService {
return this.documents.map(d => d.id).indexOf(documentID)
}
- constructor(private documentService: DocumentService, private settings: SettingsService, private router: Router) {
+ constructor(private documentService: DocumentService, private settings: SettingsService, private router: Router, private route: ActivatedRoute) {
let documentListViewConfigJson = sessionStorage.getItem(DOCUMENT_LIST_SERVICE.CURRENT_VIEW_CONFIG)
if (documentListViewConfigJson) {
try {
diff --git a/src-ui/src/app/services/rest/search.service.ts b/src-ui/src/app/services/rest/search.service.ts
index e750100fa..f10c53485 100644
--- a/src-ui/src/app/services/rest/search.service.ts
+++ b/src-ui/src/app/services/rest/search.service.ts
@@ -2,8 +2,6 @@ import { HttpClient, HttpParams } from '@angular/common/http';
import { Injectable } from '@angular/core';
import { Observable } from 'rxjs';
import { map } from 'rxjs/operators';
-import { PaperlessDocument } from 'src/app/data/paperless-document';
-import { SearchResult } from 'src/app/data/search-result';
import { environment } from 'src/environments/environment';
import { DocumentService } from './document.service';
@@ -13,30 +11,7 @@ import { DocumentService } from './document.service';
})
export class SearchService {
- constructor(private http: HttpClient, private documentService: DocumentService) { }
-
- search(query: string, page?: number, more_like?: number): Observable {
- let httpParams = new HttpParams()
- if (query) {
- httpParams = httpParams.set('query', query)
- }
- if (page) {
- httpParams = httpParams.set('page', page.toString())
- }
- if (more_like) {
- httpParams = httpParams.set('more_like', more_like.toString())
- }
- return this.http.get(`${environment.apiBaseUrl}search/`, {params: httpParams}).pipe(
- map(result => {
- result.results.forEach(hit => {
- if (hit.document) {
- this.documentService.addObservablesToDocument(hit.document)
- }
- })
- return result
- })
- )
- }
+ constructor(private http: HttpClient) { }
autocomplete(term: string): Observable {
return this.http.get(`${environment.apiBaseUrl}search/autocomplete/`, {params: new HttpParams().set('term', term)})
diff --git a/src/documents/index.py b/src/documents/index.py
index 2c851c9ea..a75534514 100644
--- a/src/documents/index.py
+++ b/src/documents/index.py
@@ -5,12 +5,12 @@ from contextlib import contextmanager
import math
from django.conf import settings
from whoosh import highlight, classify, query
-from whoosh.fields import Schema, TEXT, NUMERIC, KEYWORD, DATETIME
-from whoosh.highlight import Formatter, get_text
+from whoosh.fields import Schema, TEXT, NUMERIC, KEYWORD, DATETIME, BOOLEAN
+from whoosh.highlight import Formatter, get_text, HtmlFormatter
from whoosh.index import create_in, exists_in, open_dir
from whoosh.qparser import MultifieldParser
from whoosh.qparser.dateparse import DateParserPlugin
-from whoosh.searching import ResultsPage
+from whoosh.searching import ResultsPage, Searcher
from whoosh.writing import AsyncWriter
from documents.models import Document
@@ -18,63 +18,53 @@ from documents.models import Document
logger = logging.getLogger("paperless.index")
-class JsonFormatter(Formatter):
- def __init__(self):
- self.seen = {}
-
- def format_token(self, text, token, replace=False):
- ttext = self._text(get_text(text, token, replace))
- return {'text': ttext, 'highlight': 'true'}
-
- def format_fragment(self, fragment, replace=False):
- output = []
- index = fragment.startchar
- text = fragment.text
- amend_token = None
- for t in fragment.matches:
- if t.startchar is None:
- continue
- if t.startchar < index:
- continue
- if t.startchar > index:
- text_inbetween = text[index:t.startchar]
- if amend_token and t.startchar - index < 10:
- amend_token['text'] += text_inbetween
- else:
- output.append({'text': text_inbetween,
- 'highlight': False})
- amend_token = None
- token = self.format_token(text, t, replace)
- if amend_token:
- amend_token['text'] += token['text']
- else:
- output.append(token)
- amend_token = token
- index = t.endchar
- if index < fragment.endchar:
- output.append({'text': text[index:fragment.endchar],
- 'highlight': False})
- return output
-
- def format(self, fragments, replace=False):
- output = []
- for fragment in fragments:
- output.append(self.format_fragment(fragment, replace=replace))
- return output
-
-
def get_schema():
return Schema(
- id=NUMERIC(stored=True, unique=True, numtype=int),
- title=TEXT(stored=True),
+ id=NUMERIC(
+ stored=True,
+ unique=True
+ ),
+ title=TEXT(
+ sortable=True
+ ),
content=TEXT(),
- correspondent=TEXT(stored=True),
- correspondent_id=NUMERIC(stored=True, numtype=int),
- tag=KEYWORD(stored=True, commas=True, scorable=True, lowercase=True),
- type=TEXT(stored=True),
- created=DATETIME(stored=True, sortable=True),
- modified=DATETIME(stored=True, sortable=True),
- added=DATETIME(stored=True, sortable=True),
+ archive_serial_number=NUMERIC(
+ sortable=True
+ ),
+
+ correspondent=TEXT(
+ sortable=True
+ ),
+ correspondent_id=NUMERIC(),
+ has_correspondent=BOOLEAN(),
+
+ tag=KEYWORD(
+ commas=True,
+ scorable=True,
+ lowercase=True
+ ),
+ tag_id=KEYWORD(
+ commas=True,
+ scorable=True
+ ),
+ has_tag=BOOLEAN(),
+
+ type=TEXT(
+ sortable=True
+ ),
+ type_id=NUMERIC(),
+ has_type=BOOLEAN(),
+
+ created=DATETIME(
+ sortable=True
+ ),
+ modified=DATETIME(
+ sortable=True
+ ),
+ added=DATETIME(
+ sortable=True
+ ),
+
)
@@ -106,18 +96,38 @@ def open_index_writer(ix=None, optimize=False):
writer.commit(optimize=optimize)
+@contextmanager
+def open_index_searcher(ix=None):
+    """Context manager yielding a searcher that is closed on exit.
+
+    :param ix: index to search; when falsy, the index returned by
+        ``open_index()`` is used instead.
+    """
+    index = ix if ix else open_index()
+    searcher = index.searcher()
+    try:
+        yield searcher
+    finally:
+        # Always release the searcher, even if the caller's block raised.
+        searcher.close()
+
+
def update_document(writer, doc):
tags = ",".join([t.name for t in doc.tags.all()])
+ tags_ids = ",".join([str(t.id) for t in doc.tags.all()])
writer.update_document(
id=doc.pk,
title=doc.title,
content=doc.content,
correspondent=doc.correspondent.name if doc.correspondent else None,
correspondent_id=doc.correspondent.id if doc.correspondent else None,
+ has_correspondent=doc.correspondent is not None,
tag=tags if tags else None,
+ tag_id=tags_ids if tags_ids else None,
+ has_tag=len(tags) > 0,
type=doc.document_type.name if doc.document_type else None,
+ type_id=doc.document_type.id if doc.document_type else None,
+ has_type=doc.document_type is not None,
created=doc.created,
added=doc.added,
+ archive_serial_number=doc.archive_serial_number,
modified=doc.modified,
)
@@ -140,78 +150,11 @@ def remove_document_from_index(document):
remove_document(writer, document)
-@contextmanager
-def query_page(ix, page, querystring, more_like_doc_id, more_like_doc_content):
- searcher = ix.searcher()
- try:
- if querystring:
- qp = MultifieldParser(
- ["content", "title", "correspondent", "tag", "type"],
- ix.schema)
- qp.add_plugin(DateParserPlugin())
- str_q = qp.parse(querystring)
- corrected = searcher.correct_query(str_q, querystring)
- else:
- str_q = None
- corrected = None
-
- if more_like_doc_id:
- docnum = searcher.document_number(id=more_like_doc_id)
- kts = searcher.key_terms_from_text(
- 'content', more_like_doc_content, numterms=20,
- model=classify.Bo1Model, normalize=False)
- more_like_q = query.Or(
- [query.Term('content', word, boost=weight)
- for word, weight in kts])
- result_page = searcher.search_page(
- more_like_q, page, filter=str_q, mask={docnum})
- elif str_q:
- result_page = searcher.search_page(str_q, page)
- else:
- raise ValueError(
- "Either querystring or more_like_doc_id is required."
- )
-
- result_page.results.fragmenter = highlight.ContextFragmenter(
- surround=50)
- result_page.results.formatter = JsonFormatter()
-
- if corrected and corrected.query != str_q:
- corrected_query = corrected.string
- else:
- corrected_query = None
-
- yield result_page, corrected_query
- finally:
- searcher.close()
-
-
class DelayedQuery:
@property
def _query(self):
- if 'query' in self.query_params:
- qp = MultifieldParser(
- ["content", "title", "correspondent", "tag", "type"],
- self.ix.schema)
- qp.add_plugin(DateParserPlugin())
- q = qp.parse(self.query_params['query'])
- elif 'more_like_id' in self.query_params:
- more_like_doc_id = int(self.query_params['more_like_id'])
- content = Document.objects.get(id=more_like_doc_id).content
-
- docnum = self.searcher.document_number(id=more_like_doc_id)
- kts = self.searcher.key_terms_from_text(
- 'content', content, numterms=20,
- model=classify.Bo1Model, normalize=False)
- q = query.Or(
- [query.Term('content', word, boost=weight)
- for word, weight in kts])
- else:
- raise ValueError(
- "Either query or more_like_id is required."
- )
- return q
+ raise NotImplementedError()
@property
def _query_filter(self):
@@ -219,32 +162,114 @@ class DelayedQuery:
for k, v in self.query_params.items():
if k == 'correspondent__id':
criterias.append(query.Term('correspondent_id', v))
+ elif k == 'tags__id__all':
+ for tag_id in v.split(","):
+ criterias.append(query.Term('tag_id', tag_id))
+ elif k == 'document_type__id':
+ criterias.append(query.Term('type_id', v))
+ elif k == 'correspondent__isnull':
+ criterias.append(query.Term("has_correspondent", v == "false"))
+ elif k == 'is_tagged':
+ criterias.append(query.Term("has_tag", v == "true"))
+ elif k == 'document_type__isnull':
+ criterias.append(query.Term("has_type", v == "false"))
+ elif k == 'created__date__lt':
+ pass
+ elif k == 'created__date__gt':
+ pass
+ elif k == 'added__date__gt':
+ pass
+ elif k == 'added__date__lt':
+ pass
if len(criterias) > 0:
return query.And(criterias)
else:
return None
- def __init__(self, ix, searcher, query_params, page_size):
- self.ix = ix
+    @property
+    def _query_sortedby(self):
+        """Translate the ``ordering`` request parameter into ``(field, reverse)``.
+
+        Returns ``(None, False)`` when no ordering was requested. A leading
+        ``-`` is stripped from the field name and reported as ``reverse=True``.
+        """
+        if 'ordering' not in self.query_params:
+            return None, False
+
+        ordering: str = self.query_params['ordering']
+        descending = ordering.startswith('-')
+        return (ordering[1:] if descending else ordering), descending
+
+ # searcher: open whoosh Searcher used to run the query.
+ # query_params: request parameters that the query, filter and ordering are read from.
+ # page_size: number of hits per result page.
+ def __init__(self, searcher: Searcher, query_params, page_size):
self.searcher = searcher
self.query_params = query_params
self.page_size = page_size
+ # Result pages already fetched by __getitem__, keyed by the slice start offset.
+ self.saved_results = dict()
def __len__(self):
- results = self.searcher.search(self._query, limit=1, filter=self._query_filter)
- return len(results)
- #return 1000
+ # Fetch the first page through __getitem__ so the result is stored in
+ # saved_results and can be reused by a later slice starting at 0.
+ # NOTE(review): relies on len() of a whoosh ResultsPage being the total
+ # number of hits rather than the page length — confirm.
+ page = self[0:1]
+ return len(page)
def __getitem__(self, item):
+ # `item` is expected to be a slice; only item.start is read. The page that
+ # contains item.start is fetched once and then served from saved_results.
+ if item.start in self.saved_results:
+ return self.saved_results[item.start]
+
+ # _query yields the whoosh query plus an optional set of document numbers
+ # to exclude (mask); _query_sortedby yields the sort field and direction.
+ q, mask = self._query
+ sortedby, reverse = self._query_sortedby
+
page: ResultsPage = self.searcher.search_page(
- self._query,
+ q,
+ mask=mask,
filter=self._query_filter,
pagenum=math.floor(item.start / self.page_size) + 1,
- pagelen=self.page_size
+ pagelen=self.page_size,
+ sortedby=sortedby,
+ reverse=reverse
)
+ # Highlights are rendered as HTML: matches wrapped in <span>, fragments
+ # joined with " ... ", 50 characters of context around each match.
+ page.results.fragmenter = highlight.ContextFragmenter(
+ surround=50)
+ page.results.formatter = HtmlFormatter(tagname="span", between=" ... ")
+
+ self.saved_results[item.start] = page
+
return page
+class DelayedFullTextQuery(DelayedQuery):
+    """Delayed query driven by the free-text ``query`` request parameter."""
+
+    @property
+    def _query(self):
+        """Parse the ``query`` parameter into a whoosh query.
+
+        The text is parsed against the content, title, correspondent, tag and
+        type fields, with date expressions enabled.
+
+        :return: ``(query, mask)``; the mask is always ``None`` because no
+            document is excluded from a full-text search.
+        :raises KeyError: if ``query`` is missing from the request parameters.
+        """
+        q_str = self.query_params['query']
+        qp = MultifieldParser(
+            ["content", "title", "correspondent", "tag", "type"],
+            self.searcher.ixreader.schema)
+        qp.add_plugin(DateParserPlugin())
+        q = qp.parse(q_str)
+
+        # No spelling correction is computed here: nothing in this class
+        # exposes a corrected query to the caller, so running the corrector
+        # would only be wasted work on every page fetch.
+        return q, None
+
+
+class DelayedMoreLikeThisQuery(DelayedQuery):
+    """Delayed query driven by the ``more_like_id`` request parameter."""
+
+    @property
+    def _query(self):
+        """Build a query for documents similar to the referenced document.
+
+        Up to 20 key terms are extracted from the reference document's content
+        and OR-ed together, each boosted by its weight.
+
+        :return: ``(query, mask)`` where the mask holds the index document
+            number looked up for the reference document.
+        """
+        reference_id = int(self.query_params['more_like_id'])
+        reference_text = Document.objects.get(id=reference_id).content
+
+        reference_docnum = self.searcher.document_number(id=reference_id)
+        key_terms = self.searcher.key_terms_from_text(
+            'content', reference_text, numterms=20,
+            model=classify.Bo1Model, normalize=False)
+        term_queries = [
+            query.Term('content', word, boost=weight)
+            for word, weight in key_terms
+        ]
+
+        return query.Or(term_queries), {reference_docnum}
+
+
def autocomplete(ix, term, limit=10):
with ix.reader() as reader:
terms = []
diff --git a/src/documents/models.py b/src/documents/models.py
index 6ee93e3ad..cdd35a2f7 100755
--- a/src/documents/models.py
+++ b/src/documents/models.py
@@ -359,7 +359,10 @@ class SavedView(models.Model):
sort_field = models.CharField(
_("sort field"),
- max_length=128)
+ max_length=128,
+ null=True,
+ blank=True
+ )
sort_reverse = models.BooleanField(
_("sort reverse"),
default=False)
@@ -387,6 +390,8 @@ class SavedViewFilterRule(models.Model):
(17, _("does not have tag")),
(18, _("does not have ASN")),
(19, _("title or content contains")),
+ (20, _("fulltext query")),
+ (21, _("more like this"))
]
saved_view = models.ForeignKey(
diff --git a/src/documents/tests/test_index.py b/src/documents/tests/test_index.py
index 2baa9621d..14304ab28 100644
--- a/src/documents/tests/test_index.py
+++ b/src/documents/tests/test_index.py
@@ -1,20 +1,10 @@
from django.test import TestCase
from documents import index
-from documents.index import JsonFormatter
from documents.models import Document
from documents.tests.utils import DirectoriesMixin
-class JsonFormatterTest(TestCase):
-
- def setUp(self) -> None:
- self.formatter = JsonFormatter()
-
- def test_empty_fragments(self):
- self.assertListEqual(self.formatter.format([]), [])
-
-
class TestAutoComplete(DirectoriesMixin, TestCase):
def test_auto_complete(self):
diff --git a/src/documents/views.py b/src/documents/views.py
index a29983738..f61933e16 100755
--- a/src/documents/views.py
+++ b/src/documents/views.py
@@ -36,7 +36,6 @@ from rest_framework.viewsets import (
from paperless.db import GnuPG
from paperless.views import StandardPagination
-from . import index
from .bulk_download import OriginalAndArchiveStrategy, OriginalsOnlyStrategy, \
ArchiveOnlyStrategy
from .classifier import load_classifier
@@ -332,15 +331,23 @@ class SearchResultSerializer(DocumentSerializer):
def to_representation(self, instance):
doc = Document.objects.get(id=instance['id'])
- # repressentation = super(SearchResultSerializer, self).to_representation(doc)
- # repressentation['__search_hit__'] = {
- # "score": instance.score
- # }
- return super(SearchResultSerializer, self).to_representation(doc)
+ # `instance` is a search hit; the regular document representation is built
+ # from the database row and the hit's score, highlights and rank are added
+ # under the '__search_hit__' key.
+ # NOTE(review): Document.objects.get() raises Document.DoesNotExist when the
+ # index references a document that is gone, so `doc` is never falsy here and
+ # the `if doc else None` guard below cannot trigger.
+ representation = super(SearchResultSerializer, self).to_representation(doc)
+ representation['__search_hit__'] = {
+ "score": instance.score,
+ "highlights": instance.highlights("content",
+ text=doc.content) if doc else None, # NOQA: E501
+ "rank": instance.rank
+ }
+
+ return representation
class UnifiedSearchViewSet(DocumentViewSet):
+ def __init__(self, *args, **kwargs):
+ super(UnifiedSearchViewSet, self).__init__(*args, **kwargs)
+ # Index searcher; assigned in list() while a search request is handled
+ # and read by filter_queryset().
+ self.searcher = None
+
def get_serializer_class(self):
if self._is_search_request():
return SearchResultSerializer
@@ -348,25 +355,39 @@ class UnifiedSearchViewSet(DocumentViewSet):
return DocumentSerializer
def _is_search_request(self):
- return "query" in self.request.query_params
+ return "query" in self.request.query_params or "more_like_id" in self.request.query_params
def filter_queryset(self, queryset):
-
if self._is_search_request():
- ix = index.open_index()
- return index.DelayedQuery(ix, self.searcher, self.request.query_params, self.paginator.page_size)
+ from documents import index
+
+ if "query" in self.request.query_params:
+ query_class = index.DelayedFullTextQuery
+ elif "more_like_id" in self.request.query_params:
+ query_class = index.DelayedMoreLikeThisQuery
+ else:
+ raise ValueError()
+
+ return query_class(
+ self.searcher,
+ self.request.query_params,
+ self.paginator.get_page_size(self.request))
else:
return super(UnifiedSearchViewSet, self).filter_queryset(queryset)
def list(self, request, *args, **kwargs):
if self._is_search_request():
- ix = index.open_index()
- with ix.searcher() as s:
- self.searcher = s
- return super(UnifiedSearchViewSet, self).list(request)
+ # Imported here rather than at module level.
+ from documents import index
+ try:
+ # Keep the searcher open while the parent list() runs, so
+ # filter_queryset() and pagination can query the index through it.
+ with index.open_index_searcher() as s:
+ self.searcher = s
+ return super(UnifiedSearchViewSet, self).list(request)
+ # NOTE(review): every exception raised while handling the search,
+ # not only query errors, is answered with HTTP 400 and the
+ # exception text as the response body.
+ except Exception as e:
+ return HttpResponseBadRequest(str(e))
else:
return super(UnifiedSearchViewSet, self).list(request)
+
class LogViewSet(ViewSet):
permission_classes = (IsAuthenticated,)
@@ -518,74 +539,6 @@ class SelectionDataView(GenericAPIView):
return r
-class SearchView(APIView):
-
- permission_classes = (IsAuthenticated,)
-
- def add_infos_to_hit(self, r):
- try:
- doc = Document.objects.get(id=r['id'])
- except Document.DoesNotExist:
- logger.warning(
- f"Search index returned a non-existing document: "
- f"id: {r['id']}, title: {r['title']}. "
- f"Search index needs reindex."
- )
- doc = None
-
- return {'id': r['id'],
- 'highlights': r.highlights("content", text=doc.content) if doc else None, # NOQA: E501
- 'score': r.score,
- 'rank': r.rank,
- 'document': DocumentSerializer(doc).data if doc else None,
- 'title': r['title']
- }
-
- def get(self, request, format=None):
- from documents import index
-
- if 'query' in request.query_params:
- query = request.query_params['query']
- else:
- query = None
-
- if 'more_like' in request.query_params:
- more_like_id = request.query_params['more_like']
- more_like_content = Document.objects.get(id=more_like_id).content
- else:
- more_like_id = None
- more_like_content = None
-
- if not query and not more_like_id:
- return Response({
- 'count': 0,
- 'page': 0,
- 'page_count': 0,
- 'corrected_query': None,
- 'results': []})
-
- try:
- page = int(request.query_params.get('page', 1))
- except (ValueError, TypeError):
- page = 1
-
- if page < 1:
- page = 1
-
- ix = index.open_index()
-
- try:
- with index.query_page(ix, page, query, more_like_id, more_like_content) as (result_page, corrected_query): # NOQA: E501
- return Response(
- {'count': len(result_page),
- 'page': result_page.pagenum,
- 'page_count': result_page.pagecount,
- 'corrected_query': corrected_query,
- 'results': list(map(self.add_infos_to_hit, result_page))})
- except Exception as e:
- return HttpResponseBadRequest(str(e))
-
-
class SearchAutoCompleteView(APIView):
permission_classes = (IsAuthenticated,)
diff --git a/src/paperless/urls.py b/src/paperless/urls.py
index 176fce257..7521d49de 100755
--- a/src/paperless/urls.py
+++ b/src/paperless/urls.py
@@ -16,7 +16,6 @@ from documents.views import (
LogViewSet,
TagViewSet,
DocumentTypeViewSet,
- SearchView,
IndexView,
SearchAutoCompleteView,
StatisticsView,
@@ -47,10 +46,6 @@ urlpatterns = [
SearchAutoCompleteView.as_view(),
name="autocomplete"),
- re_path(r"^search/",
- SearchView.as_view(),
- name="search"),
-
re_path(r"^statistics/",
StatisticsView.as_view(),
name="statistics"),