From f9263ddb624ffeb37c40ba71ecb6d07ad235644c Mon Sep 17 00:00:00 2001 From: jonaswinkler <17569239+jonaswinkler@users.noreply.github.com> Date: Sun, 7 Mar 2021 13:16:23 +0100 Subject: [PATCH 01/18] some initial attempts to merge search and document list --- src/documents/index.py | 64 ++++++++++++++++++++++++++++++++++++++++++ src/documents/views.py | 40 ++++++++++++++++++++++++++ src/paperless/urls.py | 4 +-- 3 files changed, 106 insertions(+), 2 deletions(-) diff --git a/src/documents/index.py b/src/documents/index.py index 89e56e930..2c851c9ea 100644 --- a/src/documents/index.py +++ b/src/documents/index.py @@ -2,6 +2,7 @@ import logging import os from contextlib import contextmanager +import math from django.conf import settings from whoosh import highlight, classify, query from whoosh.fields import Schema, TEXT, NUMERIC, KEYWORD, DATETIME @@ -9,8 +10,10 @@ from whoosh.highlight import Formatter, get_text from whoosh.index import create_in, exists_in, open_dir from whoosh.qparser import MultifieldParser from whoosh.qparser.dateparse import DateParserPlugin +from whoosh.searching import ResultsPage from whoosh.writing import AsyncWriter +from documents.models import Document logger = logging.getLogger("paperless.index") @@ -66,6 +69,7 @@ def get_schema(): title=TEXT(stored=True), content=TEXT(), correspondent=TEXT(stored=True), + correspondent_id=NUMERIC(stored=True, numtype=int), tag=KEYWORD(stored=True, commas=True, scorable=True, lowercase=True), type=TEXT(stored=True), created=DATETIME(stored=True, sortable=True), @@ -109,6 +113,7 @@ def update_document(writer, doc): title=doc.title, content=doc.content, correspondent=doc.correspondent.name if doc.correspondent else None, + correspondent_id=doc.correspondent.id if doc.correspondent else None, tag=tags if tags else None, type=doc.document_type.name if doc.document_type else None, created=doc.created, @@ -181,6 +186,65 @@ def query_page(ix, page, querystring, more_like_doc_id, more_like_doc_content): searcher.close() +class DelayedQuery: + + @property + def _query(self): + if 'query' in self.query_params: + qp = MultifieldParser( + ["content", "title", "correspondent", "tag", "type"], + self.ix.schema) + qp.add_plugin(DateParserPlugin()) + q = qp.parse(self.query_params['query']) + elif 'more_like_id' in self.query_params: + more_like_doc_id = int(self.query_params['more_like_id']) + content = Document.objects.get(id=more_like_doc_id).content + + docnum = self.searcher.document_number(id=more_like_doc_id) + kts = self.searcher.key_terms_from_text( + 'content', content, numterms=20, + model=classify.Bo1Model, normalize=False) + q = query.Or( + [query.Term('content', word, boost=weight) + for word, weight in kts]) + else: + raise ValueError( + "Either query or more_like_id is required." + ) + return q + + @property + def _query_filter(self): + criterias = [] + for k, v in self.query_params.items(): + if k == 'correspondent__id': + criterias.append(query.Term('correspondent_id', v)) + if len(criterias) > 0: + return query.And(criterias) + else: + return None + + def __init__(self, ix, searcher, query_params, page_size): + self.ix = ix + self.searcher = searcher + self.query_params = query_params + self.page_size = page_size + + def __len__(self): + results = self.searcher.search(self._query, limit=1, filter=self._query_filter) + return len(results) + #return 1000 + + def __getitem__(self, item): + page: ResultsPage = self.searcher.search_page( + self._query, + filter=self._query_filter, + pagenum=math.floor(item.start / self.page_size) + 1, + pagelen=self.page_size + ) + return page + + def autocomplete(ix, term, limit=10): with ix.reader() as reader: terms = [] diff --git a/src/documents/views.py b/src/documents/views.py index a3f495d50..209a277b8 100755 --- a/src/documents/views.py +++ b/src/documents/views.py @@ -35,6 +35,7 @@ from rest_framework.viewsets import ( from paperless.db import GnuPG from paperless.views import StandardPagination +from . import index from .bulk_download import OriginalAndArchiveStrategy, OriginalsOnlyStrategy, \ ArchiveOnlyStrategy from .classifier import load_classifier @@ -326,6 +327,45 @@ class DocumentViewSet(RetrieveModelMixin, raise Http404() +class SearchResultSerializer(DocumentSerializer): + + def to_representation(self, instance): + doc = Document.objects.get(id=instance['id']) + # repressentation = super(SearchResultSerializer, self).to_representation(doc) + # repressentation['__search_hit__'] = { + # "score": instance.score + # } + return super(SearchResultSerializer, self).to_representation(doc) + + +class UnifiedSearchViewSet(DocumentViewSet): + + def get_serializer_class(self): + if self._is_search_request(): + return SearchResultSerializer + else: + return DocumentSerializer + + def _is_search_request(self): + return "query" in self.request.query_params + + def filter_queryset(self, queryset): + + if self._is_search_request(): + ix = index.open_index() + return index.DelayedQuery(ix, self.searcher, self.request.query_params, self.paginator.page_size) + else: + return super(UnifiedSearchViewSet, self).filter_queryset(queryset) + + def list(self, request, *args, **kwargs): + if self._is_search_request(): + ix = index.open_index() + with ix.searcher() as s: + self.searcher = s + return super(UnifiedSearchViewSet, self).list(request) + else: + return super(UnifiedSearchViewSet, self).list(request) + class LogViewSet(ViewSet): permission_classes = (IsAuthenticated,) diff --git a/src/paperless/urls.py b/src/paperless/urls.py index 4e0b8f191..176fce257 100755 --- a/src/paperless/urls.py +++ b/src/paperless/urls.py @@ -12,7 +12,7 @@ from django.utils.translation import gettext_lazy as _ from paperless.consumers import StatusConsumer from documents.views import ( CorrespondentViewSet, - DocumentViewSet, + UnifiedSearchViewSet, LogViewSet, TagViewSet, DocumentTypeViewSet, @@ -31,7 +31,7 @@ from paperless.views import FaviconView api_router = DefaultRouter() api_router.register(r"correspondents", CorrespondentViewSet) api_router.register(r"document_types", DocumentTypeViewSet) -api_router.register(r"documents", DocumentViewSet) +api_router.register(r"documents", UnifiedSearchViewSet) api_router.register(r"logs", LogViewSet, basename="logs") api_router.register(r"tags", TagViewSet) api_router.register(r"saved_views", SavedViewViewSet) From b6ff88645bd2e52599d1151e5c4074b0f3f1ffb0 Mon Sep 17 00:00:00 2001 From: jonaswinkler <17569239+jonaswinkler@users.noreply.github.com> Date: Wed, 17 Mar 2021 22:25:22 +0100 Subject: [PATCH 02/18] lots of changes for the new unified search --- src-ui/src/app/app-routing.module.ts | 2 - src-ui/src/app/app.module.ts | 4 - .../app-frame/app-frame.component.ts | 5 +- .../document-detail.component.ts | 3 +- .../document-card-large.component.html | 10 +- .../document-card-large.component.scss | 5 + .../document-card-large.component.ts | 28 +- .../document-list.component.html | 2 +- .../document-list/document-list.component.ts | 7 + .../filter-editor.component.html | 4 +- .../filter-editor/filter-editor.component.ts | 47 ++- .../result-highlight.component.html | 3 - .../result-highlight.component.scss | 4 - .../result-highlight.component.spec.ts | 25 -- .../result-highlight.component.ts | 19 -- .../components/search/search.component.html | 26 -- .../components/search/search.component.scss | 15 - .../search/search.component.spec.ts | 25 -- .../app/components/search/search.component.ts | 95 ------ src-ui/src/app/data/filter-rule-type.ts | 9 +- src-ui/src/app/data/paperless-document.ts | 11 + src-ui/src/app/data/search-result.ts | 29 -- .../services/document-list-view.service.ts | 12 +- .../src/app/services/rest/search.service.ts | 27 +- src/documents/index.py | 289 ++++++++++-------- src/documents/models.py | 7 +- src/documents/tests/test_index.py | 10 - src/documents/views.py | 117 +++---- src/paperless/urls.py | 5 - 29 files changed, 302 insertions(+), 543 deletions(-) delete mode 100644 src-ui/src/app/components/search/result-highlight/result-highlight.component.html delete mode 100644 src-ui/src/app/components/search/result-highlight/result-highlight.component.scss delete mode 100644 src-ui/src/app/components/search/result-highlight/result-highlight.component.spec.ts delete mode 100644 src-ui/src/app/components/search/result-highlight/result-highlight.component.ts delete mode 100644 src-ui/src/app/components/search/search.component.html delete mode 100644 src-ui/src/app/components/search/search.component.scss delete mode 100644 src-ui/src/app/components/search/search.component.spec.ts delete mode 100644 src-ui/src/app/components/search/search.component.ts delete mode 100644 src-ui/src/app/data/search-result.ts diff --git a/src-ui/src/app/app-routing.module.ts b/src-ui/src/app/app-routing.module.ts index 27f0629b4..89fec9eac 100644 --- a/src-ui/src/app/app-routing.module.ts +++ b/src-ui/src/app/app-routing.module.ts @@ -10,7 +10,6 @@ import { LogsComponent } from './components/manage/logs/logs.component'; import { SettingsComponent } from './components/manage/settings/settings.component'; import { TagListComponent } from './components/manage/tag-list/tag-list.component'; import { NotFoundComponent } from './components/not-found/not-found.component'; -import { SearchComponent } from './components/search/search.component'; const routes: Routes = [ {path: '', redirectTo: 'dashboard', pathMatch: 'full'}, @@ -18,7 +17,6 @@ const routes: Routes = [ {path: 'dashboard', component: DashboardComponent }, {path: 'documents', component: DocumentListComponent }, {path: 'view/:id', component: DocumentListComponent }, - {path: 'search', component: SearchComponent }, {path: 'documents/:id', component: DocumentDetailComponent }, {path: 'tags', component: TagListComponent }, diff --git a/src-ui/src/app/app.module.ts b/src-ui/src/app/app.module.ts index c364424ad..cf149b785 100644 --- a/src-ui/src/app/app.module.ts +++ b/src-ui/src/app/app.module.ts @@ -21,8 +21,6 @@ import { CorrespondentEditDialogComponent } from './components/manage/correspond import { TagEditDialogComponent } from './components/manage/tag-list/tag-edit-dialog/tag-edit-dialog.component'; import { DocumentTypeEditDialogComponent } from './components/manage/document-type-list/document-type-edit-dialog/document-type-edit-dialog.component'; import { TagComponent } from './components/common/tag/tag.component'; -import { SearchComponent } from './components/search/search.component'; -import { ResultHighlightComponent } from './components/search/result-highlight/result-highlight.component'; import { PageHeaderComponent } from './components/common/page-header/page-header.component'; import { AppFrameComponent } from './components/app-frame/app-frame.component'; import { ToastsComponent } from './components/common/toasts/toasts.component'; @@ -104,8 +102,6 @@ registerLocaleData(localeEs) TagEditDialogComponent, DocumentTypeEditDialogComponent, TagComponent, - SearchComponent, - ResultHighlightComponent, PageHeaderComponent, AppFrameComponent, ToastsComponent, diff --git a/src-ui/src/app/components/app-frame/app-frame.component.ts b/src-ui/src/app/components/app-frame/app-frame.component.ts index e360e7567..f8e76f0ae 100644 --- a/src-ui/src/app/components/app-frame/app-frame.component.ts +++ b/src-ui/src/app/components/app-frame/app-frame.component.ts @@ -10,6 +10,8 @@ import { SearchService } from 'src/app/services/rest/search.service'; import { environment } from 'src/environments/environment'; import { DocumentDetailComponent } from '../document-detail/document-detail.component'; import { Meta } from '@angular/platform-browser'; +import { DocumentListViewService } from 'src/app/services/document-list-view.service'; +import { FILTER_FULLTEXT_QUERY } from 'src/app/data/filter-rule-type'; @Component({ selector: 'app-app-frame', @@ -24,6 +26,7 @@ export class AppFrameComponent implements OnInit { private openDocumentsService: OpenDocumentsService, private searchService: SearchService, public savedViewService: SavedViewService, + private list: DocumentListViewService, private meta: Meta ) { @@ -74,7 +77,7 @@ export class AppFrameComponent implements OnInit { search() { this.closeMenu() - this.router.navigate(['search'], {queryParams: {query: this.searchField.value}}) + this.list.quickFilter([{rule_type: FILTER_FULLTEXT_QUERY, value: this.searchField.value}]) } closeDocument(d: PaperlessDocument) { diff --git a/src-ui/src/app/components/document-detail/document-detail.component.ts b/src-ui/src/app/components/document-detail/document-detail.component.ts index af98a6f7f..fee707b22 100644 --- a/src-ui/src/app/components/document-detail/document-detail.component.ts +++ b/src-ui/src/app/components/document-detail/document-detail.component.ts @@ -20,6 +20,7 @@ import { ToastService } from 'src/app/services/toast.service'; import { TextComponent } from '../common/input/text/text.component'; import { SettingsService, SETTINGS_KEYS } from 'src/app/services/settings.service'; import { PaperlessDocumentSuggestions } from 'src/app/data/paperless-document-suggestions'; +import { FILTER_FULLTEXT_MORELIKE } from 'src/app/data/filter-rule-type'; @Component({ selector: 'app-document-detail', @@ -219,7 +220,7 @@ export class DocumentDetailComponent implements OnInit { } moreLike() { - this.router.navigate(["search"], {queryParams: {more_like:this.document.id}}) + this.documentListViewService.quickFilter([{rule_type: FILTER_FULLTEXT_MORELIKE, value: this.documentId.toString()}]) } hasNext() { diff --git a/src-ui/src/app/components/document-list/document-card-large/document-card-large.component.html b/src-ui/src/app/components/document-list/document-card-large/document-card-large.component.html index 119960386..f3037b4fc 100644 --- a/src-ui/src/app/components/document-list/document-card-large/document-card-large.component.html +++ b/src-ui/src/app/components/document-list/document-card-large/document-card-large.component.html @@ -25,14 +25,14 @@

- - {{getDetailsAsString()}} + + {{contentTrimmed}}

- +  More like this @@ -62,9 +62,9 @@
-
+
Score: - +
- +
diff --git a/src-ui/src/app/components/document-list/document-list.component.ts b/src-ui/src/app/components/document-list/document-list.component.ts index cf7afb845..c0ad354ba 100644 --- a/src-ui/src/app/components/document-list/document-list.component.ts +++ b/src-ui/src/app/components/document-list/document-list.component.ts @@ -207,6 +207,13 @@ export class DocumentListComponent implements OnInit, OnDestroy { }) } + clickMoreLike(documentID: number) { + this.list.selectNone() + setTimeout(() => { + //this.filterEditor.moreLikeThis(doc) + }) + } + trackByDocumentId(index, item: PaperlessDocument) { return item.id } diff --git a/src-ui/src/app/components/document-list/filter-editor/filter-editor.component.html b/src-ui/src/app/components/document-list/filter-editor/filter-editor.component.html index 7290354eb..490eed95d 100644 --- a/src-ui/src/app/components/document-list/filter-editor/filter-editor.component.html +++ b/src-ui/src/app/components/document-list/filter-editor/filter-editor.component.html @@ -1,7 +1,6 @@
-
@@ -9,7 +8,8 @@
- + + {{_moreLikeDoc?.title}}
diff --git a/src-ui/src/app/components/document-list/filter-editor/filter-editor.component.ts b/src-ui/src/app/components/document-list/filter-editor/filter-editor.component.ts index 43387c08f..3b645ec97 100644 --- a/src-ui/src/app/components/document-list/filter-editor/filter-editor.component.ts +++ b/src-ui/src/app/components/document-list/filter-editor/filter-editor.component.ts @@ -8,13 +8,17 @@ import { DocumentTypeService } from 'src/app/services/rest/document-type.service import { TagService } from 'src/app/services/rest/tag.service'; import { CorrespondentService } from 'src/app/services/rest/correspondent.service'; import { FilterRule } from 'src/app/data/filter-rule'; -import { FILTER_ADDED_AFTER, FILTER_ADDED_BEFORE, FILTER_ASN, FILTER_CORRESPONDENT, FILTER_CREATED_AFTER, FILTER_CREATED_BEFORE, FILTER_DOCUMENT_TYPE, FILTER_HAS_ANY_TAG, FILTER_HAS_TAG, FILTER_TITLE, FILTER_TITLE_CONTENT } from 'src/app/data/filter-rule-type'; +import { FILTER_ADDED_AFTER, FILTER_ADDED_BEFORE, FILTER_ASN, FILTER_CORRESPONDENT, FILTER_CREATED_AFTER, FILTER_CREATED_BEFORE, FILTER_DOCUMENT_TYPE, FILTER_FULLTEXT_MORELIKE, FILTER_FULLTEXT_QUERY, FILTER_HAS_ANY_TAG, FILTER_HAS_TAG, FILTER_TITLE, FILTER_TITLE_CONTENT } from 'src/app/data/filter-rule-type'; import { FilterableDropdownSelectionModel } from '../../common/filterable-dropdown/filterable-dropdown.component'; import { ToggleableItemState } from '../../common/filterable-dropdown/toggleable-dropdown-button/toggleable-dropdown-button.component'; +import { DocumentService } from 'src/app/services/rest/document.service'; +import { PaperlessDocument } from 'src/app/data/paperless-document'; const TEXT_FILTER_TARGET_TITLE = "title" const TEXT_FILTER_TARGET_TITLE_CONTENT = "title-content" const TEXT_FILTER_TARGET_ASN = "asn" +const TEXT_FILTER_TARGET_FULLTEXT_QUERY = "fulltext-query" +const TEXT_FILTER_TARGET_FULLTEXT_MORELIKE = "fulltext-morelike" @Component({ selector: 'app-filter-editor', @@ -64,7 +68,8 @@ export class FilterEditorComponent implements OnInit, OnDestroy { constructor( private documentTypeService: DocumentTypeService, private tagService: TagService, - private correspondentService: CorrespondentService + private correspondentService: CorrespondentService, + private documentService: DocumentService ) { } tags: PaperlessTag[] = [] @@ -72,12 +77,21 @@ export class FilterEditorComponent implements OnInit, OnDestroy { documentTypes: PaperlessDocumentType[] = [] _textFilter = "" + _moreLikeId: number + _moreLikeDoc: PaperlessDocument - textFilterTargets = [ - {id: TEXT_FILTER_TARGET_TITLE, name: $localize`Title`}, - {id: TEXT_FILTER_TARGET_TITLE_CONTENT, name: $localize`Title & content`}, - {id: TEXT_FILTER_TARGET_ASN, name: $localize`ASN`} - ] + get textFilterTargets() { + let targets = [ + {id: TEXT_FILTER_TARGET_TITLE, name: $localize`Title`}, + {id: TEXT_FILTER_TARGET_TITLE_CONTENT, name: $localize`Title & content`}, + {id: TEXT_FILTER_TARGET_ASN, name: $localize`ASN`}, + {id: TEXT_FILTER_TARGET_FULLTEXT_QUERY, name: $localize`Fulltext search`} + ] + if (this.textFilterTarget == TEXT_FILTER_TARGET_FULLTEXT_MORELIKE) { + targets.push({id: TEXT_FILTER_TARGET_FULLTEXT_MORELIKE, name: $localize`More like`}) + } + return targets + } textFilterTarget = TEXT_FILTER_TARGET_TITLE_CONTENT @@ -101,6 +115,7 @@ export class FilterEditorComponent implements OnInit, OnDestroy { this.tagSelectionModel.clear(false) this.correspondentSelectionModel.clear(false) this._textFilter = null + this._moreLikeId = null this.dateAddedBefore = null this.dateAddedAfter = null this.dateCreatedBefore = null @@ -120,6 +135,17 @@ export class FilterEditorComponent implements OnInit, OnDestroy { this._textFilter = rule.value this.textFilterTarget = TEXT_FILTER_TARGET_ASN break + case FILTER_FULLTEXT_QUERY: + this._textFilter = rule.value + this.textFilterTarget = TEXT_FILTER_TARGET_FULLTEXT_QUERY + break + case FILTER_FULLTEXT_MORELIKE: + this._moreLikeId = +rule.value + this.textFilterTarget = TEXT_FILTER_TARGET_FULLTEXT_MORELIKE + this.documentService.get(this._moreLikeId).subscribe(result => { + this._moreLikeDoc = result + }) + break case FILTER_CREATED_AFTER: this.dateCreatedAfter = rule.value break @@ -159,6 +185,12 @@ export class FilterEditorComponent implements OnInit, OnDestroy { if (this._textFilter && this.textFilterTarget == TEXT_FILTER_TARGET_ASN) { filterRules.push({rule_type: FILTER_ASN, value: this._textFilter}) } + if (this._textFilter && this.textFilterTarget == TEXT_FILTER_TARGET_FULLTEXT_QUERY) { + filterRules.push({rule_type: FILTER_FULLTEXT_QUERY, value: this._textFilter}) + } + if (this._moreLikeId && this.textFilterTarget == TEXT_FILTER_TARGET_FULLTEXT_MORELIKE) { + filterRules.push({rule_type: FILTER_FULLTEXT_MORELIKE, value: this._moreLikeId?.toString()}) + } if (this.tagSelectionModel.isNoneSelected()) { filterRules.push({rule_type: FILTER_HAS_ANY_TAG, value: "false"}) } else { @@ -232,6 +264,7 @@ export class FilterEditorComponent implements OnInit, OnDestroy { } resetSelected() { + this.textFilterTarget = TEXT_FILTER_TARGET_TITLE_CONTENT this.reset.next() } diff --git a/src-ui/src/app/components/search/result-highlight/result-highlight.component.html b/src-ui/src/app/components/search/result-highlight/result-highlight.component.html deleted file mode 100644 index 5dc5baa94..000000000 --- a/src-ui/src/app/components/search/result-highlight/result-highlight.component.html +++ /dev/null @@ -1,3 +0,0 @@ -... - {{token.text}} ... - \ No newline at end of file diff --git a/src-ui/src/app/components/search/result-highlight/result-highlight.component.scss b/src-ui/src/app/components/search/result-highlight/result-highlight.component.scss deleted file mode 100644 index e04dd13b2..000000000 --- a/src-ui/src/app/components/search/result-highlight/result-highlight.component.scss +++ /dev/null @@ -1,4 +0,0 @@ -.match { - color: black; - background-color: rgb(255, 211, 66); -} \ No newline at end of file diff --git a/src-ui/src/app/components/search/result-highlight/result-highlight.component.spec.ts b/src-ui/src/app/components/search/result-highlight/result-highlight.component.spec.ts deleted file mode 100644 index 8e00a9d0b..000000000 --- a/src-ui/src/app/components/search/result-highlight/result-highlight.component.spec.ts +++ /dev/null @@ -1,25 +0,0 @@ -import { ComponentFixture, TestBed } from '@angular/core/testing'; - -import { ResultHighlightComponent } from './result-highlight.component'; - -describe('ResultHighlightComponent', () => { - let component: ResultHighlightComponent; - let fixture: ComponentFixture; - - beforeEach(async () => { - await TestBed.configureTestingModule({ - declarations: [ ResultHighlightComponent ] - }) - .compileComponents(); - }); - - beforeEach(() => { - fixture = TestBed.createComponent(ResultHighlightComponent); - component = fixture.componentInstance; - fixture.detectChanges(); - }); - - it('should create', () => { - expect(component).toBeTruthy(); - }); -}); diff --git a/src-ui/src/app/components/search/result-highlight/result-highlight.component.ts b/src-ui/src/app/components/search/result-highlight/result-highlight.component.ts deleted file mode 100644 index d9a1a50b1..000000000 --- a/src-ui/src/app/components/search/result-highlight/result-highlight.component.ts +++ /dev/null @@ -1,19 +0,0 @@ -import { Component, Input, OnInit } from '@angular/core'; -import { SearchHitHighlight } from 'src/app/data/search-result'; - -@Component({ - selector: 'app-result-highlight', - templateUrl: './result-highlight.component.html', - styleUrls: ['./result-highlight.component.scss'] -}) -export class ResultHighlightComponent implements OnInit { - - constructor() { } - - @Input() - highlights: SearchHitHighlight[][] - - ngOnInit(): void { - } - -} diff --git a/src-ui/src/app/components/search/search.component.html b/src-ui/src/app/components/search/search.component.html deleted file mode 100644 index f794a0feb..000000000 --- a/src-ui/src/app/components/search/search.component.html +++ /dev/null @@ -1,26 +0,0 @@ - - - -
Invalid search query: {{errorMessage}}
- -

Showing documents similar to {{more_like_doc?.original_file_name}}

- -

- Search query: {{query}} - - - Did you mean "{{correctedQuery}}"? - -

- -
-

{resultCount, plural, =0 {No results} =1 {One result} other {{{resultCount}} results}}

- - - - - -
diff --git a/src-ui/src/app/components/search/search.component.scss b/src-ui/src/app/components/search/search.component.scss deleted file mode 100644 index 40ca79a61..000000000 --- a/src-ui/src/app/components/search/search.component.scss +++ /dev/null @@ -1,15 +0,0 @@ -.result-content { - color: darkgray; -} - -.doc-img { - object-fit: cover; - object-position: top; - height: 100%; - position: absolute; - -} - -.result-content-searching { - opacity: 0.3; -} \ No newline at end of file diff --git a/src-ui/src/app/components/search/search.component.spec.ts b/src-ui/src/app/components/search/search.component.spec.ts deleted file mode 100644 index 918ce7071..000000000 --- a/src-ui/src/app/components/search/search.component.spec.ts +++ /dev/null @@ -1,25 +0,0 @@ -import { ComponentFixture, TestBed } from '@angular/core/testing'; - -import { SearchComponent } from './search.component'; - -describe('SearchComponent', () => { - let component: SearchComponent; - let fixture: ComponentFixture; - - beforeEach(async () => { - await TestBed.configureTestingModule({ - declarations: [ SearchComponent ] - }) - .compileComponents(); - }); - - beforeEach(() => { - fixture = TestBed.createComponent(SearchComponent); - component = fixture.componentInstance; - fixture.detectChanges(); - }); - - it('should create', () => { - expect(component).toBeTruthy(); - }); -}); diff --git a/src-ui/src/app/components/search/search.component.ts b/src-ui/src/app/components/search/search.component.ts deleted file mode 100644 index 4570ac3fa..000000000 --- a/src-ui/src/app/components/search/search.component.ts +++ /dev/null @@ -1,95 +0,0 @@ -import { Component, OnInit } from '@angular/core'; -import { ActivatedRoute, Router } from '@angular/router'; -import { PaperlessDocument } from 'src/app/data/paperless-document'; -import { PaperlessDocumentType } from 'src/app/data/paperless-document-type'; -import { SearchHit } from 'src/app/data/search-result'; -import { DocumentService } from 'src/app/services/rest/document.service'; -import { SearchService } from 'src/app/services/rest/search.service'; - -@Component({ - selector: 'app-search', - templateUrl: './search.component.html', - styleUrls: ['./search.component.scss'] -}) -export class SearchComponent implements OnInit { - - results: SearchHit[] = [] - - query: string = "" - - more_like: number - - more_like_doc: PaperlessDocument - - searching = false - - currentPage = 1 - - pageCount = 1 - - resultCount - - correctedQuery: string = null - - errorMessage: string - - get maxScore() { - return this.results?.length > 0 ? this.results[0].score : 100 - } - - constructor(private searchService: SearchService, private route: ActivatedRoute, private router: Router, private documentService: DocumentService) { } - - ngOnInit(): void { - this.route.queryParamMap.subscribe(paramMap => { - window.scrollTo(0, 0) - this.query = paramMap.get('query') - this.more_like = paramMap.has('more_like') ? +paramMap.get('more_like') : null - if (this.more_like) { - this.documentService.get(this.more_like).subscribe(r => { - this.more_like_doc = r - }) - } else { - this.more_like_doc = null - } - this.searching = true - this.currentPage = 1 - this.loadPage() - }) - - } - - searchCorrectedQuery() { - this.router.navigate(["search"], {queryParams: {query: this.correctedQuery, more_like: this.more_like}}) - } - - loadPage(append: boolean = false) { - this.errorMessage = null - this.correctedQuery = null - - this.searchService.search(this.query, this.currentPage, this.more_like).subscribe(result => { - if (append) { - this.results.push(...result.results) - } else { - this.results = result.results - } - this.pageCount = result.page_count - this.searching = false - this.resultCount = result.count - this.correctedQuery = result.corrected_query - }, error => { - this.searching = false - this.resultCount = 1 - this.pageCount = 1 - this.results = [] - this.errorMessage = error.error - }) - } - - onScroll() { - if (this.currentPage < this.pageCount) { - this.currentPage += 1 - this.loadPage(true) - } - } - -} diff --git a/src-ui/src/app/data/filter-rule-type.ts b/src-ui/src/app/data/filter-rule-type.ts index 2c9f8a373..c215be84e 100644 --- a/src-ui/src/app/data/filter-rule-type.ts +++ b/src-ui/src/app/data/filter-rule-type.ts @@ -22,6 +22,9 @@ export const FILTER_ASN_ISNULL = 18 export const FILTER_TITLE_CONTENT = 19 +export const FILTER_FULLTEXT_QUERY = 20 +export const FILTER_FULLTEXT_MORELIKE = 21 + export const FILTER_RULE_TYPES: FilterRuleType[] = [ {id: FILTER_TITLE, filtervar: "title__icontains", datatype: "string", multi: false, default: ""}, @@ -51,7 +54,11 @@ export const FILTER_RULE_TYPES: FilterRuleType[] = [ {id: FILTER_MODIFIED_AFTER, filtervar: "modified__date__gt", datatype: "date", multi: false}, {id: FILTER_ASN_ISNULL, filtervar: "archive_serial_number__isnull", datatype: "boolean", multi: false}, - {id: FILTER_TITLE_CONTENT, filtervar: "title_content", datatype: "string", multi: false} + {id: FILTER_TITLE_CONTENT, filtervar: "title_content", datatype: "string", multi: false}, + + {id: FILTER_FULLTEXT_QUERY, filtervar: "query", datatype: "string", multi: false}, + + {id: FILTER_FULLTEXT_MORELIKE, filtervar: "more_like_id", datatype: "number", multi: false}, ] export interface FilterRuleType { diff --git a/src-ui/src/app/data/paperless-document.ts b/src-ui/src/app/data/paperless-document.ts index 9d0aeda88..e7412278b 100644 --- a/src-ui/src/app/data/paperless-document.ts +++ b/src-ui/src/app/data/paperless-document.ts @@ -4,6 +4,15 @@ import { PaperlessTag } from './paperless-tag' import { PaperlessDocumentType } from './paperless-document-type' import { Observable } from 'rxjs' +export interface SearchHit { + + score?: number + rank?: number + + highlights?: string + +} + export interface PaperlessDocument extends ObjectWithId { correspondent$?: Observable @@ -40,4 +49,6 @@ export interface PaperlessDocument extends ObjectWithId { archive_serial_number?: number + __search_hit__?: SearchHit + } diff --git a/src-ui/src/app/data/search-result.ts b/src-ui/src/app/data/search-result.ts deleted file mode 100644 index a769a8351..000000000 --- a/src-ui/src/app/data/search-result.ts +++ /dev/null @@ -1,29 +0,0 @@ -import { PaperlessDocument } from './paperless-document' - -export class SearchHitHighlight { - text?: string - term?: number -} - -export interface SearchHit { - id?: number - title?: string - score?: number - rank?: number - - highlights?: SearchHitHighlight[][] - document?: PaperlessDocument -} - -export interface SearchResult { - - count?: number - page?: number - page_count?: number - - corrected_query?: string - - results?: SearchHit[] - - -} diff --git a/src-ui/src/app/services/document-list-view.service.ts b/src-ui/src/app/services/document-list-view.service.ts index 334706a3c..d844323f9 100644 --- a/src-ui/src/app/services/document-list-view.service.ts +++ b/src-ui/src/app/services/document-list-view.service.ts @@ -1,7 +1,9 @@ +import { Route } from '@angular/compiler/src/core'; import { Injectable } from '@angular/core'; -import { Router } from '@angular/router'; +import { ActivatedRoute, Router } from '@angular/router'; import { Observable } from 'rxjs'; import { cloneFilterRules, FilterRule } from '../data/filter-rule'; +import { FILTER_FULLTEXT_MORELIKE, FILTER_FULLTEXT_QUERY } from '../data/filter-rule-type'; import { PaperlessDocument } from '../data/paperless-document'; import { PaperlessSavedView } from '../data/paperless-saved-view'; import { DOCUMENT_LIST_SERVICE } from '../data/storage-keys'; @@ -207,7 +209,11 @@ export class DocumentListViewService { this.activeListViewState.currentPage = 1 this.reduceSelectionToFilter() this.saveDocumentListView() - this.router.navigate(["documents"]) + if (this.router.url == "/documents") { + this.reload() + } else { + this.router.navigate(["documents"]) + } } getLastPage(): number { @@ -317,7 +323,7 @@ export class DocumentListViewService { return this.documents.map(d => d.id).indexOf(documentID) } - constructor(private documentService: DocumentService, private settings: SettingsService, private router: Router) { + constructor(private documentService: DocumentService, private settings: SettingsService, private router: Router, private route: ActivatedRoute) { let documentListViewConfigJson = sessionStorage.getItem(DOCUMENT_LIST_SERVICE.CURRENT_VIEW_CONFIG) if (documentListViewConfigJson) { try { diff --git a/src-ui/src/app/services/rest/search.service.ts b/src-ui/src/app/services/rest/search.service.ts index e750100fa..f10c53485 100644 --- a/src-ui/src/app/services/rest/search.service.ts +++ b/src-ui/src/app/services/rest/search.service.ts @@ -2,8 +2,6 @@ import { HttpClient, HttpParams } from '@angular/common/http'; import { Injectable } from '@angular/core'; import { Observable } from 'rxjs'; import { map } from 'rxjs/operators'; -import { PaperlessDocument } from 'src/app/data/paperless-document'; -import { SearchResult } from 'src/app/data/search-result'; import { environment } from 'src/environments/environment'; import { DocumentService } from './document.service'; @@ -13,30 +11,7 @@ import { DocumentService } from './document.service'; }) export class SearchService { - constructor(private http: HttpClient, private documentService: DocumentService) { } - - search(query: string, page?: number, more_like?: number): Observable { - let httpParams = new HttpParams() - if (query) { - httpParams = httpParams.set('query', query) - } - if (page) { - httpParams = httpParams.set('page', page.toString()) - } - if (more_like) { - httpParams = httpParams.set('more_like', more_like.toString()) - } - return this.http.get(`${environment.apiBaseUrl}search/`, {params: httpParams}).pipe( - map(result => { - result.results.forEach(hit => { - if (hit.document) { - this.documentService.addObservablesToDocument(hit.document) - } - }) - return result - }) - ) - } + constructor(private http: HttpClient) { } autocomplete(term: string): Observable { return this.http.get(`${environment.apiBaseUrl}search/autocomplete/`, {params: new HttpParams().set('term', term)}) diff --git a/src/documents/index.py b/src/documents/index.py index 2c851c9ea..a75534514 100644 --- a/src/documents/index.py +++ b/src/documents/index.py @@ -5,12 +5,12 @@ from contextlib import contextmanager import math from django.conf import settings from whoosh import highlight, classify, query -from whoosh.fields import Schema, TEXT, NUMERIC, KEYWORD, DATETIME -from whoosh.highlight import Formatter, get_text +from whoosh.fields import Schema, TEXT, NUMERIC, KEYWORD, DATETIME, BOOLEAN +from whoosh.highlight import Formatter, get_text, HtmlFormatter from whoosh.index import create_in, exists_in, open_dir from whoosh.qparser import MultifieldParser from whoosh.qparser.dateparse import DateParserPlugin -from whoosh.searching import ResultsPage +from whoosh.searching import ResultsPage, Searcher from whoosh.writing import AsyncWriter from documents.models import Document @@ -18,63 +18,53 @@ from documents.models import Document logger = logging.getLogger("paperless.index") -class JsonFormatter(Formatter): - def __init__(self): - self.seen = {} - - def format_token(self, text, token, replace=False): - ttext = self._text(get_text(text, token, replace)) - return {'text': ttext, 'highlight': 'true'} - - def format_fragment(self, fragment, replace=False): - output = [] - index = fragment.startchar - text = fragment.text - amend_token = None - for t in fragment.matches: - if t.startchar is None: - continue - if t.startchar < index: - continue - if t.startchar > index: - text_inbetween = text[index:t.startchar] - if amend_token and t.startchar - index < 10: - amend_token['text'] += text_inbetween - else: - output.append({'text': text_inbetween, - 'highlight': False}) - amend_token = None - token = self.format_token(text, t, replace) - if amend_token: - amend_token['text'] += token['text'] - else: - output.append(token) - amend_token = token - index = t.endchar - if index < fragment.endchar: - output.append({'text': text[index:fragment.endchar], - 'highlight': False}) - return output - - def format(self, fragments, replace=False): - output = [] - for fragment in fragments: - output.append(self.format_fragment(fragment, replace=replace)) - return output - - def get_schema(): return Schema( - id=NUMERIC(stored=True, unique=True, numtype=int), - title=TEXT(stored=True), + id=NUMERIC( + stored=True, + unique=True + ), + title=TEXT( + sortable=True + ), content=TEXT(), - correspondent=TEXT(stored=True), - correspondent_id=NUMERIC(stored=True, numtype=int), - tag=KEYWORD(stored=True, commas=True, scorable=True, lowercase=True), - type=TEXT(stored=True), - created=DATETIME(stored=True, sortable=True), - modified=DATETIME(stored=True, sortable=True), - added=DATETIME(stored=True, sortable=True), + archive_serial_number=NUMERIC( + sortable=True + ), + + correspondent=TEXT( + sortable=True + ), + correspondent_id=NUMERIC(), + has_correspondent=BOOLEAN(), + + tag=KEYWORD( + commas=True, + scorable=True, + lowercase=True + ), + tag_id=KEYWORD( + commas=True, + scorable=True + ), + has_tag=BOOLEAN(), + + type=TEXT( + sortable=True + ), + type_id=NUMERIC(), + has_type=BOOLEAN(), + + created=DATETIME( + sortable=True + ), + modified=DATETIME( + sortable=True + ), + added=DATETIME( + sortable=True + ), + ) @@ -106,18 +96,38 @@ def open_index_writer(ix=None, optimize=False): writer.commit(optimize=optimize) +@contextmanager +def open_index_searcher(ix=None): + if ix: + searcher = ix.searcher() + else: + searcher = open_index().searcher() + + try: + yield searcher + finally: + searcher.close() + + def update_document(writer, doc): tags = ",".join([t.name for t in doc.tags.all()]) + tags_ids = ",".join([str(t.id) for t in doc.tags.all()]) writer.update_document( id=doc.pk, title=doc.title, content=doc.content, correspondent=doc.correspondent.name if doc.correspondent else None, correspondent_id=doc.correspondent.id if doc.correspondent else None, + has_correspondent=doc.correspondent is not None, tag=tags if tags else None, + tag_id=tags_ids if tags_ids else None, + has_tag=len(tags) > 0, type=doc.document_type.name if doc.document_type else None, + type_id=doc.document_type.id if doc.document_type else None, + has_type=doc.document_type is not None, created=doc.created, added=doc.added, + archive_serial_number=doc.archive_serial_number, modified=doc.modified, ) @@ -140,78 +150,11 @@ def remove_document_from_index(document): remove_document(writer, document) -@contextmanager -def query_page(ix, page, querystring, more_like_doc_id, more_like_doc_content): - searcher = ix.searcher() - try: - if querystring: - qp = MultifieldParser( - ["content", "title", "correspondent", "tag", "type"], - ix.schema) - qp.add_plugin(DateParserPlugin()) - str_q = qp.parse(querystring) - corrected = searcher.correct_query(str_q, querystring) - else: - str_q = None - corrected = None - - if more_like_doc_id: - docnum = searcher.document_number(id=more_like_doc_id) - kts = searcher.key_terms_from_text( - 'content', more_like_doc_content, numterms=20, - model=classify.Bo1Model, normalize=False) - more_like_q = query.Or( - [query.Term('content', word, boost=weight) - for word, weight in kts]) - result_page = searcher.search_page( - more_like_q, page, filter=str_q, mask={docnum}) - elif str_q: - result_page = searcher.search_page(str_q, page) - else: - raise ValueError( - "Either querystring or more_like_doc_id is required." - ) - - result_page.results.fragmenter = highlight.ContextFragmenter( - surround=50) - result_page.results.formatter = JsonFormatter() - - if corrected and corrected.query != str_q: - corrected_query = corrected.string - else: - corrected_query = None - - yield result_page, corrected_query - finally: - searcher.close() - - class DelayedQuery: @property def _query(self): - if 'query' in self.query_params: - qp = MultifieldParser( - ["content", "title", "correspondent", "tag", "type"], - self.ix.schema) - qp.add_plugin(DateParserPlugin()) - q = qp.parse(self.query_params['query']) - elif 'more_like_id' in self.query_params: - more_like_doc_id = int(self.query_params['more_like_id']) - content = Document.objects.get(id=more_like_doc_id).content - - docnum = self.searcher.document_number(id=more_like_doc_id) - kts = self.searcher.key_terms_from_text( - 'content', content, numterms=20, - model=classify.Bo1Model, normalize=False) - q = query.Or( - [query.Term('content', word, boost=weight) - for word, weight in kts]) - else: - raise ValueError( - "Either query or more_like_id is required." - ) - return q + raise NotImplementedError() @property def _query_filter(self): @@ -219,32 +162,114 @@ class DelayedQuery: for k, v in self.query_params.items(): if k == 'correspondent__id': criterias.append(query.Term('correspondent_id', v)) + elif k == 'tags__id__all': + for tag_id in v.split(","): + criterias.append(query.Term('tag_id', tag_id)) + elif k == 'document_type__id': + criterias.append(query.Term('type_id', v)) + elif k == 'correspondent__isnull': + criterias.append(query.Term("has_correspondent", v == "false")) + elif k == 'is_tagged': + criterias.append(query.Term("has_tag", v == "true")) + elif k == 'document_type__isnull': + criterias.append(query.Term("has_type", v == "false")) + elif k == 'created__date__lt': + pass + elif k == 'created__date__gt': + pass + elif k == 'added__date__gt': + pass + elif k == 'added__date__lt': + pass if len(criterias) > 0: return query.And(criterias) else: return None - def __init__(self, ix, searcher, query_params, page_size): - self.ix = ix + @property + def _query_sortedby(self): + if not 'ordering' in self.query_params: + return None, False + + o: str = self.query_params['ordering'] + if o.startswith('-'): + return o[1:], True + else: + return o, False + + def __init__(self, searcher: Searcher, query_params, page_size): self.searcher = searcher self.query_params = query_params self.page_size = page_size + self.saved_results = dict() def __len__(self): - results = self.searcher.search(self._query, limit=1, filter=self._query_filter) - return len(results) - #return 1000 + page = self[0:1] + return len(page) def __getitem__(self, item): + if item.start in self.saved_results: + return self.saved_results[item.start] + + q, mask = self._query + sortedby, reverse = self._query_sortedby + + print("OY", self.page_size) page: ResultsPage = self.searcher.search_page( - self._query, + q, + mask=mask, filter=self._query_filter, pagenum=math.floor(item.start / self.page_size) + 1, - pagelen=self.page_size + pagelen=self.page_size, + sortedby=sortedby, + reverse=reverse ) + page.results.fragmenter = highlight.ContextFragmenter( + surround=50) + page.results.formatter = HtmlFormatter(tagname="span", between=" ... ") + + self.saved_results[item.start] = page + return page +class DelayedFullTextQuery(DelayedQuery): + + @property + def _query(self): + q_str = self.query_params['query'] + qp = MultifieldParser( + ["content", "title", "correspondent", "tag", "type"], + self.searcher.ixreader.schema) + qp.add_plugin(DateParserPlugin()) + q = qp.parse(q_str) + + corrected = self.searcher.correct_query(q, q_str) + if corrected.query != q: + corrected_query = corrected.string + + return q, None + + +class DelayedMoreLikeThisQuery(DelayedQuery): + + @property + def _query(self): + more_like_doc_id = int(self.query_params['more_like_id']) + content = Document.objects.get(id=more_like_doc_id).content + + docnum = self.searcher.document_number(id=more_like_doc_id) + kts = self.searcher.key_terms_from_text( + 'content', content, numterms=20, + model=classify.Bo1Model, normalize=False) + q = query.Or( + [query.Term('content', word, boost=weight) + for word, weight in kts]) + mask = {docnum} + + return q, mask + + def autocomplete(ix, term, limit=10): with ix.reader() as reader: terms = [] diff --git a/src/documents/models.py b/src/documents/models.py index 6ee93e3ad..cdd35a2f7 100755 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -359,7 +359,10 @@ class SavedView(models.Model): sort_field = models.CharField( _("sort field"), - max_length=128) + max_length=128, + null=True, + blank=True + ) sort_reverse = models.BooleanField( _("sort reverse"), default=False) @@ -387,6 +390,8 @@ class SavedViewFilterRule(models.Model): (17, _("does not have tag")), (18, _("does not have ASN")), (19, _("title or content contains")), + (20, _("fulltext query")), + (21, _("more like this")) ] saved_view = models.ForeignKey( diff --git a/src/documents/tests/test_index.py b/src/documents/tests/test_index.py index 2baa9621d..14304ab28 100644 --- a/src/documents/tests/test_index.py +++ b/src/documents/tests/test_index.py @@ -1,20 +1,10 @@ from django.test import TestCase from documents import index -from documents.index import JsonFormatter from documents.models import Document from documents.tests.utils import DirectoriesMixin -class JsonFormatterTest(TestCase): - - def setUp(self) -> None: - self.formatter = JsonFormatter() - - def test_empty_fragments(self): - self.assertListEqual(self.formatter.format([]), []) - - class TestAutoComplete(DirectoriesMixin, TestCase): def test_auto_complete(self): diff --git a/src/documents/views.py b/src/documents/views.py index a29983738..f61933e16 100755 --- a/src/documents/views.py +++ b/src/documents/views.py @@ -36,7 +36,6 @@ from rest_framework.viewsets import ( from paperless.db import GnuPG from paperless.views import StandardPagination -from . import index from .bulk_download import OriginalAndArchiveStrategy, OriginalsOnlyStrategy, \ ArchiveOnlyStrategy from .classifier import load_classifier @@ -332,15 +331,23 @@ class SearchResultSerializer(DocumentSerializer): def to_representation(self, instance): doc = Document.objects.get(id=instance['id']) - # repressentation = super(SearchResultSerializer, self).to_representation(doc) - # repressentation['__search_hit__'] = { - # "score": instance.score - # } - return super(SearchResultSerializer, self).to_representation(doc) + representation = super(SearchResultSerializer, self).to_representation(doc) + representation['__search_hit__'] = { + "score": instance.score, + "highlights": instance.highlights("content", + text=doc.content) if doc else None, # NOQA: E501 + "rank": instance.rank + } + + return representation class UnifiedSearchViewSet(DocumentViewSet): + def __init__(self, *args, **kwargs): + super(UnifiedSearchViewSet, self).__init__(*args, **kwargs) + self.searcher = None + def get_serializer_class(self): if self._is_search_request(): return SearchResultSerializer @@ -348,25 +355,39 @@ class UnifiedSearchViewSet(DocumentViewSet): return DocumentSerializer def _is_search_request(self): - return "query" in self.request.query_params + return "query" in self.request.query_params or "more_like_id" in self.request.query_params def filter_queryset(self, queryset): - if self._is_search_request(): - ix = index.open_index() - return index.DelayedQuery(ix, self.searcher, self.request.query_params, self.paginator.page_size) + from documents import index + + if "query" in self.request.query_params: + query_class = index.DelayedFullTextQuery + elif "more_like_id" in self.request.query_params: + query_class = index.DelayedMoreLikeThisQuery + else: + raise ValueError() + + return query_class( + self.searcher, + self.request.query_params, + self.paginator.get_page_size(self.request)) else: return super(UnifiedSearchViewSet, self).filter_queryset(queryset) def list(self, request, *args, **kwargs): if self._is_search_request(): - ix = index.open_index() - with ix.searcher() as s: - self.searcher = s - return super(UnifiedSearchViewSet, self).list(request) + from documents import index + try: + with index.open_index_searcher() as s: + self.searcher = s + return super(UnifiedSearchViewSet, self).list(request) + except Exception as e: + return HttpResponseBadRequest(str(e)) else: return super(UnifiedSearchViewSet, self).list(request) + class LogViewSet(ViewSet): permission_classes = (IsAuthenticated,) @@ -518,74 +539,6 @@ class SelectionDataView(GenericAPIView): return r -class SearchView(APIView): - - permission_classes = (IsAuthenticated,) - - def add_infos_to_hit(self, r): - try: - doc = Document.objects.get(id=r['id']) - except Document.DoesNotExist: - logger.warning( - f"Search index returned a non-existing document: " - f"id: {r['id']}, title: {r['title']}. " - f"Search index needs reindex." - ) - doc = None - - return {'id': r['id'], - 'highlights': r.highlights("content", text=doc.content) if doc else None, # NOQA: E501 - 'score': r.score, - 'rank': r.rank, - 'document': DocumentSerializer(doc).data if doc else None, - 'title': r['title'] - } - - def get(self, request, format=None): - from documents import index - - if 'query' in request.query_params: - query = request.query_params['query'] - else: - query = None - - if 'more_like' in request.query_params: - more_like_id = request.query_params['more_like'] - more_like_content = Document.objects.get(id=more_like_id).content - else: - more_like_id = None - more_like_content = None - - if not query and not more_like_id: - return Response({ - 'count': 0, - 'page': 0, - 'page_count': 0, - 'corrected_query': None, - 'results': []}) - - try: - page = int(request.query_params.get('page', 1)) - except (ValueError, TypeError): - page = 1 - - if page < 1: - page = 1 - - ix = index.open_index() - - try: - with index.query_page(ix, page, query, more_like_id, more_like_content) as (result_page, corrected_query): # NOQA: E501 - return Response( - {'count': len(result_page), - 'page': result_page.pagenum, - 'page_count': result_page.pagecount, - 'corrected_query': corrected_query, - 'results': list(map(self.add_infos_to_hit, result_page))}) - except Exception as e: - return HttpResponseBadRequest(str(e)) - - class SearchAutoCompleteView(APIView): permission_classes = (IsAuthenticated,) diff --git a/src/paperless/urls.py b/src/paperless/urls.py index 176fce257..7521d49de 100755 --- a/src/paperless/urls.py +++ b/src/paperless/urls.py @@ -16,7 +16,6 @@ from documents.views import ( LogViewSet, TagViewSet, DocumentTypeViewSet, - SearchView, IndexView, SearchAutoCompleteView, StatisticsView, @@ -47,10 +46,6 @@ urlpatterns = [ SearchAutoCompleteView.as_view(), name="autocomplete"), - re_path(r"^search/", - SearchView.as_view(), - name="search"), - re_path(r"^statistics/", StatisticsView.as_view(), name="statistics"), From 740237a8fa087ab13da71f1969d9f5e38d839c77 Mon Sep 17 00:00:00 2001 From: jonaswinkler <17569239+jonaswinkler@users.noreply.github.com> Date: Wed, 17 Mar 2021 22:33:00 +0100 Subject: [PATCH 03/18] add migration --- .../migrations/1015_auto_20210317_1351.py | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 src/documents/migrations/1015_auto_20210317_1351.py diff --git a/src/documents/migrations/1015_auto_20210317_1351.py b/src/documents/migrations/1015_auto_20210317_1351.py new file mode 100644 index 000000000..b6dca444c --- /dev/null +++ b/src/documents/migrations/1015_auto_20210317_1351.py @@ -0,0 +1,23 @@ +# Generated by Django 3.1.7 on 2021-03-17 12:51 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('documents', '1014_auto_20210228_1614'), + ] + + operations = [ + migrations.AlterField( + model_name='savedview', + name='sort_field', + field=models.CharField(blank=True, max_length=128, null=True, verbose_name='sort field'), + ), + migrations.AlterField( + model_name='savedviewfilterrule', + name='rule_type', + field=models.PositiveIntegerField(choices=[(0, 'title contains'), (1, 'content contains'), (2, 'ASN is'), (3, 'correspondent is'), (4, 'document type is'), (5, 'is in inbox'), (6, 'has tag'), (7, 'has any tag'), (8, 'created before'), (9, 'created after'), (10, 'created year is'), (11, 'created month is'), (12, 'created day is'), (13, 'added before'), (14, 'added after'), (15, 'modified before'), (16, 'modified after'), (17, 'does not have tag'), (18, 'does not have ASN'), (19, 'title or content contains'), (20, 'fulltext query'), (21, 'more like this')], verbose_name='rule type'), + ), + ] From 38a386d5ae711ddd5a0269b9867fd4af2505f876 Mon Sep 17 00:00:00 2001 From: jonaswinkler <17569239+jonaswinkler@users.noreply.github.com> Date: Sat, 3 Apr 2021 21:02:13 +0200 Subject: [PATCH 04/18] fix date filtering for full text search --- src/documents/index.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/documents/index.py b/src/documents/index.py index a75534514..ec8057403 100644 --- a/src/documents/index.py +++ b/src/documents/index.py @@ -3,6 +3,7 @@ import os from contextlib import contextmanager import math +from dateutil.parser import isoparse from django.conf import settings from whoosh import highlight, classify, query from whoosh.fields import Schema, TEXT, NUMERIC, KEYWORD, DATETIME, BOOLEAN @@ -174,13 +175,17 @@ class DelayedQuery: elif k == 'document_type__isnull': criterias.append(query.Term("has_type", v == "false")) elif k == 'created__date__lt': - pass + criterias.append( + query.DateRange("created", start=None, end=isoparse(v))) elif k == 'created__date__gt': - pass + criterias.append( + query.DateRange("created", start=isoparse(v), end=None)) elif k == 'added__date__gt': - pass + criterias.append( + query.DateRange("added", start=isoparse(v), end=None)) elif k == 'added__date__lt': - pass + criterias.append( + query.DateRange("added", start=None, end=isoparse(v))) if len(criterias) > 0: return query.And(criterias) else: From fb1e9fe66a8758ee5cc6c0de798ad3926abef950 Mon Sep 17 00:00:00 2001 From: jonaswinkler <17569239+jonaswinkler@users.noreply.github.com> Date: Sat, 3 Apr 2021 21:02:33 +0200 Subject: [PATCH 05/18] error messages for invalid search queries --- .../document-list.component.html | 171 +++++++++--------- .../services/document-list-view.service.ts | 6 +- 2 files changed, 94 insertions(+), 83 deletions(-) diff --git a/src-ui/src/app/components/document-list/document-list.component.html b/src-ui/src/app/components/document-list/document-list.component.html index 63900b399..f1f21b19c 100644 --- a/src-ui/src/app/components/document-list/document-list.component.html +++ b/src-ui/src/app/components/document-list/document-list.component.html @@ -89,86 +89,95 @@ [rotate]="true" aria-label="Default pagination">
-
- - -
+ + + - - - - - - - - - - - - - - - - - - - - - -
ASNCorrespondentTitleDocument typeCreatedAdded
-
- - -
-
- {{d.archive_serial_number}} - - - {{(d.correspondent$ | async)?.name}} - - - {{d.title | documentTitle}} - - - - {{(d.document_type$ | async)?.name}} - - - {{d.created | customDate}} - - {{d.added | customDate}} -
+ -
- -
+
+ + +
+ + + + + + + + + + + + + + + + + + + + + + +
ASNCorrespondentTitleDocument typeCreatedAdded
+
+ + +
+
+ {{d.archive_serial_number}} + + + {{(d.correspondent$ | async)?.name}} + + + {{d.title | documentTitle}} + + + + {{(d.document_type$ | async)?.name}} + + + {{d.created | customDate}} + + {{d.added | customDate}} +
+ +
+ +
+ + +
diff --git a/src-ui/src/app/services/document-list-view.service.ts b/src-ui/src/app/services/document-list-view.service.ts index 91c2c870a..b542358c7 100644 --- a/src-ui/src/app/services/document-list-view.service.ts +++ b/src-ui/src/app/services/document-list-view.service.ts @@ -1,9 +1,7 @@ -import { Route } from '@angular/compiler/src/core'; import { Injectable } from '@angular/core'; import { ActivatedRoute, Router } from '@angular/router'; import { Observable } from 'rxjs'; import { cloneFilterRules, FilterRule } from '../data/filter-rule'; -import { FILTER_FULLTEXT_MORELIKE, FILTER_FULLTEXT_QUERY } from '../data/filter-rule-type'; import { PaperlessDocument } from '../data/paperless-document'; import { PaperlessSavedView } from '../data/paperless-saved-view'; import { DOCUMENT_LIST_SERVICE } from '../data/storage-keys'; @@ -40,6 +38,7 @@ interface ListViewState { export class DocumentListViewService { isReloading: boolean = false + error: string = null rangeSelectionAnchorIndex: number lastRangeSelectionToIndex: number @@ -103,6 +102,7 @@ export class DocumentListViewService { reload(onFinish?) { this.isReloading = true + this.error = null let activeListViewState = this.activeListViewState this.documentService.listFiltered( @@ -126,6 +126,8 @@ export class DocumentListViewService { // this happens when applying a filter: the current page might not be available anymore due to the reduced result set. activeListViewState.currentPage = 1 this.reload() + } else { + this.error = error.error } }) } From 1ed9c245f5fa3e1a9effd0df73d496d7d7dc318e Mon Sep 17 00:00:00 2001 From: jonaswinkler <17569239+jonaswinkler@users.noreply.github.com> Date: Sat, 3 Apr 2021 21:07:36 +0200 Subject: [PATCH 06/18] fixed more like this --- .../app/components/document-list/document-list.component.ts | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src-ui/src/app/components/document-list/document-list.component.ts b/src-ui/src/app/components/document-list/document-list.component.ts index c0ad354ba..aa534e23e 100644 --- a/src-ui/src/app/components/document-list/document-list.component.ts +++ b/src-ui/src/app/components/document-list/document-list.component.ts @@ -2,6 +2,7 @@ import { Component, OnDestroy, OnInit, QueryList, ViewChild, ViewChildren } from import { ActivatedRoute, Router } from '@angular/router'; import { NgbModal } from '@ng-bootstrap/ng-bootstrap'; import { Subscription } from 'rxjs'; +import { FILTER_FULLTEXT_MORELIKE } from 'src/app/data/filter-rule-type'; import { PaperlessDocument } from 'src/app/data/paperless-document'; import { PaperlessSavedView } from 'src/app/data/paperless-saved-view'; import { SortableDirective, SortEvent } from 'src/app/directives/sortable.directive'; @@ -208,10 +209,7 @@ export class DocumentListComponent implements OnInit, OnDestroy { } clickMoreLike(documentID: number) { - this.list.selectNone() - setTimeout(() => { - //this.filterEditor.moreLikeThis(doc) - }) + this.list.quickFilter([{rule_type: FILTER_FULLTEXT_MORELIKE, value: documentID.toString()}]) } trackByDocumentId(index, item: PaperlessDocument) { From b7063b199a4d0483d872fe5c1e6001385f063392 Mon Sep 17 00:00:00 2001 From: jonaswinkler <17569239+jonaswinkler@users.noreply.github.com> Date: Sat, 3 Apr 2021 21:49:31 +0200 Subject: [PATCH 07/18] disable sorting for now --- src/documents/index.py | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/src/documents/index.py b/src/documents/index.py index ec8057403..e17c82daa 100644 --- a/src/documents/index.py +++ b/src/documents/index.py @@ -193,20 +193,21 @@ class DelayedQuery: @property def _query_sortedby(self): - if not 'ordering' in self.query_params: - return None, False + # if not 'ordering' in self.query_params: + return None, False - o: str = self.query_params['ordering'] - if o.startswith('-'): - return o[1:], True - else: - return o, False + # o: str = self.query_params['ordering'] + # if o.startswith('-'): + # return o[1:], True + # else: + # return o, False def __init__(self, searcher: Searcher, query_params, page_size): self.searcher = searcher self.query_params = query_params self.page_size = page_size self.saved_results = dict() + self.first_score = None def __len__(self): page = self[0:1] @@ -219,7 +220,6 @@ class DelayedQuery: q, mask = self._query sortedby, reverse = self._query_sortedby - print("OY", self.page_size) page: ResultsPage = self.searcher.search_page( q, mask=mask, @@ -233,6 +233,15 @@ class DelayedQuery: surround=50) page.results.formatter = HtmlFormatter(tagname="span", between=" ... ") + if not self.first_score and len(page.results) > 0: + self.first_score = page.results[0].score + + if self.first_score: + page.results.top_n = list(map( + lambda hit: (hit[0] / self.first_score, hit[1]), + page.results.top_n + )) + self.saved_results[item.start] = page return page From be87caf7f65cdf315d11316ac40c647174e8ac11 Mon Sep 17 00:00:00 2001 From: jonaswinkler <17569239+jonaswinkler@users.noreply.github.com> Date: Sat, 3 Apr 2021 21:50:05 +0200 Subject: [PATCH 08/18] fix search scores --- .../document-card-large.component.html | 8 ++++---- .../document-card-large.component.ts | 17 ++++++++--------- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/src-ui/src/app/components/document-list/document-card-large/document-card-large.component.html b/src-ui/src/app/components/document-list/document-card-large/document-card-large.component.html index f3037b4fc..3a552eb4e 100644 --- a/src-ui/src/app/components/document-list/document-card-large/document-card-large.component.html +++ b/src-ui/src/app/components/document-list/document-card-large/document-card-large.component.html @@ -62,10 +62,6 @@
-
- Score: - -
+
+ Score: + +
diff --git a/src-ui/src/app/components/document-list/document-card-large/document-card-large.component.ts b/src-ui/src/app/components/document-list/document-card-large/document-card-large.component.ts index 0b7b7d793..d8f29ef5a 100644 --- a/src-ui/src/app/components/document-list/document-card-large/document-card-large.component.ts +++ b/src-ui/src/app/components/document-list/document-card-large/document-card-large.component.ts @@ -41,21 +41,20 @@ export class DocumentCardLargeComponent implements OnInit { @Output() clickMoreLike= new EventEmitter() - @Input() - searchScore: number - @ViewChild('popover') popover: NgbPopover mouseOnPreview = false popoverHidden = true get searchScoreClass() { - if (this.searchScore > 0.7) { - return "success" - } else if (this.searchScore > 0.3) { - return "warning" - } else { - return "danger" + if (this.document.__search_hit__) { + if (this.document.__search_hit__.score > 0.7) { + return "success" + } else if (this.document.__search_hit__.score > 0.3) { + return "warning" + } else { + return "danger" + } } } From 3dfe5c92628d4c41d89dbd89f753b7485da0eeeb Mon Sep 17 00:00:00 2001 From: jonaswinkler <17569239+jonaswinkler@users.noreply.github.com> Date: Sat, 3 Apr 2021 21:50:23 +0200 Subject: [PATCH 09/18] fix page out of range with full text --- src/documents/views.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/documents/views.py b/src/documents/views.py index f61933e16..71b01cdd6 100755 --- a/src/documents/views.py +++ b/src/documents/views.py @@ -17,6 +17,7 @@ from django_filters.rest_framework import DjangoFilterBackend from django_q.tasks import async_task from rest_framework import parsers from rest_framework.decorators import action +from rest_framework.exceptions import NotFound from rest_framework.filters import OrderingFilter, SearchFilter from rest_framework.generics import GenericAPIView from rest_framework.mixins import ( @@ -382,6 +383,8 @@ class UnifiedSearchViewSet(DocumentViewSet): with index.open_index_searcher() as s: self.searcher = s return super(UnifiedSearchViewSet, self).list(request) + except NotFound: + raise except Exception as e: return HttpResponseBadRequest(str(e)) else: From 4e289c7dab2391a6fd1c0ad2f39f09975f25533a Mon Sep 17 00:00:00 2001 From: jonaswinkler <17569239+jonaswinkler@users.noreply.github.com> Date: Sat, 3 Apr 2021 21:56:33 +0200 Subject: [PATCH 10/18] rename search --- .../document-list/filter-editor/filter-editor.component.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src-ui/src/app/components/document-list/filter-editor/filter-editor.component.ts b/src-ui/src/app/components/document-list/filter-editor/filter-editor.component.ts index 3b645ec97..16c342308 100644 --- a/src-ui/src/app/components/document-list/filter-editor/filter-editor.component.ts +++ b/src-ui/src/app/components/document-list/filter-editor/filter-editor.component.ts @@ -85,7 +85,7 @@ export class FilterEditorComponent implements OnInit, OnDestroy { {id: TEXT_FILTER_TARGET_TITLE, name: $localize`Title`}, {id: TEXT_FILTER_TARGET_TITLE_CONTENT, name: $localize`Title & content`}, {id: TEXT_FILTER_TARGET_ASN, name: $localize`ASN`}, - {id: TEXT_FILTER_TARGET_FULLTEXT_QUERY, name: $localize`Fulltext search`} + {id: TEXT_FILTER_TARGET_FULLTEXT_QUERY, name: $localize`Advanced search`} ] if (this.textFilterTarget == TEXT_FILTER_TARGET_FULLTEXT_MORELIKE) { targets.push({id: TEXT_FILTER_TARGET_FULLTEXT_MORELIKE, name: $localize`More like`}) From fffe4f694fa4bad6ad3985fcdbc0d236f5eb6ecc Mon Sep 17 00:00:00 2001 From: jonaswinkler <17569239+jonaswinkler@users.noreply.github.com> Date: Sat, 3 Apr 2021 22:19:12 +0200 Subject: [PATCH 11/18] reset page when doing full text search --- src-ui/src/app/services/document-list-view.service.ts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src-ui/src/app/services/document-list-view.service.ts b/src-ui/src/app/services/document-list-view.service.ts index b542358c7..aa82190d7 100644 --- a/src-ui/src/app/services/document-list-view.service.ts +++ b/src-ui/src/app/services/document-list-view.service.ts @@ -2,6 +2,7 @@ import { Injectable } from '@angular/core'; import { ActivatedRoute, Router } from '@angular/router'; import { Observable } from 'rxjs'; import { cloneFilterRules, FilterRule } from '../data/filter-rule'; +import { FILTER_FULLTEXT_MORELIKE, FILTER_FULLTEXT_QUERY } from '../data/filter-rule-type'; import { PaperlessDocument } from '../data/paperless-document'; import { PaperlessSavedView } from '../data/paperless-saved-view'; import { DOCUMENT_LIST_SERVICE } from '../data/storage-keys'; @@ -134,6 +135,9 @@ export class DocumentListViewService { set filterRules(filterRules: FilterRule[]) { this.activeListViewState.filterRules = filterRules + if (filterRules.find(r => (r.rule_type == FILTER_FULLTEXT_QUERY || r.rule_type == FILTER_FULLTEXT_MORELIKE))) { + this.activeListViewState.currentPage = 1 + } this.reload() this.reduceSelectionToFilter() this.saveDocumentListView() From ab7a499e8f5ad1252905d2688180d01db5ba985d Mon Sep 17 00:00:00 2001 From: jonaswinkler <17569239+jonaswinkler@users.noreply.github.com> Date: Sun, 4 Apr 2021 00:03:51 +0200 Subject: [PATCH 12/18] refactor filter reset --- .../document-list.component.html | 2 +- .../document-list/document-list.component.ts | 48 ++----------------- .../filter-editor/filter-editor.component.ts | 42 ++++++++++++++-- 3 files changed, 42 insertions(+), 50 deletions(-) diff --git a/src-ui/src/app/components/document-list/document-list.component.html b/src-ui/src/app/components/document-list/document-list.component.html index f1f21b19c..45b6fad37 100644 --- a/src-ui/src/app/components/document-list/document-list.component.html +++ b/src-ui/src/app/components/document-list/document-list.component.html @@ -76,7 +76,7 @@
- +
diff --git a/src-ui/src/app/components/document-list/document-list.component.ts b/src-ui/src/app/components/document-list/document-list.component.ts index aa534e23e..020b38e78 100644 --- a/src-ui/src/app/components/document-list/document-list.component.ts +++ b/src-ui/src/app/components/document-list/document-list.component.ts @@ -2,6 +2,7 @@ import { Component, OnDestroy, OnInit, QueryList, ViewChild, ViewChildren } from import { ActivatedRoute, Router } from '@angular/router'; import { NgbModal } from '@ng-bootstrap/ng-bootstrap'; import { Subscription } from 'rxjs'; +import { FilterRule } from 'src/app/data/filter-rule'; import { FILTER_FULLTEXT_MORELIKE } from 'src/app/data/filter-rule-type'; import { PaperlessDocument } from 'src/app/data/paperless-document'; import { PaperlessSavedView } from 'src/app/data/paperless-saved-view'; @@ -38,7 +39,7 @@ export class DocumentListComponent implements OnInit, OnDestroy { displayMode = 'smallCards' // largeCards, smallCards, details - filterRulesModified: boolean = false + unmodifiedFilterRules: FilterRule[] = [] private consumptionFinishedSubscription: Subscription @@ -82,12 +83,12 @@ export class DocumentListComponent implements OnInit, OnDestroy { } this.list.activateSavedView(view) this.list.reload() - this.rulesChanged() + this.unmodifiedFilterRules = view.filter_rules }) } else { this.list.activateSavedView(null) this.list.reload() - this.rulesChanged() + this.unmodifiedFilterRules = [] } }) } @@ -101,7 +102,6 @@ export class DocumentListComponent implements OnInit, OnDestroy { loadViewConfig(view: PaperlessSavedView) { this.list.loadSavedView(view) this.list.reload() - this.rulesChanged() } saveViewConfig() { @@ -142,46 +142,6 @@ export class DocumentListComponent implements OnInit, OnDestroy { }) } - resetFilters(): void { - this.filterRulesModified = false - if (this.list.activeSavedViewId) { - this.savedViewService.getCached(this.list.activeSavedViewId).subscribe(viewUntouched => { - this.list.filterRules = viewUntouched.filter_rules - this.list.reload() - }) - } else { - this.list.filterRules = [] - this.list.reload() - } - } - - rulesChanged() { - let modified = false - if (this.list.activeSavedViewId == null) { - modified = this.list.filterRules.length > 0 // documents list is modified if it has any filters - } else { - // compare savedView current filters vs original - this.savedViewService.getCached(this.list.activeSavedViewId).subscribe(view => { - let filterRulesInitial = view.filter_rules - - if (this.list.filterRules.length !== filterRulesInitial.length) modified = true - else { - modified = this.list.filterRules.some(rule => { - return (filterRulesInitial.find(fri => fri.rule_type == rule.rule_type && fri.value == rule.value) == undefined) - }) - - if (!modified) { - // only check other direction if we havent already determined is modified - modified = filterRulesInitial.some(rule => { - this.list.filterRules.find(fr => fr.rule_type == rule.rule_type && fr.value == rule.value) == undefined - }) - } - } - }) - } - this.filterRulesModified = modified - } - toggleSelected(document: PaperlessDocument, event: MouseEvent): void { if (!event.shiftKey) this.list.toggleSelected(document) else this.list.selectRangeTo(document) diff --git a/src-ui/src/app/components/document-list/filter-editor/filter-editor.component.ts b/src-ui/src/app/components/document-list/filter-editor/filter-editor.component.ts index 16c342308..c1fd8536f 100644 --- a/src-ui/src/app/components/document-list/filter-editor/filter-editor.component.ts +++ b/src-ui/src/app/components/document-list/filter-editor/filter-editor.component.ts @@ -109,8 +109,23 @@ export class FilterEditorComponent implements OnInit, OnDestroy { dateAddedBefore: string dateAddedAfter: string + _unmodifiedFilterRules: FilterRule[] = [] + _filterRules: FilterRule[] = [] + + @Input() + set unmodifiedFilterRules(value: FilterRule[]) { + this._unmodifiedFilterRules = value + this.checkIfRulesHaveChanged() + } + + get unmodifiedFilterRules(): FilterRule[] { + return this._unmodifiedFilterRules + } + @Input() set filterRules (value: FilterRule[]) { + this._filterRules = value + this.documentTypeSelectionModel.clear(false) this.tagSelectionModel.clear(false) this.correspondentSelectionModel.clear(false) @@ -172,6 +187,7 @@ export class FilterEditorComponent implements OnInit, OnDestroy { break } }) + this.checkIfRulesHaveChanged() } get filterRules(): FilterRule[] { @@ -222,12 +238,27 @@ export class FilterEditorComponent implements OnInit, OnDestroy { @Output() filterRulesChange = new EventEmitter() - @Output() - reset = new EventEmitter() - - @Input() rulesModified: boolean = false + private checkIfRulesHaveChanged() { + let modified = false + if (this._unmodifiedFilterRules.length != this._filterRules.length) { + modified = true + } else { + modified = this._unmodifiedFilterRules.some(rule => { + return (this._filterRules.find(fri => fri.rule_type == rule.rule_type && fri.value == rule.value) == undefined) + }) + + if (!modified) { + // only check other direction if we havent already determined is modified + modified = this._filterRules.some(rule => { + this._unmodifiedFilterRules.find(fr => fr.rule_type == rule.rule_type && fr.value == rule.value) == undefined + }) + } + } + this.rulesModified = modified + } + updateRules() { this.filterRulesChange.next(this.filterRules) } @@ -265,7 +296,8 @@ export class FilterEditorComponent implements OnInit, OnDestroy { resetSelected() { this.textFilterTarget = TEXT_FILTER_TARGET_TITLE_CONTENT - this.reset.next() + this.filterRules = this._unmodifiedFilterRules + this.updateRules() } toggleTag(tagId: number) { From 3b83e9a43d57975efc9ca3de6158d4ec6137a49d Mon Sep 17 00:00:00 2001 From: jonaswinkler <17569239+jonaswinkler@users.noreply.github.com> Date: Sun, 4 Apr 2021 00:04:00 +0200 Subject: [PATCH 13/18] pycodestyle --- src/documents/views.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/documents/views.py b/src/documents/views.py index 71b01cdd6..d8fcca2a9 100755 --- a/src/documents/views.py +++ b/src/documents/views.py @@ -332,15 +332,15 @@ class SearchResultSerializer(DocumentSerializer): def to_representation(self, instance): doc = Document.objects.get(id=instance['id']) - representation = super(SearchResultSerializer, self).to_representation(doc) - representation['__search_hit__'] = { + r = super(SearchResultSerializer, self).to_representation(doc) + r['__search_hit__'] = { "score": instance.score, "highlights": instance.highlights("content", text=doc.content) if doc else None, # NOQA: E501 "rank": instance.rank } - return representation + return r class UnifiedSearchViewSet(DocumentViewSet): @@ -356,7 +356,8 @@ class UnifiedSearchViewSet(DocumentViewSet): return DocumentSerializer def _is_search_request(self): - return "query" in self.request.query_params or "more_like_id" in self.request.query_params + return ("query" in self.request.query_params or + "more_like_id" in self.request.query_params) def filter_queryset(self, queryset): if self._is_search_request(): From 359b46c15bcc60d9a5781c3feda4bf39366faa3c Mon Sep 17 00:00:00 2001 From: jonaswinkler <17569239+jonaswinkler@users.noreply.github.com> Date: Sun, 4 Apr 2021 00:29:40 +0200 Subject: [PATCH 14/18] fixed the test cases --- src/documents/tests/test_admin.py | 2 +- src/documents/tests/test_api.py | 60 ++++++++----------------------- 2 files changed, 15 insertions(+), 47 deletions(-) diff --git a/src/documents/tests/test_admin.py b/src/documents/tests/test_admin.py index ce00a0698..fc1d7ffaf 100644 --- a/src/documents/tests/test_admin.py +++ b/src/documents/tests/test_admin.py @@ -27,7 +27,7 @@ class TestDocumentAdmin(DirectoriesMixin, TestCase): doc.title = "new title" self.doc_admin.save_model(None, doc, None, None) self.assertEqual(Document.objects.get(id=doc.id).title, "new title") - self.assertEqual(self.get_document_from_index(doc)['title'], "new title") + self.assertEqual(self.get_document_from_index(doc)['id'], doc.id) def test_delete_model(self): doc = Document.objects.create(title="test") diff --git a/src/documents/tests/test_api.py b/src/documents/tests/test_api.py index 853131db2..5c54a8d74 100644 --- a/src/documents/tests/test_api.py +++ b/src/documents/tests/test_api.py @@ -7,6 +7,7 @@ import tempfile import zipfile from unittest import mock +import pytest from django.conf import settings from django.contrib.auth.models import User from django.test import override_settings @@ -294,12 +295,6 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): results = response.data['results'] self.assertEqual(len(results), 0) - def test_search_no_query(self): - response = self.client.get("/api/search/") - results = response.data['results'] - - self.assertEqual(len(results), 0) - def test_search(self): d1=Document.objects.create(title="invoice", content="the thing i bought at a shop and paid with bank account", checksum="A", pk=1) d2=Document.objects.create(title="bank statement 1", content="things i paid for in august", pk=2, checksum="B") @@ -311,32 +306,24 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): index.update_document(writer, d1) index.update_document(writer, d2) index.update_document(writer, d3) - response = self.client.get("/api/search/?query=bank") + response = self.client.get("/api/documents/?query=bank") results = response.data['results'] self.assertEqual(response.data['count'], 3) - self.assertEqual(response.data['page'], 1) - self.assertEqual(response.data['page_count'], 1) self.assertEqual(len(results), 3) - response = self.client.get("/api/search/?query=september") + response = self.client.get("/api/documents/?query=september") results = response.data['results'] self.assertEqual(response.data['count'], 1) - self.assertEqual(response.data['page'], 1) - self.assertEqual(response.data['page_count'], 1) self.assertEqual(len(results), 1) - response = self.client.get("/api/search/?query=statement") + response = self.client.get("/api/documents/?query=statement") results = response.data['results'] self.assertEqual(response.data['count'], 2) - self.assertEqual(response.data['page'], 1) - self.assertEqual(response.data['page_count'], 1) self.assertEqual(len(results), 2) - response = self.client.get("/api/search/?query=sfegdfg") + response = self.client.get("/api/documents/?query=sfegdfg") results = response.data['results'] self.assertEqual(response.data['count'], 0) - self.assertEqual(response.data['page'], 0) - self.assertEqual(response.data['page_count'], 0) self.assertEqual(len(results), 0) def test_search_multi_page(self): @@ -349,53 +336,34 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): seen_ids = [] for i in range(1, 6): - response = self.client.get(f"/api/search/?query=content&page={i}") + response = self.client.get(f"/api/documents/?query=content&page={i}&page_size=10") results = response.data['results'] self.assertEqual(response.data['count'], 55) - self.assertEqual(response.data['page'], i) - self.assertEqual(response.data['page_count'], 6) self.assertEqual(len(results), 10) for result in results: self.assertNotIn(result['id'], seen_ids) seen_ids.append(result['id']) - response = self.client.get(f"/api/search/?query=content&page=6") + response = self.client.get(f"/api/documents/?query=content&page=6&page_size=10") results = response.data['results'] self.assertEqual(response.data['count'], 55) - self.assertEqual(response.data['page'], 6) - self.assertEqual(response.data['page_count'], 6) self.assertEqual(len(results), 5) for result in results: self.assertNotIn(result['id'], seen_ids) seen_ids.append(result['id']) - response = self.client.get(f"/api/search/?query=content&page=7") - results = response.data['results'] - self.assertEqual(response.data['count'], 55) - self.assertEqual(response.data['page'], 6) - self.assertEqual(response.data['page_count'], 6) - self.assertEqual(len(results), 5) - def test_search_invalid_page(self): with AsyncWriter(index.open_index()) as writer: for i in range(15): doc = Document.objects.create(checksum=str(i), pk=i+1, title=f"Document {i+1}", content="content") index.update_document(writer, doc) - first_page = self.client.get(f"/api/search/?query=content&page=1").data - second_page = self.client.get(f"/api/search/?query=content&page=2").data - should_be_first_page_1 = self.client.get(f"/api/search/?query=content&page=0").data - should_be_first_page_2 = self.client.get(f"/api/search/?query=content&page=dgfd").data - should_be_first_page_3 = self.client.get(f"/api/search/?query=content&page=").data - should_be_first_page_4 = self.client.get(f"/api/search/?query=content&page=-7868").data - - self.assertDictEqual(first_page, should_be_first_page_1) - self.assertDictEqual(first_page, should_be_first_page_2) - self.assertDictEqual(first_page, should_be_first_page_3) - self.assertDictEqual(first_page, should_be_first_page_4) - self.assertNotEqual(len(first_page['results']), len(second_page['results'])) + response = self.client.get(f"/api/documents/?query=content&page=0&page_size=10") + self.assertEqual(response.status_code, 404) + response = self.client.get(f"/api/documents/?query=content&page=3&page_size=10") + self.assertEqual(response.status_code, 404) @mock.patch("documents.index.autocomplete") def test_search_autocomplete(self, m): @@ -419,6 +387,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): self.assertEqual(response.status_code, 200) self.assertEqual(len(response.data), 10) + @pytest.mark.skip(reason="Not implemented yet") def test_search_spelling_correction(self): with AsyncWriter(index.open_index()) as writer: for i in range(55): @@ -444,7 +413,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): index.update_document(writer, d2) index.update_document(writer, d3) - response = self.client.get(f"/api/search/?more_like={d2.id}") + response = self.client.get(f"/api/documents/?more_like_id={d2.id}") self.assertEqual(response.status_code, 200) @@ -1375,8 +1344,7 @@ class TestApiAuth(APITestCase): self.assertEqual(self.client.get("/api/logs/").status_code, 401) self.assertEqual(self.client.get("/api/saved_views/").status_code, 401) - self.assertEqual(self.client.get("/api/search/").status_code, 401) - self.assertEqual(self.client.get("/api/search/auto_complete/").status_code, 401) + self.assertEqual(self.client.get("/api/search/autocomplete/").status_code, 401) self.assertEqual(self.client.get("/api/documents/bulk_edit/").status_code, 401) self.assertEqual(self.client.get("/api/documents/bulk_download/").status_code, 401) self.assertEqual(self.client.get("/api/documents/selection_data/").status_code, 401) From d13baab0a6478877116df2b298b00c654dc27277 Mon Sep 17 00:00:00 2001 From: jonaswinkler <17569239+jonaswinkler@users.noreply.github.com> Date: Sun, 4 Apr 2021 01:19:07 +0200 Subject: [PATCH 15/18] more testing --- src/documents/index.py | 14 +++------- src/documents/tests/test_api.py | 48 +++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 10 deletions(-) diff --git a/src/documents/index.py b/src/documents/index.py index e17c82daa..6fdcff42f 100644 --- a/src/documents/index.py +++ b/src/documents/index.py @@ -82,11 +82,8 @@ def open_index(recreate=False): @contextmanager -def open_index_writer(ix=None, optimize=False): - if ix: - writer = AsyncWriter(ix) - else: - writer = AsyncWriter(open_index()) +def open_index_writer(optimize=False): + writer = AsyncWriter(open_index()) try: yield writer @@ -98,11 +95,8 @@ def open_index_writer(ix=None, optimize=False): @contextmanager -def open_index_searcher(ix=None): - if ix: - searcher = ix.searcher() - else: - searcher = open_index().searcher() +def open_index_searcher(): + searcher = open_index().searcher() try: yield searcher diff --git a/src/documents/tests/test_api.py b/src/documents/tests/test_api.py index 5c54a8d74..cfde28e2d 100644 --- a/src/documents/tests/test_api.py +++ b/src/documents/tests/test_api.py @@ -423,6 +423,54 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): self.assertEqual(results[0]['id'], d3.id) self.assertEqual(results[1]['id'], d1.id) + def test_search_filtering(self): + t = Tag.objects.create(name="tag") + t2 = Tag.objects.create(name="tag2") + c = Correspondent.objects.create(name="correspondent") + dt = DocumentType.objects.create(name="type") + + d1 = Document.objects.create(checksum="1", correspondent=c, content="test") + d2 = Document.objects.create(checksum="2", document_type=dt, content="test") + d3 = Document.objects.create(checksum="3", content="test") + d3.tags.add(t) + d3.tags.add(t2) + d4 = Document.objects.create(checksum="4", created=datetime.datetime(2020, 7, 13), content="test") + d4.tags.add(t2) + d5 = Document.objects.create(checksum="5", added=datetime.datetime(2020, 7, 13), content="test") + d6 = Document.objects.create(checksum="6", content="test2") + + with AsyncWriter(index.open_index()) as writer: + for doc in Document.objects.all(): + index.update_document(writer, doc) + + def search_query(q): + r = self.client.get("/api/documents/?query=test" + q) + self.assertEqual(r.status_code, 200) + return [hit['id'] for hit in r.data['results']] + + self.assertCountEqual(search_query(""), [d1.id, d2.id, d3.id, d4.id, d5.id]) + self.assertCountEqual(search_query("&is_tagged=true"), [d3.id, d4.id]) + self.assertCountEqual(search_query("&is_tagged=false"), [d1.id, d2.id, d5.id]) + self.assertCountEqual(search_query("&correspondent__id=" + str(c.id)), [d1.id]) + self.assertCountEqual(search_query("&document_type__id=" + str(dt.id)), [d2.id]) + self.assertCountEqual(search_query("&correspondent__isnull"), [d2.id, d3.id, d4.id, d5.id]) + self.assertCountEqual(search_query("&document_type__isnull"), [d1.id, d3.id, d4.id, d5.id]) + self.assertCountEqual(search_query("&tags__id__all=" + str(t.id) + "," + str(t2.id)), [d3.id]) + self.assertCountEqual(search_query("&tags__id__all=" + str(t.id)), [d3.id]) + self.assertCountEqual(search_query("&tags__id__all=" + str(t2.id)), [d3.id, d4.id]) + + self.assertIn(d4.id, search_query("&created__date__lt=" + datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d"))) + self.assertNotIn(d4.id, search_query("&created__date__gt=" + datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d"))) + + self.assertNotIn(d4.id, search_query("&created__date__lt=" + datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d"))) + self.assertIn(d4.id, search_query("&created__date__gt=" + datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d"))) + + self.assertIn(d5.id, search_query("&added__date__lt=" + datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d"))) + self.assertNotIn(d5.id, search_query("&added__date__gt=" + datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d"))) + + self.assertNotIn(d5.id, search_query("&added__date__lt=" + datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d"))) + self.assertIn(d5.id, search_query("&added__date__gt=" + datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d"))) + def test_statistics(self): doc1 = Document.objects.create(title="none1", checksum="A") From c49471fb3b305196e031a1a2119dcbb9853a4d81 Mon Sep 17 00:00:00 2001 From: jonaswinkler <17569239+jonaswinkler@users.noreply.github.com> Date: Sun, 4 Apr 2021 01:25:54 +0200 Subject: [PATCH 16/18] bugfix --- .../src/app/components/document-list/document-list.component.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/src-ui/src/app/components/document-list/document-list.component.ts b/src-ui/src/app/components/document-list/document-list.component.ts index 020b38e78..13a827e97 100644 --- a/src-ui/src/app/components/document-list/document-list.component.ts +++ b/src-ui/src/app/components/document-list/document-list.component.ts @@ -114,6 +114,7 @@ export class DocumentListComponent implements OnInit, OnDestroy { } this.savedViewService.patch(savedView).subscribe(result => { this.toastService.showInfo($localize`View "${this.list.activeSavedViewTitle}" saved successfully.`) + this.unmodifiedFilterRules = this.list.filterRules }) } } From ab47d03e1ad19d585d41cfb10baa499f7b26a0fa Mon Sep 17 00:00:00 2001 From: jonaswinkler <17569239+jonaswinkler@users.noreply.github.com> Date: Sun, 4 Apr 2021 20:46:25 +0200 Subject: [PATCH 17/18] update messages --- src-ui/messages.xlf | 144 ++++++++++++++++++-------------------------- 1 file changed, 58 insertions(+), 86 deletions(-) diff --git a/src-ui/messages.xlf b/src-ui/messages.xlf index 880a32b1b..6a956512f 100644 --- a/src-ui/messages.xlf +++ b/src-ui/messages.xlf @@ -48,21 +48,21 @@ Documents src/app/components/document-list/document-list.component.ts - 49 + 51 View "" saved successfully. src/app/components/document-list/document-list.component.ts - 115 + 116 View "" created successfully. src/app/components/document-list/document-list.component.ts - 136 + 138 @@ -146,77 +146,77 @@ ASN src/app/components/document-list/document-list.component.html - 105 + 111 Correspondent src/app/components/document-list/document-list.component.html - 111 + 117 Title src/app/components/document-list/document-list.component.html - 117 + 123 Document type src/app/components/document-list/document-list.component.html - 123 + 129 Created src/app/components/document-list/document-list.component.html - 129 + 135 Added src/app/components/document-list/document-list.component.html - 135 + 141 Confirm delete src/app/components/document-detail/document-detail.component.ts - 203 + 204 Do you really want to delete document ""? src/app/components/document-detail/document-detail.component.ts - 204 + 205 The files for this document will be deleted permanently. This operation cannot be undone. src/app/components/document-detail/document-detail.component.ts - 205 + 206 Delete document src/app/components/document-detail/document-detail.component.ts - 207 + 208 Error deleting document: src/app/components/document-detail/document-detail.component.ts - 214 + 215 @@ -912,48 +912,6 @@ 25 - - Search results - - src/app/components/search/search.component.html - 1 - - - - Invalid search query: - - src/app/components/search/search.component.html - 4 - - - - Showing documents similar to - - src/app/components/search/search.component.html - 6 - - - - Search query: - - src/app/components/search/search.component.html - 9 - - - - Did you mean ""? - - src/app/components/search/search.component.html - 11 - - - - {VAR_PLURAL, plural, =0 {No results} =1 {One result} other { results}} - - src/app/components/search/search.component.html - 16 - - Paperless-ng @@ -1039,81 +997,95 @@ 106 - - Title - - src/app/components/document-list/filter-editor/filter-editor.component.ts - 77 - - - - Title & content - - src/app/components/document-list/filter-editor/filter-editor.component.ts - 78 - - - - ASN - - src/app/components/document-list/filter-editor/filter-editor.component.ts - 79 - - Correspondent: src/app/components/document-list/filter-editor/filter-editor.component.ts - 33 + 37 Without correspondent src/app/components/document-list/filter-editor/filter-editor.component.ts - 35 + 39 Type: src/app/components/document-list/filter-editor/filter-editor.component.ts - 40 + 44 Without document type src/app/components/document-list/filter-editor/filter-editor.component.ts - 42 + 46 Tag: src/app/components/document-list/filter-editor/filter-editor.component.ts - 46 + 50 Without any tag src/app/components/document-list/filter-editor/filter-editor.component.ts - 50 + 54 Title: src/app/components/document-list/filter-editor/filter-editor.component.ts - 54 + 58 ASN: src/app/components/document-list/filter-editor/filter-editor.component.ts - 57 + 61 + + + + Title + + src/app/components/document-list/filter-editor/filter-editor.component.ts + 85 + + + + Title & content + + src/app/components/document-list/filter-editor/filter-editor.component.ts + 86 + + + + ASN + + src/app/components/document-list/filter-editor/filter-editor.component.ts + 87 + + + + Advanced search + + src/app/components/document-list/filter-editor/filter-editor.component.ts + 88 + + + + More like + + src/app/components/document-list/filter-editor/filter-editor.component.ts + 91 @@ -1233,7 +1205,7 @@ Score: src/app/components/document-list/document-card-large/document-card-large.component.html - 66 + 86 From 808b507b0f0bc2ef30a276f23e3a1301892da52f Mon Sep 17 00:00:00 2001 From: jonaswinkler <17569239+jonaswinkler@users.noreply.github.com> Date: Mon, 5 Apr 2021 00:16:50 +0200 Subject: [PATCH 18/18] fix migration --- .../{1015_auto_20210317_1351.py => 1016_auto_20210317_1351.py} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename src/documents/migrations/{1015_auto_20210317_1351.py => 1016_auto_20210317_1351.py} (95%) diff --git a/src/documents/migrations/1015_auto_20210317_1351.py b/src/documents/migrations/1016_auto_20210317_1351.py similarity index 95% rename from src/documents/migrations/1015_auto_20210317_1351.py rename to src/documents/migrations/1016_auto_20210317_1351.py index b6dca444c..733c1bb33 100644 --- a/src/documents/migrations/1015_auto_20210317_1351.py +++ b/src/documents/migrations/1016_auto_20210317_1351.py @@ -6,7 +6,7 @@ from django.db import migrations, models class Migration(migrations.Migration): dependencies = [ - ('documents', '1014_auto_20210228_1614'), + ('documents', '1015_remove_null_characters'), ] operations = [