From d1e10754a515183ae01fe70aef19c819eefbf60d Mon Sep 17 00:00:00 2001 From: Jonas Winkler Date: Fri, 30 Oct 2020 22:46:43 +0100 Subject: [PATCH 01/44] Saved views, some refactoring --- src-ui/src/app/app-routing.module.ts | 2 +- src-ui/src/app/app.module.ts | 4 +- .../app-frame/app-frame.component.html | 14 ++++ .../app-frame/app-frame.component.ts | 9 ++- .../edit-dialog/edit-dialog.component.ts | 6 +- .../common/input/select/select.component.ts | 3 +- .../components/common/tag/tag.component.ts | 4 +- .../document-detail.component.ts | 5 +- .../document-list.component.html | 65 ++++++++------- .../document-list/document-list.component.ts | 53 ++++++++++-- .../save-view-config-dialog.component.css | 0 .../save-view-config-dialog.component.html | 17 ++++ .../save-view-config-dialog.component.spec.ts | 25 ++++++ .../save-view-config-dialog.component.ts | 33 ++++++++ .../filter-editor.component.html | 2 +- .../filter-editor/filter-editor.component.ts | 80 +++---------------- .../generic-list/generic-list.component.ts | 6 +- .../manage/settings/settings.component.html | 35 +++++++- .../manage/settings/settings.component.ts | 12 ++- .../tag-edit-dialog.component.ts | 6 +- .../manage/tag-list/tag-list.component.ts | 4 +- src-ui/src/app/data/filter-rule-type.ts | 31 +++++++ src-ui/src/app/data/filter-rule.ts | 23 ++++++ src-ui/src/app/data/matching-model.spec.ts | 7 -- src-ui/src/app/data/matching-model.ts | 31 +++---- src-ui/src/app/data/object-with-id.spec.ts | 7 -- src-ui/src/app/data/object-with-id.ts | 2 +- .../app/data/paperless-correspondent.spec.ts | 7 -- .../src/app/data/paperless-correspondent.ts | 2 +- .../app/data/paperless-document-type.spec.ts | 7 -- .../src/app/data/paperless-document-type.ts | 2 +- .../src/app/data/paperless-document.spec.ts | 7 -- src-ui/src/app/data/paperless-document.ts | 2 +- src-ui/src/app/data/paperless-log.spec.ts | 7 -- src-ui/src/app/data/paperless-log.ts | 2 +- src-ui/src/app/data/paperless-tag.spec.ts | 7 -- 
src-ui/src/app/data/paperless-tag.ts | 33 ++++---- src-ui/src/app/data/results.spec.ts | 7 -- src-ui/src/app/data/results.ts | 2 +- src-ui/src/app/data/saved-view-config.ts | 19 +++++ .../services/document-list-view.service.ts | 33 +++++--- .../saved-view-config.service.spec.ts | 16 ++++ .../app/services/saved-view-config.service.ts | 54 +++++++++++++ 43 files changed, 461 insertions(+), 232 deletions(-) create mode 100644 src-ui/src/app/components/document-list/save-view-config-dialog/save-view-config-dialog.component.css create mode 100644 src-ui/src/app/components/document-list/save-view-config-dialog/save-view-config-dialog.component.html create mode 100644 src-ui/src/app/components/document-list/save-view-config-dialog/save-view-config-dialog.component.spec.ts create mode 100644 src-ui/src/app/components/document-list/save-view-config-dialog/save-view-config-dialog.component.ts create mode 100644 src-ui/src/app/data/filter-rule-type.ts create mode 100644 src-ui/src/app/data/filter-rule.ts delete mode 100644 src-ui/src/app/data/matching-model.spec.ts delete mode 100644 src-ui/src/app/data/object-with-id.spec.ts delete mode 100644 src-ui/src/app/data/paperless-correspondent.spec.ts delete mode 100644 src-ui/src/app/data/paperless-document-type.spec.ts delete mode 100644 src-ui/src/app/data/paperless-document.spec.ts delete mode 100644 src-ui/src/app/data/paperless-log.spec.ts delete mode 100644 src-ui/src/app/data/paperless-tag.spec.ts delete mode 100644 src-ui/src/app/data/results.spec.ts create mode 100644 src-ui/src/app/data/saved-view-config.ts create mode 100644 src-ui/src/app/services/saved-view-config.service.spec.ts create mode 100644 src-ui/src/app/services/saved-view-config.service.ts diff --git a/src-ui/src/app/app-routing.module.ts b/src-ui/src/app/app-routing.module.ts index f1398e8f1..fde8fd31f 100644 --- a/src-ui/src/app/app-routing.module.ts +++ b/src-ui/src/app/app-routing.module.ts @@ -19,7 +19,7 @@ const routes: Routes = [ {path: '', 
component: AppFrameComponent, children: [ {path: 'dashboard', component: DashboardComponent, canActivate: [AuthGuardService] }, {path: 'documents', component: DocumentListComponent, canActivate: [AuthGuardService] }, - {path: 'view/:name', component: DocumentListComponent, canActivate: [AuthGuardService] }, + {path: 'view/:id', component: DocumentListComponent, canActivate: [AuthGuardService] }, {path: 'search', component: SearchComponent, canActivate: [AuthGuardService] }, {path: 'documents/:id', component: DocumentDetailComponent, canActivate: [AuthGuardService] }, diff --git a/src-ui/src/app/app.module.ts b/src-ui/src/app/app.module.ts index 73c3244e3..3c79fae30 100644 --- a/src-ui/src/app/app.module.ts +++ b/src-ui/src/app/app.module.ts @@ -36,6 +36,7 @@ import { NgxFileDropModule } from 'ngx-file-drop'; import { TextComponent } from './components/common/input/text/text.component'; import { SelectComponent } from './components/common/input/select/select.component'; import { CheckComponent } from './components/common/input/check/check.component'; +import { SaveViewConfigDialogComponent } from './components/document-list/save-view-config-dialog/save-view-config-dialog.component'; @NgModule({ declarations: [ @@ -66,7 +67,8 @@ import { CheckComponent } from './components/common/input/check/check.component' DocumentCardSmallComponent, TextComponent, SelectComponent, - CheckComponent + CheckComponent, + SaveViewConfigDialogComponent ], imports: [ BrowserModule, diff --git a/src-ui/src/app/components/app-frame/app-frame.component.html b/src-ui/src/app/components/app-frame/app-frame.component.html index c4158bf9c..fb13b90ff 100644 --- a/src-ui/src/app/components/app-frame/app-frame.component.html +++ b/src-ui/src/app/components/app-frame/app-frame.component.html @@ -43,6 +43,20 @@ + + + diff --git a/src-ui/src/app/components/app-frame/app-frame.component.ts b/src-ui/src/app/components/app-frame/app-frame.component.ts index 33a13b384..595da5b1d 100644 --- 
a/src-ui/src/app/components/app-frame/app-frame.component.ts +++ b/src-ui/src/app/components/app-frame/app-frame.component.ts @@ -7,6 +7,7 @@ import { PaperlessDocument } from 'src/app/data/paperless-document'; import { AuthService } from 'src/app/services/auth.service'; import { OpenDocumentsService } from 'src/app/services/open-documents.service'; import { SearchService } from 'src/app/services/rest/search.service'; +import { SavedViewConfigService } from 'src/app/services/saved-view-config.service'; @Component({ selector: 'app-app-frame', @@ -15,7 +16,13 @@ import { SearchService } from 'src/app/services/rest/search.service'; }) export class AppFrameComponent implements OnInit, OnDestroy { - constructor (public router: Router, private openDocumentsService: OpenDocumentsService, private authService: AuthService, private searchService: SearchService) { + constructor ( + public router: Router, + private openDocumentsService: OpenDocumentsService, + private authService: AuthService, + private searchService: SearchService, + public viewConfigService: SavedViewConfigService + ) { } searchField = new FormControl('') diff --git a/src-ui/src/app/components/common/edit-dialog/edit-dialog.component.ts b/src-ui/src/app/components/common/edit-dialog/edit-dialog.component.ts index 153f588a3..ba0d90847 100644 --- a/src-ui/src/app/components/common/edit-dialog/edit-dialog.component.ts +++ b/src-ui/src/app/components/common/edit-dialog/edit-dialog.component.ts @@ -1,8 +1,8 @@ import { Directive, EventEmitter, Input, OnInit, Output } from '@angular/core'; -import { Form, FormGroup } from '@angular/forms'; +import { FormGroup } from '@angular/forms'; import { NgbActiveModal } from '@ng-bootstrap/ng-bootstrap'; import { Observable } from 'rxjs'; -import { MatchingModel } from 'src/app/data/matching-model'; +import { MATCHING_ALGORITHMS } from 'src/app/data/matching-model'; import { ObjectWithId } from 'src/app/data/object-with-id'; import { AbstractPaperlessService } from 
'src/app/services/rest/abstract-paperless-service'; import { Toast, ToastService } from 'src/app/services/toast.service'; @@ -47,7 +47,7 @@ export abstract class EditDialogComponent implements OnI } getMatchingAlgorithms() { - return MatchingModel.MATCHING_ALGORITHMS + return MATCHING_ALGORITHMS } save() { diff --git a/src-ui/src/app/components/common/input/select/select.component.ts b/src-ui/src/app/components/common/input/select/select.component.ts index a53566dab..c8e213722 100644 --- a/src-ui/src/app/components/common/input/select/select.component.ts +++ b/src-ui/src/app/components/common/input/select/select.component.ts @@ -1,6 +1,5 @@ import { Component, EventEmitter, forwardRef, Input, OnInit, Output } from '@angular/core'; -import { ControlValueAccessor, NG_VALUE_ACCESSOR } from '@angular/forms'; -import { v4 as uuidv4 } from 'uuid'; +import { NG_VALUE_ACCESSOR } from '@angular/forms'; import { AbstractInputComponent } from '../abstract-input'; @Component({ diff --git a/src-ui/src/app/components/common/tag/tag.component.ts b/src-ui/src/app/components/common/tag/tag.component.ts index bb4c2a15c..a7f81fa0a 100644 --- a/src-ui/src/app/components/common/tag/tag.component.ts +++ b/src-ui/src/app/components/common/tag/tag.component.ts @@ -1,5 +1,5 @@ import { Component, EventEmitter, Input, OnInit, Output } from '@angular/core'; -import { PaperlessTag } from 'src/app/data/paperless-tag'; +import { TAG_COLOURS, PaperlessTag } from 'src/app/data/paperless-tag'; @Component({ selector: 'app-tag', @@ -23,7 +23,7 @@ export class TagComponent implements OnInit { } getColour() { - return PaperlessTag.COLOURS.find(c => c.id == this.tag.colour) + return TAG_COLOURS.find(c => c.id == this.tag.colour) } } diff --git a/src-ui/src/app/components/document-detail/document-detail.component.ts b/src-ui/src/app/components/document-detail/document-detail.component.ts index 8ae46b9c8..6e0e51300 100644 --- a/src-ui/src/app/components/document-detail/document-detail.component.ts +++ 
b/src-ui/src/app/components/document-detail/document-detail.component.ts @@ -6,7 +6,7 @@ import { NgbModal } from '@ng-bootstrap/ng-bootstrap'; import { PaperlessCorrespondent } from 'src/app/data/paperless-correspondent'; import { PaperlessDocument } from 'src/app/data/paperless-document'; import { PaperlessDocumentType } from 'src/app/data/paperless-document-type'; -import { PaperlessTag } from 'src/app/data/paperless-tag'; +import { TAG_COLOURS, PaperlessTag } from 'src/app/data/paperless-tag'; import { DocumentListViewService } from 'src/app/services/document-list-view.service'; import { OpenDocumentsService } from 'src/app/services/open-documents.service'; import { CorrespondentService } from 'src/app/services/rest/correspondent.service'; @@ -17,6 +17,7 @@ import { DeleteDialogComponent } from '../common/delete-dialog/delete-dialog.com import { CorrespondentEditDialogComponent } from '../manage/correspondent-list/correspondent-edit-dialog/correspondent-edit-dialog.component'; import { DocumentTypeEditDialogComponent } from '../manage/document-type-list/document-type-edit-dialog/document-type-edit-dialog.component'; import { TagEditDialogComponent } from '../manage/tag-list/tag-edit-dialog/tag-edit-dialog.component'; + @Component({ selector: 'app-document-detail', templateUrl: './document-detail.component.html', @@ -116,7 +117,7 @@ export class DocumentDetailComponent implements OnInit { } getColour(id: number) { - return PaperlessTag.COLOURS.find(c => c.id == this.getTag(id).colour) + return TAG_COLOURS.find(c => c.id == this.getTag(id).colour) } addTag(id: number) { diff --git a/src-ui/src/app/components/document-list/document-list.component.html b/src-ui/src/app/components/document-list/document-list.component.html index f7558be49..b66cfbfa0 100644 --- a/src-ui/src/app/components/document-list/document-list.component.html +++ b/src-ui/src/app/components/document-list/document-list.component.html @@ -1,74 +1,83 @@ - + -
+
-
+
- +
- +
+ + + +
+ + +
+ + +
Filter
- +
- +
- +
diff --git a/src-ui/src/app/components/document-list/document-list.component.ts b/src-ui/src/app/components/document-list/document-list.component.ts index 927b9aa43..21537d224 100644 --- a/src-ui/src/app/components/document-list/document-list.component.ts +++ b/src-ui/src/app/components/document-list/document-list.component.ts @@ -1,6 +1,11 @@ import { Component, OnInit } from '@angular/core'; +import { ActivatedRoute, Router } from '@angular/router'; +import { NgbModal } from '@ng-bootstrap/ng-bootstrap'; +import { cloneFilterRules, FilterRule } from 'src/app/data/filter-rule'; +import { SavedViewConfig } from 'src/app/data/saved-view-config'; import { DocumentListViewService } from 'src/app/services/document-list-view.service'; -import { FilterRuleSet } from '../filter-editor/filter-editor.component'; +import { SavedViewConfigService } from 'src/app/services/saved-view-config.service'; +import { SaveViewConfigDialogComponent } from './save-view-config-dialog/save-view-config-dialog.component'; @Component({ selector: 'app-document-list', @@ -10,11 +15,14 @@ import { FilterRuleSet } from '../filter-editor/filter-editor.component'; export class DocumentListComponent implements OnInit { constructor( - public docs: DocumentListViewService) { } + public docs: DocumentListViewService, + public savedViewConfigService: SavedViewConfigService, + public route: ActivatedRoute, + public modalService: NgbModal) { } displayMode = 'smallCards' // largeCards, smallCards, details - filter = new FilterRuleSet() + filterRules: FilterRule[] = [] showFilter = false getSortFields() { @@ -34,18 +42,47 @@ export class DocumentListComponent implements OnInit { if (localStorage.getItem('document-list:displayMode') != null) { this.displayMode = localStorage.getItem('document-list:displayMode') } - this.filter = this.docs.currentFilter.clone() - this.showFilter = this.filter.rules.length > 0 - this.reload() + this.route.paramMap.subscribe(params => { + if (params.has('id')) { + 
this.docs.viewConfig = this.savedViewConfigService.getConfig(params.get('id')) + } else { + this.filterRules = cloneFilterRules(this.docs.currentFilterRules) + this.showFilter = this.filterRules.length > 0 + this.docs.viewConfig = null + } + this.reload() + }) } reload() { this.docs.reload() } - applyFilter() { - this.docs.setFilter(this.filter.clone()) + applyFilterRules() { + this.docs.setFilterRules(this.filterRules) this.reload() } + loadViewConfig(config: SavedViewConfig) { + this.filterRules = config.filterRules + this.docs.setFilterRules(config.filterRules) + this.docs.currentSortField = config.sortField + this.docs.currentSortDirection = config.sortDirection + this.reload() + } + + saveViewConfig() { + let modal = this.modalService.open(SaveViewConfigDialogComponent, {backdrop: 'static'}) + modal.componentInstance.saveClicked.subscribe(formValue => { + this.savedViewConfigService.saveConfig({ + filterRules: cloneFilterRules(this.filterRules), + title: formValue.title, + showInDashboard: formValue.showInDashboard, + showInSideBar: formValue.showInSideBar, + sortDirection: this.docs.currentSortDirection, + sortField: this.docs.currentSortField + }) + modal.close() + }) + } } diff --git a/src-ui/src/app/components/document-list/save-view-config-dialog/save-view-config-dialog.component.css b/src-ui/src/app/components/document-list/save-view-config-dialog/save-view-config-dialog.component.css new file mode 100644 index 000000000..e69de29bb diff --git a/src-ui/src/app/components/document-list/save-view-config-dialog/save-view-config-dialog.component.html b/src-ui/src/app/components/document-list/save-view-config-dialog/save-view-config-dialog.component.html new file mode 100644 index 000000000..870431096 --- /dev/null +++ b/src-ui/src/app/components/document-list/save-view-config-dialog/save-view-config-dialog.component.html @@ -0,0 +1,17 @@ +
+ + + +
diff --git a/src-ui/src/app/components/document-list/save-view-config-dialog/save-view-config-dialog.component.spec.ts b/src-ui/src/app/components/document-list/save-view-config-dialog/save-view-config-dialog.component.spec.ts new file mode 100644 index 000000000..11ac77c0b --- /dev/null +++ b/src-ui/src/app/components/document-list/save-view-config-dialog/save-view-config-dialog.component.spec.ts @@ -0,0 +1,25 @@ +import { ComponentFixture, TestBed } from '@angular/core/testing'; + +import { SaveViewConfigDialogComponent } from './save-view-config-dialog.component'; + +describe('SaveViewConfigDialogComponent', () => { + let component: SaveViewConfigDialogComponent; + let fixture: ComponentFixture; + + beforeEach(async () => { + await TestBed.configureTestingModule({ + declarations: [ SaveViewConfigDialogComponent ] + }) + .compileComponents(); + }); + + beforeEach(() => { + fixture = TestBed.createComponent(SaveViewConfigDialogComponent); + component = fixture.componentInstance; + fixture.detectChanges(); + }); + + it('should create', () => { + expect(component).toBeTruthy(); + }); +}); diff --git a/src-ui/src/app/components/document-list/save-view-config-dialog/save-view-config-dialog.component.ts b/src-ui/src/app/components/document-list/save-view-config-dialog/save-view-config-dialog.component.ts new file mode 100644 index 000000000..6fcdbd2c8 --- /dev/null +++ b/src-ui/src/app/components/document-list/save-view-config-dialog/save-view-config-dialog.component.ts @@ -0,0 +1,33 @@ +import { Component, EventEmitter, OnInit, Output } from '@angular/core'; +import { FormControl, FormGroup } from '@angular/forms'; +import { NgbActiveModal } from '@ng-bootstrap/ng-bootstrap'; + +@Component({ + selector: 'app-save-view-config-dialog', + templateUrl: './save-view-config-dialog.component.html', + styleUrls: ['./save-view-config-dialog.component.css'] +}) +export class SaveViewConfigDialogComponent implements OnInit { + + constructor(private modal: NgbActiveModal) { } + + 
@Output() + public saveClicked = new EventEmitter() + + saveViewConfigForm = new FormGroup({ + title: new FormControl(''), + showInSideBar: new FormControl(false), + showInDashboard: new FormControl(false), + }) + + ngOnInit(): void { + } + + save() { + this.saveClicked.emit(this.saveViewConfigForm.value) + } + + cancel() { + this.modal.close() + } +} diff --git a/src-ui/src/app/components/filter-editor/filter-editor.component.html b/src-ui/src/app/components/filter-editor/filter-editor.component.html index de65b1150..ee8ee67bb 100644 --- a/src-ui/src/app/components/filter-editor/filter-editor.component.html +++ b/src-ui/src/app/components/filter-editor/filter-editor.component.html @@ -1,4 +1,4 @@ -
+
- From 6f3d25d7b1d411ae1ea675f5f13015ecb3270719 Mon Sep 17 00:00:00 2001 From: Jonas Winkler Date: Tue, 3 Nov 2020 13:51:49 +0100 Subject: [PATCH 37/44] this was not required since saving a document updates the index anyway --- src/documents/apps.py | 2 -- src/documents/signals/handlers.py | 4 ---- 2 files changed, 6 deletions(-) diff --git a/src/documents/apps.py b/src/documents/apps.py index 48807adf1..ca278e2e3 100644 --- a/src/documents/apps.py +++ b/src/documents/apps.py @@ -16,7 +16,6 @@ class DocumentsConfig(AppConfig): run_post_consume_script, cleanup_document_deletion, set_log_entry, - index_document, set_correspondent, set_document_type, set_tags @@ -25,7 +24,6 @@ class DocumentsConfig(AppConfig): document_consumption_started.connect(run_pre_consume_script) - document_consumption_finished.connect(index_document) document_consumption_finished.connect(add_inbox_tags) document_consumption_finished.connect(set_correspondent) document_consumption_finished.connect(set_document_type) diff --git a/src/documents/signals/handlers.py b/src/documents/signals/handlers.py index edabfe0aa..231a39e0d 100755 --- a/src/documents/signals/handlers.py +++ b/src/documents/signals/handlers.py @@ -16,10 +16,6 @@ def logger(message, group): logging.getLogger(__name__).debug(message, extra={"group": group}) -def index_document(sender, document=None, logging_group=None, **kwargs): - index.add_document_to_index(sender, instance=document) - - def add_inbox_tags(sender, document=None, logging_group=None, **kwargs): inbox_tags = Tag.objects.filter(is_inbox_tag=True) document.tags.add(*inbox_tags) From f4cebda085c8155df1f52e0bb3a57b17274ebc24 Mon Sep 17 00:00:00 2001 From: Jonas Winkler Date: Tue, 3 Nov 2020 14:04:11 +0100 Subject: [PATCH 38/44] A handy script to redo ocr on all documents, --- src/documents/consumer.py | 49 +++------------ .../management/commands/document_rerun_ocr.py | 60 +++++++++++++++++++ src/documents/parsers.py | 27 +++++++++ 3 files changed, 95 insertions(+), 41 
deletions(-) create mode 100644 src/documents/management/commands/document_rerun_ocr.py diff --git a/src/documents/consumer.py b/src/documents/consumer.py index 639152725..3920f2942 100755 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -12,9 +12,8 @@ from django.utils import timezone from paperless.db import GnuPG from .classifier import DocumentClassifier from .models import Document, FileInfo -from .parsers import ParseError +from .parsers import ParseError, get_parser_class from .signals import ( - document_consumer_declaration, document_consumption_finished, document_consumption_started ) @@ -61,15 +60,6 @@ class Consumer: raise ConsumerError( "Consumption directory {} does not exist".format(self.consume)) - self.parsers = [] - for response in document_consumer_declaration.send(self): - self.parsers.append(response[1]) - - if not self.parsers: - raise ConsumerError( - "No parsers could be found, not even the default. " - "This is a problem." - ) def log(self, level, message): getattr(self.logger, level)(message, extra={ @@ -82,6 +72,8 @@ class Consumer: Return True if file was consumed """ + self.logging_group = uuid.uuid4() + if not re.match(FileInfo.REGEXES["title"], file): return False @@ -96,13 +88,13 @@ class Consumer: self.log("info", "Consuming {}".format(doc)) - parser_class = self._get_parser_class(doc) + parser_class = get_parser_class(doc) if not parser_class: self.log( "error", "No parsers could be found for {}".format(doc)) return False - - self.logging_group = uuid.uuid4() + else: + self.log("info", "Parser: {}".format(parser_class.__name__)) document_consumption_started.send( @@ -114,6 +106,7 @@ class Consumer: document_parser = parser_class(doc, self.logging_group) try: + self.log("info", "Generating thumbnail for {}...".format(doc)) thumbnail = document_parser.get_optimised_thumbnail() date = document_parser.get_date() document = self._store( @@ -154,31 +147,6 @@ class Consumer: ) return True - def _get_parser_class(self, 
doc): - """ - Determine the appropriate parser class based on the file - """ - - options = [] - for parser in self.parsers: - result = parser(doc) - if result: - options.append(result) - - self.log( - "info", - "Parsers available: {}".format( - ", ".join([str(o["parser"].__name__) for o in options]) - ) - ) - - if not options: - return None - - # Return the parser with the highest weight. - return sorted( - options, key=lambda _: _["weight"], reverse=True)[0]["parser"] - def _store(self, text, doc, thumbnail, date): file_info = FileInfo.from_path(doc) @@ -211,10 +179,9 @@ class Consumer: self._write(document, doc, document.source_path) self._write(document, thumbnail, document.thumbnail_path) + #TODO: why do we need to save the document again? document.save() - self.log("debug", "Completed") - return document def _write(self, document, source, target): diff --git a/src/documents/management/commands/document_rerun_ocr.py b/src/documents/management/commands/document_rerun_ocr.py new file mode 100644 index 000000000..794357420 --- /dev/null +++ b/src/documents/management/commands/document_rerun_ocr.py @@ -0,0 +1,60 @@ +import argparse +import threading +from multiprocessing import Pool +from multiprocessing.pool import ThreadPool + +from django.core.management.base import BaseCommand + +from documents.consumer import Consumer +from documents.models import Log, Document +from documents.parsers import get_parser_class + + +def process_document(doc): + parser_class = get_parser_class(doc.file_name) + if not parser_class: + print("no parser available") + else: + print("Parser: {}".format(parser_class.__name__)) + parser = parser_class(doc.source_path, None) + try: + text = parser.get_text() + doc.content = text + doc.save() + finally: + parser.cleanup() + + +def document_index(value): + ivalue = int(value) + if not (1 <= ivalue <= Document.objects.count()): + raise argparse.ArgumentTypeError( + "{} is not a valid document index (out of range)".format(value)) + + return 
ivalue + + +class Command(BaseCommand): + + help = "Performs OCR on all documents again!" + + + def add_arguments(self, parser): + parser.add_argument( + "-s", "--start_index", + default=None, + type=document_index + ) + + def handle(self, *args, **options): + + docs = Document.objects.all().order_by("added") + + indices = range(options['start_index']-1, len(docs)) if options['start_index'] else range(len(docs)) + + for i in indices: + doc = docs[i] + print("==================================") + print("{} out of {}: {}".format(i+1, len(docs), doc.file_name)) + print("==================================") + process_document(doc) diff --git a/src/documents/parsers.py b/src/documents/parsers.py index 63afa906d..60ad5cd7d 100644 --- a/src/documents/parsers.py +++ b/src/documents/parsers.py @@ -20,6 +20,8 @@ from django.utils import timezone # - XX. MONTH ZZZZ with XX being 1 or 2 and ZZZZ being 2 or 4 digits # - MONTH ZZZZ, with ZZZZ being 4 digits # - MONTH XX, ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits +from documents.signals import document_consumer_declaration + DATE_REGEX = re.compile( r'(\b|(?!=([_-])))([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})(\b|(?=([_-])))|' + # NOQA: E501 r'(\b|(?!=([_-])))([0-9]{4}|[0-9]{2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{1,2})(\b|(?=([_-])))|' + # NOQA: E501 @@ -32,6 +34,31 @@ DATE_REGEX = re.compile( logger = logging.getLogger(__name__) +def get_parser_class(doc): + """ + Determine the appropriate parser class based on the file + """ + + parsers = [] + for response in document_consumer_declaration.send(None): + parsers.append(response[1]) + + #TODO: add a check that checks parser availability. + + options = [] + for parser in parsers: + result = parser(doc) + if result: + options.append(result) + + if not options: + return None + + # Return the parser with the highest weight. 
+ return sorted( + options, key=lambda _: _["weight"], reverse=True)[0]["parser"] + + def run_convert(input, output, density=None, scale=None, alpha=None, strip=False, trim=False, type=None, depth=None, extra=None, logging_group=None): environment = os.environ.copy() if settings.CONVERT_MEMORY_LIMIT: From 28ba634e6acb82f56b0674536d6e5ff6e08e37a7 Mon Sep 17 00:00:00 2001 From: Jonas Winkler Date: Tue, 3 Nov 2020 14:04:21 +0100 Subject: [PATCH 39/44] silenced unpaper once and for all --- src/documents/parsers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/documents/parsers.py b/src/documents/parsers.py index 60ad5cd7d..0cbd13987 100644 --- a/src/documents/parsers.py +++ b/src/documents/parsers.py @@ -90,7 +90,7 @@ def run_unpaper(pnm, logging_group=None): logger.debug("Execute: " + " ".join(command_args), extra={'group': logging_group}) - if not subprocess.Popen(command_args).wait() == 0: + if not subprocess.Popen(command_args, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL).wait() == 0: raise ParseError("Unpaper failed at {}".format(command_args)) return pnm_out From ebac10bdfbb0e1fdb6eb5f5097436b8875de042a Mon Sep 17 00:00:00 2001 From: Jonas Winkler Date: Tue, 3 Nov 2020 14:10:53 +0100 Subject: [PATCH 40/44] added migration step to create initial classifier, silences consumption warnings --- .../migrations/1000_update_paperless.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/documents/migrations/1000_update_paperless.py b/src/documents/migrations/1000_update_paperless.py index 9a6ccd077..900510c72 100644 --- a/src/documents/migrations/1000_update_paperless.py +++ b/src/documents/migrations/1000_update_paperless.py @@ -41,6 +41,21 @@ def restore_filenames(apps, schema_editor): pass +def initialize_document_classifier(apps, schema_editor): + try: + print("Initalizing document classifier...") + from documents.classifier import DocumentClassifier + classifier = DocumentClassifier() + try: + classifier.train() 
+ classifier.save_classifier() + except Exception as e: + print("Classifier error: {}".format(e)) + except ImportError: + print("Document classifier not found, skipping") + + + class Migration(migrations.Migration): dependencies = [ @@ -50,6 +65,7 @@ class Migration(migrations.Migration): operations = [ migrations.RunPython(make_index, migrations.RunPython.noop), migrations.RunPython(restore_filenames), + migrations.RunPython(initialize_document_classifier, migrations.RunPython.noop), migrations.RemoveField( model_name='document', name='filename', From 9f1fe64b800313d42536e1c1cff2ef399a77931b Mon Sep 17 00:00:00 2001 From: Jonas Winkler Date: Tue, 3 Nov 2020 14:25:44 +0100 Subject: [PATCH 41/44] small frontend changes --- .../dashboard/dashboard.component.html | 27 +++++++++++-------- .../document-detail.component.html | 2 ++ 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/src-ui/src/app/components/dashboard/dashboard.component.html b/src-ui/src/app/components/dashboard/dashboard.component.html index f1a3ef1ed..1894b3e0b 100644 --- a/src-ui/src/app/components/dashboard/dashboard.component.html +++ b/src-ui/src/app/components/dashboard/dashboard.component.html @@ -2,7 +2,7 @@ -

... This space for rent

+

Welcome to paperless!

@@ -12,19 +12,23 @@ + - - - + + +
Date created Document
{{ doc.title }}
{{doc.created | date}}{{doc.title}} +
- - + +

Saved views

+

This space is reserved to display your saved views. Go to your documents and save a view to have it displayed here!

+
@@ -43,16 +47,17 @@
Document conumser status
-
+

This is what it might look like in the future.

+
-

Filename.pdf: OCR for ger...

+

Filename.pdf: Running tesseract on page 4/8...

-
+
-

Filename2.pdf: FAILED: language ITA not found

-

+

Filename2.pdf: Completed.

+

diff --git a/src-ui/src/app/components/document-detail/document-detail.component.html b/src-ui/src/app/components/document-detail/document-detail.component.html index a32418fe7..f6bb4cebb 100644 --- a/src-ui/src/app/components/document-detail/document-detail.component.html +++ b/src-ui/src/app/components/document-detail/document-detail.component.html @@ -69,6 +69,8 @@
+ Hold CTRL to (de)select multiple tags. +
    From 87be49c03d2f3ee8c570f25b7e13b66886d972d1 Mon Sep 17 00:00:00 2001 From: Jonas Winkler Date: Tue, 3 Nov 2020 14:47:42 +0100 Subject: [PATCH 42/44] added backward compatibility URLs --- src/paperless/urls.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/paperless/urls.py b/src/paperless/urls.py index 16169309b..43ba5eb49 100755 --- a/src/paperless/urls.py +++ b/src/paperless/urls.py @@ -1,6 +1,7 @@ from django.conf.urls import include, url from django.contrib import admin from django.urls import path +from django.views.decorators.csrf import csrf_exempt from django.views.generic import RedirectView from rest_framework.authtoken import views from rest_framework.routers import DefaultRouter @@ -41,6 +42,21 @@ urlpatterns = [ # The Django admin url(r"admin/", admin.site.urls), + # These redirects are here to support clients that use the old FetchView. + url( + r"^fetch/doc/(?P\d+)$", + RedirectView.as_view(url='/api/documents/%(pk)s/download/'), + ), + url( + r"^fetch/thumb/(?P\d+)$", + RedirectView.as_view(url='/api/documents/%(pk)s/thumb/'), + ), + url( + r"^fetch/preview/(?P\d+)$", + RedirectView.as_view(url='/api/documents/%(pk)s/preview/'), + ), + url(r"^push$", csrf_exempt(RedirectView.as_view(url='/api/documents/post_document/'))), + # Frontend assets TODO: this is pretty bad. path('assets/', RedirectView.as_view(url='/static/assets/%(path)s')), From 883d5aa5b693cbb796123708325fee04223a903d Mon Sep 17 00:00:00 2001 From: Jonas Winkler Date: Tue, 3 Nov 2020 14:59:16 +0100 Subject: [PATCH 43/44] updated readme --- README.md | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index ef90456a8..7b4ea0de0 100644 --- a/README.md +++ b/README.md @@ -23,9 +23,11 @@ Here's what you get: This is a list of changes that have been made to the original project. ## Added -- **A new single page UI** built with bootstrap and Angular. Its much more responsive than the django admin pages. 
-- **Document uploading on the web page.** This is very crude right now, but gets the job done. It simply uploads the documents and stores them in the configured consumer directory. The API for that has always been in the project, there simply was no form on the UI to support it. -- **Full text search** with a proper document indexer: The search feature sorts documents by relevance to the search query, highlights query terms in the found documents and provides autocomplete while typing the query. This is still very basic but will see extensions in the future. +- **A new single page UI** built with bootstrap and Angular. It's much more responsive than the django admin pages. It features the following improvements over the old django admin interface: + - *Document uploading on the web page.* This is very crude right now, but gets the job done. It simply uploads the documents and stores them in the configured consumer directory. The API for that has always been in the project, there simply was no form on the UI to support it. + - *Full text search* with a proper document indexer: The search feature sorts documents by relevance to the search query, highlights query terms in the found documents and provides autocomplete while typing the query. This is still very basic but will see extensions in the future. + - *Saveable filters.* Save filter and sorting presets and optionally display a couple of documents of saved filters (e.g., your inbox sorted descending by added date, or tagged TODO, oldest to newest) on the dashboard. + - *Statistics.* Provides basic statistics about your document collection. - **Document types.** Similar to correspondents, each document may have a type (i.e., invoice, letter, receipt, bank statement, ...). I've initially intented to use this for some individual processing of differently typed documents, however, no such features exists yet. - **Inbox tags.** These tags are automatically assigned to every newly scanned document.
They are intented to be removed once you have manually edited the meta data of a document. - **Automatic matching** for document types, correspondents, and tags. A new matching algorithm has been implemented (Auto), which is based on a classification model (simple feed forward neural nets are used). This classifier is trained on your document collection and learns to assign metadata to new documents based on their similiarity to existing documents. @@ -34,7 +36,11 @@ This is a list of changes that have been made to the original project. - **Archive serial numbers.** These are there to support the recommended workflow for storing physical copies of very important documents. The idea is that if a document has to be kept in physical form, you write a running number on the document before scanning (the archive serial number) and keep these documents sorted by number in a binder. If you need to access a specific physical document at some point in time, search for the document in paperless, identify the ASN and grab the document. ## Modified -- **(BREAKING) REST API changes.** In order to support the new UI, changes had to be made to the API. Some filters are not available anymore, other filters were added. Furthermore, foreign key relationships are not expressed with URLs anymore, but with their respective ids. Also, the old urls for fetching documents and thumbnails are not valid anymore. These resources are now served through the api. +- **(BREAKING) REST API changes.** In order to support the new UI, changes had to be made to the API. Some filters are not available anymore, other filters were added. Furthermore, foreign key relationships are not expressed with URLs anymore, but with their respective ids. Also, the urls for fetching documents and thumbnails have changed. Redirects are in place to support the old urls. + +## Internal changes +- Many improvements to the code. 
More concise logging of the consumer, better multithreading of the tesseract parser for large documents, fewer hacks overall. +- Updated docker image. This image runs everything in a single container. (Except the optional database, of course) ## Removed @@ -48,8 +54,7 @@ These features were removed each due to two reasons. First, I did not feel these These features will make it into the application at some point, sorted by priority. -- **Saveable filters.** Save filter and sorting presets and optionally display a couple documents of saved filters (i.e., your inbox sorted descending by added date, or tagged TODO, oldest to newest) on the dash board. -- **Better tag editor.** The tag editor on the document detail page is not very convenient. This was put in there to get the project working but will be replaced with something nicer. +- **Better tag editor.** The tag editor on the document detail page is not very convenient. This was put in there to get the project working but will be replaced with something nicer eventually. - **More search.** The search backend is incredibly versatile and customizable. Searching is the most important feature of this project and thus, I want to implement things like: - Group and limit search results by correspondent, show “more from this” links in the results.
- Ability to search for “Similar documents” in the search results From 965e87474021428e6d7126a007cea984e5aa56ab Mon Sep 17 00:00:00 2001 From: Jonas Winkler Date: Wed, 4 Nov 2020 00:01:08 +0100 Subject: [PATCH 44/44] fixes #17 search for multiple tags --- .gitignore | 3 +- .../filter-editor.component.html | 7 ++-- .../filter-editor/filter-editor.component.ts | 3 +- src-ui/src/app/data/filter-rule-type.ts | 36 ++++++++++--------- .../rest/abstract-paperless-service.ts | 2 +- .../src/app/services/rest/document.service.ts | 6 +++- src/documents/filters.py | 34 +++++++++++++++++- 7 files changed, 66 insertions(+), 25 deletions(-) diff --git a/.gitignore b/.gitignore index 3d79e7145..871a7bd08 100644 --- a/.gitignore +++ b/.gitignore @@ -84,5 +84,6 @@ scripts/nuke /data/index /paperless.conf -/consumption/ +/consume +/export /src-ui/.vscode diff --git a/src-ui/src/app/components/filter-editor/filter-editor.component.html b/src-ui/src/app/components/filter-editor/filter-editor.component.html index 27b941e49..1cca0fd7f 100644 --- a/src-ui/src/app/components/filter-editor/filter-editor.component.html +++ b/src-ui/src/app/components/filter-editor/filter-editor.component.html @@ -1,8 +1,9 @@
-
- diff --git a/src-ui/src/app/components/filter-editor/filter-editor.component.ts b/src-ui/src/app/components/filter-editor/filter-editor.component.ts index 61102fcd0..8c47ceafb 100644 --- a/src-ui/src/app/components/filter-editor/filter-editor.component.ts +++ b/src-ui/src/app/components/filter-editor/filter-editor.component.ts @@ -32,6 +32,7 @@ export class FilterEditorComponent implements OnInit { newRuleClicked() { this.filterRules.push({type: this.selectedRuleType, value: null}) + this.selectedRuleType = this.getRuleTypes().length > 0 ? this.getRuleTypes()[0] : null } removeRuleClicked(rule) { @@ -57,7 +58,7 @@ export class FilterEditorComponent implements OnInit { } getRuleTypes() { - return FILTER_RULE_TYPES + return FILTER_RULE_TYPES.filter(rt => rt.multi || !this.filterRules.find(r => r.type == rt)) } } diff --git a/src-ui/src/app/data/filter-rule-type.ts b/src-ui/src/app/data/filter-rule-type.ts index eb9465aaa..e5de30271 100644 --- a/src-ui/src/app/data/filter-rule-type.ts +++ b/src-ui/src/app/data/filter-rule-type.ts @@ -1,31 +1,33 @@ export const FILTER_RULE_TYPES: FilterRuleType[] = [ - {name: "Title contains", filtervar: "title__icontains", datatype: "string"}, - {name: "Content contains", filtervar: "content__icontains", datatype: "string"}, + {name: "Title contains", filtervar: "title__icontains", datatype: "string", multi: false}, + {name: "Content contains", filtervar: "content__icontains", datatype: "string", multi: false}, - {name: "ASN is", filtervar: "archive_serial_number", datatype: "number"}, + {name: "ASN is", filtervar: "archive_serial_number", datatype: "number", multi: false}, - {name: "Correspondent is", filtervar: "correspondent__id", datatype: "correspondent"}, - {name: "Document type is", filtervar: "document_type__id", datatype: "document_type"}, - {name: "Has tag", filtervar: "tags__id", datatype: "tag"}, - - {name: "Has any tag", filtervar: "is_tagged", datatype: "boolean"}, + {name: "Correspondent is", filtervar: 
"correspondent__id", datatype: "correspondent", multi: false}, + {name: "Document type is", filtervar: "document_type__id", datatype: "document_type", multi: false}, - {name: "Date created before", filtervar: "created__date__lt", datatype: "date"}, - {name: "Date created after", filtervar: "created__date__gt", datatype: "date"}, + {name: "Is in Inbox", filtervar: "is_in_inbox", datatype: "boolean", multi: false}, + {name: "Has tag", filtervar: "tags__id__all", datatype: "tag", multi: true}, + {name: "Has any tag", filtervar: "is_tagged", datatype: "boolean", multi: false}, - {name: "Year created is", filtervar: "created__year", datatype: "number"}, - {name: "Month created is", filtervar: "created__month", datatype: "number"}, - {name: "Day created is", filtervar: "created__day", datatype: "number"}, + {name: "Created before", filtervar: "created__date__lt", datatype: "date", multi: false}, + {name: "Created after", filtervar: "created__date__gt", datatype: "date", multi: false}, - {name: "Date added before", filtervar: "added__date__lt", datatype: "date"}, - {name: "Date added after", filtervar: "added__date__gt", datatype: "date"}, + {name: "Year created is", filtervar: "created__year", datatype: "number", multi: false}, + {name: "Month created is", filtervar: "created__month", datatype: "number", multi: false}, + {name: "Day created is", filtervar: "created__day", datatype: "number", multi: false}, + + {name: "Added before", filtervar: "added__date__lt", datatype: "date", multi: false}, + {name: "Added after", filtervar: "added__date__gt", datatype: "date", multi: false}, - {name: "Date modified before", filtervar: "modified__date__lt", datatype: "date"}, - {name: "Date modified after", filtervar: "modified__date__gt", datatype: "date"}, + {name: "Modified before", filtervar: "modified__date__lt", datatype: "date", multi: false}, + {name: "Modified after", filtervar: "modified__date__gt", datatype: "date", multi: false}, ] export interface FilterRuleType { name: 
string filtervar: string datatype: string //number, string, boolean, date + multi: boolean } \ No newline at end of file diff --git a/src-ui/src/app/services/rest/abstract-paperless-service.ts b/src-ui/src/app/services/rest/abstract-paperless-service.ts index 9ee07d31a..c8459f080 100644 --- a/src-ui/src/app/services/rest/abstract-paperless-service.ts +++ b/src-ui/src/app/services/rest/abstract-paperless-service.ts @@ -33,7 +33,7 @@ export abstract class AbstractPaperlessService { httpParams = httpParams.set('ordering', ordering) } for (let extraParamKey in extraParams) { - if (extraParams[extraParamKey]) { + if (extraParams[extraParamKey] != null) { httpParams = httpParams.set(extraParamKey, extraParams[extraParamKey]) } } diff --git a/src-ui/src/app/services/rest/document.service.ts b/src-ui/src/app/services/rest/document.service.ts index ff030e21d..7328b380e 100644 --- a/src-ui/src/app/services/rest/document.service.ts +++ b/src-ui/src/app/services/rest/document.service.ts @@ -34,7 +34,11 @@ export class DocumentService extends AbstractPaperlessService if (filterRules) { let params = {} for (let rule of filterRules) { - params[rule.type.filtervar] = rule.value + if (rule.type.multi) { + params[rule.type.filtervar] = params[rule.type.filtervar] ? 
params[rule.type.filtervar] + "," + rule.value : rule.value + } else { + params[rule.type.filtervar] = rule.value + } } return params } else { diff --git a/src/documents/filters.py b/src/documents/filters.py index d8fe09301..770e0e5af 100755 --- a/src/documents/filters.py +++ b/src/documents/filters.py @@ -1,4 +1,4 @@ -from django_filters.rest_framework import BooleanFilter, FilterSet +from django_filters.rest_framework import BooleanFilter, FilterSet, Filter from .models import Correspondent, Document, Tag, DocumentType, Log @@ -35,6 +35,34 @@ class DocumentTypeFilterSet(FilterSet): } +class TagsFilter(Filter): + + def filter(self, qs, value): + if not value: + return qs + + try: + tag_ids = [int(x) for x in value.split(',')] + except ValueError: + return qs + + for tag_id in tag_ids: + qs = qs.filter(tags__id=tag_id) + + return qs + + +class InboxFilter(Filter): + + def filter(self, qs, value): + if value == 'true': + return qs.filter(tags__is_inbox_tag=True) + elif value == 'false': + return qs.exclude(tags__is_inbox_tag=True) + else: + return qs + + class DocumentFilterSet(FilterSet): is_tagged = BooleanFilter( @@ -44,6 +72,10 @@ class DocumentFilterSet(FilterSet): exclude=True ) + tags__id__all = TagsFilter() + + is_in_inbox = InboxFilter() + class Meta: model = Document fields = {