Merge branch 'dev' into celery-tasks

This commit is contained in:
Jonas Winkler 2020-11-10 00:16:59 +01:00
commit b3126934b3
77 changed files with 1605 additions and 959 deletions

View File

@ -1,3 +1,7 @@
src-ui/node_modules
src-ui/dist
/src-ui/node_modules
/src-ui/dist
.git
/export
/consume
/media
/data

1
.gitignore vendored
View File

@ -65,7 +65,6 @@ target/
.virtualenv
virtualenv
/venv
docker-compose.yml
docker-compose.env
# Used for development

View File

@ -25,7 +25,6 @@ COPY Pipfile* ./
#Dependencies
RUN apt-get update \
&& DEBIAN_FRONTEND="noninteractive" apt-get -y --no-install-recommends install \
anacron \
build-essential \
curl \
ghostscript \
@ -60,20 +59,17 @@ RUN apt-get update \
COPY scripts/imagemagick-policy.xml /etc/ImageMagick-6/policy.xml
COPY scripts/gunicorn.conf.py ./
COPY scripts/supervisord.conf /etc/supervisord.conf
COPY scripts/paperless-cron /etc/cron.daily/
COPY scripts/docker-entrypoint.sh /sbin/docker-entrypoint.sh
# copy app
COPY src/ ./src/
COPY --from=frontend /usr/src/paperless/src-ui/dist/paperless-ui/ ./src/documents/static/
COPY --from=frontend /usr/src/paperless/src-ui/dist/paperless-ui/ ./src/documents/static/frontend/
# add users, setup scripts
RUN addgroup --gid 1000 paperless \
&& useradd --uid 1000 --gid paperless --home-dir /usr/src/paperless paperless \
&& chown -R paperless:paperless . \
&& chmod 755 /sbin/docker-entrypoint.sh \
&& chmod +x /etc/cron.daily/paperless-cron \
&& rm /etc/cron.daily/apt-compat /etc/cron.daily/dpkg
&& chmod 755 /sbin/docker-entrypoint.sh
WORKDIR /usr/src/paperless/src/
@ -81,6 +77,6 @@ RUN sudo -HEu paperless python3 manage.py collectstatic --clear --no-input
VOLUME ["/usr/src/paperless/data", "/usr/src/paperless/consume", "/usr/src/paperless/export"]
ENTRYPOINT ["/sbin/docker-entrypoint.sh"]
CMD ["python3", "manage.py", "--help"]
CMD ["supervisord", "-c", "/etc/supervisord.conf"]
LABEL maintainer="Jonas Winkler <dev@jpwinkler.de>"

View File

@ -24,8 +24,11 @@ gunicorn = "*"
whitenoise = "*"
fuzzywuzzy = "*"
python-Levenshtein = "*"
django-extensions = ""
django-extensions = "*"
watchdog = "*"
pathvalidate = "*"
django-q = "*"
redis = "*"
channels = "~=3.0"
channels-redis = "*"
daphne = "~=3.0"

View File

@ -26,6 +26,7 @@ This is a list of changes that have been made to the original project.
## Added
- **A new single page UI** built with bootstrap and Angular. Its much more responsive than the django admin pages. It features the follwing improvements over the old django admin interface:
- *Dashboard.* The landing page shows some useful information, such as statistics, recently scanned documents, file uploading, and possibly more in the future.
- *Document uploading on the web page.* This is very crude right now, but gets the job done. It simply uploads the documents and stores them in the configured consumer directory. The API for that has always been in the project, there simply was no form on the UI to support it.
- *Full text search* with a proper document indexer: The search feature sorts documents by relevance to the search query, highlights query terms in the found documents and provides autocomplete while typing the query. This is still very basic but will see extensions in the future.
- *Saveable filters.* Save filter and sorting presets and optionally display a couple documents of saved filters (i.e., your inbox sorted descending by added date, or tagged TODO, oldest to newest) on the dash board.
@ -49,21 +50,18 @@ This is a list of changes that have been made to the original project.
These features were removed each due to two reasons. First, I did not feel these features contributed all that much to the over project, and second, I don't want to maintain these features.
- **(BREAKING) Reminders.** I have no idea what they were used for and thus removed them from the project.
- **Filename handling (I'm sorry).** The master branch of the paperless project has seen some changes regarding the filename handling of stored documents. These changes allow you to change the filename of stored documents from their default form {id}.pdf. These changes have not made it into this project, since the whole point of paperless is that you don't have to access your documents on the disk anymore. If you are using version 2.7.0, this does not affect you. If you are on the most recent push on the master branch, the provided migration will revert these changes and rename all your files to their original file name.
- **Every customization made to the admin interface.** Since this is not the primary interface for the application anymore, there is no need to keep and maintain these. Besides, some changes were incompatible with the most recent versions of django. The interface is completely usable, though.
## Planned
These features will make it into the application at some point, sorted by priority.
- **Better tag editor.** The tag editor on the document detail page is not very convenient. This was put in there to get the project working but will be replaced with something nicer eventually.
- **More search.** The search backend is incredibly versatile and customizable. Searching is the most important feature of this project and thus, I want to implement things like:
- Group and limit search results by correspondent, show “more from this” links in the results.
- Ability to search for “Similar documents” in the search results
- Provide corrections for mispelled queries
- **More robust consumer** that shows its progress on the web page.
- **Arbitrary tag colors**. Allow the selection of any color with a color picker.
- **Dashboard**. The landing page is a little bleak right now but will feature status updates about the consumer, previews of saved filters and database statistics in the future.
## On the chopping block.

View File

@ -1,7 +1,3 @@
# Database settings for paperless
# If you want to use sqlite instead, remove this setting.
PAPERLESS_DBHOST="db"
# The UID and GID of the user used to run paperless in the container. Set this
# to your UID and GID on the host so that you have write access to the
# consumption directory.

View File

@ -2,8 +2,7 @@ version: "3.4"
services:
broker:
image: redis:latest
ports:
- 6379:6379
#restart: always
db:
image: postgres:13
@ -16,13 +15,12 @@ services:
POSTGRES_PASSWORD: paperless
webserver:
build: .
image: paperless-ng
image: paperless-ng:latest
#restart: always
depends_on:
- db
ports:
- "8000:8000"
- 8000:8000
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8000"]
interval: 30s
@ -34,6 +32,9 @@ services:
- ./export:/usr/src/paperless/export
- ./consume:/usr/src/paperless/consume
env_file: docker-compose.env
environment:
PAPERLESS_REDIS: redis://broker:6379
PAPERLESS_DBHOST: db
command: ["supervisord", "-c", "/etc/supervisord.conf"]

View File

@ -3,6 +3,16 @@
# As this file contains passwords it should only be readable by the user
# running paperless.
###############################################################################
#### Message Broker ####
###############################################################################
# This is required for processing scheduled tasks such as email fetching, index
# optimization and for training the automatic document matcher.
# Defaults to localhost:6379.
#PAPERLESS_REDIS="redis://localhost:6379"
###############################################################################
#### Database Settings ####
###############################################################################
@ -63,7 +73,19 @@ PAPERLESS_CONSUMPTION_DIR="../consume"
# Any email sent to the target account that does not contain this text will be
# ignored.
#PAPERLESS_EMAIL_SECRET=""
PAPERLESS_EMAIL_SECRET=""
# Specify a filename format for the document (directories are supported)
# Use the following placeholders:
# * {correspondent}
# * {title}
# * {created}
# * {added}
# * {tags[KEY]} If your tags conform to key_value or key-value
# * {tags[INDEX]} If your tags are strings, select the tag by index
# Uniqueness of filenames is ensured, as an incrementing counter is attached
# to each filename.
#PAPERLESS_FILENAME_FORMAT=""
###############################################################################
#### Security ####
@ -117,11 +139,6 @@ PAPERLESS_CONSUMPTION_DIR="../consume"
# https://docs.djangoproject.com/en/1.11/ref/settings/#force-script-name
#PAPERLESS_FORCE_SCRIPT_NAME=""
# If you are using alternative authentication means or are just using paperless
# as a single user on a small private network, this option allows you to disable
# user authentication if you set it to "true"
#PAPERLESS_DISABLE_LOGIN="false"
###############################################################################
#### Software Tweaks ####
###############################################################################

View File

@ -1,5 +0,0 @@
#!/bin/sh
cd /usr/src/paperless/src
sudo -HEu paperless python3 manage.py document_create_classifier

View File

@ -24,8 +24,9 @@ stdout_logfile_maxbytes=0
stderr_logfile=/dev/stderr
stderr_logfile_maxbytes=0
[program:anacron]
command=anacron -d
[program:scheduler]
command=python3 manage.py qcluster
user=paperless
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0

View File

@ -14,6 +14,7 @@
"builder": "@angular-devkit/build-angular:browser",
"options": {
"outputPath": "dist/paperless-ui",
"outputHashing": "none",
"index": "src/index.html",
"main": "src/main.ts",
"polyfills": "src/polyfills.ts",
@ -38,7 +39,7 @@
}
],
"optimization": true,
"outputHashing": "all",
"outputHashing": "none",
"sourceMap": false,
"extractCss": true,
"namedChunks": false,

View File

@ -2049,9 +2049,9 @@
}
},
"@ng-bootstrap/ng-bootstrap": {
"version": "7.0.0",
"resolved": "https://registry.npmjs.org/@ng-bootstrap/ng-bootstrap/-/ng-bootstrap-7.0.0.tgz",
"integrity": "sha512-SxUaptGWJmCxM0d2Zy1mx7K7p/YBwGZ69NmmBQVY4BE6p5av0hWrVmv9rzzfBz0rhxU7RPZLor2Jpaoq8Xyl4w==",
"version": "8.0.0",
"resolved": "https://registry.npmjs.org/@ng-bootstrap/ng-bootstrap/-/ng-bootstrap-8.0.0.tgz",
"integrity": "sha512-v77Gfd8xHH+exq0WqIqVRlxbUEHdA/2+RUJenUP2IDTQN9E1rWl7O461/kosr+0XPuxPArHQJxhh/WsCYckcNg==",
"requires": {
"tslib": "^2.0.0"
}

View File

@ -20,7 +20,7 @@
"@angular/platform-browser": "~10.1.5",
"@angular/platform-browser-dynamic": "~10.1.5",
"@angular/router": "~10.1.5",
"@ng-bootstrap/ng-bootstrap": "^7.0.0",
"@ng-bootstrap/ng-bootstrap": "^8.0.0",
"bootstrap": "^4.5.0",
"ng-bootstrap": "^1.6.3",
"ngx-file-drop": "^10.0.0",

View File

@ -4,7 +4,6 @@ import { AppFrameComponent } from './components/app-frame/app-frame.component';
import { DashboardComponent } from './components/dashboard/dashboard.component';
import { DocumentDetailComponent } from './components/document-detail/document-detail.component';
import { DocumentListComponent } from './components/document-list/document-list.component';
import { LoginComponent } from './components/login/login.component';
import { CorrespondentListComponent } from './components/manage/correspondent-list/correspondent-list.component';
import { DocumentTypeListComponent } from './components/manage/document-type-list/document-type-list.component';
import { LogsComponent } from './components/manage/logs/logs.component';
@ -12,25 +11,23 @@ import { SettingsComponent } from './components/manage/settings/settings.compone
import { TagListComponent } from './components/manage/tag-list/tag-list.component';
import { NotFoundComponent } from './components/not-found/not-found.component';
import { SearchComponent } from './components/search/search.component';
import { AuthGuardService } from './services/auth-guard.service';
const routes: Routes = [
{path: '', redirectTo: 'dashboard', pathMatch: 'full'},
{path: '', component: AppFrameComponent, children: [
{path: 'dashboard', component: DashboardComponent, canActivate: [AuthGuardService] },
{path: 'documents', component: DocumentListComponent, canActivate: [AuthGuardService] },
{path: 'view/:id', component: DocumentListComponent, canActivate: [AuthGuardService] },
{path: 'search', component: SearchComponent, canActivate: [AuthGuardService] },
{path: 'documents/:id', component: DocumentDetailComponent, canActivate: [AuthGuardService] },
{path: 'dashboard', component: DashboardComponent },
{path: 'documents', component: DocumentListComponent },
{path: 'view/:id', component: DocumentListComponent },
{path: 'search', component: SearchComponent },
{path: 'documents/:id', component: DocumentDetailComponent },
{path: 'tags', component: TagListComponent, canActivate: [AuthGuardService] },
{path: 'documenttypes', component: DocumentTypeListComponent, canActivate: [AuthGuardService] },
{path: 'correspondents', component: CorrespondentListComponent, canActivate: [AuthGuardService] },
{path: 'logs', component: LogsComponent, canActivate: [AuthGuardService] },
{path: 'settings', component: SettingsComponent, canActivate: [AuthGuardService] },
{path: 'tags', component: TagListComponent },
{path: 'documenttypes', component: DocumentTypeListComponent },
{path: 'correspondents', component: CorrespondentListComponent },
{path: 'logs', component: LogsComponent },
{path: 'settings', component: SettingsComponent },
]},
{path: 'login', component: LoginComponent },
{path: '404', component: NotFoundComponent},
{path: '**', redirectTo: '/404', pathMatch: 'full'}
];

View File

@ -12,7 +12,6 @@ import { TagListComponent } from './components/manage/tag-list/tag-list.componen
import { DocumentTypeListComponent } from './components/manage/document-type-list/document-type-list.component';
import { LogsComponent } from './components/manage/logs/logs.component';
import { SettingsComponent } from './components/manage/settings/settings.component';
import { LoginComponent } from './components/login/login.component';
import { FormsModule, ReactiveFormsModule } from '@angular/forms';
import { DatePipe } from '@angular/common';
import { SafePipe } from './pipes/safe.pipe';
@ -29,7 +28,6 @@ import { PageHeaderComponent } from './components/common/page-header/page-header
import { AppFrameComponent } from './components/app-frame/app-frame.component';
import { ToastsComponent } from './components/common/toasts/toasts.component';
import { FilterEditorComponent } from './components/filter-editor/filter-editor.component';
import { AuthInterceptor } from './services/auth.interceptor';
import { DocumentCardLargeComponent } from './components/document-list/document-card-large/document-card-large.component';
import { DocumentCardSmallComponent } from './components/document-list/document-card-small/document-card-small.component';
import { NgxFileDropModule } from 'ngx-file-drop';
@ -40,6 +38,7 @@ import { SaveViewConfigDialogComponent } from './components/document-list/save-v
import { InfiniteScrollModule } from 'ngx-infinite-scroll';
import { DateTimeComponent } from './components/common/input/date-time/date-time.component';
import { TagsComponent } from './components/common/input/tags/tags.component';
import { SortableDirective } from './directives/sortable.directive';
import { SavedViewWidgetComponent } from './components/dashboard/widgets/saved-view-widget/saved-view-widget.component';
import { ConsumerStatusWidgetComponent } from './components/dashboard/widgets/consumer-status-widget/consumer-status-widget.component';
import { StatisticsWidgetComponent } from './components/dashboard/widgets/statistics-widget/statistics-widget.component';
@ -56,7 +55,6 @@ import { FileUploadWidgetComponent } from './components/dashboard/widgets/file-u
DocumentTypeListComponent,
LogsComponent,
SettingsComponent,
LoginComponent,
SafePipe,
NotFoundComponent,
CorrespondentEditDialogComponent,
@ -78,6 +76,7 @@ import { FileUploadWidgetComponent } from './components/dashboard/widgets/file-u
SaveViewConfigDialogComponent,
DateTimeComponent,
TagsComponent,
SortableDirective,
ConsumerStatusWidgetComponent,
SavedViewWidgetComponent,
StatisticsWidgetComponent,
@ -94,12 +93,7 @@ import { FileUploadWidgetComponent } from './components/dashboard/widgets/file-u
InfiniteScrollModule
],
providers: [
DatePipe,
{
provide: HTTP_INTERCEPTORS,
useClass: AuthInterceptor,
multi: true
}
DatePipe
],
bootstrap: [AppComponent]
})

View File

@ -10,7 +10,7 @@
</form>
<ul class="navbar-nav px-3">
<li class="nav-item text-nowrap">
<a class="nav-link" (click)="logout()" style="cursor: pointer;">
<a class="nav-link" href="accounts/logout/">
<svg class="buttonicon" fill="currentColor">
<use xlink:href="assets/bootstrap-icons.svg#door-closed"/>
</svg>

View File

@ -1,10 +1,9 @@
import { Component, OnDestroy, OnInit } from '@angular/core';
import { FormControl } from '@angular/forms';
import { Router } from '@angular/router';
import { from, Observable, of, scheduled, Subscription } from 'rxjs';
import { from, Observable, Subscription } from 'rxjs';
import { debounceTime, distinctUntilChanged, map, switchMap } from 'rxjs/operators';
import { PaperlessDocument } from 'src/app/data/paperless-document';
import { AuthService } from 'src/app/services/auth.service';
import { OpenDocumentsService } from 'src/app/services/open-documents.service';
import { SearchService } from 'src/app/services/rest/search.service';
import { SavedViewConfigService } from 'src/app/services/saved-view-config.service';
@ -19,7 +18,6 @@ export class AppFrameComponent implements OnInit, OnDestroy {
constructor (
public router: Router,
private openDocumentsService: OpenDocumentsService,
private authService: AuthService,
private searchService: SearchService,
public viewConfigService: SavedViewConfigService
) {
@ -64,10 +62,6 @@ export class AppFrameComponent implements OnInit, OnDestroy {
this.router.navigate(['search'], {queryParams: {query: this.searchField.value}})
}
logout() {
this.authService.logout()
}
ngOnInit() {
this.openDocuments = this.openDocumentsService.getOpenDocuments()
}

View File

@ -134,8 +134,8 @@ export class DocumentDetailComponent implements OnInit {
close() {
this.openDocumentService.closeDocument(this.document)
if (this.documentListViewService.viewConfig) {
this.router.navigate(['view', this.documentListViewService.viewConfig.id])
if (this.documentListViewService.viewId) {
this.router.navigate(['view', this.documentListViewService.viewId])
} else {
this.router.navigate(['documents'])
}

View File

@ -1,4 +1,4 @@
<app-page-header [title]="docs.viewConfig ? docs.viewConfig.title : 'Documents'">
<app-page-header [title]="getTitle()">
<div class="btn-group btn-group-toggle mr-2" ngbRadioGroup [(ngModel)]="displayMode"
(ngModelChange)="saveDisplayMode()">
@ -21,14 +21,13 @@
</svg>
</label>
</div>
<div class="btn-group btn-group-toggle mr-2" ngbRadioGroup [(ngModel)]="docs.currentSortDirection"
(ngModelChange)="reload()"
*ngIf="!docs.viewConfig">
<div class="btn-group btn-group-toggle mr-2" ngbRadioGroup [(ngModel)]="docs.sortDirection"
*ngIf="!docs.viewId">
<div ngbDropdown class="btn-group">
<button class="btn btn-outline-secondary btn-sm" id="dropdownBasic1" ngbDropdownToggle>Sort by</button>
<div ngbDropdownMenu aria-labelledby="dropdownBasic1">
<button *ngFor="let f of getSortFields()" ngbDropdownItem (click)="setSort(f.field)"
[class.active]="docs.currentSortField == f.field">{{f.name}}</button>
[class.active]="docs.sortField == f.field">{{f.name}}</button>
</div>
</div>
<label ngbButtonLabel class="btn-outline-secondary btn-sm">
@ -44,7 +43,7 @@
</svg>
</label>
</div>
<div class="btn-group" *ngIf="!docs.viewConfig">
<div class="btn-group" *ngIf="!docs.viewId">
<button type="button" class="btn btn-sm btn-outline-secondary" (click)="showFilter=!showFilter">
<svg class="toolbaricon" fill="currentColor">
@ -62,7 +61,6 @@
</div>
</div>
</div>
</app-page-header>

View File

@ -26,13 +26,16 @@ export class DocumentListComponent implements OnInit {
filterRules: FilterRule[] = []
showFilter = false
getTitle() {
return this.docs.viewConfigOverride ? this.docs.viewConfigOverride.title : "Documents"
}
getSortFields() {
return DOCUMENT_SORT_FIELDS
}
setSort(field: string) {
this.docs.currentSortField = field
this.reload()
this.docs.sortField = field
}
saveDisplayMode() {
@ -45,11 +48,11 @@ export class DocumentListComponent implements OnInit {
}
this.route.paramMap.subscribe(params => {
if (params.has('id')) {
this.docs.viewConfig = this.savedViewConfigService.getConfig(params.get('id'))
this.docs.viewConfigOverride = this.savedViewConfigService.getConfig(params.get('id'))
} else {
this.filterRules = cloneFilterRules(this.docs.currentFilterRules)
this.filterRules = this.docs.filterRules
this.showFilter = this.filterRules.length > 0
this.docs.viewConfig = null
this.docs.viewConfigOverride = null
}
this.reload()
})
@ -60,28 +63,24 @@ export class DocumentListComponent implements OnInit {
}
applyFilterRules() {
this.docs.setFilterRules(this.filterRules)
this.reload()
this.docs.filterRules = this.filterRules
}
loadViewConfig(config: SavedViewConfig) {
this.filterRules = cloneFilterRules(config.filterRules)
this.docs.setFilterRules(config.filterRules)
this.docs.currentSortField = config.sortField
this.docs.currentSortDirection = config.sortDirection
this.reload()
this.docs.loadViewConfig(config)
}
saveViewConfig() {
let modal = this.modalService.open(SaveViewConfigDialogComponent, {backdrop: 'static'})
modal.componentInstance.saveClicked.subscribe(formValue => {
this.savedViewConfigService.saveConfig({
filterRules: cloneFilterRules(this.filterRules),
title: formValue.title,
showInDashboard: formValue.showInDashboard,
showInSideBar: formValue.showInSideBar,
sortDirection: this.docs.currentSortDirection,
sortField: this.docs.currentSortField
filterRules: this.docs.filterRules,
sortDirection: this.docs.sortDirection,
sortField: this.docs.sortField
})
modal.close()
})

View File

@ -1,17 +0,0 @@
<div class="form-signin-container">
<form class="form-signin mt-5" [formGroup]="loginForm" (ngSubmit)="loginClicked()">
<img class="mb-4" src="assets/logo.svg" alt="" width="100%">
<h1 class="h3 mb-3 font-weight-normal">Login</h1>
<label for="inputUsername" class="sr-only">Username</label>
<input type="text" id="inputUsername" class="form-control" placeholder="Username" required autofocus formControlName="username">
<label for="inputPassword" class="sr-only">Password</label>
<input type="password" id="inputPassword" class="form-control" placeholder="Password" required formControlName="password">
<div class="checkbox mb-3">
<label>
<input type="checkbox" value="remember-me" formControlName="rememberMe"> Remember me
</label>
</div>
<button class="btn btn-lg btn-primary btn-block mb-4" type="submit">Login</button>
<p><a href="/admin/">Go to admin interface</a></p>
</form>
</div>

View File

@ -1,25 +0,0 @@
import { ComponentFixture, TestBed } from '@angular/core/testing';
import { LoginComponent } from './login.component';
describe('LoginComponent', () => {
let component: LoginComponent;
let fixture: ComponentFixture<LoginComponent>;
beforeEach(async () => {
await TestBed.configureTestingModule({
declarations: [ LoginComponent ]
})
.compileComponents();
});
beforeEach(() => {
fixture = TestBed.createComponent(LoginComponent);
component = fixture.componentInstance;
fixture.detectChanges();
});
it('should create', () => {
expect(component).toBeTruthy();
});
});

View File

@ -1,34 +0,0 @@
import { Component, OnInit } from '@angular/core';
import { FormControl, FormGroup } from '@angular/forms';
import { Router } from '@angular/router';
import { AuthService } from 'src/app/services/auth.service';
import { ToastService } from 'src/app/services/toast.service';
@Component({
selector: 'app-login',
templateUrl: './login.component.html',
styleUrls: ['./login.component.css']
})
export class LoginComponent implements OnInit {
constructor(private auth: AuthService, private router: Router, private toastService: ToastService) { }
loginForm = new FormGroup({
username: new FormControl(''),
password: new FormControl(''),
rememberMe: new FormControl(false)
})
ngOnInit(): void {
}
loginClicked() {
this.auth.login(this.loginForm.value.username, this.loginForm.value.password, this.loginForm.value.rememberMe).subscribe(result => {
this.router.navigate([''])
}, (error) => {
this.toastService.showError("Unable to log in with provided credentials.")
}
)
}
}

View File

@ -9,10 +9,10 @@
<table class="table table-striped">
<thead>
<tr>
<th scope="col">Name</th>
<th scope="col">Matching</th>
<th scope="col">Document count</th>
<th scope="col">Last correspondence</th>
<th scope="col" sortable="name" (sort)="onSort($event)">Name</th>
<th scope="col" sortable="matching_algorithm" (sort)="onSort($event)">Matching</th>
<th scope="col" sortable="document_count" (sort)="onSort($event)">Document count</th>
<th scope="col" sortable="last_correspondence" (sort)="onSort($event)">Last correspondence</th>
<th scope="col">Actions</th>
</tr>
</thead>

View File

@ -10,9 +10,9 @@
<table class="table table-striped">
<thead>
<tr>
<th scope="col">Name</th>
<th scope="col">Matching</th>
<th scope="col">Document count</th>
<th scope="col" sortable="name" (sort)="onSort($event)">Name</th>
<th scope="col" sortable="matching_algorithm" (sort)="onSort($event)">Matching</th>
<th scope="col" sortable="document_count" (sort)="onSort($event)">Document count</th>
<th scope="col">Actions</th>
</tr>
</thead>

View File

@ -1,7 +1,8 @@
import { Directive, OnInit } from '@angular/core';
import { Directive, OnInit, QueryList, ViewChildren } from '@angular/core';
import { NgbModal } from '@ng-bootstrap/ng-bootstrap';
import { MatchingModel, MATCHING_ALGORITHMS, MATCH_AUTO } from 'src/app/data/matching-model';
import { ObjectWithId } from 'src/app/data/object-with-id';
import { SortableDirective, SortEvent } from 'src/app/directives/sortable.directive';
import { AbstractPaperlessService } from 'src/app/services/rest/abstract-paperless-service';
import { DeleteDialogComponent } from '../../common/delete-dialog/delete-dialog.component';
@ -14,12 +15,17 @@ export abstract class GenericListComponent<T extends ObjectWithId> implements On
private editDialogComponent: any) {
}
@ViewChildren(SortableDirective) headers: QueryList<SortableDirective>;
public data: T[] = []
public page = 1
public collectionSize = 0
public sortField: string
public sortDirection: string
getMatching(o: MatchingModel) {
if (o.matching_algorithm == MATCH_AUTO) {
return "Automatic"
@ -30,12 +36,31 @@ export abstract class GenericListComponent<T extends ObjectWithId> implements On
}
}
onSort(event: SortEvent) {
if (event.direction && event.direction.length > 0) {
this.sortField = event.column
this.sortDirection = event.direction
} else {
this.sortField = null
this.sortDirection = null
}
this.headers.forEach(header => {
if (header.sortable !== this.sortField) {
header.direction = '';
}
});
this.reloadData()
}
ngOnInit(): void {
this.reloadData()
}
reloadData() {
this.service.list(this.page).subscribe(c => {
this.service.list(this.page, null, this.sortField, this.sortDirection).subscribe(c => {
this.data = c.results
this.collectionSize = c.count
});

View File

@ -20,7 +20,7 @@ export class LogsComponent implements OnInit {
}
reload() {
this.logService.list(1, 50, null, {'level__gte': this.level}).subscribe(result => this.logs = result.results)
this.logService.list(1, 50, 'created', 'des', {'level__gte': this.level}).subscribe(result => this.logs = result.results)
}
getLevelText(level: number) {
@ -32,7 +32,7 @@ export class LogsComponent implements OnInit {
if (this.logs.length > 0) {
lastCreated = this.logs[this.logs.length-1].created
}
this.logService.list(1, 25, null, {'created__lt': lastCreated, 'level__gte': this.level}).subscribe(result => {
this.logService.list(1, 25, 'created', 'des', {'created__lt': lastCreated, 'level__gte': this.level}).subscribe(result => {
this.logs.push(...result.results)
})
}

View File

@ -34,7 +34,7 @@
<a ngbNavLink>Saved views</a>
<ng-template ngbNavContent>
<table class="table table-striped">
<table class="table table-borderless table-sm">
<thead>
<tr>
<th scope="col">Title</th>
@ -57,7 +57,7 @@
</li>
</ul>
<div [ngbNavOutlet]="nav" class="mt-2"></div>
<div [ngbNavOutlet]="nav" class="border-left border-right border-bottom p-3 mb-3"></div>
<button type="submit" class="btn btn-primary">Save</button>
</form>

View File

@ -9,10 +9,10 @@
<table class="table table-striped">
<thead>
<tr>
<th scope="col">Name</th>
<th scope="col" sortable="name" (sort)="onSort($event)">Name</th>
<th scope="col">Colour</th>
<th scope="col">Matching</th>
<th scope="col">Document count</th>
<th scope="col" sortable="matching_algorithm" (sort)="onSort($event)">Matching</th>
<th scope="col" sortable="document_count" (sort)="onSort($event)">Document count</th>
<th scope="col">Actions</th>
</tr>
</thead>

View File

@ -10,10 +10,10 @@ export interface SavedViewConfig {
sortDirection: string
title: string
title?: string
showInSideBar: boolean
showInSideBar?: boolean
showInDashboard: boolean
showInDashboard?: boolean
}

View File

@ -2,6 +2,10 @@ export const OPEN_DOCUMENT_SERVICE = {
DOCUMENTS: 'open-documents-service:openDocuments'
}
export const DOCUMENT_LIST_SERVICE = {
CURRENT_VIEW_CONFIG: 'document-list-service:currentViewConfig'
}
export const GENERAL_SETTINGS = {
DOCUMENT_LIST_SIZE: 'general-settings:documentListSize',
DOCUMENT_LIST_SIZE_DEFAULT: 50

View File

@ -0,0 +1,8 @@
import { SortableDirective } from './sortable.directive';
describe('SortableDirective', () => {
it('should create an instance', () => {
const directive = new SortableDirective();
expect(directive).toBeTruthy();
});
});

View File

@ -0,0 +1,30 @@
import { Directive, EventEmitter, Input, Output } from '@angular/core';
export interface SortEvent {
column: string;
direction: string;
}
const rotate: {[key: string]: string} = { 'asc': 'des', 'des': '', '': 'asc' };
@Directive({
selector: 'th[sortable]',
host: {
'[class.asc]': 'direction === "asc"',
'[class.des]': 'direction === "des"',
'(click)': 'rotate()'
}
})
export class SortableDirective {
constructor() { }
@Input() sortable: string = '';
@Input() direction: string = '';
@Output() sort = new EventEmitter<SortEvent>();
rotate() {
this.direction = rotate[this.direction];
this.sort.emit({column: this.sortable, direction: this.direction});
}
}

View File

@ -1,16 +0,0 @@
import { TestBed } from '@angular/core/testing';
import { AuthGuardService } from './auth-guard.service';
describe('AuthGuardService', () => {
let service: AuthGuardService;
beforeEach(() => {
TestBed.configureTestingModule({});
service = TestBed.inject(AuthGuardService);
});
it('should be created', () => {
expect(service).toBeTruthy();
});
});

View File

@ -1,20 +0,0 @@
import { Injectable } from '@angular/core';
import { Router } from '@angular/router';
import { AuthService } from './auth.service';
@Injectable({
providedIn: 'root'
})
export class AuthGuardService {
constructor(public auth: AuthService, public router: Router) { }
canActivate(): boolean {
if (!this.auth.isAuthenticated()) {
this.router.navigate(['login']);
return false;
}
return true;
}
}

View File

@ -1,16 +0,0 @@
import { TestBed } from '@angular/core/testing';
import { AuthInterceptor } from './auth.interceptor';
describe('AuthInterceptor', () => {
beforeEach(() => TestBed.configureTestingModule({
providers: [
AuthInterceptor
]
}));
it('should be created', () => {
const interceptor: AuthInterceptor = TestBed.inject(AuthInterceptor);
expect(interceptor).toBeTruthy();
});
});

View File

@ -1,37 +0,0 @@
import { Injectable } from '@angular/core';
import {
HttpRequest,
HttpHandler,
HttpEvent,
HttpInterceptor,
HttpErrorResponse
} from '@angular/common/http';
import { Observable, throwError } from 'rxjs';
import { AuthService } from './auth.service';
import { catchError } from 'rxjs/operators';
import { Toast, ToastService } from './toast.service';
@Injectable()
export class AuthInterceptor implements HttpInterceptor {
constructor(private authService: AuthService, private toastService: ToastService) {}
intercept(request: HttpRequest<unknown>, next: HttpHandler): Observable<HttpEvent<unknown>> {
if (this.authService.isAuthenticated()) {
request = request.clone({
setHeaders: {
Authorization: 'Token ' + this.authService.getToken()
}
});
}
return next.handle(request).pipe(
catchError((error: HttpErrorResponse) => {
if (error.status == 401 && this.authService.isAuthenticated()) {
this.authService.logout()
this.toastService.showError("Your session has expired. Please log in again.")
}
return throwError(error)
})
);
}
}

View File

@ -1,16 +0,0 @@
import { TestBed } from '@angular/core/testing';
import { AuthService } from './auth.service';
describe('AuthService', () => {
let service: AuthService;
beforeEach(() => {
TestBed.configureTestingModule({});
service = TestBed.inject(AuthService);
});
it('should be created', () => {
expect(service).toBeTruthy();
});
});

View File

@ -1,72 +0,0 @@
import { Injectable } from '@angular/core';
import { Observable } from 'rxjs';
import { map } from 'rxjs/operators';
import { HttpClient } from '@angular/common/http';
import { Router } from '@angular/router';
import { environment } from 'src/environments/environment';
interface TokenResponse {
token: string
}
@Injectable({
providedIn: 'root'
})
export class AuthService {
private currentUsername: string
private token: string
constructor(private http: HttpClient, private router: Router) {
this.token = localStorage.getItem('auth-service:token')
if (this.token == null) {
this.token = sessionStorage.getItem('auth-service:token')
}
this.currentUsername = localStorage.getItem('auth-service:currentUsername')
if (this.currentUsername == null) {
this.currentUsername = sessionStorage.getItem('auth-service:currentUsername')
}
}
private requestToken(username: string, password: string): Observable<TokenResponse> {
return this.http.post<TokenResponse>(`${environment.apiBaseUrl}token/`, {"username": username, "password": password})
}
isAuthenticated(): boolean {
return this.currentUsername != null
}
logout() {
this.currentUsername = null
this.token = null
localStorage.removeItem('auth-service:token')
localStorage.removeItem('auth-service:currentUsername')
sessionStorage.removeItem('auth-service:token')
sessionStorage.removeItem('auth-service:currentUsername')
this.router.navigate(['login'])
}
login(username: string, password: string, rememberMe: boolean): Observable<boolean> {
return this.requestToken(username,password).pipe(
map(tokenResponse => {
this.currentUsername = username
this.token = tokenResponse.token
let storage = rememberMe ? localStorage : sessionStorage
storage.setItem('auth-service:token', this.token)
storage.setItem('auth-service:currentUsername', this.currentUsername)
return true
})
)
}
getToken(): string {
return this.token
}
getCurrentUsername(): string {
return this.currentUsername
}
}

View File

@ -3,8 +3,8 @@ import { Observable } from 'rxjs';
import { cloneFilterRules, FilterRule } from '../data/filter-rule';
import { PaperlessDocument } from '../data/paperless-document';
import { SavedViewConfig } from '../data/saved-view-config';
import { GENERAL_SETTINGS } from '../data/storage-keys';
import { DocumentService, SORT_DIRECTION_DESCENDING } from './rest/document.service';
import { DOCUMENT_LIST_SERVICE, GENERAL_SETTINGS } from '../data/storage-keys';
import { DocumentService } from './rest/document.service';
@Injectable({
@ -18,33 +18,24 @@ export class DocumentListViewService {
currentPage = 1
currentPageSize: number = +localStorage.getItem(GENERAL_SETTINGS.DOCUMENT_LIST_SIZE) || GENERAL_SETTINGS.DOCUMENT_LIST_SIZE_DEFAULT
collectionSize: number
currentFilterRules: FilterRule[] = []
currentSortDirection = SORT_DIRECTION_DESCENDING
currentSortField = DocumentListViewService.DEFAULT_SORT_FIELD
viewConfig: SavedViewConfig
private currentViewConfig: SavedViewConfig
//TODO: make private
viewConfigOverride: SavedViewConfig
get viewId() {
return this.viewConfigOverride?.id
}
reload(onFinish?) {
let sortField: string
let sortDirection: string
let filterRules: FilterRule[]
if (this.viewConfig) {
sortField = this.viewConfig.sortField
sortDirection = this.viewConfig.sortDirection
filterRules = this.viewConfig.filterRules
} else {
sortField = this.currentSortField
sortDirection = this.currentSortDirection
filterRules = this.currentFilterRules
}
let viewConfig = this.viewConfigOverride || this.currentViewConfig
this.documentService.list(
this.currentPage,
this.currentPageSize,
sortField,
sortDirection,
filterRules).subscribe(
viewConfig.sortField,
viewConfig.sortDirection,
viewConfig.filterRules).subscribe(
result => {
this.collectionSize = result.count
this.documents = result.results
@ -60,9 +51,43 @@ export class DocumentListViewService {
})
}
set filterRules(filterRules: FilterRule[]) {
this.currentViewConfig.filterRules = cloneFilterRules(filterRules)
this.saveCurrentViewConfig()
this.reload()
}
setFilterRules(filterRules: FilterRule[]) {
this.currentFilterRules = cloneFilterRules(filterRules)
get filterRules(): FilterRule[] {
return cloneFilterRules(this.currentViewConfig.filterRules)
}
set sortField(field: string) {
this.currentViewConfig.sortField = field
this.saveCurrentViewConfig()
this.reload()
}
get sortField(): string {
return this.currentViewConfig.sortField
}
set sortDirection(direction: string) {
this.currentViewConfig.sortDirection = direction
this.saveCurrentViewConfig()
this.reload()
}
get sortDirection(): string {
return this.currentViewConfig.sortDirection
}
loadViewConfig(config: SavedViewConfig) {
Object.assign(this.currentViewConfig, config)
this.reload()
}
private saveCurrentViewConfig() {
sessionStorage.setItem(DOCUMENT_LIST_SERVICE.CURRENT_VIEW_CONFIG, JSON.stringify(this.currentViewConfig))
}
getLastPage(): number {
@ -108,5 +133,22 @@ export class DocumentListViewService {
}
}
constructor(private documentService: DocumentService) { }
constructor(private documentService: DocumentService) {
let currentViewConfigJson = sessionStorage.getItem(DOCUMENT_LIST_SERVICE.CURRENT_VIEW_CONFIG)
if (currentViewConfigJson) {
try {
this.currentViewConfig = JSON.parse(currentViewConfigJson)
} catch (e) {
sessionStorage.removeItem(DOCUMENT_LIST_SERVICE.CURRENT_VIEW_CONFIG)
this.currentViewConfig = null
}
}
if (!this.currentViewConfig) {
this.currentViewConfig = {
filterRules: [],
sortDirection: 'des',
sortField: 'created'
}
}
}
}

View File

@ -21,7 +21,17 @@ export abstract class AbstractPaperlessService<T extends ObjectWithId> {
return url
}
list(page?: number, pageSize?: number, ordering?: string, extraParams?): Observable<Results<T>> {
private getOrderingQueryParam(sortField: string, sortDirection: string) {
if (sortField && sortDirection) {
return (sortDirection == 'des' ? '-' : '') + sortField
} else if (sortField) {
return sortField
} else {
return null
}
}
list(page?: number, pageSize?: number, sortField?: string, sortDirection?: string, extraParams?): Observable<Results<T>> {
let httpParams = new HttpParams()
if (page) {
httpParams = httpParams.set('page', page.toString())
@ -29,6 +39,7 @@ export abstract class AbstractPaperlessService<T extends ObjectWithId> {
if (pageSize) {
httpParams = httpParams.set('page_size', pageSize.toString())
}
let ordering = this.getOrderingQueryParam(sortField, sortDirection)
if (ordering) {
httpParams = httpParams.set('ordering', ordering)
}

View File

@ -2,7 +2,6 @@ import { Injectable } from '@angular/core';
import { PaperlessDocument } from 'src/app/data/paperless-document';
import { AbstractPaperlessService } from './abstract-paperless-service';
import { HttpClient } from '@angular/common/http';
import { AuthService } from '../auth.service';
import { Observable } from 'rxjs';
import { Results } from 'src/app/data/results';
import { FilterRule } from 'src/app/data/filter-rule';
@ -10,6 +9,7 @@ import { FilterRule } from 'src/app/data/filter-rule';
export const DOCUMENT_SORT_FIELDS = [
{ field: "correspondent__name", name: "Correspondent" },
{ field: "document_type__name", name: "Document type" },
{ field: 'title', name: 'Title' },
{ field: 'archive_serial_number', name: 'ASN' },
{ field: 'created', name: 'Created' },
@ -26,7 +26,7 @@ export const SORT_DIRECTION_DESCENDING = "des"
})
export class DocumentService extends AbstractPaperlessService<PaperlessDocument> {
constructor(http: HttpClient, private auth: AuthService) {
constructor(http: HttpClient) {
super(http, 'documents')
}
@ -46,28 +46,20 @@ export class DocumentService extends AbstractPaperlessService<PaperlessDocument>
}
}
private getOrderingQueryParam(sortField: string, sortDirection: string) {
if (DOCUMENT_SORT_FIELDS.find(f => f.field == sortField)) {
return (sortDirection == SORT_DIRECTION_DESCENDING ? '-' : '') + sortField
} else {
return null
}
}
list(page?: number, pageSize?: number, sortField?: string, sortDirection?: string, filterRules?: FilterRule[]): Observable<Results<PaperlessDocument>> {
return super.list(page, pageSize, this.getOrderingQueryParam(sortField, sortDirection), this.filterRulesToQueryParams(filterRules))
return super.list(page, pageSize, sortField, sortDirection, this.filterRulesToQueryParams(filterRules))
}
getPreviewUrl(id: number): string {
return this.getResourceUrl(id, 'preview') + `?auth_token=${this.auth.getToken()}`
return this.getResourceUrl(id, 'preview')
}
getThumbUrl(id: number): string {
return this.getResourceUrl(id, 'thumb') + `?auth_token=${this.auth.getToken()}`
return this.getResourceUrl(id, 'thumb')
}
getDownloadUrl(id: number): string {
return this.getResourceUrl(id, 'download') + `?auth_token=${this.auth.getToken()}`
return this.getResourceUrl(id, 'download')
}
uploadDocument(formData) {

View File

@ -10,7 +10,11 @@ export class SavedViewConfigService {
constructor() {
let savedConfigs = localStorage.getItem('saved-view-config-service:savedConfigs')
if (savedConfigs) {
this.configs = JSON.parse(savedConfigs)
try {
this.configs = JSON.parse(savedConfigs)
} catch (e) {
this.configs = []
}
}
}

View File

@ -28,4 +28,34 @@ body {
.form-control-dark:focus {
border-color: transparent;
box-shadow: 0 0 0 3px rgba(255, 255, 255, .25);
}
.asc {
background-color: #f8f9fa!important;
}
.asc:after {
content: '';
transform: rotate(180deg);
background: url("") no-repeat;
height: 1rem;
width: 1rem;
display: block;
background-size: 1rem;
float: right;
}
.des {
background-color: #f8f9fa!important;
}
.des:after {
content: '';
background: url("") no-repeat;
height: 1rem;
width: 1rem;
display: block;
background-size: 1rem;
float: right;
}

View File

@ -2,7 +2,9 @@ from django.contrib import admin
from django.contrib.auth.models import Group, User
from django.utils.html import format_html, format_html_join
from django.utils.safestring import mark_safe
from whoosh.writing import AsyncWriter
from . import index
from .models import Correspondent, Document, DocumentType, Log, Tag
@ -71,6 +73,21 @@ class DocumentAdmin(admin.ModelAdmin):
return obj.created.date().strftime("%Y-%m-%d")
created_.short_description = "Created"
def delete_queryset(self, request, queryset):
ix = index.open_index()
with AsyncWriter(ix) as writer:
for o in queryset:
index.remove_document(writer, o)
super(DocumentAdmin, self).delete_queryset(request, queryset)
def delete_model(self, request, obj):
index.remove_document_from_index(obj)
super(DocumentAdmin, self).delete_model(request, obj)
def save_model(self, request, obj, form, change):
index.add_or_update_document(obj)
super(DocumentAdmin, self).save_model(request, obj, form, change)
@mark_safe
def tags_(self, obj):
r = ""

View File

@ -18,7 +18,8 @@ class DocumentsConfig(AppConfig):
set_log_entry,
set_correspondent,
set_document_type,
set_tags
set_tags,
add_to_index
)
@ -29,6 +30,7 @@ class DocumentsConfig(AppConfig):
document_consumption_finished.connect(set_document_type)
document_consumption_finished.connect(set_tags)
document_consumption_finished.connect(set_log_entry)
document_consumption_finished.connect(add_to_index)
document_consumption_finished.connect(run_post_consume_script)
post_delete.connect(cleanup_document_deletion)

View File

@ -1,4 +1,3 @@
import magic
import os
from datetime import datetime
@ -6,77 +5,25 @@ from time import mktime
from django import forms
from django.conf import settings
from .models import Document, Correspondent
from pathvalidate import validate_filename, ValidationError
class UploadForm(forms.Form):
TYPE_LOOKUP = {
"application/pdf": Document.TYPE_PDF,
"image/png": Document.TYPE_PNG,
"image/jpeg": Document.TYPE_JPG,
"image/gif": Document.TYPE_GIF,
"image/tiff": Document.TYPE_TIF,
}
correspondent = forms.CharField(
max_length=Correspondent._meta.get_field("name").max_length,
required=False
)
title = forms.CharField(
max_length=Document._meta.get_field("title").max_length,
required=False
)
document = forms.FileField()
def __init__(self, *args, **kwargs):
forms.Form.__init__(self, *args, **kwargs)
self._file_type = None
def clean_correspondent(self):
"""
I suppose it might look cleaner to use .get_or_create() here, but that
would also allow someone to fill up the db with bogus correspondents
before all validation was met.
"""
corresp = self.cleaned_data.get("correspondent")
if not corresp:
return None
if not Correspondent.SAFE_REGEX.match(corresp) or " - " in corresp:
raise forms.ValidationError(
"That correspondent name is suspicious.")
return corresp
def clean_title(self):
title = self.cleaned_data.get("title")
if not title:
return None
if not Correspondent.SAFE_REGEX.match(title) or " - " in title:
raise forms.ValidationError("That title is suspicious.")
return title
def clean_document(self):
try:
validate_filename(self.cleaned_data.get("document").name)
except ValidationError:
raise forms.ValidationError("That filename is suspicious.")
return self.cleaned_data.get("document")
document = self.cleaned_data.get("document").read()
with magic.Magic(flags=magic.MAGIC_MIME_TYPE) as m:
file_type = m.id_buffer(document)
if file_type not in self.TYPE_LOOKUP:
raise forms.ValidationError("The file type is invalid.")
self._file_type = self.TYPE_LOOKUP[file_type]
return document
def get_filename(self, i=None):
return os.path.join(
settings.CONSUMPTION_DIR,
"{}_{}".format(str(i), self.cleaned_data.get("document").name) if i else self.cleaned_data.get("document").name
)
def save(self):
"""
@ -85,15 +32,15 @@ class UploadForm(forms.Form):
form do that as well. Think of it as a poor-man's queue server.
"""
correspondent = self.cleaned_data.get("correspondent")
title = self.cleaned_data.get("title")
document = self.cleaned_data.get("document")
document = self.cleaned_data.get("document").read()
t = int(mktime(datetime.now().timetuple()))
file_name = os.path.join(
settings.CONSUMPTION_DIR,
"{} - {}.{}".format(correspondent, title, self._file_type)
)
file_name = self.get_filename()
i = 0
while os.path.exists(file_name):
i += 1
file_name = self.get_filename(i)
with open(file_name, "wb") as f:
f.write(document)

View File

@ -2,15 +2,20 @@ import logging
from django.db import models
from django.dispatch import receiver
from whoosh import highlight
from whoosh.fields import Schema, TEXT, NUMERIC
from whoosh.highlight import Formatter, get_text
from whoosh.index import create_in, exists_in, open_dir
from whoosh.qparser import MultifieldParser
from whoosh.writing import AsyncWriter
from documents.models import Document
from paperless import settings
logger = logging.getLogger(__name__)
class JsonFormatter(Formatter):
def __init__(self):
self.seen = {}
@ -68,7 +73,7 @@ def open_index(recreate=False):
def update_document(writer, doc):
logging.getLogger(__name__).debug("Updating index with document{}".format(str(doc)))
logger.debug("Indexing {}...".format(doc))
writer.update_document(
id=doc.pk,
title=doc.title,
@ -77,19 +82,32 @@ def update_document(writer, doc):
)
@receiver(models.signals.post_save, sender=Document)
def add_document_to_index(sender, instance, **kwargs):
ix = open_index()
with AsyncWriter(ix) as writer:
update_document(writer, instance)
def remove_document(writer, doc):
logger.debug("Removing {} from index...".format(doc))
writer.delete_by_term('id', doc.pk)
@receiver(models.signals.post_delete, sender=Document)
def remove_document_from_index(sender, instance, **kwargs):
logging.getLogger(__name__).debug("Removing document {} from index".format(str(instance)))
def add_or_update_document(document):
ix = open_index()
with AsyncWriter(ix) as writer:
writer.delete_by_term('id', instance.pk)
update_document(writer, document)
def remove_document_from_index(document):
ix = open_index()
with AsyncWriter(ix) as writer:
remove_document(writer, document)
def query_page(ix, query, page):
with ix.searcher() as searcher:
query_parser = MultifieldParser(["content", "title", "correspondent"],
ix.schema).parse(query)
result_page = searcher.search_page(query_parser, page)
result_page.results.fragmenter = highlight.ContextFragmenter(
surround=50)
result_page.results.formatter = JsonFormatter()
return result_page
def autocomplete(ix, term, limit=10):

View File

@ -1,10 +1,6 @@
import logging
from django.core.management.base import BaseCommand
from documents.classifier import DocumentClassifier, \
IncompatibleClassifierVersionError
from paperless import settings
from ...mixins import Renderable
from ...tasks import train_classifier
class Command(Renderable, BaseCommand):
@ -18,27 +14,4 @@ class Command(Renderable, BaseCommand):
BaseCommand.__init__(self, *args, **kwargs)
def handle(self, *args, **options):
classifier = DocumentClassifier()
try:
# load the classifier, since we might not have to train it again.
classifier.reload()
except (FileNotFoundError, IncompatibleClassifierVersionError):
# This is what we're going to fix here.
pass
try:
if classifier.train():
logging.getLogger(__name__).info(
"Saving updated classifier model to {}...".format(settings.MODEL_FILE)
)
classifier.save_classifier()
else:
logging.getLogger(__name__).debug(
"Training data unchanged."
)
except Exception as e:
logging.getLogger(__name__).error(
"Classifier error: " + str(e)
)
train_classifier()

View File

@ -1,9 +1,7 @@
from django.core.management import BaseCommand
from whoosh.writing import AsyncWriter
import documents.index as index
from documents.mixins import Renderable
from documents.models import Document
from documents.tasks import index_reindex, index_optimize
class Command(Renderable, BaseCommand):
@ -22,13 +20,6 @@ class Command(Renderable, BaseCommand):
self.verbosity = options["verbosity"]
if options['command'] == 'reindex':
documents = Document.objects.all()
ix = index.open_index(recreate=True)
with AsyncWriter(ix) as writer:
for document in documents:
index.update_document(writer, document)
index_reindex()
elif options['command'] == 'optimize':
index.open_index().optimize()
index_optimize()

View File

@ -0,0 +1,24 @@
from django.core.management.base import BaseCommand
from documents.models import Document, Tag
from ...mixins import Renderable
class Command(Renderable, BaseCommand):
help = """
This will rename all documents to match the latest filename format.
""".replace(" ", "")
def __init__(self, *args, **kwargs):
self.verbosity = 0
BaseCommand.__init__(self, *args, **kwargs)
def handle(self, *args, **options):
self.verbosity = options["verbosity"]
for document in Document.objects.all():
# Saving the document again will generate a new filename and rename
document.save()

View File

@ -1,60 +0,0 @@
import argparse
import threading
from multiprocessing import Pool
from multiprocessing.pool import ThreadPool
from django.core.management.base import BaseCommand
from documents.consumer import Consumer
from documents.models import Log, Document
from documents.parsers import get_parser_class
def process_document(doc):
parser_class = get_parser_class(doc.file_name)
if not parser_class:
print("no parser available")
else:
print("Parser: {}".format(parser_class.__name__))
parser = parser_class(doc.source_path, None)
try:
text = parser.get_text()
doc.content = text
doc.save()
finally:
parser.cleanup()
def document_index(value):
ivalue = int(value)
if not (1 <= ivalue <= Document.objects.count()):
raise argparse.ArgumentTypeError(
"{} is not a valid document index (out of range)".format(value))
return ivalue
class Command(BaseCommand):
help = "Performs OCR on all documents again!"
def add_arguments(self, parser):
parser.add_argument(
"-s", "--start_index",
default=None,
type=document_index
)
def handle(self, *args, **options):
docs = Document.objects.all().order_by("added")
indices = range(options['start_index']-1, len(docs)) if options['start_index'] else range(len(docs))
for i in indices:
doc = docs[i]
print("==================================")
print("{} out of {}: {}".format(i+1, len(docs), doc.file_name))
print("==================================")
process_document(doc)

View File

@ -1,73 +0,0 @@
# Generated by Django 3.1.2 on 2020-10-29 14:29
import os
from django.db import migrations
from django.conf import settings
def make_index(apps, schema_editor):
Document = apps.get_model("documents", "Document")
documents = Document.objects.all()
print()
try:
print(" --> Creating document index...")
from whoosh.writing import AsyncWriter
from documents import index
ix = index.open_index(recreate=True)
with AsyncWriter(ix) as writer:
for document in documents:
index.update_document(writer, document)
except ImportError:
# index may not be relevant anymore
print(" --> Cannot create document index.")
def restore_filenames(apps, schema_editor):
Document = apps.get_model("documents", "Document")
for doc in Document.objects.all():
file_name = "{:07}.{}".format(doc.pk, doc.file_type)
if doc.storage_type == "gpg":
file_name += ".gpg"
if not doc.filename == file_name:
try:
print("file was renamed, restoring {} to {}".format(doc.filename, file_name))
os.rename(os.path.join(settings.ORIGINALS_DIR, doc.filename),
os.path.join(settings.ORIGINALS_DIR, file_name))
except PermissionError:
pass
except FileNotFoundError:
pass
def initialize_document_classifier(apps, schema_editor):
try:
print("Initalizing document classifier...")
from documents.classifier import DocumentClassifier
classifier = DocumentClassifier()
try:
classifier.train()
classifier.save_classifier()
except Exception as e:
print("Classifier error: {}".format(e))
except ImportError:
print("Document classifier not found, skipping")
class Migration(migrations.Migration):
dependencies = [
('documents', '0023_document_current_filename'),
]
operations = [
migrations.RunPython(make_index, migrations.RunPython.noop),
migrations.RunPython(restore_filenames),
migrations.RunPython(initialize_document_classifier, migrations.RunPython.noop),
migrations.RemoveField(
model_name='document',
name='filename',
),
]

View File

@ -0,0 +1,95 @@
# Generated by Django 3.1.3 on 2020-11-07 12:35
import os
from django.conf import settings
from django.db import migrations, models
import django.db.models.deletion
def make_index(apps, schema_editor):
Document = apps.get_model("documents", "Document")
documents = Document.objects.all()
print()
try:
print(" --> Creating document index...")
from whoosh.writing import AsyncWriter
from documents import index
ix = index.open_index(recreate=True)
with AsyncWriter(ix) as writer:
for document in documents:
index.update_document(writer, document)
except ImportError:
# index may not be relevant anymore
print(" --> Cannot create document index.")
class Migration(migrations.Migration):
dependencies = [
('documents', '0023_document_current_filename'),
]
operations = [
migrations.AddField(
model_name='document',
name='archive_serial_number',
field=models.IntegerField(blank=True, db_index=True, help_text='The position of this document in your physical document archive.', null=True, unique=True),
),
migrations.AddField(
model_name='tag',
name='is_inbox_tag',
field=models.BooleanField(default=False, help_text='Marks this tag as an inbox tag: All newly consumed documents will be tagged with inbox tags.'),
),
migrations.CreateModel(
name='DocumentType',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('name', models.CharField(max_length=128, unique=True)),
('slug', models.SlugField(blank=True, editable=False)),
('match', models.CharField(blank=True, max_length=256)),
('matching_algorithm', models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression'), (5, 'Fuzzy Match'), (6, 'Automatic Classification')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.')),
('is_insensitive', models.BooleanField(default=True)),
],
options={
'abstract': False,
'ordering': ('name',),
},
),
migrations.AddField(
model_name='document',
name='document_type',
field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='documents', to='documents.documenttype'),
),
migrations.AlterField(
model_name='correspondent',
name='matching_algorithm',
field=models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression'), (5, 'Fuzzy Match'), (6, 'Automatic Classification')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.'),
),
migrations.AlterField(
model_name='tag',
name='matching_algorithm',
field=models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression'), (5, 'Fuzzy Match'), (6, 'Automatic Classification')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.'),
),
migrations.AlterField(
model_name='document',
name='content',
field=models.TextField(blank=True, help_text='The raw, text-only data of the document. This field is primarily used for searching.'),
),
migrations.AlterModelOptions(
name='log',
options={'ordering': ('-created',)},
),
migrations.RemoveField(
model_name='log',
name='modified',
),
migrations.AlterField(
model_name='log',
name='group',
field=models.UUIDField(blank=True, null=True),
),
migrations.RunPython(
code=make_index,
reverse_code=django.db.migrations.operations.special.RunPython.noop,
),
]

View File

@ -0,0 +1,28 @@
# Generated by Django 3.1.3 on 2020-11-09 16:36
from django.db import migrations
from django.db.migrations import RunPython
from django_q.models import Schedule
from django_q.tasks import schedule
def add_schedules(apps, schema_editor):
schedule('documents.tasks.train_classifier', name="Train the classifier", schedule_type=Schedule.HOURLY)
schedule('documents.tasks.index_optimize', name="Optimize the index", schedule_type=Schedule.DAILY)
schedule('documents.tasks.consume_mail', name="Check E-Mail", schedule_type=Schedule.MINUTES, minutes=10)
def remove_schedules(apps, schema_editor):
Schedule.objects.all().delete()
class Migration(migrations.Migration):
dependencies = [
('documents', '1000_update_paperless_all'),
('django_q', '0013_task_attempt_count'),
]
operations = [
RunPython(add_schedules, remove_schedules)
]

View File

@ -1,23 +0,0 @@
# Generated by Django 2.0.7 on 2018-07-12 09:52
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('documents', '1000_update_paperless'),
]
operations = [
migrations.AddField(
model_name='document',
name='archive_serial_number',
field=models.IntegerField(blank=True, db_index=True, help_text='The position of this document in your physical document archive.', null=True, unique=True),
),
migrations.AddField(
model_name='tag',
name='is_inbox_tag',
field=models.BooleanField(default=False, help_text='Marks this tag as an inbox tag: All newly consumed documents will be tagged with inbox tags.'),
),
]

View File

@ -1,33 +0,0 @@
# Generated by Django 2.0.7 on 2018-08-23 11:55
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
dependencies = [
('documents', '1001_workflow_improvements'),
]
operations = [
migrations.CreateModel(
name='DocumentType',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('name', models.CharField(max_length=128, unique=True)),
('slug', models.SlugField(blank=True, editable=False)),
('match', models.CharField(blank=True, max_length=256)),
('matching_algorithm', models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression'), (5, 'Fuzzy Match')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.')),
('is_insensitive', models.BooleanField(default=True)),
],
options={
'abstract': False,
},
),
migrations.AddField(
model_name='document',
name='document_type',
field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='documents', to='documents.DocumentType'),
),
]

View File

@ -1,32 +0,0 @@
# Generated by Django 3.1.2 on 2020-10-28 17:51
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('documents', '1002_auto_20180823_1155'),
]
operations = [
migrations.AlterModelOptions(
name='documenttype',
options={'ordering': ('name',)},
),
migrations.AlterField(
model_name='correspondent',
name='matching_algorithm',
field=models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression'), (5, 'Fuzzy Match'), (6, 'Automatic Classification')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.'),
),
migrations.AlterField(
model_name='documenttype',
name='matching_algorithm',
field=models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression'), (5, 'Fuzzy Match'), (6, 'Automatic Classification')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.'),
),
migrations.AlterField(
model_name='tag',
name='matching_algorithm',
field=models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression'), (5, 'Fuzzy Match'), (6, 'Automatic Classification')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.'),
),
]

View File

@ -1,18 +0,0 @@
# Generated by Django 3.1.2 on 2020-10-29 13:31
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('documents', '1003_auto_20201028_1751'),
]
operations = [
migrations.AlterField(
model_name='document',
name='content',
field=models.TextField(blank=True, help_text='The raw, text-only data of the document. This field is primarily used for searching.'),
),
]

View File

@ -1,26 +0,0 @@
# Generated by Django 3.1.2 on 2020-11-02 00:07
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('documents', '1004_auto_20201029_1331'),
]
operations = [
migrations.AlterModelOptions(
name='log',
options={'ordering': ('-created',)},
),
migrations.RemoveField(
model_name='log',
name='modified',
),
migrations.AlterField(
model_name='log',
name='group',
field=models.UUIDField(blank=True, null=True),
),
]

View File

@ -3,11 +3,12 @@
import logging
import os
import re
from collections import OrderedDict
from collections import OrderedDict, defaultdict
import dateutil.parser
from django.conf import settings
from django.db import models
from django.dispatch import receiver
from django.template.defaultfilters import slugify
from django.utils import timezone
from django.utils.text import slugify
@ -190,6 +191,14 @@ class Document(models.Model):
added = models.DateTimeField(
default=timezone.now, editable=False, db_index=True)
filename = models.FilePathField(
max_length=256,
editable=False,
default=None,
null=True,
help_text="Current filename in storage"
)
archive_serial_number = models.IntegerField(
blank=True,
null=True,
@ -211,15 +220,123 @@ class Document(models.Model):
return "{}: {}".format(created, self.correspondent or self.title)
return str(created)
def find_renamed_document(self, subdirectory=""):
suffix = "%07i.%s" % (self.pk, self.file_type)
# Append .gpg for encrypted files
if self.storage_type == self.STORAGE_TYPE_GPG:
suffix += ".gpg"
# Go up in the directory hierarchy and try to delete all directories
root = os.path.normpath(Document.filename_to_path(subdirectory))
for filename in os.listdir(root):
if filename.endswith(suffix):
return os.path.join(subdirectory, filename)
fullname = os.path.join(subdirectory, filename)
if os.path.isdir(Document.filename_to_path(fullname)):
return self.find_renamed_document(fullname)
return None
@property
def source_filename(self):
# Initial filename generation (for new documents)
if self.filename is None:
self.filename = self.generate_source_filename()
# Check if document is still available under filename
elif not os.path.isfile(Document.filename_to_path(self.filename)):
recovered_filename = self.find_renamed_document()
# If we have found the file so update the filename
if recovered_filename is not None:
logger = logging.getLogger(__name__)
logger.warning("Filename of document " + str(self.id) +
" has changed and was successfully updated")
self.filename = recovered_filename
# Remove all empty subdirectories from MEDIA_ROOT
Document.delete_all_empty_subdirectories(
Document.filename_to_path(""))
else:
logger = logging.getLogger(__name__)
logger.error("File of document " + str(self.id) + " has " +
"gone and could not be recovered")
return self.filename
@staticmethod
def many_to_dictionary(field):
# Converts ManyToManyField to dictionary by assuming, that field
# entries contain an _ or - which will be used as a delimiter
mydictionary = dict()
for index, t in enumerate(field.all()):
# Populate tag names by index
mydictionary[index] = slugify(t.name)
# Find delimiter
delimiter = t.name.find('_')
if delimiter == -1:
delimiter = t.name.find('-')
if delimiter == -1:
continue
key = t.name[:delimiter]
value = t.name[delimiter+1:]
mydictionary[slugify(key)] = slugify(value)
return mydictionary
def generate_source_filename(self):
# Create filename based on configured format
if settings.PAPERLESS_FILENAME_FORMAT is not None:
tags = defaultdict(lambda: slugify(None),
self.many_to_dictionary(self.tags))
path = settings.PAPERLESS_FILENAME_FORMAT.format(
correspondent=slugify(self.correspondent),
title=slugify(self.title),
created=slugify(self.created),
added=slugify(self.added),
tags=tags)
else:
path = ""
# Always append the primary key to guarantee uniqueness of filename
if len(path) > 0:
filename = "%s-%07i.%s" % (path, self.pk, self.file_type)
else:
filename = "%07i.%s" % (self.pk, self.file_type)
# Append .gpg for encrypted files
if self.storage_type == self.STORAGE_TYPE_GPG:
filename += ".gpg"
return filename
def create_source_directory(self):
new_filename = self.generate_source_filename()
# Determine the full "target" path
dir_new = Document.filename_to_path(os.path.dirname(new_filename))
# Create new path
os.makedirs(dir_new, exist_ok=True)
@property
def source_path(self):
file_name = "{:07}.{}".format(self.pk, self.file_type)
if self.storage_type == self.STORAGE_TYPE_GPG:
file_name += ".gpg"
return Document.filename_to_path(self.source_filename)
@staticmethod
def filename_to_path(filename):
return os.path.join(
settings.ORIGINALS_DIR,
file_name
filename
)
@property
@ -245,6 +362,125 @@ class Document(models.Model):
def thumbnail_file(self):
return open(self.thumbnail_path, "rb")
def set_filename(self, filename):
if os.path.isfile(Document.filename_to_path(filename)):
self.filename = filename
@staticmethod
def try_delete_empty_directories(directory):
# Go up in the directory hierarchy and try to delete all directories
directory = os.path.normpath(directory)
root = os.path.normpath(Document.filename_to_path(""))
while directory != root:
# Try to delete the current directory
try:
os.rmdir(directory)
except os.error:
# Directory not empty, no need to go further up
return
# Cut off actual directory and go one level up
directory, _ = os.path.split(directory)
directory = os.path.normpath(directory)
@staticmethod
def delete_all_empty_subdirectories(directory):
# Go through all folders and try to delete all directories
root = os.path.normpath(Document.filename_to_path(directory))
for filename in os.listdir(root):
fullname = os.path.join(directory, filename)
if not os.path.isdir(Document.filename_to_path(fullname)):
continue
# Go into subdirectory to see, if there is more to delete
Document.delete_all_empty_subdirectories(
os.path.join(directory, filename))
# Try to delete the directory
try:
os.rmdir(Document.filename_to_path(fullname))
continue
except os.error:
# Directory not empty, no need to go further up
continue
@receiver(models.signals.m2m_changed, sender=Document.tags.through)
@receiver(models.signals.post_save, sender=Document)
def update_filename(sender, instance, **kwargs):
# Skip if document has not been saved yet
if instance.filename is None:
return
# Check is file exists and update filename otherwise
if not os.path.isfile(Document.filename_to_path(instance.filename)):
instance.filename = instance.source_filename
# Build the new filename
new_filename = instance.generate_source_filename()
# If the filename is the same, then nothing needs to be done
if instance.filename == new_filename:
return
# Determine the full "target" path
path_new = instance.filename_to_path(new_filename)
dir_new = instance.filename_to_path(os.path.dirname(new_filename))
# Create new path
instance.create_source_directory()
# Determine the full "current" path
path_current = instance.filename_to_path(instance.source_filename)
# Move file
try:
os.rename(path_current, path_new)
except PermissionError:
# Do not update filename in object
return
except FileNotFoundError:
logger = logging.getLogger(__name__)
logger.error("Renaming of document " + str(instance.id) + " failed " +
"as file " + instance.filename + " was no longer present")
return
# Delete empty directory
old_dir = os.path.dirname(instance.filename)
old_path = instance.filename_to_path(old_dir)
Document.try_delete_empty_directories(old_path)
instance.filename = new_filename
# Save instance
# This will not cause a cascade of post_save signals, as next time
# nothing needs to be renamed
instance.save()
@receiver(models.signals.post_delete, sender=Document)
def delete_files(sender, instance, **kwargs):
if instance.filename is None:
return
# Remove the document
old_file = instance.filename_to_path(instance.filename)
try:
os.remove(old_file)
except FileNotFoundError:
logger = logging.getLogger(__name__)
logger.warning("Deleted document " + str(instance.id) + " but file " +
old_file + " was no longer present")
# And remove the directory (if applicable)
old_dir = os.path.dirname(instance.filename)
old_path = instance.filename_to_path(old_dir)
Document.try_delete_empty_directories(old_path)
class Log(models.Model):

View File

@ -166,3 +166,7 @@ def set_log_entry(sender, document=None, logging_group=None, **kwargs):
user=user,
object_repr=document.__str__(),
)
def add_to_index(sender, document, **kwargs):
index.add_or_update_document(document)

File diff suppressed because one or more lines are too long

View File

@ -1,23 +1,23 @@
.form-signin-container {
top: 0;
bottom: 0;
left: 0;
right: 0;
position: fixed;
html,
body {
height: 100%;
}
body {
display: -ms-flexbox;
display: flex;
-ms-flex-align: center;
align-items: center;
padding-top: 40px;
padding-bottom: 40px;
background-color: #f5f5f5;
}
.form-signin {
width: 100%;
max-width: 330px;
height: auto;
position: fixed;
left: 0;
right: 0;
top: 0;
bottom: 0;
padding: 15px;
margin: auto;
text-align: center;
}
.form-signin .checkbox {
font-weight: 400;
@ -41,4 +41,4 @@
margin-bottom: 10px;
border-top-left-radius: 0;
border-top-right-radius: 0;
}
}

57
src/documents/tasks.py Normal file
View File

@ -0,0 +1,57 @@
import logging
from django.conf import settings
from django_q.tasks import async_task, result
from whoosh.writing import AsyncWriter
from documents import index
from documents.classifier import DocumentClassifier, \
IncompatibleClassifierVersionError
from documents.mail import MailFetcher
from documents.models import Document
def consume_mail():
MailFetcher().pull()
def index_optimize():
index.open_index().optimize()
def index_reindex():
documents = Document.objects.all()
ix = index.open_index(recreate=True)
with AsyncWriter(ix) as writer:
for document in documents:
index.update_document(writer, document)
def train_classifier():
classifier = DocumentClassifier()
try:
# load the classifier, since we might not have to train it again.
classifier.reload()
except (FileNotFoundError, IncompatibleClassifierVersionError):
# This is what we're going to fix here.
pass
try:
if classifier.train():
logging.getLogger(__name__).info(
"Saving updated classifier model to {}...".format(
settings.MODEL_FILE)
)
classifier.save_classifier()
else:
logging.getLogger(__name__).debug(
"Training data unchanged."
)
except Exception as e:
logging.getLogger(__name__).error(
"Classifier error: " + str(e)
)

View File

@ -9,11 +9,11 @@
<base href="/">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="icon" type="image/x-icon" href="favicon.ico">
<link rel="stylesheet" href="{% static 'styles.css' %}"></head>
<link rel="stylesheet" href="{% static 'frontend/styles.css' %}"></head>
<body>
<app-root>Loading...</app-root>
<script src="{% static 'runtime.js' %}" defer></script>
<script src="{% static 'polyfills.js' %}" defer></script>
<script src="{% static 'main.js' %}" defer></script>
<script src="{% static 'frontend/runtime.js' %}" defer></script>
<script src="{% static 'frontend/polyfills.js' %}" defer></script>
<script src="{% static 'frontend/main.js' %}" defer></script>
</body>
</html>

View File

@ -0,0 +1,44 @@
<!doctype html>
{% load static %}
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
<meta name="description" content="">
<meta name="author" content="Mark Otto, Jacob Thornton, and Bootstrap contributors">
<meta name="generator" content="Jekyll v4.1.1">
<title>Paperless Sign In</title>
<!-- Bootstrap core CSS -->
<link href="{% static 'bootstrap.min.css' %}" rel="stylesheet">
<style>
.bd-placeholder-img {
font-size: 1.125rem;
text-anchor: middle;
-webkit-user-select: none;
-moz-user-select: none;
-ms-user-select: none;
user-select: none;
}
@media (min-width: 768px) {
.bd-placeholder-img-lg {
font-size: 3.5rem;
}
}
</style>
<!-- Custom styles for this template -->
<link href="{% static 'signin.css' %}" rel="stylesheet">
</head>
<body class="text-center">
<div class="form-signin">
<img class="mb-4" src="{% static 'frontend/assets/logo.svg' %}" alt="" width="300">
<p>You have been successfully logged out. Bye!</p>
<a href="/">Sign in again</a>
</div>
</body>
</html>

View File

@ -0,0 +1,54 @@
<!doctype html>
{% load static %}
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
<meta name="description" content="">
<meta name="author" content="Mark Otto, Jacob Thornton, and Bootstrap contributors">
<meta name="generator" content="Jekyll v4.1.1">
<title>Paperless Sign In</title>
<!-- Bootstrap core CSS -->
<link href="{% static 'bootstrap.min.css' %}" rel="stylesheet">
<style>
.bd-placeholder-img {
font-size: 1.125rem;
text-anchor: middle;
-webkit-user-select: none;
-moz-user-select: none;
-ms-user-select: none;
user-select: none;
}
@media (min-width: 768px) {
.bd-placeholder-img-lg {
font-size: 3.5rem;
}
}
</style>
<!-- Custom styles for this template -->
<link href="{% static 'signin.css' %}" rel="stylesheet">
</head>
<body class="text-center">
<form class="form-signin" method="post">
{% csrf_token %}
<img class="mb-4" src="{% static 'frontend/assets/logo.svg' %}" alt="" width="300">
<p>Please sign in.</p>
{% if form.errors %}
<div class="alert alert-danger" role="alert">
Your username and password didn't match. Please try again.
</div>
{% endif %}
<label for="inputUsername" class="sr-only">Username</label>
<input type="text" name="username" id="inputUsername" class="form-control" placeholder="Username" required autofocus>
<label for="inputPassword" class="sr-only">Password</label>
<input type="password" name="password" id="inputPassword" class="form-control" placeholder="Password" required>
<button class="btn btn-lg btn-primary btn-block" type="submit">Sign in</button>
</form>
</body>
</html>

View File

@ -1,66 +1,10 @@
import re
from django.test import TestCase
from unittest import mock
from tempfile import TemporaryDirectory
from ..consumer import Consumer
from ..models import FileInfo, Tag
class TestConsumer(TestCase):
class DummyParser(object):
pass
def test__get_parser_class_1_parser(self):
self.assertEqual(
self._get_consumer()._get_parser_class("doc.pdf"),
self.DummyParser
)
@mock.patch("documents.consumer.os.makedirs")
@mock.patch("documents.consumer.os.path.exists", return_value=True)
@mock.patch("documents.consumer.document_consumer_declaration.send")
def test__get_parser_class_n_parsers(self, m, *args):
class DummyParser1(object):
pass
class DummyParser2(object):
pass
m.return_value = (
(None, lambda _: {"weight": 0, "parser": DummyParser1}),
(None, lambda _: {"weight": 1, "parser": DummyParser2}),
)
with TemporaryDirectory() as tmpdir:
self.assertEqual(
Consumer(consume=tmpdir)._get_parser_class("doc.pdf"),
DummyParser2
)
@mock.patch("documents.consumer.os.makedirs")
@mock.patch("documents.consumer.os.path.exists", return_value=True)
@mock.patch("documents.consumer.document_consumer_declaration.send")
def test__get_parser_class_0_parsers(self, m, *args):
m.return_value = ((None, lambda _: None),)
with TemporaryDirectory() as tmpdir:
self.assertIsNone(
Consumer(consume=tmpdir)._get_parser_class("doc.pdf")
)
@mock.patch("documents.consumer.os.makedirs")
@mock.patch("documents.consumer.os.path.exists", return_value=True)
@mock.patch("documents.consumer.document_consumer_declaration.send")
def _get_consumer(self, m, *args):
m.return_value = (
(None, lambda _: {"weight": 0, "parser": self.DummyParser}),
)
with TemporaryDirectory() as tmpdir:
return Consumer(consume=tmpdir)
class TestAttributes(TestCase):
TAGS = ("tag1", "tag2", "tag3")

View File

@ -0,0 +1,559 @@
import datetime
import os
import shutil
from unittest import mock
from uuid import uuid4
from pathlib import Path
from shutil import rmtree
from dateutil import tz
from django.test import TestCase, override_settings
from django.utils.text import slugify
from ..models import Tag, Document, Correspondent
from django.conf import settings
class TestDate(TestCase):
deletion_list = []
def add_to_deletion_list(self, dirname):
self.deletion_list.append(dirname)
def setUp(self):
folder = "/tmp/paperless-tests-{}".format(str(uuid4())[:8])
os.makedirs(folder + "/documents/originals")
override_settings(MEDIA_ROOT=folder).enable()
override_settings(ORIGINALS_DIR=folder + "/documents/originals").enable()
self.add_to_deletion_list(folder)
def tearDown(self):
for dirname in self.deletion_list:
shutil.rmtree(dirname, ignore_errors=True)
@override_settings(PAPERLESS_FILENAME_FORMAT="")
def test_source_filename(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
self.assertEqual(document.source_filename, "0000001.pdf")
document.filename = "test.pdf"
self.assertEqual(document.source_filename, "test.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="")
def test_generate_source_filename(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
self.assertEqual(document.generate_source_filename(), "0000001.pdf")
document.storage_type = Document.STORAGE_TYPE_GPG
self.assertEqual(document.generate_source_filename(),
"0000001.pdf.gpg")
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
"{correspondent}")
def test_file_renaming(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none/none-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()
# Test source_path
self.assertEqual(document.source_path, settings.MEDIA_ROOT +
"/documents/originals/none/none-0000001.pdf")
# Enable encryption and check again
document.storage_type = Document.STORAGE_TYPE_GPG
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none/none-0000001.pdf.gpg")
document.save()
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/none"), True)
# Set a correspondent and save the document
document.correspondent = Correspondent.objects.get_or_create(
name="test")[0]
document.save()
# Check proper handling of files
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/test"), True)
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/none"), False)
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" +
"originals/test/test-0000001.pdf.gpg"), True)
self.assertEqual(document.generate_source_filename(),
"test/test-0000001.pdf.gpg")
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
"{correspondent}")
def test_file_renaming_missing_permissions(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none/none-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()
# Test source_path
self.assertEqual(document.source_path, settings.MEDIA_ROOT +
"/documents/originals/none/none-0000001.pdf")
# Make the folder read- and execute-only (no writing and no renaming)
os.chmod(settings.MEDIA_ROOT + "/documents/originals/none", 0o555)
# Set a correspondent and save the document
document.correspondent = Correspondent.objects.get_or_create(
name="test")[0]
document.save()
# Check proper handling of files
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" +
"originals/none/none-0000001.pdf"), True)
self.assertEqual(document.source_filename,
"none/none-0000001.pdf")
os.chmod(settings.MEDIA_ROOT + "/documents/originals/none", 0o777)
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
"{correspondent}")
def test_document_delete(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none/none-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()
# Ensure file deletion after delete
document.delete()
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT +
"/documents/originals/none/none-0000001.pdf"), False)
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/none"), False)
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
"{correspondent}")
def test_document_delete_nofile(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
document.delete()
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
"{correspondent}")
def test_directory_not_empty(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none/none-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()
Path(document.source_path + "test").touch()
# Set a correspondent and save the document
document.correspondent = Correspondent.objects.get_or_create(
name="test")[0]
document.save()
# Check proper handling of files
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/test"), True)
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/none"), True)
# Cleanup
os.remove(settings.MEDIA_ROOT +
"/documents/originals/none/none-0000001.pdftest")
os.rmdir(settings.MEDIA_ROOT + "/documents/originals/none")
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
def test_tags_with_underscore(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
# Add tag to document
document.tags.create(name="type_demo")
document.tags.create(name="foo_bar")
document.save()
# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"demo-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()
document.delete()
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
def test_tags_with_dash(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
# Add tag to document
document.tags.create(name="type-demo")
document.tags.create(name="foo-bar")
document.save()
# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"demo-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()
document.delete()
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
def test_tags_malformed(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
# Add tag to document
document.tags.create(name="type:demo")
document.tags.create(name="foo:bar")
document.save()
# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()
document.delete()
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[0]}")
def test_tags_all(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
# Add tag to document
document.tags.create(name="demo")
document.save()
# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"demo-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()
document.delete()
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[0]}")
def test_tags_out_of_bounds_0(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()
document.delete()
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[10000000]}")
def test_tags_out_of_bounds_10000000(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()
document.delete()
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[99]}")
def test_tags_out_of_bounds_99(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()
document.delete()
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
"{correspondent}/{correspondent}")
def test_nested_directory_cleanup(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none/none/none-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()
# Check proper handling of files
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/none/none"), True)
document.delete()
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT +
"/documents/originals/none/none/none-0000001.pdf"),
False)
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/none/none"), False)
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/none"), False)
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals"), True)
@override_settings(PAPERLESS_FILENAME_FORMAT=None)
def test_format_none(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
self.assertEqual(document.generate_source_filename(), "0000001.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
"{correspondent}")
def test_document_renamed(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none/none-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()
# Test source_path
self.assertEqual(document.source_path, settings.MEDIA_ROOT +
"/documents/originals/none/none-0000001.pdf")
# Rename the document "illegaly"
os.makedirs(settings.MEDIA_ROOT + "/documents/originals/test")
os.rename(settings.MEDIA_ROOT + "/documents/originals/" +
"none/none-0000001.pdf",
settings.MEDIA_ROOT + "/documents/originals/" +
"test/test-0000001.pdf")
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" +
"originals/test/test-0000001.pdf"), True)
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" +
"originals/none/none-0000001.pdf"), False)
# Set new correspondent and expect document to be saved properly
document.correspondent = Correspondent.objects.get_or_create(
name="foo")[0]
document.save()
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" +
"originals/foo/foo-0000001.pdf"), True)
# Check proper handling of files
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/foo"), True)
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/none"), False)
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/test"), False)
self.assertEqual(document.generate_source_filename(),
"foo/foo-0000001.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
"{correspondent}")
def test_document_renamed_encrypted(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_GPG
document.save()
# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none/none-0000001.pdf.gpg")
document.create_source_directory()
Path(document.source_path).touch()
# Test source_path
self.assertEqual(document.source_path, settings.MEDIA_ROOT +
"/documents/originals/none/none-0000001.pdf.gpg")
# Rename the document "illegaly"
os.makedirs(settings.MEDIA_ROOT + "/documents/originals/test")
os.rename(settings.MEDIA_ROOT + "/documents/originals/" +
"none/none-0000001.pdf.gpg",
settings.MEDIA_ROOT + "/documents/originals/" +
"test/test-0000001.pdf.gpg")
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" +
"originals/test/test-0000001.pdf.gpg"), True)
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" +
"originals/none/none-0000001.pdf"), False)
# Set new correspondent and expect document to be saved properly
document.correspondent = Correspondent.objects.get_or_create(
name="foo")[0]
document.save()
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" +
"originals/foo/foo-0000001.pdf.gpg"), True)
# Check proper handling of files
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/foo"), True)
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/none"), False)
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/test"), False)
self.assertEqual(document.generate_source_filename(),
"foo/foo-0000001.pdf.gpg")
def test_delete_all_empty_subdirectories(self):
# Create our working directory
tmp = "/tmp/paperless-tests-{}".format(str(uuid4())[:8])
os.makedirs(tmp)
self.add_to_deletion_list(tmp)
os.makedirs(os.path.join(tmp, "empty"))
os.makedirs(os.path.join(tmp, "empty", "subdirectory"))
os.makedirs(os.path.join(tmp, "notempty"))
Path(os.path.join(tmp, "notempty", "file")).touch()
Document.delete_all_empty_subdirectories(tmp)
self.assertEqual(os.path.isdir(os.path.join(tmp, "notempty")), True)
self.assertEqual(os.path.isdir(os.path.join(tmp, "empty")), False)
self.assertEqual(os.path.isfile(
os.path.join(tmp, "notempty", "file")), True)
def test_try_delete_empty_directories(self):
# Create our working directory
tmp = "/tmp/paperless-tests-{}".format(str(uuid4())[:8])
os.makedirs(tmp)
self.add_to_deletion_list(tmp)
os.makedirs(os.path.join(tmp, "notempty"))
Path(os.path.join(tmp, "notempty", "file")).touch()
os.makedirs(os.path.join(tmp, "notempty", "empty"))
Document.try_delete_empty_directories(
os.path.join(tmp, "notempty", "empty"))
self.assertEqual(os.path.isdir(os.path.join(tmp, "notempty")), True)
self.assertEqual(os.path.isfile(
os.path.join(tmp, "notempty", "file")), True)
self.assertEqual(os.path.isdir(
os.path.join(tmp, "notempty", "empty")), False)
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
"{correspondent}")
def test_document_accidentally_deleted(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none/none-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()
# Test source_path
self.assertEqual(document.source_path, settings.MEDIA_ROOT +
"/documents/originals/none/none-0000001.pdf")
# Delete the document "illegaly"
os.remove(settings.MEDIA_ROOT + "/documents/originals/" +
"none/none-0000001.pdf")
# Set new correspondent and expect document to be saved properly
document.correspondent = Correspondent.objects.get_or_create(
name="foo")[0]
document.save()
# Check proper handling of files
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/none"), True)
self.assertEqual(document.source_filename,
"none/none-0000001.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
"{correspondent}")
def test_set_filename(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none/none-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()
# Set existing filename
document.set_filename(tmp)
self.assertEqual(document.source_filename, "none/none-0000001.pdf")
# Set non-existing filename
document.set_filename("doesnotexist")
self.assertEqual(document.source_filename, "none/none-0000001.pdf")

View File

@ -0,0 +1,50 @@
from tempfile import TemporaryDirectory
from unittest import mock
from django.test import TestCase
from documents.parsers import get_parser_class
class TestParserDiscovery(TestCase):
@mock.patch("documents.parsers.document_consumer_declaration.send")
def test__get_parser_class_1_parser(self, m, *args):
class DummyParser(object):
pass
m.return_value = (
(None, lambda _: {"weight": 0, "parser": DummyParser}),
)
self.assertEqual(
get_parser_class("doc.pdf"),
DummyParser
)
@mock.patch("documents.parsers.document_consumer_declaration.send")
def test__get_parser_class_n_parsers(self, m, *args):
class DummyParser1(object):
pass
class DummyParser2(object):
pass
m.return_value = (
(None, lambda _: {"weight": 0, "parser": DummyParser1}),
(None, lambda _: {"weight": 1, "parser": DummyParser2}),
)
self.assertEqual(
get_parser_class("doc.pdf"),
DummyParser2
)
@mock.patch("documents.parsers.document_consumer_declaration.send")
def test__get_parser_class_0_parsers(self, m, *args):
m.return_value = ((None, lambda _: None),)
with TemporaryDirectory() as tmpdir:
self.assertIsNone(
get_parser_class("doc.pdf")
)

View File

@ -6,9 +6,6 @@ from django_filters.rest_framework import DjangoFilterBackend
from rest_framework.decorators import action
from rest_framework.response import Response
from rest_framework.views import APIView
from whoosh import highlight
from whoosh.qparser import QueryParser
from whoosh.query import terms
from paperless.db import GnuPG
from paperless.views import StandardPagination
@ -97,7 +94,16 @@ class DocumentViewSet(RetrieveModelMixin,
filter_class = DocumentFilterSet
search_fields = ("title", "correspondent__name", "content")
ordering_fields = (
"id", "title", "correspondent__name", "created", "modified", "added", "archive_serial_number")
"id", "title", "correspondent__name", "document_type__name", "created", "modified", "added", "archive_serial_number")
def update(self, request, *args, **kwargs):
response = super(DocumentViewSet, self).update(request, *args, **kwargs)
index.add_or_update_document(self.get_object())
return response
def destroy(self, request, *args, **kwargs):
index.remove_document_from_index(self.get_object())
return super(DocumentViewSet, self).destroy(request, *args, **kwargs)
def file_response(self, pk, disposition):
#TODO: this should not be necessary here.
@ -185,18 +191,13 @@ class SearchView(APIView):
except (ValueError, TypeError):
page = 1
with self.ix.searcher() as searcher:
query_parser = QueryParser("content", self.ix.schema).parse(query)
result_page = searcher.search_page(query_parser, page)
result_page.results.fragmenter = highlight.ContextFragmenter(
surround=50)
result_page.results.formatter = index.JsonFormatter()
result_page = index.query_page(self.ix, query, page)
return Response(
{'count': len(result_page),
'page': result_page.pagenum,
'page_count': result_page.pagecount,
'results': list(map(self.add_infos_to_hit, result_page))})
return Response(
{'count': len(result_page),
'page': result_page.pagenum,
'page_count': result_page.pagecount,
'results': list(map(self.add_infos_to_hit, result_page))})
else:
return Response({

View File

@ -1,11 +1,17 @@
from rest_framework.authentication import TokenAuthentication
from django.conf import settings
from django.contrib.auth.models import User
from rest_framework import authentication
class AngularApiAuthenticationOverride(authentication.BaseAuthentication):
""" This class is here to provide authentication to the angular dev server
during development. This is disabled in production.
"""
# This authentication method is required to serve documents and thumbnails for the front end.
# https://stackoverflow.com/questions/29433416/token-in-query-string-with-django-rest-frameworks-tokenauthentication
class QueryTokenAuthentication(TokenAuthentication):
def authenticate(self, request):
# Check if 'token_auth' is in the request query params.
if 'auth_token' in request.query_params and 'HTTP_AUTHORIZATION' not in request.META:
return self.authenticate_credentials(request.query_params.get('auth_token'))
if settings.DEBUG and 'Referer' in request.headers and request.headers['Referer'].startswith('http://localhost:4200/'):
user = User.objects.filter(is_staff=True).first()
print("Auto-Login with user {}".format(user))
return (user, None)
else:
return None

View File

@ -1,14 +0,0 @@
from django.utils.deprecation import MiddlewareMixin
from .models import User
class Middleware(MiddlewareMixin):
"""
This is a dummy authentication middleware class that creates what
is roughly an Anonymous authenticated user so we can disable login
and not interfere with existing user ID's. It's only used if
login is disabled in paperless.conf (default is to require login)
"""
def process_request(self, request):
request.user = User()

View File

@ -1,31 +0,0 @@
from django.contrib.auth.models import User as DjangoUser
class User:
"""
This is a dummy django User used with our middleware to disable
login authentication if that is configured in paperless.conf
"""
is_superuser = True
is_active = True
is_staff = True
is_authenticated = True
@property
def id(self):
return DjangoUser.objects.order_by("pk").first().pk
@property
def pk(self):
return self.id
"""
NOTE: These are here as a hack instead of being in the User definition
NOTE: above due to the way pycodestyle handles lamdbdas.
NOTE: See https://github.com/PyCQA/pycodestyle/issues/379 for more.
"""
User.has_module_perms = lambda *_: True
User.has_perm = lambda *_: True

View File

@ -21,6 +21,9 @@ def __get_boolean(key, default="NO"):
"""
return bool(os.getenv(key, default).lower() in ("yes", "y", "1", "t", "true"))
# NEVER RUN WITH DEBUG IN PRODUCTION.
DEBUG = __get_boolean("PAPERLESS_DEBUG", "NO")
###############################################################################
# Directories #
###############################################################################
@ -66,9 +69,10 @@ INSTALLED_APPS = [
"django.contrib.admin",
"rest_framework",
"rest_framework.authtoken",
"django_filters",
"django_q",
"channels",
]
@ -76,11 +80,15 @@ INSTALLED_APPS = [
REST_FRAMEWORK = {
'DEFAULT_AUTHENTICATION_CLASSES': [
'rest_framework.authentication.BasicAuthentication',
'rest_framework.authentication.TokenAuthentication',
'paperless.auth.QueryTokenAuthentication'
'rest_framework.authentication.SessionAuthentication'
]
}
if DEBUG:
REST_FRAMEWORK['DEFAULT_AUTHENTICATION_CLASSES'].append(
'paperless.auth.AngularApiAuthenticationOverride'
)
MIDDLEWARE = [
'django.middleware.security.SecurityMiddleware',
'whitenoise.middleware.WhiteNoiseMiddleware',
@ -95,8 +103,6 @@ MIDDLEWARE = [
ROOT_URLCONF = 'paperless.urls'
LOGIN_URL = "admin:login"
FORCE_SCRIPT_NAME = os.getenv("PAPERLESS_FORCE_SCRIPT_NAME")
WSGI_APPLICATION = 'paperless.wsgi.application'
@ -125,9 +131,6 @@ TEMPLATES = [
# Security #
###############################################################################
# NEVER RUN WITH DEBUG IN PRODUCTION.
DEBUG = __get_boolean("PAPERLESS_DEBUG", "NO")
if DEBUG:
X_FRAME_OPTIONS = ''
# this should really be 'allow-from uri' but its not supported in any mayor
@ -142,11 +145,6 @@ if DEBUG:
# Allow access from the angular development server during debugging
CORS_ORIGIN_WHITELIST += ('http://localhost:4200',)
# If auth is disabled, we just use our "bypass" authentication middleware
if bool(os.getenv("PAPERLESS_DISABLE_LOGIN", "false").lower() in ("yes", "y", "1", "t", "true")):
_index = MIDDLEWARE.index("django.contrib.auth.middleware.AuthenticationMiddleware")
MIDDLEWARE[_index] = "paperless.middleware.Middleware"
# The secret key has a default that should be fine so long as you're hosting
# Paperless on a closed network. However, if you're putting this anywhere
# public, you should change the key to something unique and verbose.
@ -249,6 +247,16 @@ LOGGING = {
},
}
###############################################################################
# Task queue #
###############################################################################
Q_CLUSTER = {
'name': 'paperless',
'catch_up': False,
'redis': os.getenv("PAPERLESS_REDIS", "redis://localhost:6379")
}
###############################################################################
# Paperless Specific Settings #
###############################################################################
@ -303,6 +311,9 @@ FILENAME_PARSE_TRANSFORMS = []
for t in json.loads(os.getenv("PAPERLESS_FILENAME_PARSE_TRANSFORMS", "[]")):
FILENAME_PARSE_TRANSFORMS.append((re.compile(t["pattern"]), t["repl"]))
# Specify the filename format for out files
PAPERLESS_FILENAME_FORMAT = os.getenv("PAPERLESS_FILENAME_FORMAT")
CHANNEL_LAYERS = {
"default": {
"BACKEND": "channels_redis.core.RedisChannelLayer",

View File

@ -1,9 +1,9 @@
from django.conf.urls import include, url
from django.contrib import admin
from django.contrib.auth.decorators import login_required
from django.urls import path, re_path
from django.views.decorators.csrf import csrf_exempt
from django.views.generic import RedirectView
from rest_framework.authtoken import views
from rest_framework.routers import DefaultRouter
from paperless.consumers import StatusConsumer
@ -35,7 +35,7 @@ urlpatterns = [
url(r"^api/search/autocomplete/", SearchAutoCompleteView.as_view(), name="autocomplete"),
url(r"^api/search/", SearchView.as_view(), name="search"),
url(r"^api/statistics/", StatisticsView.as_view(), name="statistics"),
url(r"^api/token/", views.obtain_auth_token), url(r"^api/", include((api_router.urls, 'drf'), namespace="drf")),
url(r"^api/", include((api_router.urls, 'drf'), namespace="drf")),
# Favicon
url(r"^favicon.ico$", FaviconView.as_view(), name="favicon"),
@ -59,10 +59,12 @@ urlpatterns = [
url(r"^push$", csrf_exempt(RedirectView.as_view(url='/api/documents/post_document/'))),
# Frontend assets TODO: this is pretty bad.
path('assets/<path:path>', RedirectView.as_view(url='/static/assets/%(path)s')),
path('assets/<path:path>', RedirectView.as_view(url='/static/frontend/assets/%(path)s')),
path('accounts/', include('django.contrib.auth.urls')),
# Root of the Frontent
url(r".*", IndexView.as_view()),
url(r".*", login_required(IndexView.as_view())),
]