mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-10-24 03:26:11 -05:00
Merge remote-tracking branch 'upstream/dev' into feature-bulk-editor
This commit is contained in:
15
docs/api.rst
15
docs/api.rst
@@ -221,21 +221,16 @@ Each fragment contains a list of strings, and some of them are marked as a highl
|
|||||||
|
|
||||||
[
|
[
|
||||||
[
|
[
|
||||||
{"text": "This is a sample text with a "},
|
{"text": "This is a sample text with a ", "highlight": false},
|
||||||
{"text": "highlighted", "term": 0},
|
{"text": "highlighted", "highlight": true},
|
||||||
{"text": " word."}
|
{"text": " word.", "highlight": false}
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
{"text": "Another", "term": 1},
|
{"text": "Another", "highlight": true},
|
||||||
{"text": " fragment with a highlight."}
|
{"text": " fragment with a highlight.", "highlight": false}
|
||||||
]
|
]
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
When ``term`` is present within a string, the word within ``text`` should be highlighted.
|
|
||||||
The term index groups multiple matches together and words with the same index
|
|
||||||
should get identical highlighting.
|
|
||||||
A client may use this example to produce the following output:
|
A client may use this example to produce the following output:
|
||||||
|
|
||||||
... This is a sample text with a **highlighted** word. ... **Another** fragment with a highlight. ...
|
... This is a sample text with a **highlighted** word. ... **Another** fragment with a highlight. ...
|
||||||
|
@@ -1,4 +1,14 @@
|
|||||||
<app-page-header [(title)]="title">
|
<app-page-header [(title)]="title">
|
||||||
|
<div class="input-group input-group-sm mr-5" *ngIf="getContentType() == 'application/pdf'">
|
||||||
|
<div class="input-group-prepend">
|
||||||
|
<div class="input-group-text">Page </div>
|
||||||
|
</div>
|
||||||
|
<input class="form-control flex-grow-0 w-auto" type="number" min="1" [max]="previewNumPages" [(ngModel)]="previewCurrentPage" />
|
||||||
|
<div class="input-group-append">
|
||||||
|
<div class="input-group-text">of {{previewNumPages}}</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
<button type="button" class="btn btn-sm btn-outline-danger mr-2" (click)="delete()">
|
<button type="button" class="btn btn-sm btn-outline-danger mr-2" (click)="delete()">
|
||||||
<svg class="buttonicon" fill="currentColor">
|
<svg class="buttonicon" fill="currentColor">
|
||||||
<use xlink:href="assets/bootstrap-icons.svg#trash" />
|
<use xlink:href="assets/bootstrap-icons.svg#trash" />
|
||||||
@@ -24,6 +34,12 @@
|
|||||||
|
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<button type="button" class="btn btn-sm btn-outline-primary mr-2" (click)="moreLike()">
|
||||||
|
<svg class="buttonicon" fill="currentColor">
|
||||||
|
<use xlink:href="assets/bootstrap-icons.svg#three-dots" />
|
||||||
|
</svg>
|
||||||
|
<span class="d-none d-lg-inline"> More like this</span>
|
||||||
|
</button>
|
||||||
|
|
||||||
<button type="button" class="btn btn-sm btn-outline-primary" (click)="close()">
|
<button type="button" class="btn btn-sm btn-outline-primary" (click)="close()">
|
||||||
<svg class="buttonicon" fill="currentColor">
|
<svg class="buttonicon" fill="currentColor">
|
||||||
@@ -128,7 +144,7 @@
|
|||||||
|
|
||||||
<div class="col-md-6 col-xl-8 mb-3">
|
<div class="col-md-6 col-xl-8 mb-3">
|
||||||
<div class="pdf-viewer-container" *ngIf="getContentType() == 'application/pdf'">
|
<div class="pdf-viewer-container" *ngIf="getContentType() == 'application/pdf'">
|
||||||
<pdf-viewer [src]="previewUrl" [original-size]="false" [show-borders]="true"></pdf-viewer>
|
<pdf-viewer [src]="previewUrl" [original-size]="false" [show-borders]="true" [show-all]="true" [(page)]="previewCurrentPage" (after-load-complete)="pdfPreviewLoaded($event)"></pdf-viewer>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
@@ -15,6 +15,7 @@ import { DocumentService } from 'src/app/services/rest/document.service';
|
|||||||
import { ConfirmDialogComponent } from '../common/confirm-dialog/confirm-dialog.component';
|
import { ConfirmDialogComponent } from '../common/confirm-dialog/confirm-dialog.component';
|
||||||
import { CorrespondentEditDialogComponent } from '../manage/correspondent-list/correspondent-edit-dialog/correspondent-edit-dialog.component';
|
import { CorrespondentEditDialogComponent } from '../manage/correspondent-list/correspondent-edit-dialog/correspondent-edit-dialog.component';
|
||||||
import { DocumentTypeEditDialogComponent } from '../manage/document-type-list/document-type-edit-dialog/document-type-edit-dialog.component';
|
import { DocumentTypeEditDialogComponent } from '../manage/document-type-list/document-type-edit-dialog/document-type-edit-dialog.component';
|
||||||
|
import { PDFDocumentProxy } from 'ng2-pdf-viewer';
|
||||||
|
|
||||||
@Component({
|
@Component({
|
||||||
selector: 'app-document-detail',
|
selector: 'app-document-detail',
|
||||||
@@ -47,6 +48,9 @@ export class DocumentDetailComponent implements OnInit {
|
|||||||
tags: new FormControl([])
|
tags: new FormControl([])
|
||||||
})
|
})
|
||||||
|
|
||||||
|
previewCurrentPage: number = 1
|
||||||
|
previewNumPages: number = 1
|
||||||
|
|
||||||
constructor(
|
constructor(
|
||||||
private documentsService: DocumentService,
|
private documentsService: DocumentService,
|
||||||
private route: ActivatedRoute,
|
private route: ActivatedRoute,
|
||||||
@@ -168,7 +172,16 @@ export class DocumentDetailComponent implements OnInit {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
moreLike() {
|
||||||
|
this.router.navigate(["search"], {queryParams: {more_like:this.document.id}})
|
||||||
|
}
|
||||||
|
|
||||||
hasNext() {
|
hasNext() {
|
||||||
return this.documentListViewService.hasNext(this.documentId)
|
return this.documentListViewService.hasNext(this.documentId)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pdfPreviewLoaded(pdf: PDFDocumentProxy) {
|
||||||
|
this.previewNumPages = pdf.numPages
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@@ -23,8 +23,14 @@
|
|||||||
</p>
|
</p>
|
||||||
|
|
||||||
|
|
||||||
<div class="d-flex justify-content-between align-items-center">
|
<div class="d-flex align-items-center">
|
||||||
<div class="btn-group">
|
<div class="btn-group">
|
||||||
|
<a routerLink="/search" [queryParams]="{'more_like': document.id}" class="btn btn-sm btn-outline-secondary" *ngIf="moreLikeThis">
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-three-dots" viewBox="0 0 16 16">
|
||||||
|
<path fill-rule="evenodd" d="M3 9.5a1.5 1.5 0 1 1 0-3 1.5 1.5 0 0 1 0 3zm5 0a1.5 1.5 0 1 1 0-3 1.5 1.5 0 0 1 0 3zm5 0a1.5 1.5 0 1 1 0-3 1.5 1.5 0 0 1 0 3z"/>
|
||||||
|
</svg>
|
||||||
|
More like this
|
||||||
|
</a>
|
||||||
<a routerLink="/documents/{{document.id}}" class="btn btn-sm btn-outline-secondary">
|
<a routerLink="/documents/{{document.id}}" class="btn btn-sm btn-outline-secondary">
|
||||||
<svg width="1em" height="1em" viewBox="0 0 16 16" class="bi bi-pencil" fill="currentColor" xmlns="http://www.w3.org/2000/svg">
|
<svg width="1em" height="1em" viewBox="0 0 16 16" class="bi bi-pencil" fill="currentColor" xmlns="http://www.w3.org/2000/svg">
|
||||||
<path fill-rule="evenodd" d="M12.146.146a.5.5 0 0 1 .708 0l3 3a.5.5 0 0 1 0 .708l-10 10a.5.5 0 0 1-.168.11l-5 2a.5.5 0 0 1-.65-.65l2-5a.5.5 0 0 1 .11-.168l10-10zM11.207 2.5L13.5 4.793 14.793 3.5 12.5 1.207 11.207 2.5zm1.586 3L10.5 3.207 4 9.707V10h.5a.5.5 0 0 1 .5.5v.5h.5a.5.5 0 0 1 .5.5v.5h.293l6.5-6.5zm-9.761 5.175l-.106.106-1.528 3.821 3.821-1.528.106-.106A.5.5 0 0 1 5 12.5V12h-.5a.5.5 0 0 1-.5-.5V11h-.5a.5.5 0 0 1-.468-.325z"/>
|
<path fill-rule="evenodd" d="M12.146.146a.5.5 0 0 1 .708 0l3 3a.5.5 0 0 1 0 .708l-10 10a.5.5 0 0 1-.168.11l-5 2a.5.5 0 0 1-.65-.65l2-5a.5.5 0 0 1 .11-.168l10-10zM11.207 2.5L13.5 4.793 14.793 3.5 12.5 1.207 11.207 2.5zm1.586 3L10.5 3.207 4 9.707V10h.5a.5.5 0 0 1 .5.5v.5h.5a.5.5 0 0 1 .5.5v.5h.293l6.5-6.5zm-9.761 5.175l-.106.106-1.528 3.821 3.821-1.528.106-.106A.5.5 0 0 1 5 12.5V12h-.5a.5.5 0 0 1-.5-.5V11h-.5a.5.5 0 0 1-.468-.325z"/>
|
||||||
@@ -45,7 +51,13 @@
|
|||||||
</svg>
|
</svg>
|
||||||
Download
|
Download
|
||||||
</a>
|
</a>
|
||||||
|
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<small class="text-muted ml-auto">Score:</small>
|
||||||
|
|
||||||
|
<ngb-progressbar *ngIf="searchScore" [type]="searchScoreClass" [value]="searchScore" class="search-score-bar mx-2" [max]="1"></ngb-progressbar>
|
||||||
|
|
||||||
<small class="text-muted">Created: {{document.created | date}}</small>
|
<small class="text-muted">Created: {{document.created | date}}</small>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
@@ -10,3 +10,9 @@
|
|||||||
position: absolute;
|
position: absolute;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.search-score-bar {
|
||||||
|
width: 100px;
|
||||||
|
height: 5px;
|
||||||
|
margin-top: 2px;
|
||||||
|
}
|
@@ -12,6 +12,9 @@ export class DocumentCardLargeComponent implements OnInit {
|
|||||||
|
|
||||||
constructor(private documentService: DocumentService, private sanitizer: DomSanitizer) { }
|
constructor(private documentService: DocumentService, private sanitizer: DomSanitizer) { }
|
||||||
|
|
||||||
|
@Input()
|
||||||
|
moreLikeThis: boolean = false
|
||||||
|
|
||||||
@Input()
|
@Input()
|
||||||
document: PaperlessDocument
|
document: PaperlessDocument
|
||||||
|
|
||||||
@@ -24,6 +27,19 @@ export class DocumentCardLargeComponent implements OnInit {
|
|||||||
@Output()
|
@Output()
|
||||||
clickCorrespondent = new EventEmitter<number>()
|
clickCorrespondent = new EventEmitter<number>()
|
||||||
|
|
||||||
|
@Input()
|
||||||
|
searchScore: number
|
||||||
|
|
||||||
|
get searchScoreClass() {
|
||||||
|
if (this.searchScore > 0.7) {
|
||||||
|
return "success"
|
||||||
|
} else if (this.searchScore > 0.3) {
|
||||||
|
return "warning"
|
||||||
|
} else {
|
||||||
|
return "danger"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
ngOnInit(): void {
|
ngOnInit(): void {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -1,4 +1,4 @@
|
|||||||
<div class="col p-2 h-100 document-card" style="width: 16rem;">
|
<div class="col p-2 h-100">
|
||||||
<div class="card h-100 shadow-sm" [class.card-selected]="selected">
|
<div class="card h-100 shadow-sm" [class.card-selected]="selected">
|
||||||
<div class="border-bottom" [class.doc-img-background-selected]="selected">
|
<div class="border-bottom" [class.doc-img-background-selected]="selected">
|
||||||
<img class="card-img doc-img" [src]="getThumbUrl()" (click)="selected = !selected">
|
<img class="card-img doc-img" [src]="getThumbUrl()" (click)="selected = !selected">
|
||||||
|
@@ -151,5 +151,5 @@
|
|||||||
|
|
||||||
|
|
||||||
<div class=" m-n2 row" *ngIf="displayMode == 'smallCards'">
|
<div class=" m-n2 row" *ngIf="displayMode == 'smallCards'">
|
||||||
<app-document-card-small [selected]="list.isSelected(d)" (selectedChange)="list.setSelected(d, $event)" [document]="d" *ngFor="let d of list.documents" (clickTag)="clickTag($event)" (clickCorrespondent)="clickCorrespondent($event)"></app-document-card-small>
|
<app-document-card-small [document]="d" [selected]="list.isSelected(d)" (selectedChange)="list.setSelected(d, $event)" *ngFor="let d of list.documents" (clickTag)="clickTag($event)" (clickCorrespondent)="clickCorrespondent($event)"></app-document-card-small>
|
||||||
</div>
|
</div>
|
||||||
|
@@ -1,3 +1,3 @@
|
|||||||
... <span *ngFor="let fragment of highlights">
|
... <span *ngFor="let fragment of highlights">
|
||||||
<span *ngFor="let token of fragment" [ngClass]="token.term != null ? 'match term'+ token.term : ''">{{token.text}}</span> ...
|
<span *ngFor="let token of fragment" [class.match]="token.highlight">{{token.text}}</span> ...
|
||||||
</span>
|
</span>
|
@@ -1,4 +1,4 @@
|
|||||||
.match {
|
.match {
|
||||||
color: black;
|
color: black;
|
||||||
background-color: orange;
|
background-color: rgb(255, 211, 66);
|
||||||
}
|
}
|
@@ -3,7 +3,12 @@
|
|||||||
|
|
||||||
<div *ngIf="errorMessage" class="alert alert-danger">Invalid search query: {{errorMessage}}</div>
|
<div *ngIf="errorMessage" class="alert alert-danger">Invalid search query: {{errorMessage}}</div>
|
||||||
|
|
||||||
<p>
|
<p *ngIf="more_like">
|
||||||
|
Showing documents similar to
|
||||||
|
<a routerLink="/documents/{{more_like}}">{{more_like_doc?.original_file_name}}</a>
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<p *ngIf="query">
|
||||||
Search string: <i>{{query}}</i>
|
Search string: <i>{{query}}</i>
|
||||||
<ng-container *ngIf="correctedQuery">
|
<ng-container *ngIf="correctedQuery">
|
||||||
- Did you mean "<a [routerLink]="" (click)="searchCorrectedQuery()">{{correctedQuery}}</a>"?
|
- Did you mean "<a [routerLink]="" (click)="searchCorrectedQuery()">{{correctedQuery}}</a>"?
|
||||||
@@ -15,7 +20,9 @@
|
|||||||
<p>{{resultCount}} result(s)</p>
|
<p>{{resultCount}} result(s)</p>
|
||||||
<app-document-card-large *ngFor="let result of results"
|
<app-document-card-large *ngFor="let result of results"
|
||||||
[document]="result.document"
|
[document]="result.document"
|
||||||
[details]="result.highlights">
|
[details]="result.highlights"
|
||||||
|
[searchScore]="result.score / maxScore"
|
||||||
|
[moreLikeThis]="true">
|
||||||
|
|
||||||
</app-document-card-large>
|
</app-document-card-large>
|
||||||
</div>
|
</div>
|
||||||
|
@@ -1,6 +1,9 @@
|
|||||||
import { Component, OnInit } from '@angular/core';
|
import { Component, OnInit } from '@angular/core';
|
||||||
import { ActivatedRoute, Router } from '@angular/router';
|
import { ActivatedRoute, Router } from '@angular/router';
|
||||||
|
import { PaperlessDocument } from 'src/app/data/paperless-document';
|
||||||
|
import { PaperlessDocumentType } from 'src/app/data/paperless-document-type';
|
||||||
import { SearchHit } from 'src/app/data/search-result';
|
import { SearchHit } from 'src/app/data/search-result';
|
||||||
|
import { DocumentService } from 'src/app/services/rest/document.service';
|
||||||
import { SearchService } from 'src/app/services/rest/search.service';
|
import { SearchService } from 'src/app/services/rest/search.service';
|
||||||
|
|
||||||
@Component({
|
@Component({
|
||||||
@@ -14,6 +17,10 @@ export class SearchComponent implements OnInit {
|
|||||||
|
|
||||||
query: string = ""
|
query: string = ""
|
||||||
|
|
||||||
|
more_like: number
|
||||||
|
|
||||||
|
more_like_doc: PaperlessDocument
|
||||||
|
|
||||||
searching = false
|
searching = false
|
||||||
|
|
||||||
currentPage = 1
|
currentPage = 1
|
||||||
@@ -26,11 +33,24 @@ export class SearchComponent implements OnInit {
|
|||||||
|
|
||||||
errorMessage: string
|
errorMessage: string
|
||||||
|
|
||||||
constructor(private searchService: SearchService, private route: ActivatedRoute, private router: Router) { }
|
get maxScore() {
|
||||||
|
return this.results?.length > 0 ? this.results[0].score : 100
|
||||||
|
}
|
||||||
|
|
||||||
|
constructor(private searchService: SearchService, private route: ActivatedRoute, private router: Router, private documentService: DocumentService) { }
|
||||||
|
|
||||||
ngOnInit(): void {
|
ngOnInit(): void {
|
||||||
this.route.queryParamMap.subscribe(paramMap => {
|
this.route.queryParamMap.subscribe(paramMap => {
|
||||||
|
window.scrollTo(0, 0)
|
||||||
this.query = paramMap.get('query')
|
this.query = paramMap.get('query')
|
||||||
|
this.more_like = paramMap.has('more_like') ? +paramMap.get('more_like') : null
|
||||||
|
if (this.more_like) {
|
||||||
|
this.documentService.get(this.more_like).subscribe(r => {
|
||||||
|
this.more_like_doc = r
|
||||||
|
})
|
||||||
|
} else {
|
||||||
|
this.more_like_doc = null
|
||||||
|
}
|
||||||
this.searching = true
|
this.searching = true
|
||||||
this.currentPage = 1
|
this.currentPage = 1
|
||||||
this.loadPage()
|
this.loadPage()
|
||||||
@@ -39,13 +59,14 @@ export class SearchComponent implements OnInit {
|
|||||||
}
|
}
|
||||||
|
|
||||||
searchCorrectedQuery() {
|
searchCorrectedQuery() {
|
||||||
this.router.navigate(["search"], {queryParams: {query: this.correctedQuery}})
|
this.router.navigate(["search"], {queryParams: {query: this.correctedQuery, more_like: this.more_like}})
|
||||||
}
|
}
|
||||||
|
|
||||||
loadPage(append: boolean = false) {
|
loadPage(append: boolean = false) {
|
||||||
this.errorMessage = null
|
this.errorMessage = null
|
||||||
this.correctedQuery = null
|
this.correctedQuery = null
|
||||||
this.searchService.search(this.query, this.currentPage).subscribe(result => {
|
|
||||||
|
this.searchService.search(this.query, this.currentPage, this.more_like).subscribe(result => {
|
||||||
if (append) {
|
if (append) {
|
||||||
this.results.push(...result.results)
|
this.results.push(...result.results)
|
||||||
} else {
|
} else {
|
||||||
|
@@ -15,11 +15,17 @@ export class SearchService {
|
|||||||
|
|
||||||
constructor(private http: HttpClient, private documentService: DocumentService) { }
|
constructor(private http: HttpClient, private documentService: DocumentService) { }
|
||||||
|
|
||||||
search(query: string, page?: number): Observable<SearchResult> {
|
search(query: string, page?: number, more_like?: number): Observable<SearchResult> {
|
||||||
let httpParams = new HttpParams().set('query', query)
|
let httpParams = new HttpParams()
|
||||||
|
if (query) {
|
||||||
|
httpParams = httpParams.set('query', query)
|
||||||
|
}
|
||||||
if (page) {
|
if (page) {
|
||||||
httpParams = httpParams.set('page', page.toString())
|
httpParams = httpParams.set('page', page.toString())
|
||||||
}
|
}
|
||||||
|
if (more_like) {
|
||||||
|
httpParams = httpParams.set('more_like', more_like.toString())
|
||||||
|
}
|
||||||
return this.http.get<SearchResult>(`${environment.apiBaseUrl}search/`, {params: httpParams}).pipe(
|
return this.http.get<SearchResult>(`${environment.apiBaseUrl}search/`, {params: httpParams}).pipe(
|
||||||
map(result => {
|
map(result => {
|
||||||
result.results.forEach(hit => this.documentService.addObservablesToDocument(hit.document))
|
result.results.forEach(hit => this.documentService.addObservablesToDocument(hit.document))
|
||||||
|
@@ -5,7 +5,8 @@
|
|||||||
export const environment = {
|
export const environment = {
|
||||||
production: false,
|
production: false,
|
||||||
apiBaseUrl: "http://localhost:8000/api/",
|
apiBaseUrl: "http://localhost:8000/api/",
|
||||||
appTitle: "DEVELOPMENT P-NG"
|
appTitle: "Paperless-ng",
|
||||||
|
version: "DEVELOPMENT"
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@@ -247,7 +247,6 @@ class Consumer(LoggingMixin):
|
|||||||
|
|
||||||
with open(self.path, "rb") as f:
|
with open(self.path, "rb") as f:
|
||||||
document = Document.objects.create(
|
document = Document.objects.create(
|
||||||
correspondent=file_info.correspondent,
|
|
||||||
title=(self.override_title or file_info.title)[:127],
|
title=(self.override_title or file_info.title)[:127],
|
||||||
content=text,
|
content=text,
|
||||||
mime_type=mime_type,
|
mime_type=mime_type,
|
||||||
@@ -257,12 +256,6 @@ class Consumer(LoggingMixin):
|
|||||||
storage_type=storage_type
|
storage_type=storage_type
|
||||||
)
|
)
|
||||||
|
|
||||||
relevant_tags = set(file_info.tags)
|
|
||||||
if relevant_tags:
|
|
||||||
tag_names = ", ".join([t.name for t in relevant_tags])
|
|
||||||
self.log("debug", "Tagging with {}".format(tag_names))
|
|
||||||
document.tags.add(*relevant_tags)
|
|
||||||
|
|
||||||
self.apply_overrides(document)
|
self.apply_overrides(document)
|
||||||
|
|
||||||
document.save()
|
document.save()
|
||||||
|
@@ -3,7 +3,7 @@ import os
|
|||||||
from contextlib import contextmanager
|
from contextlib import contextmanager
|
||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from whoosh import highlight
|
from whoosh import highlight, classify, query
|
||||||
from whoosh.fields import Schema, TEXT, NUMERIC, KEYWORD, DATETIME
|
from whoosh.fields import Schema, TEXT, NUMERIC, KEYWORD, DATETIME
|
||||||
from whoosh.highlight import Formatter, get_text
|
from whoosh.highlight import Formatter, get_text
|
||||||
from whoosh.index import create_in, exists_in, open_dir
|
from whoosh.index import create_in, exists_in, open_dir
|
||||||
@@ -20,32 +20,37 @@ class JsonFormatter(Formatter):
|
|||||||
self.seen = {}
|
self.seen = {}
|
||||||
|
|
||||||
def format_token(self, text, token, replace=False):
|
def format_token(self, text, token, replace=False):
|
||||||
seen = self.seen
|
|
||||||
ttext = self._text(get_text(text, token, replace))
|
ttext = self._text(get_text(text, token, replace))
|
||||||
if ttext in seen:
|
return {'text': ttext, 'highlight': 'true'}
|
||||||
termnum = seen[ttext]
|
|
||||||
else:
|
|
||||||
termnum = len(seen)
|
|
||||||
seen[ttext] = termnum
|
|
||||||
|
|
||||||
return {'text': ttext, 'term': termnum}
|
|
||||||
|
|
||||||
def format_fragment(self, fragment, replace=False):
|
def format_fragment(self, fragment, replace=False):
|
||||||
output = []
|
output = []
|
||||||
index = fragment.startchar
|
index = fragment.startchar
|
||||||
text = fragment.text
|
text = fragment.text
|
||||||
|
amend_token = None
|
||||||
for t in fragment.matches:
|
for t in fragment.matches:
|
||||||
if t.startchar is None:
|
if t.startchar is None:
|
||||||
continue
|
continue
|
||||||
if t.startchar < index:
|
if t.startchar < index:
|
||||||
continue
|
continue
|
||||||
if t.startchar > index:
|
if t.startchar > index:
|
||||||
output.append({'text': text[index:t.startchar]})
|
text_inbetween = text[index:t.startchar]
|
||||||
output.append(self.format_token(text, t, replace))
|
if amend_token and t.startchar - index < 10:
|
||||||
|
amend_token['text'] += text_inbetween
|
||||||
|
else:
|
||||||
|
output.append({'text': text_inbetween,
|
||||||
|
'highlight': False})
|
||||||
|
amend_token = None
|
||||||
|
token = self.format_token(text, t, replace)
|
||||||
|
if amend_token:
|
||||||
|
amend_token['text'] += token['text']
|
||||||
|
else:
|
||||||
|
output.append(token)
|
||||||
|
amend_token = token
|
||||||
index = t.endchar
|
index = t.endchar
|
||||||
if index < fragment.endchar:
|
if index < fragment.endchar:
|
||||||
output.append({'text': text[index:fragment.endchar]})
|
output.append({'text': text[index:fragment.endchar],
|
||||||
|
'highlight': False})
|
||||||
return output
|
return output
|
||||||
|
|
||||||
def format(self, fragments, replace=False):
|
def format(self, fragments, replace=False):
|
||||||
@@ -120,22 +125,42 @@ def remove_document_from_index(document):
|
|||||||
|
|
||||||
|
|
||||||
@contextmanager
|
@contextmanager
|
||||||
def query_page(ix, querystring, page):
|
def query_page(ix, page, querystring, more_like_doc_id, more_like_doc_content):
|
||||||
searcher = ix.searcher()
|
searcher = ix.searcher()
|
||||||
try:
|
try:
|
||||||
|
if querystring:
|
||||||
qp = MultifieldParser(
|
qp = MultifieldParser(
|
||||||
["content", "title", "correspondent", "tag", "type"],
|
["content", "title", "correspondent", "tag", "type"],
|
||||||
ix.schema)
|
ix.schema)
|
||||||
qp.add_plugin(DateParserPlugin())
|
qp.add_plugin(DateParserPlugin())
|
||||||
|
str_q = qp.parse(querystring)
|
||||||
|
corrected = searcher.correct_query(str_q, querystring)
|
||||||
|
else:
|
||||||
|
str_q = None
|
||||||
|
corrected = None
|
||||||
|
|
||||||
|
if more_like_doc_id:
|
||||||
|
docnum = searcher.document_number(id=more_like_doc_id)
|
||||||
|
kts = searcher.key_terms_from_text(
|
||||||
|
'content', more_like_doc_content, numterms=20,
|
||||||
|
model=classify.Bo1Model, normalize=False)
|
||||||
|
more_like_q = query.Or(
|
||||||
|
[query.Term('content', word, boost=weight)
|
||||||
|
for word, weight in kts])
|
||||||
|
result_page = searcher.search_page(
|
||||||
|
more_like_q, page, filter=str_q, mask={docnum})
|
||||||
|
elif str_q:
|
||||||
|
result_page = searcher.search_page(str_q, page)
|
||||||
|
else:
|
||||||
|
raise ValueError(
|
||||||
|
"Either querystring or more_like_doc_id is required."
|
||||||
|
)
|
||||||
|
|
||||||
q = qp.parse(querystring)
|
|
||||||
result_page = searcher.search_page(q, page)
|
|
||||||
result_page.results.fragmenter = highlight.ContextFragmenter(
|
result_page.results.fragmenter = highlight.ContextFragmenter(
|
||||||
surround=50)
|
surround=50)
|
||||||
result_page.results.formatter = JsonFormatter()
|
result_page.results.formatter = JsonFormatter()
|
||||||
|
|
||||||
corrected = searcher.correct_query(q, querystring)
|
if corrected and corrected.query != str_q:
|
||||||
if corrected.query != q:
|
|
||||||
corrected_query = corrected.string
|
corrected_query = corrected.string
|
||||||
else:
|
else:
|
||||||
corrected_query = None
|
corrected_query = None
|
||||||
|
@@ -11,6 +11,7 @@ from paperless.db import GnuPG
|
|||||||
STORAGE_TYPE_UNENCRYPTED = "unencrypted"
|
STORAGE_TYPE_UNENCRYPTED = "unencrypted"
|
||||||
STORAGE_TYPE_GPG = "gpg"
|
STORAGE_TYPE_GPG = "gpg"
|
||||||
|
|
||||||
|
|
||||||
def source_path(self):
|
def source_path(self):
|
||||||
if self.filename:
|
if self.filename:
|
||||||
fname = str(self.filename)
|
fname = str(self.filename)
|
||||||
|
@@ -357,54 +357,12 @@ class SavedViewFilterRule(models.Model):
|
|||||||
# TODO: why is this in the models file?
|
# TODO: why is this in the models file?
|
||||||
class FileInfo:
|
class FileInfo:
|
||||||
|
|
||||||
# This epic regex *almost* worked for our needs, so I'm keeping it here for
|
|
||||||
# posterity, in the hopes that we might find a way to make it work one day.
|
|
||||||
ALMOST_REGEX = re.compile(
|
|
||||||
r"^((?P<date>\d\d\d\d\d\d\d\d\d\d\d\d\d\dZ){separator})?"
|
|
||||||
r"((?P<correspondent>{non_separated_word}+){separator})??"
|
|
||||||
r"(?P<title>{non_separated_word}+)"
|
|
||||||
r"({separator}(?P<tags>[a-z,0-9-]+))?"
|
|
||||||
r"\.(?P<extension>[a-zA-Z.-]+)$".format(
|
|
||||||
separator=r"\s+-\s+",
|
|
||||||
non_separated_word=r"([\w,. ]|([^\s]-))"
|
|
||||||
)
|
|
||||||
)
|
|
||||||
REGEXES = OrderedDict([
|
REGEXES = OrderedDict([
|
||||||
("created-correspondent-title-tags", re.compile(
|
|
||||||
r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
|
|
||||||
r"(?P<correspondent>.*) - "
|
|
||||||
r"(?P<title>.*) - "
|
|
||||||
r"(?P<tags>[a-z0-9\-,]*)$",
|
|
||||||
flags=re.IGNORECASE
|
|
||||||
)),
|
|
||||||
("created-title-tags", re.compile(
|
|
||||||
r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
|
|
||||||
r"(?P<title>.*) - "
|
|
||||||
r"(?P<tags>[a-z0-9\-,]*)$",
|
|
||||||
flags=re.IGNORECASE
|
|
||||||
)),
|
|
||||||
("created-correspondent-title", re.compile(
|
|
||||||
r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
|
|
||||||
r"(?P<correspondent>.*) - "
|
|
||||||
r"(?P<title>.*)$",
|
|
||||||
flags=re.IGNORECASE
|
|
||||||
)),
|
|
||||||
("created-title", re.compile(
|
("created-title", re.compile(
|
||||||
r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
|
r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
|
||||||
r"(?P<title>.*)$",
|
r"(?P<title>.*)$",
|
||||||
flags=re.IGNORECASE
|
flags=re.IGNORECASE
|
||||||
)),
|
)),
|
||||||
("correspondent-title-tags", re.compile(
|
|
||||||
r"(?P<correspondent>.*) - "
|
|
||||||
r"(?P<title>.*) - "
|
|
||||||
r"(?P<tags>[a-z0-9\-,]*)$",
|
|
||||||
flags=re.IGNORECASE
|
|
||||||
)),
|
|
||||||
("correspondent-title", re.compile(
|
|
||||||
r"(?P<correspondent>.*) - "
|
|
||||||
r"(?P<title>.*)?$",
|
|
||||||
flags=re.IGNORECASE
|
|
||||||
)),
|
|
||||||
("title", re.compile(
|
("title", re.compile(
|
||||||
r"(?P<title>.*)$",
|
r"(?P<title>.*)$",
|
||||||
flags=re.IGNORECASE
|
flags=re.IGNORECASE
|
||||||
@@ -427,23 +385,10 @@ class FileInfo:
|
|||||||
except ValueError:
|
except ValueError:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def _get_correspondent(cls, name):
|
|
||||||
if not name:
|
|
||||||
return None
|
|
||||||
return Correspondent.objects.get_or_create(name=name)[0]
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _get_title(cls, title):
|
def _get_title(cls, title):
|
||||||
return title
|
return title
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def _get_tags(cls, tags):
|
|
||||||
r = []
|
|
||||||
for t in tags.split(","):
|
|
||||||
r.append(Tag.objects.get_or_create(name=t)[0])
|
|
||||||
return tuple(r)
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _mangle_property(cls, properties, name):
|
def _mangle_property(cls, properties, name):
|
||||||
if name in properties:
|
if name in properties:
|
||||||
@@ -453,15 +398,6 @@ class FileInfo:
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_filename(cls, filename):
|
def from_filename(cls, filename):
|
||||||
"""
|
|
||||||
We use a crude naming convention to make handling the correspondent,
|
|
||||||
title, and tags easier:
|
|
||||||
"<date> - <correspondent> - <title> - <tags>"
|
|
||||||
"<correspondent> - <title> - <tags>"
|
|
||||||
"<correspondent> - <title>"
|
|
||||||
"<title>"
|
|
||||||
"""
|
|
||||||
|
|
||||||
# Mutate filename in-place before parsing its components
|
# Mutate filename in-place before parsing its components
|
||||||
# by applying at most one of the configured transformations.
|
# by applying at most one of the configured transformations.
|
||||||
for (pattern, repl) in settings.FILENAME_PARSE_TRANSFORMS:
|
for (pattern, repl) in settings.FILENAME_PARSE_TRANSFORMS:
|
||||||
@@ -492,7 +428,5 @@ class FileInfo:
|
|||||||
if m:
|
if m:
|
||||||
properties = m.groupdict()
|
properties = m.groupdict()
|
||||||
cls._mangle_property(properties, "created")
|
cls._mangle_property(properties, "created")
|
||||||
cls._mangle_property(properties, "correspondent")
|
|
||||||
cls._mangle_property(properties, "title")
|
cls._mangle_property(properties, "title")
|
||||||
cls._mangle_property(properties, "tags")
|
|
||||||
return cls(**properties)
|
return cls(**properties)
|
||||||
|
@@ -5,7 +5,7 @@
|
|||||||
<html lang="en">
|
<html lang="en">
|
||||||
<head>
|
<head>
|
||||||
<meta charset="utf-8">
|
<meta charset="utf-8">
|
||||||
<title>PaperlessUi</title>
|
<title>Paperless-ng</title>
|
||||||
<base href="/">
|
<base href="/">
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||||
<meta name="cookie_prefix" content="{{cookie_prefix}}">
|
<meta name="cookie_prefix" content="{{cookie_prefix}}">
|
||||||
|
57
src/documents/tests/test_admin.py
Normal file
57
src/documents/tests/test_admin.py
Normal file
@@ -0,0 +1,57 @@
|
|||||||
|
from unittest import mock
|
||||||
|
|
||||||
|
from django.contrib.admin.sites import AdminSite
|
||||||
|
from django.test import TestCase
|
||||||
|
from django.utils import timezone
|
||||||
|
|
||||||
|
from documents.admin import DocumentAdmin
|
||||||
|
from documents.models import Document, Tag
|
||||||
|
|
||||||
|
|
||||||
|
class TestDocumentAdmin(TestCase):
    """Tests for the ``DocumentAdmin`` hooks: saving, deleting and column rendering."""

    def setUp(self) -> None:
        # The admin class is instantiated directly with a standalone AdminSite.
        self.doc_admin = DocumentAdmin(model=Document, admin_site=AdminSite())

    @mock.patch("documents.admin.index.add_or_update_document")
    def test_save_model(self, add_or_update):
        """save_model() persists the edit and calls the patched index update once."""
        document = Document.objects.create(title="test")
        document.title = "new title"

        self.doc_admin.save_model(None, document, None, None)

        stored = Document.objects.get(id=document.id)
        self.assertEqual(stored.title, "new title")
        add_or_update.assert_called_once()

    def test_tags(self):
        """tags_() renders one ``<span>`` per tag attached to the document."""
        document = Document.objects.create(title="test")
        for tag_name in ("t1", "t2"):
            document.tags.create(name=tag_name)

        expected = "<span >t1, </span><span >t2, </span>"
        self.assertEqual(self.doc_admin.tags_(document), expected)

    def test_tags_empty(self):
        """tags_() yields an empty string for a document without tags."""
        document = Document.objects.create(title="test")

        rendered = self.doc_admin.tags_(document)
        self.assertEqual(rendered, "")

    @mock.patch("documents.admin.index.remove_document")
    def test_delete_model(self, remove_from_index):
        """delete_model() removes the row and calls the patched index removal once."""
        document = Document.objects.create(title="test")

        self.doc_admin.delete_model(None, document)

        with self.assertRaises(Document.DoesNotExist):
            Document.objects.get(id=document.id)
        remove_from_index.assert_called_once()

    @mock.patch("documents.admin.index.remove_document")
    def test_delete_queryset(self, remove_from_index):
        """delete_queryset() calls the patched index removal once per deleted document."""
        total = 42
        for i in range(total):
            # Each document is created with its own two-digit checksum.
            Document.objects.create(title="Many documents with the same title", checksum=f"{i:02}")
        self.assertEqual(Document.objects.count(), total)

        self.doc_admin.delete_queryset(None, Document.objects.all())

        self.assertEqual(remove_from_index.call_count, total)
        self.assertEqual(Document.objects.count(), 0)

    def test_created(self):
        """created_() formats the creation date as ``YYYY-MM-DD``."""
        created_on = timezone.datetime(2020, 4, 12)
        document = Document.objects.create(title="test", created=created_on)

        self.assertEqual(self.doc_admin.created_(document), "2020-04-12")
|
@@ -352,6 +352,25 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
|
|||||||
|
|
||||||
self.assertEqual(correction, None)
|
self.assertEqual(correction, None)
|
||||||
|
|
||||||
|
def test_search_more_like(self):
    """The ``more_like`` search returns the other two documents, d3 ahead of d1."""
    d1 = Document.objects.create(
        title="invoice",
        content="the thing i bought at a shop and paid with bank account",
        checksum="A",
        pk=1,
    )
    d2 = Document.objects.create(
        title="bank statement 1",
        content="things i paid for in august",
        pk=2,
        checksum="B",
    )
    d3 = Document.objects.create(
        title="bank statement 3",
        content="things i paid for in september",
        pk=3,
        checksum="C",
    )

    # Index all three documents through a single writer before querying.
    with AsyncWriter(index.open_index()) as writer:
        for document in (d1, d2, d3):
            index.update_document(writer, document)

    response = self.client.get(f"/api/search/?more_like={d2.id}")
    self.assertEqual(response.status_code, 200)

    hits = response.data['results']
    self.assertEqual(len(hits), 2)
    # The reference document d2 is expected to be absent from its own results.
    self.assertEqual([hit['id'] for hit in hits], [d3.id, d1.id])
|
||||||
|
|
||||||
def test_statistics(self):
|
def test_statistics(self):
|
||||||
|
|
||||||
doc1 = Document.objects.create(title="none1", checksum="A")
|
doc1 = Document.objects.create(title="none1", checksum="A")
|
||||||
|
@@ -29,81 +29,6 @@ class TestAttributes(TestCase):
|
|||||||
|
|
||||||
self.assertEqual(tuple([t.name for t in file_info.tags]), tags, filename)
|
self.assertEqual(tuple([t.name for t in file_info.tags]), tags, filename)
|
||||||
|
|
||||||
def test_guess_attributes_from_name0(self):
|
|
||||||
self._test_guess_attributes_from_name(
|
|
||||||
"Sender - Title.pdf", "Sender", "Title", ())
|
|
||||||
|
|
||||||
def test_guess_attributes_from_name1(self):
|
|
||||||
self._test_guess_attributes_from_name(
|
|
||||||
"Spaced Sender - Title.pdf", "Spaced Sender", "Title", ())
|
|
||||||
|
|
||||||
def test_guess_attributes_from_name2(self):
|
|
||||||
self._test_guess_attributes_from_name(
|
|
||||||
"Sender - Spaced Title.pdf", "Sender", "Spaced Title", ())
|
|
||||||
|
|
||||||
def test_guess_attributes_from_name3(self):
|
|
||||||
self._test_guess_attributes_from_name(
|
|
||||||
"Dashed-Sender - Title.pdf", "Dashed-Sender", "Title", ())
|
|
||||||
|
|
||||||
def test_guess_attributes_from_name4(self):
|
|
||||||
self._test_guess_attributes_from_name(
|
|
||||||
"Sender - Dashed-Title.pdf", "Sender", "Dashed-Title", ())
|
|
||||||
|
|
||||||
def test_guess_attributes_from_name5(self):
|
|
||||||
self._test_guess_attributes_from_name(
|
|
||||||
"Sender - Title - tag1,tag2,tag3.pdf",
|
|
||||||
"Sender",
|
|
||||||
"Title",
|
|
||||||
self.TAGS
|
|
||||||
)
|
|
||||||
|
|
||||||
def test_guess_attributes_from_name6(self):
|
|
||||||
self._test_guess_attributes_from_name(
|
|
||||||
"Spaced Sender - Title - tag1,tag2,tag3.pdf",
|
|
||||||
"Spaced Sender",
|
|
||||||
"Title",
|
|
||||||
self.TAGS
|
|
||||||
)
|
|
||||||
|
|
||||||
def test_guess_attributes_from_name7(self):
|
|
||||||
self._test_guess_attributes_from_name(
|
|
||||||
"Sender - Spaced Title - tag1,tag2,tag3.pdf",
|
|
||||||
"Sender",
|
|
||||||
"Spaced Title",
|
|
||||||
self.TAGS
|
|
||||||
)
|
|
||||||
|
|
||||||
def test_guess_attributes_from_name8(self):
|
|
||||||
self._test_guess_attributes_from_name(
|
|
||||||
"Dashed-Sender - Title - tag1,tag2,tag3.pdf",
|
|
||||||
"Dashed-Sender",
|
|
||||||
"Title",
|
|
||||||
self.TAGS
|
|
||||||
)
|
|
||||||
|
|
||||||
def test_guess_attributes_from_name9(self):
|
|
||||||
self._test_guess_attributes_from_name(
|
|
||||||
"Sender - Dashed-Title - tag1,tag2,tag3.pdf",
|
|
||||||
"Sender",
|
|
||||||
"Dashed-Title",
|
|
||||||
self.TAGS
|
|
||||||
)
|
|
||||||
|
|
||||||
def test_guess_attributes_from_name10(self):
|
|
||||||
self._test_guess_attributes_from_name(
|
|
||||||
"Σενδερ - Τιτλε - tag1,tag2,tag3.pdf",
|
|
||||||
"Σενδερ",
|
|
||||||
"Τιτλε",
|
|
||||||
self.TAGS
|
|
||||||
)
|
|
||||||
|
|
||||||
def test_guess_attributes_from_name_when_correspondent_empty(self):
|
|
||||||
self._test_guess_attributes_from_name(
|
|
||||||
' - weird empty correspondent but should not break.pdf',
|
|
||||||
None,
|
|
||||||
'weird empty correspondent but should not break',
|
|
||||||
()
|
|
||||||
)
|
|
||||||
|
|
||||||
def test_guess_attributes_from_name_when_title_starts_with_dash(self):
|
def test_guess_attributes_from_name_when_title_starts_with_dash(self):
|
||||||
self._test_guess_attributes_from_name(
|
self._test_guess_attributes_from_name(
|
||||||
@@ -121,28 +46,6 @@ class TestAttributes(TestCase):
|
|||||||
()
|
()
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_guess_attributes_from_name_when_title_is_empty(self):
|
|
||||||
self._test_guess_attributes_from_name(
|
|
||||||
'weird correspondent but should not break - .pdf',
|
|
||||||
'weird correspondent but should not break',
|
|
||||||
'',
|
|
||||||
()
|
|
||||||
)
|
|
||||||
|
|
||||||
def test_case_insensitive_tag_creation(self):
|
|
||||||
"""
|
|
||||||
Tags should be detected and created as lower case.
|
|
||||||
:return:
|
|
||||||
"""
|
|
||||||
|
|
||||||
filename = "Title - Correspondent - tAg1,TAG2.pdf"
|
|
||||||
self.assertEqual(len(FileInfo.from_filename(filename).tags), 2)
|
|
||||||
|
|
||||||
path = "Title - Correspondent - tag1,tag2.pdf"
|
|
||||||
self.assertEqual(len(FileInfo.from_filename(filename).tags), 2)
|
|
||||||
|
|
||||||
self.assertEqual(Tag.objects.all().count(), 2)
|
|
||||||
|
|
||||||
|
|
||||||
class TestFieldPermutations(TestCase):
|
class TestFieldPermutations(TestCase):
|
||||||
|
|
||||||
@@ -199,69 +102,7 @@ class TestFieldPermutations(TestCase):
|
|||||||
filename = template.format(**spec)
|
filename = template.format(**spec)
|
||||||
self._test_guessed_attributes(filename, **spec)
|
self._test_guessed_attributes(filename, **spec)
|
||||||
|
|
||||||
def test_title_and_correspondent(self):
|
|
||||||
template = '{correspondent} - {title}.pdf'
|
|
||||||
for correspondent in self.valid_correspondents:
|
|
||||||
for title in self.valid_titles:
|
|
||||||
spec = dict(correspondent=correspondent, title=title)
|
|
||||||
filename = template.format(**spec)
|
|
||||||
self._test_guessed_attributes(filename, **spec)
|
|
||||||
|
|
||||||
def test_title_and_correspondent_and_tags(self):
|
|
||||||
template = '{correspondent} - {title} - {tags}.pdf'
|
|
||||||
for correspondent in self.valid_correspondents:
|
|
||||||
for title in self.valid_titles:
|
|
||||||
for tags in self.valid_tags:
|
|
||||||
spec = dict(correspondent=correspondent, title=title,
|
|
||||||
tags=tags)
|
|
||||||
filename = template.format(**spec)
|
|
||||||
self._test_guessed_attributes(filename, **spec)
|
|
||||||
|
|
||||||
def test_created_and_correspondent_and_title_and_tags(self):
|
|
||||||
|
|
||||||
template = (
|
|
||||||
"{created} - "
|
|
||||||
"{correspondent} - "
|
|
||||||
"{title} - "
|
|
||||||
"{tags}.pdf"
|
|
||||||
)
|
|
||||||
|
|
||||||
for created in self.valid_dates:
|
|
||||||
for correspondent in self.valid_correspondents:
|
|
||||||
for title in self.valid_titles:
|
|
||||||
for tags in self.valid_tags:
|
|
||||||
spec = {
|
|
||||||
"created": created,
|
|
||||||
"correspondent": correspondent,
|
|
||||||
"title": title,
|
|
||||||
"tags": tags,
|
|
||||||
}
|
|
||||||
self._test_guessed_attributes(
|
|
||||||
template.format(**spec), **spec)
|
|
||||||
|
|
||||||
def test_created_and_correspondent_and_title(self):
|
|
||||||
|
|
||||||
template = "{created} - {correspondent} - {title}.pdf"
|
|
||||||
|
|
||||||
for created in self.valid_dates:
|
|
||||||
for correspondent in self.valid_correspondents:
|
|
||||||
for title in self.valid_titles:
|
|
||||||
|
|
||||||
# Skip cases where title looks like a tag as we can't
|
|
||||||
# accommodate such cases.
|
|
||||||
if title.lower() == title:
|
|
||||||
continue
|
|
||||||
|
|
||||||
spec = {
|
|
||||||
"created": created,
|
|
||||||
"correspondent": correspondent,
|
|
||||||
"title": title
|
|
||||||
}
|
|
||||||
self._test_guessed_attributes(
|
|
||||||
template.format(**spec), **spec)
|
|
||||||
|
|
||||||
def test_created_and_title(self):
|
def test_created_and_title(self):
|
||||||
|
|
||||||
template = "{created} - {title}.pdf"
|
template = "{created} - {title}.pdf"
|
||||||
|
|
||||||
for created in self.valid_dates:
|
for created in self.valid_dates:
|
||||||
@@ -273,21 +114,6 @@ class TestFieldPermutations(TestCase):
|
|||||||
self._test_guessed_attributes(
|
self._test_guessed_attributes(
|
||||||
template.format(**spec), **spec)
|
template.format(**spec), **spec)
|
||||||
|
|
||||||
def test_created_and_title_and_tags(self):
|
|
||||||
|
|
||||||
template = "{created} - {title} - {tags}.pdf"
|
|
||||||
|
|
||||||
for created in self.valid_dates:
|
|
||||||
for title in self.valid_titles:
|
|
||||||
for tags in self.valid_tags:
|
|
||||||
spec = {
|
|
||||||
"created": created,
|
|
||||||
"title": title,
|
|
||||||
"tags": tags
|
|
||||||
}
|
|
||||||
self._test_guessed_attributes(
|
|
||||||
template.format(**spec), **spec)
|
|
||||||
|
|
||||||
def test_invalid_date_format(self):
|
def test_invalid_date_format(self):
|
||||||
info = FileInfo.from_filename("06112017Z - title.pdf")
|
info = FileInfo.from_filename("06112017Z - title.pdf")
|
||||||
self.assertEqual(info.title, "title")
|
self.assertEqual(info.title, "title")
|
||||||
@@ -336,32 +162,6 @@ class TestFieldPermutations(TestCase):
|
|||||||
info = FileInfo.from_filename(filename)
|
info = FileInfo.from_filename(filename)
|
||||||
self.assertEqual(info.title, "anotherall")
|
self.assertEqual(info.title, "anotherall")
|
||||||
|
|
||||||
# Complex transformation without date in replacement string
|
|
||||||
with self.settings(
|
|
||||||
FILENAME_PARSE_TRANSFORMS=[(exact_patt, repl1)]):
|
|
||||||
info = FileInfo.from_filename(filename)
|
|
||||||
self.assertEqual(info.title, "0001")
|
|
||||||
self.assertEqual(len(info.tags), 2)
|
|
||||||
self.assertEqual(info.tags[0].name, "tag1")
|
|
||||||
self.assertEqual(info.tags[1].name, "tag2")
|
|
||||||
self.assertIsNone(info.created)
|
|
||||||
|
|
||||||
# Complex transformation with date in replacement string
|
|
||||||
with self.settings(
|
|
||||||
FILENAME_PARSE_TRANSFORMS=[
|
|
||||||
(none_patt, "none.gif"),
|
|
||||||
(exact_patt, repl2), # <-- matches
|
|
||||||
(exact_patt, repl1),
|
|
||||||
(all_patt, "all.gif")]):
|
|
||||||
info = FileInfo.from_filename(filename)
|
|
||||||
self.assertEqual(info.title, "0001")
|
|
||||||
self.assertEqual(len(info.tags), 2)
|
|
||||||
self.assertEqual(info.tags[0].name, "tag1")
|
|
||||||
self.assertEqual(info.tags[1].name, "tag2")
|
|
||||||
self.assertEqual(info.created.year, 2019)
|
|
||||||
self.assertEqual(info.created.month, 9)
|
|
||||||
self.assertEqual(info.created.day, 8)
|
|
||||||
|
|
||||||
|
|
||||||
class DummyParser(DocumentParser):
|
class DummyParser(DocumentParser):
|
||||||
|
|
||||||
@@ -476,15 +276,13 @@ class TestConsumer(DirectoriesMixin, TestCase):
|
|||||||
|
|
||||||
def testOverrideFilename(self):
|
def testOverrideFilename(self):
|
||||||
filename = self.get_test_file()
|
filename = self.get_test_file()
|
||||||
override_filename = "My Bank - Statement for November.pdf"
|
override_filename = "Statement for November.pdf"
|
||||||
|
|
||||||
document = self.consumer.try_consume_file(filename, override_filename=override_filename)
|
document = self.consumer.try_consume_file(filename, override_filename=override_filename)
|
||||||
|
|
||||||
self.assertEqual(document.correspondent.name, "My Bank")
|
|
||||||
self.assertEqual(document.title, "Statement for November")
|
self.assertEqual(document.title, "Statement for November")
|
||||||
|
|
||||||
def testOverrideTitle(self):
|
def testOverrideTitle(self):
|
||||||
|
|
||||||
document = self.consumer.try_consume_file(self.get_test_file(), override_title="Override Title")
|
document = self.consumer.try_consume_file(self.get_test_file(), override_title="Override Title")
|
||||||
self.assertEqual(document.title, "Override Title")
|
self.assertEqual(document.title, "Override Title")
|
||||||
|
|
||||||
@@ -594,11 +392,10 @@ class TestConsumer(DirectoriesMixin, TestCase):
|
|||||||
def testFilenameHandling(self):
|
def testFilenameHandling(self):
|
||||||
filename = self.get_test_file()
|
filename = self.get_test_file()
|
||||||
|
|
||||||
document = self.consumer.try_consume_file(filename, override_filename="Bank - Test.pdf", override_title="new docs")
|
document = self.consumer.try_consume_file(filename, override_title="new docs")
|
||||||
|
|
||||||
self.assertEqual(document.title, "new docs")
|
self.assertEqual(document.title, "new docs")
|
||||||
self.assertEqual(document.correspondent.name, "Bank")
|
self.assertEqual(document.filename, "none/new docs.pdf")
|
||||||
self.assertEqual(document.filename, "Bank/new docs.pdf")
|
|
||||||
|
|
||||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
|
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
|
||||||
@mock.patch("documents.signals.handlers.generate_unique_filename")
|
@mock.patch("documents.signals.handlers.generate_unique_filename")
|
||||||
@@ -617,10 +414,9 @@ class TestConsumer(DirectoriesMixin, TestCase):
|
|||||||
|
|
||||||
Tag.objects.create(name="test", is_inbox_tag=True)
|
Tag.objects.create(name="test", is_inbox_tag=True)
|
||||||
|
|
||||||
document = self.consumer.try_consume_file(filename, override_filename="Bank - Test.pdf", override_title="new docs")
|
document = self.consumer.try_consume_file(filename, override_title="new docs")
|
||||||
|
|
||||||
self.assertEqual(document.title, "new docs")
|
self.assertEqual(document.title, "new docs")
|
||||||
self.assertEqual(document.correspondent.name, "Bank")
|
|
||||||
self.assertIsNotNone(os.path.isfile(document.title))
|
self.assertIsNotNone(os.path.isfile(document.title))
|
||||||
self.assertTrue(os.path.isfile(document.source_path))
|
self.assertTrue(os.path.isfile(document.source_path))
|
||||||
|
|
||||||
@@ -642,3 +438,31 @@ class TestConsumer(DirectoriesMixin, TestCase):
|
|||||||
self.assertEqual(document.document_type, dtype)
|
self.assertEqual(document.document_type, dtype)
|
||||||
self.assertIn(t1, document.tags.all())
|
self.assertIn(t1, document.tags.all())
|
||||||
self.assertNotIn(t2, document.tags.all())
|
self.assertNotIn(t2, document.tags.all())
|
||||||
|
|
||||||
|
@override_settings(CONSUMER_DELETE_DUPLICATES=True)
def test_delete_duplicate(self):
    """With CONSUMER_DELETE_DUPLICATES on, a rejected duplicate file is removed."""
    first_copy = self.get_test_file()
    self.assertTrue(os.path.isfile(first_copy))

    document = self.consumer.try_consume_file(first_copy)

    # The first consumption succeeds and the source file is gone afterwards.
    self.assertFalse(os.path.isfile(first_copy))
    self.assertIsNotNone(document)

    second_copy = self.get_test_file()
    self.assertTrue(os.path.isfile(second_copy))

    with self.assertRaises(ConsumerError):
        self.consumer.try_consume_file(second_copy)

    # The second attempt fails and its file no longer exists afterwards.
    self.assertFalse(os.path.isfile(second_copy))
|
||||||
|
|
||||||
|
@override_settings(CONSUMER_DELETE_DUPLICATES=False)
def test_no_delete_duplicate(self):
    """With CONSUMER_DELETE_DUPLICATES off, a rejected duplicate file is kept."""
    first_copy = self.get_test_file()
    self.assertTrue(os.path.isfile(first_copy))

    document = self.consumer.try_consume_file(first_copy)

    # The first consumption succeeds and the source file is gone afterwards.
    self.assertFalse(os.path.isfile(first_copy))
    self.assertIsNotNone(document)

    second_copy = self.get_test_file()
    self.assertTrue(os.path.isfile(second_copy))

    with self.assertRaises(ConsumerError):
        self.consumer.try_consume_file(second_copy)

    # The second attempt fails but its file is still on disk.
    self.assertTrue(os.path.isfile(second_copy))
|
||||||
|
@@ -14,7 +14,7 @@ from django.utils import timezone
|
|||||||
from .utils import DirectoriesMixin
|
from .utils import DirectoriesMixin
|
||||||
from ..file_handling import generate_filename, create_source_path_directory, delete_empty_directories, \
|
from ..file_handling import generate_filename, create_source_path_directory, delete_empty_directories, \
|
||||||
generate_unique_filename
|
generate_unique_filename
|
||||||
from ..models import Document, Correspondent, Tag
|
from ..models import Document, Correspondent, Tag, DocumentType
|
||||||
|
|
||||||
|
|
||||||
class TestFileHandling(DirectoriesMixin, TestCase):
|
class TestFileHandling(DirectoriesMixin, TestCase):
|
||||||
@@ -190,6 +190,17 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
|||||||
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), True)
|
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), True)
|
||||||
self.assertTrue(os.path.isfile(important_file))
|
self.assertTrue(os.path.isfile(important_file))
|
||||||
|
|
||||||
|
@override_settings(PAPERLESS_FILENAME_FORMAT="{document_type} - {title}")
def test_document_type(self):
    """The ``{document_type}`` placeholder yields "none" until a type is assigned."""
    doc_type = DocumentType.objects.create(name="my_doc_type")
    document = Document.objects.create(title="the_doc", mime_type="application/pdf")

    # No document type has been set on the document yet.
    name_without_type = generate_filename(document)
    self.assertEqual(name_without_type, "none - the_doc.pdf")

    document.document_type = doc_type

    name_with_type = generate_filename(document)
    self.assertEqual(name_with_type, "my_doc_type - the_doc.pdf")
|
||||||
|
|
||||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
|
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
|
||||||
def test_tags_with_underscore(self):
|
def test_tags_with_underscore(self):
|
||||||
document = Document()
|
document = Document()
|
||||||
|
135
src/documents/tests/test_management.py
Normal file
135
src/documents/tests/test_management.py
Normal file
@@ -0,0 +1,135 @@
|
|||||||
|
import hashlib
|
||||||
|
import tempfile
|
||||||
|
import filecmp
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
from pathlib import Path
|
||||||
|
from unittest import mock
|
||||||
|
|
||||||
|
from django.test import TestCase, override_settings
|
||||||
|
|
||||||
|
|
||||||
|
from django.core.management import call_command
|
||||||
|
|
||||||
|
from documents.file_handling import generate_filename
|
||||||
|
from documents.management.commands.document_archiver import handle_document
|
||||||
|
from documents.models import Document
|
||||||
|
from documents.tests.utils import DirectoriesMixin
|
||||||
|
|
||||||
|
|
||||||
|
sample_file = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
|
||||||
|
|
||||||
|
|
||||||
|
class TestArchiver(DirectoriesMixin, TestCase):
    """Tests for the ``document_archiver`` command and its ``handle_document`` helper."""

    def make_models(self):
        """Create and return the single PDF document row used by these tests."""
        return Document.objects.create(
            checksum="A",
            title="A",
            content="first document",
            mime_type="application/pdf",
        )

    def test_archiver(self):
        """The management command runs with one document present (no assertions)."""
        document = self.make_models()
        original = os.path.join(self.dirs.originals_dir, f"{document.id:07}.pdf")
        shutil.copy(sample_file, original)

        call_command('document_archiver')

    def test_handle_document(self):
        """handle_document() leaves both an archive file and the untouched original."""
        document = self.make_models()
        original = os.path.join(self.dirs.originals_dir, f"{document.id:07}.pdf")
        shutil.copy(sample_file, original)

        handle_document(document.pk)

        # Re-read the row to pick up whatever handle_document() stored.
        reloaded = Document.objects.get(id=document.id)

        self.assertIsNotNone(reloaded.checksum)
        for path in (reloaded.archive_path, reloaded.source_path):
            self.assertTrue(os.path.isfile(path))
        self.assertTrue(filecmp.cmp(sample_file, reloaded.source_path))
|
||||||
|
|
||||||
|
|
||||||
|
class TestDecryptDocuments(TestCase):
    """Test for the ``decrypt_documents`` management command."""

    @override_settings(
        ORIGINALS_DIR=os.path.join(os.path.dirname(__file__), "samples", "originals"),
        THUMBNAIL_DIR=os.path.join(os.path.dirname(__file__), "samples", "thumb"),
        PASSPHRASE="test",
        PAPERLESS_FILENAME_FORMAT=None
    )
    @mock.patch("documents.management.commands.decrypt_documents.input")
    def test_decrypt(self, m):
        """Decrypting a GPG-stored document rewrites it as an unencrypted file.

        ``m`` is the patched ``input`` of the command module, so the command
        does not wait on stdin. The sample files are copied into a scratch
        media directory so the command never touches the checked-in samples.
        """
        media_dir = tempfile.mkdtemp()
        # Remove the scratch tree even if an assertion below fails; the
        # directory was previously left behind after every run.
        self.addCleanup(shutil.rmtree, media_dir, ignore_errors=True)

        originals_dir = os.path.join(media_dir, "documents", "originals")
        thumb_dir = os.path.join(media_dir, "documents", "thumbnails")
        os.makedirs(originals_dir, exist_ok=True)
        os.makedirs(thumb_dir, exist_ok=True)

        samples_dir = os.path.join(os.path.dirname(__file__), "samples", "documents")

        # Context-manager form guarantees the override is undone on exit;
        # a bare ``.enable()`` has no matching ``disable()``.
        with override_settings(
            ORIGINALS_DIR=originals_dir,
            THUMBNAIL_DIR=thumb_dir,
            PASSPHRASE="test"
        ):
            doc = Document.objects.create(
                checksum="9c9691e51741c1f4f41a20896af31770",
                title="wow",
                filename="0000002.pdf.gpg",
                mime_type="application/pdf",
                storage_type=Document.STORAGE_TYPE_GPG,
            )

            shutil.copy(
                os.path.join(samples_dir, "originals", "0000002.pdf.gpg"),
                os.path.join(originals_dir, "0000002.pdf.gpg"),
            )
            # The thumbnail is stored under the id of the row just created.
            shutil.copy(
                os.path.join(samples_dir, "thumbnails", "0000002.png.gpg"),
                os.path.join(thumb_dir, f"{doc.id:07}.png.gpg"),
            )

            call_command('decrypt_documents')

            doc.refresh_from_db()

            self.assertEqual(doc.storage_type, Document.STORAGE_TYPE_UNENCRYPTED)
            self.assertEqual(doc.filename, "0000002.pdf")
            self.assertTrue(os.path.isfile(os.path.join(originals_dir, "0000002.pdf")))
            self.assertTrue(os.path.isfile(doc.source_path))
            self.assertTrue(os.path.isfile(os.path.join(thumb_dir, f"{doc.id:07}.png")))
            self.assertTrue(os.path.isfile(doc.thumbnail_path))

            # The decrypted bytes must hash to the checksum stored on the row.
            with doc.source_file as f:
                checksum = hashlib.md5(f.read()).hexdigest()
            self.assertEqual(checksum, doc.checksum)
|
||||||
|
|
||||||
|
|
||||||
|
class TestMakeIndex(TestCase):
    """Each ``document_index`` sub-command must call its patched index function once."""

    @mock.patch("documents.management.commands.document_index.index_reindex")
    def test_reindex(self, reindex):
        """``document_index reindex`` calls ``index_reindex`` exactly once."""
        call_command("document_index", "reindex")

        reindex.assert_called_once()

    @mock.patch("documents.management.commands.document_index.index_optimize")
    def test_optimize(self, optimize):
        """``document_index optimize`` calls ``index_optimize`` exactly once."""
        call_command("document_index", "optimize")

        optimize.assert_called_once()
|
||||||
|
|
||||||
|
|
||||||
|
class TestRenamer(DirectoriesMixin, TestCase):
    """Test for the ``document_renamer`` management command."""

    def test_rename(self):
        """Running the renamer under a new filename format moves the file on disk."""
        document = Document.objects.create(title="test", mime_type="application/pdf")
        document.filename = generate_filename(document)
        document.save()

        # Create an empty file where the document currently expects its source.
        previous_path = document.source_path
        Path(previous_path).touch()

        with override_settings(PAPERLESS_FILENAME_FORMAT="{title}"):
            call_command("document_renamer")

        renamed = Document.objects.get(id=document.id)

        self.assertEqual(renamed.filename, "test.pdf")
        # Neither the remembered path nor the stale in-memory object's path exists.
        for stale_path in (previous_path, document.source_path):
            self.assertFalse(os.path.isfile(stale_path))
        self.assertTrue(os.path.isfile(renamed.source_path))
|
||||||
|
|
||||||
|
|
||||||
|
class TestCreateClassifier(TestCase):
    """Test for the ``document_create_classifier`` management command."""

    @mock.patch("documents.management.commands.document_create_classifier.train_classifier")
    def test_create_classifier(self, train):
        """The command calls the patched ``train_classifier`` exactly once."""
        call_command("document_create_classifier")

        train.assert_called_once()
|
@@ -1,40 +0,0 @@
|
|||||||
import filecmp
|
|
||||||
import os
|
|
||||||
import shutil
|
|
||||||
|
|
||||||
from django.core.management import call_command
|
|
||||||
from django.test import TestCase
|
|
||||||
|
|
||||||
from documents.management.commands.document_archiver import handle_document
|
|
||||||
from documents.models import Document
|
|
||||||
from documents.tests.utils import DirectoriesMixin
|
|
||||||
|
|
||||||
|
|
||||||
sample_file = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
|
|
||||||
|
|
||||||
|
|
||||||
class TestArchiver(DirectoriesMixin, TestCase):
|
|
||||||
|
|
||||||
def make_models(self):
|
|
||||||
return Document.objects.create(checksum="A", title="A", content="first document", mime_type="application/pdf")
|
|
||||||
|
|
||||||
def test_archiver(self):
|
|
||||||
|
|
||||||
doc = self.make_models()
|
|
||||||
shutil.copy(sample_file, os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf"))
|
|
||||||
|
|
||||||
call_command('document_archiver')
|
|
||||||
|
|
||||||
def test_handle_document(self):
|
|
||||||
|
|
||||||
doc = self.make_models()
|
|
||||||
shutil.copy(sample_file, os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf"))
|
|
||||||
|
|
||||||
handle_document(doc.pk)
|
|
||||||
|
|
||||||
doc = Document.objects.get(id=doc.id)
|
|
||||||
|
|
||||||
self.assertIsNotNone(doc.checksum)
|
|
||||||
self.assertTrue(os.path.isfile(doc.archive_path))
|
|
||||||
self.assertTrue(os.path.isfile(doc.source_path))
|
|
||||||
self.assertTrue(filecmp.cmp(sample_file, doc.source_path))
|
|
@@ -1,57 +0,0 @@
|
|||||||
import hashlib
|
|
||||||
import json
|
|
||||||
import os
|
|
||||||
import shutil
|
|
||||||
import tempfile
|
|
||||||
from unittest import mock
|
|
||||||
|
|
||||||
from django.core.management import call_command
|
|
||||||
from django.test import TestCase, override_settings
|
|
||||||
|
|
||||||
from documents.management.commands import document_exporter
|
|
||||||
from documents.models import Document, Tag, DocumentType, Correspondent
|
|
||||||
|
|
||||||
|
|
||||||
class TestDecryptDocuments(TestCase):
|
|
||||||
|
|
||||||
@override_settings(
|
|
||||||
ORIGINALS_DIR=os.path.join(os.path.dirname(__file__), "samples", "originals"),
|
|
||||||
THUMBNAIL_DIR=os.path.join(os.path.dirname(__file__), "samples", "thumb"),
|
|
||||||
PASSPHRASE="test",
|
|
||||||
PAPERLESS_FILENAME_FORMAT=None
|
|
||||||
)
|
|
||||||
@mock.patch("documents.management.commands.decrypt_documents.input")
|
|
||||||
def test_decrypt(self, m):
|
|
||||||
|
|
||||||
media_dir = tempfile.mkdtemp()
|
|
||||||
originals_dir = os.path.join(media_dir, "documents", "originals")
|
|
||||||
thumb_dir = os.path.join(media_dir, "documents", "thumbnails")
|
|
||||||
os.makedirs(originals_dir, exist_ok=True)
|
|
||||||
os.makedirs(thumb_dir, exist_ok=True)
|
|
||||||
|
|
||||||
override_settings(
|
|
||||||
ORIGINALS_DIR=originals_dir,
|
|
||||||
THUMBNAIL_DIR=thumb_dir,
|
|
||||||
PASSPHRASE="test"
|
|
||||||
).enable()
|
|
||||||
|
|
||||||
doc = Document.objects.create(checksum="9c9691e51741c1f4f41a20896af31770", title="wow", filename="0000002.pdf.gpg", mime_type="application/pdf", storage_type=Document.STORAGE_TYPE_GPG)
|
|
||||||
|
|
||||||
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "originals", "0000002.pdf.gpg"), os.path.join(originals_dir, "0000002.pdf.gpg"))
|
|
||||||
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "thumbnails", f"0000002.png.gpg"), os.path.join(thumb_dir, f"{doc.id:07}.png.gpg"))
|
|
||||||
|
|
||||||
call_command('decrypt_documents')
|
|
||||||
|
|
||||||
doc.refresh_from_db()
|
|
||||||
|
|
||||||
self.assertEqual(doc.storage_type, Document.STORAGE_TYPE_UNENCRYPTED)
|
|
||||||
self.assertEqual(doc.filename, "0000002.pdf")
|
|
||||||
self.assertTrue(os.path.isfile(os.path.join(originals_dir, "0000002.pdf")))
|
|
||||||
self.assertTrue(os.path.isfile(doc.source_path))
|
|
||||||
self.assertTrue(os.path.isfile(os.path.join(thumb_dir, f"{doc.id:07}.png")))
|
|
||||||
self.assertTrue(os.path.isfile(doc.thumbnail_path))
|
|
||||||
|
|
||||||
with doc.source_file as f:
|
|
||||||
checksum = hashlib.md5(f.read()).hexdigest()
|
|
||||||
self.assertEqual(checksum, doc.checksum)
|
|
||||||
|
|
129
src/documents/tests/test_migrations.py
Normal file
129
src/documents/tests/test_migrations.py
Normal file
@@ -0,0 +1,129 @@
|
|||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from django.apps import apps
|
||||||
|
from django.conf import settings
|
||||||
|
from django.db import connection
|
||||||
|
from django.db.migrations.executor import MigrationExecutor
|
||||||
|
from django.test import TestCase, TransactionTestCase, override_settings
|
||||||
|
|
||||||
|
from documents.models import Document
|
||||||
|
from documents.parsers import get_default_file_extension
|
||||||
|
from documents.tests.utils import DirectoriesMixin
|
||||||
|
|
||||||
|
|
||||||
|
class TestMigrations(TransactionTestCase):
    """Base class for tests that run a single migration step.

    Subclasses set ``migrate_from`` / ``migrate_to`` to migration names and may
    override ``setUpBeforeMigration`` to create data at the starting state.
    After ``setUp`` the post-migration app registry is available as ``self.apps``.
    """

    # Migration names; subclasses must override both.
    migrate_from = None
    migrate_to = None

    @property
    def app(self):
        """Name of the app config that contains the test's module."""
        return apps.get_containing_app_config(type(self).__module__).name

    def setUp(self):
        super().setUp()

        assert self.migrate_from and self.migrate_to, (
            "TestCase '{}' must define migrate_from and migrate_to properties".format(type(self).__name__)
        )

        # Replace the bare names by (app, name) targets as passed to the executor.
        app_label = self.app
        self.migrate_from = [(app_label, self.migrate_from)]
        self.migrate_to = [(app_label, self.migrate_to)]

        start_executor = MigrationExecutor(connection)
        old_apps = start_executor.loader.project_state(self.migrate_from).apps

        # Reverse to the original migration
        start_executor.migrate(self.migrate_from)

        self.setUpBeforeMigration(old_apps)

        # Run the migration to test
        target_executor = MigrationExecutor(connection)
        target_executor.loader.build_graph()  # reload.
        target_executor.migrate(self.migrate_to)

        self.apps = target_executor.loader.project_state(self.migrate_to).apps

    def setUpBeforeMigration(self, apps):
        """Hook called with the app registry at ``migrate_from``; no-op by default."""
        pass
|
||||||
|
|
||||||
|
|
||||||
|
# Storage-type values compared against ``storage_type`` by the path helpers in
# this module.
STORAGE_TYPE_UNENCRYPTED = "unencrypted"
STORAGE_TYPE_GPG = "gpg"
|
||||||
|
|
||||||
|
|
||||||
|
def source_path_before(self):
    """Return the path under ``settings.ORIGINALS_DIR`` for a pre-migration document.

    ``self`` is a document object exposing ``filename``, ``pk``, ``file_type``
    and ``storage_type``. An explicit filename is used as is; otherwise the
    name is the zero-padded primary key plus the file type.
    """
    if not self.filename:
        file_name = "{:07}.{}".format(self.pk, self.file_type)
        # NOTE(review): indentation was lost in the source this was recovered
        # from; the ".gpg" suffix is assumed to apply only when no explicit
        # filename is set - confirm.
        if self.storage_type == STORAGE_TYPE_GPG:
            file_name = file_name + ".gpg"
    else:
        file_name = str(self.filename)

    return os.path.join(settings.ORIGINALS_DIR, file_name)
|
||||||
|
|
||||||
|
|
||||||
|
def file_type_after(self):
    """Return the default file extension for the document's ``mime_type``."""
    extension = get_default_file_extension(self.mime_type)
    return extension
|
||||||
|
|
||||||
|
|
||||||
|
def source_path_after(doc):
    """Return the path under ``settings.ORIGINALS_DIR`` for a post-migration document.

    An explicit filename is used as is; otherwise the name is the zero-padded
    primary key followed by the extension from ``file_type_after``.
    """
    if not doc.filename:
        file_name = "{:07}{}".format(doc.pk, file_type_after(doc))
        # NOTE(review): indentation was lost in the source this was recovered
        # from; the ".gpg" suffix is assumed to apply only when no explicit
        # filename is set - confirm.
        if doc.storage_type == STORAGE_TYPE_GPG:
            file_name = file_name + ".gpg"  # pragma: no cover
    else:
        file_name = str(doc.filename)

    return os.path.join(settings.ORIGINALS_DIR, file_name)
|
||||||
|
|
||||||
|
|
||||||
|
@override_settings(PASSPHRASE="test")
class TestMigrateMimeType(DirectoriesMixin, TestMigrations):
    """Forward migration 1002 -> 1003: documents must end up with a MIME type."""

    migrate_from = '1002_auto_20201111_1105'
    migrate_to = '1003_mime_types'

    def setUpBeforeMigration(self, apps):
        """Create one plain and one GPG-stored document and put their files in place."""
        Document = apps.get_model("documents", "Document")
        samples_dir = os.path.join(os.path.dirname(__file__), "samples")

        plain_doc = Document.objects.create(title="test", file_type="pdf", filename="file1.pdf")
        self.doc_id = plain_doc.id
        shutil.copy(
            os.path.join(samples_dir, "simple.pdf"),
            source_path_before(plain_doc),
        )

        # Second document has no filename and uses the GPG storage type.
        gpg_doc = Document.objects.create(checksum="B", file_type="pdf", storage_type=STORAGE_TYPE_GPG)
        self.doc2_id = gpg_doc.id
        shutil.copy(
            os.path.join(samples_dir, "documents", "originals", "0000002.pdf.gpg"),
            source_path_before(gpg_doc),
        )

    def testMimeTypesMigrated(self):
        """Both documents report ``application/pdf`` after the migration."""
        Document = self.apps.get_model('documents', 'Document')

        for pk in (self.doc_id, self.doc2_id):
            migrated = Document.objects.get(id=pk)
            self.assertEqual(migrated.mime_type, "application/pdf")
|
||||||
|
|
||||||
|
|
||||||
|
@override_settings(PASSPHRASE="test")
|
||||||
|
class TestMigrateMimeTypeBackwards(DirectoriesMixin, TestMigrations):
|
||||||
|
|
||||||
|
migrate_from = '1003_mime_types'
|
||||||
|
migrate_to = '1002_auto_20201111_1105'
|
||||||
|
|
||||||
|
def setUpBeforeMigration(self, apps):
|
||||||
|
Document = apps.get_model("documents", "Document")
|
||||||
|
doc = Document.objects.create(title="test", mime_type="application/pdf", filename="file1.pdf")
|
||||||
|
self.doc_id = doc.id
|
||||||
|
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), source_path_after(doc))
|
||||||
|
|
||||||
|
def testMimeTypesReverted(self):
|
||||||
|
Document = self.apps.get_model('documents', 'Document')
|
||||||
|
|
||||||
|
doc = Document.objects.get(id=self.doc_id)
|
||||||
|
self.assertEqual(doc.file_type, "pdf")
|
@@ -389,14 +389,27 @@ class SearchView(APIView):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def get(self, request, format=None):
|
def get(self, request, format=None):
|
||||||
if 'query' not in request.query_params:
|
|
||||||
|
if 'query' in request.query_params:
|
||||||
|
query = request.query_params['query']
|
||||||
|
else:
|
||||||
|
query = None
|
||||||
|
|
||||||
|
if 'more_like' in request.query_params:
|
||||||
|
more_like_id = request.query_params['more_like']
|
||||||
|
more_like_content = Document.objects.get(id=more_like_id).content
|
||||||
|
else:
|
||||||
|
more_like_id = None
|
||||||
|
more_like_content = None
|
||||||
|
|
||||||
|
if not query and not more_like_id:
|
||||||
return Response({
|
return Response({
|
||||||
'count': 0,
|
'count': 0,
|
||||||
'page': 0,
|
'page': 0,
|
||||||
'page_count': 0,
|
'page_count': 0,
|
||||||
|
'corrected_query': None,
|
||||||
'results': []})
|
'results': []})
|
||||||
|
|
||||||
query = request.query_params['query']
|
|
||||||
try:
|
try:
|
||||||
page = int(request.query_params.get('page', 1))
|
page = int(request.query_params.get('page', 1))
|
||||||
except (ValueError, TypeError):
|
except (ValueError, TypeError):
|
||||||
@@ -406,8 +419,7 @@ class SearchView(APIView):
|
|||||||
page = 1
|
page = 1
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with index.query_page(self.ix, query, page) as (result_page,
|
with index.query_page(self.ix, page, query, more_like_id, more_like_content) as (result_page, corrected_query): # NOQA: E501
|
||||||
corrected_query):
|
|
||||||
return Response(
|
return Response(
|
||||||
{'count': len(result_page),
|
{'count': len(result_page),
|
||||||
'page': result_page.pagenum,
|
'page': result_page.pagenum,
|
||||||
|
@@ -13,18 +13,17 @@ writeable_hint = (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def path_check(env_var):
|
def path_check(var, directory):
|
||||||
messages = []
|
messages = []
|
||||||
directory = os.getenv(env_var)
|
|
||||||
if directory:
|
if directory:
|
||||||
if not os.path.exists(directory):
|
if not os.path.exists(directory):
|
||||||
messages.append(Error(
|
messages.append(Error(
|
||||||
exists_message.format(env_var),
|
exists_message.format(var),
|
||||||
exists_hint.format(directory)
|
exists_hint.format(directory)
|
||||||
))
|
))
|
||||||
elif not os.access(directory, os.W_OK | os.X_OK):
|
elif not os.access(directory, os.W_OK | os.X_OK):
|
||||||
messages.append(Error(
|
messages.append(Error(
|
||||||
writeable_message.format(env_var),
|
writeable_message.format(var),
|
||||||
writeable_hint.format(directory)
|
writeable_hint.format(directory)
|
||||||
))
|
))
|
||||||
return messages
|
return messages
|
||||||
@@ -36,12 +35,9 @@ def paths_check(app_configs, **kwargs):
|
|||||||
Check the various paths for existence, readability and writeability
|
Check the various paths for existence, readability and writeability
|
||||||
"""
|
"""
|
||||||
|
|
||||||
check_messages = path_check("PAPERLESS_DATA_DIR") + \
|
return path_check("PAPERLESS_DATA_DIR", settings.DATA_DIR) + \
|
||||||
path_check("PAPERLESS_MEDIA_ROOT") + \
|
path_check("PAPERLESS_MEDIA_ROOT", settings.MEDIA_ROOT) + \
|
||||||
path_check("PAPERLESS_CONSUMPTION_DIR") + \
|
path_check("PAPERLESS_CONSUMPTION_DIR", settings.CONSUMPTION_DIR)
|
||||||
path_check("PAPERLESS_STATICDIR")
|
|
||||||
|
|
||||||
return check_messages
|
|
||||||
|
|
||||||
|
|
||||||
@register()
|
@register()
|
||||||
|
@@ -160,13 +160,6 @@ if AUTO_LOGIN_USERNAME:
|
|||||||
MIDDLEWARE.insert(_index+1, 'paperless.auth.AutoLoginMiddleware')
|
MIDDLEWARE.insert(_index+1, 'paperless.auth.AutoLoginMiddleware')
|
||||||
|
|
||||||
|
|
||||||
if DEBUG:
|
|
||||||
X_FRAME_OPTIONS = ''
|
|
||||||
# this should really be 'allow-from uri' but its not supported in any mayor
|
|
||||||
# browser.
|
|
||||||
else:
|
|
||||||
X_FRAME_OPTIONS = 'SAMEORIGIN'
|
|
||||||
|
|
||||||
# We allow CORS from localhost:8080
|
# We allow CORS from localhost:8080
|
||||||
CORS_ALLOWED_ORIGINS = tuple(os.getenv("PAPERLESS_CORS_ALLOWED_HOSTS", "http://localhost:8000").split(","))
|
CORS_ALLOWED_ORIGINS = tuple(os.getenv("PAPERLESS_CORS_ALLOWED_HOSTS", "http://localhost:8000").split(","))
|
||||||
|
|
||||||
|
54
src/paperless/tests/test_checks.py
Normal file
54
src/paperless/tests/test_checks.py
Normal file
@@ -0,0 +1,54 @@
|
|||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
|
||||||
|
from django.test import TestCase, override_settings
|
||||||
|
|
||||||
|
from documents.tests.utils import DirectoriesMixin
|
||||||
|
from paperless import binaries_check, paths_check
|
||||||
|
from paperless.checks import debug_mode_check
|
||||||
|
|
||||||
|
|
||||||
|
class TestChecks(DirectoriesMixin, TestCase):
|
||||||
|
|
||||||
|
def test_binaries(self):
|
||||||
|
self.assertEqual(binaries_check(None), [])
|
||||||
|
|
||||||
|
@override_settings(CONVERT_BINARY="uuuhh", OPTIPNG_BINARY="forgot")
|
||||||
|
def test_binaries_fail(self):
|
||||||
|
self.assertEqual(len(binaries_check(None)), 2)
|
||||||
|
|
||||||
|
def test_paths_check(self):
|
||||||
|
self.assertEqual(paths_check(None), [])
|
||||||
|
|
||||||
|
@override_settings(MEDIA_ROOT="uuh",
|
||||||
|
DATA_DIR="whatever",
|
||||||
|
CONSUMPTION_DIR="idontcare")
|
||||||
|
def test_paths_check_dont_exist(self):
|
||||||
|
msgs = paths_check(None)
|
||||||
|
self.assertEqual(len(msgs), 3, str(msgs))
|
||||||
|
|
||||||
|
for msg in msgs:
|
||||||
|
self.assertTrue(msg.msg.endswith("is set but doesn't exist."))
|
||||||
|
|
||||||
|
def test_paths_check_no_access(self):
|
||||||
|
os.chmod(self.dirs.data_dir, 0o000)
|
||||||
|
os.chmod(self.dirs.media_dir, 0o000)
|
||||||
|
os.chmod(self.dirs.consumption_dir, 0o000)
|
||||||
|
|
||||||
|
self.addCleanup(os.chmod, self.dirs.data_dir, 0o777)
|
||||||
|
self.addCleanup(os.chmod, self.dirs.media_dir, 0o777)
|
||||||
|
self.addCleanup(os.chmod, self.dirs.consumption_dir, 0o777)
|
||||||
|
|
||||||
|
msgs = paths_check(None)
|
||||||
|
self.assertEqual(len(msgs), 3)
|
||||||
|
|
||||||
|
for msg in msgs:
|
||||||
|
self.assertTrue(msg.msg.endswith("is not writeable"))
|
||||||
|
|
||||||
|
@override_settings(DEBUG=False)
|
||||||
|
def test_debug_disabled(self):
|
||||||
|
self.assertEqual(debug_mode_check(None), [])
|
||||||
|
|
||||||
|
@override_settings(DEBUG=True)
|
||||||
|
def test_debug_enabled(self):
|
||||||
|
self.assertEqual(len(debug_mode_check(None)), 1)
|
@@ -1,7 +1,7 @@
|
|||||||
import subprocess
|
import subprocess
|
||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from django.core.checks import Error, register
|
from django.core.checks import Error, Warning, register
|
||||||
|
|
||||||
|
|
||||||
def get_tesseract_langs():
|
def get_tesseract_langs():
|
||||||
|
@@ -1,194 +0,0 @@
|
|||||||
# Thanks to the Library of Congress and some creative use of sed and awk:
|
|
||||||
# http://www.loc.gov/standards/iso639-2/php/English_list.php
|
|
||||||
|
|
||||||
ISO639 = {
|
|
||||||
|
|
||||||
"aa": "aar",
|
|
||||||
"ab": "abk",
|
|
||||||
"ae": "ave",
|
|
||||||
"af": "afr",
|
|
||||||
"ak": "aka",
|
|
||||||
"am": "amh",
|
|
||||||
"an": "arg",
|
|
||||||
"ar": "ara",
|
|
||||||
"as": "asm",
|
|
||||||
"av": "ava",
|
|
||||||
"ay": "aym",
|
|
||||||
"az": "aze",
|
|
||||||
"ba": "bak",
|
|
||||||
"be": "bel",
|
|
||||||
"bg": "bul",
|
|
||||||
"bh": "bih",
|
|
||||||
"bi": "bis",
|
|
||||||
"bm": "bam",
|
|
||||||
"bn": "ben",
|
|
||||||
"bo": "bod",
|
|
||||||
"br": "bre",
|
|
||||||
"bs": "bos",
|
|
||||||
"ca": "cat",
|
|
||||||
"ce": "che",
|
|
||||||
"ch": "cha",
|
|
||||||
"co": "cos",
|
|
||||||
"cr": "cre",
|
|
||||||
"cs": "ces",
|
|
||||||
"cu": "chu",
|
|
||||||
"cv": "chv",
|
|
||||||
"cy": "cym",
|
|
||||||
"da": "dan",
|
|
||||||
"de": "deu",
|
|
||||||
"dv": "div",
|
|
||||||
"dz": "dzo",
|
|
||||||
"ee": "ewe",
|
|
||||||
"el": "ell",
|
|
||||||
"en": "eng",
|
|
||||||
"eo": "epo",
|
|
||||||
"es": "spa",
|
|
||||||
"et": "est",
|
|
||||||
"eu": "eus",
|
|
||||||
"fa": "fas",
|
|
||||||
"ff": "ful",
|
|
||||||
"fi": "fin",
|
|
||||||
"fj": "fij",
|
|
||||||
"fo": "fao",
|
|
||||||
"fr": "fra",
|
|
||||||
"fy": "fry",
|
|
||||||
"ga": "gle",
|
|
||||||
"gd": "gla",
|
|
||||||
"gl": "glg",
|
|
||||||
"gn": "grn",
|
|
||||||
"gu": "guj",
|
|
||||||
"gv": "glv",
|
|
||||||
"ha": "hau",
|
|
||||||
"he": "heb",
|
|
||||||
"hi": "hin",
|
|
||||||
"ho": "hmo",
|
|
||||||
"hr": "hrv",
|
|
||||||
"ht": "hat",
|
|
||||||
"hu": "hun",
|
|
||||||
"hy": "hye",
|
|
||||||
"hz": "her",
|
|
||||||
"ia": "ina",
|
|
||||||
"id": "ind",
|
|
||||||
"ie": "ile",
|
|
||||||
"ig": "ibo",
|
|
||||||
"ii": "iii",
|
|
||||||
"ik": "ipk",
|
|
||||||
"io": "ido",
|
|
||||||
"is": "isl",
|
|
||||||
"it": "ita",
|
|
||||||
"iu": "iku",
|
|
||||||
"ja": "jpn",
|
|
||||||
"jv": "jav",
|
|
||||||
"ka": "kat",
|
|
||||||
"kg": "kon",
|
|
||||||
"ki": "kik",
|
|
||||||
"kj": "kua",
|
|
||||||
"kk": "kaz",
|
|
||||||
"kl": "kal",
|
|
||||||
"km": "khm",
|
|
||||||
"kn": "kan",
|
|
||||||
"ko": "kor",
|
|
||||||
"kr": "kau",
|
|
||||||
"ks": "kas",
|
|
||||||
"ku": "kur",
|
|
||||||
"kv": "kom",
|
|
||||||
"kw": "cor",
|
|
||||||
"ky": "kir",
|
|
||||||
"la": "lat",
|
|
||||||
"lb": "ltz",
|
|
||||||
"lg": "lug",
|
|
||||||
"li": "lim",
|
|
||||||
"ln": "lin",
|
|
||||||
"lo": "lao",
|
|
||||||
"lt": "lit",
|
|
||||||
"lu": "lub",
|
|
||||||
"lv": "lav",
|
|
||||||
"mg": "mlg",
|
|
||||||
"mh": "mah",
|
|
||||||
"mi": "mri",
|
|
||||||
"mk": "mkd",
|
|
||||||
"ml": "mal",
|
|
||||||
"mn": "mon",
|
|
||||||
"mr": "mar",
|
|
||||||
"ms": "msa",
|
|
||||||
"mt": "mlt",
|
|
||||||
"my": "mya",
|
|
||||||
"na": "nau",
|
|
||||||
"nb": "nob",
|
|
||||||
"nd": "nde",
|
|
||||||
"ne": "nep",
|
|
||||||
"ng": "ndo",
|
|
||||||
"nl": "nld",
|
|
||||||
"no": "nor",
|
|
||||||
"nr": "nbl",
|
|
||||||
"nv": "nav",
|
|
||||||
"ny": "nya",
|
|
||||||
"oc": "oci",
|
|
||||||
"oj": "oji",
|
|
||||||
"om": "orm",
|
|
||||||
"or": "ori",
|
|
||||||
"os": "oss",
|
|
||||||
"pa": "pan",
|
|
||||||
"pi": "pli",
|
|
||||||
"pl": "pol",
|
|
||||||
"ps": "pus",
|
|
||||||
"pt": "por",
|
|
||||||
"qu": "que",
|
|
||||||
"rm": "roh",
|
|
||||||
"rn": "run",
|
|
||||||
"ro": "ron",
|
|
||||||
"ru": "rus",
|
|
||||||
"rw": "kin",
|
|
||||||
"sa": "san",
|
|
||||||
"sc": "srd",
|
|
||||||
"sd": "snd",
|
|
||||||
"se": "sme",
|
|
||||||
"sg": "sag",
|
|
||||||
"si": "sin",
|
|
||||||
"sk": "slk",
|
|
||||||
"sl": "slv",
|
|
||||||
"sm": "smo",
|
|
||||||
"sn": "sna",
|
|
||||||
"so": "som",
|
|
||||||
"sq": "sqi",
|
|
||||||
"sr": "srp",
|
|
||||||
"ss": "ssw",
|
|
||||||
"st": "sot",
|
|
||||||
"su": "sun",
|
|
||||||
"sv": "swe",
|
|
||||||
"sw": "swa",
|
|
||||||
"ta": "tam",
|
|
||||||
"te": "tel",
|
|
||||||
"tg": "tgk",
|
|
||||||
"th": "tha",
|
|
||||||
"ti": "tir",
|
|
||||||
"tk": "tuk",
|
|
||||||
"tl": "tgl",
|
|
||||||
"tn": "tsn",
|
|
||||||
"to": "ton",
|
|
||||||
"tr": "tur",
|
|
||||||
"ts": "tso",
|
|
||||||
"tt": "tat",
|
|
||||||
"tw": "twi",
|
|
||||||
"ty": "tah",
|
|
||||||
"ug": "uig",
|
|
||||||
"uk": "ukr",
|
|
||||||
"ur": "urd",
|
|
||||||
"uz": "uzb",
|
|
||||||
"ve": "ven",
|
|
||||||
"vi": "vie",
|
|
||||||
"vo": "vol",
|
|
||||||
"wa": "wln",
|
|
||||||
"wo": "wol",
|
|
||||||
"xh": "xho",
|
|
||||||
"yi": "yid",
|
|
||||||
"yo": "yor",
|
|
||||||
"za": "zha",
|
|
||||||
|
|
||||||
# Tessdata contains two values for Chinese, "chi_sim" and "chi_tra". I
|
|
||||||
# have no idea which one is better, so I just picked the bigger file.
|
|
||||||
"zh": "chi_tra",
|
|
||||||
|
|
||||||
"zu": "zul"
|
|
||||||
|
|
||||||
}
|
|
26
src/paperless_tesseract/tests/test_checks.py
Normal file
26
src/paperless_tesseract/tests/test_checks.py
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
from unittest import mock
|
||||||
|
|
||||||
|
from django.core.checks import ERROR
|
||||||
|
from django.test import TestCase, override_settings
|
||||||
|
|
||||||
|
from paperless_tesseract import check_default_language_available
|
||||||
|
|
||||||
|
|
||||||
|
class TestChecks(TestCase):
|
||||||
|
|
||||||
|
def test_default_language(self):
|
||||||
|
msgs = check_default_language_available(None)
|
||||||
|
|
||||||
|
@override_settings(OCR_LANGUAGE="")
|
||||||
|
def test_no_language(self):
|
||||||
|
msgs = check_default_language_available(None)
|
||||||
|
self.assertEqual(len(msgs), 1)
|
||||||
|
self.assertTrue(msgs[0].msg.startswith("No OCR language has been specified with PAPERLESS_OCR_LANGUAGE"))
|
||||||
|
|
||||||
|
@override_settings(OCR_LANGUAGE="ita")
|
||||||
|
@mock.patch("paperless_tesseract.checks.get_tesseract_langs")
|
||||||
|
def test_invalid_language(self, m):
|
||||||
|
m.return_value = ["deu", "eng"]
|
||||||
|
msgs = check_default_language_available(None)
|
||||||
|
self.assertEqual(len(msgs), 1)
|
||||||
|
self.assertEqual(msgs[0].level, ERROR)
|
@@ -35,15 +35,3 @@ class TextDocumentParser(DocumentParser):
|
|||||||
def parse(self, document_path, mime_type):
|
def parse(self, document_path, mime_type):
|
||||||
with open(document_path, 'r') as f:
|
with open(document_path, 'r') as f:
|
||||||
self.text = f.read()
|
self.text = f.read()
|
||||||
|
|
||||||
|
|
||||||
def run_command(*args):
|
|
||||||
environment = os.environ.copy()
|
|
||||||
if settings.CONVERT_MEMORY_LIMIT:
|
|
||||||
environment["MAGICK_MEMORY_LIMIT"] = settings.CONVERT_MEMORY_LIMIT
|
|
||||||
if settings.CONVERT_TMPDIR:
|
|
||||||
environment["MAGICK_TMPDIR"] = settings.CONVERT_TMPDIR
|
|
||||||
|
|
||||||
if not subprocess.Popen(' '.join(args), env=environment,
|
|
||||||
shell=True).wait() == 0:
|
|
||||||
raise ParseError("Convert failed at {}".format(args))
|
|
||||||
|
Reference in New Issue
Block a user