mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Merge remote-tracking branch 'upstream/dev' into feature-bulk-editor
This commit is contained in:
commit
f06e2c1089
15
docs/api.rst
15
docs/api.rst
@ -221,21 +221,16 @@ Each fragment contains a list of strings, and some of them are marked as a highl
|
||||
|
||||
[
|
||||
[
|
||||
{"text": "This is a sample text with a "},
|
||||
{"text": "highlighted", "term": 0},
|
||||
{"text": " word."}
|
||||
{"text": "This is a sample text with a ", "highlight": false},
|
||||
{"text": "highlighted", "highlight": true},
|
||||
{"text": " word.", "highlight": false}
|
||||
],
|
||||
[
|
||||
{"text": "Another", "term": 1},
|
||||
{"text": " fragment with a highlight."}
|
||||
{"text": "Another", "highlight": true},
|
||||
{"text": " fragment with a highlight.", "highlight": false}
|
||||
]
|
||||
]
|
||||
|
||||
|
||||
|
||||
When ``term`` is present within a string, the word within ``text`` should be highlighted.
|
||||
The term index groups multiple matches together and words with the same index
|
||||
should get identical highlighting.
|
||||
A client may use this example to produce the following output:
|
||||
|
||||
... This is a sample text with a **highlighted** word. ... **Another** fragment with a highlight. ...
|
||||
|
@ -1,4 +1,14 @@
|
||||
<app-page-header [(title)]="title">
|
||||
<div class="input-group input-group-sm mr-5" *ngIf="getContentType() == 'application/pdf'">
|
||||
<div class="input-group-prepend">
|
||||
<div class="input-group-text">Page </div>
|
||||
</div>
|
||||
<input class="form-control flex-grow-0 w-auto" type="number" min="1" [max]="previewNumPages" [(ngModel)]="previewCurrentPage" />
|
||||
<div class="input-group-append">
|
||||
<div class="input-group-text">of {{previewNumPages}}</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<button type="button" class="btn btn-sm btn-outline-danger mr-2" (click)="delete()">
|
||||
<svg class="buttonicon" fill="currentColor">
|
||||
<use xlink:href="assets/bootstrap-icons.svg#trash" />
|
||||
@ -24,6 +34,12 @@
|
||||
|
||||
</div>
|
||||
|
||||
<button type="button" class="btn btn-sm btn-outline-primary mr-2" (click)="moreLike()">
|
||||
<svg class="buttonicon" fill="currentColor">
|
||||
<use xlink:href="assets/bootstrap-icons.svg#three-dots" />
|
||||
</svg>
|
||||
<span class="d-none d-lg-inline"> More like this</span>
|
||||
</button>
|
||||
|
||||
<button type="button" class="btn btn-sm btn-outline-primary" (click)="close()">
|
||||
<svg class="buttonicon" fill="currentColor">
|
||||
@ -128,7 +144,7 @@
|
||||
|
||||
<div class="col-md-6 col-xl-8 mb-3">
|
||||
<div class="pdf-viewer-container" *ngIf="getContentType() == 'application/pdf'">
|
||||
<pdf-viewer [src]="previewUrl" [original-size]="false" [show-borders]="true"></pdf-viewer>
|
||||
<pdf-viewer [src]="previewUrl" [original-size]="false" [show-borders]="true" [show-all]="true" [(page)]="previewCurrentPage" (after-load-complete)="pdfPreviewLoaded($event)"></pdf-viewer>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
@ -15,6 +15,7 @@ import { DocumentService } from 'src/app/services/rest/document.service';
|
||||
import { ConfirmDialogComponent } from '../common/confirm-dialog/confirm-dialog.component';
|
||||
import { CorrespondentEditDialogComponent } from '../manage/correspondent-list/correspondent-edit-dialog/correspondent-edit-dialog.component';
|
||||
import { DocumentTypeEditDialogComponent } from '../manage/document-type-list/document-type-edit-dialog/document-type-edit-dialog.component';
|
||||
import { PDFDocumentProxy } from 'ng2-pdf-viewer';
|
||||
|
||||
@Component({
|
||||
selector: 'app-document-detail',
|
||||
@ -47,6 +48,9 @@ export class DocumentDetailComponent implements OnInit {
|
||||
tags: new FormControl([])
|
||||
})
|
||||
|
||||
previewCurrentPage: number = 1
|
||||
previewNumPages: number = 1
|
||||
|
||||
constructor(
|
||||
private documentsService: DocumentService,
|
||||
private route: ActivatedRoute,
|
||||
@ -168,7 +172,16 @@ export class DocumentDetailComponent implements OnInit {
|
||||
|
||||
}
|
||||
|
||||
moreLike() {
|
||||
this.router.navigate(["search"], {queryParams: {more_like:this.document.id}})
|
||||
}
|
||||
|
||||
hasNext() {
|
||||
return this.documentListViewService.hasNext(this.documentId)
|
||||
}
|
||||
|
||||
pdfPreviewLoaded(pdf: PDFDocumentProxy) {
|
||||
this.previewNumPages = pdf.numPages
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -23,8 +23,14 @@
|
||||
</p>
|
||||
|
||||
|
||||
<div class="d-flex justify-content-between align-items-center">
|
||||
<div class="d-flex align-items-center">
|
||||
<div class="btn-group">
|
||||
<a routerLink="/search" [queryParams]="{'more_like': document.id}" class="btn btn-sm btn-outline-secondary" *ngIf="moreLikeThis">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-three-dots" viewBox="0 0 16 16">
|
||||
<path fill-rule="evenodd" d="M3 9.5a1.5 1.5 0 1 1 0-3 1.5 1.5 0 0 1 0 3zm5 0a1.5 1.5 0 1 1 0-3 1.5 1.5 0 0 1 0 3zm5 0a1.5 1.5 0 1 1 0-3 1.5 1.5 0 0 1 0 3z"/>
|
||||
</svg>
|
||||
More like this
|
||||
</a>
|
||||
<a routerLink="/documents/{{document.id}}" class="btn btn-sm btn-outline-secondary">
|
||||
<svg width="1em" height="1em" viewBox="0 0 16 16" class="bi bi-pencil" fill="currentColor" xmlns="http://www.w3.org/2000/svg">
|
||||
<path fill-rule="evenodd" d="M12.146.146a.5.5 0 0 1 .708 0l3 3a.5.5 0 0 1 0 .708l-10 10a.5.5 0 0 1-.168.11l-5 2a.5.5 0 0 1-.65-.65l2-5a.5.5 0 0 1 .11-.168l10-10zM11.207 2.5L13.5 4.793 14.793 3.5 12.5 1.207 11.207 2.5zm1.586 3L10.5 3.207 4 9.707V10h.5a.5.5 0 0 1 .5.5v.5h.5a.5.5 0 0 1 .5.5v.5h.293l6.5-6.5zm-9.761 5.175l-.106.106-1.528 3.821 3.821-1.528.106-.106A.5.5 0 0 1 5 12.5V12h-.5a.5.5 0 0 1-.5-.5V11h-.5a.5.5 0 0 1-.468-.325z"/>
|
||||
@ -45,7 +51,13 @@
|
||||
</svg>
|
||||
Download
|
||||
</a>
|
||||
|
||||
</div>
|
||||
|
||||
<small class="text-muted ml-auto">Score:</small>
|
||||
|
||||
<ngb-progressbar *ngIf="searchScore" [type]="searchScoreClass" [value]="searchScore" class="search-score-bar mx-2" [max]="1"></ngb-progressbar>
|
||||
|
||||
<small class="text-muted">Created: {{document.created | date}}</small>
|
||||
</div>
|
||||
|
||||
|
@ -10,3 +10,9 @@
|
||||
position: absolute;
|
||||
|
||||
}
|
||||
|
||||
.search-score-bar {
|
||||
width: 100px;
|
||||
height: 5px;
|
||||
margin-top: 2px;
|
||||
}
|
@ -12,6 +12,9 @@ export class DocumentCardLargeComponent implements OnInit {
|
||||
|
||||
constructor(private documentService: DocumentService, private sanitizer: DomSanitizer) { }
|
||||
|
||||
@Input()
|
||||
moreLikeThis: boolean = false
|
||||
|
||||
@Input()
|
||||
document: PaperlessDocument
|
||||
|
||||
@ -24,6 +27,19 @@ export class DocumentCardLargeComponent implements OnInit {
|
||||
@Output()
|
||||
clickCorrespondent = new EventEmitter<number>()
|
||||
|
||||
@Input()
|
||||
searchScore: number
|
||||
|
||||
get searchScoreClass() {
|
||||
if (this.searchScore > 0.7) {
|
||||
return "success"
|
||||
} else if (this.searchScore > 0.3) {
|
||||
return "warning"
|
||||
} else {
|
||||
return "danger"
|
||||
}
|
||||
}
|
||||
|
||||
ngOnInit(): void {
|
||||
}
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
<div class="col p-2 h-100 document-card" style="width: 16rem;">
|
||||
<div class="col p-2 h-100">
|
||||
<div class="card h-100 shadow-sm" [class.card-selected]="selected">
|
||||
<div class="border-bottom" [class.doc-img-background-selected]="selected">
|
||||
<img class="card-img doc-img" [src]="getThumbUrl()" (click)="selected = !selected">
|
||||
|
@ -151,5 +151,5 @@
|
||||
|
||||
|
||||
<div class=" m-n2 row" *ngIf="displayMode == 'smallCards'">
|
||||
<app-document-card-small [selected]="list.isSelected(d)" (selectedChange)="list.setSelected(d, $event)" [document]="d" *ngFor="let d of list.documents" (clickTag)="clickTag($event)" (clickCorrespondent)="clickCorrespondent($event)"></app-document-card-small>
|
||||
<app-document-card-small [document]="d" [selected]="list.isSelected(d)" (selectedChange)="list.setSelected(d, $event)" *ngFor="let d of list.documents" (clickTag)="clickTag($event)" (clickCorrespondent)="clickCorrespondent($event)"></app-document-card-small>
|
||||
</div>
|
||||
|
@ -1,3 +1,3 @@
|
||||
... <span *ngFor="let fragment of highlights">
|
||||
<span *ngFor="let token of fragment" [ngClass]="token.term != null ? 'match term'+ token.term : ''">{{token.text}}</span> ...
|
||||
<span *ngFor="let token of fragment" [class.match]="token.highlight">{{token.text}}</span> ...
|
||||
</span>
|
@ -1,4 +1,4 @@
|
||||
.match {
|
||||
color: black;
|
||||
background-color: orange;
|
||||
background-color: rgb(255, 211, 66);
|
||||
}
|
@ -3,7 +3,12 @@
|
||||
|
||||
<div *ngIf="errorMessage" class="alert alert-danger">Invalid search query: {{errorMessage}}</div>
|
||||
|
||||
<p>
|
||||
<p *ngIf="more_like">
|
||||
Showing documents similar to
|
||||
<a routerLink="/documents/{{more_like}}">{{more_like_doc?.original_file_name}}</a>
|
||||
</p>
|
||||
|
||||
<p *ngIf="query">
|
||||
Search string: <i>{{query}}</i>
|
||||
<ng-container *ngIf="correctedQuery">
|
||||
- Did you mean "<a [routerLink]="" (click)="searchCorrectedQuery()">{{correctedQuery}}</a>"?
|
||||
@ -15,7 +20,9 @@
|
||||
<p>{{resultCount}} result(s)</p>
|
||||
<app-document-card-large *ngFor="let result of results"
|
||||
[document]="result.document"
|
||||
[details]="result.highlights">
|
||||
[details]="result.highlights"
|
||||
[searchScore]="result.score / maxScore"
|
||||
[moreLikeThis]="true">
|
||||
|
||||
</app-document-card-large>
|
||||
</div>
|
||||
|
@ -1,6 +1,9 @@
|
||||
import { Component, OnInit } from '@angular/core';
|
||||
import { ActivatedRoute, Router } from '@angular/router';
|
||||
import { PaperlessDocument } from 'src/app/data/paperless-document';
|
||||
import { PaperlessDocumentType } from 'src/app/data/paperless-document-type';
|
||||
import { SearchHit } from 'src/app/data/search-result';
|
||||
import { DocumentService } from 'src/app/services/rest/document.service';
|
||||
import { SearchService } from 'src/app/services/rest/search.service';
|
||||
|
||||
@Component({
|
||||
@ -14,6 +17,10 @@ export class SearchComponent implements OnInit {
|
||||
|
||||
query: string = ""
|
||||
|
||||
more_like: number
|
||||
|
||||
more_like_doc: PaperlessDocument
|
||||
|
||||
searching = false
|
||||
|
||||
currentPage = 1
|
||||
@ -26,11 +33,24 @@ export class SearchComponent implements OnInit {
|
||||
|
||||
errorMessage: string
|
||||
|
||||
constructor(private searchService: SearchService, private route: ActivatedRoute, private router: Router) { }
|
||||
get maxScore() {
|
||||
return this.results?.length > 0 ? this.results[0].score : 100
|
||||
}
|
||||
|
||||
constructor(private searchService: SearchService, private route: ActivatedRoute, private router: Router, private documentService: DocumentService) { }
|
||||
|
||||
ngOnInit(): void {
|
||||
this.route.queryParamMap.subscribe(paramMap => {
|
||||
window.scrollTo(0, 0)
|
||||
this.query = paramMap.get('query')
|
||||
this.more_like = paramMap.has('more_like') ? +paramMap.get('more_like') : null
|
||||
if (this.more_like) {
|
||||
this.documentService.get(this.more_like).subscribe(r => {
|
||||
this.more_like_doc = r
|
||||
})
|
||||
} else {
|
||||
this.more_like_doc = null
|
||||
}
|
||||
this.searching = true
|
||||
this.currentPage = 1
|
||||
this.loadPage()
|
||||
@ -39,13 +59,14 @@ export class SearchComponent implements OnInit {
|
||||
}
|
||||
|
||||
searchCorrectedQuery() {
|
||||
this.router.navigate(["search"], {queryParams: {query: this.correctedQuery}})
|
||||
this.router.navigate(["search"], {queryParams: {query: this.correctedQuery, more_like: this.more_like}})
|
||||
}
|
||||
|
||||
loadPage(append: boolean = false) {
|
||||
this.errorMessage = null
|
||||
this.correctedQuery = null
|
||||
this.searchService.search(this.query, this.currentPage).subscribe(result => {
|
||||
|
||||
this.searchService.search(this.query, this.currentPage, this.more_like).subscribe(result => {
|
||||
if (append) {
|
||||
this.results.push(...result.results)
|
||||
} else {
|
||||
|
@ -15,11 +15,17 @@ export class SearchService {
|
||||
|
||||
constructor(private http: HttpClient, private documentService: DocumentService) { }
|
||||
|
||||
search(query: string, page?: number): Observable<SearchResult> {
|
||||
let httpParams = new HttpParams().set('query', query)
|
||||
search(query: string, page?: number, more_like?: number): Observable<SearchResult> {
|
||||
let httpParams = new HttpParams()
|
||||
if (query) {
|
||||
httpParams = httpParams.set('query', query)
|
||||
}
|
||||
if (page) {
|
||||
httpParams = httpParams.set('page', page.toString())
|
||||
}
|
||||
if (more_like) {
|
||||
httpParams = httpParams.set('more_like', more_like.toString())
|
||||
}
|
||||
return this.http.get<SearchResult>(`${environment.apiBaseUrl}search/`, {params: httpParams}).pipe(
|
||||
map(result => {
|
||||
result.results.forEach(hit => this.documentService.addObservablesToDocument(hit.document))
|
||||
|
@ -5,7 +5,8 @@
|
||||
export const environment = {
|
||||
production: false,
|
||||
apiBaseUrl: "http://localhost:8000/api/",
|
||||
appTitle: "DEVELOPMENT P-NG"
|
||||
appTitle: "Paperless-ng",
|
||||
version: "DEVELOPMENT"
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -247,7 +247,6 @@ class Consumer(LoggingMixin):
|
||||
|
||||
with open(self.path, "rb") as f:
|
||||
document = Document.objects.create(
|
||||
correspondent=file_info.correspondent,
|
||||
title=(self.override_title or file_info.title)[:127],
|
||||
content=text,
|
||||
mime_type=mime_type,
|
||||
@ -257,12 +256,6 @@ class Consumer(LoggingMixin):
|
||||
storage_type=storage_type
|
||||
)
|
||||
|
||||
relevant_tags = set(file_info.tags)
|
||||
if relevant_tags:
|
||||
tag_names = ", ".join([t.name for t in relevant_tags])
|
||||
self.log("debug", "Tagging with {}".format(tag_names))
|
||||
document.tags.add(*relevant_tags)
|
||||
|
||||
self.apply_overrides(document)
|
||||
|
||||
document.save()
|
||||
|
@ -3,7 +3,7 @@ import os
|
||||
from contextlib import contextmanager
|
||||
|
||||
from django.conf import settings
|
||||
from whoosh import highlight
|
||||
from whoosh import highlight, classify, query
|
||||
from whoosh.fields import Schema, TEXT, NUMERIC, KEYWORD, DATETIME
|
||||
from whoosh.highlight import Formatter, get_text
|
||||
from whoosh.index import create_in, exists_in, open_dir
|
||||
@ -20,32 +20,37 @@ class JsonFormatter(Formatter):
|
||||
self.seen = {}
|
||||
|
||||
def format_token(self, text, token, replace=False):
|
||||
seen = self.seen
|
||||
ttext = self._text(get_text(text, token, replace))
|
||||
if ttext in seen:
|
||||
termnum = seen[ttext]
|
||||
else:
|
||||
termnum = len(seen)
|
||||
seen[ttext] = termnum
|
||||
|
||||
return {'text': ttext, 'term': termnum}
|
||||
return {'text': ttext, 'highlight': 'true'}
|
||||
|
||||
def format_fragment(self, fragment, replace=False):
|
||||
output = []
|
||||
index = fragment.startchar
|
||||
text = fragment.text
|
||||
|
||||
amend_token = None
|
||||
for t in fragment.matches:
|
||||
if t.startchar is None:
|
||||
continue
|
||||
if t.startchar < index:
|
||||
continue
|
||||
if t.startchar > index:
|
||||
output.append({'text': text[index:t.startchar]})
|
||||
output.append(self.format_token(text, t, replace))
|
||||
text_inbetween = text[index:t.startchar]
|
||||
if amend_token and t.startchar - index < 10:
|
||||
amend_token['text'] += text_inbetween
|
||||
else:
|
||||
output.append({'text': text_inbetween,
|
||||
'highlight': False})
|
||||
amend_token = None
|
||||
token = self.format_token(text, t, replace)
|
||||
if amend_token:
|
||||
amend_token['text'] += token['text']
|
||||
else:
|
||||
output.append(token)
|
||||
amend_token = token
|
||||
index = t.endchar
|
||||
if index < fragment.endchar:
|
||||
output.append({'text': text[index:fragment.endchar]})
|
||||
output.append({'text': text[index:fragment.endchar],
|
||||
'highlight': False})
|
||||
return output
|
||||
|
||||
def format(self, fragments, replace=False):
|
||||
@ -120,22 +125,42 @@ def remove_document_from_index(document):
|
||||
|
||||
|
||||
@contextmanager
|
||||
def query_page(ix, querystring, page):
|
||||
def query_page(ix, page, querystring, more_like_doc_id, more_like_doc_content):
|
||||
searcher = ix.searcher()
|
||||
try:
|
||||
if querystring:
|
||||
qp = MultifieldParser(
|
||||
["content", "title", "correspondent", "tag", "type"],
|
||||
ix.schema)
|
||||
qp.add_plugin(DateParserPlugin())
|
||||
str_q = qp.parse(querystring)
|
||||
corrected = searcher.correct_query(str_q, querystring)
|
||||
else:
|
||||
str_q = None
|
||||
corrected = None
|
||||
|
||||
if more_like_doc_id:
|
||||
docnum = searcher.document_number(id=more_like_doc_id)
|
||||
kts = searcher.key_terms_from_text(
|
||||
'content', more_like_doc_content, numterms=20,
|
||||
model=classify.Bo1Model, normalize=False)
|
||||
more_like_q = query.Or(
|
||||
[query.Term('content', word, boost=weight)
|
||||
for word, weight in kts])
|
||||
result_page = searcher.search_page(
|
||||
more_like_q, page, filter=str_q, mask={docnum})
|
||||
elif str_q:
|
||||
result_page = searcher.search_page(str_q, page)
|
||||
else:
|
||||
raise ValueError(
|
||||
"Either querystring or more_like_doc_id is required."
|
||||
)
|
||||
|
||||
q = qp.parse(querystring)
|
||||
result_page = searcher.search_page(q, page)
|
||||
result_page.results.fragmenter = highlight.ContextFragmenter(
|
||||
surround=50)
|
||||
result_page.results.formatter = JsonFormatter()
|
||||
|
||||
corrected = searcher.correct_query(q, querystring)
|
||||
if corrected.query != q:
|
||||
if corrected and corrected.query != str_q:
|
||||
corrected_query = corrected.string
|
||||
else:
|
||||
corrected_query = None
|
||||
|
@ -11,6 +11,7 @@ from paperless.db import GnuPG
|
||||
STORAGE_TYPE_UNENCRYPTED = "unencrypted"
|
||||
STORAGE_TYPE_GPG = "gpg"
|
||||
|
||||
|
||||
def source_path(self):
|
||||
if self.filename:
|
||||
fname = str(self.filename)
|
||||
|
@ -357,54 +357,12 @@ class SavedViewFilterRule(models.Model):
|
||||
# TODO: why is this in the models file?
|
||||
class FileInfo:
|
||||
|
||||
# This epic regex *almost* worked for our needs, so I'm keeping it here for
|
||||
# posterity, in the hopes that we might find a way to make it work one day.
|
||||
ALMOST_REGEX = re.compile(
|
||||
r"^((?P<date>\d\d\d\d\d\d\d\d\d\d\d\d\d\dZ){separator})?"
|
||||
r"((?P<correspondent>{non_separated_word}+){separator})??"
|
||||
r"(?P<title>{non_separated_word}+)"
|
||||
r"({separator}(?P<tags>[a-z,0-9-]+))?"
|
||||
r"\.(?P<extension>[a-zA-Z.-]+)$".format(
|
||||
separator=r"\s+-\s+",
|
||||
non_separated_word=r"([\w,. ]|([^\s]-))"
|
||||
)
|
||||
)
|
||||
REGEXES = OrderedDict([
|
||||
("created-correspondent-title-tags", re.compile(
|
||||
r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
|
||||
r"(?P<correspondent>.*) - "
|
||||
r"(?P<title>.*) - "
|
||||
r"(?P<tags>[a-z0-9\-,]*)$",
|
||||
flags=re.IGNORECASE
|
||||
)),
|
||||
("created-title-tags", re.compile(
|
||||
r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
|
||||
r"(?P<title>.*) - "
|
||||
r"(?P<tags>[a-z0-9\-,]*)$",
|
||||
flags=re.IGNORECASE
|
||||
)),
|
||||
("created-correspondent-title", re.compile(
|
||||
r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
|
||||
r"(?P<correspondent>.*) - "
|
||||
r"(?P<title>.*)$",
|
||||
flags=re.IGNORECASE
|
||||
)),
|
||||
("created-title", re.compile(
|
||||
r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
|
||||
r"(?P<title>.*)$",
|
||||
flags=re.IGNORECASE
|
||||
)),
|
||||
("correspondent-title-tags", re.compile(
|
||||
r"(?P<correspondent>.*) - "
|
||||
r"(?P<title>.*) - "
|
||||
r"(?P<tags>[a-z0-9\-,]*)$",
|
||||
flags=re.IGNORECASE
|
||||
)),
|
||||
("correspondent-title", re.compile(
|
||||
r"(?P<correspondent>.*) - "
|
||||
r"(?P<title>.*)?$",
|
||||
flags=re.IGNORECASE
|
||||
)),
|
||||
("title", re.compile(
|
||||
r"(?P<title>.*)$",
|
||||
flags=re.IGNORECASE
|
||||
@ -427,23 +385,10 @@ class FileInfo:
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
@classmethod
|
||||
def _get_correspondent(cls, name):
|
||||
if not name:
|
||||
return None
|
||||
return Correspondent.objects.get_or_create(name=name)[0]
|
||||
|
||||
@classmethod
|
||||
def _get_title(cls, title):
|
||||
return title
|
||||
|
||||
@classmethod
|
||||
def _get_tags(cls, tags):
|
||||
r = []
|
||||
for t in tags.split(","):
|
||||
r.append(Tag.objects.get_or_create(name=t)[0])
|
||||
return tuple(r)
|
||||
|
||||
@classmethod
|
||||
def _mangle_property(cls, properties, name):
|
||||
if name in properties:
|
||||
@ -453,15 +398,6 @@ class FileInfo:
|
||||
|
||||
@classmethod
|
||||
def from_filename(cls, filename):
|
||||
"""
|
||||
We use a crude naming convention to make handling the correspondent,
|
||||
title, and tags easier:
|
||||
"<date> - <correspondent> - <title> - <tags>"
|
||||
"<correspondent> - <title> - <tags>"
|
||||
"<correspondent> - <title>"
|
||||
"<title>"
|
||||
"""
|
||||
|
||||
# Mutate filename in-place before parsing its components
|
||||
# by applying at most one of the configured transformations.
|
||||
for (pattern, repl) in settings.FILENAME_PARSE_TRANSFORMS:
|
||||
@ -492,7 +428,5 @@ class FileInfo:
|
||||
if m:
|
||||
properties = m.groupdict()
|
||||
cls._mangle_property(properties, "created")
|
||||
cls._mangle_property(properties, "correspondent")
|
||||
cls._mangle_property(properties, "title")
|
||||
cls._mangle_property(properties, "tags")
|
||||
return cls(**properties)
|
||||
|
@ -5,7 +5,7 @@
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<title>PaperlessUi</title>
|
||||
<title>Paperless-ng</title>
|
||||
<base href="/">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<meta name="cookie_prefix" content="{{cookie_prefix}}">
|
||||
|
57
src/documents/tests/test_admin.py
Normal file
57
src/documents/tests/test_admin.py
Normal file
@ -0,0 +1,57 @@
|
||||
from unittest import mock
|
||||
|
||||
from django.contrib.admin.sites import AdminSite
|
||||
from django.test import TestCase
|
||||
from django.utils import timezone
|
||||
|
||||
from documents.admin import DocumentAdmin
|
||||
from documents.models import Document, Tag
|
||||
|
||||
|
||||
class TestDocumentAdmin(TestCase):
|
||||
|
||||
def setUp(self) -> None:
|
||||
self.doc_admin = DocumentAdmin(model=Document, admin_site=AdminSite())
|
||||
|
||||
@mock.patch("documents.admin.index.add_or_update_document")
|
||||
def test_save_model(self, m):
|
||||
doc = Document.objects.create(title="test")
|
||||
doc.title = "new title"
|
||||
self.doc_admin.save_model(None, doc, None, None)
|
||||
self.assertEqual(Document.objects.get(id=doc.id).title, "new title")
|
||||
m.assert_called_once()
|
||||
|
||||
def test_tags(self):
|
||||
doc = Document.objects.create(title="test")
|
||||
doc.tags.create(name="t1")
|
||||
doc.tags.create(name="t2")
|
||||
|
||||
self.assertEqual(self.doc_admin.tags_(doc), "<span >t1, </span><span >t2, </span>")
|
||||
|
||||
def test_tags_empty(self):
|
||||
doc = Document.objects.create(title="test")
|
||||
|
||||
self.assertEqual(self.doc_admin.tags_(doc), "")
|
||||
|
||||
@mock.patch("documents.admin.index.remove_document")
|
||||
def test_delete_model(self, m):
|
||||
doc = Document.objects.create(title="test")
|
||||
self.doc_admin.delete_model(None, doc)
|
||||
self.assertRaises(Document.DoesNotExist, Document.objects.get, id=doc.id)
|
||||
m.assert_called_once()
|
||||
|
||||
@mock.patch("documents.admin.index.remove_document")
|
||||
def test_delete_queryset(self, m):
|
||||
for i in range(42):
|
||||
Document.objects.create(title="Many documents with the same title", checksum=f"{i:02}")
|
||||
|
||||
self.assertEqual(Document.objects.count(), 42)
|
||||
|
||||
self.doc_admin.delete_queryset(None, Document.objects.all())
|
||||
|
||||
self.assertEqual(m.call_count, 42)
|
||||
self.assertEqual(Document.objects.count(), 0)
|
||||
|
||||
def test_created(self):
|
||||
doc = Document.objects.create(title="test", created=timezone.datetime(2020, 4, 12))
|
||||
self.assertEqual(self.doc_admin.created_(doc), "2020-04-12")
|
@ -352,6 +352,25 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
|
||||
|
||||
self.assertEqual(correction, None)
|
||||
|
||||
def test_search_more_like(self):
|
||||
d1=Document.objects.create(title="invoice", content="the thing i bought at a shop and paid with bank account", checksum="A", pk=1)
|
||||
d2=Document.objects.create(title="bank statement 1", content="things i paid for in august", pk=2, checksum="B")
|
||||
d3=Document.objects.create(title="bank statement 3", content="things i paid for in september", pk=3, checksum="C")
|
||||
with AsyncWriter(index.open_index()) as writer:
|
||||
index.update_document(writer, d1)
|
||||
index.update_document(writer, d2)
|
||||
index.update_document(writer, d3)
|
||||
|
||||
response = self.client.get(f"/api/search/?more_like={d2.id}")
|
||||
|
||||
self.assertEqual(response.status_code, 200)
|
||||
|
||||
results = response.data['results']
|
||||
|
||||
self.assertEqual(len(results), 2)
|
||||
self.assertEqual(results[0]['id'], d3.id)
|
||||
self.assertEqual(results[1]['id'], d1.id)
|
||||
|
||||
def test_statistics(self):
|
||||
|
||||
doc1 = Document.objects.create(title="none1", checksum="A")
|
||||
|
@ -29,81 +29,6 @@ class TestAttributes(TestCase):
|
||||
|
||||
self.assertEqual(tuple([t.name for t in file_info.tags]), tags, filename)
|
||||
|
||||
def test_guess_attributes_from_name0(self):
|
||||
self._test_guess_attributes_from_name(
|
||||
"Sender - Title.pdf", "Sender", "Title", ())
|
||||
|
||||
def test_guess_attributes_from_name1(self):
|
||||
self._test_guess_attributes_from_name(
|
||||
"Spaced Sender - Title.pdf", "Spaced Sender", "Title", ())
|
||||
|
||||
def test_guess_attributes_from_name2(self):
|
||||
self._test_guess_attributes_from_name(
|
||||
"Sender - Spaced Title.pdf", "Sender", "Spaced Title", ())
|
||||
|
||||
def test_guess_attributes_from_name3(self):
|
||||
self._test_guess_attributes_from_name(
|
||||
"Dashed-Sender - Title.pdf", "Dashed-Sender", "Title", ())
|
||||
|
||||
def test_guess_attributes_from_name4(self):
|
||||
self._test_guess_attributes_from_name(
|
||||
"Sender - Dashed-Title.pdf", "Sender", "Dashed-Title", ())
|
||||
|
||||
def test_guess_attributes_from_name5(self):
|
||||
self._test_guess_attributes_from_name(
|
||||
"Sender - Title - tag1,tag2,tag3.pdf",
|
||||
"Sender",
|
||||
"Title",
|
||||
self.TAGS
|
||||
)
|
||||
|
||||
def test_guess_attributes_from_name6(self):
|
||||
self._test_guess_attributes_from_name(
|
||||
"Spaced Sender - Title - tag1,tag2,tag3.pdf",
|
||||
"Spaced Sender",
|
||||
"Title",
|
||||
self.TAGS
|
||||
)
|
||||
|
||||
def test_guess_attributes_from_name7(self):
|
||||
self._test_guess_attributes_from_name(
|
||||
"Sender - Spaced Title - tag1,tag2,tag3.pdf",
|
||||
"Sender",
|
||||
"Spaced Title",
|
||||
self.TAGS
|
||||
)
|
||||
|
||||
def test_guess_attributes_from_name8(self):
|
||||
self._test_guess_attributes_from_name(
|
||||
"Dashed-Sender - Title - tag1,tag2,tag3.pdf",
|
||||
"Dashed-Sender",
|
||||
"Title",
|
||||
self.TAGS
|
||||
)
|
||||
|
||||
def test_guess_attributes_from_name9(self):
|
||||
self._test_guess_attributes_from_name(
|
||||
"Sender - Dashed-Title - tag1,tag2,tag3.pdf",
|
||||
"Sender",
|
||||
"Dashed-Title",
|
||||
self.TAGS
|
||||
)
|
||||
|
||||
def test_guess_attributes_from_name10(self):
|
||||
self._test_guess_attributes_from_name(
|
||||
"Σενδερ - Τιτλε - tag1,tag2,tag3.pdf",
|
||||
"Σενδερ",
|
||||
"Τιτλε",
|
||||
self.TAGS
|
||||
)
|
||||
|
||||
def test_guess_attributes_from_name_when_correspondent_empty(self):
|
||||
self._test_guess_attributes_from_name(
|
||||
' - weird empty correspondent but should not break.pdf',
|
||||
None,
|
||||
'weird empty correspondent but should not break',
|
||||
()
|
||||
)
|
||||
|
||||
def test_guess_attributes_from_name_when_title_starts_with_dash(self):
|
||||
self._test_guess_attributes_from_name(
|
||||
@ -121,28 +46,6 @@ class TestAttributes(TestCase):
|
||||
()
|
||||
)
|
||||
|
||||
def test_guess_attributes_from_name_when_title_is_empty(self):
|
||||
self._test_guess_attributes_from_name(
|
||||
'weird correspondent but should not break - .pdf',
|
||||
'weird correspondent but should not break',
|
||||
'',
|
||||
()
|
||||
)
|
||||
|
||||
def test_case_insensitive_tag_creation(self):
|
||||
"""
|
||||
Tags should be detected and created as lower case.
|
||||
:return:
|
||||
"""
|
||||
|
||||
filename = "Title - Correspondent - tAg1,TAG2.pdf"
|
||||
self.assertEqual(len(FileInfo.from_filename(filename).tags), 2)
|
||||
|
||||
path = "Title - Correspondent - tag1,tag2.pdf"
|
||||
self.assertEqual(len(FileInfo.from_filename(filename).tags), 2)
|
||||
|
||||
self.assertEqual(Tag.objects.all().count(), 2)
|
||||
|
||||
|
||||
class TestFieldPermutations(TestCase):
|
||||
|
||||
@ -199,69 +102,7 @@ class TestFieldPermutations(TestCase):
|
||||
filename = template.format(**spec)
|
||||
self._test_guessed_attributes(filename, **spec)
|
||||
|
||||
def test_title_and_correspondent(self):
|
||||
template = '{correspondent} - {title}.pdf'
|
||||
for correspondent in self.valid_correspondents:
|
||||
for title in self.valid_titles:
|
||||
spec = dict(correspondent=correspondent, title=title)
|
||||
filename = template.format(**spec)
|
||||
self._test_guessed_attributes(filename, **spec)
|
||||
|
||||
def test_title_and_correspondent_and_tags(self):
|
||||
template = '{correspondent} - {title} - {tags}.pdf'
|
||||
for correspondent in self.valid_correspondents:
|
||||
for title in self.valid_titles:
|
||||
for tags in self.valid_tags:
|
||||
spec = dict(correspondent=correspondent, title=title,
|
||||
tags=tags)
|
||||
filename = template.format(**spec)
|
||||
self._test_guessed_attributes(filename, **spec)
|
||||
|
||||
def test_created_and_correspondent_and_title_and_tags(self):
|
||||
|
||||
template = (
|
||||
"{created} - "
|
||||
"{correspondent} - "
|
||||
"{title} - "
|
||||
"{tags}.pdf"
|
||||
)
|
||||
|
||||
for created in self.valid_dates:
|
||||
for correspondent in self.valid_correspondents:
|
||||
for title in self.valid_titles:
|
||||
for tags in self.valid_tags:
|
||||
spec = {
|
||||
"created": created,
|
||||
"correspondent": correspondent,
|
||||
"title": title,
|
||||
"tags": tags,
|
||||
}
|
||||
self._test_guessed_attributes(
|
||||
template.format(**spec), **spec)
|
||||
|
||||
def test_created_and_correspondent_and_title(self):
|
||||
|
||||
template = "{created} - {correspondent} - {title}.pdf"
|
||||
|
||||
for created in self.valid_dates:
|
||||
for correspondent in self.valid_correspondents:
|
||||
for title in self.valid_titles:
|
||||
|
||||
# Skip cases where title looks like a tag as we can't
|
||||
# accommodate such cases.
|
||||
if title.lower() == title:
|
||||
continue
|
||||
|
||||
spec = {
|
||||
"created": created,
|
||||
"correspondent": correspondent,
|
||||
"title": title
|
||||
}
|
||||
self._test_guessed_attributes(
|
||||
template.format(**spec), **spec)
|
||||
|
||||
def test_created_and_title(self):
|
||||
|
||||
template = "{created} - {title}.pdf"
|
||||
|
||||
for created in self.valid_dates:
|
||||
@ -273,21 +114,6 @@ class TestFieldPermutations(TestCase):
|
||||
self._test_guessed_attributes(
|
||||
template.format(**spec), **spec)
|
||||
|
||||
def test_created_and_title_and_tags(self):
|
||||
|
||||
template = "{created} - {title} - {tags}.pdf"
|
||||
|
||||
for created in self.valid_dates:
|
||||
for title in self.valid_titles:
|
||||
for tags in self.valid_tags:
|
||||
spec = {
|
||||
"created": created,
|
||||
"title": title,
|
||||
"tags": tags
|
||||
}
|
||||
self._test_guessed_attributes(
|
||||
template.format(**spec), **spec)
|
||||
|
||||
def test_invalid_date_format(self):
|
||||
info = FileInfo.from_filename("06112017Z - title.pdf")
|
||||
self.assertEqual(info.title, "title")
|
||||
@ -336,32 +162,6 @@ class TestFieldPermutations(TestCase):
|
||||
info = FileInfo.from_filename(filename)
|
||||
self.assertEqual(info.title, "anotherall")
|
||||
|
||||
# Complex transformation without date in replacement string
|
||||
with self.settings(
|
||||
FILENAME_PARSE_TRANSFORMS=[(exact_patt, repl1)]):
|
||||
info = FileInfo.from_filename(filename)
|
||||
self.assertEqual(info.title, "0001")
|
||||
self.assertEqual(len(info.tags), 2)
|
||||
self.assertEqual(info.tags[0].name, "tag1")
|
||||
self.assertEqual(info.tags[1].name, "tag2")
|
||||
self.assertIsNone(info.created)
|
||||
|
||||
# Complex transformation with date in replacement string
|
||||
with self.settings(
|
||||
FILENAME_PARSE_TRANSFORMS=[
|
||||
(none_patt, "none.gif"),
|
||||
(exact_patt, repl2), # <-- matches
|
||||
(exact_patt, repl1),
|
||||
(all_patt, "all.gif")]):
|
||||
info = FileInfo.from_filename(filename)
|
||||
self.assertEqual(info.title, "0001")
|
||||
self.assertEqual(len(info.tags), 2)
|
||||
self.assertEqual(info.tags[0].name, "tag1")
|
||||
self.assertEqual(info.tags[1].name, "tag2")
|
||||
self.assertEqual(info.created.year, 2019)
|
||||
self.assertEqual(info.created.month, 9)
|
||||
self.assertEqual(info.created.day, 8)
|
||||
|
||||
|
||||
class DummyParser(DocumentParser):
|
||||
|
||||
@ -476,15 +276,13 @@ class TestConsumer(DirectoriesMixin, TestCase):
|
||||
|
||||
def testOverrideFilename(self):
|
||||
filename = self.get_test_file()
|
||||
override_filename = "My Bank - Statement for November.pdf"
|
||||
override_filename = "Statement for November.pdf"
|
||||
|
||||
document = self.consumer.try_consume_file(filename, override_filename=override_filename)
|
||||
|
||||
self.assertEqual(document.correspondent.name, "My Bank")
|
||||
self.assertEqual(document.title, "Statement for November")
|
||||
|
||||
def testOverrideTitle(self):
|
||||
|
||||
document = self.consumer.try_consume_file(self.get_test_file(), override_title="Override Title")
|
||||
self.assertEqual(document.title, "Override Title")
|
||||
|
||||
@ -594,11 +392,10 @@ class TestConsumer(DirectoriesMixin, TestCase):
|
||||
def testFilenameHandling(self):
|
||||
filename = self.get_test_file()
|
||||
|
||||
document = self.consumer.try_consume_file(filename, override_filename="Bank - Test.pdf", override_title="new docs")
|
||||
document = self.consumer.try_consume_file(filename, override_title="new docs")
|
||||
|
||||
self.assertEqual(document.title, "new docs")
|
||||
self.assertEqual(document.correspondent.name, "Bank")
|
||||
self.assertEqual(document.filename, "Bank/new docs.pdf")
|
||||
self.assertEqual(document.filename, "none/new docs.pdf")
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
|
||||
@mock.patch("documents.signals.handlers.generate_unique_filename")
|
||||
@ -617,10 +414,9 @@ class TestConsumer(DirectoriesMixin, TestCase):
|
||||
|
||||
Tag.objects.create(name="test", is_inbox_tag=True)
|
||||
|
||||
document = self.consumer.try_consume_file(filename, override_filename="Bank - Test.pdf", override_title="new docs")
|
||||
document = self.consumer.try_consume_file(filename, override_title="new docs")
|
||||
|
||||
self.assertEqual(document.title, "new docs")
|
||||
self.assertEqual(document.correspondent.name, "Bank")
|
||||
self.assertIsNotNone(os.path.isfile(document.title))
|
||||
self.assertTrue(os.path.isfile(document.source_path))
|
||||
|
||||
@ -642,3 +438,31 @@ class TestConsumer(DirectoriesMixin, TestCase):
|
||||
self.assertEqual(document.document_type, dtype)
|
||||
self.assertIn(t1, document.tags.all())
|
||||
self.assertNotIn(t2, document.tags.all())
|
||||
|
||||
@override_settings(CONSUMER_DELETE_DUPLICATES=True)
|
||||
def test_delete_duplicate(self):
|
||||
dst = self.get_test_file()
|
||||
self.assertTrue(os.path.isfile(dst))
|
||||
doc = self.consumer.try_consume_file(dst)
|
||||
|
||||
self.assertFalse(os.path.isfile(dst))
|
||||
self.assertIsNotNone(doc)
|
||||
|
||||
dst = self.get_test_file()
|
||||
self.assertTrue(os.path.isfile(dst))
|
||||
self.assertRaises(ConsumerError, self.consumer.try_consume_file, dst)
|
||||
self.assertFalse(os.path.isfile(dst))
|
||||
|
||||
@override_settings(CONSUMER_DELETE_DUPLICATES=False)
|
||||
def test_no_delete_duplicate(self):
|
||||
dst = self.get_test_file()
|
||||
self.assertTrue(os.path.isfile(dst))
|
||||
doc = self.consumer.try_consume_file(dst)
|
||||
|
||||
self.assertFalse(os.path.isfile(dst))
|
||||
self.assertIsNotNone(doc)
|
||||
|
||||
dst = self.get_test_file()
|
||||
self.assertTrue(os.path.isfile(dst))
|
||||
self.assertRaises(ConsumerError, self.consumer.try_consume_file, dst)
|
||||
self.assertTrue(os.path.isfile(dst))
|
||||
|
@ -14,7 +14,7 @@ from django.utils import timezone
|
||||
from .utils import DirectoriesMixin
|
||||
from ..file_handling import generate_filename, create_source_path_directory, delete_empty_directories, \
|
||||
generate_unique_filename
|
||||
from ..models import Document, Correspondent, Tag
|
||||
from ..models import Document, Correspondent, Tag, DocumentType
|
||||
|
||||
|
||||
class TestFileHandling(DirectoriesMixin, TestCase):
|
||||
@ -190,6 +190,17 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
||||
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), True)
|
||||
self.assertTrue(os.path.isfile(important_file))
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{document_type} - {title}")
|
||||
def test_document_type(self):
|
||||
dt = DocumentType.objects.create(name="my_doc_type")
|
||||
d = Document.objects.create(title="the_doc", mime_type="application/pdf")
|
||||
|
||||
self.assertEqual(generate_filename(d), "none - the_doc.pdf")
|
||||
|
||||
d.document_type = dt
|
||||
|
||||
self.assertEqual(generate_filename(d), "my_doc_type - the_doc.pdf")
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
|
||||
def test_tags_with_underscore(self):
|
||||
document = Document()
|
||||
|
135
src/documents/tests/test_management.py
Normal file
135
src/documents/tests/test_management.py
Normal file
@ -0,0 +1,135 @@
|
||||
import hashlib
|
||||
import tempfile
|
||||
import filecmp
|
||||
import os
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from unittest import mock
|
||||
|
||||
from django.test import TestCase, override_settings
|
||||
|
||||
|
||||
from django.core.management import call_command
|
||||
|
||||
from documents.file_handling import generate_filename
|
||||
from documents.management.commands.document_archiver import handle_document
|
||||
from documents.models import Document
|
||||
from documents.tests.utils import DirectoriesMixin
|
||||
|
||||
|
||||
sample_file = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
|
||||
|
||||
|
||||
class TestArchiver(DirectoriesMixin, TestCase):
|
||||
|
||||
def make_models(self):
|
||||
return Document.objects.create(checksum="A", title="A", content="first document", mime_type="application/pdf")
|
||||
|
||||
def test_archiver(self):
|
||||
|
||||
doc = self.make_models()
|
||||
shutil.copy(sample_file, os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf"))
|
||||
|
||||
call_command('document_archiver')
|
||||
|
||||
def test_handle_document(self):
|
||||
|
||||
doc = self.make_models()
|
||||
shutil.copy(sample_file, os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf"))
|
||||
|
||||
handle_document(doc.pk)
|
||||
|
||||
doc = Document.objects.get(id=doc.id)
|
||||
|
||||
self.assertIsNotNone(doc.checksum)
|
||||
self.assertTrue(os.path.isfile(doc.archive_path))
|
||||
self.assertTrue(os.path.isfile(doc.source_path))
|
||||
self.assertTrue(filecmp.cmp(sample_file, doc.source_path))
|
||||
|
||||
|
||||
class TestDecryptDocuments(TestCase):
|
||||
|
||||
@override_settings(
|
||||
ORIGINALS_DIR=os.path.join(os.path.dirname(__file__), "samples", "originals"),
|
||||
THUMBNAIL_DIR=os.path.join(os.path.dirname(__file__), "samples", "thumb"),
|
||||
PASSPHRASE="test",
|
||||
PAPERLESS_FILENAME_FORMAT=None
|
||||
)
|
||||
@mock.patch("documents.management.commands.decrypt_documents.input")
|
||||
def test_decrypt(self, m):
|
||||
|
||||
media_dir = tempfile.mkdtemp()
|
||||
originals_dir = os.path.join(media_dir, "documents", "originals")
|
||||
thumb_dir = os.path.join(media_dir, "documents", "thumbnails")
|
||||
os.makedirs(originals_dir, exist_ok=True)
|
||||
os.makedirs(thumb_dir, exist_ok=True)
|
||||
|
||||
override_settings(
|
||||
ORIGINALS_DIR=originals_dir,
|
||||
THUMBNAIL_DIR=thumb_dir,
|
||||
PASSPHRASE="test"
|
||||
).enable()
|
||||
|
||||
doc = Document.objects.create(checksum="9c9691e51741c1f4f41a20896af31770", title="wow", filename="0000002.pdf.gpg", mime_type="application/pdf", storage_type=Document.STORAGE_TYPE_GPG)
|
||||
|
||||
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "originals", "0000002.pdf.gpg"), os.path.join(originals_dir, "0000002.pdf.gpg"))
|
||||
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "thumbnails", f"0000002.png.gpg"), os.path.join(thumb_dir, f"{doc.id:07}.png.gpg"))
|
||||
|
||||
call_command('decrypt_documents')
|
||||
|
||||
doc.refresh_from_db()
|
||||
|
||||
self.assertEqual(doc.storage_type, Document.STORAGE_TYPE_UNENCRYPTED)
|
||||
self.assertEqual(doc.filename, "0000002.pdf")
|
||||
self.assertTrue(os.path.isfile(os.path.join(originals_dir, "0000002.pdf")))
|
||||
self.assertTrue(os.path.isfile(doc.source_path))
|
||||
self.assertTrue(os.path.isfile(os.path.join(thumb_dir, f"{doc.id:07}.png")))
|
||||
self.assertTrue(os.path.isfile(doc.thumbnail_path))
|
||||
|
||||
with doc.source_file as f:
|
||||
checksum = hashlib.md5(f.read()).hexdigest()
|
||||
self.assertEqual(checksum, doc.checksum)
|
||||
|
||||
|
||||
class TestMakeIndex(TestCase):
|
||||
|
||||
@mock.patch("documents.management.commands.document_index.index_reindex")
|
||||
def test_reindex(self, m):
|
||||
call_command("document_index", "reindex")
|
||||
m.assert_called_once()
|
||||
|
||||
@mock.patch("documents.management.commands.document_index.index_optimize")
|
||||
def test_optimize(self, m):
|
||||
call_command("document_index", "optimize")
|
||||
m.assert_called_once()
|
||||
|
||||
|
||||
class TestRenamer(DirectoriesMixin, TestCase):
|
||||
|
||||
def test_rename(self):
|
||||
doc = Document.objects.create(title="test", mime_type="application/pdf")
|
||||
doc.filename = generate_filename(doc)
|
||||
doc.save()
|
||||
|
||||
Path(doc.source_path).touch()
|
||||
|
||||
old_source_path = doc.source_path
|
||||
|
||||
with override_settings(PAPERLESS_FILENAME_FORMAT="{title}"):
|
||||
call_command("document_renamer")
|
||||
|
||||
doc2 = Document.objects.get(id=doc.id)
|
||||
|
||||
self.assertEqual(doc2.filename, "test.pdf")
|
||||
self.assertFalse(os.path.isfile(old_source_path))
|
||||
self.assertFalse(os.path.isfile(doc.source_path))
|
||||
self.assertTrue(os.path.isfile(doc2.source_path))
|
||||
|
||||
|
||||
class TestCreateClassifier(TestCase):
|
||||
|
||||
@mock.patch("documents.management.commands.document_create_classifier.train_classifier")
|
||||
def test_create_classifier(self, m):
|
||||
call_command("document_create_classifier")
|
||||
|
||||
m.assert_called_once()
|
@ -1,40 +0,0 @@
|
||||
import filecmp
|
||||
import os
|
||||
import shutil
|
||||
|
||||
from django.core.management import call_command
|
||||
from django.test import TestCase
|
||||
|
||||
from documents.management.commands.document_archiver import handle_document
|
||||
from documents.models import Document
|
||||
from documents.tests.utils import DirectoriesMixin
|
||||
|
||||
|
||||
sample_file = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
|
||||
|
||||
|
||||
class TestArchiver(DirectoriesMixin, TestCase):
|
||||
|
||||
def make_models(self):
|
||||
return Document.objects.create(checksum="A", title="A", content="first document", mime_type="application/pdf")
|
||||
|
||||
def test_archiver(self):
|
||||
|
||||
doc = self.make_models()
|
||||
shutil.copy(sample_file, os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf"))
|
||||
|
||||
call_command('document_archiver')
|
||||
|
||||
def test_handle_document(self):
|
||||
|
||||
doc = self.make_models()
|
||||
shutil.copy(sample_file, os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf"))
|
||||
|
||||
handle_document(doc.pk)
|
||||
|
||||
doc = Document.objects.get(id=doc.id)
|
||||
|
||||
self.assertIsNotNone(doc.checksum)
|
||||
self.assertTrue(os.path.isfile(doc.archive_path))
|
||||
self.assertTrue(os.path.isfile(doc.source_path))
|
||||
self.assertTrue(filecmp.cmp(sample_file, doc.source_path))
|
@ -1,57 +0,0 @@
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
import tempfile
|
||||
from unittest import mock
|
||||
|
||||
from django.core.management import call_command
|
||||
from django.test import TestCase, override_settings
|
||||
|
||||
from documents.management.commands import document_exporter
|
||||
from documents.models import Document, Tag, DocumentType, Correspondent
|
||||
|
||||
|
||||
class TestDecryptDocuments(TestCase):
|
||||
|
||||
@override_settings(
|
||||
ORIGINALS_DIR=os.path.join(os.path.dirname(__file__), "samples", "originals"),
|
||||
THUMBNAIL_DIR=os.path.join(os.path.dirname(__file__), "samples", "thumb"),
|
||||
PASSPHRASE="test",
|
||||
PAPERLESS_FILENAME_FORMAT=None
|
||||
)
|
||||
@mock.patch("documents.management.commands.decrypt_documents.input")
|
||||
def test_decrypt(self, m):
|
||||
|
||||
media_dir = tempfile.mkdtemp()
|
||||
originals_dir = os.path.join(media_dir, "documents", "originals")
|
||||
thumb_dir = os.path.join(media_dir, "documents", "thumbnails")
|
||||
os.makedirs(originals_dir, exist_ok=True)
|
||||
os.makedirs(thumb_dir, exist_ok=True)
|
||||
|
||||
override_settings(
|
||||
ORIGINALS_DIR=originals_dir,
|
||||
THUMBNAIL_DIR=thumb_dir,
|
||||
PASSPHRASE="test"
|
||||
).enable()
|
||||
|
||||
doc = Document.objects.create(checksum="9c9691e51741c1f4f41a20896af31770", title="wow", filename="0000002.pdf.gpg", mime_type="application/pdf", storage_type=Document.STORAGE_TYPE_GPG)
|
||||
|
||||
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "originals", "0000002.pdf.gpg"), os.path.join(originals_dir, "0000002.pdf.gpg"))
|
||||
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "thumbnails", f"0000002.png.gpg"), os.path.join(thumb_dir, f"{doc.id:07}.png.gpg"))
|
||||
|
||||
call_command('decrypt_documents')
|
||||
|
||||
doc.refresh_from_db()
|
||||
|
||||
self.assertEqual(doc.storage_type, Document.STORAGE_TYPE_UNENCRYPTED)
|
||||
self.assertEqual(doc.filename, "0000002.pdf")
|
||||
self.assertTrue(os.path.isfile(os.path.join(originals_dir, "0000002.pdf")))
|
||||
self.assertTrue(os.path.isfile(doc.source_path))
|
||||
self.assertTrue(os.path.isfile(os.path.join(thumb_dir, f"{doc.id:07}.png")))
|
||||
self.assertTrue(os.path.isfile(doc.thumbnail_path))
|
||||
|
||||
with doc.source_file as f:
|
||||
checksum = hashlib.md5(f.read()).hexdigest()
|
||||
self.assertEqual(checksum, doc.checksum)
|
||||
|
129
src/documents/tests/test_migrations.py
Normal file
129
src/documents/tests/test_migrations.py
Normal file
@ -0,0 +1,129 @@
|
||||
import os
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
|
||||
from django.apps import apps
|
||||
from django.conf import settings
|
||||
from django.db import connection
|
||||
from django.db.migrations.executor import MigrationExecutor
|
||||
from django.test import TestCase, TransactionTestCase, override_settings
|
||||
|
||||
from documents.models import Document
|
||||
from documents.parsers import get_default_file_extension
|
||||
from documents.tests.utils import DirectoriesMixin
|
||||
|
||||
|
||||
class TestMigrations(TransactionTestCase):
|
||||
|
||||
@property
|
||||
def app(self):
|
||||
return apps.get_containing_app_config(type(self).__module__).name
|
||||
|
||||
migrate_from = None
|
||||
migrate_to = None
|
||||
|
||||
def setUp(self):
|
||||
super(TestMigrations, self).setUp()
|
||||
|
||||
assert self.migrate_from and self.migrate_to, \
|
||||
"TestCase '{}' must define migrate_from and migrate_to properties".format(type(self).__name__)
|
||||
self.migrate_from = [(self.app, self.migrate_from)]
|
||||
self.migrate_to = [(self.app, self.migrate_to)]
|
||||
executor = MigrationExecutor(connection)
|
||||
old_apps = executor.loader.project_state(self.migrate_from).apps
|
||||
|
||||
# Reverse to the original migration
|
||||
executor.migrate(self.migrate_from)
|
||||
|
||||
self.setUpBeforeMigration(old_apps)
|
||||
|
||||
# Run the migration to test
|
||||
executor = MigrationExecutor(connection)
|
||||
executor.loader.build_graph() # reload.
|
||||
executor.migrate(self.migrate_to)
|
||||
|
||||
self.apps = executor.loader.project_state(self.migrate_to).apps
|
||||
|
||||
def setUpBeforeMigration(self, apps):
|
||||
pass
|
||||
|
||||
|
||||
STORAGE_TYPE_UNENCRYPTED = "unencrypted"
|
||||
STORAGE_TYPE_GPG = "gpg"
|
||||
|
||||
|
||||
def source_path_before(self):
|
||||
if self.filename:
|
||||
fname = str(self.filename)
|
||||
else:
|
||||
fname = "{:07}.{}".format(self.pk, self.file_type)
|
||||
if self.storage_type == STORAGE_TYPE_GPG:
|
||||
fname += ".gpg"
|
||||
|
||||
return os.path.join(
|
||||
settings.ORIGINALS_DIR,
|
||||
fname
|
||||
)
|
||||
|
||||
|
||||
def file_type_after(self):
|
||||
return get_default_file_extension(self.mime_type)
|
||||
|
||||
|
||||
def source_path_after(doc):
|
||||
if doc.filename:
|
||||
fname = str(doc.filename)
|
||||
else:
|
||||
fname = "{:07}{}".format(doc.pk, file_type_after(doc))
|
||||
if doc.storage_type == STORAGE_TYPE_GPG:
|
||||
fname += ".gpg" # pragma: no cover
|
||||
|
||||
return os.path.join(
|
||||
settings.ORIGINALS_DIR,
|
||||
fname
|
||||
)
|
||||
|
||||
|
||||
@override_settings(PASSPHRASE="test")
|
||||
class TestMigrateMimeType(DirectoriesMixin, TestMigrations):
|
||||
|
||||
migrate_from = '1002_auto_20201111_1105'
|
||||
migrate_to = '1003_mime_types'
|
||||
|
||||
def setUpBeforeMigration(self, apps):
|
||||
Document = apps.get_model("documents", "Document")
|
||||
doc = Document.objects.create(title="test", file_type="pdf", filename="file1.pdf")
|
||||
self.doc_id = doc.id
|
||||
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), source_path_before(doc))
|
||||
|
||||
doc2 = Document.objects.create(checksum="B", file_type="pdf", storage_type=STORAGE_TYPE_GPG)
|
||||
self.doc2_id = doc2.id
|
||||
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "originals", "0000002.pdf.gpg"), source_path_before(doc2))
|
||||
|
||||
def testMimeTypesMigrated(self):
|
||||
Document = self.apps.get_model('documents', 'Document')
|
||||
|
||||
doc = Document.objects.get(id=self.doc_id)
|
||||
self.assertEqual(doc.mime_type, "application/pdf")
|
||||
|
||||
doc2 = Document.objects.get(id=self.doc2_id)
|
||||
self.assertEqual(doc2.mime_type, "application/pdf")
|
||||
|
||||
|
||||
@override_settings(PASSPHRASE="test")
|
||||
class TestMigrateMimeTypeBackwards(DirectoriesMixin, TestMigrations):
|
||||
|
||||
migrate_from = '1003_mime_types'
|
||||
migrate_to = '1002_auto_20201111_1105'
|
||||
|
||||
def setUpBeforeMigration(self, apps):
|
||||
Document = apps.get_model("documents", "Document")
|
||||
doc = Document.objects.create(title="test", mime_type="application/pdf", filename="file1.pdf")
|
||||
self.doc_id = doc.id
|
||||
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), source_path_after(doc))
|
||||
|
||||
def testMimeTypesReverted(self):
|
||||
Document = self.apps.get_model('documents', 'Document')
|
||||
|
||||
doc = Document.objects.get(id=self.doc_id)
|
||||
self.assertEqual(doc.file_type, "pdf")
|
@ -389,14 +389,27 @@ class SearchView(APIView):
|
||||
}
|
||||
|
||||
def get(self, request, format=None):
|
||||
if 'query' not in request.query_params:
|
||||
|
||||
if 'query' in request.query_params:
|
||||
query = request.query_params['query']
|
||||
else:
|
||||
query = None
|
||||
|
||||
if 'more_like' in request.query_params:
|
||||
more_like_id = request.query_params['more_like']
|
||||
more_like_content = Document.objects.get(id=more_like_id).content
|
||||
else:
|
||||
more_like_id = None
|
||||
more_like_content = None
|
||||
|
||||
if not query and not more_like_id:
|
||||
return Response({
|
||||
'count': 0,
|
||||
'page': 0,
|
||||
'page_count': 0,
|
||||
'corrected_query': None,
|
||||
'results': []})
|
||||
|
||||
query = request.query_params['query']
|
||||
try:
|
||||
page = int(request.query_params.get('page', 1))
|
||||
except (ValueError, TypeError):
|
||||
@ -406,8 +419,7 @@ class SearchView(APIView):
|
||||
page = 1
|
||||
|
||||
try:
|
||||
with index.query_page(self.ix, query, page) as (result_page,
|
||||
corrected_query):
|
||||
with index.query_page(self.ix, page, query, more_like_id, more_like_content) as (result_page, corrected_query): # NOQA: E501
|
||||
return Response(
|
||||
{'count': len(result_page),
|
||||
'page': result_page.pagenum,
|
||||
|
@ -13,18 +13,17 @@ writeable_hint = (
|
||||
)
|
||||
|
||||
|
||||
def path_check(env_var):
|
||||
def path_check(var, directory):
|
||||
messages = []
|
||||
directory = os.getenv(env_var)
|
||||
if directory:
|
||||
if not os.path.exists(directory):
|
||||
messages.append(Error(
|
||||
exists_message.format(env_var),
|
||||
exists_message.format(var),
|
||||
exists_hint.format(directory)
|
||||
))
|
||||
elif not os.access(directory, os.W_OK | os.X_OK):
|
||||
messages.append(Error(
|
||||
writeable_message.format(env_var),
|
||||
writeable_message.format(var),
|
||||
writeable_hint.format(directory)
|
||||
))
|
||||
return messages
|
||||
@ -36,12 +35,9 @@ def paths_check(app_configs, **kwargs):
|
||||
Check the various paths for existence, readability and writeability
|
||||
"""
|
||||
|
||||
check_messages = path_check("PAPERLESS_DATA_DIR") + \
|
||||
path_check("PAPERLESS_MEDIA_ROOT") + \
|
||||
path_check("PAPERLESS_CONSUMPTION_DIR") + \
|
||||
path_check("PAPERLESS_STATICDIR")
|
||||
|
||||
return check_messages
|
||||
return path_check("PAPERLESS_DATA_DIR", settings.DATA_DIR) + \
|
||||
path_check("PAPERLESS_MEDIA_ROOT", settings.MEDIA_ROOT) + \
|
||||
path_check("PAPERLESS_CONSUMPTION_DIR", settings.CONSUMPTION_DIR)
|
||||
|
||||
|
||||
@register()
|
||||
|
@ -160,13 +160,6 @@ if AUTO_LOGIN_USERNAME:
|
||||
MIDDLEWARE.insert(_index+1, 'paperless.auth.AutoLoginMiddleware')
|
||||
|
||||
|
||||
if DEBUG:
|
||||
X_FRAME_OPTIONS = ''
|
||||
# this should really be 'allow-from uri' but its not supported in any mayor
|
||||
# browser.
|
||||
else:
|
||||
X_FRAME_OPTIONS = 'SAMEORIGIN'
|
||||
|
||||
# We allow CORS from localhost:8080
|
||||
CORS_ALLOWED_ORIGINS = tuple(os.getenv("PAPERLESS_CORS_ALLOWED_HOSTS", "http://localhost:8000").split(","))
|
||||
|
||||
|
54
src/paperless/tests/test_checks.py
Normal file
54
src/paperless/tests/test_checks.py
Normal file
@ -0,0 +1,54 @@
|
||||
import os
|
||||
import shutil
|
||||
|
||||
from django.test import TestCase, override_settings
|
||||
|
||||
from documents.tests.utils import DirectoriesMixin
|
||||
from paperless import binaries_check, paths_check
|
||||
from paperless.checks import debug_mode_check
|
||||
|
||||
|
||||
class TestChecks(DirectoriesMixin, TestCase):
|
||||
|
||||
def test_binaries(self):
|
||||
self.assertEqual(binaries_check(None), [])
|
||||
|
||||
@override_settings(CONVERT_BINARY="uuuhh", OPTIPNG_BINARY="forgot")
|
||||
def test_binaries_fail(self):
|
||||
self.assertEqual(len(binaries_check(None)), 2)
|
||||
|
||||
def test_paths_check(self):
|
||||
self.assertEqual(paths_check(None), [])
|
||||
|
||||
@override_settings(MEDIA_ROOT="uuh",
|
||||
DATA_DIR="whatever",
|
||||
CONSUMPTION_DIR="idontcare")
|
||||
def test_paths_check_dont_exist(self):
|
||||
msgs = paths_check(None)
|
||||
self.assertEqual(len(msgs), 3, str(msgs))
|
||||
|
||||
for msg in msgs:
|
||||
self.assertTrue(msg.msg.endswith("is set but doesn't exist."))
|
||||
|
||||
def test_paths_check_no_access(self):
|
||||
os.chmod(self.dirs.data_dir, 0o000)
|
||||
os.chmod(self.dirs.media_dir, 0o000)
|
||||
os.chmod(self.dirs.consumption_dir, 0o000)
|
||||
|
||||
self.addCleanup(os.chmod, self.dirs.data_dir, 0o777)
|
||||
self.addCleanup(os.chmod, self.dirs.media_dir, 0o777)
|
||||
self.addCleanup(os.chmod, self.dirs.consumption_dir, 0o777)
|
||||
|
||||
msgs = paths_check(None)
|
||||
self.assertEqual(len(msgs), 3)
|
||||
|
||||
for msg in msgs:
|
||||
self.assertTrue(msg.msg.endswith("is not writeable"))
|
||||
|
||||
@override_settings(DEBUG=False)
|
||||
def test_debug_disabled(self):
|
||||
self.assertEqual(debug_mode_check(None), [])
|
||||
|
||||
@override_settings(DEBUG=True)
|
||||
def test_debug_enabled(self):
|
||||
self.assertEqual(len(debug_mode_check(None)), 1)
|
@ -1,7 +1,7 @@
|
||||
import subprocess
|
||||
|
||||
from django.conf import settings
|
||||
from django.core.checks import Error, register
|
||||
from django.core.checks import Error, Warning, register
|
||||
|
||||
|
||||
def get_tesseract_langs():
|
||||
|
@ -1,194 +0,0 @@
|
||||
# Thanks to the Library of Congress and some creative use of sed and awk:
|
||||
# http://www.loc.gov/standards/iso639-2/php/English_list.php
|
||||
|
||||
ISO639 = {
|
||||
|
||||
"aa": "aar",
|
||||
"ab": "abk",
|
||||
"ae": "ave",
|
||||
"af": "afr",
|
||||
"ak": "aka",
|
||||
"am": "amh",
|
||||
"an": "arg",
|
||||
"ar": "ara",
|
||||
"as": "asm",
|
||||
"av": "ava",
|
||||
"ay": "aym",
|
||||
"az": "aze",
|
||||
"ba": "bak",
|
||||
"be": "bel",
|
||||
"bg": "bul",
|
||||
"bh": "bih",
|
||||
"bi": "bis",
|
||||
"bm": "bam",
|
||||
"bn": "ben",
|
||||
"bo": "bod",
|
||||
"br": "bre",
|
||||
"bs": "bos",
|
||||
"ca": "cat",
|
||||
"ce": "che",
|
||||
"ch": "cha",
|
||||
"co": "cos",
|
||||
"cr": "cre",
|
||||
"cs": "ces",
|
||||
"cu": "chu",
|
||||
"cv": "chv",
|
||||
"cy": "cym",
|
||||
"da": "dan",
|
||||
"de": "deu",
|
||||
"dv": "div",
|
||||
"dz": "dzo",
|
||||
"ee": "ewe",
|
||||
"el": "ell",
|
||||
"en": "eng",
|
||||
"eo": "epo",
|
||||
"es": "spa",
|
||||
"et": "est",
|
||||
"eu": "eus",
|
||||
"fa": "fas",
|
||||
"ff": "ful",
|
||||
"fi": "fin",
|
||||
"fj": "fij",
|
||||
"fo": "fao",
|
||||
"fr": "fra",
|
||||
"fy": "fry",
|
||||
"ga": "gle",
|
||||
"gd": "gla",
|
||||
"gl": "glg",
|
||||
"gn": "grn",
|
||||
"gu": "guj",
|
||||
"gv": "glv",
|
||||
"ha": "hau",
|
||||
"he": "heb",
|
||||
"hi": "hin",
|
||||
"ho": "hmo",
|
||||
"hr": "hrv",
|
||||
"ht": "hat",
|
||||
"hu": "hun",
|
||||
"hy": "hye",
|
||||
"hz": "her",
|
||||
"ia": "ina",
|
||||
"id": "ind",
|
||||
"ie": "ile",
|
||||
"ig": "ibo",
|
||||
"ii": "iii",
|
||||
"ik": "ipk",
|
||||
"io": "ido",
|
||||
"is": "isl",
|
||||
"it": "ita",
|
||||
"iu": "iku",
|
||||
"ja": "jpn",
|
||||
"jv": "jav",
|
||||
"ka": "kat",
|
||||
"kg": "kon",
|
||||
"ki": "kik",
|
||||
"kj": "kua",
|
||||
"kk": "kaz",
|
||||
"kl": "kal",
|
||||
"km": "khm",
|
||||
"kn": "kan",
|
||||
"ko": "kor",
|
||||
"kr": "kau",
|
||||
"ks": "kas",
|
||||
"ku": "kur",
|
||||
"kv": "kom",
|
||||
"kw": "cor",
|
||||
"ky": "kir",
|
||||
"la": "lat",
|
||||
"lb": "ltz",
|
||||
"lg": "lug",
|
||||
"li": "lim",
|
||||
"ln": "lin",
|
||||
"lo": "lao",
|
||||
"lt": "lit",
|
||||
"lu": "lub",
|
||||
"lv": "lav",
|
||||
"mg": "mlg",
|
||||
"mh": "mah",
|
||||
"mi": "mri",
|
||||
"mk": "mkd",
|
||||
"ml": "mal",
|
||||
"mn": "mon",
|
||||
"mr": "mar",
|
||||
"ms": "msa",
|
||||
"mt": "mlt",
|
||||
"my": "mya",
|
||||
"na": "nau",
|
||||
"nb": "nob",
|
||||
"nd": "nde",
|
||||
"ne": "nep",
|
||||
"ng": "ndo",
|
||||
"nl": "nld",
|
||||
"no": "nor",
|
||||
"nr": "nbl",
|
||||
"nv": "nav",
|
||||
"ny": "nya",
|
||||
"oc": "oci",
|
||||
"oj": "oji",
|
||||
"om": "orm",
|
||||
"or": "ori",
|
||||
"os": "oss",
|
||||
"pa": "pan",
|
||||
"pi": "pli",
|
||||
"pl": "pol",
|
||||
"ps": "pus",
|
||||
"pt": "por",
|
||||
"qu": "que",
|
||||
"rm": "roh",
|
||||
"rn": "run",
|
||||
"ro": "ron",
|
||||
"ru": "rus",
|
||||
"rw": "kin",
|
||||
"sa": "san",
|
||||
"sc": "srd",
|
||||
"sd": "snd",
|
||||
"se": "sme",
|
||||
"sg": "sag",
|
||||
"si": "sin",
|
||||
"sk": "slk",
|
||||
"sl": "slv",
|
||||
"sm": "smo",
|
||||
"sn": "sna",
|
||||
"so": "som",
|
||||
"sq": "sqi",
|
||||
"sr": "srp",
|
||||
"ss": "ssw",
|
||||
"st": "sot",
|
||||
"su": "sun",
|
||||
"sv": "swe",
|
||||
"sw": "swa",
|
||||
"ta": "tam",
|
||||
"te": "tel",
|
||||
"tg": "tgk",
|
||||
"th": "tha",
|
||||
"ti": "tir",
|
||||
"tk": "tuk",
|
||||
"tl": "tgl",
|
||||
"tn": "tsn",
|
||||
"to": "ton",
|
||||
"tr": "tur",
|
||||
"ts": "tso",
|
||||
"tt": "tat",
|
||||
"tw": "twi",
|
||||
"ty": "tah",
|
||||
"ug": "uig",
|
||||
"uk": "ukr",
|
||||
"ur": "urd",
|
||||
"uz": "uzb",
|
||||
"ve": "ven",
|
||||
"vi": "vie",
|
||||
"vo": "vol",
|
||||
"wa": "wln",
|
||||
"wo": "wol",
|
||||
"xh": "xho",
|
||||
"yi": "yid",
|
||||
"yo": "yor",
|
||||
"za": "zha",
|
||||
|
||||
# Tessdata contains two values for Chinese, "chi_sim" and "chi_tra". I
|
||||
# have no idea which one is better, so I just picked the bigger file.
|
||||
"zh": "chi_tra",
|
||||
|
||||
"zu": "zul"
|
||||
|
||||
}
|
26
src/paperless_tesseract/tests/test_checks.py
Normal file
26
src/paperless_tesseract/tests/test_checks.py
Normal file
@ -0,0 +1,26 @@
|
||||
from unittest import mock
|
||||
|
||||
from django.core.checks import ERROR
|
||||
from django.test import TestCase, override_settings
|
||||
|
||||
from paperless_tesseract import check_default_language_available
|
||||
|
||||
|
||||
class TestChecks(TestCase):
|
||||
|
||||
def test_default_language(self):
|
||||
msgs = check_default_language_available(None)
|
||||
|
||||
@override_settings(OCR_LANGUAGE="")
|
||||
def test_no_language(self):
|
||||
msgs = check_default_language_available(None)
|
||||
self.assertEqual(len(msgs), 1)
|
||||
self.assertTrue(msgs[0].msg.startswith("No OCR language has been specified with PAPERLESS_OCR_LANGUAGE"))
|
||||
|
||||
@override_settings(OCR_LANGUAGE="ita")
|
||||
@mock.patch("paperless_tesseract.checks.get_tesseract_langs")
|
||||
def test_invalid_language(self, m):
|
||||
m.return_value = ["deu", "eng"]
|
||||
msgs = check_default_language_available(None)
|
||||
self.assertEqual(len(msgs), 1)
|
||||
self.assertEqual(msgs[0].level, ERROR)
|
@ -35,15 +35,3 @@ class TextDocumentParser(DocumentParser):
|
||||
def parse(self, document_path, mime_type):
|
||||
with open(document_path, 'r') as f:
|
||||
self.text = f.read()
|
||||
|
||||
|
||||
def run_command(*args):
|
||||
environment = os.environ.copy()
|
||||
if settings.CONVERT_MEMORY_LIMIT:
|
||||
environment["MAGICK_MEMORY_LIMIT"] = settings.CONVERT_MEMORY_LIMIT
|
||||
if settings.CONVERT_TMPDIR:
|
||||
environment["MAGICK_TMPDIR"] = settings.CONVERT_TMPDIR
|
||||
|
||||
if not subprocess.Popen(' '.join(args), env=environment,
|
||||
shell=True).wait() == 0:
|
||||
raise ParseError("Convert failed at {}".format(args))
|
||||
|
Loading…
x
Reference in New Issue
Block a user