mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-10-30 03:56:23 -05:00 
			
		
		
		
	Merge remote-tracking branch 'upstream/dev' into feature-bulk-editor
This commit is contained in:
		
							
								
								
									
										15
									
								
								docs/api.rst
									
									
									
									
									
								
							
							
						
						
									
										15
									
								
								docs/api.rst
									
									
									
									
									
								
							| @@ -221,21 +221,16 @@ Each fragment contains a list of strings, and some of them are marked as a highl | ||||
|  | ||||
|     [ | ||||
|         [ | ||||
|             {"text": "This is a sample text with a "}, | ||||
|             {"text": "highlighted", "term": 0}, | ||||
|             {"text": " word."} | ||||
|             {"text": "This is a sample text with a ", "highlight": false}, | ||||
|             {"text": "highlighted", "highlight": true}, | ||||
|             {"text": " word.", "highlight": false} | ||||
|         ], | ||||
|         [ | ||||
|             {"text": "Another", "term": 1}, | ||||
|             {"text": " fragment with a highlight."} | ||||
|             {"text": "Another", "highlight": true}, | ||||
|             {"text": " fragment with a highlight.", "highlight": false} | ||||
|         ] | ||||
|     ] | ||||
|  | ||||
|  | ||||
|  | ||||
| When ``term`` is present within a string, the word within ``text`` should be highlighted. | ||||
| The term index groups multiple matches together and words with the same index | ||||
| should get identical highlighting. | ||||
| A client may use this example to produce the following output: | ||||
|  | ||||
| ... This is a sample text with a **highlighted** word. ... **Another** fragment with a highlight. ... | ||||
|   | ||||
| @@ -1,4 +1,14 @@ | ||||
| <app-page-header [(title)]="title"> | ||||
|     <div class="input-group input-group-sm mr-5" *ngIf="getContentType() == 'application/pdf'"> | ||||
|       <div class="input-group-prepend"> | ||||
|         <div class="input-group-text">Page </div> | ||||
|       </div> | ||||
|       <input class="form-control flex-grow-0 w-auto" type="number" min="1" [max]="previewNumPages" [(ngModel)]="previewCurrentPage" /> | ||||
|       <div class="input-group-append"> | ||||
|         <div class="input-group-text">of {{previewNumPages}}</div> | ||||
|       </div> | ||||
|     </div> | ||||
|  | ||||
|     <button type="button" class="btn btn-sm btn-outline-danger mr-2" (click)="delete()"> | ||||
|         <svg class="buttonicon" fill="currentColor"> | ||||
|             <use xlink:href="assets/bootstrap-icons.svg#trash" /> | ||||
| @@ -24,6 +34,12 @@ | ||||
|  | ||||
|     </div> | ||||
|  | ||||
|     <button type="button" class="btn btn-sm btn-outline-primary mr-2" (click)="moreLike()"> | ||||
|         <svg class="buttonicon" fill="currentColor"> | ||||
|             <use xlink:href="assets/bootstrap-icons.svg#three-dots" /> | ||||
|         </svg> | ||||
|         <span class="d-none d-lg-inline"> More like this</span> | ||||
|     </button> | ||||
|  | ||||
|     <button type="button" class="btn btn-sm btn-outline-primary" (click)="close()"> | ||||
|         <svg class="buttonicon" fill="currentColor"> | ||||
| @@ -128,7 +144,7 @@ | ||||
|  | ||||
|     <div class="col-md-6 col-xl-8 mb-3"> | ||||
|       <div class="pdf-viewer-container" *ngIf="getContentType() == 'application/pdf'"> | ||||
|         <pdf-viewer [src]="previewUrl" [original-size]="false" [show-borders]="true"></pdf-viewer> | ||||
|         <pdf-viewer [src]="previewUrl" [original-size]="false" [show-borders]="true" [show-all]="true" [(page)]="previewCurrentPage" (after-load-complete)="pdfPreviewLoaded($event)"></pdf-viewer> | ||||
|       </div> | ||||
|     </div> | ||||
| </div> | ||||
|   | ||||
| @@ -15,6 +15,7 @@ import { DocumentService } from 'src/app/services/rest/document.service'; | ||||
| import { ConfirmDialogComponent } from '../common/confirm-dialog/confirm-dialog.component'; | ||||
| import { CorrespondentEditDialogComponent } from '../manage/correspondent-list/correspondent-edit-dialog/correspondent-edit-dialog.component'; | ||||
| import { DocumentTypeEditDialogComponent } from '../manage/document-type-list/document-type-edit-dialog/document-type-edit-dialog.component'; | ||||
| import { PDFDocumentProxy } from 'ng2-pdf-viewer'; | ||||
|  | ||||
| @Component({ | ||||
|   selector: 'app-document-detail', | ||||
| @@ -47,6 +48,9 @@ export class DocumentDetailComponent implements OnInit { | ||||
|     tags: new FormControl([]) | ||||
|   }) | ||||
|  | ||||
|   previewCurrentPage: number = 1 | ||||
|   previewNumPages: number = 1 | ||||
|  | ||||
|   constructor( | ||||
|     private documentsService: DocumentService, | ||||
|     private route: ActivatedRoute, | ||||
| @@ -168,7 +172,16 @@ export class DocumentDetailComponent implements OnInit { | ||||
|  | ||||
|   } | ||||
|  | ||||
|   moreLike() { | ||||
|     this.router.navigate(["search"], {queryParams: {more_like:this.document.id}}) | ||||
|   } | ||||
|  | ||||
|   hasNext() { | ||||
|     return this.documentListViewService.hasNext(this.documentId) | ||||
|   } | ||||
|  | ||||
|   pdfPreviewLoaded(pdf: PDFDocumentProxy) { | ||||
|     this.previewNumPages = pdf.numPages | ||||
|   } | ||||
|  | ||||
| } | ||||
|   | ||||
| @@ -23,8 +23,14 @@ | ||||
|         </p> | ||||
|  | ||||
|  | ||||
|         <div class="d-flex justify-content-between align-items-center"> | ||||
|         <div class="d-flex align-items-center"> | ||||
|           <div class="btn-group"> | ||||
|             <a routerLink="/search" [queryParams]="{'more_like': document.id}" class="btn btn-sm btn-outline-secondary" *ngIf="moreLikeThis"> | ||||
|               <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-three-dots" viewBox="0 0 16 16"> | ||||
|                 <path fill-rule="evenodd" d="M3 9.5a1.5 1.5 0 1 1 0-3 1.5 1.5 0 0 1 0 3zm5 0a1.5 1.5 0 1 1 0-3 1.5 1.5 0 0 1 0 3zm5 0a1.5 1.5 0 1 1 0-3 1.5 1.5 0 0 1 0 3z"/> | ||||
|               </svg> | ||||
|               More like this | ||||
|             </a> | ||||
|             <a routerLink="/documents/{{document.id}}" class="btn btn-sm btn-outline-secondary"> | ||||
|               <svg width="1em" height="1em" viewBox="0 0 16 16" class="bi bi-pencil" fill="currentColor" xmlns="http://www.w3.org/2000/svg"> | ||||
|                 <path fill-rule="evenodd" d="M12.146.146a.5.5 0 0 1 .708 0l3 3a.5.5 0 0 1 0 .708l-10 10a.5.5 0 0 1-.168.11l-5 2a.5.5 0 0 1-.65-.65l2-5a.5.5 0 0 1 .11-.168l10-10zM11.207 2.5L13.5 4.793 14.793 3.5 12.5 1.207 11.207 2.5zm1.586 3L10.5 3.207 4 9.707V10h.5a.5.5 0 0 1 .5.5v.5h.5a.5.5 0 0 1 .5.5v.5h.293l6.5-6.5zm-9.761 5.175l-.106.106-1.528 3.821 3.821-1.528.106-.106A.5.5 0 0 1 5 12.5V12h-.5a.5.5 0 0 1-.5-.5V11h-.5a.5.5 0 0 1-.468-.325z"/> | ||||
| @@ -45,7 +51,13 @@ | ||||
|               </svg> | ||||
|               Download | ||||
|             </a> | ||||
|              | ||||
|           </div> | ||||
|  | ||||
|           <small class="text-muted ml-auto">Score:</small> | ||||
|  | ||||
|           <ngb-progressbar *ngIf="searchScore" [type]="searchScoreClass" [value]="searchScore" class="search-score-bar mx-2" [max]="1"></ngb-progressbar> | ||||
|            | ||||
|           <small class="text-muted">Created: {{document.created | date}}</small> | ||||
|         </div> | ||||
|          | ||||
|   | ||||
| @@ -10,3 +10,9 @@ | ||||
|   position: absolute; | ||||
|  | ||||
| } | ||||
|  | ||||
| .search-score-bar { | ||||
|   width: 100px; | ||||
|   height: 5px; | ||||
|   margin-top: 2px; | ||||
| } | ||||
| @@ -12,6 +12,9 @@ export class DocumentCardLargeComponent implements OnInit { | ||||
|  | ||||
|   constructor(private documentService: DocumentService, private sanitizer: DomSanitizer) { } | ||||
|  | ||||
|   @Input() | ||||
|   moreLikeThis: boolean = false | ||||
|  | ||||
|   @Input() | ||||
|   document: PaperlessDocument | ||||
|  | ||||
| @@ -24,6 +27,19 @@ export class DocumentCardLargeComponent implements OnInit { | ||||
|   @Output() | ||||
|   clickCorrespondent = new EventEmitter<number>() | ||||
|  | ||||
|   @Input() | ||||
|   searchScore: number | ||||
|  | ||||
|   get searchScoreClass() { | ||||
|     if (this.searchScore > 0.7) { | ||||
|       return "success" | ||||
|     } else if (this.searchScore > 0.3) { | ||||
|       return "warning" | ||||
|     } else { | ||||
|       return "danger" | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   ngOnInit(): void { | ||||
|   } | ||||
|  | ||||
|   | ||||
| @@ -1,4 +1,4 @@ | ||||
| <div class="col p-2 h-100 document-card" style="width: 16rem;"> | ||||
| <div class="col p-2 h-100"> | ||||
|   <div class="card h-100 shadow-sm" [class.card-selected]="selected"> | ||||
|     <div class="border-bottom" [class.doc-img-background-selected]="selected"> | ||||
|       <img class="card-img doc-img" [src]="getThumbUrl()" (click)="selected = !selected"> | ||||
|   | ||||
| @@ -151,5 +151,5 @@ | ||||
|  | ||||
|  | ||||
| <div class=" m-n2 row" *ngIf="displayMode == 'smallCards'"> | ||||
|   <app-document-card-small [selected]="list.isSelected(d)" (selectedChange)="list.setSelected(d, $event)" [document]="d" *ngFor="let d of list.documents" (clickTag)="clickTag($event)" (clickCorrespondent)="clickCorrespondent($event)"></app-document-card-small> | ||||
|   <app-document-card-small [document]="d" [selected]="list.isSelected(d)" (selectedChange)="list.setSelected(d, $event)" *ngFor="let d of list.documents" (clickTag)="clickTag($event)" (clickCorrespondent)="clickCorrespondent($event)"></app-document-card-small> | ||||
| </div> | ||||
|   | ||||
| @@ -1,3 +1,3 @@ | ||||
| ... <span *ngFor="let fragment of highlights"> | ||||
|     <span *ngFor="let token of fragment" [ngClass]="token.term != null ? 'match term'+ token.term : ''">{{token.text}}</span> ...  | ||||
|     <span *ngFor="let token of fragment" [class.match]="token.highlight">{{token.text}}</span> ...  | ||||
| </span> | ||||
| @@ -1,4 +1,4 @@ | ||||
| .match { | ||||
|     color: black; | ||||
|     background-color: orange; | ||||
|     background-color: rgb(255, 211, 66); | ||||
| } | ||||
| @@ -3,7 +3,12 @@ | ||||
|  | ||||
| <div *ngIf="errorMessage" class="alert alert-danger">Invalid search query: {{errorMessage}}</div> | ||||
|  | ||||
| <p> | ||||
| <p *ngIf="more_like"> | ||||
|     Showing documents similar to | ||||
|     <a routerLink="/documents/{{more_like}}">{{more_like_doc?.original_file_name}}</a> | ||||
| </p> | ||||
|  | ||||
| <p *ngIf="query"> | ||||
|     Search string: <i>{{query}}</i> | ||||
|     <ng-container *ngIf="correctedQuery"> | ||||
|         - Did you mean "<a [routerLink]="" (click)="searchCorrectedQuery()">{{correctedQuery}}</a>"? | ||||
| @@ -15,7 +20,9 @@ | ||||
|     <p>{{resultCount}} result(s)</p> | ||||
|     <app-document-card-large *ngFor="let result of results" | ||||
|         [document]="result.document" | ||||
|         [details]="result.highlights"> | ||||
|         [details]="result.highlights" | ||||
|         [searchScore]="result.score / maxScore" | ||||
|         [moreLikeThis]="true"> | ||||
|  | ||||
| </app-document-card-large> | ||||
| </div> | ||||
|   | ||||
| @@ -1,6 +1,9 @@ | ||||
| import { Component, OnInit } from '@angular/core'; | ||||
| import { ActivatedRoute, Router } from '@angular/router'; | ||||
| import { PaperlessDocument } from 'src/app/data/paperless-document'; | ||||
| import { PaperlessDocumentType } from 'src/app/data/paperless-document-type'; | ||||
| import { SearchHit } from 'src/app/data/search-result'; | ||||
| import { DocumentService } from 'src/app/services/rest/document.service'; | ||||
| import { SearchService } from 'src/app/services/rest/search.service'; | ||||
|  | ||||
| @Component({ | ||||
| @@ -14,6 +17,10 @@ export class SearchComponent implements OnInit { | ||||
|  | ||||
|   query: string = "" | ||||
|  | ||||
|   more_like: number | ||||
|  | ||||
|   more_like_doc: PaperlessDocument | ||||
|  | ||||
|   searching = false | ||||
|  | ||||
|   currentPage = 1 | ||||
| @@ -26,11 +33,24 @@ export class SearchComponent implements OnInit { | ||||
|  | ||||
|   errorMessage: string | ||||
|  | ||||
|   constructor(private searchService: SearchService, private route: ActivatedRoute, private router: Router) { } | ||||
|   get maxScore() { | ||||
|     return this.results?.length > 0 ? this.results[0].score : 100 | ||||
|   } | ||||
|  | ||||
|   constructor(private searchService: SearchService, private route: ActivatedRoute, private router: Router, private documentService: DocumentService) { } | ||||
|  | ||||
|   ngOnInit(): void { | ||||
|     this.route.queryParamMap.subscribe(paramMap => { | ||||
|       window.scrollTo(0, 0) | ||||
|       this.query = paramMap.get('query') | ||||
|       this.more_like = paramMap.has('more_like') ? +paramMap.get('more_like') : null | ||||
|       if (this.more_like) { | ||||
|         this.documentService.get(this.more_like).subscribe(r => { | ||||
|           this.more_like_doc = r | ||||
|         }) | ||||
|       } else { | ||||
|         this.more_like_doc = null | ||||
|       } | ||||
|       this.searching = true | ||||
|       this.currentPage = 1 | ||||
|       this.loadPage() | ||||
| @@ -39,13 +59,14 @@ export class SearchComponent implements OnInit { | ||||
|   } | ||||
|  | ||||
|   searchCorrectedQuery() { | ||||
|     this.router.navigate(["search"], {queryParams: {query: this.correctedQuery}}) | ||||
|     this.router.navigate(["search"], {queryParams: {query: this.correctedQuery, more_like: this.more_like}}) | ||||
|   } | ||||
|  | ||||
|   loadPage(append: boolean = false) { | ||||
|     this.errorMessage = null | ||||
|     this.correctedQuery = null | ||||
|     this.searchService.search(this.query, this.currentPage).subscribe(result => { | ||||
|  | ||||
|     this.searchService.search(this.query, this.currentPage, this.more_like).subscribe(result => { | ||||
|       if (append) { | ||||
|         this.results.push(...result.results) | ||||
|       } else { | ||||
|   | ||||
| @@ -15,11 +15,17 @@ export class SearchService { | ||||
|    | ||||
|   constructor(private http: HttpClient, private documentService: DocumentService) { } | ||||
|  | ||||
|   search(query: string, page?: number): Observable<SearchResult> { | ||||
|     let httpParams = new HttpParams().set('query', query) | ||||
|   search(query: string, page?: number, more_like?: number): Observable<SearchResult> { | ||||
|     let httpParams = new HttpParams() | ||||
|     if (query) { | ||||
|       httpParams = httpParams.set('query', query) | ||||
|     } | ||||
|     if (page) { | ||||
|       httpParams = httpParams.set('page', page.toString()) | ||||
|     } | ||||
|     if (more_like) { | ||||
|       httpParams = httpParams.set('more_like', more_like.toString()) | ||||
|     } | ||||
|     return this.http.get<SearchResult>(`${environment.apiBaseUrl}search/`, {params: httpParams}).pipe( | ||||
|       map(result => { | ||||
|         result.results.forEach(hit => this.documentService.addObservablesToDocument(hit.document)) | ||||
|   | ||||
| @@ -5,7 +5,8 @@ | ||||
| export const environment = { | ||||
|   production: false, | ||||
|   apiBaseUrl: "http://localhost:8000/api/", | ||||
|   appTitle: "DEVELOPMENT P-NG" | ||||
|   appTitle: "Paperless-ng", | ||||
|   version: "DEVELOPMENT" | ||||
| }; | ||||
|  | ||||
| /* | ||||
|   | ||||
| @@ -247,7 +247,6 @@ class Consumer(LoggingMixin): | ||||
|  | ||||
|         with open(self.path, "rb") as f: | ||||
|             document = Document.objects.create( | ||||
|                 correspondent=file_info.correspondent, | ||||
|                 title=(self.override_title or file_info.title)[:127], | ||||
|                 content=text, | ||||
|                 mime_type=mime_type, | ||||
| @@ -257,12 +256,6 @@ class Consumer(LoggingMixin): | ||||
|                 storage_type=storage_type | ||||
|             ) | ||||
|  | ||||
|         relevant_tags = set(file_info.tags) | ||||
|         if relevant_tags: | ||||
|             tag_names = ", ".join([t.name for t in relevant_tags]) | ||||
|             self.log("debug", "Tagging with {}".format(tag_names)) | ||||
|             document.tags.add(*relevant_tags) | ||||
|  | ||||
|         self.apply_overrides(document) | ||||
|  | ||||
|         document.save() | ||||
|   | ||||
| @@ -3,7 +3,7 @@ import os | ||||
| from contextlib import contextmanager | ||||
|  | ||||
| from django.conf import settings | ||||
| from whoosh import highlight | ||||
| from whoosh import highlight, classify, query | ||||
| from whoosh.fields import Schema, TEXT, NUMERIC, KEYWORD, DATETIME | ||||
| from whoosh.highlight import Formatter, get_text | ||||
| from whoosh.index import create_in, exists_in, open_dir | ||||
| @@ -20,32 +20,37 @@ class JsonFormatter(Formatter): | ||||
|         self.seen = {} | ||||
|  | ||||
|     def format_token(self, text, token, replace=False): | ||||
|         seen = self.seen | ||||
|         ttext = self._text(get_text(text, token, replace)) | ||||
|         if ttext in seen: | ||||
|             termnum = seen[ttext] | ||||
|         else: | ||||
|             termnum = len(seen) | ||||
|             seen[ttext] = termnum | ||||
|  | ||||
|         return {'text': ttext, 'term': termnum} | ||||
|         return {'text': ttext, 'highlight': 'true'} | ||||
|  | ||||
|     def format_fragment(self, fragment, replace=False): | ||||
|         output = [] | ||||
|         index = fragment.startchar | ||||
|         text = fragment.text | ||||
|  | ||||
|         amend_token = None | ||||
|         for t in fragment.matches: | ||||
|             if t.startchar is None: | ||||
|                 continue | ||||
|             if t.startchar < index: | ||||
|                 continue | ||||
|             if t.startchar > index: | ||||
|                 output.append({'text': text[index:t.startchar]}) | ||||
|             output.append(self.format_token(text, t, replace)) | ||||
|                 text_inbetween = text[index:t.startchar] | ||||
|                 if amend_token and t.startchar - index < 10: | ||||
|                     amend_token['text'] += text_inbetween | ||||
|                 else: | ||||
|                     output.append({'text': text_inbetween, | ||||
|                                    'highlight': False}) | ||||
|                     amend_token = None | ||||
|             token = self.format_token(text, t, replace) | ||||
|             if amend_token: | ||||
|                 amend_token['text'] += token['text'] | ||||
|             else: | ||||
|                 output.append(token) | ||||
|                 amend_token = token | ||||
|             index = t.endchar | ||||
|         if index < fragment.endchar: | ||||
|             output.append({'text': text[index:fragment.endchar]}) | ||||
|             output.append({'text': text[index:fragment.endchar], | ||||
|                            'highlight': False}) | ||||
|         return output | ||||
|  | ||||
|     def format(self, fragments, replace=False): | ||||
| @@ -120,22 +125,42 @@ def remove_document_from_index(document): | ||||
|  | ||||
|  | ||||
| @contextmanager | ||||
| def query_page(ix, querystring, page): | ||||
| def query_page(ix, page, querystring, more_like_doc_id, more_like_doc_content): | ||||
|     searcher = ix.searcher() | ||||
|     try: | ||||
|         if querystring: | ||||
|             qp = MultifieldParser( | ||||
|                 ["content", "title", "correspondent", "tag", "type"], | ||||
|                 ix.schema) | ||||
|             qp.add_plugin(DateParserPlugin()) | ||||
|             str_q = qp.parse(querystring) | ||||
|             corrected = searcher.correct_query(str_q, querystring) | ||||
|         else: | ||||
|             str_q = None | ||||
|             corrected = None | ||||
|  | ||||
|         if more_like_doc_id: | ||||
|             docnum = searcher.document_number(id=more_like_doc_id) | ||||
|             kts = searcher.key_terms_from_text( | ||||
|                 'content', more_like_doc_content, numterms=20, | ||||
|                 model=classify.Bo1Model, normalize=False) | ||||
|             more_like_q = query.Or( | ||||
|                 [query.Term('content', word, boost=weight) | ||||
|                  for word, weight in kts]) | ||||
|             result_page = searcher.search_page( | ||||
|                 more_like_q, page, filter=str_q, mask={docnum}) | ||||
|         elif str_q: | ||||
|             result_page = searcher.search_page(str_q, page) | ||||
|         else: | ||||
|             raise ValueError( | ||||
|                 "Either querystring or more_like_doc_id is required." | ||||
|             ) | ||||
|  | ||||
|         q = qp.parse(querystring) | ||||
|         result_page = searcher.search_page(q, page) | ||||
|         result_page.results.fragmenter = highlight.ContextFragmenter( | ||||
|             surround=50) | ||||
|         result_page.results.formatter = JsonFormatter() | ||||
|  | ||||
|         corrected = searcher.correct_query(q, querystring) | ||||
|         if corrected.query != q: | ||||
|         if corrected and corrected.query != str_q: | ||||
|             corrected_query = corrected.string | ||||
|         else: | ||||
|             corrected_query = None | ||||
|   | ||||
| @@ -11,6 +11,7 @@ from paperless.db import GnuPG | ||||
| STORAGE_TYPE_UNENCRYPTED = "unencrypted" | ||||
| STORAGE_TYPE_GPG = "gpg" | ||||
|  | ||||
|  | ||||
| def source_path(self): | ||||
|     if self.filename: | ||||
|         fname = str(self.filename) | ||||
|   | ||||
| @@ -357,54 +357,12 @@ class SavedViewFilterRule(models.Model): | ||||
| # TODO: why is this in the models file? | ||||
| class FileInfo: | ||||
|  | ||||
|     # This epic regex *almost* worked for our needs, so I'm keeping it here for | ||||
|     # posterity, in the hopes that we might find a way to make it work one day. | ||||
|     ALMOST_REGEX = re.compile( | ||||
|         r"^((?P<date>\d\d\d\d\d\d\d\d\d\d\d\d\d\dZ){separator})?" | ||||
|         r"((?P<correspondent>{non_separated_word}+){separator})??" | ||||
|         r"(?P<title>{non_separated_word}+)" | ||||
|         r"({separator}(?P<tags>[a-z,0-9-]+))?" | ||||
|         r"\.(?P<extension>[a-zA-Z.-]+)$".format( | ||||
|             separator=r"\s+-\s+", | ||||
|             non_separated_word=r"([\w,. ]|([^\s]-))" | ||||
|         ) | ||||
|     ) | ||||
|     REGEXES = OrderedDict([ | ||||
|         ("created-correspondent-title-tags", re.compile( | ||||
|             r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - " | ||||
|             r"(?P<correspondent>.*) - " | ||||
|             r"(?P<title>.*) - " | ||||
|             r"(?P<tags>[a-z0-9\-,]*)$", | ||||
|             flags=re.IGNORECASE | ||||
|         )), | ||||
|         ("created-title-tags", re.compile( | ||||
|             r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - " | ||||
|             r"(?P<title>.*) - " | ||||
|             r"(?P<tags>[a-z0-9\-,]*)$", | ||||
|             flags=re.IGNORECASE | ||||
|         )), | ||||
|         ("created-correspondent-title", re.compile( | ||||
|             r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - " | ||||
|             r"(?P<correspondent>.*) - " | ||||
|             r"(?P<title>.*)$", | ||||
|             flags=re.IGNORECASE | ||||
|         )), | ||||
|         ("created-title", re.compile( | ||||
|             r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - " | ||||
|             r"(?P<title>.*)$", | ||||
|             flags=re.IGNORECASE | ||||
|         )), | ||||
|         ("correspondent-title-tags", re.compile( | ||||
|             r"(?P<correspondent>.*) - " | ||||
|             r"(?P<title>.*) - " | ||||
|             r"(?P<tags>[a-z0-9\-,]*)$", | ||||
|             flags=re.IGNORECASE | ||||
|         )), | ||||
|         ("correspondent-title", re.compile( | ||||
|             r"(?P<correspondent>.*) - " | ||||
|             r"(?P<title>.*)?$", | ||||
|             flags=re.IGNORECASE | ||||
|         )), | ||||
|         ("title", re.compile( | ||||
|             r"(?P<title>.*)$", | ||||
|             flags=re.IGNORECASE | ||||
| @@ -427,23 +385,10 @@ class FileInfo: | ||||
|         except ValueError: | ||||
|             return None | ||||
|  | ||||
|     @classmethod | ||||
|     def _get_correspondent(cls, name): | ||||
|         if not name: | ||||
|             return None | ||||
|         return Correspondent.objects.get_or_create(name=name)[0] | ||||
|  | ||||
|     @classmethod | ||||
|     def _get_title(cls, title): | ||||
|         return title | ||||
|  | ||||
|     @classmethod | ||||
|     def _get_tags(cls, tags): | ||||
|         r = [] | ||||
|         for t in tags.split(","): | ||||
|             r.append(Tag.objects.get_or_create(name=t)[0]) | ||||
|         return tuple(r) | ||||
|  | ||||
|     @classmethod | ||||
|     def _mangle_property(cls, properties, name): | ||||
|         if name in properties: | ||||
| @@ -453,15 +398,6 @@ class FileInfo: | ||||
|  | ||||
|     @classmethod | ||||
|     def from_filename(cls, filename): | ||||
|         """ | ||||
|         We use a crude naming convention to make handling the correspondent, | ||||
|         title, and tags easier: | ||||
|           "<date> - <correspondent> - <title> - <tags>" | ||||
|           "<correspondent> - <title> - <tags>" | ||||
|           "<correspondent> - <title>" | ||||
|           "<title>" | ||||
|         """ | ||||
|  | ||||
|         # Mutate filename in-place before parsing its components | ||||
|         # by applying at most one of the configured transformations. | ||||
|         for (pattern, repl) in settings.FILENAME_PARSE_TRANSFORMS: | ||||
| @@ -492,7 +428,5 @@ class FileInfo: | ||||
|             if m: | ||||
|                 properties = m.groupdict() | ||||
|                 cls._mangle_property(properties, "created") | ||||
|                 cls._mangle_property(properties, "correspondent") | ||||
|                 cls._mangle_property(properties, "title") | ||||
|                 cls._mangle_property(properties, "tags") | ||||
|                 return cls(**properties) | ||||
|   | ||||
| @@ -5,7 +5,7 @@ | ||||
| <html lang="en"> | ||||
| <head> | ||||
|   <meta charset="utf-8"> | ||||
|   <title>PaperlessUi</title> | ||||
|   <title>Paperless-ng</title> | ||||
|   <base href="/"> | ||||
|   <meta name="viewport" content="width=device-width, initial-scale=1"> | ||||
| 	<meta name="cookie_prefix" content="{{cookie_prefix}}"> | ||||
|   | ||||
							
								
								
									
										57
									
								
								src/documents/tests/test_admin.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										57
									
								
								src/documents/tests/test_admin.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,57 @@ | ||||
| from unittest import mock | ||||
|  | ||||
| from django.contrib.admin.sites import AdminSite | ||||
| from django.test import TestCase | ||||
| from django.utils import timezone | ||||
|  | ||||
| from documents.admin import DocumentAdmin | ||||
| from documents.models import Document, Tag | ||||
|  | ||||
|  | ||||
| class TestDocumentAdmin(TestCase): | ||||
|  | ||||
|     def setUp(self) -> None: | ||||
|         self.doc_admin = DocumentAdmin(model=Document, admin_site=AdminSite()) | ||||
|  | ||||
|     @mock.patch("documents.admin.index.add_or_update_document") | ||||
|     def test_save_model(self, m): | ||||
|         doc = Document.objects.create(title="test") | ||||
|         doc.title = "new title" | ||||
|         self.doc_admin.save_model(None, doc, None, None) | ||||
|         self.assertEqual(Document.objects.get(id=doc.id).title, "new title") | ||||
|         m.assert_called_once() | ||||
|  | ||||
|     def test_tags(self): | ||||
|         doc = Document.objects.create(title="test") | ||||
|         doc.tags.create(name="t1") | ||||
|         doc.tags.create(name="t2") | ||||
|  | ||||
|         self.assertEqual(self.doc_admin.tags_(doc), "<span >t1, </span><span >t2, </span>") | ||||
|  | ||||
|     def test_tags_empty(self): | ||||
|         doc = Document.objects.create(title="test") | ||||
|  | ||||
|         self.assertEqual(self.doc_admin.tags_(doc), "") | ||||
|  | ||||
|     @mock.patch("documents.admin.index.remove_document") | ||||
|     def test_delete_model(self, m): | ||||
|         doc = Document.objects.create(title="test") | ||||
|         self.doc_admin.delete_model(None, doc) | ||||
|         self.assertRaises(Document.DoesNotExist, Document.objects.get, id=doc.id) | ||||
|         m.assert_called_once() | ||||
|  | ||||
|     @mock.patch("documents.admin.index.remove_document") | ||||
|     def test_delete_queryset(self, m): | ||||
|         for i in range(42): | ||||
|             Document.objects.create(title="Many documents with the same title", checksum=f"{i:02}") | ||||
|  | ||||
|         self.assertEqual(Document.objects.count(), 42) | ||||
|  | ||||
|         self.doc_admin.delete_queryset(None, Document.objects.all()) | ||||
|  | ||||
|         self.assertEqual(m.call_count, 42) | ||||
|         self.assertEqual(Document.objects.count(), 0) | ||||
|  | ||||
|     def test_created(self): | ||||
|         doc = Document.objects.create(title="test", created=timezone.datetime(2020, 4, 12)) | ||||
|         self.assertEqual(self.doc_admin.created_(doc), "2020-04-12") | ||||
| @@ -352,6 +352,25 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): | ||||
|  | ||||
|         self.assertEqual(correction, None) | ||||
|  | ||||
|     def test_search_more_like(self): | ||||
|         d1=Document.objects.create(title="invoice", content="the thing i bought at a shop and paid with bank account", checksum="A", pk=1) | ||||
|         d2=Document.objects.create(title="bank statement 1", content="things i paid for in august", pk=2, checksum="B") | ||||
|         d3=Document.objects.create(title="bank statement 3", content="things i paid for in september", pk=3, checksum="C") | ||||
|         with AsyncWriter(index.open_index()) as writer: | ||||
|             index.update_document(writer, d1) | ||||
|             index.update_document(writer, d2) | ||||
|             index.update_document(writer, d3) | ||||
|  | ||||
|         response = self.client.get(f"/api/search/?more_like={d2.id}") | ||||
|  | ||||
|         self.assertEqual(response.status_code, 200) | ||||
|  | ||||
|         results = response.data['results'] | ||||
|  | ||||
|         self.assertEqual(len(results), 2) | ||||
|         self.assertEqual(results[0]['id'], d3.id) | ||||
|         self.assertEqual(results[1]['id'], d1.id) | ||||
|  | ||||
|     def test_statistics(self): | ||||
|  | ||||
|         doc1 = Document.objects.create(title="none1", checksum="A") | ||||
|   | ||||
| @@ -29,81 +29,6 @@ class TestAttributes(TestCase): | ||||
|  | ||||
|         self.assertEqual(tuple([t.name for t in file_info.tags]), tags, filename) | ||||
|  | ||||
|     def test_guess_attributes_from_name0(self): | ||||
|         self._test_guess_attributes_from_name( | ||||
|             "Sender - Title.pdf", "Sender", "Title", ()) | ||||
|  | ||||
|     def test_guess_attributes_from_name1(self): | ||||
|         self._test_guess_attributes_from_name( | ||||
|             "Spaced Sender - Title.pdf", "Spaced Sender", "Title", ()) | ||||
|  | ||||
|     def test_guess_attributes_from_name2(self): | ||||
|         self._test_guess_attributes_from_name( | ||||
|             "Sender - Spaced Title.pdf", "Sender", "Spaced Title", ()) | ||||
|  | ||||
|     def test_guess_attributes_from_name3(self): | ||||
|         self._test_guess_attributes_from_name( | ||||
|             "Dashed-Sender - Title.pdf", "Dashed-Sender", "Title", ()) | ||||
|  | ||||
|     def test_guess_attributes_from_name4(self): | ||||
|         self._test_guess_attributes_from_name( | ||||
|             "Sender - Dashed-Title.pdf", "Sender", "Dashed-Title", ()) | ||||
|  | ||||
|     def test_guess_attributes_from_name5(self): | ||||
|         self._test_guess_attributes_from_name( | ||||
|             "Sender - Title - tag1,tag2,tag3.pdf", | ||||
|             "Sender", | ||||
|             "Title", | ||||
|             self.TAGS | ||||
|         ) | ||||
|  | ||||
|     def test_guess_attributes_from_name6(self): | ||||
|         self._test_guess_attributes_from_name( | ||||
|             "Spaced Sender - Title - tag1,tag2,tag3.pdf", | ||||
|             "Spaced Sender", | ||||
|             "Title", | ||||
|             self.TAGS | ||||
|         ) | ||||
|  | ||||
|     def test_guess_attributes_from_name7(self): | ||||
|         self._test_guess_attributes_from_name( | ||||
|             "Sender - Spaced Title - tag1,tag2,tag3.pdf", | ||||
|             "Sender", | ||||
|             "Spaced Title", | ||||
|             self.TAGS | ||||
|         ) | ||||
|  | ||||
|     def test_guess_attributes_from_name8(self): | ||||
|         self._test_guess_attributes_from_name( | ||||
|             "Dashed-Sender - Title - tag1,tag2,tag3.pdf", | ||||
|             "Dashed-Sender", | ||||
|             "Title", | ||||
|             self.TAGS | ||||
|         ) | ||||
|  | ||||
|     def test_guess_attributes_from_name9(self): | ||||
|         self._test_guess_attributes_from_name( | ||||
|             "Sender - Dashed-Title - tag1,tag2,tag3.pdf", | ||||
|             "Sender", | ||||
|             "Dashed-Title", | ||||
|             self.TAGS | ||||
|         ) | ||||
|  | ||||
|     def test_guess_attributes_from_name10(self): | ||||
|         self._test_guess_attributes_from_name( | ||||
|             "Σενδερ - Τιτλε - tag1,tag2,tag3.pdf", | ||||
|             "Σενδερ", | ||||
|             "Τιτλε", | ||||
|             self.TAGS | ||||
|         ) | ||||
|  | ||||
|     def test_guess_attributes_from_name_when_correspondent_empty(self): | ||||
|         self._test_guess_attributes_from_name( | ||||
|             ' - weird empty correspondent but should not break.pdf', | ||||
|             None, | ||||
|             'weird empty correspondent but should not break', | ||||
|             () | ||||
|         ) | ||||
|  | ||||
|     def test_guess_attributes_from_name_when_title_starts_with_dash(self): | ||||
|         self._test_guess_attributes_from_name( | ||||
| @@ -121,28 +46,6 @@ class TestAttributes(TestCase): | ||||
|             () | ||||
|         ) | ||||
|  | ||||
|     def test_guess_attributes_from_name_when_title_is_empty(self): | ||||
|         self._test_guess_attributes_from_name( | ||||
|             'weird correspondent but should not break - .pdf', | ||||
|             'weird correspondent but should not break', | ||||
|             '', | ||||
|             () | ||||
|         ) | ||||
|  | ||||
|     def test_case_insensitive_tag_creation(self): | ||||
|         """ | ||||
|         Tags should be detected and created as lower case. | ||||
|         :return: | ||||
|         """ | ||||
|  | ||||
|         filename = "Title - Correspondent - tAg1,TAG2.pdf" | ||||
|         self.assertEqual(len(FileInfo.from_filename(filename).tags), 2) | ||||
|  | ||||
|         path = "Title - Correspondent - tag1,tag2.pdf" | ||||
|         self.assertEqual(len(FileInfo.from_filename(filename).tags), 2) | ||||
|  | ||||
|         self.assertEqual(Tag.objects.all().count(), 2) | ||||
|  | ||||
|  | ||||
| class TestFieldPermutations(TestCase): | ||||
|  | ||||
| @@ -199,69 +102,7 @@ class TestFieldPermutations(TestCase): | ||||
|             filename = template.format(**spec) | ||||
|             self._test_guessed_attributes(filename, **spec) | ||||
|  | ||||
|     def test_title_and_correspondent(self): | ||||
|         template = '{correspondent} - {title}.pdf' | ||||
|         for correspondent in self.valid_correspondents: | ||||
|             for title in self.valid_titles: | ||||
|                 spec = dict(correspondent=correspondent, title=title) | ||||
|                 filename = template.format(**spec) | ||||
|                 self._test_guessed_attributes(filename, **spec) | ||||
|  | ||||
|     def test_title_and_correspondent_and_tags(self): | ||||
|         template = '{correspondent} - {title} - {tags}.pdf' | ||||
|         for correspondent in self.valid_correspondents: | ||||
|             for title in self.valid_titles: | ||||
|                 for tags in self.valid_tags: | ||||
|                     spec = dict(correspondent=correspondent, title=title, | ||||
|                                 tags=tags) | ||||
|                     filename = template.format(**spec) | ||||
|                     self._test_guessed_attributes(filename, **spec) | ||||
|  | ||||
|     def test_created_and_correspondent_and_title_and_tags(self): | ||||
|  | ||||
|         template = ( | ||||
|             "{created} - " | ||||
|             "{correspondent} - " | ||||
|             "{title} - " | ||||
|             "{tags}.pdf" | ||||
|         ) | ||||
|  | ||||
|         for created in self.valid_dates: | ||||
|             for correspondent in self.valid_correspondents: | ||||
|                 for title in self.valid_titles: | ||||
|                     for tags in self.valid_tags: | ||||
|                         spec = { | ||||
|                             "created": created, | ||||
|                             "correspondent": correspondent, | ||||
|                             "title": title, | ||||
|                             "tags": tags, | ||||
|                         } | ||||
|                         self._test_guessed_attributes( | ||||
|                             template.format(**spec), **spec) | ||||
|  | ||||
|     def test_created_and_correspondent_and_title(self): | ||||
|  | ||||
|         template = "{created} - {correspondent} - {title}.pdf" | ||||
|  | ||||
|         for created in self.valid_dates: | ||||
|             for correspondent in self.valid_correspondents: | ||||
|                 for title in self.valid_titles: | ||||
|  | ||||
|                     # Skip cases where title looks like a tag as we can't | ||||
|                     # accommodate such cases. | ||||
|                     if title.lower() == title: | ||||
|                         continue | ||||
|  | ||||
|                     spec = { | ||||
|                         "created": created, | ||||
|                         "correspondent": correspondent, | ||||
|                         "title": title | ||||
|                     } | ||||
|                     self._test_guessed_attributes( | ||||
|                         template.format(**spec), **spec) | ||||
|  | ||||
|     def test_created_and_title(self): | ||||
|  | ||||
|         template = "{created} - {title}.pdf" | ||||
|  | ||||
|         for created in self.valid_dates: | ||||
| @@ -273,21 +114,6 @@ class TestFieldPermutations(TestCase): | ||||
|                 self._test_guessed_attributes( | ||||
|                     template.format(**spec), **spec) | ||||
|  | ||||
|     def test_created_and_title_and_tags(self): | ||||
|  | ||||
|         template = "{created} - {title} - {tags}.pdf" | ||||
|  | ||||
|         for created in self.valid_dates: | ||||
|             for title in self.valid_titles: | ||||
|                 for tags in self.valid_tags: | ||||
|                     spec = { | ||||
|                         "created": created, | ||||
|                         "title": title, | ||||
|                         "tags": tags | ||||
|                     } | ||||
|                     self._test_guessed_attributes( | ||||
|                         template.format(**spec), **spec) | ||||
|  | ||||
|     def test_invalid_date_format(self): | ||||
|         info = FileInfo.from_filename("06112017Z - title.pdf") | ||||
|         self.assertEqual(info.title, "title") | ||||
| @@ -336,32 +162,6 @@ class TestFieldPermutations(TestCase): | ||||
|             info = FileInfo.from_filename(filename) | ||||
|             self.assertEqual(info.title, "anotherall") | ||||
|  | ||||
|         # Complex transformation without date in replacement string | ||||
|         with self.settings( | ||||
|                 FILENAME_PARSE_TRANSFORMS=[(exact_patt, repl1)]): | ||||
|             info = FileInfo.from_filename(filename) | ||||
|             self.assertEqual(info.title, "0001") | ||||
|             self.assertEqual(len(info.tags), 2) | ||||
|             self.assertEqual(info.tags[0].name, "tag1") | ||||
|             self.assertEqual(info.tags[1].name, "tag2") | ||||
|             self.assertIsNone(info.created) | ||||
|  | ||||
|         # Complex transformation with date in replacement string | ||||
|         with self.settings( | ||||
|             FILENAME_PARSE_TRANSFORMS=[ | ||||
|                 (none_patt, "none.gif"), | ||||
|                 (exact_patt, repl2),    # <-- matches | ||||
|                 (exact_patt, repl1), | ||||
|                 (all_patt, "all.gif")]): | ||||
|             info = FileInfo.from_filename(filename) | ||||
|             self.assertEqual(info.title, "0001") | ||||
|             self.assertEqual(len(info.tags), 2) | ||||
|             self.assertEqual(info.tags[0].name, "tag1") | ||||
|             self.assertEqual(info.tags[1].name, "tag2") | ||||
|             self.assertEqual(info.created.year, 2019) | ||||
|             self.assertEqual(info.created.month, 9) | ||||
|             self.assertEqual(info.created.day, 8) | ||||
|  | ||||
|  | ||||
| class DummyParser(DocumentParser): | ||||
|  | ||||
| @@ -476,15 +276,13 @@ class TestConsumer(DirectoriesMixin, TestCase): | ||||
|  | ||||
|     def testOverrideFilename(self): | ||||
|         filename = self.get_test_file() | ||||
|         override_filename = "My Bank - Statement for November.pdf" | ||||
|         override_filename = "Statement for November.pdf" | ||||
|  | ||||
|         document = self.consumer.try_consume_file(filename, override_filename=override_filename) | ||||
|  | ||||
|         self.assertEqual(document.correspondent.name, "My Bank") | ||||
|         self.assertEqual(document.title, "Statement for November") | ||||
|  | ||||
|     def testOverrideTitle(self): | ||||
|  | ||||
|         document = self.consumer.try_consume_file(self.get_test_file(), override_title="Override Title") | ||||
|         self.assertEqual(document.title, "Override Title") | ||||
|  | ||||
| @@ -594,11 +392,10 @@ class TestConsumer(DirectoriesMixin, TestCase): | ||||
|     def testFilenameHandling(self): | ||||
|         filename = self.get_test_file() | ||||
|  | ||||
|         document = self.consumer.try_consume_file(filename, override_filename="Bank - Test.pdf", override_title="new docs") | ||||
|         document = self.consumer.try_consume_file(filename, override_title="new docs") | ||||
|  | ||||
|         self.assertEqual(document.title, "new docs") | ||||
|         self.assertEqual(document.correspondent.name, "Bank") | ||||
|         self.assertEqual(document.filename, "Bank/new docs.pdf") | ||||
|         self.assertEqual(document.filename, "none/new docs.pdf") | ||||
|  | ||||
|     @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}") | ||||
|     @mock.patch("documents.signals.handlers.generate_unique_filename") | ||||
| @@ -617,10 +414,9 @@ class TestConsumer(DirectoriesMixin, TestCase): | ||||
|  | ||||
|         Tag.objects.create(name="test", is_inbox_tag=True) | ||||
|  | ||||
|         document = self.consumer.try_consume_file(filename, override_filename="Bank - Test.pdf", override_title="new docs") | ||||
|         document = self.consumer.try_consume_file(filename, override_title="new docs") | ||||
|  | ||||
|         self.assertEqual(document.title, "new docs") | ||||
|         self.assertEqual(document.correspondent.name, "Bank") | ||||
|         self.assertIsNotNone(os.path.isfile(document.title)) | ||||
|         self.assertTrue(os.path.isfile(document.source_path)) | ||||
|  | ||||
| @@ -642,3 +438,31 @@ class TestConsumer(DirectoriesMixin, TestCase): | ||||
|         self.assertEqual(document.document_type, dtype) | ||||
|         self.assertIn(t1, document.tags.all()) | ||||
|         self.assertNotIn(t2, document.tags.all()) | ||||
|  | ||||
|     @override_settings(CONSUMER_DELETE_DUPLICATES=True) | ||||
|     def test_delete_duplicate(self): | ||||
|         dst = self.get_test_file() | ||||
|         self.assertTrue(os.path.isfile(dst)) | ||||
|         doc = self.consumer.try_consume_file(dst) | ||||
|  | ||||
|         self.assertFalse(os.path.isfile(dst)) | ||||
|         self.assertIsNotNone(doc) | ||||
|  | ||||
|         dst = self.get_test_file() | ||||
|         self.assertTrue(os.path.isfile(dst)) | ||||
|         self.assertRaises(ConsumerError, self.consumer.try_consume_file, dst) | ||||
|         self.assertFalse(os.path.isfile(dst)) | ||||
|  | ||||
|     @override_settings(CONSUMER_DELETE_DUPLICATES=False) | ||||
|     def test_no_delete_duplicate(self): | ||||
|         dst = self.get_test_file() | ||||
|         self.assertTrue(os.path.isfile(dst)) | ||||
|         doc = self.consumer.try_consume_file(dst) | ||||
|  | ||||
|         self.assertFalse(os.path.isfile(dst)) | ||||
|         self.assertIsNotNone(doc) | ||||
|  | ||||
|         dst = self.get_test_file() | ||||
|         self.assertTrue(os.path.isfile(dst)) | ||||
|         self.assertRaises(ConsumerError, self.consumer.try_consume_file, dst) | ||||
|         self.assertTrue(os.path.isfile(dst)) | ||||
|   | ||||
| @@ -14,7 +14,7 @@ from django.utils import timezone | ||||
| from .utils import DirectoriesMixin | ||||
| from ..file_handling import generate_filename, create_source_path_directory, delete_empty_directories, \ | ||||
|     generate_unique_filename | ||||
| from ..models import Document, Correspondent, Tag | ||||
| from ..models import Document, Correspondent, Tag, DocumentType | ||||
|  | ||||
|  | ||||
| class TestFileHandling(DirectoriesMixin, TestCase): | ||||
| @@ -190,6 +190,17 @@ class TestFileHandling(DirectoriesMixin, TestCase): | ||||
|         self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), True) | ||||
|         self.assertTrue(os.path.isfile(important_file)) | ||||
|  | ||||
|     @override_settings(PAPERLESS_FILENAME_FORMAT="{document_type} - {title}") | ||||
|     def test_document_type(self): | ||||
|         dt = DocumentType.objects.create(name="my_doc_type") | ||||
|         d = Document.objects.create(title="the_doc", mime_type="application/pdf") | ||||
|  | ||||
|         self.assertEqual(generate_filename(d), "none - the_doc.pdf") | ||||
|  | ||||
|         d.document_type = dt | ||||
|  | ||||
|         self.assertEqual(generate_filename(d), "my_doc_type - the_doc.pdf") | ||||
|  | ||||
|     @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}") | ||||
|     def test_tags_with_underscore(self): | ||||
|         document = Document() | ||||
|   | ||||
							
								
								
									
										135
									
								
								src/documents/tests/test_management.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										135
									
								
								src/documents/tests/test_management.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,135 @@ | ||||
| import hashlib | ||||
| import tempfile | ||||
| import filecmp | ||||
| import os | ||||
| import shutil | ||||
| from pathlib import Path | ||||
| from unittest import mock | ||||
|  | ||||
| from django.test import TestCase, override_settings | ||||
|  | ||||
|  | ||||
| from django.core.management import call_command | ||||
|  | ||||
| from documents.file_handling import generate_filename | ||||
| from documents.management.commands.document_archiver import handle_document | ||||
| from documents.models import Document | ||||
| from documents.tests.utils import DirectoriesMixin | ||||
|  | ||||
|  | ||||
| sample_file = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf") | ||||
|  | ||||
|  | ||||
| class TestArchiver(DirectoriesMixin, TestCase): | ||||
|  | ||||
|     def make_models(self): | ||||
|         return Document.objects.create(checksum="A", title="A", content="first document", mime_type="application/pdf") | ||||
|  | ||||
|     def test_archiver(self): | ||||
|  | ||||
|         doc = self.make_models() | ||||
|         shutil.copy(sample_file, os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf")) | ||||
|  | ||||
|         call_command('document_archiver') | ||||
|  | ||||
|     def test_handle_document(self): | ||||
|  | ||||
|         doc = self.make_models() | ||||
|         shutil.copy(sample_file, os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf")) | ||||
|  | ||||
|         handle_document(doc.pk) | ||||
|  | ||||
|         doc = Document.objects.get(id=doc.id) | ||||
|  | ||||
|         self.assertIsNotNone(doc.checksum) | ||||
|         self.assertTrue(os.path.isfile(doc.archive_path)) | ||||
|         self.assertTrue(os.path.isfile(doc.source_path)) | ||||
|         self.assertTrue(filecmp.cmp(sample_file, doc.source_path)) | ||||
|  | ||||
|  | ||||
| class TestDecryptDocuments(TestCase): | ||||
|  | ||||
|     @override_settings( | ||||
|         ORIGINALS_DIR=os.path.join(os.path.dirname(__file__), "samples", "originals"), | ||||
|         THUMBNAIL_DIR=os.path.join(os.path.dirname(__file__), "samples", "thumb"), | ||||
|         PASSPHRASE="test", | ||||
|         PAPERLESS_FILENAME_FORMAT=None | ||||
|     ) | ||||
|     @mock.patch("documents.management.commands.decrypt_documents.input") | ||||
|     def test_decrypt(self, m): | ||||
|  | ||||
|         media_dir = tempfile.mkdtemp() | ||||
|         originals_dir = os.path.join(media_dir, "documents", "originals") | ||||
|         thumb_dir = os.path.join(media_dir, "documents", "thumbnails") | ||||
|         os.makedirs(originals_dir, exist_ok=True) | ||||
|         os.makedirs(thumb_dir, exist_ok=True) | ||||
|  | ||||
|         override_settings( | ||||
|             ORIGINALS_DIR=originals_dir, | ||||
|             THUMBNAIL_DIR=thumb_dir, | ||||
|             PASSPHRASE="test" | ||||
|         ).enable() | ||||
|  | ||||
|         doc = Document.objects.create(checksum="9c9691e51741c1f4f41a20896af31770", title="wow", filename="0000002.pdf.gpg",  mime_type="application/pdf", storage_type=Document.STORAGE_TYPE_GPG) | ||||
|  | ||||
|         shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "originals", "0000002.pdf.gpg"), os.path.join(originals_dir, "0000002.pdf.gpg")) | ||||
|         shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "thumbnails", f"0000002.png.gpg"), os.path.join(thumb_dir, f"{doc.id:07}.png.gpg")) | ||||
|  | ||||
|         call_command('decrypt_documents') | ||||
|  | ||||
|         doc.refresh_from_db() | ||||
|  | ||||
|         self.assertEqual(doc.storage_type, Document.STORAGE_TYPE_UNENCRYPTED) | ||||
|         self.assertEqual(doc.filename, "0000002.pdf") | ||||
|         self.assertTrue(os.path.isfile(os.path.join(originals_dir, "0000002.pdf"))) | ||||
|         self.assertTrue(os.path.isfile(doc.source_path)) | ||||
|         self.assertTrue(os.path.isfile(os.path.join(thumb_dir, f"{doc.id:07}.png"))) | ||||
|         self.assertTrue(os.path.isfile(doc.thumbnail_path)) | ||||
|  | ||||
|         with doc.source_file as f: | ||||
|             checksum = hashlib.md5(f.read()).hexdigest() | ||||
|             self.assertEqual(checksum, doc.checksum) | ||||
|  | ||||
|  | ||||
| class TestMakeIndex(TestCase): | ||||
|  | ||||
|     @mock.patch("documents.management.commands.document_index.index_reindex") | ||||
|     def test_reindex(self, m): | ||||
|         call_command("document_index", "reindex") | ||||
|         m.assert_called_once() | ||||
|  | ||||
|     @mock.patch("documents.management.commands.document_index.index_optimize") | ||||
|     def test_optimize(self, m): | ||||
|         call_command("document_index", "optimize") | ||||
|         m.assert_called_once() | ||||
|  | ||||
|  | ||||
| class TestRenamer(DirectoriesMixin, TestCase): | ||||
|  | ||||
|     def test_rename(self): | ||||
|         doc = Document.objects.create(title="test", mime_type="application/pdf") | ||||
|         doc.filename = generate_filename(doc) | ||||
|         doc.save() | ||||
|  | ||||
|         Path(doc.source_path).touch() | ||||
|  | ||||
|         old_source_path = doc.source_path | ||||
|  | ||||
|         with override_settings(PAPERLESS_FILENAME_FORMAT="{title}"): | ||||
|             call_command("document_renamer") | ||||
|  | ||||
|         doc2 = Document.objects.get(id=doc.id) | ||||
|  | ||||
|         self.assertEqual(doc2.filename, "test.pdf") | ||||
|         self.assertFalse(os.path.isfile(old_source_path)) | ||||
|         self.assertFalse(os.path.isfile(doc.source_path)) | ||||
|         self.assertTrue(os.path.isfile(doc2.source_path)) | ||||
|  | ||||
|  | ||||
| class TestCreateClassifier(TestCase): | ||||
|  | ||||
|     @mock.patch("documents.management.commands.document_create_classifier.train_classifier") | ||||
|     def test_create_classifier(self, m): | ||||
|         call_command("document_create_classifier") | ||||
|  | ||||
|         m.assert_called_once() | ||||
| @@ -1,40 +0,0 @@ | ||||
| import filecmp | ||||
| import os | ||||
| import shutil | ||||
|  | ||||
| from django.core.management import call_command | ||||
| from django.test import TestCase | ||||
|  | ||||
| from documents.management.commands.document_archiver import handle_document | ||||
| from documents.models import Document | ||||
| from documents.tests.utils import DirectoriesMixin | ||||
|  | ||||
|  | ||||
| sample_file = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf") | ||||
|  | ||||
|  | ||||
| class TestArchiver(DirectoriesMixin, TestCase): | ||||
|  | ||||
|     def make_models(self): | ||||
|         return Document.objects.create(checksum="A", title="A", content="first document", mime_type="application/pdf") | ||||
|  | ||||
|     def test_archiver(self): | ||||
|  | ||||
|         doc = self.make_models() | ||||
|         shutil.copy(sample_file, os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf")) | ||||
|  | ||||
|         call_command('document_archiver') | ||||
|  | ||||
|     def test_handle_document(self): | ||||
|  | ||||
|         doc = self.make_models() | ||||
|         shutil.copy(sample_file, os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf")) | ||||
|  | ||||
|         handle_document(doc.pk) | ||||
|  | ||||
|         doc = Document.objects.get(id=doc.id) | ||||
|  | ||||
|         self.assertIsNotNone(doc.checksum) | ||||
|         self.assertTrue(os.path.isfile(doc.archive_path)) | ||||
|         self.assertTrue(os.path.isfile(doc.source_path)) | ||||
|         self.assertTrue(filecmp.cmp(sample_file, doc.source_path)) | ||||
| @@ -1,57 +0,0 @@ | ||||
| import hashlib | ||||
| import json | ||||
| import os | ||||
| import shutil | ||||
| import tempfile | ||||
| from unittest import mock | ||||
|  | ||||
| from django.core.management import call_command | ||||
| from django.test import TestCase, override_settings | ||||
|  | ||||
| from documents.management.commands import document_exporter | ||||
| from documents.models import Document, Tag, DocumentType, Correspondent | ||||
|  | ||||
|  | ||||
| class TestDecryptDocuments(TestCase): | ||||
|  | ||||
|     @override_settings( | ||||
|         ORIGINALS_DIR=os.path.join(os.path.dirname(__file__), "samples", "originals"), | ||||
|         THUMBNAIL_DIR=os.path.join(os.path.dirname(__file__), "samples", "thumb"), | ||||
|         PASSPHRASE="test", | ||||
|         PAPERLESS_FILENAME_FORMAT=None | ||||
|     ) | ||||
|     @mock.patch("documents.management.commands.decrypt_documents.input") | ||||
|     def test_decrypt(self, m): | ||||
|  | ||||
|         media_dir = tempfile.mkdtemp() | ||||
|         originals_dir = os.path.join(media_dir, "documents", "originals") | ||||
|         thumb_dir = os.path.join(media_dir, "documents", "thumbnails") | ||||
|         os.makedirs(originals_dir, exist_ok=True) | ||||
|         os.makedirs(thumb_dir, exist_ok=True) | ||||
|  | ||||
|         override_settings( | ||||
|             ORIGINALS_DIR=originals_dir, | ||||
|             THUMBNAIL_DIR=thumb_dir, | ||||
|             PASSPHRASE="test" | ||||
|         ).enable() | ||||
|  | ||||
|         doc = Document.objects.create(checksum="9c9691e51741c1f4f41a20896af31770", title="wow", filename="0000002.pdf.gpg",  mime_type="application/pdf", storage_type=Document.STORAGE_TYPE_GPG) | ||||
|  | ||||
|         shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "originals", "0000002.pdf.gpg"), os.path.join(originals_dir, "0000002.pdf.gpg")) | ||||
|         shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "thumbnails", f"0000002.png.gpg"), os.path.join(thumb_dir, f"{doc.id:07}.png.gpg")) | ||||
|  | ||||
|         call_command('decrypt_documents') | ||||
|  | ||||
|         doc.refresh_from_db() | ||||
|  | ||||
|         self.assertEqual(doc.storage_type, Document.STORAGE_TYPE_UNENCRYPTED) | ||||
|         self.assertEqual(doc.filename, "0000002.pdf") | ||||
|         self.assertTrue(os.path.isfile(os.path.join(originals_dir, "0000002.pdf"))) | ||||
|         self.assertTrue(os.path.isfile(doc.source_path)) | ||||
|         self.assertTrue(os.path.isfile(os.path.join(thumb_dir, f"{doc.id:07}.png"))) | ||||
|         self.assertTrue(os.path.isfile(doc.thumbnail_path)) | ||||
|  | ||||
|         with doc.source_file as f: | ||||
|             checksum = hashlib.md5(f.read()).hexdigest() | ||||
|             self.assertEqual(checksum, doc.checksum) | ||||
|  | ||||
							
								
								
									
										129
									
								
								src/documents/tests/test_migrations.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										129
									
								
								src/documents/tests/test_migrations.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,129 @@ | ||||
| import os | ||||
| import shutil | ||||
| from pathlib import Path | ||||
|  | ||||
| from django.apps import apps | ||||
| from django.conf import settings | ||||
| from django.db import connection | ||||
| from django.db.migrations.executor import MigrationExecutor | ||||
| from django.test import TestCase, TransactionTestCase, override_settings | ||||
|  | ||||
| from documents.models import Document | ||||
| from documents.parsers import get_default_file_extension | ||||
| from documents.tests.utils import DirectoriesMixin | ||||
|  | ||||
|  | ||||
| class TestMigrations(TransactionTestCase): | ||||
|  | ||||
|     @property | ||||
|     def app(self): | ||||
|         return apps.get_containing_app_config(type(self).__module__).name | ||||
|  | ||||
|     migrate_from = None | ||||
|     migrate_to = None | ||||
|  | ||||
|     def setUp(self): | ||||
|         super(TestMigrations, self).setUp() | ||||
|  | ||||
|         assert self.migrate_from and self.migrate_to, \ | ||||
|             "TestCase '{}' must define migrate_from and migrate_to     properties".format(type(self).__name__) | ||||
|         self.migrate_from = [(self.app, self.migrate_from)] | ||||
|         self.migrate_to = [(self.app, self.migrate_to)] | ||||
|         executor = MigrationExecutor(connection) | ||||
|         old_apps = executor.loader.project_state(self.migrate_from).apps | ||||
|  | ||||
|         # Reverse to the original migration | ||||
|         executor.migrate(self.migrate_from) | ||||
|  | ||||
|         self.setUpBeforeMigration(old_apps) | ||||
|  | ||||
|         # Run the migration to test | ||||
|         executor = MigrationExecutor(connection) | ||||
|         executor.loader.build_graph()  # reload. | ||||
|         executor.migrate(self.migrate_to) | ||||
|  | ||||
|         self.apps = executor.loader.project_state(self.migrate_to).apps | ||||
|  | ||||
|     def setUpBeforeMigration(self, apps): | ||||
|         pass | ||||
|  | ||||
|  | ||||
| STORAGE_TYPE_UNENCRYPTED = "unencrypted" | ||||
| STORAGE_TYPE_GPG = "gpg" | ||||
|  | ||||
|  | ||||
| def source_path_before(self): | ||||
|     if self.filename: | ||||
|         fname = str(self.filename) | ||||
|     else: | ||||
|         fname = "{:07}.{}".format(self.pk, self.file_type) | ||||
|         if self.storage_type == STORAGE_TYPE_GPG: | ||||
|             fname += ".gpg" | ||||
|  | ||||
|     return os.path.join( | ||||
|         settings.ORIGINALS_DIR, | ||||
|         fname | ||||
|     ) | ||||
|  | ||||
|  | ||||
| def file_type_after(self): | ||||
|     return get_default_file_extension(self.mime_type) | ||||
|  | ||||
|  | ||||
| def source_path_after(doc): | ||||
|     if doc.filename: | ||||
|         fname = str(doc.filename) | ||||
|     else: | ||||
|         fname = "{:07}{}".format(doc.pk, file_type_after(doc)) | ||||
|         if doc.storage_type == STORAGE_TYPE_GPG: | ||||
|             fname += ".gpg"  # pragma: no cover | ||||
|  | ||||
|     return os.path.join( | ||||
|         settings.ORIGINALS_DIR, | ||||
|         fname | ||||
|     ) | ||||
|  | ||||
|  | ||||
| @override_settings(PASSPHRASE="test") | ||||
| class TestMigrateMimeType(DirectoriesMixin, TestMigrations): | ||||
|  | ||||
|     migrate_from = '1002_auto_20201111_1105' | ||||
|     migrate_to = '1003_mime_types' | ||||
|  | ||||
|     def setUpBeforeMigration(self, apps): | ||||
|         Document = apps.get_model("documents", "Document") | ||||
|         doc = Document.objects.create(title="test", file_type="pdf", filename="file1.pdf") | ||||
|         self.doc_id = doc.id | ||||
|         shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), source_path_before(doc)) | ||||
|  | ||||
|         doc2 = Document.objects.create(checksum="B", file_type="pdf", storage_type=STORAGE_TYPE_GPG) | ||||
|         self.doc2_id = doc2.id | ||||
|         shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "originals", "0000002.pdf.gpg"), source_path_before(doc2)) | ||||
|  | ||||
|     def testMimeTypesMigrated(self): | ||||
|         Document = self.apps.get_model('documents', 'Document') | ||||
|  | ||||
|         doc = Document.objects.get(id=self.doc_id) | ||||
|         self.assertEqual(doc.mime_type, "application/pdf") | ||||
|  | ||||
|         doc2 = Document.objects.get(id=self.doc2_id) | ||||
|         self.assertEqual(doc2.mime_type, "application/pdf") | ||||
|  | ||||
|  | ||||
| @override_settings(PASSPHRASE="test") | ||||
| class TestMigrateMimeTypeBackwards(DirectoriesMixin, TestMigrations): | ||||
|  | ||||
|     migrate_from = '1003_mime_types' | ||||
|     migrate_to = '1002_auto_20201111_1105' | ||||
|  | ||||
|     def setUpBeforeMigration(self, apps): | ||||
|         Document = apps.get_model("documents", "Document") | ||||
|         doc = Document.objects.create(title="test", mime_type="application/pdf", filename="file1.pdf") | ||||
|         self.doc_id = doc.id | ||||
|         shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), source_path_after(doc)) | ||||
|  | ||||
|     def testMimeTypesReverted(self): | ||||
|         Document = self.apps.get_model('documents', 'Document') | ||||
|  | ||||
|         doc = Document.objects.get(id=self.doc_id) | ||||
|         self.assertEqual(doc.file_type, "pdf") | ||||
| @@ -389,14 +389,27 @@ class SearchView(APIView): | ||||
|                 } | ||||
|  | ||||
|     def get(self, request, format=None): | ||||
|         if 'query' not in request.query_params: | ||||
|  | ||||
|         if 'query' in request.query_params: | ||||
|             query = request.query_params['query'] | ||||
|         else: | ||||
|             query = None | ||||
|  | ||||
|         if 'more_like' in request.query_params: | ||||
|             more_like_id = request.query_params['more_like'] | ||||
|             more_like_content = Document.objects.get(id=more_like_id).content | ||||
|         else: | ||||
|             more_like_id = None | ||||
|             more_like_content = None | ||||
|  | ||||
|         if not query and not more_like_id: | ||||
|             return Response({ | ||||
|                 'count': 0, | ||||
|                 'page': 0, | ||||
|                 'page_count': 0, | ||||
|                 'corrected_query': None, | ||||
|                 'results': []}) | ||||
|  | ||||
|         query = request.query_params['query'] | ||||
|         try: | ||||
|             page = int(request.query_params.get('page', 1)) | ||||
|         except (ValueError, TypeError): | ||||
| @@ -406,8 +419,7 @@ class SearchView(APIView): | ||||
|             page = 1 | ||||
|  | ||||
|         try: | ||||
|             with index.query_page(self.ix, query, page) as (result_page, | ||||
|                                                             corrected_query): | ||||
|             with index.query_page(self.ix, page, query, more_like_id, more_like_content) as (result_page, corrected_query):  # NOQA: E501 | ||||
|                 return Response( | ||||
|                     {'count': len(result_page), | ||||
|                      'page': result_page.pagenum, | ||||
|   | ||||
| @@ -13,18 +13,17 @@ writeable_hint = ( | ||||
| ) | ||||
|  | ||||
|  | ||||
| def path_check(env_var): | ||||
| def path_check(var, directory): | ||||
|     messages = [] | ||||
|     directory = os.getenv(env_var) | ||||
|     if directory: | ||||
|         if not os.path.exists(directory): | ||||
|             messages.append(Error( | ||||
|                 exists_message.format(env_var), | ||||
|                 exists_message.format(var), | ||||
|                 exists_hint.format(directory) | ||||
|             )) | ||||
|         elif not os.access(directory, os.W_OK | os.X_OK): | ||||
|             messages.append(Error( | ||||
|                 writeable_message.format(env_var), | ||||
|                 writeable_message.format(var), | ||||
|                 writeable_hint.format(directory) | ||||
|             )) | ||||
|     return messages | ||||
| @@ -36,12 +35,9 @@ def paths_check(app_configs, **kwargs): | ||||
|     Check the various paths for existence, readability and writeability | ||||
|     """ | ||||
|  | ||||
|     check_messages = path_check("PAPERLESS_DATA_DIR") + \ | ||||
|         path_check("PAPERLESS_MEDIA_ROOT") + \ | ||||
|         path_check("PAPERLESS_CONSUMPTION_DIR") + \ | ||||
|         path_check("PAPERLESS_STATICDIR") | ||||
|  | ||||
|     return check_messages | ||||
|     return path_check("PAPERLESS_DATA_DIR", settings.DATA_DIR) + \ | ||||
|         path_check("PAPERLESS_MEDIA_ROOT", settings.MEDIA_ROOT) + \ | ||||
|         path_check("PAPERLESS_CONSUMPTION_DIR", settings.CONSUMPTION_DIR) | ||||
|  | ||||
|  | ||||
| @register() | ||||
|   | ||||
| @@ -160,13 +160,6 @@ if AUTO_LOGIN_USERNAME: | ||||
|     MIDDLEWARE.insert(_index+1, 'paperless.auth.AutoLoginMiddleware') | ||||
|  | ||||
|  | ||||
| if DEBUG: | ||||
|     X_FRAME_OPTIONS = '' | ||||
|     # this should really be 'allow-from uri' but its not supported in any mayor | ||||
|     # browser. | ||||
| else: | ||||
|     X_FRAME_OPTIONS = 'SAMEORIGIN' | ||||
|  | ||||
| # We allow CORS from localhost:8080 | ||||
| CORS_ALLOWED_ORIGINS = tuple(os.getenv("PAPERLESS_CORS_ALLOWED_HOSTS", "http://localhost:8000").split(",")) | ||||
|  | ||||
|   | ||||
							
								
								
									
										54
									
								
								src/paperless/tests/test_checks.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										54
									
								
								src/paperless/tests/test_checks.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,54 @@ | ||||
| import os | ||||
| import shutil | ||||
|  | ||||
| from django.test import TestCase, override_settings | ||||
|  | ||||
| from documents.tests.utils import DirectoriesMixin | ||||
| from paperless import binaries_check, paths_check | ||||
| from paperless.checks import debug_mode_check | ||||
|  | ||||
|  | ||||
| class TestChecks(DirectoriesMixin, TestCase): | ||||
|  | ||||
|     def test_binaries(self): | ||||
|         self.assertEqual(binaries_check(None), []) | ||||
|  | ||||
|     @override_settings(CONVERT_BINARY="uuuhh", OPTIPNG_BINARY="forgot") | ||||
|     def test_binaries_fail(self): | ||||
|         self.assertEqual(len(binaries_check(None)), 2) | ||||
|  | ||||
|     def test_paths_check(self): | ||||
|         self.assertEqual(paths_check(None), []) | ||||
|  | ||||
|     @override_settings(MEDIA_ROOT="uuh", | ||||
|                        DATA_DIR="whatever", | ||||
|                        CONSUMPTION_DIR="idontcare") | ||||
|     def test_paths_check_dont_exist(self): | ||||
|         msgs = paths_check(None) | ||||
|         self.assertEqual(len(msgs), 3, str(msgs)) | ||||
|  | ||||
|         for msg in msgs: | ||||
|             self.assertTrue(msg.msg.endswith("is set but doesn't exist.")) | ||||
|  | ||||
|     def test_paths_check_no_access(self): | ||||
|         os.chmod(self.dirs.data_dir, 0o000) | ||||
|         os.chmod(self.dirs.media_dir, 0o000) | ||||
|         os.chmod(self.dirs.consumption_dir, 0o000) | ||||
|  | ||||
|         self.addCleanup(os.chmod, self.dirs.data_dir, 0o777) | ||||
|         self.addCleanup(os.chmod, self.dirs.media_dir, 0o777) | ||||
|         self.addCleanup(os.chmod, self.dirs.consumption_dir, 0o777) | ||||
|  | ||||
|         msgs = paths_check(None) | ||||
|         self.assertEqual(len(msgs), 3) | ||||
|  | ||||
|         for msg in msgs: | ||||
|             self.assertTrue(msg.msg.endswith("is not writeable")) | ||||
|  | ||||
|     @override_settings(DEBUG=False) | ||||
|     def test_debug_disabled(self): | ||||
|         self.assertEqual(debug_mode_check(None), []) | ||||
|  | ||||
|     @override_settings(DEBUG=True) | ||||
|     def test_debug_enabled(self): | ||||
|         self.assertEqual(len(debug_mode_check(None)), 1) | ||||
| @@ -1,7 +1,7 @@ | ||||
| import subprocess | ||||
|  | ||||
| from django.conf import settings | ||||
| from django.core.checks import Error, register | ||||
| from django.core.checks import Error, Warning, register | ||||
|  | ||||
|  | ||||
| def get_tesseract_langs(): | ||||
|   | ||||
| @@ -1,194 +0,0 @@ | ||||
| # Thanks to the Library of Congress and some creative use of sed and awk: | ||||
| # http://www.loc.gov/standards/iso639-2/php/English_list.php | ||||
|  | ||||
| ISO639 = { | ||||
|  | ||||
|     "aa": "aar", | ||||
|     "ab": "abk", | ||||
|     "ae": "ave", | ||||
|     "af": "afr", | ||||
|     "ak": "aka", | ||||
|     "am": "amh", | ||||
|     "an": "arg", | ||||
|     "ar": "ara", | ||||
|     "as": "asm", | ||||
|     "av": "ava", | ||||
|     "ay": "aym", | ||||
|     "az": "aze", | ||||
|     "ba": "bak", | ||||
|     "be": "bel", | ||||
|     "bg": "bul", | ||||
|     "bh": "bih", | ||||
|     "bi": "bis", | ||||
|     "bm": "bam", | ||||
|     "bn": "ben", | ||||
|     "bo": "bod", | ||||
|     "br": "bre", | ||||
|     "bs": "bos", | ||||
|     "ca": "cat", | ||||
|     "ce": "che", | ||||
|     "ch": "cha", | ||||
|     "co": "cos", | ||||
|     "cr": "cre", | ||||
|     "cs": "ces", | ||||
|     "cu": "chu", | ||||
|     "cv": "chv", | ||||
|     "cy": "cym", | ||||
|     "da": "dan", | ||||
|     "de": "deu", | ||||
|     "dv": "div", | ||||
|     "dz": "dzo", | ||||
|     "ee": "ewe", | ||||
|     "el": "ell", | ||||
|     "en": "eng", | ||||
|     "eo": "epo", | ||||
|     "es": "spa", | ||||
|     "et": "est", | ||||
|     "eu": "eus", | ||||
|     "fa": "fas", | ||||
|     "ff": "ful", | ||||
|     "fi": "fin", | ||||
|     "fj": "fij", | ||||
|     "fo": "fao", | ||||
|     "fr": "fra", | ||||
|     "fy": "fry", | ||||
|     "ga": "gle", | ||||
|     "gd": "gla", | ||||
|     "gl": "glg", | ||||
|     "gn": "grn", | ||||
|     "gu": "guj", | ||||
|     "gv": "glv", | ||||
|     "ha": "hau", | ||||
|     "he": "heb", | ||||
|     "hi": "hin", | ||||
|     "ho": "hmo", | ||||
|     "hr": "hrv", | ||||
|     "ht": "hat", | ||||
|     "hu": "hun", | ||||
|     "hy": "hye", | ||||
|     "hz": "her", | ||||
|     "ia": "ina", | ||||
|     "id": "ind", | ||||
|     "ie": "ile", | ||||
|     "ig": "ibo", | ||||
|     "ii": "iii", | ||||
|     "ik": "ipk", | ||||
|     "io": "ido", | ||||
|     "is": "isl", | ||||
|     "it": "ita", | ||||
|     "iu": "iku", | ||||
|     "ja": "jpn", | ||||
|     "jv": "jav", | ||||
|     "ka": "kat", | ||||
|     "kg": "kon", | ||||
|     "ki": "kik", | ||||
|     "kj": "kua", | ||||
|     "kk": "kaz", | ||||
|     "kl": "kal", | ||||
|     "km": "khm", | ||||
|     "kn": "kan", | ||||
|     "ko": "kor", | ||||
|     "kr": "kau", | ||||
|     "ks": "kas", | ||||
|     "ku": "kur", | ||||
|     "kv": "kom", | ||||
|     "kw": "cor", | ||||
|     "ky": "kir", | ||||
|     "la": "lat", | ||||
|     "lb": "ltz", | ||||
|     "lg": "lug", | ||||
|     "li": "lim", | ||||
|     "ln": "lin", | ||||
|     "lo": "lao", | ||||
|     "lt": "lit", | ||||
|     "lu": "lub", | ||||
|     "lv": "lav", | ||||
|     "mg": "mlg", | ||||
|     "mh": "mah", | ||||
|     "mi": "mri", | ||||
|     "mk": "mkd", | ||||
|     "ml": "mal", | ||||
|     "mn": "mon", | ||||
|     "mr": "mar", | ||||
|     "ms": "msa", | ||||
|     "mt": "mlt", | ||||
|     "my": "mya", | ||||
|     "na": "nau", | ||||
|     "nb": "nob", | ||||
|     "nd": "nde", | ||||
|     "ne": "nep", | ||||
|     "ng": "ndo", | ||||
|     "nl": "nld", | ||||
|     "no": "nor", | ||||
|     "nr": "nbl", | ||||
|     "nv": "nav", | ||||
|     "ny": "nya", | ||||
|     "oc": "oci", | ||||
|     "oj": "oji", | ||||
|     "om": "orm", | ||||
|     "or": "ori", | ||||
|     "os": "oss", | ||||
|     "pa": "pan", | ||||
|     "pi": "pli", | ||||
|     "pl": "pol", | ||||
|     "ps": "pus", | ||||
|     "pt": "por", | ||||
|     "qu": "que", | ||||
|     "rm": "roh", | ||||
|     "rn": "run", | ||||
|     "ro": "ron", | ||||
|     "ru": "rus", | ||||
|     "rw": "kin", | ||||
|     "sa": "san", | ||||
|     "sc": "srd", | ||||
|     "sd": "snd", | ||||
|     "se": "sme", | ||||
|     "sg": "sag", | ||||
|     "si": "sin", | ||||
|     "sk": "slk", | ||||
|     "sl": "slv", | ||||
|     "sm": "smo", | ||||
|     "sn": "sna", | ||||
|     "so": "som", | ||||
|     "sq": "sqi", | ||||
|     "sr": "srp", | ||||
|     "ss": "ssw", | ||||
|     "st": "sot", | ||||
|     "su": "sun", | ||||
|     "sv": "swe", | ||||
|     "sw": "swa", | ||||
|     "ta": "tam", | ||||
|     "te": "tel", | ||||
|     "tg": "tgk", | ||||
|     "th": "tha", | ||||
|     "ti": "tir", | ||||
|     "tk": "tuk", | ||||
|     "tl": "tgl", | ||||
|     "tn": "tsn", | ||||
|     "to": "ton", | ||||
|     "tr": "tur", | ||||
|     "ts": "tso", | ||||
|     "tt": "tat", | ||||
|     "tw": "twi", | ||||
|     "ty": "tah", | ||||
|     "ug": "uig", | ||||
|     "uk": "ukr", | ||||
|     "ur": "urd", | ||||
|     "uz": "uzb", | ||||
|     "ve": "ven", | ||||
|     "vi": "vie", | ||||
|     "vo": "vol", | ||||
|     "wa": "wln", | ||||
|     "wo": "wol", | ||||
|     "xh": "xho", | ||||
|     "yi": "yid", | ||||
|     "yo": "yor", | ||||
|     "za": "zha", | ||||
|  | ||||
|     # Tessdata contains two values for Chinese, "chi_sim" and "chi_tra".  I | ||||
|     # have no idea which one is better, so I just picked the bigger file. | ||||
|     "zh": "chi_tra", | ||||
|  | ||||
|     "zu": "zul" | ||||
|  | ||||
| } | ||||
							
								
								
									
										26
									
								
								src/paperless_tesseract/tests/test_checks.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										26
									
								
								src/paperless_tesseract/tests/test_checks.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,26 @@ | ||||
| from unittest import mock | ||||
|  | ||||
| from django.core.checks import ERROR | ||||
| from django.test import TestCase, override_settings | ||||
|  | ||||
| from paperless_tesseract import check_default_language_available | ||||
|  | ||||
|  | ||||
| class TestChecks(TestCase): | ||||
|  | ||||
|     def test_default_language(self): | ||||
|         msgs = check_default_language_available(None) | ||||
|  | ||||
|     @override_settings(OCR_LANGUAGE="") | ||||
|     def test_no_language(self): | ||||
|         msgs = check_default_language_available(None) | ||||
|         self.assertEqual(len(msgs), 1) | ||||
|         self.assertTrue(msgs[0].msg.startswith("No OCR language has been specified with PAPERLESS_OCR_LANGUAGE")) | ||||
|  | ||||
|     @override_settings(OCR_LANGUAGE="ita") | ||||
|     @mock.patch("paperless_tesseract.checks.get_tesseract_langs") | ||||
|     def test_invalid_language(self, m): | ||||
|         m.return_value = ["deu", "eng"] | ||||
|         msgs = check_default_language_available(None) | ||||
|         self.assertEqual(len(msgs), 1) | ||||
|         self.assertEqual(msgs[0].level, ERROR) | ||||
| @@ -35,15 +35,3 @@ class TextDocumentParser(DocumentParser): | ||||
|     def parse(self, document_path, mime_type): | ||||
|         with open(document_path, 'r') as f: | ||||
|             self.text = f.read() | ||||
|  | ||||
|  | ||||
| def run_command(*args): | ||||
|     environment = os.environ.copy() | ||||
|     if settings.CONVERT_MEMORY_LIMIT: | ||||
|         environment["MAGICK_MEMORY_LIMIT"] = settings.CONVERT_MEMORY_LIMIT | ||||
|     if settings.CONVERT_TMPDIR: | ||||
|         environment["MAGICK_TMPDIR"] = settings.CONVERT_TMPDIR | ||||
|  | ||||
|     if not subprocess.Popen(' '.join(args), env=environment, | ||||
|                             shell=True).wait() == 0: | ||||
|         raise ParseError("Convert failed at {}".format(args)) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Michael Shamoon
					Michael Shamoon