mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-10-30 03:56:23 -05:00 
			
		
		
		
	Merge branch 'dev' into feature-ocrmypdf
This commit is contained in:
		| @@ -15,7 +15,7 @@ services: | ||||
|       POSTGRES_PASSWORD: paperless | ||||
|  | ||||
|   webserver: | ||||
|     image: jonaswinkler/paperless-ng:0.9.3 | ||||
|     image: jonaswinkler/paperless-ng:0.9.4 | ||||
|     restart: always | ||||
|     depends_on: | ||||
|       - db | ||||
|   | ||||
| @@ -5,7 +5,7 @@ services: | ||||
|     restart: always | ||||
|  | ||||
|   webserver: | ||||
|     image: jonaswinkler/paperless-ng:0.9.3 | ||||
|     image: jonaswinkler/paperless-ng:0.9.4 | ||||
|     restart: always | ||||
|     depends_on: | ||||
|       - broker | ||||
|   | ||||
| @@ -274,6 +274,7 @@ management command: | ||||
|  | ||||
| This command takes no arguments. | ||||
|  | ||||
| .. _`administration-index`: | ||||
|  | ||||
| Managing the document search index | ||||
| ================================== | ||||
|   | ||||
| @@ -8,12 +8,31 @@ Changelog | ||||
| paperless-ng 0.9.4 | ||||
| ################## | ||||
|  | ||||
| * Front end: Clickable tags, correspondents and types allow quick filtering for related documents. | ||||
| * Front end: Saved views are now editable. | ||||
| * Front end: Preview documents directly in the browser. | ||||
| * Searching: | ||||
|  | ||||
|   * Paperless now supports searching by tags, types and dates. In order to have this applied to your | ||||
|     existing documents, you need to perform a ``document_index reindex`` management command | ||||
|     (see :ref:`administration-index`) | ||||
|     that adds the new data to the search index. You only need to do this once, so that paperless can find | ||||
|     your documents by tags, types and dates. Paperless keeps the index updated after that whenever | ||||
|     something changes. | ||||
|   * Paperless now has spelling corrections ("Did you mean") for mistyped queries. | ||||
|   * The documentation contains :ref:`information about the query syntax <basic-searching>`. | ||||
|  | ||||
| * Front end: | ||||
|  | ||||
|   * Clickable tags, correspondents and types allow quick filtering for related documents. | ||||
|   * Saved views are now editable. | ||||
|   * Preview documents directly in the browser. | ||||
|   * Navigation from the dashboard to saved views. | ||||
|  | ||||
| * Fixes: | ||||
|  | ||||
|   * A severe error when trying to use post consume scripts. | ||||
| * The documentation now contains information about bare metal installs. | ||||
|   * An error in the consumer that caused invalid messages about missing files to show up in the log. | ||||
|  | ||||
| * The documentation now contains information about bare metal installs and a section about | ||||
|   how to setup the development environment. | ||||
|  | ||||
| paperless-ng 0.9.3 | ||||
| ################## | ||||
|   | ||||
| @@ -156,6 +156,62 @@ REST API | ||||
|  | ||||
| You can also submit a document using the REST API, see :ref:`api-file_uploads` for details. | ||||
|  | ||||
| .. _basic-searching: | ||||
|  | ||||
| Searching | ||||
| ######### | ||||
|  | ||||
| Paperless offers an extensive searching mechanism that is designed to allow you to quickly | ||||
| find a document you're looking for (for example, that thing that just broke and you bought | ||||
| a couple months ago, that contract you signed 8 years ago). | ||||
|  | ||||
| When you search paperless for a document, it tries to match this query against your documents. | ||||
| Paperless will look for matching documents by inspecting their content, title, correspondent, | ||||
| type and tags. Paperless returns a scored list of results, so that documents matching your query | ||||
| better will appear further up in the search results. | ||||
|  | ||||
| By default, paperless returns only documents which contain all words typed in the search bar. | ||||
| However, paperless also offers advanced search syntax if you want to drill down the results | ||||
| further. | ||||
|  | ||||
| Matching documents with logical expressions: | ||||
|  | ||||
| .. code:: none | ||||
|  | ||||
|   shopname AND (product1 OR product2) | ||||
|  | ||||
| Matching specific tags, correspondents or types: | ||||
|  | ||||
| .. code:: none | ||||
|  | ||||
|   type:invoice tag:unpaid | ||||
|   correspondent:university certificate | ||||
|  | ||||
| Matching dates: | ||||
|  | ||||
| .. code:: none | ||||
|    | ||||
|   created:[2005 to 2009] | ||||
|   added:yesterday | ||||
|   modified:today | ||||
|  | ||||
| Matching inexact words: | ||||
|  | ||||
| .. code:: none | ||||
|  | ||||
|   produ*name | ||||
|  | ||||
| .. note:: | ||||
|  | ||||
|   Inexact terms are hard for search indexes. These queries might take a while to execute. That's why paperless offers | ||||
|   auto complete and query correction. | ||||
|  | ||||
| All of these constructs can be combined as you see fit. | ||||
| Paperless uses Whoosh's default query language. If you want to learn more about it, | ||||
| head over to `Whoosh query language <https://whoosh.readthedocs.io/en/latest/querylang.html>`_. | ||||
| For details on what date parsing utilities are available, see | ||||
| `Date parsing <https://whoosh.readthedocs.io/en/latest/dates.html#parsing-date-queries>`_. | ||||
|   | ||||
|  | ||||
| .. _usage-recommended_workflow: | ||||
|  | ||||
|   | ||||
| @@ -1,6 +1,9 @@ | ||||
| <app-widget-frame [title]="savedView.title"> | ||||
|  | ||||
|   <table class="table table-sm table-hover table-borderless"> | ||||
|   <a header-buttons [routerLink]="" (click)="showAll()">Show all</a> | ||||
|  | ||||
|  | ||||
|   <table content class="table table-sm table-hover table-borderless"> | ||||
|     <thead> | ||||
|       <tr> | ||||
|         <th>Created</th> | ||||
|   | ||||
| @@ -1,6 +1,8 @@ | ||||
| import { Component, Input, OnInit } from '@angular/core'; | ||||
| import { Router } from '@angular/router'; | ||||
| import { PaperlessDocument } from 'src/app/data/paperless-document'; | ||||
| import { SavedViewConfig } from 'src/app/data/saved-view-config'; | ||||
| import { DocumentListViewService } from 'src/app/services/document-list-view.service'; | ||||
| import { DocumentService } from 'src/app/services/rest/document.service'; | ||||
|  | ||||
| @Component({ | ||||
| @@ -10,7 +12,10 @@ import { DocumentService } from 'src/app/services/rest/document.service'; | ||||
| }) | ||||
| export class SavedViewWidgetComponent implements OnInit { | ||||
|  | ||||
|   constructor(private documentService: DocumentService) { } | ||||
|   constructor( | ||||
|     private documentService: DocumentService, | ||||
|     private router: Router, | ||||
|     private list: DocumentListViewService) { } | ||||
|    | ||||
|   @Input() | ||||
|   savedView: SavedViewConfig | ||||
| @@ -23,4 +28,9 @@ export class SavedViewWidgetComponent implements OnInit { | ||||
|     }) | ||||
|   } | ||||
|  | ||||
|   showAll() { | ||||
|     this.list.load(this.savedView) | ||||
|     this.router.navigate(["documents"]) | ||||
|   } | ||||
|  | ||||
| } | ||||
|   | ||||
| @@ -1,4 +1,6 @@ | ||||
| <app-widget-frame title="Statistics"> | ||||
|   <ng-container content> | ||||
|     <p class="card-text">Documents in inbox: {{statistics.documents_inbox}}</p> | ||||
|     <p class="card-text">Total documents: {{statistics.documents_total}}</p> | ||||
|   </ng-container> | ||||
| </app-widget-frame> | ||||
| @@ -1,6 +1,6 @@ | ||||
| <app-widget-frame title="Upload new documents"> | ||||
|  | ||||
|   <form> | ||||
|   <form content> | ||||
|     <ngx-file-drop  | ||||
|       dropZoneLabel="Drop documents here or" (onFileDrop)="dropped($event)" | ||||
|       (onFileOver)="fileOver($event)" (onFileLeave)="fileLeave($event)" | ||||
|   | ||||
| @@ -1,8 +1,12 @@ | ||||
| <div class="card mb-3 shadow"> | ||||
|   <div class="card-header"> | ||||
|     <div class="d-flex justify-content-between align-items-center"> | ||||
|       <h5 class="card-title mb-0">{{title}}</h5> | ||||
|       <ng-content select ="[header-buttons]"></ng-content> | ||||
|     </div> | ||||
|      | ||||
|   </div> | ||||
|   <div class="card-body text-dark"> | ||||
|     <ng-content></ng-content> | ||||
|     <ng-content select ="[content]"></ng-content> | ||||
|   </div> | ||||
| </div> | ||||
| @@ -9,9 +9,11 @@ | ||||
|         <div class="d-flex justify-content-between align-items-center"> | ||||
|           <h5 class="card-title">     | ||||
|             <ng-container *ngIf="document.correspondent"> | ||||
|               <a [routerLink]="" title="Filter by correspondent" (click)="clickCorrespondent.emit(document.correspondent)" class="font-weight-bold">{{document.correspondent.name}}</a>: | ||||
|               <a *ngIf="clickCorrespondent.observers.length ; else nolink" [routerLink]="" title="Filter by correspondent" (click)="clickCorrespondent.emit(document.correspondent)" class="font-weight-bold">{{document.correspondent.name}}</a> | ||||
|               <ng-template #nolink>{{document.correspondent.name}}</ng-template>: | ||||
|             </ng-container> | ||||
|             {{document.title}}<app-tag [tag]="t" linkTitle="Filter by tag" *ngFor="let t of document.tags" class="ml-1" (click)="clickTag.emit(t)" [clickable]="true"></app-tag> | ||||
|             {{document.title}} | ||||
|             <app-tag [tag]="t" linkTitle="Filter by tag" *ngFor="let t of document.tags" class="ml-1" (click)="clickTag.emit(t)" [clickable]="clickTag.observers.length"></app-tag> | ||||
|           </h5> | ||||
|           <h5 class="card-title" *ngIf="document.archive_serial_number">#{{document.archive_serial_number}}</h5> | ||||
|         </div> | ||||
|   | ||||
| @@ -1,9 +1,17 @@ | ||||
| <app-page-header title="Search results"> | ||||
| </app-page-header> | ||||
|  | ||||
| <p>Search string: <i>{{query}}</i></p> | ||||
| <div *ngIf="errorMessage" class="alert alert-danger">Invalid search query: {{errorMessage}}</div> | ||||
|  | ||||
| <div [class.result-content-searching]="searching" infiniteScroll (scrolled)="onScroll()"> | ||||
| <p> | ||||
|     Search string: <i>{{query}}</i> | ||||
|     <ng-container *ngIf="correctedQuery"> | ||||
|         - Did you mean "<a [routerLink]="" (click)="searchCorrectedQuery()">{{correctedQuery}}</a>"? | ||||
|     </ng-container> | ||||
|  | ||||
| </p> | ||||
|  | ||||
| <div *ngIf="!errorMessage" [class.result-content-searching]="searching" infiniteScroll (scrolled)="onScroll()"> | ||||
|     <p>{{resultCount}} result(s)</p> | ||||
|     <app-document-card-large *ngFor="let result of results" | ||||
|         [document]="result.document" | ||||
|   | ||||
| @@ -1,5 +1,5 @@ | ||||
| import { Component, OnInit } from '@angular/core'; | ||||
| import { ActivatedRoute } from '@angular/router'; | ||||
| import { ActivatedRoute, Router } from '@angular/router'; | ||||
| import { SearchHit } from 'src/app/data/search-result'; | ||||
| import { SearchService } from 'src/app/services/rest/search.service'; | ||||
|  | ||||
| @@ -22,7 +22,11 @@ export class SearchComponent implements OnInit { | ||||
|  | ||||
|   resultCount | ||||
|  | ||||
|   constructor(private searchService: SearchService, private route: ActivatedRoute) { } | ||||
|   correctedQuery: string = null | ||||
|  | ||||
|   errorMessage: string | ||||
|  | ||||
|   constructor(private searchService: SearchService, private route: ActivatedRoute, private router: Router) { } | ||||
|  | ||||
|   ngOnInit(): void { | ||||
|     this.route.queryParamMap.subscribe(paramMap => { | ||||
| @@ -34,7 +38,13 @@ export class SearchComponent implements OnInit { | ||||
|  | ||||
|   } | ||||
|  | ||||
|   searchCorrectedQuery() { | ||||
|     this.router.navigate(["search"], {queryParams: {query: this.correctedQuery}}) | ||||
|   } | ||||
|  | ||||
|   loadPage(append: boolean = false) { | ||||
|     this.errorMessage = null | ||||
|     this.correctedQuery = null | ||||
|     this.searchService.search(this.query, this.currentPage).subscribe(result => { | ||||
|       if (append) { | ||||
|         this.results.push(...result.results) | ||||
| @@ -44,12 +54,17 @@ export class SearchComponent implements OnInit { | ||||
|       this.pageCount = result.page_count | ||||
|       this.searching = false | ||||
|       this.resultCount = result.count | ||||
|       this.correctedQuery = result.corrected_query | ||||
|     }, error => { | ||||
|       this.searching = false | ||||
|       this.resultCount = 1 | ||||
|       this.pageCount = 1 | ||||
|       this.results = [] | ||||
|       this.errorMessage = error.error | ||||
|     }) | ||||
|   } | ||||
|  | ||||
|   onScroll() { | ||||
|     console.log(this.currentPage) | ||||
|     console.log(this.pageCount) | ||||
|     if (this.currentPage < this.pageCount) { | ||||
|       this.currentPage += 1 | ||||
|       this.loadPage(true) | ||||
|   | ||||
| @@ -21,6 +21,8 @@ export interface SearchResult { | ||||
|   page?: number | ||||
|   page_count?: number | ||||
|  | ||||
|   corrected_query?: string | ||||
|  | ||||
|   results?: SearchHit[] | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -10,10 +10,11 @@ from django.db.models import Q | ||||
| from django.utils import timezone | ||||
|  | ||||
| from .classifier import DocumentClassifier, IncompatibleClassifierVersionError | ||||
| from .file_handling import generate_filename, create_source_path_directory | ||||
| from .file_handling import create_source_path_directory | ||||
| from .loggers import LoggingMixin | ||||
| from .models import Document, FileInfo, Correspondent, DocumentType, Tag | ||||
| from .parsers import ParseError, get_parser_class_for_mime_type, parse_date | ||||
| from .parsers import ParseError, get_parser_class_for_mime_type, \ | ||||
|     get_supported_file_extensions, parse_date | ||||
| from .signals import ( | ||||
|     document_consumption_finished, | ||||
|     document_consumption_started | ||||
| @@ -40,6 +41,21 @@ class Consumer(LoggingMixin): | ||||
|             raise ConsumerError("Cannot consume {}: It is not a file".format( | ||||
|                 self.path)) | ||||
|  | ||||
|     def pre_check_file_extension(self): | ||||
|         extensions = get_supported_file_extensions() | ||||
|         _, ext = os.path.splitext(self.filename) | ||||
|  | ||||
|         if not ext: | ||||
|             raise ConsumerError( | ||||
|                 f"Not consuming {self.filename}: File type unknown." | ||||
|             ) | ||||
|  | ||||
|         if ext not in extensions: | ||||
|             raise ConsumerError( | ||||
|                 f"Not consuming {self.filename}: File extension {ext} does " | ||||
|                 f"not map to any known file type ({str(extensions)})" | ||||
|             ) | ||||
|  | ||||
|     def pre_check_duplicate(self): | ||||
|         with open(self.path, "rb") as f: | ||||
|             checksum = hashlib.md5(f.read()).hexdigest() | ||||
| @@ -82,6 +98,7 @@ class Consumer(LoggingMixin): | ||||
|         # Make sure that preconditions for consuming the file are met. | ||||
|  | ||||
|         self.pre_check_file_exists() | ||||
|         self.pre_check_file_extension() | ||||
|         self.pre_check_directories() | ||||
|         self.pre_check_duplicate() | ||||
|  | ||||
|   | ||||
| @@ -4,10 +4,11 @@ from contextlib import contextmanager | ||||
|  | ||||
| from django.conf import settings | ||||
| from whoosh import highlight | ||||
| from whoosh.fields import Schema, TEXT, NUMERIC | ||||
| from whoosh.fields import Schema, TEXT, NUMERIC, KEYWORD, DATETIME | ||||
| from whoosh.highlight import Formatter, get_text | ||||
| from whoosh.index import create_in, exists_in, open_dir | ||||
| from whoosh.qparser import MultifieldParser | ||||
| from whoosh.qparser.dateparse import DateParserPlugin | ||||
| from whoosh.writing import AsyncWriter | ||||
|  | ||||
|  | ||||
| @@ -59,14 +60,19 @@ def get_schema(): | ||||
|         id=NUMERIC(stored=True, unique=True, numtype=int), | ||||
|         title=TEXT(stored=True), | ||||
|         content=TEXT(), | ||||
|         correspondent=TEXT(stored=True) | ||||
|         correspondent=TEXT(stored=True), | ||||
|         tag=KEYWORD(stored=True, commas=True, scorable=True, lowercase=True), | ||||
|         type=TEXT(stored=True), | ||||
|         created=DATETIME(stored=True, sortable=True), | ||||
|         modified=DATETIME(stored=True, sortable=True), | ||||
|         added=DATETIME(stored=True, sortable=True), | ||||
|     ) | ||||
|  | ||||
|  | ||||
| def open_index(recreate=False): | ||||
|     try: | ||||
|         if exists_in(settings.INDEX_DIR) and not recreate: | ||||
|             return open_dir(settings.INDEX_DIR) | ||||
|             return open_dir(settings.INDEX_DIR, schema=get_schema()) | ||||
|     except Exception as e: | ||||
|         logger.error(f"Error while opening the index: {e}, recreating.") | ||||
|  | ||||
| @@ -77,11 +83,17 @@ def open_index(recreate=False): | ||||
|  | ||||
| def update_document(writer, doc): | ||||
|     logger.debug("Indexing {}...".format(doc)) | ||||
|     tags = ",".join([t.name for t in doc.tags.all()]) | ||||
|     writer.update_document( | ||||
|         id=doc.pk, | ||||
|         title=doc.title, | ||||
|         content=doc.content, | ||||
|         correspondent=doc.correspondent.name if doc.correspondent else None | ||||
|         correspondent=doc.correspondent.name if doc.correspondent else None, | ||||
|         tag=tags if tags else None, | ||||
|         type=doc.document_type.name if doc.document_type else None, | ||||
|         created=doc.created, | ||||
|         added=doc.added, | ||||
|         modified=doc.modified, | ||||
|     ) | ||||
|  | ||||
|  | ||||
| @@ -103,16 +115,27 @@ def remove_document_from_index(document): | ||||
|  | ||||
|  | ||||
| @contextmanager | ||||
| def query_page(ix, query, page): | ||||
| def query_page(ix, querystring, page): | ||||
|     searcher = ix.searcher() | ||||
|     try: | ||||
|         query_parser = MultifieldParser(["content", "title", "correspondent"], | ||||
|                                         ix.schema).parse(query) | ||||
|         result_page = searcher.search_page(query_parser, page) | ||||
|         qp = MultifieldParser( | ||||
|             ["content", "title", "correspondent", "tag", "type"], | ||||
|             ix.schema) | ||||
|         qp.add_plugin(DateParserPlugin()) | ||||
|  | ||||
|         q = qp.parse(querystring) | ||||
|         result_page = searcher.search_page(q, page) | ||||
|         result_page.results.fragmenter = highlight.ContextFragmenter( | ||||
|             surround=50) | ||||
|         result_page.results.formatter = JsonFormatter() | ||||
|         yield result_page | ||||
|  | ||||
|         corrected = searcher.correct_query(q, querystring) | ||||
|         if corrected.query != q: | ||||
|             corrected_query = corrected.string | ||||
|         else: | ||||
|             corrected_query = None | ||||
|  | ||||
|         yield result_page, corrected_query | ||||
|     finally: | ||||
|         searcher.close() | ||||
|  | ||||
|   | ||||
| @@ -1,7 +1,6 @@ | ||||
| # coding=utf-8 | ||||
|  | ||||
| import logging | ||||
| import mimetypes | ||||
| import os | ||||
| import re | ||||
| from collections import OrderedDict | ||||
| @@ -12,6 +11,8 @@ from django.db import models | ||||
| from django.utils import timezone | ||||
| from django.utils.text import slugify | ||||
|  | ||||
| from documents.parsers import get_default_file_extension | ||||
|  | ||||
|  | ||||
| class MatchingModel(models.Model): | ||||
|  | ||||
| @@ -204,7 +205,7 @@ class Document(models.Model): | ||||
|         ordering = ("correspondent", "title") | ||||
|  | ||||
|     def __str__(self): | ||||
|         created = self.created.strftime("%Y%m%d%H%M%S") | ||||
|         created = self.created.strftime("%Y%m%d") | ||||
|         if self.correspondent and self.title: | ||||
|             return "{}: {} - {}".format( | ||||
|                 created, self.correspondent, self.title) | ||||
| @@ -255,8 +256,7 @@ class Document(models.Model): | ||||
|  | ||||
|     @property | ||||
|     def file_type(self): | ||||
|         # TODO: this is not stable across python versions | ||||
|         return mimetypes.guess_extension(str(self.mime_type)) | ||||
|         return get_default_file_extension(self.mime_type) | ||||
|  | ||||
|     @property | ||||
|     def thumbnail_path(self): | ||||
|   | ||||
| @@ -1,4 +1,5 @@ | ||||
| import logging | ||||
| import mimetypes | ||||
| import os | ||||
| import re | ||||
| import shutil | ||||
| @@ -42,6 +43,29 @@ def is_mime_type_supported(mime_type): | ||||
|     return get_parser_class_for_mime_type(mime_type) is not None | ||||
|  | ||||
|  | ||||
| def get_default_file_extension(mime_type): | ||||
|     for response in document_consumer_declaration.send(None): | ||||
|         parser_declaration = response[1] | ||||
|         supported_mime_types = parser_declaration["mime_types"] | ||||
|  | ||||
|         if mime_type in supported_mime_types: | ||||
|             return supported_mime_types[mime_type] | ||||
|  | ||||
|     return None | ||||
|  | ||||
|  | ||||
| def get_supported_file_extensions(): | ||||
|     extensions = set() | ||||
|     for response in document_consumer_declaration.send(None): | ||||
|         parser_declaration = response[1] | ||||
|         supported_mime_types = parser_declaration["mime_types"] | ||||
|  | ||||
|         for mime_type in supported_mime_types: | ||||
|             extensions.update(mimetypes.guess_all_extensions(mime_type)) | ||||
|  | ||||
|     return extensions | ||||
|  | ||||
|  | ||||
| def get_parser_class_for_mime_type(mime_type): | ||||
|  | ||||
|     options = [] | ||||
|   | ||||
| @@ -325,6 +325,22 @@ class DocumentApiTest(DirectoriesMixin, APITestCase): | ||||
|         self.assertEqual(response.status_code, 200) | ||||
|         self.assertEqual(len(response.data), 10) | ||||
|  | ||||
|     def test_search_spelling_correction(self): | ||||
|         with AsyncWriter(index.open_index()) as writer: | ||||
|             for i in range(55): | ||||
|                 doc = Document.objects.create(checksum=str(i), pk=i+1, title=f"Document {i+1}", content=f"Things document {i+1}") | ||||
|                 index.update_document(writer, doc) | ||||
|  | ||||
|         response = self.client.get("/api/search/?query=thing") | ||||
|         correction = response.data['corrected_query'] | ||||
|  | ||||
|         self.assertEqual(correction, "things") | ||||
|  | ||||
|         response = self.client.get("/api/search/?query=things") | ||||
|         correction = response.data['corrected_query'] | ||||
|  | ||||
|         self.assertEqual(correction, None) | ||||
|  | ||||
|     def test_statistics(self): | ||||
|  | ||||
|         doc1 = Document.objects.create(title="none1", checksum="A") | ||||
|   | ||||
| @@ -425,7 +425,7 @@ class TestConsumer(DirectoriesMixin, TestCase): | ||||
|         m = patcher.start() | ||||
|         m.return_value = [(None, { | ||||
|             "parser": self.make_dummy_parser, | ||||
|             "mime_types": ["application/pdf"], | ||||
|             "mime_types": {"application/pdf": ".pdf"}, | ||||
|             "weight": 0 | ||||
|         })] | ||||
|  | ||||
| @@ -551,7 +551,7 @@ class TestConsumer(DirectoriesMixin, TestCase): | ||||
|         try: | ||||
|             self.consumer.try_consume_file(self.get_test_file()) | ||||
|         except ConsumerError as e: | ||||
|             self.assertTrue(str(e).startswith("No parsers abvailable")) | ||||
|             self.assertTrue("File extension .pdf does not map to any" in str(e)) | ||||
|             return | ||||
|  | ||||
|         self.fail("Should throw exception") | ||||
| @@ -560,7 +560,7 @@ class TestConsumer(DirectoriesMixin, TestCase): | ||||
|     def testFaultyParser(self, m): | ||||
|         m.return_value = [(None, { | ||||
|             "parser": self.make_faulty_parser, | ||||
|             "mime_types": ["application/pdf"], | ||||
|             "mime_types": {"application/pdf": ".pdf"}, | ||||
|             "weight": 0 | ||||
|         })] | ||||
|  | ||||
|   | ||||
| @@ -6,7 +6,10 @@ from unittest import mock | ||||
|  | ||||
| from django.test import TestCase, override_settings | ||||
|  | ||||
| from documents.parsers import get_parser_class, DocumentParser | ||||
| from documents.parsers import get_parser_class, get_supported_file_extensions, get_default_file_extension, \ | ||||
|     get_parser_class_for_mime_type, DocumentParser | ||||
| from paperless_tesseract.parsers import RasterisedDocumentParser | ||||
| from paperless_text.parsers import TextDocumentParser | ||||
|  | ||||
|  | ||||
| def fake_magic_from_file(file, mime=False): | ||||
| @@ -29,7 +32,7 @@ class TestParserDiscovery(TestCase): | ||||
|             pass | ||||
|  | ||||
|         m.return_value = ( | ||||
|             (None, {"weight": 0, "parser": DummyParser, "mime_types": ["application/pdf"]}), | ||||
|             (None, {"weight": 0, "parser": DummyParser, "mime_types": {"application/pdf": ".pdf"}}), | ||||
|         ) | ||||
|  | ||||
|         self.assertEqual( | ||||
| @@ -47,8 +50,8 @@ class TestParserDiscovery(TestCase): | ||||
|             pass | ||||
|  | ||||
|         m.return_value = ( | ||||
|             (None, {"weight": 0, "parser": DummyParser1, "mime_types": ["application/pdf"]}), | ||||
|             (None, {"weight": 1, "parser": DummyParser2, "mime_types": ["application/pdf"]}), | ||||
|             (None, {"weight": 0, "parser": DummyParser1, "mime_types": {"application/pdf": ".pdf"}}), | ||||
|             (None, {"weight": 1, "parser": DummyParser2, "mime_types": {"application/pdf": ".pdf"}}), | ||||
|         ) | ||||
|  | ||||
|         self.assertEqual( | ||||
| @@ -96,3 +99,20 @@ class TestBaseParser(TestCase): | ||||
|         path = parser.get_optimised_thumbnail("any", "not important") | ||||
|         self.assertEqual(path, fake_get_thumbnail(None, None, None)) | ||||
|  | ||||
|  | ||||
| class TestParserAvailability(TestCase): | ||||
|  | ||||
|     def test_file_extensions(self): | ||||
|  | ||||
|         for ext in [".pdf", ".jpe", ".jpg", ".jpeg", ".txt", ".csv"]: | ||||
|             self.assertIn(ext, get_supported_file_extensions()) | ||||
|         self.assertEqual(get_default_file_extension('application/pdf'), ".pdf") | ||||
|         self.assertEqual(get_default_file_extension('image/png'), ".png") | ||||
|         self.assertEqual(get_default_file_extension('image/jpeg'), ".jpg") | ||||
|         self.assertEqual(get_default_file_extension('text/plain'), ".txt") | ||||
|         self.assertEqual(get_default_file_extension('text/csv'), ".csv") | ||||
|         self.assertEqual(get_default_file_extension('aasdasd/dgfgf'), None) | ||||
|  | ||||
|         self.assertEqual(get_parser_class_for_mime_type('application/pdf'), RasterisedDocumentParser) | ||||
|         self.assertEqual(get_parser_class_for_mime_type('text/plain'), TextDocumentParser) | ||||
|         self.assertEqual(get_parser_class_for_mime_type('text/sdgsdf'), None) | ||||
|   | ||||
| @@ -236,7 +236,13 @@ class SearchView(APIView): | ||||
|                 } | ||||
|  | ||||
|     def get(self, request, format=None): | ||||
|         if 'query' in request.query_params: | ||||
|         if 'query' not in request.query_params: | ||||
|             return Response({ | ||||
|                 'count': 0, | ||||
|                 'page': 0, | ||||
|                 'page_count': 0, | ||||
|                 'results': []}) | ||||
|  | ||||
|         query = request.query_params['query'] | ||||
|         try: | ||||
|             page = int(request.query_params.get('page', 1)) | ||||
| @@ -246,19 +252,17 @@ class SearchView(APIView): | ||||
|         if page < 1: | ||||
|             page = 1 | ||||
|  | ||||
|             with index.query_page(self.ix, query, page) as result_page: | ||||
|         try: | ||||
|             with index.query_page(self.ix, query, page) as (result_page, | ||||
|                                                             corrected_query): | ||||
|                 return Response( | ||||
|                     {'count': len(result_page), | ||||
|                      'page': result_page.pagenum, | ||||
|                      'page_count': result_page.pagecount, | ||||
|                      'corrected_query': corrected_query, | ||||
|                      'results': list(map(self.add_infos_to_hit, result_page))}) | ||||
|  | ||||
|         else: | ||||
|             return Response({ | ||||
|                 'count': 0, | ||||
|                 'page': 0, | ||||
|                 'page_count': 0, | ||||
|                 'results': []}) | ||||
|         except Exception as e: | ||||
|             return HttpResponseBadRequest(str(e)) | ||||
|  | ||||
|  | ||||
| class SearchAutoCompleteView(APIView): | ||||
|   | ||||
| @@ -1 +1 @@ | ||||
| __version__ = (0, 9, 3) | ||||
| __version__ = (0, 9, 4) | ||||
|   | ||||
| @@ -5,9 +5,9 @@ def tesseract_consumer_declaration(sender, **kwargs): | ||||
|     return { | ||||
|         "parser": RasterisedDocumentParser, | ||||
|         "weight": 0, | ||||
|         "mime_types": [ | ||||
|             "application/pdf", | ||||
|             "image/jpeg", | ||||
|             "image/png" | ||||
|         ] | ||||
|         "mime_types": { | ||||
|             "application/pdf": ".pdf", | ||||
|             "image/jpeg": ".jpg", | ||||
|             "image/png": ".png" | ||||
|         } | ||||
|     } | ||||
|   | ||||
| @@ -5,8 +5,8 @@ def text_consumer_declaration(sender, **kwargs): | ||||
|     return { | ||||
|         "parser": TextDocumentParser, | ||||
|         "weight": 10, | ||||
|         "mime_types": [ | ||||
|             "text/plain", | ||||
|             "text/comma-separated-values" | ||||
|         ] | ||||
|         "mime_types": { | ||||
|             "text/plain": ".txt", | ||||
|             "text/csv": ".csv", | ||||
|         } | ||||
|     } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 jonaswinkler
					jonaswinkler