Merge branch 'dev' into feature-ocrmypdf

2025-10-16 02:46:16 -05:00 · 2020-11-30 16:48:09 +01:00
parent ac1b701000 183d432f84
commit aaa6599283
25 changed files with 301 additions and 75 deletions
--- a/docker/hub/docker-compose.postgres.yml
+++ b/docker/hub/docker-compose.postgres.yml
@@ -15,7 +15,7 @@ services:
      POSTGRES_PASSWORD: paperless
  webserver:
-    image: jonaswinkler/paperless-ng:0.9.3
+    image: jonaswinkler/paperless-ng:0.9.4
    restart: always
    depends_on:
      - db
--- a/docker/hub/docker-compose.sqlite.yml
+++ b/docker/hub/docker-compose.sqlite.yml
@@ -5,7 +5,7 @@ services:
    restart: always
  webserver:
-    image: jonaswinkler/paperless-ng:0.9.3
+    image: jonaswinkler/paperless-ng:0.9.4
    restart: always
    depends_on:
      - broker
--- a/docs/administration.rst
+++ b/docs/administration.rst
@@ -274,6 +274,7 @@ management command:
 This command takes no arguments.
 .. _`administration-index`:
 Managing the document search index
 ==================================
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -8,12 +8,31 @@ Changelog
 paperless-ng 0.9.4
 ##################
-* Front end: Clickable tags, correspondents and types allow quick filtering for related documents.
+* Searching:
-* Front end: Saved views are now editable.
+
-* Front end: Preview documents directly in the browser.
+  * Paperless now supports searching by tags, types and dates. In order to have this applied to your
    existing documents, you need to perform a ``document_index reindex`` management command
    (see :ref:`administration-index`)
    that adds the new data to the search index. You only need to do this once, so that paperless can find
    your documents by tags, types and dates. Paperless keeps the index updated after that whenever
    something changes.
  * Paperless now has spelling corrections ("Did you mean") for mistyped queries.
  * The documentation contains :ref:`information about the query syntax <basic-searching>`.
 * Front end:
  * Clickable tags, correspondents and types allow quick filtering for related documents.
  * Saved views are now editable.
  * Preview documents directly in the browser.
  * Navigation from the dashboard to saved views.
 * Fixes:
  * A severe error when trying to use post consume scripts.
-* The documentation now contains information about bare metal installs.
+  * An error in the consumer that caused invalid messages about missing files to show up in the log.
 * The documentation now contains information about bare metal installs and a section about
  how to set up the development environment.
 paperless-ng 0.9.3
 ##################
--- a/docs/usage_overview.rst
+++ b/docs/usage_overview.rst
@@ -156,6 +156,62 @@ REST API
 You can also submit a document using the REST API, see :ref:`api-file_uploads` for details.
 .. _basic-searching:
 Searching
 #########
 Paperless offers an extensive searching mechanism that is designed to allow you to quickly
 find a document you're looking for (for example, that thing that just broke and you bought
 a couple months ago, that contract you signed 8 years ago).
 When you search paperless for a document, it tries to match this query against your documents.
 Paperless will look for matching documents by inspecting their content, title, correspondent,
 type and tags. Paperless returns a scored list of results, so that documents matching your query
 better will appear further up in the search results.
 By default, paperless returns only documents which contain all words typed in the search bar.
 However, paperless also offers advanced search syntax if you want to drill down the results
 further.
 Matching documents with logical expressions:
 .. code:: none
  shopname AND (product1 OR product2)
 Matching specific tags, correspondents or types:
 .. code:: none
  type:invoice tag:unpaid
  correspondent:university certificate
 Matching dates:
 .. code:: none
  created:[2005 to 2009]
  added:yesterday
  modified:today
 Matching inexact words:
 .. code:: none
  produ*name
 .. note::
  Inexact terms are hard for search indexes. These queries might take a while to execute. That's why paperless offers
  auto complete and query correction.
 All of these constructs can be combined as you see fit.
 Paperless uses Whoosh's default query language. If you want to learn more about it,
 head over to `Whoosh query language <https://whoosh.readthedocs.io/en/latest/querylang.html>`_.
 For details on what date parsing utilities are available, see
 `Date parsing <https://whoosh.readthedocs.io/en/latest/dates.html#parsing-date-queries>`_.
 .. _usage-recommended_workflow:
--- a/src-ui/src/app/components/dashboard/widgets/saved-view-widget/saved-view-widget.component.html
+++ b/src-ui/src/app/components/dashboard/widgets/saved-view-widget/saved-view-widget.component.html
@@ -1,6 +1,9 @@
 <app-widget-frame [title]="savedView.title">
-  <table class="table table-sm table-hover table-borderless">
+  <a header-buttons [routerLink]="" (click)="showAll()">Show all</a>
  <table content class="table table-sm table-hover table-borderless">
    <thead>
      <tr>
        <th>Created</th>
--- a/src-ui/src/app/components/dashboard/widgets/saved-view-widget/saved-view-widget.component.ts
+++ b/src-ui/src/app/components/dashboard/widgets/saved-view-widget/saved-view-widget.component.ts
@@ -1,6 +1,8 @@
 import { Component, Input, OnInit } from '@angular/core';
 import { Router } from '@angular/router';
 import { PaperlessDocument } from 'src/app/data/paperless-document';
 import { SavedViewConfig } from 'src/app/data/saved-view-config';
 import { DocumentListViewService } from 'src/app/services/document-list-view.service';
 import { DocumentService } from 'src/app/services/rest/document.service';
@Component({
@@ -10,7 +12,10 @@ import { DocumentService } from 'src/app/services/rest/document.service';
 })
 export class SavedViewWidgetComponent implements OnInit {
-  constructor(private documentService: DocumentService) { }
+  constructor(
    private documentService: DocumentService,
    private router: Router,
    private list: DocumentListViewService) { }
  @Input()
  savedView: SavedViewConfig
@@ -23,4 +28,9 @@ export class SavedViewWidgetComponent implements OnInit {
    })
  }
  /**
   * Handler for the "Show all" header link: loads this widget's saved view
   * into the document list service, then navigates to the "documents" route.
   */
  showAll() {
    const view = this.savedView
    this.list.load(view)
    this.router.navigate(["documents"])
  }
 }
--- a/src-ui/src/app/components/dashboard/widgets/statistics-widget/statistics-widget.component.html
+++ b/src-ui/src/app/components/dashboard/widgets/statistics-widget/statistics-widget.component.html
@@ -1,4 +1,6 @@
 <app-widget-frame title="Statistics">
  <ng-container content>
    <p class="card-text">Documents in inbox: {{statistics.documents_inbox}}</p>
    <p class="card-text">Total documents: {{statistics.documents_total}}</p>
  </ng-container>
 </app-widget-frame>
--- a/src-ui/src/app/components/dashboard/widgets/upload-file-widget/upload-file-widget.component.html
+++ b/src-ui/src/app/components/dashboard/widgets/upload-file-widget/upload-file-widget.component.html
@@ -1,6 +1,6 @@
 <app-widget-frame title="Upload new documents">
-  <form>
+  <form content>
    <ngx-file-drop 
      dropZoneLabel="Drop documents here or" (onFileDrop)="dropped($event)"
      (onFileOver)="fileOver($event)" (onFileLeave)="fileLeave($event)"
--- a/src-ui/src/app/components/dashboard/widgets/widget-frame/widget-frame.component.html
+++ b/src-ui/src/app/components/dashboard/widgets/widget-frame/widget-frame.component.html
@@ -1,8 +1,12 @@
 <div class="card mb-3 shadow">
  <div class="card-header">
    <div class="d-flex justify-content-between align-items-center">
      <h5 class="card-title mb-0">{{title}}</h5>
      <ng-content select ="[header-buttons]"></ng-content>
    </div>
  </div>
  <div class="card-body text-dark">
-    <ng-content></ng-content>
+    <ng-content select ="[content]"></ng-content>
  </div>
 </div>
--- a/src-ui/src/app/components/document-list/document-card-large/document-card-large.component.html
+++ b/src-ui/src/app/components/document-list/document-card-large/document-card-large.component.html
@@ -9,9 +9,11 @@
        <div class="d-flex justify-content-between align-items-center">
          <h5 class="card-title">    
            <ng-container *ngIf="document.correspondent">
-              <a [routerLink]="" title="Filter by correspondent" (click)="clickCorrespondent.emit(document.correspondent)" class="font-weight-bold">{{document.correspondent.name}}</a>:
+              <a *ngIf="clickCorrespondent.observers.length ; else nolink" [routerLink]="" title="Filter by correspondent" (click)="clickCorrespondent.emit(document.correspondent)" class="font-weight-bold">{{document.correspondent.name}}</a>
              <ng-template #nolink>{{document.correspondent.name}}</ng-template>:
            </ng-container>
-            {{document.title}}<app-tag [tag]="t" linkTitle="Filter by tag" *ngFor="let t of document.tags" class="ml-1" (click)="clickTag.emit(t)" [clickable]="true"></app-tag>
+            {{document.title}}
            <app-tag [tag]="t" linkTitle="Filter by tag" *ngFor="let t of document.tags" class="ml-1" (click)="clickTag.emit(t)" [clickable]="clickTag.observers.length"></app-tag>
          </h5>
          <h5 class="card-title" *ngIf="document.archive_serial_number">#{{document.archive_serial_number}}</h5>
        </div>
--- a/src-ui/src/app/components/search/search.component.html
+++ b/src-ui/src/app/components/search/search.component.html
@@ -1,9 +1,17 @@
 <app-page-header title="Search results">
 </app-page-header>
-<p>Search string: <i>{{query}}</i></p>
+<div *ngIf="errorMessage" class="alert alert-danger">Invalid search query: {{errorMessage}}</div>
-<div [class.result-content-searching]="searching" infiniteScroll (scrolled)="onScroll()">
+<p>
    Search string: <i>{{query}}</i>
    <ng-container *ngIf="correctedQuery">
        - Did you mean "<a [routerLink]="" (click)="searchCorrectedQuery()">{{correctedQuery}}</a>"?
    </ng-container>
 </p>
 <div *ngIf="!errorMessage" [class.result-content-searching]="searching" infiniteScroll (scrolled)="onScroll()">
    <p>{{resultCount}} result(s)</p>
    <app-document-card-large *ngFor="let result of results"
        [document]="result.document"
--- a/src-ui/src/app/components/search/search.component.ts
+++ b/src-ui/src/app/components/search/search.component.ts
@@ -1,5 +1,5 @@
 import { Component, OnInit } from '@angular/core';
-import { ActivatedRoute } from '@angular/router';
+import { ActivatedRoute, Router } from '@angular/router';
 import { SearchHit } from 'src/app/data/search-result';
 import { SearchService } from 'src/app/services/rest/search.service';
@@ -22,7 +22,11 @@ export class SearchComponent implements OnInit {
  resultCount
-  constructor(private searchService: SearchService, private route: ActivatedRoute) { }
+  correctedQuery: string = null
  errorMessage: string
  constructor(private searchService: SearchService, private route: ActivatedRoute, private router: Router) { }
  ngOnInit(): void {
    this.route.queryParamMap.subscribe(paramMap => {
@@ -34,7 +38,13 @@ export class SearchComponent implements OnInit {
  }
  /**
   * Navigates to the "search" route with the suggested (corrected) query
   * as the `query` query parameter.
   */
  searchCorrectedQuery() {
    const queryParams = {query: this.correctedQuery}
    this.router.navigate(["search"], {queryParams: queryParams})
  }
  loadPage(append: boolean = false) {
    this.errorMessage = null
    this.correctedQuery = null
    this.searchService.search(this.query, this.currentPage).subscribe(result => {
      if (append) {
        this.results.push(...result.results)
@@ -44,12 +54,17 @@ export class SearchComponent implements OnInit {
      this.pageCount = result.page_count
      this.searching = false
      this.resultCount = result.count
      this.correctedQuery = result.corrected_query
    }, error => {
      this.searching = false
      this.resultCount = 1
      this.pageCount = 1
      this.results = []
      this.errorMessage = error.error
    })
  }
  onScroll() {
    console.log(this.currentPage)
    console.log(this.pageCount)
    if (this.currentPage < this.pageCount) {
      this.currentPage += 1
      this.loadPage(true)
--- a/src-ui/src/app/data/search-result.ts
+++ b/src-ui/src/app/data/search-result.ts
@@ -21,6 +21,8 @@ export interface SearchResult {
  page?: number
  page_count?: number
  corrected_query?: string
  results?: SearchHit[]
--- a/src/documents/consumer.py
+++ b/src/documents/consumer.py
@@ -10,10 +10,11 @@ from django.db.models import Q
 from django.utils import timezone
 from .classifier import DocumentClassifier, IncompatibleClassifierVersionError
-from .file_handling import generate_filename, create_source_path_directory
+from .file_handling import create_source_path_directory
 from .loggers import LoggingMixin
 from .models import Document, FileInfo, Correspondent, DocumentType, Tag
-from .parsers import ParseError, get_parser_class_for_mime_type, parse_date
+from .parsers import ParseError, get_parser_class_for_mime_type, \
    get_supported_file_extensions, parse_date
 from .signals import (
    document_consumption_finished,
    document_consumption_started
@@ -40,6 +41,21 @@ class Consumer(LoggingMixin):
            raise ConsumerError("Cannot consume {}: It is not a file".format(
                self.path))
    def pre_check_file_extension(self):
        """Refuse the file early when its extension is not supported.

        The extension of ``self.filename`` is looked up in the set returned
        by ``get_supported_file_extensions()``. The lookup ignores case, so
        ``scan.PDF`` is accepted whenever ``scan.pdf`` would be.

        Raises:
            ConsumerError: if the filename has no extension at all, or if
                its extension is not among the supported ones.
        """
        extensions = get_supported_file_extensions()
        _, ext = os.path.splitext(self.filename)
        if not ext:
            raise ConsumerError(
                f"Not consuming {self.filename}: File type unknown."
            )
        # The supported set is built from mimetypes.guess_all_extensions(),
        # which yields lower-case extensions; normalise before comparing so
        # upper/mixed-case file names are not rejected. The error message
        # still reports the extension exactly as it appears in the filename.
        if ext.lower() not in extensions:
            raise ConsumerError(
                f"Not consuming {self.filename}: File extension {ext} does "
                f"not map to any known file type ({str(extensions)})"
            )
    def pre_check_duplicate(self):
        with open(self.path, "rb") as f:
            checksum = hashlib.md5(f.read()).hexdigest()
@@ -82,6 +98,7 @@ class Consumer(LoggingMixin):
        # Make sure that preconditions for consuming the file are met.
        self.pre_check_file_exists()
        self.pre_check_file_extension()
        self.pre_check_directories()
        self.pre_check_duplicate()
--- a/src/documents/index.py
+++ b/src/documents/index.py
@@ -4,10 +4,11 @@ from contextlib import contextmanager
 from django.conf import settings
 from whoosh import highlight
-from whoosh.fields import Schema, TEXT, NUMERIC
+from whoosh.fields import Schema, TEXT, NUMERIC, KEYWORD, DATETIME
 from whoosh.highlight import Formatter, get_text
 from whoosh.index import create_in, exists_in, open_dir
 from whoosh.qparser import MultifieldParser
 from whoosh.qparser.dateparse import DateParserPlugin
 from whoosh.writing import AsyncWriter
@@ -59,14 +60,19 @@ def get_schema():
        id=NUMERIC(stored=True, unique=True, numtype=int),
        title=TEXT(stored=True),
        content=TEXT(),
-        correspondent=TEXT(stored=True)
+        correspondent=TEXT(stored=True),
        tag=KEYWORD(stored=True, commas=True, scorable=True, lowercase=True),
        type=TEXT(stored=True),
        created=DATETIME(stored=True, sortable=True),
        modified=DATETIME(stored=True, sortable=True),
        added=DATETIME(stored=True, sortable=True),
    )
 def open_index(recreate=False):
    try:
        if exists_in(settings.INDEX_DIR) and not recreate:
-            return open_dir(settings.INDEX_DIR)
+            return open_dir(settings.INDEX_DIR, schema=get_schema())
    except Exception as e:
        logger.error(f"Error while opening the index: {e}, recreating.")
@@ -77,11 +83,17 @@ def open_index(recreate=False):
 def update_document(writer, doc):
    logger.debug("Indexing {}...".format(doc))
    tags = ",".join([t.name for t in doc.tags.all()])
    writer.update_document(
        id=doc.pk,
        title=doc.title,
        content=doc.content,
-        correspondent=doc.correspondent.name if doc.correspondent else None
+        correspondent=doc.correspondent.name if doc.correspondent else None,
        tag=tags if tags else None,
        type=doc.document_type.name if doc.document_type else None,
        created=doc.created,
        added=doc.added,
        modified=doc.modified,
    )
@@ -103,16 +115,27 @@ def remove_document_from_index(document):
@contextmanager
-def query_page(ix, query, page):
+def query_page(ix, querystring, page):
    searcher = ix.searcher()
    try:
-        query_parser = MultifieldParser(["content", "title", "correspondent"],
+        qp = MultifieldParser(
-                                        ix.schema).parse(query)
+            ["content", "title", "correspondent", "tag", "type"],
-        result_page = searcher.search_page(query_parser, page)
+            ix.schema)
        qp.add_plugin(DateParserPlugin())
        q = qp.parse(querystring)
        result_page = searcher.search_page(q, page)
        result_page.results.fragmenter = highlight.ContextFragmenter(
            surround=50)
        result_page.results.formatter = JsonFormatter()
-        yield result_page
+
        corrected = searcher.correct_query(q, querystring)
        if corrected.query != q:
            corrected_query = corrected.string
        else:
            corrected_query = None
        yield result_page, corrected_query
    finally:
        searcher.close()
--- a/src/documents/models.py
+++ b/src/documents/models.py
@@ -1,7 +1,6 @@
 # coding=utf-8
 import logging
 import mimetypes
 import os
 import re
 from collections import OrderedDict
@@ -12,6 +11,8 @@ from django.db import models
 from django.utils import timezone
 from django.utils.text import slugify
 from documents.parsers import get_default_file_extension
 class MatchingModel(models.Model):
@@ -204,7 +205,7 @@ class Document(models.Model):
        ordering = ("correspondent", "title")
    def __str__(self):
-        created = self.created.strftime("%Y%m%d%H%M%S")
+        created = self.created.strftime("%Y%m%d")
        if self.correspondent and self.title:
            return "{}: {} - {}".format(
                created, self.correspondent, self.title)
@@ -255,8 +256,7 @@ class Document(models.Model):
    @property
    def file_type(self):
-        # TODO: this is not stable across python versions
+        return get_default_file_extension(self.mime_type)
        return mimetypes.guess_extension(str(self.mime_type))
    @property
    def thumbnail_path(self):
--- a/src/documents/parsers.py
+++ b/src/documents/parsers.py
@@ -1,4 +1,5 @@
 import logging
 import mimetypes
 import os
 import re
 import shutil
@@ -42,6 +43,29 @@ def is_mime_type_supported(mime_type):
    return get_parser_class_for_mime_type(mime_type) is not None
 def get_default_file_extension(mime_type):
    """Return the file extension a parser declares for ``mime_type``.

    Every response to the ``document_consumer_declaration`` signal is
    inspected in order; the first declaration whose ``"mime_types"`` entry
    contains ``mime_type`` decides the result. Returns ``None`` when no
    declaration lists the MIME type.
    """
    declarations = (
        response[1]
        for response in document_consumer_declaration.send(None)
    )
    for declaration in declarations:
        extension_by_mime_type = declaration["mime_types"]
        if mime_type not in extension_by_mime_type:
            continue
        return extension_by_mime_type[mime_type]
    return None
 def get_supported_file_extensions():
    """Collect every file extension known for the declared MIME types.

    Each response to the ``document_consumer_declaration`` signal contributes
    the MIME types under its ``"mime_types"`` entry; for each of them, all
    extensions reported by ``mimetypes.guess_all_extensions`` are gathered
    into a single set, which is returned.
    """
    return {
        extension
        for response in document_consumer_declaration.send(None)
        for mime_type in response[1]["mime_types"]
        for extension in mimetypes.guess_all_extensions(mime_type)
    }
 def get_parser_class_for_mime_type(mime_type):
    options = []
--- a/src/documents/tests/test_api.py
+++ b/src/documents/tests/test_api.py
@@ -325,6 +325,22 @@ class DocumentApiTest(DirectoriesMixin, APITestCase):
        self.assertEqual(response.status_code, 200)
        self.assertEqual(len(response.data), 10)
    def test_search_spelling_correction(self):
        """The search API suggests "things" for "thing" and nothing for "things".

        Indexes 55 documents whose content contains the word "Things", then
        checks the ``corrected_query`` field of two search responses.
        """
        with AsyncWriter(index.open_index()) as writer:
            for i in range(55):
                document = Document.objects.create(
                    checksum=str(i),
                    pk=i+1,
                    title=f"Document {i+1}",
                    content=f"Things document {i+1}",
                )
                index.update_document(writer, document)
        # A near-miss query should come back with a suggestion.
        misspelled = self.client.get("/api/search/?query=thing")
        self.assertEqual(misspelled.data['corrected_query'], "things")
        # A query that already matches should not be "corrected".
        exact = self.client.get("/api/search/?query=things")
        self.assertEqual(exact.data['corrected_query'], None)
    def test_statistics(self):
        doc1 = Document.objects.create(title="none1", checksum="A")
--- a/src/documents/tests/test_consumer.py
+++ b/src/documents/tests/test_consumer.py
@@ -425,7 +425,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
        m = patcher.start()
        m.return_value = [(None, {
            "parser": self.make_dummy_parser,
-            "mime_types": ["application/pdf"],
+            "mime_types": {"application/pdf": ".pdf"},
            "weight": 0
        })]
@@ -551,7 +551,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
        try:
            self.consumer.try_consume_file(self.get_test_file())
        except ConsumerError as e:
-            self.assertTrue(str(e).startswith("No parsers abvailable"))
+            self.assertTrue("File extension .pdf does not map to any" in str(e))
            return
        self.fail("Should throw exception")
@@ -560,7 +560,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
    def testFaultyParser(self, m):
        m.return_value = [(None, {
            "parser": self.make_faulty_parser,
-            "mime_types": ["application/pdf"],
+            "mime_types": {"application/pdf": ".pdf"},
            "weight": 0
        })]
--- a/src/documents/tests/test_parsers.py
+++ b/src/documents/tests/test_parsers.py
@@ -6,7 +6,10 @@ from unittest import mock
 from django.test import TestCase, override_settings
-from documents.parsers import get_parser_class, DocumentParser
+from documents.parsers import get_parser_class, get_supported_file_extensions, get_default_file_extension, \
    get_parser_class_for_mime_type, DocumentParser
 from paperless_tesseract.parsers import RasterisedDocumentParser
 from paperless_text.parsers import TextDocumentParser
 def fake_magic_from_file(file, mime=False):
@@ -29,7 +32,7 @@ class TestParserDiscovery(TestCase):
            pass
        m.return_value = (
-            (None, {"weight": 0, "parser": DummyParser, "mime_types": ["application/pdf"]}),
+            (None, {"weight": 0, "parser": DummyParser, "mime_types": {"application/pdf": ".pdf"}}),
        )
        self.assertEqual(
@@ -47,8 +50,8 @@ class TestParserDiscovery(TestCase):
            pass
        m.return_value = (
-            (None, {"weight": 0, "parser": DummyParser1, "mime_types": ["application/pdf"]}),
+            (None, {"weight": 0, "parser": DummyParser1, "mime_types": {"application/pdf": ".pdf"}}),
-            (None, {"weight": 1, "parser": DummyParser2, "mime_types": ["application/pdf"]}),
+            (None, {"weight": 1, "parser": DummyParser2, "mime_types": {"application/pdf": ".pdf"}}),
        )
        self.assertEqual(
@@ -96,3 +99,20 @@ class TestBaseParser(TestCase):
        path = parser.get_optimised_thumbnail("any", "not important")
        self.assertEqual(path, fake_get_thumbnail(None, None, None))
 class TestParserAvailability(TestCase):
    """Checks the extensions and parser classes reported for known MIME types."""
    def test_file_extensions(self):
        """Supported extensions, default extensions and parser lookup agree."""
        for extension in (".pdf", ".jpe", ".jpg", ".jpeg", ".txt", ".csv"):
            self.assertIn(extension, get_supported_file_extensions())
        # MIME type -> default extension expected from the parser declarations.
        expected_default_extensions = (
            ('application/pdf', ".pdf"),
            ('image/png', ".png"),
            ('image/jpeg', ".jpg"),
            ('text/plain', ".txt"),
            ('text/csv', ".csv"),
            ('aasdasd/dgfgf', None),
        )
        for mime_type, default_extension in expected_default_extensions:
            self.assertEqual(get_default_file_extension(mime_type), default_extension)
        # MIME type -> parser class expected to be selected for it.
        expected_parsers = (
            ('application/pdf', RasterisedDocumentParser),
            ('text/plain', TextDocumentParser),
            ('text/sdgsdf', None),
        )
        for mime_type, parser_class in expected_parsers:
            self.assertEqual(get_parser_class_for_mime_type(mime_type), parser_class)
--- a/src/documents/views.py
+++ b/src/documents/views.py
@@ -236,7 +236,13 @@ class SearchView(APIView):
                }
    def get(self, request, format=None):
-        if 'query' in request.query_params:
+        if 'query' not in request.query_params:
            return Response({
                'count': 0,
                'page': 0,
                'page_count': 0,
                'results': []})
        query = request.query_params['query']
        try:
            page = int(request.query_params.get('page', 1))
@@ -246,19 +252,17 @@ class SearchView(APIView):
        if page < 1:
            page = 1
-            with index.query_page(self.ix, query, page) as result_page:
+        try:
            with index.query_page(self.ix, query, page) as (result_page,
                                                            corrected_query):
                return Response(
                    {'count': len(result_page),
                     'page': result_page.pagenum,
                     'page_count': result_page.pagecount,
                     'corrected_query': corrected_query,
                     'results': list(map(self.add_infos_to_hit, result_page))})
-
+        except Exception as e:
-        else:
+            return HttpResponseBadRequest(str(e))
            return Response({
                'count': 0,
                'page': 0,
                'page_count': 0,
                'results': []})
 class SearchAutoCompleteView(APIView):
--- a/src/paperless/version.py
+++ b/src/paperless/version.py
@@ -1 +1 @@
-__version__ = (0, 9, 3)
+__version__ = (0, 9, 4)
--- a/src/paperless_tesseract/signals.py
+++ b/src/paperless_tesseract/signals.py
@@ -5,9 +5,9 @@ def tesseract_consumer_declaration(sender, **kwargs):
    return {
        "parser": RasterisedDocumentParser,
        "weight": 0,
-        "mime_types": [
+        "mime_types": {
-            "application/pdf",
+            "application/pdf": ".pdf",
-            "image/jpeg",
+            "image/jpeg": ".jpg",
-            "image/png"
+            "image/png": ".png"
-        ]
+        }
    }
--- a/src/paperless_text/signals.py
+++ b/src/paperless_text/signals.py
@@ -5,8 +5,8 @@ def text_consumer_declaration(sender, **kwargs):
    return {
        "parser": TextDocumentParser,
        "weight": 10,
-        "mime_types": [
+        "mime_types": {
-            "text/plain",
+            "text/plain": ".txt",
-            "text/comma-separated-values"
+            "text/csv": ".csv",
-        ]
+        }
    }
`@@ -1 +1 @@`
	`__version__ = (0, 9, 3)`	`__version__ = (0, 9, 4)`