mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-10-30 03:56:23 -05:00 
			
		
		
		
	mime type handling
This commit is contained in:
		| @@ -50,7 +50,7 @@ class DocumentTypeAdmin(admin.ModelAdmin): | ||||
| class DocumentAdmin(admin.ModelAdmin): | ||||
|  | ||||
|     search_fields = ("correspondent__name", "title", "content", "tags__name") | ||||
|     readonly_fields = ("added", "file_type", "storage_type", "filename") | ||||
|     readonly_fields = ("added", "mime_type", "storage_type", "filename") | ||||
|     list_display = ( | ||||
|         "title", | ||||
|         "created", | ||||
| @@ -58,8 +58,7 @@ class DocumentAdmin(admin.ModelAdmin): | ||||
|         "correspondent", | ||||
|         "tags_", | ||||
|         "archive_serial_number", | ||||
|         "document_type", | ||||
|         "filename" | ||||
|         "document_type" | ||||
|     ) | ||||
|     list_filter = ( | ||||
|         "document_type", | ||||
|   | ||||
| @@ -2,8 +2,8 @@ import datetime | ||||
| import hashlib | ||||
| import logging | ||||
| import os | ||||
| import re | ||||
|  | ||||
| import magic | ||||
| from django.conf import settings | ||||
| from django.db import transaction | ||||
| from django.utils import timezone | ||||
| @@ -13,7 +13,7 @@ from .classifier import DocumentClassifier, IncompatibleClassifierVersionError | ||||
| from .file_handling import generate_filename, create_source_path_directory | ||||
| from .loggers import LoggingMixin | ||||
| from .models import Document, FileInfo, Correspondent, DocumentType, Tag | ||||
| from .parsers import ParseError, get_parser_class | ||||
| from .parsers import ParseError, get_parser_class_for_mime_type | ||||
| from .signals import ( | ||||
|     document_consumption_finished, | ||||
|     document_consumption_started | ||||
| @@ -51,12 +51,6 @@ class Consumer(LoggingMixin): | ||||
|                 "Consumption directory {} does not exist".format( | ||||
|                     settings.CONSUMPTION_DIR)) | ||||
|  | ||||
|     def pre_check_regex(self): | ||||
|         if not re.match(FileInfo.REGEXES["title"], self.filename): | ||||
|             raise ConsumerError( | ||||
|                 "Filename {} does not seem to be safe to " | ||||
|                 "consume".format(self.filename)) | ||||
|  | ||||
|     def pre_check_duplicate(self): | ||||
|         with open(self.path, "rb") as f: | ||||
|             checksum = hashlib.md5(f.read()).hexdigest() | ||||
| @@ -100,18 +94,19 @@ class Consumer(LoggingMixin): | ||||
|         self.pre_check_file_exists() | ||||
|         self.pre_check_consumption_dir() | ||||
|         self.pre_check_directories() | ||||
|         self.pre_check_regex() | ||||
|         self.pre_check_duplicate() | ||||
|  | ||||
|         self.log("info", "Consuming {}".format(self.filename)) | ||||
|  | ||||
|         # Determine the parser class. | ||||
|  | ||||
|         parser_class = get_parser_class(self.filename) | ||||
|         mime_type = magic.from_file(self.path, mime=True) | ||||
|  | ||||
|         parser_class = get_parser_class_for_mime_type(mime_type) | ||||
|         if not parser_class: | ||||
|             raise ConsumerError("No parsers abvailable for {}".format(self.filename)) | ||||
|         else: | ||||
|             self.log("debug", "Parser: {}".format(parser_class.__name__)) | ||||
|             self.log("debug", "Parser: {} based on mime type {}".format(parser_class.__name__, mime_type)) | ||||
|  | ||||
|         # Notify all listeners that we're going to do some work. | ||||
|  | ||||
| @@ -162,7 +157,8 @@ class Consumer(LoggingMixin): | ||||
|                 # store the document. | ||||
|                 document = self._store( | ||||
|                     text=text, | ||||
|                     date=date | ||||
|                     date=date, | ||||
|                     mime_type=mime_type | ||||
|                 ) | ||||
|  | ||||
|                 # If we get here, it was successful. Proceed with post-consume | ||||
| @@ -197,7 +193,7 @@ class Consumer(LoggingMixin): | ||||
|  | ||||
|         return document | ||||
|  | ||||
|     def _store(self, text, date): | ||||
|     def _store(self, text, date, mime_type): | ||||
|  | ||||
|         # If someone gave us the original filename, use it instead of doc. | ||||
|  | ||||
| @@ -220,7 +216,7 @@ class Consumer(LoggingMixin): | ||||
|                 correspondent=file_info.correspondent, | ||||
|                 title=file_info.title, | ||||
|                 content=text, | ||||
|                 file_type=file_info.extension, | ||||
|                 mime_type=mime_type, | ||||
|                 checksum=hashlib.md5(f.read()).hexdigest(), | ||||
|                 created=created, | ||||
|                 modified=created, | ||||
|   | ||||
| @@ -91,9 +91,9 @@ def generate_filename(document): | ||||
|  | ||||
|     # Always append the primary key to guarantee uniqueness of filename | ||||
|     if len(path) > 0: | ||||
|         filename = "%s-%07i.%s" % (path, document.pk, document.file_type) | ||||
|         filename = "%s-%07i%s" % (path, document.pk, document.file_type) | ||||
|     else: | ||||
|         filename = "%07i.%s" % (document.pk, document.file_type) | ||||
|         filename = "%07i%s" % (document.pk, document.file_type) | ||||
|  | ||||
|     # Append .gpg for encrypted files | ||||
|     if document.storage_type == document.STORAGE_TYPE_GPG: | ||||
|   | ||||
| @@ -127,8 +127,8 @@ class Command(Renderable, BaseCommand): | ||||
|         tags = ",".join([t.slug for t in doc.tags.all()]) | ||||
|  | ||||
|         if tags: | ||||
|             return "{} - {} - {} - {}.{}".format( | ||||
|             return "{} - {} - {} - {}{}".format( | ||||
|                 created, doc.correspondent, doc.title, tags, doc.file_type) | ||||
|  | ||||
|         return "{} - {} - {}.{}".format( | ||||
|         return "{} - {} - {}{}".format( | ||||
|             created, doc.correspondent, doc.title, doc.file_type) | ||||
|   | ||||
							
								
								
									
										50
									
								
								src/documents/migrations/1003_mime_types.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										50
									
								
								src/documents/migrations/1003_mime_types.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,50 @@ | ||||
| # Generated by Django 3.1.3 on 2020-11-20 11:21 | ||||
| import os | ||||
|  | ||||
| import magic | ||||
| from django.conf import settings | ||||
| from django.db import migrations, models | ||||
|  | ||||
|  | ||||
# Historical models handed out by ``apps.get_model`` only carry database
# fields; class-level constants of the real ``Document`` model (such as
# ``Document.STORAGE_TYPE_GPG``) are not available on them. The marker value
# is therefore duplicated here instead of being read from the instance.
STORAGE_TYPE_GPG = "gpg"


def source_path(self):
    """
    Return the path of a document's original file inside ORIGINALS_DIR.

    Migration-local helper, called with a document instance as ``self``.
    It builds the file name from the legacy ``file_type`` column, which is
    still present while the data migration runs (it is only dropped by the
    last operation of this migration).

    If the document has an explicit ``filename`` it is used as is.
    Otherwise the name is the zero-padded primary key plus the legacy
    extension, with ``.gpg`` appended for GPG-stored documents.
    """
    if self.filename:
        fname = str(self.filename)
    else:
        fname = "{:07}.{}".format(self.pk, self.file_type)
        # Compare against the module constant: ``self.STORAGE_TYPE_GPG``
        # would raise AttributeError on a historical model instance.
        if self.storage_type == STORAGE_TYPE_GPG:
            fname += ".gpg"

    return os.path.join(
        settings.ORIGINALS_DIR,
        fname
    )
|  | ||||
|  | ||||
def add_mime_types(apps, schema_editor):
    """
    Data migration step: populate ``mime_type`` on every existing document.

    For each document the type is detected by ``magic.from_file`` on the
    file located at ``source_path(document)`` and the row is saved.

    ``schema_editor`` is part of the RunPython callback signature and is
    not used here.
    """
    document_model = apps.get_model("documents", "Document")

    # NOTE(review): the file on disk is inspected as stored; for GPG-stored
    # documents this presumably yields the type of the encrypted container
    # rather than of the document itself - confirm whether that is intended.
    for doc in document_model.objects.all():
        doc.mime_type = magic.from_file(source_path(doc), mime=True)
        doc.save()
|  | ||||
|  | ||||
class Migration(migrations.Migration):
    """
    Replace the ``file_type`` column of ``Document`` with ``mime_type``.

    Order matters: the new column is added first (with a placeholder
    default of "-"), then filled from the files on disk by
    ``add_mime_types``, and only afterwards the old ``file_type`` column is
    removed, because the data step still needs it to locate the files.
    """

    dependencies = [("documents", "1002_auto_20201111_1105")]

    operations = [
        # 1. Add the new column; "-" is only a one-off value for existing
        #    rows and is not kept as the field default.
        migrations.AddField(
            model_name="document",
            name="mime_type",
            field=models.CharField(
                default="-",
                editable=False,
                max_length=256,
            ),
            preserve_default=False,
        ),
        # 2. Detect and store the mime type of every existing document.
        migrations.RunPython(add_mime_types),
        # 3. Drop the legacy extension column.
        migrations.RemoveField(
            model_name="document",
            name="file_type",
        ),
    ]
| @@ -1,6 +1,7 @@ | ||||
| # coding=utf-8 | ||||
|  | ||||
| import logging | ||||
| import mimetypes | ||||
| import os | ||||
| import re | ||||
| from collections import OrderedDict | ||||
| @@ -113,18 +114,6 @@ class DocumentType(MatchingModel): | ||||
|  | ||||
| class Document(models.Model): | ||||
|  | ||||
|     # TODO: why do we need an explicit list | ||||
|     TYPE_PDF = "pdf" | ||||
|     TYPE_PNG = "png" | ||||
|     TYPE_JPG = "jpg" | ||||
|     TYPE_GIF = "gif" | ||||
|     TYPE_TIF = "tiff" | ||||
|     TYPE_TXT = "txt" | ||||
|     TYPE_CSV = "csv" | ||||
|     TYPE_MD = "md" | ||||
|     TYPES = (TYPE_PDF, TYPE_PNG, TYPE_JPG, TYPE_GIF, TYPE_TIF, | ||||
|              TYPE_TXT, TYPE_CSV, TYPE_MD) | ||||
|  | ||||
|     STORAGE_TYPE_UNENCRYPTED = "unencrypted" | ||||
|     STORAGE_TYPE_GPG = "gpg" | ||||
|     STORAGE_TYPES = ( | ||||
| @@ -156,10 +145,9 @@ class Document(models.Model): | ||||
|                   "primarily used for searching." | ||||
|     ) | ||||
|  | ||||
|     file_type = models.CharField( | ||||
|         max_length=4, | ||||
|         editable=False, | ||||
|         choices=tuple([(t, t.upper()) for t in TYPES]) | ||||
|     mime_type = models.CharField( | ||||
|         max_length=256, | ||||
|         editable=False | ||||
|     ) | ||||
|  | ||||
|     tags = models.ManyToManyField( | ||||
| @@ -223,7 +211,7 @@ class Document(models.Model): | ||||
|         if self.filename: | ||||
|             fname = str(self.filename) | ||||
|         else: | ||||
|             fname = "{:07}.{}".format(self.pk, self.file_type) | ||||
|             fname = "{:07}{}".format(self.pk, self.file_type) | ||||
|             if self.storage_type == self.STORAGE_TYPE_GPG: | ||||
|                 fname += ".gpg" | ||||
|  | ||||
| @@ -238,7 +226,11 @@ class Document(models.Model): | ||||
|  | ||||
|     @property | ||||
|     def file_name(self): | ||||
|         return slugify(str(self)) + "." + self.file_type | ||||
|         return slugify(str(self)) + self.file_type | ||||
|  | ||||
|     @property | ||||
|     def file_type(self): | ||||
|         return mimetypes.guess_extension(str(self.mime_type)) | ||||
|  | ||||
|     @property | ||||
|     def thumbnail_path(self): | ||||
|   | ||||
| @@ -6,6 +6,7 @@ import subprocess | ||||
| import tempfile | ||||
|  | ||||
| import dateparser | ||||
| import magic | ||||
| from django.conf import settings | ||||
| from django.utils import timezone | ||||
|  | ||||
| @@ -37,10 +38,11 @@ DATE_REGEX = re.compile( | ||||
| logger = logging.getLogger(__name__) | ||||
|  | ||||
|  | ||||
| def get_parser_class(doc): | ||||
|     """ | ||||
|     Determine the appropriate parser class based on the file | ||||
|     """ | ||||
def is_mime_type_supported(mime_type):
    """
    Return True if a parser class can be found for ``mime_type``.
    """
    parser_class = get_parser_class_for_mime_type(mime_type)
    return parser_class is not None
|  | ||||
|  | ||||
| def get_parser_class_for_mime_type(mime_type): | ||||
|  | ||||
|     options = [] | ||||
|  | ||||
| @@ -48,9 +50,9 @@ def get_parser_class(doc): | ||||
|  | ||||
|     for response in document_consumer_declaration.send(None): | ||||
|         parser_declaration = response[1] | ||||
|         parser_test = parser_declaration["test"] | ||||
|         supported_mime_types = parser_declaration["mime_types"] | ||||
|  | ||||
|         if parser_test(doc): | ||||
|         if mime_type in supported_mime_types: | ||||
|             options.append(parser_declaration) | ||||
|  | ||||
|     if not options: | ||||
| @@ -61,6 +63,16 @@ def get_parser_class(doc): | ||||
|         options, key=lambda _: _["weight"], reverse=True)[0]["parser"] | ||||
|  | ||||
|  | ||||
def get_parser_class(path):
    """
    Determine the appropriate parser class for the file at ``path``.

    The file's mime type is detected with ``magic.from_file`` and the
    lookup itself is delegated to ``get_parser_class_for_mime_type``.
    """
    return get_parser_class_for_mime_type(magic.from_file(path, mime=True))
|  | ||||
|  | ||||
| def run_convert(input_file, output_file, density=None, scale=None, alpha=None, strip=False, trim=False, type=None, depth=None, extra=None, logging_group=None): | ||||
|     environment = os.environ.copy() | ||||
|     if settings.CONVERT_MEMORY_LIMIT: | ||||
|   | ||||
| @@ -91,7 +91,7 @@ class DocumentSerializer(serializers.ModelSerializer): | ||||
|             "document_type_id", | ||||
|             "title", | ||||
|             "content", | ||||
|             "file_type", | ||||
|             "mime_type", | ||||
|             "tags", | ||||
|             "tags_id", | ||||
|             "checksum", | ||||
|   | ||||
| @@ -45,7 +45,7 @@ class DocumentApiTest(APITestCase): | ||||
|         dt = DocumentType.objects.create(name="dt", pk=63) | ||||
|         tag = Tag.objects.create(name="t", pk=85) | ||||
|  | ||||
|         doc = Document.objects.create(title="WOW", content="the content", correspondent=c, document_type=dt, checksum="123") | ||||
|         doc = Document.objects.create(title="WOW", content="the content", correspondent=c, document_type=dt, checksum="123", mime_type="application/pdf") | ||||
|  | ||||
|         doc.tags.add(tag) | ||||
|  | ||||
| @@ -95,7 +95,7 @@ class DocumentApiTest(APITestCase): | ||||
|         with open(filename, "wb") as f: | ||||
|             f.write(content) | ||||
|  | ||||
|         doc = Document.objects.create(title="none", filename=os.path.basename(filename), file_type="pdf") | ||||
|         doc = Document.objects.create(title="none", filename=os.path.basename(filename), mime_type="application/pdf") | ||||
|  | ||||
|         with open(os.path.join(self.thumbnail_dir, "{:07d}.png".format(doc.pk)), "wb") as f: | ||||
|             f.write(content_thumbnail) | ||||
| @@ -117,7 +117,7 @@ class DocumentApiTest(APITestCase): | ||||
|  | ||||
|     def test_document_actions_not_existing_file(self): | ||||
|  | ||||
|         doc = Document.objects.create(title="none", filename=os.path.basename("asd"), file_type="pdf") | ||||
|         doc = Document.objects.create(title="none", filename=os.path.basename("asd"), mime_type="application/pdf") | ||||
|  | ||||
|         response = self.client.get('/api/documents/{}/download/'.format(doc.pk)) | ||||
|         self.assertEqual(response.status_code, 404) | ||||
| @@ -130,9 +130,9 @@ class DocumentApiTest(APITestCase): | ||||
|  | ||||
|     def test_document_filters(self): | ||||
|  | ||||
|         doc1 = Document.objects.create(title="none1", checksum="A") | ||||
|         doc2 = Document.objects.create(title="none2", checksum="B") | ||||
|         doc3 = Document.objects.create(title="none3", checksum="C") | ||||
|         doc1 = Document.objects.create(title="none1", checksum="A", mime_type="application/pdf") | ||||
|         doc2 = Document.objects.create(title="none2", checksum="B", mime_type="application/pdf") | ||||
|         doc3 = Document.objects.create(title="none3", checksum="C", mime_type="application/pdf") | ||||
|  | ||||
|         tag_inbox = Tag.objects.create(name="t1", is_inbox_tag=True) | ||||
|         tag_2 = Tag.objects.create(name="t2") | ||||
|   | ||||
| @@ -437,6 +437,18 @@ class FaultyParser(DocumentParser): | ||||
|         raise ParseError("Does not compute.") | ||||
|  | ||||
|  | ||||
def fake_magic_from_file(file, mime=False):
    """
    Test stand-in for ``magic.from_file`` that never touches the file.

    With ``mime`` set, the answer depends only on the file extension:
    ".pdf" gives "application/pdf", anything else gives "unknown".
    Without ``mime``, a fixed descriptive string is returned.
    """
    if not mime:
        return "A verbose string that describes the contents of the file"

    _, extension = os.path.splitext(file)
    return "application/pdf" if extension == ".pdf" else "unknown"
|  | ||||
|  | ||||
| @mock.patch("documents.consumer.magic.from_file", fake_magic_from_file) | ||||
| class TestConsumer(TestCase): | ||||
|  | ||||
|     def make_dummy_parser(self, path, logging_group): | ||||
| @@ -462,7 +474,7 @@ class TestConsumer(TestCase): | ||||
|         m = patcher.start() | ||||
|         m.return_value = [(None, { | ||||
|             "parser": self.make_dummy_parser, | ||||
|             "test": lambda _: True, | ||||
|             "mime_types": ["application/pdf"], | ||||
|             "weight": 0 | ||||
|         })] | ||||
|  | ||||
| @@ -592,7 +604,7 @@ class TestConsumer(TestCase): | ||||
|     def testFaultyParser(self, m): | ||||
|         m.return_value = [(None, { | ||||
|             "parser": self.make_faulty_parser, | ||||
|             "test": lambda _: True, | ||||
|             "mime_types": ["application/pdf"], | ||||
|             "weight": 0 | ||||
|         })] | ||||
|  | ||||
|   | ||||
| @@ -13,9 +13,12 @@ class TestDocument(TestCase): | ||||
|             title="Title", | ||||
|             content="content", | ||||
|             checksum="checksum", | ||||
|             mime_type="application/pdf" | ||||
|         ) | ||||
|  | ||||
|         file_path = document.source_path | ||||
|         thumb_path = document.thumbnail_path | ||||
|  | ||||
|         with mock.patch("documents.signals.handlers.os.unlink") as mock_unlink: | ||||
|             document.delete() | ||||
|             mock_unlink.assert_any_call(file_path) | ||||
|   | ||||
| @@ -31,7 +31,7 @@ class TestDate(TestCase): | ||||
|     @override_settings(PAPERLESS_FILENAME_FORMAT="") | ||||
|     def test_generate_source_filename(self): | ||||
|         document = Document() | ||||
|         document.file_type = "pdf" | ||||
|         document.mime_type = "application/pdf" | ||||
|         document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED | ||||
|         document.save() | ||||
|  | ||||
| @@ -44,7 +44,7 @@ class TestDate(TestCase): | ||||
|     @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}") | ||||
|     def test_file_renaming(self): | ||||
|         document = Document() | ||||
|         document.file_type = "pdf" | ||||
|         document.mime_type = "application/pdf" | ||||
|         document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED | ||||
|         document.save() | ||||
|  | ||||
| @@ -81,7 +81,7 @@ class TestDate(TestCase): | ||||
|     @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}") | ||||
|     def test_file_renaming_missing_permissions(self): | ||||
|         document = Document() | ||||
|         document.file_type = "pdf" | ||||
|         document.mime_type = "application/pdf" | ||||
|         document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED | ||||
|         document.save() | ||||
|  | ||||
| @@ -111,10 +111,10 @@ class TestDate(TestCase): | ||||
|     @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}") | ||||
|     def test_file_renaming_database_error(self): | ||||
|  | ||||
|         document1 = Document.objects.create(file_type="pdf", storage_type=Document.STORAGE_TYPE_UNENCRYPTED, checksum="AAAAA") | ||||
|         document1 = Document.objects.create(mime_type="application/pdf", storage_type=Document.STORAGE_TYPE_UNENCRYPTED, checksum="AAAAA") | ||||
|  | ||||
|         document = Document() | ||||
|         document.file_type = "pdf" | ||||
|         document.mime_type = "application/pdf" | ||||
|         document.checksum = "BBBBB" | ||||
|         document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED | ||||
|         document.save() | ||||
| @@ -149,7 +149,7 @@ class TestDate(TestCase): | ||||
|     @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}") | ||||
|     def test_document_delete(self): | ||||
|         document = Document() | ||||
|         document.file_type = "pdf" | ||||
|         document.mime_type = "application/pdf" | ||||
|         document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED | ||||
|         document.save() | ||||
|  | ||||
| @@ -170,7 +170,7 @@ class TestDate(TestCase): | ||||
|     @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}") | ||||
|     def test_document_delete_nofile(self): | ||||
|         document = Document() | ||||
|         document.file_type = "pdf" | ||||
|         document.mime_type = "application/pdf" | ||||
|         document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED | ||||
|         document.save() | ||||
|  | ||||
| @@ -179,7 +179,7 @@ class TestDate(TestCase): | ||||
|     @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}") | ||||
|     def test_directory_not_empty(self): | ||||
|         document = Document() | ||||
|         document.file_type = "pdf" | ||||
|         document.mime_type = "application/pdf" | ||||
|         document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED | ||||
|         document.save() | ||||
|  | ||||
| @@ -206,7 +206,7 @@ class TestDate(TestCase): | ||||
|     @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}") | ||||
|     def test_tags_with_underscore(self): | ||||
|         document = Document() | ||||
|         document.file_type = "pdf" | ||||
|         document.mime_type = "application/pdf" | ||||
|         document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED | ||||
|         document.save() | ||||
|  | ||||
| @@ -222,7 +222,7 @@ class TestDate(TestCase): | ||||
|     @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}") | ||||
|     def test_tags_with_dash(self): | ||||
|         document = Document() | ||||
|         document.file_type = "pdf" | ||||
|         document.mime_type = "application/pdf" | ||||
|         document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED | ||||
|         document.save() | ||||
|  | ||||
| @@ -238,7 +238,7 @@ class TestDate(TestCase): | ||||
|     @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}") | ||||
|     def test_tags_malformed(self): | ||||
|         document = Document() | ||||
|         document.file_type = "pdf" | ||||
|         document.mime_type = "application/pdf" | ||||
|         document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED | ||||
|         document.save() | ||||
|  | ||||
| @@ -254,7 +254,7 @@ class TestDate(TestCase): | ||||
|     @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[0]}") | ||||
|     def test_tags_all(self): | ||||
|         document = Document() | ||||
|         document.file_type = "pdf" | ||||
|         document.mime_type = "application/pdf" | ||||
|         document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED | ||||
|         document.save() | ||||
|  | ||||
| @@ -269,7 +269,7 @@ class TestDate(TestCase): | ||||
|     @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[1]}") | ||||
|     def test_tags_out_of_bounds(self): | ||||
|         document = Document() | ||||
|         document.file_type = "pdf" | ||||
|         document.mime_type = "application/pdf" | ||||
|         document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED | ||||
|         document.save() | ||||
|  | ||||
| @@ -284,7 +284,7 @@ class TestDate(TestCase): | ||||
|     @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}/{correspondent}") | ||||
|     def test_nested_directory_cleanup(self): | ||||
|         document = Document() | ||||
|         document.file_type = "pdf" | ||||
|         document.mime_type = "application/pdf" | ||||
|         document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED | ||||
|         document.save() | ||||
|  | ||||
| @@ -309,7 +309,7 @@ class TestDate(TestCase): | ||||
|     def test_format_none(self): | ||||
|         document = Document() | ||||
|         document.pk = 1 | ||||
|         document.file_type = "pdf" | ||||
|         document.mime_type = "application/pdf" | ||||
|         document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED | ||||
|  | ||||
|         self.assertEqual(generate_filename(document), "0000001.pdf") | ||||
| @@ -335,7 +335,7 @@ class TestDate(TestCase): | ||||
|     def test_invalid_format(self): | ||||
|         document = Document() | ||||
|         document.pk = 1 | ||||
|         document.file_type = "pdf" | ||||
|         document.mime_type = "application/pdf" | ||||
|         document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED | ||||
|  | ||||
|         self.assertEqual(generate_filename(document), "0000001.pdf") | ||||
| @@ -344,7 +344,7 @@ class TestDate(TestCase): | ||||
|     def test_invalid_format_key(self): | ||||
|         document = Document() | ||||
|         document.pk = 1 | ||||
|         document.file_type = "pdf" | ||||
|         document.mime_type = "application/pdf" | ||||
|         document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED | ||||
|  | ||||
|         self.assertEqual(generate_filename(document), "0000001.pdf") | ||||
|   | ||||
| @@ -213,7 +213,7 @@ class TestDocumentConsumptionFinishedSignal(TestCase): | ||||
|         TestCase.setUp(self) | ||||
|         User.objects.create_user(username='test_consumer', password='12345') | ||||
|         self.doc_contains = Document.objects.create( | ||||
|             content="I contain the keyword.", file_type="pdf") | ||||
|             content="I contain the keyword.", mime_type="application/pdf") | ||||
|  | ||||
|     def test_tag_applied_any(self): | ||||
|         t1 = Tag.objects.create( | ||||
|   | ||||
| @@ -1,3 +1,4 @@ | ||||
| import os | ||||
| from tempfile import TemporaryDirectory | ||||
| from unittest import mock | ||||
|  | ||||
| @@ -5,7 +6,18 @@ from django.test import TestCase | ||||
|  | ||||
| from documents.parsers import get_parser_class | ||||
|  | ||||
def fake_magic_from_file(file, mime=False):
    """
    Replacement for ``magic.from_file`` used while testing parser discovery.

    The file is never opened. When ``mime`` is requested, paths ending in
    ".pdf" are reported as "application/pdf" and all others as "unknown";
    otherwise a constant description string is returned.
    """
    if mime:
        is_pdf = os.path.splitext(file)[1] == ".pdf"
        return "application/pdf" if is_pdf else "unknown"

    return "A verbose string that describes the contents of the file"
|  | ||||
|  | ||||
| @mock.patch("documents.parsers.magic.from_file", fake_magic_from_file) | ||||
| class TestParserDiscovery(TestCase): | ||||
|  | ||||
|     @mock.patch("documents.parsers.document_consumer_declaration.send") | ||||
| @@ -14,7 +26,7 @@ class TestParserDiscovery(TestCase): | ||||
|             pass | ||||
|  | ||||
|         m.return_value = ( | ||||
|             (None, {"weight": 0, "parser": DummyParser, "test": lambda _: True}), | ||||
|             (None, {"weight": 0, "parser": DummyParser, "mime_types": ["application/pdf"]}), | ||||
|         ) | ||||
|  | ||||
|         self.assertEqual( | ||||
| @@ -32,8 +44,8 @@ class TestParserDiscovery(TestCase): | ||||
|             pass | ||||
|  | ||||
|         m.return_value = ( | ||||
|             (None, {"weight": 0, "parser": DummyParser1, "test": lambda _: True}), | ||||
|             (None, {"weight": 1, "parser": DummyParser2, "test": lambda _: True}), | ||||
|             (None, {"weight": 0, "parser": DummyParser1, "mime_types": ["application/pdf"]}), | ||||
|             (None, {"weight": 1, "parser": DummyParser2, "mime_types": ["application/pdf"]}), | ||||
|         ) | ||||
|  | ||||
|         self.assertEqual( | ||||
|   | ||||
| @@ -104,18 +104,6 @@ class DocumentViewSet(RetrieveModelMixin, | ||||
|         return super(DocumentViewSet, self).destroy(request, *args, **kwargs) | ||||
|  | ||||
|     def file_response(self, pk, disposition): | ||||
|         # TODO: this should not be necessary here. | ||||
|         content_types = { | ||||
|             Document.TYPE_PDF: "application/pdf", | ||||
|             Document.TYPE_PNG: "image/png", | ||||
|             Document.TYPE_JPG: "image/jpeg", | ||||
|             Document.TYPE_GIF: "image/gif", | ||||
|             Document.TYPE_TIF: "image/tiff", | ||||
|             Document.TYPE_CSV: "text/csv", | ||||
|             Document.TYPE_MD: "text/markdown", | ||||
|             Document.TYPE_TXT: "text/plain" | ||||
|         } | ||||
|  | ||||
|         doc = Document.objects.get(id=pk) | ||||
|  | ||||
|         if doc.storage_type == Document.STORAGE_TYPE_UNENCRYPTED: | ||||
| @@ -123,7 +111,7 @@ class DocumentViewSet(RetrieveModelMixin, | ||||
|         else: | ||||
|             file_handle = GnuPG.decrypted(doc.source_file) | ||||
|  | ||||
|         response = HttpResponse(file_handle, content_type=content_types[doc.file_type]) | ||||
|         response = HttpResponse(file_handle, content_type=doc.mime_type) | ||||
|         response["Content-Disposition"] = '{}; filename="{}"'.format( | ||||
|             disposition, doc.file_name) | ||||
|         return response | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Jonas Winkler
					Jonas Winkler