From 8d6071e977c023c0f2515f6cea6929633dd86d3d Mon Sep 17 00:00:00 2001 From: jonaswinkler Date: Tue, 9 Feb 2021 22:12:43 +0100 Subject: [PATCH] fix a bug with thumbnail generation when TIKA was enabled --- src/documents/consumer.py | 2 +- .../management/commands/document_thumbnails.py | 5 ++++- src/documents/parsers.py | 9 ++++++--- src/documents/tests/test_consumer.py | 12 ++++++------ src/documents/tests/test_parsers.py | 8 ++++---- src/paperless_tesseract/parsers.py | 2 +- src/paperless_text/parsers.py | 2 +- src/paperless_tika/parsers.py | 4 ++-- 8 files changed, 25 insertions(+), 19 deletions(-) diff --git a/src/documents/consumer.py b/src/documents/consumer.py index acb3ad33f..3fd62fabc 100755 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -241,7 +241,7 @@ class Consumer(LoggingMixin): self._send_progress(70, 100, 'WORKING', MESSAGE_GENERATING_THUMBNAIL) thumbnail = document_parser.get_optimised_thumbnail( - self.path, mime_type) + self.path, mime_type, self.filename) text = document_parser.get_text() date = document_parser.get_date() diff --git a/src/documents/management/commands/document_thumbnails.py b/src/documents/management/commands/document_thumbnails.py index cf2cbeb77..b7f935e3b 100644 --- a/src/documents/management/commands/document_thumbnails.py +++ b/src/documents/management/commands/document_thumbnails.py @@ -22,7 +22,10 @@ def _process_document(doc_in): try: thumb = parser.get_optimised_thumbnail( - document.source_path, document.mime_type) + document.source_path, + document.mime_type, + document.get_public_filename() + ) shutil.move(thumb, document.thumbnail_path) finally: diff --git a/src/documents/parsers.py b/src/documents/parsers.py index 98af4f080..1ed5deb3c 100644 --- a/src/documents/parsers.py +++ b/src/documents/parsers.py @@ -288,14 +288,17 @@ class DocumentParser(LoggingMixin): def get_archive_path(self): return self.archive_path - def get_thumbnail(self, document_path, mime_type): + def get_thumbnail(self, document_path, mime_type, file_name=None): """ Returns the path to a file we can use as a thumbnail for this document. """ raise NotImplementedError() - def get_optimised_thumbnail(self, document_path, mime_type): - thumbnail = self.get_thumbnail(document_path, mime_type) + def get_optimised_thumbnail(self, + document_path, + mime_type, + file_name=None): + thumbnail = self.get_thumbnail(document_path, mime_type, file_name) if settings.OPTIMIZE_THUMBNAILS: out_path = os.path.join(self.tempdir, "thumb_optipng.png") diff --git a/src/documents/tests/test_consumer.py b/src/documents/tests/test_consumer.py index 1ed041187..44effd39e 100644 --- a/src/documents/tests/test_consumer.py +++ b/src/documents/tests/test_consumer.py @@ -167,7 +167,7 @@ class TestFieldPermutations(TestCase): class DummyParser(DocumentParser): - def get_thumbnail(self, document_path, mime_type): + def get_thumbnail(self, document_path, mime_type, file_name=None): # not important during tests raise NotImplementedError() @@ -176,7 +176,7 @@ class DummyParser(DocumentParser): _, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=scratch_dir) self.archive_path = archive_path - def get_optimised_thumbnail(self, document_path, mime_type): + def get_optimised_thumbnail(self, document_path, mime_type, file_name=None): return self.fake_thumb def parse(self, document_path, mime_type, file_name=None): @@ -185,10 +185,10 @@ class DummyParser(DocumentParser): class CopyParser(DocumentParser): - def get_thumbnail(self, document_path, mime_type): + def get_thumbnail(self, document_path, mime_type, file_name=None): return self.fake_thumb - def get_optimised_thumbnail(self, document_path, mime_type): + def get_optimised_thumbnail(self, document_path, mime_type, file_name=None): return self.fake_thumb def __init__(self, logging_group, progress_callback=None): @@ -203,7 +203,7 @@ class CopyParser(DocumentParser): class FaultyParser(DocumentParser): - def get_thumbnail(self, document_path, mime_type): + def get_thumbnail(self, document_path, mime_type, file_name=None): # not important during tests raise NotImplementedError() @@ -211,7 +211,7 @@ class FaultyParser(DocumentParser): super(FaultyParser, self).__init__(logging_group) _, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=scratch_dir) - def get_optimised_thumbnail(self, document_path, mime_type): + def get_optimised_thumbnail(self, document_path, mime_type, file_name=None): return self.fake_thumb def parse(self, document_path, mime_type, file_name=None): diff --git a/src/documents/tests/test_parsers.py b/src/documents/tests/test_parsers.py index 8da6470a4..9dd74313f 100644 --- a/src/documents/tests/test_parsers.py +++ b/src/documents/tests/test_parsers.py @@ -68,7 +68,7 @@ class TestParserDiscovery(TestCase): ) -def fake_get_thumbnail(self, path, mimetype): +def fake_get_thumbnail(self, path, mimetype, file_name): return os.path.join(os.path.dirname(__file__), "examples", "no-text.png") @@ -89,15 +89,15 @@ class TestBaseParser(TestCase): def test_get_optimised_thumbnail(self): parser = DocumentParser(None) - parser.get_optimised_thumbnail("any", "not important") + parser.get_optimised_thumbnail("any", "not important", "document.pdf") @mock.patch("documents.parsers.DocumentParser.get_thumbnail", fake_get_thumbnail) @override_settings(OPTIMIZE_THUMBNAILS=False) def test_get_optimised_thumb_disabled(self): parser = DocumentParser(None) - path = parser.get_optimised_thumbnail("any", "not important") - self.assertEqual(path, fake_get_thumbnail(None, None, None)) + path = parser.get_optimised_thumbnail("any", "not important", "document.pdf") + self.assertEqual(path, fake_get_thumbnail(None, None, None, None)) class TestParserAvailability(TestCase): diff --git a/src/paperless_tesseract/parsers.py b/src/paperless_tesseract/parsers.py index 0a976b569..271a840df 100644 --- a/src/paperless_tesseract/parsers.py +++ b/src/paperless_tesseract/parsers.py @@ -48,7 +48,7 @@ class RasterisedDocumentParser(DocumentParser): ) return result - def get_thumbnail(self, document_path, mime_type): + def get_thumbnail(self, document_path, mime_type, file_name=None): return make_thumbnail_from_pdf( document_path, self.tempdir, self.logging_group) diff --git a/src/paperless_text/parsers.py b/src/paperless_text/parsers.py index c307bf10b..837f05c9f 100644 --- a/src/paperless_text/parsers.py +++ b/src/paperless_text/parsers.py @@ -13,7 +13,7 @@ class TextDocumentParser(DocumentParser): logging_name = "paperless.parsing.text" - def get_thumbnail(self, document_path, mime_type): + def get_thumbnail(self, document_path, mime_type, file_name=None): def read_text(): with open(document_path, 'r') as src: diff --git a/src/paperless_tika/parsers.py b/src/paperless_tika/parsers.py index b888af820..6b0f62ada 100644 --- a/src/paperless_tika/parsers.py +++ b/src/paperless_tika/parsers.py @@ -16,9 +16,9 @@ class TikaDocumentParser(DocumentParser): logging_name = "paperless.parsing.tika" - def get_thumbnail(self, document_path, mime_type): + def get_thumbnail(self, document_path, mime_type, file_name=None): if not self.archive_path: - self.archive_path = self.convert_to_pdf(document_path) + self.archive_path = self.convert_to_pdf(document_path, file_name) return make_thumbnail_from_pdf( self.archive_path, self.tempdir, self.logging_group)