Fix a bug with thumbnail generation when Tika is enabled

This commit is contained in:
jonaswinkler 2021-02-09 22:12:43 +01:00
parent 7d67766508
commit 8d6071e977
8 changed files with 25 additions and 19 deletions

View File

@ -241,7 +241,7 @@ class Consumer(LoggingMixin):
self._send_progress(70, 100, 'WORKING',
MESSAGE_GENERATING_THUMBNAIL)
thumbnail = document_parser.get_optimised_thumbnail(
self.path, mime_type)
self.path, mime_type, self.filename)
text = document_parser.get_text()
date = document_parser.get_date()

View File

@ -22,7 +22,10 @@ def _process_document(doc_in):
try:
thumb = parser.get_optimised_thumbnail(
document.source_path, document.mime_type)
document.source_path,
document.mime_type,
document.get_public_filename()
)
shutil.move(thumb, document.thumbnail_path)
finally:

View File

@ -288,14 +288,17 @@ class DocumentParser(LoggingMixin):
def get_archive_path(self):
return self.archive_path
def get_thumbnail(self, document_path, mime_type):
def get_thumbnail(self, document_path, mime_type, file_name=None):
"""
Returns the path to a file we can use as a thumbnail for this document.
"""
raise NotImplementedError()
def get_optimised_thumbnail(self, document_path, mime_type):
thumbnail = self.get_thumbnail(document_path, mime_type)
def get_optimised_thumbnail(self,
document_path,
mime_type,
file_name=None):
thumbnail = self.get_thumbnail(document_path, mime_type, file_name)
if settings.OPTIMIZE_THUMBNAILS:
out_path = os.path.join(self.tempdir, "thumb_optipng.png")

View File

@ -167,7 +167,7 @@ class TestFieldPermutations(TestCase):
class DummyParser(DocumentParser):
def get_thumbnail(self, document_path, mime_type):
def get_thumbnail(self, document_path, mime_type, file_name=None):
# not important during tests
raise NotImplementedError()
@ -176,7 +176,7 @@ class DummyParser(DocumentParser):
_, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=scratch_dir)
self.archive_path = archive_path
def get_optimised_thumbnail(self, document_path, mime_type):
def get_optimised_thumbnail(self, document_path, mime_type, file_name=None):
return self.fake_thumb
def parse(self, document_path, mime_type, file_name=None):
@ -185,10 +185,10 @@ class DummyParser(DocumentParser):
class CopyParser(DocumentParser):
def get_thumbnail(self, document_path, mime_type):
def get_thumbnail(self, document_path, mime_type, file_name=None):
return self.fake_thumb
def get_optimised_thumbnail(self, document_path, mime_type):
def get_optimised_thumbnail(self, document_path, mime_type, file_name=None):
return self.fake_thumb
def __init__(self, logging_group, progress_callback=None):
@ -203,7 +203,7 @@ class CopyParser(DocumentParser):
class FaultyParser(DocumentParser):
def get_thumbnail(self, document_path, mime_type):
def get_thumbnail(self, document_path, mime_type, file_name=None):
# not important during tests
raise NotImplementedError()
@ -211,7 +211,7 @@ class FaultyParser(DocumentParser):
super(FaultyParser, self).__init__(logging_group)
_, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=scratch_dir)
def get_optimised_thumbnail(self, document_path, mime_type):
def get_optimised_thumbnail(self, document_path, mime_type, file_name=None):
return self.fake_thumb
def parse(self, document_path, mime_type, file_name=None):

View File

@ -68,7 +68,7 @@ class TestParserDiscovery(TestCase):
)
def fake_get_thumbnail(self, path, mimetype):
def fake_get_thumbnail(self, path, mimetype, file_name):
return os.path.join(os.path.dirname(__file__), "examples", "no-text.png")
@ -89,15 +89,15 @@ class TestBaseParser(TestCase):
def test_get_optimised_thumbnail(self):
parser = DocumentParser(None)
parser.get_optimised_thumbnail("any", "not important")
parser.get_optimised_thumbnail("any", "not important", "document.pdf")
@mock.patch("documents.parsers.DocumentParser.get_thumbnail", fake_get_thumbnail)
@override_settings(OPTIMIZE_THUMBNAILS=False)
def test_get_optimised_thumb_disabled(self):
parser = DocumentParser(None)
path = parser.get_optimised_thumbnail("any", "not important")
self.assertEqual(path, fake_get_thumbnail(None, None, None))
path = parser.get_optimised_thumbnail("any", "not important", "document.pdf")
self.assertEqual(path, fake_get_thumbnail(None, None, None, None))
class TestParserAvailability(TestCase):

View File

@ -48,7 +48,7 @@ class RasterisedDocumentParser(DocumentParser):
)
return result
def get_thumbnail(self, document_path, mime_type):
def get_thumbnail(self, document_path, mime_type, file_name=None):
return make_thumbnail_from_pdf(
document_path, self.tempdir, self.logging_group)

View File

@ -13,7 +13,7 @@ class TextDocumentParser(DocumentParser):
logging_name = "paperless.parsing.text"
def get_thumbnail(self, document_path, mime_type):
def get_thumbnail(self, document_path, mime_type, file_name=None):
def read_text():
with open(document_path, 'r') as src:

View File

@ -16,9 +16,9 @@ class TikaDocumentParser(DocumentParser):
logging_name = "paperless.parsing.tika"
def get_thumbnail(self, document_path, mime_type):
def get_thumbnail(self, document_path, mime_type, file_name=None):
if not self.archive_path:
self.archive_path = self.convert_to_pdf(document_path)
self.archive_path = self.convert_to_pdf(document_path, file_name)
return make_thumbnail_from_pdf(
self.archive_path, self.tempdir, self.logging_group)