Use optipng to optimise document thumbnails

This commit is contained in:
Daniel Quinn
2018-10-07 14:56:38 +01:00
parent 2a3f766b93
commit 750ab5bf85
9 changed files with 85 additions and 22 deletions

View File

@@ -149,7 +149,7 @@ class Consumer:
parsed_document = parser_class(doc)
try:
thumbnail = parsed_document.get_thumbnail()
thumbnail = parsed_document.get_optimised_thumbnail()
date = parsed_document.get_date()
document = self._store(
parsed_document.get_text(),

View File

@@ -2,6 +2,7 @@ import logging
import os
import re
import shutil
import subprocess
import tempfile
import dateparser
@@ -36,6 +37,7 @@ class DocumentParser:
SCRATCH = settings.SCRATCH_DIR
DATE_ORDER = settings.DATE_ORDER
OPTIPNG = settings.OPTIPNG_BINARY
def __init__(self, path):
self.document_path = path
@@ -49,6 +51,19 @@ class DocumentParser:
"""
raise NotImplementedError()
def optimise_thumbnail(self, in_path):
    """
    Run optipng over the image at ``in_path`` and return the path of the
    optimised copy, which is written as "optipng.png" inside
    ``self.tempdir``.

    Raises ParseError if optipng cannot be started or if it exits with a
    non-zero status.
    """
    out_path = os.path.join(self.tempdir, "optipng.png")
    args = (self.OPTIPNG, "-o5", in_path, "-out", out_path)
    try:
        # call() starts the process, waits for it and returns its exit code.
        exit_code = subprocess.call(args)
    except OSError as err:
        # A missing or non-executable binary would otherwise escape as a
        # bare OSError; report it the same way as any other optipng failure.
        raise ParseError(
            "Optipng could not be run at {}: {}".format(args, err)
        ) from err
    if exit_code != 0:
        raise ParseError("Optipng failed at {}".format(args))
    return out_path
def get_optimised_thumbnail(self):
    """
    Produce the thumbnail with get_thumbnail() and pass the result through
    optimise_thumbnail(), returning whatever that call returns.
    """
    thumbnail_path = self.get_thumbnail()
    return self.optimise_thumbnail(thumbnail_path)
def get_text(self):
"""
Returns the text from the document and only the text.