mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-07-28 18:24:38 -05:00
Entirely removes optipng and updates the Ghostscript fallback to also use WebP. Updates the conversion to use a multiprocessing pool.
This commit is contained in:
@@ -273,7 +273,7 @@ class Consumer(LoggingMixin):
|
||||
|
||||
self.log("debug", f"Generating thumbnail for {self.filename}...")
|
||||
self._send_progress(70, 100, "WORKING", MESSAGE_GENERATING_THUMBNAIL)
|
||||
thumbnail = document_parser.get_optimised_thumbnail(
|
||||
thumbnail = document_parser.get_thumbnail(
|
||||
self.path,
|
||||
mime_type,
|
||||
self.filename,
|
||||
|
@@ -1,4 +1,5 @@
|
||||
import logging
|
||||
import multiprocessing.pool
|
||||
import shutil
|
||||
import tempfile
|
||||
import time
|
||||
@@ -8,10 +9,44 @@ from django.core.management.base import BaseCommand
|
||||
from documents.models import Document
|
||||
from documents.parsers import run_convert
|
||||
|
||||
|
||||
logger = logging.getLogger("paperless.management.convert_thumbnails")
|
||||
|
||||
|
||||
def _do_convert(work_package):
    """
    Convert one PNG thumbnail to WebP and replace the original with it.

    ``work_package`` is a 3-tuple whose second and third items are the
    existing PNG thumbnail path and the path the converted WebP should be
    written to (both are used as path objects: ``.parent``, ``.name``,
    ``.unlink()``).  The first item is ignored here.

    Nothing is returned.  Any exception raised while converting, copying or
    deleting is caught and logged as an error instead of propagating.
    """
    _, png_path, webp_path = work_package

    try:
        logger.info(f"Converting thumbnail: {png_path}")

        # Options forwarded verbatim to run_convert.  The "[0]" suffix on the
        # input presumably selects the first frame/page -- confirm against
        # run_convert.
        convert_options = {
            "density": 300,
            "scale": "500x5000>",
            "alpha": "remove",
            "strip": True,
            "trim": False,
            "auto_orient": True,
            "input_file": f"{png_path}[0]",
            "output_file": str(webp_path),
        }
        run_convert(**convert_options)

        # Place the new WebP next to the PNG, then drop the PNG.  The PNG is
        # only removed once the copy has succeeded.
        shutil.copy(webp_path, png_path.parent)
        png_path.unlink()

        logger.info(
            f"Conversion to WebP completed, replaced {png_path.name} with {webp_path.name}",
        )

    except Exception as e:
        logger.error(f"Error converting thumbnail (existing file unchanged): {e}")
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
|
||||
help = """
|
||||
@@ -24,21 +59,19 @@ class Command(BaseCommand):
|
||||
|
||||
def handle(self, *args, **options):
|
||||
|
||||
self.stdout.write("Converting all PNG thumbnails to WebP")
|
||||
|
||||
logger.info("Converting all PNG thumbnails to WebP")
|
||||
start = time.time()
|
||||
|
||||
documents = Document.objects.all()
|
||||
|
||||
with tempfile.TemporaryDirectory() as tempdir:
|
||||
|
||||
work_packages = []
|
||||
|
||||
for document in documents:
|
||||
existing_thumbnail = Path(document.thumbnail_path).resolve()
|
||||
|
||||
if existing_thumbnail.suffix == ".png":
|
||||
|
||||
self.stdout.write(f"Converting thumbnail: {existing_thumbnail}")
|
||||
|
||||
# Change the existing filename suffix from png to webp
|
||||
converted_thumbnail_name = existing_thumbnail.with_suffix(
|
||||
".webp",
|
||||
@@ -49,46 +82,16 @@ class Command(BaseCommand):
|
||||
Path(tempdir) / Path(converted_thumbnail_name)
|
||||
).resolve()
|
||||
|
||||
try:
|
||||
# Run actual conversion
|
||||
run_convert(
|
||||
density=300,
|
||||
scale="500x5000>",
|
||||
alpha="remove",
|
||||
strip=True,
|
||||
trim=False,
|
||||
auto_orient=True,
|
||||
input_file=f"{existing_thumbnail}[0]",
|
||||
output_file=str(converted_thumbnail),
|
||||
)
|
||||
# Package up the necessary info
|
||||
work_packages.append(
|
||||
(document, existing_thumbnail, converted_thumbnail),
|
||||
)
|
||||
|
||||
if converted_thumbnail.exists():
|
||||
# Copy newly created thumbnail to thumbnail directory
|
||||
shutil.copy(converted_thumbnail, existing_thumbnail.parent)
|
||||
|
||||
# Remove the PNG version
|
||||
existing_thumbnail.unlink()
|
||||
|
||||
self.stdout.write(
|
||||
self.style.SUCCESS(
|
||||
"Conversion to WebP completed",
|
||||
),
|
||||
)
|
||||
else:
|
||||
# Highly unlikely to reach here
|
||||
self.stderr.write(
|
||||
self.style.WARNING("Converted thumbnail doesn't exist"),
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
self.stderr.write(
|
||||
self.style.ERROR(
|
||||
f"Error converting thumbnail"
|
||||
f" (existing file unchanged): {e}",
|
||||
),
|
||||
)
|
||||
if len(work_packages):
|
||||
with multiprocessing.pool.Pool(processes=4, maxtasksperchild=4) as pool:
|
||||
pool.map(_do_convert, work_packages)
|
||||
|
||||
end = time.time()
|
||||
duration = end - start
|
||||
|
||||
self.stdout.write(f"Conversion completed in {duration:.3f}s")
|
||||
logger.info(f"Conversion completed in {duration:.3f}s")
|
||||
|
@@ -41,7 +41,7 @@ def handle_document(document_id):
|
||||
try:
|
||||
parser.parse(document.source_path, mime_type, document.get_public_filename())
|
||||
|
||||
thumbnail = parser.get_optimised_thumbnail(
|
||||
thumbnail = parser.get_thumbnail(
|
||||
document.source_path,
|
||||
mime_type,
|
||||
document.get_public_filename(),
|
||||
|
@@ -29,7 +29,7 @@ def _process_document(doc_in):
|
||||
if existing_thumbnail.exists() and existing_thumbnail.suffix == ".png":
|
||||
existing_thumbnail.unlink()
|
||||
|
||||
thumb = parser.get_optimised_thumbnail(
|
||||
thumb = parser.get_thumbnail(
|
||||
document.source_path,
|
||||
document.mime_type,
|
||||
document.get_public_filename(),
|
||||
|
@@ -308,17 +308,11 @@ class Document(models.Model):
|
||||
png_file_path = os.path.join(settings.THUMBNAIL_DIR, png_file_name)
|
||||
|
||||
# 1. Assume the thumbnail is WebP
|
||||
if not os.path.exists(webp_file_path):
|
||||
# 2. If WebP doesn't exist, check PNG
|
||||
if not os.path.exists(png_file_path):
|
||||
# 3. If PNG doesn't exist, filename is being constructed, return WebP
|
||||
thumb = webp_file_path
|
||||
else:
|
||||
# 2.1 - PNG file exists, return path to it
|
||||
thumb = png_file_path
|
||||
if os.path.exists(png_file_path):
|
||||
thumb = png_file_path
|
||||
else:
|
||||
# 1.1 - WebP file exists, return path to it
|
||||
thumb = webp_file_path
|
||||
|
||||
return os.path.normpath(thumb)
|
||||
|
||||
@property
|
||||
|
@@ -150,11 +150,14 @@ def run_convert(
|
||||
|
||||
|
||||
def get_default_thumbnail() -> str:
    """
    Build the path of the generic fallback thumbnail.

    The path points at ``resources/document.png`` inside the directory that
    contains this module.  The file's existence is not checked.
    """
    module_dir = os.path.dirname(__file__)
    return os.path.join(module_dir, "resources", "document.png")
|
||||
|
||||
|
||||
def make_thumbnail_from_pdf_gs_fallback(in_path, temp_dir, logging_group=None) -> str:
|
||||
out_path = os.path.join(temp_dir, "convert_gs.png")
|
||||
out_path = os.path.join(temp_dir, "convert_gs.webp")
|
||||
|
||||
# if convert fails, fall back to extracting
|
||||
# the first PDF page as a PNG using Ghostscript
|
||||
@@ -319,29 +322,6 @@ class DocumentParser(LoggingMixin):
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
def get_optimised_thumbnail(self, document_path, mime_type, file_name=None):
    """
    Produce a thumbnail and, when enabled, run it through optipng.

    The thumbnail comes from ``self.get_thumbnail``.  It is returned
    untouched unless ``settings.OPTIMIZE_THUMBNAILS`` is truthy and the
    thumbnail path ends in ``.png``.  Otherwise the optipng binary named by
    ``settings.OPTIPNG_BINARY`` is executed and the path of its output,
    ``thumb_optipng.png`` inside ``self.tempdir``, is returned.

    Raises ``ParseError`` when optipng exits with a non-zero status.
    """
    thumbnail = self.get_thumbnail(document_path, mime_type, file_name)

    # The setting is tested first so the suffix check is skipped entirely
    # when optimisation is switched off.
    if not settings.OPTIMIZE_THUMBNAILS or os.path.splitext(thumbnail)[1] != ".png":
        return thumbnail

    optimised_path = os.path.join(self.tempdir, "thumb_optipng.png")
    command = (
        settings.OPTIPNG_BINARY,
        "-silent",
        "-o5",
        thumbnail,
        "-out",
        optimised_path,
    )

    self.log("debug", f"Execute: {' '.join(command)}")

    exit_status = subprocess.Popen(command).wait()
    if exit_status != 0:
        raise ParseError(f"Optipng failed at {command}")

    return optimised_path
|
||||
|
||||
def get_text(self):
|
||||
return self.text
|
||||
|
||||
|
@@ -183,7 +183,7 @@ class DummyParser(DocumentParser):
|
||||
_, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=scratch_dir)
|
||||
self.archive_path = archive_path
|
||||
|
||||
def get_optimised_thumbnail(self, document_path, mime_type, file_name=None):
|
||||
def get_thumbnail(self, document_path, mime_type, file_name=None):
|
||||
return self.fake_thumb
|
||||
|
||||
def parse(self, document_path, mime_type, file_name=None):
|
||||
@@ -194,7 +194,7 @@ class CopyParser(DocumentParser):
|
||||
def get_thumbnail(self, document_path, mime_type, file_name=None):
|
||||
return self.fake_thumb
|
||||
|
||||
def get_optimised_thumbnail(self, document_path, mime_type, file_name=None):
|
||||
def get_thumbnail(self, document_path, mime_type, file_name=None):
|
||||
return self.fake_thumb
|
||||
|
||||
def __init__(self, logging_group, progress_callback=None):
|
||||
@@ -216,7 +216,7 @@ class FaultyParser(DocumentParser):
|
||||
super().__init__(logging_group)
|
||||
_, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=scratch_dir)
|
||||
|
||||
def get_optimised_thumbnail(self, document_path, mime_type, file_name=None):
|
||||
def get_thumbnail(self, document_path, mime_type, file_name=None):
|
||||
return self.fake_thumb
|
||||
|
||||
def parse(self, document_path, mime_type, file_name=None):
|
||||
|
@@ -137,32 +137,3 @@ class TestConvertThumbnails(TestCase):
|
||||
run_convert_mock.assert_called_once()
|
||||
self.assertIn("Error converting thumbnail", stderr)
|
||||
self.assertTrue(thumb_file.exists())
|
||||
|
||||
@mock.patch("documents.management.commands.convert_thumbnails.run_convert")
def test_convert_single_thumbnail_no_output(self, run_convert_mock):
    """
    GIVEN:
        - Document exists with PNG thumbnail
    WHEN:
        - Thumbnail conversion is attempted, but there is no output WebP
    THEN:
        - Conversion is attempted exactly once
        - A "Converted thumbnail doesn't exist" warning is written to stderr
        - The PNG thumbnail is left in place and no WebP file is created
    """

    with tempfile.TemporaryDirectory() as thumbnail_dir:

        with override_settings(
            THUMBNAIL_DIR=thumbnail_dir,
        ):

            thumb_file = self.create_png_thumbnail_file(thumbnail_dir)

            stdout, stderr = self.call_command()

            # run_convert is mocked, so it is invoked but writes no output
            run_convert_mock.assert_called_once()
            self.assertIn(f"{thumb_file}", stdout)
            self.assertNotIn("Conversion to WebP completed", stdout)
            self.assertIn("Converted thumbnail doesn't exist", stderr)

            # The original must survive and nothing new may appear beside it
            self.assertTrue(thumb_file.exists())
            self.assertFalse(thumb_file.with_suffix(".webp").exists())
|
||||
|
@@ -87,31 +87,6 @@ def fake_get_thumbnail(self, path, mimetype, file_name):
|
||||
return os.path.join(os.path.dirname(__file__), "examples", "no-text.png")
|
||||
|
||||
|
||||
class TestBaseParser(TestCase):
    """
    Tests for DocumentParser.get_optimised_thumbnail, run with SCRATCH_DIR
    pointed at a throwaway directory created per test.
    """

    def setUp(self) -> None:

        self.scratch = tempfile.mkdtemp()
        # Keep a handle on the override so tearDown can undo it; enabling it
        # without ever disabling would leak SCRATCH_DIR into later tests.
        self._scratch_override = override_settings(SCRATCH_DIR=self.scratch)
        self._scratch_override.enable()

    def tearDown(self) -> None:
        # Restore settings before deleting the directory they point at
        self._scratch_override.disable()
        shutil.rmtree(self.scratch)

    @mock.patch("documents.parsers.DocumentParser.get_thumbnail", fake_get_thumbnail)
    @override_settings(OPTIMIZE_THUMBNAILS=True)
    def test_get_optimised_thumbnail(self):
        """
        With optimisation enabled, the call must complete without raising.
        """
        parser = DocumentParser(None)

        parser.get_optimised_thumbnail("any", "not important", "document.pdf")

    @mock.patch("documents.parsers.DocumentParser.get_thumbnail", fake_get_thumbnail)
    @override_settings(OPTIMIZE_THUMBNAILS=False)
    def test_get_optimised_thumb_disabled(self):
        """
        With optimisation disabled, the path from get_thumbnail is returned
        unchanged.
        """
        parser = DocumentParser(None)

        path = parser.get_optimised_thumbnail("any", "not important", "document.pdf")
        self.assertEqual(path, fake_get_thumbnail(None, None, None, None))
|
||||
|
||||
|
||||
class TestParserAvailability(TestCase):
|
||||
def test_file_extensions(self):
|
||||
|
||||
|
Reference in New Issue
Block a user