Merge pull request #1127 from paperless-ngx/feature-webp-thumbnails

Feature: Change document thumbnails to WebP
2025-06-06 14:07:26 -05:00 · 2022-06-12 09:44:53 -07:00 · 2022-06-12 09:44:53 -07:00 · 72ee904e67
commit 72ee904e67
parent e4a26164de 222e1968d8
33 changed files with 398 additions and 110 deletions
--- a/.github/workflows/reusable-ci-backend.yml
+++ b/.github/workflows/reusable-ci-backend.yml
@ -74,7 +74,7 @@ jobs:
        name: Install system dependencies
        run: |
          sudo apt-get update -qq
-          sudo apt-get install -qq --no-install-recommends unpaper tesseract-ocr imagemagick ghostscript optipng libzbar0 poppler-utils
+          sudo apt-get install -qq --no-install-recommends unpaper tesseract-ocr imagemagick ghostscript libzbar0 poppler-utils
      -
        name: Install Python dependencies
        run: |
--- a/3
+++ b/3
@ -77,15 +77,12 @@ ARG RUNTIME_PACKAGES="\
  libraqm0 \
  libgnutls30 \
  libjpeg62-turbo \
  optipng \
  python3 \
  python3-pip \
  python3-setuptools \
  postgresql-client \
  # For Numpy
  libatlas3-base \
  # thumbnail size reduction
  pngquant \
  # OCRmyPDF dependencies
  tesseract-ocr \
  tesseract-ocr-eng \
--- a/docker/install_management_commands.sh
+++ b/docker/install_management_commands.sh
@ -2,7 +2,18 @@
 set -eu
-for command in document_archiver document_exporter document_importer mail_fetcher document_create_classifier document_index document_renamer document_retagger document_thumbnails document_sanity_checker manage_superuser;
+for command in decrypt_documents \
 	document_archiver \
 	document_exporter \
 	document_importer \
 	mail_fetcher \
 	document_create_classifier \
 	document_index \
 	document_renamer \
 	document_retagger \
 	document_thumbnails \
 	document_sanity_checker \
 	manage_superuser;
 do
 	echo "installing $command..."
 	sed "s/management_command/$command/g" management_script.sh > /usr/local/bin/$command
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@ -712,13 +712,6 @@ PAPERLESS_CONVERT_TMPDIR=<path>
    Default is none, which disables the temporary directory.
 PAPERLESS_OPTIMIZE_THUMBNAILS=<bool>
    Use optipng to optimize thumbnails. This usually reduces the size of
    thumbnails by about 20%, but uses considerable compute time during
    consumption.
    Defaults to true.
 PAPERLESS_POST_CONSUME_SCRIPT=<filename>
    After a document is consumed, Paperless can trigger an arbitrary script if
    you like.  This script will be passed a number of arguments for you to work
@ -789,9 +782,6 @@ PAPERLESS_CONVERT_BINARY=<path>
 PAPERLESS_GS_BINARY=<path>
    Defaults to "/usr/bin/gs".
 PAPERLESS_OPTIPNG_BINARY=<path>
    Defaults to "/usr/bin/optipng".
 .. _configuration-docker:
--- a/docs/setup.rst
+++ b/docs/setup.rst
@ -286,7 +286,6 @@ writing. Windows is not and will never be supported.
    *   ``fonts-liberation`` for generating thumbnails for plain text files
    *   ``imagemagick`` >= 6 for PDF conversion
    *   ``optipng`` for optimizing thumbnails
    *   ``gnupg`` for handling encrypted documents
    *   ``libpq-dev`` for PostgreSQL
    *   ``libmagic-dev`` for mime type detection
@ -298,7 +297,7 @@ writing. Windows is not and will never be supported.
    .. code::
-        python3 python3-pip python3-dev imagemagick fonts-liberation optipng gnupg libpq-dev libmagic-dev mime-support libzbar0 poppler-utils
+        python3 python3-pip python3-dev imagemagick fonts-liberation gnupg libpq-dev libmagic-dev mime-support libzbar0 poppler-utils
    These dependencies are required for OCRmyPDF, which is used for text recognition.
@ -730,8 +729,6 @@ configuring some options in paperless can help improve performance immensely:
 *   If you want to perform OCR on the device, consider using ``PAPERLESS_OCR_CLEAN=none``.
    This will speed up OCR times and use less memory at the expense of slightly worse
    OCR results.
 *   Set ``PAPERLESS_OPTIMIZE_THUMBNAILS`` to 'false' if you want faster consumption
    times. Thumbnails will be about 20% larger.
 *   If using docker, consider setting ``PAPERLESS_WEBSERVER_WORKERS`` to
    1. This will save some memory.
--- a/paperless.conf.example
+++ b/paperless.conf.example
@ -65,7 +65,6 @@
 #PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS=false
 #PAPERLESS_CONSUMER_ENABLE_BARCODES=false
 #PAPERLESS_CONSUMER_BARCODE_STRING=PATCHT
 #PAPERLESS_OPTIMIZE_THUMBNAILS=true
 #PAPERLESS_PRE_CONSUME_SCRIPT=/path/to/an/arbitrary/script.sh
 #PAPERLESS_POST_CONSUME_SCRIPT=/path/to/an/arbitrary/script.sh
 #PAPERLESS_FILENAME_DATE_ORDER=YMD
@ -84,4 +83,3 @@
 #PAPERLESS_CONVERT_BINARY=/usr/bin/convert
 #PAPERLESS_GS_BINARY=/usr/bin/gs
 #PAPERLESS_OPTIPNG_BINARY=/usr/bin/optipng
--- a/src/documents/checks.py
+++ b/src/documents/checks.py
@ -11,7 +11,6 @@ from documents.signals import document_consumer_declaration
@register()
 def changed_password_check(app_configs, **kwargs):
    from documents.models import Document
    from paperless.db import GnuPG
--- a/src/documents/consumer.py
+++ b/src/documents/consumer.py
@ -273,7 +273,7 @@ class Consumer(LoggingMixin):
            self.log("debug", f"Generating thumbnail for {self.filename}...")
            self._send_progress(70, 100, "WORKING", MESSAGE_GENERATING_THUMBNAIL)
-            thumbnail = document_parser.get_optimised_thumbnail(
+            thumbnail = document_parser.get_thumbnail(
                self.path,
                mime_type,
                self.filename,
--- a/src/documents/management/commands/document_archiver.py
+++ b/src/documents/management/commands/document_archiver.py
@ -41,7 +41,7 @@ def handle_document(document_id):
    try:
        parser.parse(document.source_path, mime_type, document.get_public_filename())
-        thumbnail = parser.get_optimised_thumbnail(
+        thumbnail = parser.get_thumbnail(
            document.source_path,
            mime_type,
            document.get_public_filename(),
--- a/src/documents/management/commands/document_exporter.py
+++ b/src/documents/management/commands/document_exporter.py
@ -189,7 +189,7 @@ class Command(BaseCommand):
            original_target = os.path.join(self.target, original_name)
            document_dict[EXPORTER_FILE_NAME] = original_name
-            thumbnail_name = base_name + "-thumbnail.png"
+            thumbnail_name = base_name + "-thumbnail.webp"
            thumbnail_target = os.path.join(self.target, thumbnail_name)
            document_dict[EXPORTER_THUMBNAIL_NAME] = thumbnail_name
--- a/src/documents/management/commands/document_thumbnails.py
+++ b/src/documents/management/commands/document_thumbnails.py
@ -11,7 +11,7 @@ from ...parsers import get_parser_class_for_mime_type
 def _process_document(doc_in):
-    document = Document.objects.get(id=doc_in)
+    document: Document = Document.objects.get(id=doc_in)
    parser_class = get_parser_class_for_mime_type(document.mime_type)
    if parser_class:
@ -21,7 +21,8 @@ def _process_document(doc_in):
        return
    try:
-        thumb = parser.get_optimised_thumbnail(
+
        thumb = parser.get_thumbnail(
            document.source_path,
            document.mime_type,
            document.get_public_filename(),
@ -69,7 +70,7 @@ class Command(BaseCommand):
        ids = [doc.id for doc in documents]
        # Note to future self: this prevents django from reusing database
-        # conncetions between processes, which is bad and does not work
+        # connections between processes, which is bad and does not work
        # with postgres.
        db.connections.close_all()
--- a/src/documents/migrations/1021_webp_thumbnail_conversion.py
+++ b/src/documents/migrations/1021_webp_thumbnail_conversion.py
@ -0,0 +1,107 @@
 # Generated by Django 4.0.5 on 2022-06-11 15:40
 import logging
 import multiprocessing.pool
 import shutil
 import tempfile
 import time
 from pathlib import Path
 from django.conf import settings
 from django.db import migrations
 from documents.parsers import run_convert
 logger = logging.getLogger("paperless.migrations")
 def _do_convert(work_package):
    """
    Converts a single existing thumbnail to WebP and swaps it into place.

    work_package is a 2-tuple of (existing_thumbnail, converted_thumbnail).
    existing_thumbnail is the current thumbnail file and converted_thumbnail
    is where run_convert should write its output; .parent, .name and
    .unlink() are used on them, so path objects are expected.

    Nothing is returned.  Any exception raised along the way is caught and
    logged instead of being propagated to the caller.
    """
    existing_thumbnail, converted_thumbnail = work_package
    try:
        logger.info(f"Converting thumbnail: {existing_thumbnail}")
        # Run actual conversion
        run_convert(
            density=300,
            scale="500x5000>",
            alpha="remove",
            strip=True,
            trim=False,
            auto_orient=True,
            input_file=f"{existing_thumbnail}[0]",
            output_file=str(converted_thumbnail),
        )
        # Copy newly created thumbnail to thumbnail directory
        shutil.copy(converted_thumbnail, existing_thumbnail.parent)
        # Remove the PNG version
        existing_thumbnail.unlink()
        logger.info(
            "Conversion to WebP completed, "
            f"replaced {existing_thumbnail.name} with {converted_thumbnail.name}",
        )
    except Exception as e:
        # Deliberately broad: a failure is only reported, never re-raised
        logger.error(f"Error converting thumbnail (existing file unchanged): {e}")
 def _convert_thumbnails_to_webp(apps, schema_editor):
    """
    Forward step of the migration: converts the ``*.png`` files found
    directly inside ``settings.THUMBNAIL_DIR`` to WebP.

    Each PNG is paired with a same-named ``.webp`` output path inside a
    temporary directory, and the pairs are handed to ``_do_convert`` through
    a multiprocessing pool of at most 4 worker processes.

    ``apps`` and ``schema_editor`` are the standard RunPython arguments and
    are not used here.  The total duration is logged at the end.

    NOTE(review): only ``*.png`` is globbed, so files named ``*.png.gpg``
    are not matched here, while ``Document.thumbnail_path`` now expects
    ``*.webp.gpg`` for GPG storage - confirm these are handled elsewhere.
    """
    start = time.time()
    with tempfile.TemporaryDirectory() as tempdir:
        work_packages = []
        for file in Path(settings.THUMBNAIL_DIR).glob("*.png"):
            existing_thumbnail = file.resolve()
            # Change the existing filename suffix from png to webp
            converted_thumbnail_name = existing_thumbnail.with_suffix(
                ".webp",
            ).name
            # Create the expected output filename in the tempdir
            converted_thumbnail = (
                Path(tempdir) / Path(converted_thumbnail_name)
            ).resolve()
            # Package up the necessary info
            work_packages.append(
                (existing_thumbnail, converted_thumbnail),
            )
        # An empty list is falsy: with no PNG thumbnails present, nothing is
        # announced and no worker processes are started
        if work_packages:
            logger.info(
                "\n\n"
                "  This is a one-time only migration to convert thumbnails for all of your\n"
                "  documents into WebP format.  If you have a lot of documents though, \n"
                "  this may take a while, so a coffee break may be in order."
                "\n",
            )
            with multiprocessing.pool.Pool(
                processes=min(multiprocessing.cpu_count(), 4),
                maxtasksperchild=4,
            ) as pool:
                pool.map(_do_convert, work_packages)
        end = time.time()
        duration = end - start
    logger.info(f"Conversion completed in {duration:.3f}s")
 class Migration(migrations.Migration):
    """
    Data-only migration: runs _convert_thumbnails_to_webp after
    1020_merge_20220518_1839.  No schema operations are declared.
    """
    dependencies = [
        ("documents", "1020_merge_20220518_1839"),
    ]
    operations = [
        migrations.RunPython(
            code=_convert_thumbnails_to_webp,
            # Reversing is a no-op: nothing here converts WebP files back
            reverse_code=migrations.RunPython.noop,
        ),
    ]
--- a/src/documents/models.py
+++ b/src/documents/models.py
@ -3,6 +3,7 @@ import logging
 import os
 import re
 from collections import OrderedDict
 from typing import Optional
 import dateutil.parser
 import pathvalidate
@ -228,7 +229,7 @@ class Document(models.Model):
        verbose_name = _("document")
        verbose_name_plural = _("documents")
-    def __str__(self):
+    def __str__(self) -> str:
        # Convert UTC database time to local time
        created = datetime.date.isoformat(timezone.localdate(self.created))
@ -242,7 +243,7 @@ class Document(models.Model):
        return res
    @property
-    def source_path(self):
+    def source_path(self) -> str:
        if self.filename:
            fname = str(self.filename)
        else:
@ -257,11 +258,11 @@ class Document(models.Model):
        return open(self.source_path, "rb")
    @property
-    def has_archive_version(self):
+    def has_archive_version(self) -> bool:
        return self.archive_filename is not None
    @property
-    def archive_path(self):
+    def archive_path(self) -> Optional[str]:
        if self.has_archive_version:
            return os.path.join(settings.ARCHIVE_DIR, str(self.archive_filename))
        else:
@ -271,7 +272,7 @@ class Document(models.Model):
    def archive_file(self):
        return open(self.archive_path, "rb")
-    def get_public_filename(self, archive=False, counter=0, suffix=None):
+    def get_public_filename(self, archive=False, counter=0, suffix=None) -> str:
        result = str(self)
        if counter:
@ -292,12 +293,14 @@ class Document(models.Model):
        return get_default_file_extension(self.mime_type)
    @property
-    def thumbnail_path(self):
+    def thumbnail_path(self) -> str:
-        file_name = f"{self.pk:07}.png"
+        webp_file_name = f"{self.pk:07}.webp"
        if self.storage_type == self.STORAGE_TYPE_GPG:
-            file_name += ".gpg"
+            webp_file_name += ".gpg"
-        return os.path.join(settings.THUMBNAIL_DIR, file_name)
+        webp_file_path = os.path.join(settings.THUMBNAIL_DIR, webp_file_name)
        return os.path.normpath(webp_file_path)
    @property
    def thumbnail_file(self):
--- a/src/documents/parsers.py
+++ b/src/documents/parsers.py
@ -150,11 +150,14 @@ def run_convert(
 def get_default_thumbnail() -> str:
    """
    Returns the path to a generic thumbnail: the "document.png" file in the
    "resources" directory located next to this module
    """
    return os.path.join(os.path.dirname(__file__), "resources", "document.png")
 def make_thumbnail_from_pdf_gs_fallback(in_path, temp_dir, logging_group=None) -> str:
-    out_path = os.path.join(temp_dir, "convert_gs.png")
+    out_path = os.path.join(temp_dir, "convert_gs.webp")
    # if convert fails, fall back to extracting
    # the first PDF page as a PNG using Ghostscript
@ -191,7 +194,7 @@ def make_thumbnail_from_pdf(in_path, temp_dir, logging_group=None) -> str:
    """
    The thumbnail of a PDF is just a 500px wide image of the first page.
    """
-    out_path = os.path.join(temp_dir, "convert.png")
+    out_path = os.path.join(temp_dir, "convert.webp")
    # Run convert to get a decent thumbnail
    try:
@ -319,29 +322,6 @@ class DocumentParser(LoggingMixin):
        """
        raise NotImplementedError()
    def get_optimised_thumbnail(self, document_path, mime_type, file_name=None):
        """
        Returns the path to a thumbnail for the document, optionally run
        through optipng.

        The thumbnail is first produced by get_thumbnail().  When
        settings.OPTIMIZE_THUMBNAILS is set, settings.OPTIPNG_BINARY is
        executed on it and the path of its output, "thumb_optipng.png" in
        self.tempdir, is returned.  Otherwise the path from get_thumbnail()
        is returned as is.

        Raises ParseError if the optipng process exits with a non-zero status.
        """
        thumbnail = self.get_thumbnail(document_path, mime_type, file_name)
        if settings.OPTIMIZE_THUMBNAILS:
            out_path = os.path.join(self.tempdir, "thumb_optipng.png")
            args = (
                settings.OPTIPNG_BINARY,
                "-silent",
                "-o5",
                thumbnail,
                "-out",
                out_path,
            )
            self.log("debug", f"Execute: {' '.join(args)}")
            # wait() blocks until the process ends and yields its exit status
            if not subprocess.Popen(args).wait() == 0:
                raise ParseError(f"Optipng failed at {args}")
            return out_path
        else:
            return thumbnail
    def get_text(self):
        return self.text
--- a/src/documents/tests/samples/documents/thumbnails/0000001.png
+++ b/src/documents/tests/samples/documents/thumbnails/0000001.png
--- a/src/documents/tests/samples/documents/thumbnails/0000001.webp
+++ b/src/documents/tests/samples/documents/thumbnails/0000001.webp
--- a/src/documents/tests/samples/documents/thumbnails/0000002.png
+++ b/src/documents/tests/samples/documents/thumbnails/0000002.png
--- a/src/documents/tests/samples/documents/thumbnails/0000002.webp
+++ b/src/documents/tests/samples/documents/thumbnails/0000002.webp
--- a/src/documents/tests/samples/documents/thumbnails/0000003.png
+++ b/src/documents/tests/samples/documents/thumbnails/0000003.png
--- a/src/documents/tests/samples/documents/thumbnails/0000003.webp
+++ b/src/documents/tests/samples/documents/thumbnails/0000003.webp
--- a/src/documents/tests/samples/documents/thumbnails/0000004.png.gpg
+++ b/src/documents/tests/samples/documents/thumbnails/0000004.png.gpg
--- a/src/documents/tests/samples/documents/thumbnails/0000004.webp.gpg
+++ b/src/documents/tests/samples/documents/thumbnails/0000004.webp.gpg
--- a/src/documents/tests/test_api.py
+++ b/src/documents/tests/test_api.py
@ -176,7 +176,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
        )
        with open(
-            os.path.join(self.dirs.thumbnail_dir, f"{doc.pk:07d}.png"),
+            os.path.join(self.dirs.thumbnail_dir, f"{doc.pk:07d}.webp"),
            "wb",
        ) as f:
            f.write(content_thumbnail)
@ -1022,7 +1022,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
            "samples",
            "documents",
            "thumbnails",
-            "0000001.png",
+            "0000001.webp",
        )
        archive_file = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
--- a/src/documents/tests/test_consumer.py
+++ b/src/documents/tests/test_consumer.py
@ -180,10 +180,10 @@ class DummyParser(DocumentParser):
    def __init__(self, logging_group, scratch_dir, archive_path):
        super().__init__(logging_group, None)
-        _, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=scratch_dir)
+        _, self.fake_thumb = tempfile.mkstemp(suffix=".webp", dir=scratch_dir)
        self.archive_path = archive_path
-    def get_optimised_thumbnail(self, document_path, mime_type, file_name=None):
+    def get_thumbnail(self, document_path, mime_type, file_name=None):
        return self.fake_thumb
    def parse(self, document_path, mime_type, file_name=None):
@ -194,12 +194,12 @@ class CopyParser(DocumentParser):
    def get_thumbnail(self, document_path, mime_type, file_name=None):
        return self.fake_thumb
-    def get_optimised_thumbnail(self, document_path, mime_type, file_name=None):
+    def get_thumbnail(self, document_path, mime_type, file_name=None):
        return self.fake_thumb
    def __init__(self, logging_group, progress_callback=None):
        super().__init__(logging_group, progress_callback)
-        _, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=self.tempdir)
+        _, self.fake_thumb = tempfile.mkstemp(suffix=".webp", dir=self.tempdir)
    def parse(self, document_path, mime_type, file_name=None):
        self.text = "The text"
@ -214,9 +214,9 @@ class FaultyParser(DocumentParser):
    def __init__(self, logging_group, scratch_dir):
        super().__init__(logging_group)
-        _, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=scratch_dir)
+        _, self.fake_thumb = tempfile.mkstemp(suffix=".webp", dir=scratch_dir)
-    def get_optimised_thumbnail(self, document_path, mime_type, file_name=None):
+    def get_thumbnail(self, document_path, mime_type, file_name=None):
        return self.fake_thumb
    def parse(self, document_path, mime_type, file_name=None):
@ -230,6 +230,8 @@ def fake_magic_from_file(file, mime=False):
            return "application/pdf"
        elif os.path.splitext(file)[1] == ".png":
            return "image/png"
        elif os.path.splitext(file)[1] == ".webp":
            return "image/webp"
        else:
            return "unknown"
    else:
--- a/src/documents/tests/test_management.py
+++ b/src/documents/tests/test_management.py
@ -150,9 +150,9 @@ class TestDecryptDocuments(TestCase):
                "samples",
                "documents",
                "thumbnails",
-                f"0000004.png.gpg",
+                f"0000004.webp.gpg",
            ),
-            os.path.join(thumb_dir, f"{doc.id:07}.png.gpg"),
+            os.path.join(thumb_dir, f"{doc.id:07}.webp.gpg"),
        )
        call_command("decrypt_documents")
@ -163,7 +163,7 @@ class TestDecryptDocuments(TestCase):
        self.assertEqual(doc.filename, "0000004.pdf")
        self.assertTrue(os.path.isfile(os.path.join(originals_dir, "0000004.pdf")))
        self.assertTrue(os.path.isfile(doc.source_path))
-        self.assertTrue(os.path.isfile(os.path.join(thumb_dir, f"{doc.id:07}.png")))
+        self.assertTrue(os.path.isfile(os.path.join(thumb_dir, f"{doc.id:07}.webp")))
        self.assertTrue(os.path.isfile(doc.thumbnail_path))
        with doc.source_file as f:
--- a/src/documents/tests/test_migration_webp_conversion.py
+++ b/src/documents/tests/test_migration_webp_conversion.py
@ -0,0 +1,231 @@
 import shutil
 import tempfile
 from pathlib import Path
 from typing import Callable
 from typing import Iterable
 from typing import Union
 from unittest import mock
 from django.test import override_settings
 from documents.tests.test_migration_archive_files import thumbnail_path
 from documents.tests.utils import TestMigrations
@mock.patch(
    "documents.migrations.1021_webp_thumbnail_conversion.multiprocessing.pool.Pool.map",
)
@mock.patch("documents.migrations.1021_webp_thumbnail_conversion.run_convert")
class TestMigrateWebPThumbnails(TestMigrations):
    """
    Tests the migration which converts PNG thumbnails to WebP.

    Both run_convert and Pool.map of the migration module are patched for
    every test, so no external converter is executed and no worker
    processes are started.  Decorators apply bottom-up, hence each test
    receives run_convert_mock first and map_mock second.
    """

    migrate_from = "1020_merge_20220518_1839"
    migrate_to = "1021_webp_thumbnail_conversion"
    # The migration is started explicitly via performMigration() so the
    # thumbnail files can be created first
    auto_migrate = False

    def pretend_convert_output(self, *args, **kwargs):
        """
        Pretends to do the conversion, by copying the input file
        to the output file
        """
        input_file = kwargs["input_file"]
        frame_selector = "[0]"
        # Remove exactly the trailing "[0]" selector.  str.rstrip("[0]")
        # would instead strip any trailing run of the characters "[", "0"
        # and "]", and could eat into a filename ending in one of them.
        if input_file.endswith(frame_selector):
            input_file = input_file[: -len(frame_selector)]
        shutil.copy2(
            Path(input_file),
            Path(kwargs["output_file"]),
        )

    def pretend_map(self, func: Callable, iterable: Iterable):
        """
        Pretends to be the map of a multiprocessing.Pool, but secretly does
        everything in series
        """
        for item in iterable:
            func(item)

    def create_dummy_thumbnails(
        self,
        thumb_dir: Path,
        ext: str,
        count: int,
        start_count: int = 0,
    ):
        """
        Helper to create a certain count of files of given extension in a given directory

        The files are empty and named with a zero-padded, 7 digit index,
        beginning at start_count
        """
        for idx in range(count):
            (Path(thumb_dir) / Path(f"{start_count + idx:07}.{ext}")).touch()
        # Triple check expected files exist
        self.assert_file_count_by_extension(ext, thumb_dir, count)

    def create_webp_thumbnail_files(
        self,
        thumb_dir: Path,
        count: int,
        start_count: int = 0,
    ):
        """
        Creates count empty dummy WebP thumbnail files in the given
        directory, numbered from start_count
        """
        self.create_dummy_thumbnails(thumb_dir, "webp", count, start_count)

    def create_png_thumbnail_file(
        self,
        thumb_dir: Path,
        count: int,
        start_count: int = 0,
    ):
        """
        Creates count empty dummy PNG thumbnail files in the given
        directory, numbered from start_count
        """
        self.create_dummy_thumbnails(thumb_dir, "png", count, start_count)

    def assert_file_count_by_extension(
        self,
        ext: str,
        dir: Union[str, Path],
        expected_count: int,
    ):
        """
        Helper to assert a certain count of given extension files in given directory
        """
        if not isinstance(dir, Path):
            dir = Path(dir)
        matching_files = list(dir.glob(f"*.{ext}"))
        self.assertEqual(len(matching_files), expected_count)

    def assert_png_file_count(self, dir: Path, expected_count: int):
        """
        Helper to assert a certain count of PNG extension files in given directory
        """
        self.assert_file_count_by_extension("png", dir, expected_count)

    def assert_webp_file_count(self, dir: Path, expected_count: int):
        """
        Helper to assert a certain count of WebP extension files in given directory
        """
        self.assert_file_count_by_extension("webp", dir, expected_count)

    def setUp(self):
        # Each test gets its own, initially empty thumbnail directory
        self.thumbnail_dir = Path(tempfile.mkdtemp()).resolve()
        return super().setUp()

    def tearDown(self) -> None:
        shutil.rmtree(self.thumbnail_dir)
        return super().tearDown()

    def test_do_nothing_if_converted(
        self,
        run_convert_mock: mock.MagicMock,
        map_mock: mock.MagicMock,
    ):
        """
        GIVEN:
            - Only WebP thumbnails exist
        WHEN:
            - Thumbnail conversion is attempted
        THEN:
            - Nothing is converted
        """
        map_mock.side_effect = self.pretend_map
        with override_settings(
            THUMBNAIL_DIR=self.thumbnail_dir,
        ):
            self.create_webp_thumbnail_files(self.thumbnail_dir, 3)
            self.performMigration()
            run_convert_mock.assert_not_called()
            self.assert_webp_file_count(self.thumbnail_dir, 3)

    def test_convert_single_thumbnail(
        self,
        run_convert_mock: mock.MagicMock,
        map_mock: mock.MagicMock,
    ):
        """
        GIVEN:
            - Three PNG thumbnails exist
        WHEN:
            - Thumbnail conversion is attempted
        THEN:
            - Each thumbnail is converted to WebP
        """
        map_mock.side_effect = self.pretend_map
        run_convert_mock.side_effect = self.pretend_convert_output
        with override_settings(
            THUMBNAIL_DIR=self.thumbnail_dir,
        ):
            self.create_png_thumbnail_file(self.thumbnail_dir, 3)
            self.performMigration()
            run_convert_mock.assert_called()
            self.assertEqual(run_convert_mock.call_count, 3)
            self.assert_webp_file_count(self.thumbnail_dir, 3)

    def test_convert_errors_out(
        self,
        run_convert_mock: mock.MagicMock,
        map_mock: mock.MagicMock,
    ):
        """
        GIVEN:
            - Three PNG thumbnails exist
        WHEN:
            - Thumbnail conversion is attempted, but raises an exception
        THEN:
            - Conversion is attempted for each thumbnail
            - The PNG thumbnails are all still present
        """
        map_mock.side_effect = self.pretend_map
        run_convert_mock.side_effect = OSError
        with override_settings(
            THUMBNAIL_DIR=self.thumbnail_dir,
        ):
            self.create_png_thumbnail_file(self.thumbnail_dir, 3)
            self.performMigration()
            run_convert_mock.assert_called()
            self.assertEqual(run_convert_mock.call_count, 3)
            self.assert_png_file_count(self.thumbnail_dir, 3)

    def test_convert_mixed(
        self,
        run_convert_mock: mock.MagicMock,
        map_mock: mock.MagicMock,
    ):
        """
        GIVEN:
            - Three PNG thumbnails and two WebP thumbnails exist
        WHEN:
            - Thumbnail conversion is attempted
        THEN:
            - Only the three PNG thumbnails are converted
            - No PNG thumbnails remain and five WebP thumbnails exist
        """
        map_mock.side_effect = self.pretend_map
        run_convert_mock.side_effect = self.pretend_convert_output
        with override_settings(
            THUMBNAIL_DIR=self.thumbnail_dir,
        ):
            self.create_png_thumbnail_file(self.thumbnail_dir, 3)
            self.create_webp_thumbnail_files(self.thumbnail_dir, 2, start_count=3)
            self.performMigration()
            run_convert_mock.assert_called()
            self.assertEqual(run_convert_mock.call_count, 3)
            self.assert_png_file_count(self.thumbnail_dir, 0)
            self.assert_webp_file_count(self.thumbnail_dir, 5)
--- a/src/documents/tests/test_parsers.py
+++ b/src/documents/tests/test_parsers.py
@ -87,31 +87,6 @@ def fake_get_thumbnail(self, path, mimetype, file_name):
    return os.path.join(os.path.dirname(__file__), "examples", "no-text.png")
 class TestBaseParser(TestCase):
    """
    Tests DocumentParser.get_optimised_thumbnail with get_thumbnail patched
    to fake_get_thumbnail, for both values of OPTIMIZE_THUMBNAILS.
    """
    def setUp(self) -> None:
        self.scratch = tempfile.mkdtemp()
        # NOTE(review): this override is enabled here but tearDown below only
        # removes the directory and never disables it - confirm intended
        override_settings(SCRATCH_DIR=self.scratch).enable()
    def tearDown(self) -> None:
        shutil.rmtree(self.scratch)
    @mock.patch("documents.parsers.DocumentParser.get_thumbnail", fake_get_thumbnail)
    @override_settings(OPTIMIZE_THUMBNAILS=True)
    def test_get_optimised_thumbnail(self):
        # No assertion: the test passes as long as the call does not raise
        parser = DocumentParser(None)
        parser.get_optimised_thumbnail("any", "not important", "document.pdf")
    @mock.patch("documents.parsers.DocumentParser.get_thumbnail", fake_get_thumbnail)
    @override_settings(OPTIMIZE_THUMBNAILS=False)
    def test_get_optimised_thumb_disabled(self):
        # With optimisation disabled, the path from get_thumbnail must be
        # returned unchanged
        parser = DocumentParser(None)
        path = parser.get_optimised_thumbnail("any", "not important", "document.pdf")
        self.assertEqual(path, fake_get_thumbnail(None, None, None, None))
 class TestParserAvailability(TestCase):
    def test_file_extensions(self):
--- a/src/documents/tests/test_sanity_check.py
+++ b/src/documents/tests/test_sanity_check.py
@ -42,9 +42,9 @@ class TestSanityCheck(DirectoriesMixin, TestCase):
                    "samples",
                    "documents",
                    "thumbnails",
-                    "0000001.png",
+                    "0000001.webp",
                ),
-                os.path.join(self.dirs.thumbnail_dir, "0000001.png"),
+                os.path.join(self.dirs.thumbnail_dir, "0000001.webp"),
            )
        return Document.objects.create(
--- a/src/documents/views.py
+++ b/src/documents/views.py
@ -362,7 +362,8 @@ class DocumentViewSet(
                handle = doc.thumbnail_file
            # TODO: Send ETag information and use that to send new thumbnails
            #  if available
-            return HttpResponse(handle, content_type="image/png")
+
            return HttpResponse(handle, content_type="image/webp")
        except (FileNotFoundError, Document.DoesNotExist):
            raise Http404()
--- a/src/paperless/checks.py
+++ b/src/paperless/checks.py
@ -72,7 +72,7 @@ def binaries_check(app_configs, **kwargs):
    error = "Paperless can't find {}. Without it, consumption is impossible."
    hint = "Either it's not in your ${PATH} or it's not installed."
-    binaries = (settings.CONVERT_BINARY, settings.OPTIPNG_BINARY, "tesseract")
+    binaries = (settings.CONVERT_BINARY, "tesseract")
    check_messages = []
    for binary in binaries:
--- a/src/paperless/settings.py
+++ b/src/paperless/settings.py
@ -526,8 +526,6 @@ CONSUMER_BARCODE_TIFF_SUPPORT = __get_boolean(
 CONSUMER_BARCODE_STRING = os.getenv("PAPERLESS_CONSUMER_BARCODE_STRING", "PATCHT")
 OPTIMIZE_THUMBNAILS = __get_boolean("PAPERLESS_OPTIMIZE_THUMBNAILS", "true")
 OCR_PAGES = int(os.getenv("PAPERLESS_OCR_PAGES", 0))
 # The default language that tesseract will attempt to use when parsing
@ -570,8 +568,6 @@ CONVERT_MEMORY_LIMIT = os.getenv("PAPERLESS_CONVERT_MEMORY_LIMIT")
 GS_BINARY = os.getenv("PAPERLESS_GS_BINARY", "gs")
 OPTIPNG_BINARY = os.getenv("PAPERLESS_OPTIPNG_BINARY", "optipng")
 # Pre-2.x versions of Paperless stored your documents locally with GPG
 # encryption, but that is no longer the default.  This behaviour is still
--- a/src/paperless/tests/test_checks.py
+++ b/src/paperless/tests/test_checks.py
@ -13,9 +13,9 @@ class TestChecks(DirectoriesMixin, TestCase):
    def test_binaries(self):
        self.assertEqual(binaries_check(None), [])
-    @override_settings(CONVERT_BINARY="uuuhh", OPTIPNG_BINARY="forgot")
+    @override_settings(CONVERT_BINARY="uuuhh")
    def test_binaries_fail(self):
-        self.assertEqual(len(binaries_check(None)), 2)
+        self.assertEqual(len(binaries_check(None)), 1)
    def test_paths_check(self):
        self.assertEqual(paths_check(None), [])
--- a/src/paperless_text/parsers.py
+++ b/src/paperless_text/parsers.py
@ -30,8 +30,8 @@ class TextDocumentParser(DocumentParser):
        )
        draw.text((5, 5), read_text(), font=font, fill="black")
-        out_path = os.path.join(self.tempdir, "thumb.png")
+        out_path = os.path.join(self.tempdir, "thumb.webp")
-        img.save(out_path)
+        img.save(out_path, format="WEBP")
        return out_path