Merge pull request #1127 from paperless-ngx/feature-webp-thumbnails

Feature: Change document thumbnails to WebP
This commit is contained in:
shamoon 2022-06-12 09:44:53 -07:00 committed by GitHub
commit 72ee904e67
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
33 changed files with 398 additions and 110 deletions

View File

@ -74,7 +74,7 @@ jobs:
name: Install system dependencies
run: |
sudo apt-get update -qq
sudo apt-get install -qq --no-install-recommends unpaper tesseract-ocr imagemagick ghostscript optipng libzbar0 poppler-utils
sudo apt-get install -qq --no-install-recommends unpaper tesseract-ocr imagemagick ghostscript libzbar0 poppler-utils
-
name: Install Python dependencies
run: |

View File

@ -77,15 +77,12 @@ ARG RUNTIME_PACKAGES="\
libraqm0 \
libgnutls30 \
libjpeg62-turbo \
optipng \
python3 \
python3-pip \
python3-setuptools \
postgresql-client \
# For Numpy
libatlas3-base \
# thumbnail size reduction
pngquant \
# OCRmyPDF dependencies
tesseract-ocr \
tesseract-ocr-eng \

View File

@ -2,7 +2,18 @@
set -eu
for command in document_archiver document_exporter document_importer mail_fetcher document_create_classifier document_index document_renamer document_retagger document_thumbnails document_sanity_checker manage_superuser;
for command in decrypt_documents \
document_archiver \
document_exporter \
document_importer \
mail_fetcher \
document_create_classifier \
document_index \
document_renamer \
document_retagger \
document_thumbnails \
document_sanity_checker \
manage_superuser;
do
echo "installing $command..."
sed "s/management_command/$command/g" management_script.sh > /usr/local/bin/$command

View File

@ -712,13 +712,6 @@ PAPERLESS_CONVERT_TMPDIR=<path>
Default is none, which disables the temporary directory.
PAPERLESS_OPTIMIZE_THUMBNAILS=<bool>
Use optipng to optimize thumbnails. This usually reduces the size of
thumbnails by about 20%, but uses considerable compute time during
consumption.
Defaults to true.
PAPERLESS_POST_CONSUME_SCRIPT=<filename>
After a document is consumed, Paperless can trigger an arbitrary script if
you like. This script will be passed a number of arguments for you to work
@ -789,9 +782,6 @@ PAPERLESS_CONVERT_BINARY=<path>
PAPERLESS_GS_BINARY=<path>
Defaults to "/usr/bin/gs".
PAPERLESS_OPTIPNG_BINARY=<path>
Defaults to "/usr/bin/optipng".
.. _configuration-docker:

View File

@ -286,7 +286,6 @@ writing. Windows is not and will never be supported.
* ``fonts-liberation`` for generating thumbnails for plain text files
* ``imagemagick`` >= 6 for PDF conversion
* ``optipng`` for optimizing thumbnails
* ``gnupg`` for handling encrypted documents
* ``libpq-dev`` for PostgreSQL
* ``libmagic-dev`` for mime type detection
@ -298,7 +297,7 @@ writing. Windows is not and will never be supported.
.. code::
python3 python3-pip python3-dev imagemagick fonts-liberation optipng gnupg libpq-dev libmagic-dev mime-support libzbar0 poppler-utils
python3 python3-pip python3-dev imagemagick fonts-liberation gnupg libpq-dev libmagic-dev mime-support libzbar0 poppler-utils
These dependencies are required for OCRmyPDF, which is used for text recognition.
@ -730,8 +729,6 @@ configuring some options in paperless can help improve performance immensely:
* If you want to perform OCR on the device, consider using ``PAPERLESS_OCR_CLEAN=none``.
This will speed up OCR times and use less memory at the expense of slightly worse
OCR results.
* Set ``PAPERLESS_OPTIMIZE_THUMBNAILS`` to 'false' if you want faster consumption
times. Thumbnails will be about 20% larger.
* If using docker, consider setting ``PAPERLESS_WEBSERVER_WORKERS`` to
1. This will save some memory.

View File

@ -65,7 +65,6 @@
#PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS=false
#PAPERLESS_CONSUMER_ENABLE_BARCODES=false
#PAPERLESS_CONSUMER_ENABLE_BARCODES=PATCHT
#PAPERLESS_OPTIMIZE_THUMBNAILS=true
#PAPERLESS_PRE_CONSUME_SCRIPT=/path/to/an/arbitrary/script.sh
#PAPERLESS_POST_CONSUME_SCRIPT=/path/to/an/arbitrary/script.sh
#PAPERLESS_FILENAME_DATE_ORDER=YMD
@ -84,4 +83,3 @@
#PAPERLESS_CONVERT_BINARY=/usr/bin/convert
#PAPERLESS_GS_BINARY=/usr/bin/gs
#PAPERLESS_OPTIPNG_BINARY=/usr/bin/optipng

View File

@ -11,7 +11,6 @@ from documents.signals import document_consumer_declaration
@register()
def changed_password_check(app_configs, **kwargs):
from documents.models import Document
from paperless.db import GnuPG

View File

@ -273,7 +273,7 @@ class Consumer(LoggingMixin):
self.log("debug", f"Generating thumbnail for {self.filename}...")
self._send_progress(70, 100, "WORKING", MESSAGE_GENERATING_THUMBNAIL)
thumbnail = document_parser.get_optimised_thumbnail(
thumbnail = document_parser.get_thumbnail(
self.path,
mime_type,
self.filename,

View File

@ -41,7 +41,7 @@ def handle_document(document_id):
try:
parser.parse(document.source_path, mime_type, document.get_public_filename())
thumbnail = parser.get_optimised_thumbnail(
thumbnail = parser.get_thumbnail(
document.source_path,
mime_type,
document.get_public_filename(),

View File

@ -189,7 +189,7 @@ class Command(BaseCommand):
original_target = os.path.join(self.target, original_name)
document_dict[EXPORTER_FILE_NAME] = original_name
thumbnail_name = base_name + "-thumbnail.png"
thumbnail_name = base_name + "-thumbnail.webp"
thumbnail_target = os.path.join(self.target, thumbnail_name)
document_dict[EXPORTER_THUMBNAIL_NAME] = thumbnail_name

View File

@ -11,7 +11,7 @@ from ...parsers import get_parser_class_for_mime_type
def _process_document(doc_in):
document = Document.objects.get(id=doc_in)
document: Document = Document.objects.get(id=doc_in)
parser_class = get_parser_class_for_mime_type(document.mime_type)
if parser_class:
@ -21,7 +21,8 @@ def _process_document(doc_in):
return
try:
thumb = parser.get_optimised_thumbnail(
thumb = parser.get_thumbnail(
document.source_path,
document.mime_type,
document.get_public_filename(),
@ -69,7 +70,7 @@ class Command(BaseCommand):
ids = [doc.id for doc in documents]
# Note to future self: this prevents django from reusing database
# conncetions between processes, which is bad and does not work
# connections between processes, which is bad and does not work
# with postgres.
db.connections.close_all()

View File

@ -0,0 +1,107 @@
# Generated by Django 4.0.5 on 2022-06-11 15:40
import logging
import multiprocessing.pool
import shutil
import tempfile
import time
from pathlib import Path
from django.conf import settings
from django.db import migrations
from documents.parsers import run_convert
logger = logging.getLogger("paperless.migrations")
def _do_convert(work_package):
existing_thumbnail, converted_thumbnail = work_package
try:
logger.info(f"Converting thumbnail: {existing_thumbnail}")
# Run actual conversion
run_convert(
density=300,
scale="500x5000>",
alpha="remove",
strip=True,
trim=False,
auto_orient=True,
input_file=f"{existing_thumbnail}[0]",
output_file=str(converted_thumbnail),
)
# Copy newly created thumbnail to thumbnail directory
shutil.copy(converted_thumbnail, existing_thumbnail.parent)
# Remove the PNG version
existing_thumbnail.unlink()
logger.info(
"Conversion to WebP completed, "
f"replaced {existing_thumbnail.name} with {converted_thumbnail.name}",
)
except Exception as e:
logger.error(f"Error converting thumbnail (existing file unchanged): {e}")
def _convert_thumbnails_to_webp(apps, schema_editor):
start = time.time()
with tempfile.TemporaryDirectory() as tempdir:
work_packages = []
for file in Path(settings.THUMBNAIL_DIR).glob("*.png"):
existing_thumbnail = file.resolve()
# Change the existing filename suffix from png to webp
converted_thumbnail_name = existing_thumbnail.with_suffix(
".webp",
).name
# Create the expected output filename in the tempdir
converted_thumbnail = (
Path(tempdir) / Path(converted_thumbnail_name)
).resolve()
# Package up the necessary info
work_packages.append(
(existing_thumbnail, converted_thumbnail),
)
if len(work_packages):
logger.info(
"\n\n"
" This is a one-time only migration to convert thumbnails for all of your\n"
" documents into WebP format. If you have a lot of documents though, \n"
" this may take a while, so a coffee break may be in order."
"\n",
)
with multiprocessing.pool.Pool(
processes=min(multiprocessing.cpu_count(), 4),
maxtasksperchild=4,
) as pool:
pool.map(_do_convert, work_packages)
end = time.time()
duration = end - start
logger.info(f"Conversion completed in {duration:.3f}s")
class Migration(migrations.Migration):
dependencies = [
("documents", "1020_merge_20220518_1839"),
]
operations = [
migrations.RunPython(
code=_convert_thumbnails_to_webp,
reverse_code=migrations.RunPython.noop,
),
]

View File

@ -3,6 +3,7 @@ import logging
import os
import re
from collections import OrderedDict
from typing import Optional
import dateutil.parser
import pathvalidate
@ -228,7 +229,7 @@ class Document(models.Model):
verbose_name = _("document")
verbose_name_plural = _("documents")
def __str__(self):
def __str__(self) -> str:
# Convert UTC database time to local time
created = datetime.date.isoformat(timezone.localdate(self.created))
@ -242,7 +243,7 @@ class Document(models.Model):
return res
@property
def source_path(self):
def source_path(self) -> str:
if self.filename:
fname = str(self.filename)
else:
@ -257,11 +258,11 @@ class Document(models.Model):
return open(self.source_path, "rb")
@property
def has_archive_version(self):
def has_archive_version(self) -> bool:
return self.archive_filename is not None
@property
def archive_path(self):
def archive_path(self) -> Optional[str]:
if self.has_archive_version:
return os.path.join(settings.ARCHIVE_DIR, str(self.archive_filename))
else:
@ -271,7 +272,7 @@ class Document(models.Model):
def archive_file(self):
return open(self.archive_path, "rb")
def get_public_filename(self, archive=False, counter=0, suffix=None):
def get_public_filename(self, archive=False, counter=0, suffix=None) -> str:
result = str(self)
if counter:
@ -292,12 +293,14 @@ class Document(models.Model):
return get_default_file_extension(self.mime_type)
@property
def thumbnail_path(self):
file_name = f"{self.pk:07}.png"
def thumbnail_path(self) -> str:
webp_file_name = f"{self.pk:07}.webp"
if self.storage_type == self.STORAGE_TYPE_GPG:
file_name += ".gpg"
webp_file_name += ".gpg"
return os.path.join(settings.THUMBNAIL_DIR, file_name)
webp_file_path = os.path.join(settings.THUMBNAIL_DIR, webp_file_name)
return os.path.normpath(webp_file_path)
@property
def thumbnail_file(self):

View File

@ -150,11 +150,14 @@ def run_convert(
def get_default_thumbnail() -> str:
"""
Returns the path to a generic thumbnail
"""
return os.path.join(os.path.dirname(__file__), "resources", "document.png")
def make_thumbnail_from_pdf_gs_fallback(in_path, temp_dir, logging_group=None) -> str:
out_path = os.path.join(temp_dir, "convert_gs.png")
out_path = os.path.join(temp_dir, "convert_gs.webp")
# if convert fails, fall back to extracting
# the first PDF page as a PNG using Ghostscript
@ -191,7 +194,7 @@ def make_thumbnail_from_pdf(in_path, temp_dir, logging_group=None) -> str:
"""
The thumbnail of a PDF is just a 500px wide image of the first page.
"""
out_path = os.path.join(temp_dir, "convert.png")
out_path = os.path.join(temp_dir, "convert.webp")
# Run convert to get a decent thumbnail
try:
@ -319,29 +322,6 @@ class DocumentParser(LoggingMixin):
"""
raise NotImplementedError()
def get_optimised_thumbnail(self, document_path, mime_type, file_name=None):
thumbnail = self.get_thumbnail(document_path, mime_type, file_name)
if settings.OPTIMIZE_THUMBNAILS:
out_path = os.path.join(self.tempdir, "thumb_optipng.png")
args = (
settings.OPTIPNG_BINARY,
"-silent",
"-o5",
thumbnail,
"-out",
out_path,
)
self.log("debug", f"Execute: {' '.join(args)}")
if not subprocess.Popen(args).wait() == 0:
raise ParseError(f"Optipng failed at {args}")
return out_path
else:
return thumbnail
def get_text(self):
return self.text

Binary file not shown.

Before

Width:  |  Height:  |  Size: 7.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.6 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 7.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.6 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 7.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.6 KiB

View File

@ -176,7 +176,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
)
with open(
os.path.join(self.dirs.thumbnail_dir, f"{doc.pk:07d}.png"),
os.path.join(self.dirs.thumbnail_dir, f"{doc.pk:07d}.webp"),
"wb",
) as f:
f.write(content_thumbnail)
@ -1022,7 +1022,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
"samples",
"documents",
"thumbnails",
"0000001.png",
"0000001.webp",
)
archive_file = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")

View File

@ -180,10 +180,10 @@ class DummyParser(DocumentParser):
def __init__(self, logging_group, scratch_dir, archive_path):
super().__init__(logging_group, None)
_, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=scratch_dir)
_, self.fake_thumb = tempfile.mkstemp(suffix=".webp", dir=scratch_dir)
self.archive_path = archive_path
def get_optimised_thumbnail(self, document_path, mime_type, file_name=None):
def get_thumbnail(self, document_path, mime_type, file_name=None):
return self.fake_thumb
def parse(self, document_path, mime_type, file_name=None):
@ -194,12 +194,12 @@ class CopyParser(DocumentParser):
def get_thumbnail(self, document_path, mime_type, file_name=None):
return self.fake_thumb
def get_optimised_thumbnail(self, document_path, mime_type, file_name=None):
def get_thumbnail(self, document_path, mime_type, file_name=None):
return self.fake_thumb
def __init__(self, logging_group, progress_callback=None):
super().__init__(logging_group, progress_callback)
_, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=self.tempdir)
_, self.fake_thumb = tempfile.mkstemp(suffix=".webp", dir=self.tempdir)
def parse(self, document_path, mime_type, file_name=None):
self.text = "The text"
@ -214,9 +214,9 @@ class FaultyParser(DocumentParser):
def __init__(self, logging_group, scratch_dir):
super().__init__(logging_group)
_, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=scratch_dir)
_, self.fake_thumb = tempfile.mkstemp(suffix=".webp", dir=scratch_dir)
def get_optimised_thumbnail(self, document_path, mime_type, file_name=None):
def get_thumbnail(self, document_path, mime_type, file_name=None):
return self.fake_thumb
def parse(self, document_path, mime_type, file_name=None):
@ -230,6 +230,8 @@ def fake_magic_from_file(file, mime=False):
return "application/pdf"
elif os.path.splitext(file)[1] == ".png":
return "image/png"
elif os.path.splitext(file)[1] == ".webp":
return "image/webp"
else:
return "unknown"
else:

View File

@ -150,9 +150,9 @@ class TestDecryptDocuments(TestCase):
"samples",
"documents",
"thumbnails",
f"0000004.png.gpg",
f"0000004.webp.gpg",
),
os.path.join(thumb_dir, f"{doc.id:07}.png.gpg"),
os.path.join(thumb_dir, f"{doc.id:07}.webp.gpg"),
)
call_command("decrypt_documents")
@ -163,7 +163,7 @@ class TestDecryptDocuments(TestCase):
self.assertEqual(doc.filename, "0000004.pdf")
self.assertTrue(os.path.isfile(os.path.join(originals_dir, "0000004.pdf")))
self.assertTrue(os.path.isfile(doc.source_path))
self.assertTrue(os.path.isfile(os.path.join(thumb_dir, f"{doc.id:07}.png")))
self.assertTrue(os.path.isfile(os.path.join(thumb_dir, f"{doc.id:07}.webp")))
self.assertTrue(os.path.isfile(doc.thumbnail_path))
with doc.source_file as f:

View File

@ -0,0 +1,231 @@
import shutil
import tempfile
from pathlib import Path
from typing import Callable
from typing import Iterable
from typing import Union
from unittest import mock
from django.test import override_settings
from documents.tests.test_migration_archive_files import thumbnail_path
from documents.tests.utils import TestMigrations
@mock.patch(
"documents.migrations.1021_webp_thumbnail_conversion.multiprocessing.pool.Pool.map",
)
@mock.patch("documents.migrations.1021_webp_thumbnail_conversion.run_convert")
class TestMigrateWebPThumbnails(TestMigrations):
migrate_from = "1020_merge_20220518_1839"
migrate_to = "1021_webp_thumbnail_conversion"
auto_migrate = False
def pretend_convert_output(self, *args, **kwargs):
"""
Pretends to do the conversion, by copying the input file
to the output file
"""
shutil.copy2(
Path(kwargs["input_file"].rstrip("[0]")),
Path(kwargs["output_file"]),
)
def pretend_map(self, func: Callable, iterable: Iterable):
"""
Pretends to be the map of a multiprocessing.Pool, but secretly does
everything in series
"""
for item in iterable:
func(item)
def create_dummy_thumbnails(
self,
thumb_dir: Path,
ext: str,
count: int,
start_count: int = 0,
):
"""
Helper to create a certain count of files of given extension in a given directory
"""
for idx in range(count):
(Path(thumb_dir) / Path(f"{start_count + idx:07}.{ext}")).touch()
# Triple check expected files exist
self.assert_file_count_by_extension(ext, thumb_dir, count)
def create_webp_thumbnail_files(
self,
thumb_dir: Path,
count: int,
start_count: int = 0,
):
"""
Creates a dummy WebP thumbnail file in the given directory, based on
the database Document
"""
self.create_dummy_thumbnails(thumb_dir, "webp", count, start_count)
def create_png_thumbnail_file(
self,
thumb_dir: Path,
count: int,
start_count: int = 0,
):
"""
Creates a dummy PNG thumbnail file in the given directory, based on
the database Document
"""
self.create_dummy_thumbnails(thumb_dir, "png", count, start_count)
def assert_file_count_by_extension(
self,
ext: str,
dir: Union[str, Path],
expected_count: int,
):
"""
Helper to assert a certain count of given extension files in given directory
"""
if not isinstance(dir, Path):
dir = Path(dir)
matching_files = list(dir.glob(f"*.{ext}"))
self.assertEqual(len(matching_files), expected_count)
def assert_png_file_count(self, dir: Path, expected_count: int):
"""
Helper to assert a certain count of PNG extension files in given directory
"""
self.assert_file_count_by_extension("png", dir, expected_count)
def assert_webp_file_count(self, dir: Path, expected_count: int):
"""
Helper to assert a certain count of WebP extension files in given directory
"""
self.assert_file_count_by_extension("webp", dir, expected_count)
def setUp(self):
self.thumbnail_dir = Path(tempfile.mkdtemp()).resolve()
return super().setUp()
def tearDown(self) -> None:
shutil.rmtree(self.thumbnail_dir)
return super().tearDown()
def test_do_nothing_if_converted(
self,
run_convert_mock: mock.MagicMock,
map_mock: mock.MagicMock,
):
"""
GIVEN:
- Document exists with default WebP thumbnail path
WHEN:
- Thumbnail conversion is attempted
THEN:
- Nothing is converted
"""
map_mock.side_effect = self.pretend_map
with override_settings(
THUMBNAIL_DIR=self.thumbnail_dir,
):
self.create_webp_thumbnail_files(self.thumbnail_dir, 3)
self.performMigration()
run_convert_mock.assert_not_called()
self.assert_webp_file_count(self.thumbnail_dir, 3)
def test_convert_single_thumbnail(
self,
run_convert_mock: mock.MagicMock,
map_mock: mock.MagicMock,
):
"""
GIVEN:
- Document exists with PNG thumbnail
WHEN:
- Thumbnail conversion is attempted
THEN:
- Single thumbnail is converted
"""
map_mock.side_effect = self.pretend_map
run_convert_mock.side_effect = self.pretend_convert_output
with override_settings(
THUMBNAIL_DIR=self.thumbnail_dir,
):
self.create_png_thumbnail_file(self.thumbnail_dir, 3)
self.performMigration()
run_convert_mock.assert_called()
self.assertEqual(run_convert_mock.call_count, 3)
self.assert_webp_file_count(self.thumbnail_dir, 3)
def test_convert_errors_out(
self,
run_convert_mock: mock.MagicMock,
map_mock: mock.MagicMock,
):
"""
GIVEN:
- Document exists with PNG thumbnail
WHEN:
- Thumbnail conversion is attempted, but raises an exception
THEN:
- Single thumbnail is converted
"""
map_mock.side_effect = self.pretend_map
run_convert_mock.side_effect = OSError
with override_settings(
THUMBNAIL_DIR=self.thumbnail_dir,
):
self.create_png_thumbnail_file(self.thumbnail_dir, 3)
self.performMigration()
run_convert_mock.assert_called()
self.assertEqual(run_convert_mock.call_count, 3)
self.assert_png_file_count(self.thumbnail_dir, 3)
def test_convert_mixed(
self,
run_convert_mock: mock.MagicMock,
map_mock: mock.MagicMock,
):
"""
GIVEN:
- Document exists with PNG thumbnail
WHEN:
- Thumbnail conversion is attempted, but raises an exception
THEN:
- Single thumbnail is converted
"""
map_mock.side_effect = self.pretend_map
run_convert_mock.side_effect = self.pretend_convert_output
with override_settings(
THUMBNAIL_DIR=self.thumbnail_dir,
):
self.create_png_thumbnail_file(self.thumbnail_dir, 3)
self.create_webp_thumbnail_files(self.thumbnail_dir, 2, start_count=3)
self.performMigration()
run_convert_mock.assert_called()
self.assertEqual(run_convert_mock.call_count, 3)
self.assert_png_file_count(self.thumbnail_dir, 0)
self.assert_webp_file_count(self.thumbnail_dir, 5)

View File

@ -87,31 +87,6 @@ def fake_get_thumbnail(self, path, mimetype, file_name):
return os.path.join(os.path.dirname(__file__), "examples", "no-text.png")
class TestBaseParser(TestCase):
def setUp(self) -> None:
self.scratch = tempfile.mkdtemp()
override_settings(SCRATCH_DIR=self.scratch).enable()
def tearDown(self) -> None:
shutil.rmtree(self.scratch)
@mock.patch("documents.parsers.DocumentParser.get_thumbnail", fake_get_thumbnail)
@override_settings(OPTIMIZE_THUMBNAILS=True)
def test_get_optimised_thumbnail(self):
parser = DocumentParser(None)
parser.get_optimised_thumbnail("any", "not important", "document.pdf")
@mock.patch("documents.parsers.DocumentParser.get_thumbnail", fake_get_thumbnail)
@override_settings(OPTIMIZE_THUMBNAILS=False)
def test_get_optimised_thumb_disabled(self):
parser = DocumentParser(None)
path = parser.get_optimised_thumbnail("any", "not important", "document.pdf")
self.assertEqual(path, fake_get_thumbnail(None, None, None, None))
class TestParserAvailability(TestCase):
def test_file_extensions(self):

View File

@ -42,9 +42,9 @@ class TestSanityCheck(DirectoriesMixin, TestCase):
"samples",
"documents",
"thumbnails",
"0000001.png",
"0000001.webp",
),
os.path.join(self.dirs.thumbnail_dir, "0000001.png"),
os.path.join(self.dirs.thumbnail_dir, "0000001.webp"),
)
return Document.objects.create(

View File

@ -362,7 +362,8 @@ class DocumentViewSet(
handle = doc.thumbnail_file
# TODO: Send ETag information and use that to send new thumbnails
# if available
return HttpResponse(handle, content_type="image/png")
return HttpResponse(handle, content_type="image/webp")
except (FileNotFoundError, Document.DoesNotExist):
raise Http404()

View File

@ -72,7 +72,7 @@ def binaries_check(app_configs, **kwargs):
error = "Paperless can't find {}. Without it, consumption is impossible."
hint = "Either it's not in your ${PATH} or it's not installed."
binaries = (settings.CONVERT_BINARY, settings.OPTIPNG_BINARY, "tesseract")
binaries = (settings.CONVERT_BINARY, "tesseract")
check_messages = []
for binary in binaries:

View File

@ -526,8 +526,6 @@ CONSUMER_BARCODE_TIFF_SUPPORT = __get_boolean(
CONSUMER_BARCODE_STRING = os.getenv("PAPERLESS_CONSUMER_BARCODE_STRING", "PATCHT")
OPTIMIZE_THUMBNAILS = __get_boolean("PAPERLESS_OPTIMIZE_THUMBNAILS", "true")
OCR_PAGES = int(os.getenv("PAPERLESS_OCR_PAGES", 0))
# The default language that tesseract will attempt to use when parsing
@ -570,8 +568,6 @@ CONVERT_MEMORY_LIMIT = os.getenv("PAPERLESS_CONVERT_MEMORY_LIMIT")
GS_BINARY = os.getenv("PAPERLESS_GS_BINARY", "gs")
OPTIPNG_BINARY = os.getenv("PAPERLESS_OPTIPNG_BINARY", "optipng")
# Pre-2.x versions of Paperless stored your documents locally with GPG
# encryption, but that is no longer the default. This behaviour is still

View File

@ -13,9 +13,9 @@ class TestChecks(DirectoriesMixin, TestCase):
def test_binaries(self):
self.assertEqual(binaries_check(None), [])
@override_settings(CONVERT_BINARY="uuuhh", OPTIPNG_BINARY="forgot")
@override_settings(CONVERT_BINARY="uuuhh")
def test_binaries_fail(self):
self.assertEqual(len(binaries_check(None)), 2)
self.assertEqual(len(binaries_check(None)), 1)
def test_paths_check(self):
self.assertEqual(paths_check(None), [])

View File

@ -30,8 +30,8 @@ class TextDocumentParser(DocumentParser):
)
draw.text((5, 5), read_text(), font=font, fill="black")
out_path = os.path.join(self.tempdir, "thumb.png")
img.save(out_path)
out_path = os.path.join(self.tempdir, "thumb.webp")
img.save(out_path, format="WEBP")
return out_path