Removes last vestiges of PNG from the tests, code, docs and samples

This commit is contained in:
Trenton Holmes 2022-06-11 14:15:27 -07:00
parent cc4cea1a41
commit 1df517afd3
19 changed files with 14 additions and 60 deletions

@ -83,8 +83,6 @@ ARG RUNTIME_PACKAGES="\
postgresql-client \ postgresql-client \
# For Numpy # For Numpy
libatlas3-base \ libatlas3-base \
# thumbnail size reduction
pngquant \
# OCRmyPDF dependencies # OCRmyPDF dependencies
tesseract-ocr \ tesseract-ocr \
tesseract-ocr-eng \ tesseract-ocr-eng \

@ -518,15 +518,3 @@ Basic usage to disable encryption of your document store:
.. code:: .. code::
decrypt_documents [--passphrase SECR3TP4SSPHRA$E] decrypt_documents [--passphrase SECR3TP4SSPHRA$E]
Managing thumbnail format
=========================
Document thumbnails were originally created in PNG format. Thumbnails
for newly uploaded documents now use WebP, which reduces both storage
space and page loading times. To convert older PNG thumbnails to WebP,
run:
.. code::
convert_thumbnails

@ -1,6 +1,5 @@
# this is here so that django finds the checks. # this is here so that django finds the checks.
from .checks import changed_password_check from .checks import changed_password_check
from .checks import parser_check from .checks import parser_check
from .checks import png_thumbnail_check
__all__ = ["changed_password_check", "parser_check", "png_thumbnail_check"] __all__ = ["changed_password_check", "parser_check"]

@ -1,9 +1,7 @@
import textwrap import textwrap
from pathlib import Path
from django.conf import settings from django.conf import settings
from django.core.checks import Error from django.core.checks import Error
from django.core.checks import Info
from django.core.checks import register from django.core.checks import register
from django.core.exceptions import FieldError from django.core.exceptions import FieldError
from django.db.utils import OperationalError from django.db.utils import OperationalError
@ -68,23 +66,3 @@ def parser_check(app_configs, **kwargs):
] ]
else: else:
return [] return []
@register()
def png_thumbnail_check(app_configs, **kwargs):
    """
    System check that looks for documents whose thumbnail is still a PNG.

    Walks every Document and inspects the suffix of its resolved thumbnail
    path. Scanning stops at the first ".png" match.

    Returns:
        A list holding one Info message that suggests running
        convert_thumbnails when a PNG thumbnail is found, otherwise an
        empty list. An empty list is also returned when the query raises
        OperationalError, ProgrammingError or FieldError.
    """
    # Kept as a function-level import, as in the original; presumably the
    # models cannot be imported when this module is first loaded - confirm.
    from documents.models import Document

    try:
        png_present = any(
            Path(doc.thumbnail_path).resolve().suffix == ".png"
            for doc in Document.objects.all()
        )
    except (OperationalError, ProgrammingError, FieldError):
        return []  # No documents table yet

    if not png_present:
        return []

    return [
        Info(
            "PNG thumbnails found, consider running convert_thumbnails "
            "to convert to WebP",
        ),
    ]

@ -189,7 +189,7 @@ class Command(BaseCommand):
original_target = os.path.join(self.target, original_name) original_target = os.path.join(self.target, original_name)
document_dict[EXPORTER_FILE_NAME] = original_name document_dict[EXPORTER_FILE_NAME] = original_name
thumbnail_name = base_name + "-thumbnail.png" thumbnail_name = base_name + "-thumbnail.webp"
thumbnail_target = os.path.join(self.target, thumbnail_name) thumbnail_target = os.path.join(self.target, thumbnail_name)
document_dict[EXPORTER_THUMBNAIL_NAME] = thumbnail_name document_dict[EXPORTER_THUMBNAIL_NAME] = thumbnail_name

@ -1,7 +1,6 @@
import logging import logging
import multiprocessing import multiprocessing
import shutil import shutil
from pathlib import Path
import tqdm import tqdm
from django import db from django import db
@ -23,12 +22,6 @@ def _process_document(doc_in):
try: try:
existing_thumbnail = Path(document.thumbnail_path).resolve()
# Remove an existing PNG format thumbnail, if it existed
if existing_thumbnail.exists() and existing_thumbnail.suffix == ".png":
existing_thumbnail.unlink()
thumb = parser.get_thumbnail( thumb = parser.get_thumbnail(
document.source_path, document.source_path,
document.mime_type, document.mime_type,

Binary file not shown.

Before

(image error) Size: 7.7 KiB

Binary file not shown.

After

(image error) Size: 2.6 KiB

Binary file not shown.

Before

(image error) Size: 7.7 KiB

Binary file not shown.

After

(image error) Size: 2.6 KiB

Binary file not shown.

Before

(image error) Size: 7.7 KiB

Binary file not shown.

After

(image error) Size: 2.6 KiB

@ -176,7 +176,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
) )
with open( with open(
os.path.join(self.dirs.thumbnail_dir, f"{doc.pk:07d}.png"), os.path.join(self.dirs.thumbnail_dir, f"{doc.pk:07d}.webp"),
"wb", "wb",
) as f: ) as f:
f.write(content_thumbnail) f.write(content_thumbnail)
@ -1022,7 +1022,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
"samples", "samples",
"documents", "documents",
"thumbnails", "thumbnails",
"0000001.png", "0000001.webp",
) )
archive_file = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf") archive_file = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")

@ -180,7 +180,7 @@ class DummyParser(DocumentParser):
def __init__(self, logging_group, scratch_dir, archive_path): def __init__(self, logging_group, scratch_dir, archive_path):
super().__init__(logging_group, None) super().__init__(logging_group, None)
_, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=scratch_dir) _, self.fake_thumb = tempfile.mkstemp(suffix=".webp", dir=scratch_dir)
self.archive_path = archive_path self.archive_path = archive_path
def get_thumbnail(self, document_path, mime_type, file_name=None): def get_thumbnail(self, document_path, mime_type, file_name=None):
@ -199,7 +199,7 @@ class CopyParser(DocumentParser):
def __init__(self, logging_group, progress_callback=None): def __init__(self, logging_group, progress_callback=None):
super().__init__(logging_group, progress_callback) super().__init__(logging_group, progress_callback)
_, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=self.tempdir) _, self.fake_thumb = tempfile.mkstemp(suffix=".webp", dir=self.tempdir)
def parse(self, document_path, mime_type, file_name=None): def parse(self, document_path, mime_type, file_name=None):
self.text = "The text" self.text = "The text"
@ -214,7 +214,7 @@ class FaultyParser(DocumentParser):
def __init__(self, logging_group, scratch_dir): def __init__(self, logging_group, scratch_dir):
super().__init__(logging_group) super().__init__(logging_group)
_, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=scratch_dir) _, self.fake_thumb = tempfile.mkstemp(suffix=".webp", dir=scratch_dir)
def get_thumbnail(self, document_path, mime_type, file_name=None): def get_thumbnail(self, document_path, mime_type, file_name=None):
return self.fake_thumb return self.fake_thumb
@ -230,6 +230,8 @@ def fake_magic_from_file(file, mime=False):
return "application/pdf" return "application/pdf"
elif os.path.splitext(file)[1] == ".png": elif os.path.splitext(file)[1] == ".png":
return "image/png" return "image/png"
elif os.path.splitext(file)[1] == ".webp":
return "image/webp"
else: else:
return "unknown" return "unknown"
else: else:

@ -150,9 +150,9 @@ class TestDecryptDocuments(TestCase):
"samples", "samples",
"documents", "documents",
"thumbnails", "thumbnails",
f"0000004.png.gpg", f"0000004.webp.gpg",
), ),
os.path.join(thumb_dir, f"{doc.id:07}.png.gpg"), os.path.join(thumb_dir, f"{doc.id:07}.webp.gpg"),
) )
call_command("decrypt_documents") call_command("decrypt_documents")

@ -42,9 +42,9 @@ class TestSanityCheck(DirectoriesMixin, TestCase):
"samples", "samples",
"documents", "documents",
"thumbnails", "thumbnails",
"0000001.png", "0000001.webp",
), ),
os.path.join(self.dirs.thumbnail_dir, "0000001.png"), os.path.join(self.dirs.thumbnail_dir, "0000001.webp"),
) )
return Document.objects.create( return Document.objects.create(

@ -362,11 +362,7 @@ class DocumentViewSet(
handle = doc.thumbnail_file handle = doc.thumbnail_file
# TODO: Send ETag information and use that to send new thumbnails # TODO: Send ETag information and use that to send new thumbnails
# if available # if available
thumbnail_path = doc.thumbnail_path
if os.path.splitext(thumbnail_path)[1] == ".webp":
content_type = "image/webp" content_type = "image/webp"
else:
content_type = "image/png"
return HttpResponse(handle, content_type=content_type) return HttpResponse(handle, content_type=content_type)
except (FileNotFoundError, Document.DoesNotExist): except (FileNotFoundError, Document.DoesNotExist):