mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Converts the conversion into a database migration
This commit is contained in:
parent
e8868d7ebf
commit
cc4cea1a41
@ -2,8 +2,7 @@
|
||||
|
||||
set -eu
|
||||
|
||||
for command in convert_thumbnails \
|
||||
decrypt_documents \
|
||||
for command in decrypt_documents \
|
||||
document_archiver \
|
||||
document_exporter \
|
||||
document_importer \
|
||||
|
@ -1,97 +0,0 @@
|
||||
import logging
|
||||
import multiprocessing.pool
|
||||
import shutil
|
||||
import tempfile
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
from django.core.management.base import BaseCommand
|
||||
from documents.models import Document
|
||||
from documents.parsers import run_convert
|
||||
|
||||
logger = logging.getLogger("paperless.management.convert_thumbnails")
|
||||
|
||||
|
||||
def _do_convert(work_package):
    """
    Convert one PNG thumbnail to WebP and swap it into place.

    ``work_package`` is a 3-tuple; the first element is ignored, the second
    is the path of the existing PNG thumbnail and the third is the path the
    WebP output should be written to.  Any exception raised while
    converting, copying or deleting is logged and swallowed so that one bad
    thumbnail does not abort the rest of the batch.
    """
    _unused, png_path, webp_path = work_package

    try:
        logger.info(f"Converting thumbnail: {png_path}")

        # "[0]" is appended to the input name, as in the thumbnail conversion
        # this command replaces
        convert_kwargs = {
            "density": 300,
            "scale": "500x5000>",
            "alpha": "remove",
            "strip": True,
            "trim": False,
            "auto_orient": True,
            "input_file": f"{png_path}[0]",
            "output_file": str(webp_path),
        }
        run_convert(**convert_kwargs)

        # Put the new WebP next to the PNG it replaces ...
        target_dir = png_path.parent
        shutil.copy(webp_path, target_dir)

        # ... and only then delete the PNG
        png_path.unlink()

        done_msg = (
            f"Conversion to WebP completed, replaced {png_path.name} with {webp_path.name}"
        )
        logger.info(done_msg)

    except Exception as e:
        logger.error(f"Error converting thumbnail (existing file unchanged): {e}")
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """
    Management command which converts the PNG thumbnail of every document
    into WebP format.
    """

    # A plain literal: stripping spaces from a triple-quoted string with
    # ``.replace(" ", "")`` removes the spaces between words as well
    help = "Converts existing PNG thumbnails into WebP format."

    def handle(self, *args, **options):
        """
        Collect every document whose thumbnail is still a ``.png`` file and
        convert them in a small worker pool.

        Converted files are first written to a temporary directory;
        ``_do_convert`` copies each one next to the original and removes the
        PNG.  Positional and keyword arguments are accepted for the Django
        command interface but are not used.
        """
        logger.info("Converting all PNG thumbnails to WebP")

        # Monotonic clock: the reported duration cannot be skewed by
        # wall-clock adjustments made while the conversion runs
        start = time.monotonic()

        with tempfile.TemporaryDirectory() as tempdir:
            staging_dir = Path(tempdir)
            work_packages = []

            for document in Document.objects.all():
                existing_thumbnail = Path(document.thumbnail_path).resolve()

                # Anything that is not a PNG needs no work
                if existing_thumbnail.suffix != ".png":
                    continue

                # Same file name with a .webp suffix, placed in the tempdir
                converted_thumbnail = (
                    staging_dir / existing_thumbnail.with_suffix(".webp").name
                ).resolve()

                # _do_convert expects (ignored, existing, converted)
                work_packages.append(
                    (document, existing_thumbnail, converted_thumbnail),
                )

            if work_packages:
                with multiprocessing.pool.Pool(
                    processes=4,
                    maxtasksperchild=4,
                ) as pool:
                    pool.map(_do_convert, work_packages)

        duration = time.monotonic() - start

        logger.info(f"Conversion completed in {duration:.3f}s")
|
107
src/documents/migrations/1021_webp_thumbnail_conversion.py
Normal file
107
src/documents/migrations/1021_webp_thumbnail_conversion.py
Normal file
@ -0,0 +1,107 @@
|
||||
# Generated by Django 4.0.5 on 2022-06-11 15:40
|
||||
import logging
|
||||
import multiprocessing.pool
|
||||
import shutil
|
||||
import tempfile
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
from django.conf import settings
|
||||
from django.db import migrations
|
||||
from documents.parsers import run_convert
|
||||
|
||||
logger = logging.getLogger("paperless.migrations")
|
||||
|
||||
|
||||
def _do_convert(work_package):
    """
    Convert a single PNG thumbnail to WebP and replace the PNG with it.

    ``work_package`` is a 2-tuple of (existing PNG path, path the WebP
    output is written to).  Failures of any step are logged and swallowed,
    so a broken thumbnail never aborts the whole batch.
    """
    source_png, staged_webp = work_package

    try:
        logger.info(f"Converting thumbnail: {source_png}")

        # Options handed to run_convert; "[0]" is appended to the input name
        options = dict(
            density=300,
            scale="500x5000>",
            alpha="remove",
            strip=True,
            trim=False,
            auto_orient=True,
            input_file=f"{source_png}[0]",
            output_file=str(staged_webp),
        )
        run_convert(**options)

        # The WebP goes into the directory of the PNG it replaces
        shutil.copy(staged_webp, source_png.parent)

        # The PNG is deleted only after the copy above succeeded
        source_png.unlink()

        summary = (
            f"Conversion to WebP completed, replaced {source_png.name} with {staged_webp.name}"
        )
        logger.info(summary)

    except Exception as e:
        logger.error(f"Error converting thumbnail (existing file unchanged): {e}")
|
||||
|
||||
|
||||
def _convert_thumbnails_to_webp(apps, schema_editor):
    """
    Forward step of the migration: convert every ``*.png`` file found in
    ``settings.THUMBNAIL_DIR`` into WebP.

    The database is not touched; ``apps`` and ``schema_editor`` are accepted
    only because ``RunPython`` passes them.  Output files are written to a
    temporary directory first and ``_do_convert`` then copies each one into
    the thumbnail directory and removes the PNG.

    NOTE(review): the glob only matches names ending in ``.png``.
    ``Document.thumbnail_path`` appends ``.gpg`` for GPG storage, so such
    thumbnails would not be picked up here - confirm whether that is
    intended.
    """
    # Monotonic clock, so the reported duration is immune to wall-clock
    # adjustments made while the conversion runs
    start = time.monotonic()

    with tempfile.TemporaryDirectory() as tempdir:
        staging_dir = Path(tempdir)
        work_packages = []

        for file in Path(settings.THUMBNAIL_DIR).glob("*.png"):
            existing_thumbnail = file.resolve()

            # Same file name, .webp suffix, located in the tempdir
            converted_thumbnail = (
                staging_dir / existing_thumbnail.with_suffix(".webp").name
            ).resolve()

            # Package up the necessary info
            work_packages.append(
                (existing_thumbnail, converted_thumbnail),
            )

        if work_packages:

            logger.info(
                "\n\n"
                " This is a one-time only migration to convert thumbnails for all of your\n"
                " documents into WebP format. If you have a lot of documents though, \n"
                " this may take a while, so a coffee break may be in order."
                "\n",
            )

            # At most four workers, fewer on machines with fewer CPUs
            with multiprocessing.pool.Pool(
                processes=min(multiprocessing.cpu_count(), 4),
                maxtasksperchild=4,
            ) as pool:
                pool.map(_do_convert, work_packages)

            duration = time.monotonic() - start

            logger.info(f"Conversion completed in {duration:.3f}s")
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """
    Data-only migration: runs the PNG to WebP thumbnail conversion once.
    No schema changes are made.
    """

    dependencies = [("documents", "1020_merge_20220518_1839")]

    operations = [
        # Reversing is a no-op: WebP thumbnails are not turned back into PNG
        migrations.RunPython(
            _convert_thumbnails_to_webp,
            migrations.RunPython.noop,
        ),
    ]
|
@ -294,26 +294,13 @@ class Document(models.Model):
|
||||
|
||||
@property
def thumbnail_path(self) -> str:
    """
    Return the normalised path of this document's thumbnail inside
    ``settings.THUMBNAIL_DIR``.

    The file name is the zero-padded primary key, with ``.gpg`` appended
    when the document uses GPG storage.  A PNG thumbnail that still exists
    on disk takes precedence; otherwise the WebP path is returned, whether
    or not that file exists yet.
    """
    png_file_name = f"{self.pk:07}.png"
    webp_file_name = f"{self.pk:07}.webp"
    if self.storage_type == self.STORAGE_TYPE_GPG:
        png_file_name += ".gpg"
        webp_file_name += ".gpg"

    # This property is used both to generate the path for a new thumbnail
    # and to locate an existing one, hence the existence check below
    webp_file_path = os.path.join(settings.THUMBNAIL_DIR, webp_file_name)
    png_file_path = os.path.join(settings.THUMBNAIL_DIR, png_file_name)

    # Prefer a PNG thumbnail that is still on disk, otherwise assume WebP
    if os.path.exists(png_file_path):
        thumb = png_file_path
    else:
        thumb = webp_file_path

    return os.path.normpath(thumb)
|
||||
|
||||
@property
|
||||
def thumbnail_file(self):
|
||||
|
@ -1,139 +0,0 @@
|
||||
import filecmp
|
||||
import shutil
|
||||
import tempfile
|
||||
from io import StringIO
|
||||
from pathlib import Path
|
||||
from unittest import mock
|
||||
|
||||
from django.core.management import call_command
|
||||
from django.test import override_settings
|
||||
from django.test import TestCase
|
||||
from documents.models import Document
|
||||
|
||||
|
||||
class TestConvertThumbnails(TestCase):
    """
    Tests for the ``convert_thumbnails`` management command, with the actual
    image conversion (``run_convert``) mocked out.
    """

    def call_command(self):
        """
        Runs the command and returns the captured (stdout, stderr) text
        """
        stdout = StringIO()
        stderr = StringIO()
        call_command(
            "convert_thumbnails",
            "--no-color",
            stdout=stdout,
            stderr=stderr,
        )
        return stdout.getvalue(), stderr.getvalue()

    def setUp(self):
        """
        Creates a document in the database
        """
        super().setUp()

        self.doc = Document.objects.create(
            pk=1,
            checksum="A",
            title="A",
            content="first document",
            mime_type="application/pdf",
        )
        self.doc.save()

    def pretend_convert_output(self, *args, **kwargs):
        """
        Pretends to do the conversion, by copying the input file
        to the output file
        """
        input_file = kwargs["input_file"]
        # Drop the literal "[0]" suffix.  str.rstrip("[0]") would instead
        # strip any trailing run of the characters "[", "0" and "]", which
        # also eats into file names that happen to end in one of them.
        if input_file.endswith("[0]"):
            input_file = input_file[: -len("[0]")]
        shutil.copy2(
            Path(input_file),
            Path(kwargs["output_file"]),
        )

    def create_webp_thumbnail_file(self, thumb_dir):
        """
        Creates a dummy WebP thumbnail file in the given directory, named
        after the primary key of the database Document, and returns its path
        """
        thumb_file = Path(thumb_dir) / Path(f"{self.doc.pk:07}.webp")
        thumb_file.write_text("this is a dummy webp file")
        return thumb_file

    def create_png_thumbnail_file(self, thumb_dir):
        """
        Creates a dummy PNG thumbnail file in the given directory, named
        after the primary key of the database Document, and returns its path
        """
        thumb_file = Path(thumb_dir) / Path(f"{self.doc.pk:07}.png")
        thumb_file.write_text("this is a dummy png file")
        return thumb_file

    @mock.patch("documents.management.commands.convert_thumbnails.run_convert")
    def test_do_nothing_if_converted(self, run_convert_mock):
        """
        GIVEN:
            - Document exists with default WebP thumbnail path
        WHEN:
            - Thumbnail conversion is attempted
        THEN:
            - Nothing is converted
        """

        stdout, _ = self.call_command()
        run_convert_mock.assert_not_called()
        self.assertIn("Converting all PNG thumbnails to WebP", stdout)

    @mock.patch("documents.management.commands.convert_thumbnails.run_convert")
    def test_convert_single_thumbnail(self, run_convert_mock):
        """
        GIVEN:
            - Document exists with PNG thumbnail
        WHEN:
            - Thumbnail conversion is attempted
        THEN:
            - Single thumbnail is converted
        """

        run_convert_mock.side_effect = self.pretend_convert_output

        with tempfile.TemporaryDirectory() as thumbnail_dir:

            with override_settings(
                THUMBNAIL_DIR=thumbnail_dir,
            ):

                thumb_file = self.create_png_thumbnail_file(thumbnail_dir)

                stdout, _ = self.call_command()

                run_convert_mock.assert_called_once()
                self.assertIn(f"{thumb_file}", stdout)
                self.assertIn("Conversion to WebP completed", stdout)

                self.assertFalse(thumb_file.exists())
                self.assertTrue(thumb_file.with_suffix(".webp").exists())

    @mock.patch("documents.management.commands.convert_thumbnails.run_convert")
    def test_convert_errors_out(self, run_convert_mock):
        """
        GIVEN:
            - Document exists with PNG thumbnail
        WHEN:
            - Thumbnail conversion is attempted, but raises an exception
        THEN:
            - The error is reported and the PNG thumbnail is left in place
        """

        run_convert_mock.side_effect = OSError

        with tempfile.TemporaryDirectory() as thumbnail_dir:

            with override_settings(
                THUMBNAIL_DIR=thumbnail_dir,
            ):

                thumb_file = self.create_png_thumbnail_file(thumbnail_dir)

                _, stderr = self.call_command()

                run_convert_mock.assert_called_once()
                self.assertIn("Error converting thumbnail", stderr)
                self.assertTrue(thumb_file.exists())
|
231
src/documents/tests/test_migration_webp_conversion.py
Normal file
231
src/documents/tests/test_migration_webp_conversion.py
Normal file
@ -0,0 +1,231 @@
|
||||
import shutil
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from typing import Callable
|
||||
from typing import Iterable
|
||||
from typing import Union
|
||||
from unittest import mock
|
||||
|
||||
from django.test import override_settings
|
||||
from documents.tests.test_migration_archive_files import thumbnail_path
|
||||
from documents.tests.utils import TestMigrations
|
||||
|
||||
|
||||
@mock.patch(
    "documents.migrations.1021_webp_thumbnail_conversion.multiprocessing.pool.Pool.map",
)
@mock.patch("documents.migrations.1021_webp_thumbnail_conversion.run_convert")
class TestMigrateWebPThumbnails(TestMigrations):
    """
    Tests for the 1021 WebP thumbnail conversion migration.

    ``run_convert`` and ``Pool.map`` are patched for every test method, so
    no image conversion happens and all work runs in the test process.
    Each test receives the two mocks as ``run_convert_mock`` and
    ``map_mock``, in that order.
    """

    migrate_from = "1020_merge_20220518_1839"
    migrate_to = "1021_webp_thumbnail_conversion"
    # Each test triggers the migration itself, once its files are in place
    auto_migrate = False

    def pretend_convert_output(self, *args, **kwargs):
        """
        Pretends to do the conversion, by copying the input file
        to the output file
        """
        input_file = kwargs["input_file"]
        # Drop the literal "[0]" suffix.  str.rstrip("[0]") would instead
        # strip any trailing run of the characters "[", "0" and "]", which
        # also eats into file names that happen to end in one of them.
        if input_file.endswith("[0]"):
            input_file = input_file[: -len("[0]")]
        shutil.copy2(
            Path(input_file),
            Path(kwargs["output_file"]),
        )

    def pretend_map(self, func: Callable, iterable: Iterable):
        """
        Pretends to be the map of a multiprocessing.Pool, but secretly does
        everything in series
        """
        for item in iterable:
            func(item)

    def create_dummy_thumbnails(
        self,
        thumb_dir: Path,
        ext: str,
        count: int,
        start_count: int = 0,
    ):
        """
        Helper to create a certain count of empty files of given extension in
        a given directory.  Files are named by zero-padded consecutive
        numbers, beginning at start_count
        """
        for idx in range(count):
            (Path(thumb_dir) / Path(f"{start_count + idx:07}.{ext}")).touch()
        # Triple check expected files exist
        self.assert_file_count_by_extension(ext, thumb_dir, count)

    def create_webp_thumbnail_files(
        self,
        thumb_dir: Path,
        count: int,
        start_count: int = 0,
    ):
        """
        Creates count dummy WebP thumbnail files in the given directory,
        numbered from start_count
        """
        self.create_dummy_thumbnails(thumb_dir, "webp", count, start_count)

    def create_png_thumbnail_file(
        self,
        thumb_dir: Path,
        count: int,
        start_count: int = 0,
    ):
        """
        Creates count dummy PNG thumbnail files in the given directory,
        numbered from start_count
        """
        self.create_dummy_thumbnails(thumb_dir, "png", count, start_count)

    def assert_file_count_by_extension(
        self,
        ext: str,
        dir: Union[str, Path],
        expected_count: int,
    ):
        """
        Helper to assert a certain count of given extension files in given directory
        """
        if not isinstance(dir, Path):
            dir = Path(dir)
        matching_files = list(dir.glob(f"*.{ext}"))
        self.assertEqual(len(matching_files), expected_count)

    def assert_png_file_count(self, dir: Path, expected_count: int):
        """
        Helper to assert a certain count of PNG extension files in given directory
        """
        self.assert_file_count_by_extension("png", dir, expected_count)

    def assert_webp_file_count(self, dir: Path, expected_count: int):
        """
        Helper to assert a certain count of WebP extension files in given directory
        """
        self.assert_file_count_by_extension("webp", dir, expected_count)

    def setUp(self):
        """
        Creates a fresh temporary directory to serve as the thumbnail directory
        """
        self.thumbnail_dir = Path(tempfile.mkdtemp()).resolve()

        return super().setUp()

    def tearDown(self) -> None:
        """
        Removes the temporary thumbnail directory again
        """
        shutil.rmtree(self.thumbnail_dir)

        return super().tearDown()

    def test_do_nothing_if_converted(
        self,
        run_convert_mock: mock.MagicMock,
        map_mock: mock.MagicMock,
    ):
        """
        GIVEN:
            - Only WebP thumbnails exist
        WHEN:
            - Thumbnail conversion is attempted
        THEN:
            - Nothing is converted
        """
        map_mock.side_effect = self.pretend_map

        with override_settings(
            THUMBNAIL_DIR=self.thumbnail_dir,
        ):

            self.create_webp_thumbnail_files(self.thumbnail_dir, 3)

            self.performMigration()
            run_convert_mock.assert_not_called()

            self.assert_webp_file_count(self.thumbnail_dir, 3)

    def test_convert_single_thumbnail(
        self,
        run_convert_mock: mock.MagicMock,
        map_mock: mock.MagicMock,
    ):
        """
        GIVEN:
            - Only PNG thumbnails exist
        WHEN:
            - Thumbnail conversion is attempted
        THEN:
            - Every PNG thumbnail is converted to WebP
        """
        map_mock.side_effect = self.pretend_map
        run_convert_mock.side_effect = self.pretend_convert_output

        with override_settings(
            THUMBNAIL_DIR=self.thumbnail_dir,
        ):
            self.create_png_thumbnail_file(self.thumbnail_dir, 3)

            self.performMigration()

            run_convert_mock.assert_called()
            self.assertEqual(run_convert_mock.call_count, 3)

            self.assert_webp_file_count(self.thumbnail_dir, 3)

    def test_convert_errors_out(
        self,
        run_convert_mock: mock.MagicMock,
        map_mock: mock.MagicMock,
    ):
        """
        GIVEN:
            - Only PNG thumbnails exist
        WHEN:
            - Thumbnail conversion is attempted, but raises an exception
        THEN:
            - Conversion is attempted for each file, and every PNG
              thumbnail is left in place
        """
        map_mock.side_effect = self.pretend_map
        run_convert_mock.side_effect = OSError

        with override_settings(
            THUMBNAIL_DIR=self.thumbnail_dir,
        ):

            self.create_png_thumbnail_file(self.thumbnail_dir, 3)

            self.performMigration()

            run_convert_mock.assert_called()
            self.assertEqual(run_convert_mock.call_count, 3)

            self.assert_png_file_count(self.thumbnail_dir, 3)

    def test_convert_mixed(
        self,
        run_convert_mock: mock.MagicMock,
        map_mock: mock.MagicMock,
    ):
        """
        GIVEN:
            - A mix of PNG and WebP thumbnails exists
        WHEN:
            - Thumbnail conversion is attempted
        THEN:
            - Only the PNG thumbnails are converted
            - Afterwards all thumbnails are WebP
        """
        map_mock.side_effect = self.pretend_map
        run_convert_mock.side_effect = self.pretend_convert_output

        with override_settings(
            THUMBNAIL_DIR=self.thumbnail_dir,
        ):

            self.create_png_thumbnail_file(self.thumbnail_dir, 3)
            self.create_webp_thumbnail_files(self.thumbnail_dir, 2, start_count=3)

            self.performMigration()

            run_convert_mock.assert_called()
            self.assertEqual(run_convert_mock.call_count, 3)

            self.assert_png_file_count(self.thumbnail_dir, 0)
            self.assert_webp_file_count(self.thumbnail_dir, 5)
|
Loading…
x
Reference in New Issue
Block a user