diff --git a/docker/install_management_commands.sh b/docker/install_management_commands.sh index beb600fdb..e5c8b30a0 100755 --- a/docker/install_management_commands.sh +++ b/docker/install_management_commands.sh @@ -2,8 +2,7 @@ set -eu -for command in convert_thumbnails \ - decrypt_documents \ +for command in decrypt_documents \ document_archiver \ document_exporter \ document_importer \ diff --git a/src/documents/management/commands/convert_thumbnails.py b/src/documents/management/commands/convert_thumbnails.py deleted file mode 100644 index 089c689c9..000000000 --- a/src/documents/management/commands/convert_thumbnails.py +++ /dev/null @@ -1,97 +0,0 @@ -import logging -import multiprocessing.pool -import shutil -import tempfile -import time -from pathlib import Path - -from django.core.management.base import BaseCommand -from documents.models import Document -from documents.parsers import run_convert - -logger = logging.getLogger("paperless.management.convert_thumbnails") - - -def _do_convert(work_package): - _, existing_thumbnail, converted_thumbnail = work_package - try: - - logger.info(f"Converting thumbnail: {existing_thumbnail}") - - # Run actual conversion - run_convert( - density=300, - scale="500x5000>", - alpha="remove", - strip=True, - trim=False, - auto_orient=True, - input_file=f"{existing_thumbnail}[0]", - output_file=str(converted_thumbnail), - ) - - # Copy newly created thumbnail to thumbnail directory - shutil.copy(converted_thumbnail, existing_thumbnail.parent) - - # Remove the PNG version - existing_thumbnail.unlink() - - logger.info( - "Conversion to WebP completed, " - f"replaced {existing_thumbnail.name} with {converted_thumbnail.name}", - ) - - except Exception as e: - logger.error( - f"Error converting thumbnail" f" (existing file unchanged): {e}", - ) - - -class Command(BaseCommand): - - help = """ - Converts existing PNG thumbnails into - WebP format. - """.replace( - " ", - "", - ) - - def handle(self, *args, **options): - - logger.info("Converting all PNG thumbnails to WebP") - start = time.time() - documents = Document.objects.all() - - with tempfile.TemporaryDirectory() as tempdir: - - work_packages = [] - - for document in documents: - existing_thumbnail = Path(document.thumbnail_path).resolve() - - if existing_thumbnail.suffix == ".png": - - # Change the existing filename suffix from png to webp - converted_thumbnail_name = existing_thumbnail.with_suffix( - ".webp", - ).name - - # Create the expected output filename in the tempdir - converted_thumbnail = ( - Path(tempdir) / Path(converted_thumbnail_name) - ).resolve() - - # Package up the necessary info - work_packages.append( - (document, existing_thumbnail, converted_thumbnail), - ) - - if len(work_packages): - with multiprocessing.pool.Pool(processes=4, maxtasksperchild=4) as pool: - pool.map(_do_convert, work_packages) - - end = time.time() - duration = end - start - - logger.info(f"Conversion completed in {duration:.3f}s") diff --git a/src/documents/migrations/1021_webp_thumbnail_conversion.py b/src/documents/migrations/1021_webp_thumbnail_conversion.py new file mode 100644 index 000000000..c5a1c8733 --- /dev/null +++ b/src/documents/migrations/1021_webp_thumbnail_conversion.py @@ -0,0 +1,107 @@ +# Generated by Django 4.0.5 on 2022-06-11 15:40 +import logging +import multiprocessing.pool +import shutil +import tempfile +import time +from pathlib import Path + +from django.conf import settings +from django.db import migrations +from documents.parsers import run_convert + +logger = logging.getLogger("paperless.migrations") + + +def _do_convert(work_package): + existing_thumbnail, converted_thumbnail = work_package + try: + + logger.info(f"Converting thumbnail: {existing_thumbnail}") + + # Run actual conversion + run_convert( + density=300, + scale="500x5000>", + alpha="remove", + strip=True, + trim=False, + auto_orient=True, + input_file=f"{existing_thumbnail}[0]", + output_file=str(converted_thumbnail), + ) + + # Copy newly created thumbnail to thumbnail directory + shutil.copy(converted_thumbnail, existing_thumbnail.parent) + + # Remove the PNG version + existing_thumbnail.unlink() + + logger.info( + "Conversion to WebP completed, " + f"replaced {existing_thumbnail.name} with {converted_thumbnail.name}", + ) + + except Exception as e: + logger.error(f"Error converting thumbnail (existing file unchanged): {e}") + + +def _convert_thumbnails_to_webp(apps, schema_editor): + start = time.time() + + with tempfile.TemporaryDirectory() as tempdir: + + work_packages = [] + + for file in Path(settings.THUMBNAIL_DIR).glob("*.png"): + existing_thumbnail = file.resolve() + + # Change the existing filename suffix from png to webp + converted_thumbnail_name = existing_thumbnail.with_suffix( + ".webp", + ).name + + # Create the expected output filename in the tempdir + converted_thumbnail = ( + Path(tempdir) / Path(converted_thumbnail_name) + ).resolve() + + # Package up the necessary info + work_packages.append( + (existing_thumbnail, converted_thumbnail), + ) + + if len(work_packages): + + logger.info( + "\n\n" + " This is a one-time only migration to convert thumbnails for all of your\n" + " documents into WebP format. If you have a lot of documents though, \n" + " this may take a while, so a coffee break may be in order." + "\n", + ) + + with multiprocessing.pool.Pool( + processes=min(multiprocessing.cpu_count(), 4), + maxtasksperchild=4, + ) as pool: + pool.map(_do_convert, work_packages) + + end = time.time() + duration = end - start + + logger.info(f"Conversion completed in {duration:.3f}s") + + +class Migration(migrations.Migration): + + dependencies = [ + ("documents", "1020_merge_20220518_1839"), + ] + + operations = [ + migrations.RunPython( + code=_convert_thumbnails_to_webp, + reverse_code=migrations.RunPython.noop, + ), + ] diff --git a/src/documents/models.py b/src/documents/models.py index 9fed321c3..f24ce462e 100644 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -294,26 +294,13 @@ class Document(models.Model): @property def thumbnail_path(self) -> str: - png_file_name = f"{self.pk:07}.png" webp_file_name = f"{self.pk:07}.webp" if self.storage_type == self.STORAGE_TYPE_GPG: - png_file_name += ".gpg" webp_file_name += ".gpg" - # This property is used to both generate the file path - # and locate the file itself - # Hence why this looks a little weird - webp_file_path = os.path.join(settings.THUMBNAIL_DIR, webp_file_name) - png_file_path = os.path.join(settings.THUMBNAIL_DIR, png_file_name) - # 1. Assume the thumbnail is WebP - if os.path.exists(png_file_path): - thumb = png_file_path - else: - thumb = webp_file_path - - return os.path.normpath(thumb) + return os.path.normpath(webp_file_path) @property def thumbnail_file(self): diff --git a/src/documents/tests/test_management_convert_thumbnail.py b/src/documents/tests/test_management_convert_thumbnail.py deleted file mode 100644 index 8413cec3a..000000000 --- a/src/documents/tests/test_management_convert_thumbnail.py +++ /dev/null @@ -1,139 +0,0 @@ -import filecmp -import shutil -import tempfile -from io import StringIO -from pathlib import Path -from unittest import mock - -from django.core.management import call_command -from django.test import override_settings -from django.test import TestCase -from documents.models import Document - - -class TestConvertThumbnails(TestCase): - def call_command(self): - stdout = StringIO() - stderr = StringIO() - call_command( - "convert_thumbnails", - "--no-color", - stdout=stdout, - stderr=stderr, - ) - return stdout.getvalue(), stderr.getvalue() - - def setUp(self): - """ - Creates a document in the database - """ - super().setUp() - - self.doc = Document.objects.create( - pk=1, - checksum="A", - title="A", - content="first document", - mime_type="application/pdf", - ) - self.doc.save() - - def pretend_convert_output(self, *args, **kwargs): - """ - Pretends to do the conversion, by copying the input file - to the output file - """ - shutil.copy2( - Path(kwargs["input_file"].rstrip("[0]")), - Path(kwargs["output_file"]), - ) - - def create_webp_thumbnail_file(self, thumb_dir): - """ - Creates a dummy WebP thumbnail file in the given directory, based on - the database Document - """ - thumb_file = Path(thumb_dir) / Path(f"{self.doc.pk:07}.webp") - thumb_file.write_text("this is a dummy webp file") - return thumb_file - - def create_png_thumbnail_file(self, thumb_dir): - """ - Creates a dummy PNG thumbnail file in the given directory, based on - the database Document - """ - thumb_file = Path(thumb_dir) / Path(f"{self.doc.pk:07}.png") - thumb_file.write_text("this is a dummy png file") - return thumb_file - - @mock.patch("documents.management.commands.convert_thumbnails.run_convert") - def test_do_nothing_if_converted(self, run_convert_mock): - """ - GIVEN: - - Document exists with default WebP thumbnail path - WHEN: - - Thumbnail conversion is attempted - THEN: - - Nothing is converted - """ - - stdout, _ = self.call_command() - run_convert_mock.assert_not_called() - self.assertIn("Converting all PNG thumbnails to WebP", stdout) - - @mock.patch("documents.management.commands.convert_thumbnails.run_convert") - def test_convert_single_thumbnail(self, run_convert_mock): - """ - GIVEN: - - Document exists with PNG thumbnail - WHEN: - - Thumbnail conversion is attempted - THEN: - - Single thumbnail is converted - """ - - run_convert_mock.side_effect = self.pretend_convert_output - - with tempfile.TemporaryDirectory() as thumbnail_dir: - - with override_settings( - THUMBNAIL_DIR=thumbnail_dir, - ): - - thumb_file = self.create_png_thumbnail_file(thumbnail_dir) - - stdout, _ = self.call_command() - - run_convert_mock.assert_called_once() - self.assertIn(f"{thumb_file}", stdout) - self.assertIn("Conversion to WebP completed", stdout) - - self.assertFalse(thumb_file.exists()) - self.assertTrue(thumb_file.with_suffix(".webp").exists()) - - @mock.patch("documents.management.commands.convert_thumbnails.run_convert") - def test_convert_errors_out(self, run_convert_mock): - """ - GIVEN: - - Document exists with PNG thumbnail - WHEN: - - Thumbnail conversion is attempted, but raises an exception - THEN: - - Single thumbnail is converted - """ - - run_convert_mock.side_effect = OSError - - with tempfile.TemporaryDirectory() as thumbnail_dir: - - with override_settings( - THUMBNAIL_DIR=thumbnail_dir, - ): - - thumb_file = self.create_png_thumbnail_file(thumbnail_dir) - - _, stderr = self.call_command() - - run_convert_mock.assert_called_once() - self.assertIn("Error converting thumbnail", stderr) - self.assertTrue(thumb_file.exists()) diff --git a/src/documents/tests/test_migration_webp_conversion.py b/src/documents/tests/test_migration_webp_conversion.py new file mode 100644 index 000000000..a3a5fa6bc --- /dev/null +++ b/src/documents/tests/test_migration_webp_conversion.py @@ -0,0 +1,231 @@ +import shutil +import tempfile +from pathlib import Path +from typing import Callable +from typing import Iterable +from typing import Union +from unittest import mock + +from django.test import override_settings +from documents.tests.test_migration_archive_files import thumbnail_path +from documents.tests.utils import TestMigrations + + +@mock.patch( + "documents.migrations.1021_webp_thumbnail_conversion.multiprocessing.pool.Pool.map", +) +@mock.patch("documents.migrations.1021_webp_thumbnail_conversion.run_convert") +class TestMigrateWebPThumbnails(TestMigrations): + + migrate_from = "1020_merge_20220518_1839" + migrate_to = "1021_webp_thumbnail_conversion" + auto_migrate = False + + def pretend_convert_output(self, *args, **kwargs): + """ + Pretends to do the conversion, by copying the input file + to the output file + """ + shutil.copy2( + Path(kwargs["input_file"].rstrip("[0]")), + Path(kwargs["output_file"]), + ) + + def pretend_map(self, func: Callable, iterable: Iterable): + """ + Pretends to be the map of a multiprocessing.Pool, but secretly does + everything in series + """ + for item in iterable: + func(item) + + def create_dummy_thumbnails( + self, + thumb_dir: Path, + ext: str, + count: int, + start_count: int = 0, + ): + """ + Helper to create a certain count of files of given extension in a given directory + """ + for idx in range(count): + (Path(thumb_dir) / Path(f"{start_count + idx:07}.{ext}")).touch() + # Triple check expected files exist + self.assert_file_count_by_extension(ext, thumb_dir, count) + + def create_webp_thumbnail_files( + self, + thumb_dir: Path, + count: int, + start_count: int = 0, + ): + """ + Creates a dummy WebP thumbnail file in the given directory, based on + the database Document + """ + self.create_dummy_thumbnails(thumb_dir, "webp", count, start_count) + + def create_png_thumbnail_file( + self, + thumb_dir: Path, + count: int, + start_count: int = 0, + ): + """ + Creates a dummy PNG thumbnail file in the given directory, based on + the database Document + """ + self.create_dummy_thumbnails(thumb_dir, "png", count, start_count) + + def assert_file_count_by_extension( + self, + ext: str, + dir: Union[str, Path], + expected_count: int, + ): + """ + Helper to assert a certain count of given extension files in given directory + """ + if not isinstance(dir, Path): + dir = Path(dir) + matching_files = list(dir.glob(f"*.{ext}")) + self.assertEqual(len(matching_files), expected_count) + + def assert_png_file_count(self, dir: Path, expected_count: int): + """ + Helper to assert a certain count of PNG extension files in given directory + """ + self.assert_file_count_by_extension("png", dir, expected_count) + + def assert_webp_file_count(self, dir: Path, expected_count: int): + """ + Helper to assert a certain count of WebP extension files in given directory + """ + self.assert_file_count_by_extension("webp", dir, expected_count) + + def setUp(self): + + self.thumbnail_dir = Path(tempfile.mkdtemp()).resolve() + + return super().setUp() + + def tearDown(self) -> None: + + shutil.rmtree(self.thumbnail_dir) + + return super().tearDown() + + def test_do_nothing_if_converted( + self, + run_convert_mock: mock.MagicMock, + map_mock: mock.MagicMock, + ): + """ + GIVEN: + - Document exists with default WebP thumbnail path + WHEN: + - Thumbnail conversion is attempted + THEN: + - Nothing is converted + """ + map_mock.side_effect = self.pretend_map + + with override_settings( + THUMBNAIL_DIR=self.thumbnail_dir, + ): + + self.create_webp_thumbnail_files(self.thumbnail_dir, 3) + + self.performMigration() + run_convert_mock.assert_not_called() + + self.assert_webp_file_count(self.thumbnail_dir, 3) + + def test_convert_single_thumbnail( + self, + run_convert_mock: mock.MagicMock, + map_mock: mock.MagicMock, + ): + """ + GIVEN: + - Document exists with PNG thumbnail + WHEN: + - Thumbnail conversion is attempted + THEN: + - Single thumbnail is converted + """ + map_mock.side_effect = self.pretend_map + run_convert_mock.side_effect = self.pretend_convert_output + + with override_settings( + THUMBNAIL_DIR=self.thumbnail_dir, + ): + self.create_png_thumbnail_file(self.thumbnail_dir, 3) + + self.performMigration() + + run_convert_mock.assert_called() + self.assertEqual(run_convert_mock.call_count, 3) + + self.assert_webp_file_count(self.thumbnail_dir, 3) + + def test_convert_errors_out( + self, + run_convert_mock: mock.MagicMock, + map_mock: mock.MagicMock, + ): + """ + GIVEN: + - Document exists with PNG thumbnail + WHEN: + - Thumbnail conversion is attempted, but raises an exception + THEN: + - Single thumbnail is converted + """ + map_mock.side_effect = self.pretend_map + run_convert_mock.side_effect = OSError + + with override_settings( + THUMBNAIL_DIR=self.thumbnail_dir, + ): + + self.create_png_thumbnail_file(self.thumbnail_dir, 3) + + self.performMigration() + + run_convert_mock.assert_called() + self.assertEqual(run_convert_mock.call_count, 3) + + self.assert_png_file_count(self.thumbnail_dir, 3) + + def test_convert_mixed( + self, + run_convert_mock: mock.MagicMock, + map_mock: mock.MagicMock, + ): + """ + GIVEN: + - Document exists with PNG thumbnail + WHEN: + - Thumbnail conversion is attempted, but raises an exception + THEN: + - Single thumbnail is converted + """ + map_mock.side_effect = self.pretend_map + run_convert_mock.side_effect = self.pretend_convert_output + + with override_settings( + THUMBNAIL_DIR=self.thumbnail_dir, + ): + + self.create_png_thumbnail_file(self.thumbnail_dir, 3) + self.create_webp_thumbnail_files(self.thumbnail_dir, 2, start_count=3) + + self.performMigration() + + run_convert_mock.assert_called() + self.assertEqual(run_convert_mock.call_count, 3) + + self.assert_png_file_count(self.thumbnail_dir, 0) + self.assert_webp_file_count(self.thumbnail_dir, 5)