From cc4cea1a41c19e01cf93e0b21d4da56d65600bb3 Mon Sep 17 00:00:00 2001
From: Trenton Holmes <holmes.trenton@gmail.com>
Date: Sat, 11 Jun 2022 13:04:21 -0700
Subject: [PATCH] Converts the thumbnail conversion command into a database migration

---
 docker/install_management_commands.sh         |   3 +-
 .../management/commands/convert_thumbnails.py |  97 --------
 .../1021_webp_thumbnail_conversion.py         | 107 ++++++++
 src/documents/models.py                       |  15 +-
 .../test_management_convert_thumbnail.py      | 139 -----------
 .../tests/test_migration_webp_conversion.py   | 231 ++++++++++++++++++
 6 files changed, 340 insertions(+), 252 deletions(-)
 delete mode 100644 src/documents/management/commands/convert_thumbnails.py
 create mode 100644 src/documents/migrations/1021_webp_thumbnail_conversion.py
 delete mode 100644 src/documents/tests/test_management_convert_thumbnail.py
 create mode 100644 src/documents/tests/test_migration_webp_conversion.py

diff --git a/docker/install_management_commands.sh b/docker/install_management_commands.sh
index beb600fdb..e5c8b30a0 100755
--- a/docker/install_management_commands.sh
+++ b/docker/install_management_commands.sh
@@ -2,8 +2,7 @@
 
 set -eu
 
-for command in convert_thumbnails \
-	decrypt_documents \
+for command in decrypt_documents \
 	document_archiver \
 	document_exporter \
 	document_importer \
diff --git a/src/documents/management/commands/convert_thumbnails.py b/src/documents/management/commands/convert_thumbnails.py
deleted file mode 100644
index 089c689c9..000000000
--- a/src/documents/management/commands/convert_thumbnails.py
+++ /dev/null
@@ -1,97 +0,0 @@
-import logging
-import multiprocessing.pool
-import shutil
-import tempfile
-import time
-from pathlib import Path
-
-from django.core.management.base import BaseCommand
-from documents.models import Document
-from documents.parsers import run_convert
-
-logger = logging.getLogger("paperless.management.convert_thumbnails")
-
-
-def _do_convert(work_package):
-    _, existing_thumbnail, converted_thumbnail = work_package
-    try:
-
-        logger.info(f"Converting thumbnail: {existing_thumbnail}")
-
-        # Run actual conversion
-        run_convert(
-            density=300,
-            scale="500x5000>",
-            alpha="remove",
-            strip=True,
-            trim=False,
-            auto_orient=True,
-            input_file=f"{existing_thumbnail}[0]",
-            output_file=str(converted_thumbnail),
-        )
-
-        # Copy newly created thumbnail to thumbnail directory
-        shutil.copy(converted_thumbnail, existing_thumbnail.parent)
-
-        # Remove the PNG version
-        existing_thumbnail.unlink()
-
-        logger.info(
-            "Conversion to WebP completed, "
-            f"replaced {existing_thumbnail.name} with {converted_thumbnail.name}",
-        )
-
-    except Exception as e:
-        logger.error(
-            f"Error converting thumbnail" f" (existing file unchanged): {e}",
-        )
-
-
-class Command(BaseCommand):
-
-    help = """
-        Converts existing PNG thumbnails into
-        WebP format.
-    """.replace(
-        "    ",
-        "",
-    )
-
-    def handle(self, *args, **options):
-
-        logger.info("Converting all PNG thumbnails to WebP")
-        start = time.time()
-        documents = Document.objects.all()
-
-        with tempfile.TemporaryDirectory() as tempdir:
-
-            work_packages = []
-
-            for document in documents:
-                existing_thumbnail = Path(document.thumbnail_path).resolve()
-
-                if existing_thumbnail.suffix == ".png":
-
-                    # Change the existing filename suffix from png to webp
-                    converted_thumbnail_name = existing_thumbnail.with_suffix(
-                        ".webp",
-                    ).name
-
-                    # Create the expected output filename in the tempdir
-                    converted_thumbnail = (
-                        Path(tempdir) / Path(converted_thumbnail_name)
-                    ).resolve()
-
-                    # Package up the necessary info
-                    work_packages.append(
-                        (document, existing_thumbnail, converted_thumbnail),
-                    )
-
-            if len(work_packages):
-                with multiprocessing.pool.Pool(processes=4, maxtasksperchild=4) as pool:
-                    pool.map(_do_convert, work_packages)
-
-            end = time.time()
-            duration = end - start
-
-        logger.info(f"Conversion completed in {duration:.3f}s")
diff --git a/src/documents/migrations/1021_webp_thumbnail_conversion.py b/src/documents/migrations/1021_webp_thumbnail_conversion.py
new file mode 100644
index 000000000..c5a1c8733
--- /dev/null
+++ b/src/documents/migrations/1021_webp_thumbnail_conversion.py
@@ -0,0 +1,107 @@
+# Generated by Django 4.0.5 on 2022-06-11 15:40
+import logging
+import multiprocessing.pool
+import shutil
+import tempfile
+import time
+from pathlib import Path
+
+from django.conf import settings
+from django.db import migrations
+from documents.parsers import run_convert
+
+logger = logging.getLogger("paperless.migrations")
+
+
+def _do_convert(work_package):
+    """Convert one (png_path, temp_webp_path) pair; failures are only logged."""
+    existing_thumbnail, converted_thumbnail = work_package
+    try:
+        logger.info(f"Converting thumbnail: {existing_thumbnail}")
+
+        # Run actual conversion ("[0]" presumably selects the first frame - confirm)
+        run_convert(
+            density=300,
+            scale="500x5000>",
+            alpha="remove",
+            strip=True,
+            trim=False,
+            auto_orient=True,
+            input_file=f"{existing_thumbnail}[0]",
+            output_file=str(converted_thumbnail),
+        )
+
+        # Copy the newly created WebP file into the directory of the original PNG
+        shutil.copy(converted_thumbnail, existing_thumbnail.parent)
+
+        # Remove the PNG version, only reached once the copy above succeeded
+        existing_thumbnail.unlink()
+
+        logger.info(
+            "Conversion to WebP completed, "
+            f"replaced {existing_thumbnail.name} with {converted_thumbnail.name}",
+        )
+    except Exception as e:
+        # Not re-raised: the error is only logged
+        logger.error(f"Error converting thumbnail (existing file unchanged): {e}")
+
+
+def _convert_thumbnails_to_webp(apps, schema_editor):
+    """RunPython entry point: convert all *.png files in THUMBNAIL_DIR to WebP."""
+    start = time.time()
+
+    with tempfile.TemporaryDirectory() as tempdir:
+        work_packages = []
+
+        for file in Path(settings.THUMBNAIL_DIR).glob("*.png"):
+            existing_thumbnail = file.resolve()
+
+            # Change the existing filename suffix from png to webp
+            converted_thumbnail_name = existing_thumbnail.with_suffix(
+                ".webp",
+            ).name
+
+            # Create the expected output filename in the tempdir
+            converted_thumbnail = (
+                Path(tempdir) / Path(converted_thumbnail_name)
+            ).resolve()
+
+            # Package up the necessary info
+            work_packages.append(
+                (existing_thumbnail, converted_thumbnail),
+            )
+
+        if len(work_packages):
+
+            logger.info(
+                "\n\n"
+                "  This is a one-time only migration to convert thumbnails for all of your\n"
+                "  documents into WebP format.  If you have a lot of documents though, \n"
+                "  this may take a while, so a coffee break may be in order."
+                "\n",
+            )
+            # At most 4 worker processes; each one is replaced after 4 tasks
+            with multiprocessing.pool.Pool(
+                processes=min(multiprocessing.cpu_count(), 4),
+                maxtasksperchild=4,
+            ) as pool:
+                pool.map(_do_convert, work_packages)
+
+        end = time.time()
+        duration = end - start
+
+    logger.info(f"Conversion completed in {duration:.3f}s")
+
+
+class Migration(migrations.Migration):
+    # Data-only migration: a single RunPython step whose reverse is a no-op
+    dependencies = [
+        ("documents", "1020_merge_20220518_1839"),
+    ]
+
+    operations = [
+        migrations.RunPython(
+            code=_convert_thumbnails_to_webp,
+            reverse_code=migrations.RunPython.noop,
+        ),
+    ]
diff --git a/src/documents/models.py b/src/documents/models.py
index 9fed321c3..f24ce462e 100644
--- a/src/documents/models.py
+++ b/src/documents/models.py
@@ -294,26 +294,13 @@ class Document(models.Model):
 
     @property
     def thumbnail_path(self) -> str:
-        png_file_name = f"{self.pk:07}.png"
         webp_file_name = f"{self.pk:07}.webp"
         if self.storage_type == self.STORAGE_TYPE_GPG:
-            png_file_name += ".gpg"
             webp_file_name += ".gpg"
 
-        # This property is used to both generate the file path
-        # and locate the file itself
-        # Hence why this looks a little weird
-
         webp_file_path = os.path.join(settings.THUMBNAIL_DIR, webp_file_name)
-        png_file_path = os.path.join(settings.THUMBNAIL_DIR, png_file_name)
 
-        # 1. Assume the thumbnail is WebP
-        if os.path.exists(png_file_path):
-            thumb = png_file_path
-        else:
-            thumb = webp_file_path
-
-        return os.path.normpath(thumb)
+        return os.path.normpath(webp_file_path)
 
     @property
     def thumbnail_file(self):
diff --git a/src/documents/tests/test_management_convert_thumbnail.py b/src/documents/tests/test_management_convert_thumbnail.py
deleted file mode 100644
index 8413cec3a..000000000
--- a/src/documents/tests/test_management_convert_thumbnail.py
+++ /dev/null
@@ -1,139 +0,0 @@
-import filecmp
-import shutil
-import tempfile
-from io import StringIO
-from pathlib import Path
-from unittest import mock
-
-from django.core.management import call_command
-from django.test import override_settings
-from django.test import TestCase
-from documents.models import Document
-
-
-class TestConvertThumbnails(TestCase):
-    def call_command(self):
-        stdout = StringIO()
-        stderr = StringIO()
-        call_command(
-            "convert_thumbnails",
-            "--no-color",
-            stdout=stdout,
-            stderr=stderr,
-        )
-        return stdout.getvalue(), stderr.getvalue()
-
-    def setUp(self):
-        """
-        Creates a document in the database
-        """
-        super().setUp()
-
-        self.doc = Document.objects.create(
-            pk=1,
-            checksum="A",
-            title="A",
-            content="first document",
-            mime_type="application/pdf",
-        )
-        self.doc.save()
-
-    def pretend_convert_output(self, *args, **kwargs):
-        """
-        Pretends to do the conversion, by copying the input file
-        to the output file
-        """
-        shutil.copy2(
-            Path(kwargs["input_file"].rstrip("[0]")),
-            Path(kwargs["output_file"]),
-        )
-
-    def create_webp_thumbnail_file(self, thumb_dir):
-        """
-        Creates a dummy WebP thumbnail file in the given directory, based on
-        the database Document
-        """
-        thumb_file = Path(thumb_dir) / Path(f"{self.doc.pk:07}.webp")
-        thumb_file.write_text("this is a dummy webp file")
-        return thumb_file
-
-    def create_png_thumbnail_file(self, thumb_dir):
-        """
-        Creates a dummy PNG thumbnail file in the given directory, based on
-        the database Document
-        """
-        thumb_file = Path(thumb_dir) / Path(f"{self.doc.pk:07}.png")
-        thumb_file.write_text("this is a dummy png file")
-        return thumb_file
-
-    @mock.patch("documents.management.commands.convert_thumbnails.run_convert")
-    def test_do_nothing_if_converted(self, run_convert_mock):
-        """
-        GIVEN:
-            - Document exists with default WebP thumbnail path
-        WHEN:
-            - Thumbnail conversion is attempted
-        THEN:
-            - Nothing is converted
-        """
-
-        stdout, _ = self.call_command()
-        run_convert_mock.assert_not_called()
-        self.assertIn("Converting all PNG thumbnails to WebP", stdout)
-
-    @mock.patch("documents.management.commands.convert_thumbnails.run_convert")
-    def test_convert_single_thumbnail(self, run_convert_mock):
-        """
-        GIVEN:
-            - Document exists with PNG thumbnail
-        WHEN:
-            - Thumbnail conversion is attempted
-        THEN:
-            - Single thumbnail is converted
-        """
-
-        run_convert_mock.side_effect = self.pretend_convert_output
-
-        with tempfile.TemporaryDirectory() as thumbnail_dir:
-
-            with override_settings(
-                THUMBNAIL_DIR=thumbnail_dir,
-            ):
-
-                thumb_file = self.create_png_thumbnail_file(thumbnail_dir)
-
-                stdout, _ = self.call_command()
-
-                run_convert_mock.assert_called_once()
-                self.assertIn(f"{thumb_file}", stdout)
-                self.assertIn("Conversion to WebP completed", stdout)
-
-                self.assertFalse(thumb_file.exists())
-                self.assertTrue(thumb_file.with_suffix(".webp").exists())
-
-    @mock.patch("documents.management.commands.convert_thumbnails.run_convert")
-    def test_convert_errors_out(self, run_convert_mock):
-        """
-        GIVEN:
-            - Document exists with PNG thumbnail
-        WHEN:
-            - Thumbnail conversion is attempted, but raises an exception
-        THEN:
-            - Single thumbnail is converted
-        """
-
-        run_convert_mock.side_effect = OSError
-
-        with tempfile.TemporaryDirectory() as thumbnail_dir:
-
-            with override_settings(
-                THUMBNAIL_DIR=thumbnail_dir,
-            ):
-
-                thumb_file = self.create_png_thumbnail_file(thumbnail_dir)
-
-                _, stderr = self.call_command()
-
-                run_convert_mock.assert_called_once()
-                self.assertIn("Error converting thumbnail", stderr)
-                self.assertTrue(thumb_file.exists())
diff --git a/src/documents/tests/test_migration_webp_conversion.py b/src/documents/tests/test_migration_webp_conversion.py
new file mode 100644
index 000000000..a3a5fa6bc
--- /dev/null
+++ b/src/documents/tests/test_migration_webp_conversion.py
@@ -0,0 +1,231 @@
+import shutil
+import tempfile
+from pathlib import Path
+from typing import Callable
+from typing import Iterable
+from typing import Union
+from unittest import mock
+
+from django.test import override_settings
+from documents.tests.test_migration_archive_files import thumbnail_path
+from documents.tests.utils import TestMigrations
+
+
+@mock.patch(
+    "documents.migrations.1021_webp_thumbnail_conversion.multiprocessing.pool.Pool.map",
+)
+@mock.patch("documents.migrations.1021_webp_thumbnail_conversion.run_convert")
+class TestMigrateWebPThumbnails(TestMigrations):
+    """Runs migration 1021 with run_convert and Pool.map mocked out."""
+    migrate_from = "1020_merge_20220518_1839"
+    migrate_to = "1021_webp_thumbnail_conversion"
+    auto_migrate = False
+
+    def pretend_convert_output(self, *args, **kwargs):
+        """
+        Pretends to do the conversion, by copying the input file (minus its
+        trailing "[0]" selector; rstrip would strip a character set) to the output
+        """
+        shutil.copy2(
+            Path(kwargs["input_file"].rsplit("[0]", 1)[0]),
+            Path(kwargs["output_file"]),
+        )
+
+    def pretend_map(self, func: Callable, iterable: Iterable):
+        """
+        Pretends to be the map of a multiprocessing.Pool, but secretly does
+        everything in series (and, unlike Pool.map, returns nothing)
+        """
+        for item in iterable:
+            func(item)
+
+    def create_dummy_thumbnails(
+        self,
+        thumb_dir: Path,
+        ext: str,
+        count: int,
+        start_count: int = 0,
+    ):
+        """
+        Helper to create a certain count of files of given extension in a given directory
+        """
+        for idx in range(count):
+            (Path(thumb_dir) / Path(f"{start_count + idx:07}.{ext}")).touch()
+        # Sanity check: the directory now holds exactly count files of this extension
+        self.assert_file_count_by_extension(ext, thumb_dir, count)
+
+    def create_webp_thumbnail_files(
+        self,
+        thumb_dir: Path,
+        count: int,
+        start_count: int = 0,
+    ):
+        """
+        Creates count empty WebP thumbnail files in the given directory,
+        numbered upwards from start_count
+        """
+        self.create_dummy_thumbnails(thumb_dir, "webp", count, start_count)
+
+    def create_png_thumbnail_file(
+        self,
+        thumb_dir: Path,
+        count: int,
+        start_count: int = 0,
+    ):
+        """
+        Creates count empty PNG thumbnail files in the given directory,
+        numbered upwards from start_count
+        """
+        self.create_dummy_thumbnails(thumb_dir, "png", count, start_count)
+
+    def assert_file_count_by_extension(
+        self,
+        ext: str,
+        dir: Union[str, Path],
+        expected_count: int,
+    ):
+        """
+        Helper to assert a certain count of given extension files in given directory
+        """
+        if not isinstance(dir, Path):
+            dir = Path(dir)
+        matching_files = list(dir.glob(f"*.{ext}"))
+        self.assertEqual(len(matching_files), expected_count)
+
+    def assert_png_file_count(self, dir: Path, expected_count: int):
+        """
+        Helper to assert a certain count of PNG extension files in given directory
+        """
+        self.assert_file_count_by_extension("png", dir, expected_count)
+
+    def assert_webp_file_count(self, dir: Path, expected_count: int):
+        """
+        Helper to assert a certain count of WebP extension files in given directory
+        """
+        self.assert_file_count_by_extension("webp", dir, expected_count)
+
+    def setUp(self):
+        # Fresh scratch directory per test; the tests use it as THUMBNAIL_DIR
+        self.thumbnail_dir = Path(tempfile.mkdtemp()).resolve()
+
+        return super().setUp()
+
+    def tearDown(self) -> None:
+        # Remove the scratch directory created in setUp
+        shutil.rmtree(self.thumbnail_dir)
+
+        return super().tearDown()
+
+    def test_do_nothing_if_converted(
+        self,
+        run_convert_mock: mock.MagicMock,
+        map_mock: mock.MagicMock,
+    ):
+        """
+        GIVEN:
+            - Thumbnail directory contains only WebP thumbnails
+        WHEN:
+            - Thumbnail conversion is attempted
+        THEN:
+            - Nothing is converted
+        """
+        map_mock.side_effect = self.pretend_map
+
+        with override_settings(
+            THUMBNAIL_DIR=self.thumbnail_dir,
+        ):
+
+            self.create_webp_thumbnail_files(self.thumbnail_dir, 3)
+
+            self.performMigration()
+            run_convert_mock.assert_not_called()
+
+            self.assert_webp_file_count(self.thumbnail_dir, 3)
+
+    def test_convert_single_thumbnail(
+        self,
+        run_convert_mock: mock.MagicMock,
+        map_mock: mock.MagicMock,
+    ):
+        """
+        GIVEN:
+            - Thumbnail directory contains only PNG thumbnails
+        WHEN:
+            - Thumbnail conversion is attempted
+        THEN:
+            - Every PNG thumbnail is replaced by a WebP thumbnail
+        """
+        map_mock.side_effect = self.pretend_map
+        run_convert_mock.side_effect = self.pretend_convert_output
+
+        with override_settings(
+            THUMBNAIL_DIR=self.thumbnail_dir,
+        ):
+            self.create_png_thumbnail_file(self.thumbnail_dir, 3)
+
+            self.performMigration()
+
+            run_convert_mock.assert_called()
+            self.assertEqual(run_convert_mock.call_count, 3)
+            self.assert_png_file_count(self.thumbnail_dir, 0)
+            self.assert_webp_file_count(self.thumbnail_dir, 3)
+
+    def test_convert_errors_out(
+        self,
+        run_convert_mock: mock.MagicMock,
+        map_mock: mock.MagicMock,
+    ):
+        """
+        GIVEN:
+            - Thumbnail directory contains only PNG thumbnails
+        WHEN:
+            - Thumbnail conversion is attempted, but raises an exception
+        THEN:
+            - Nothing is converted and the PNG thumbnails are left in place
+        """
+        map_mock.side_effect = self.pretend_map
+        run_convert_mock.side_effect = OSError
+
+        with override_settings(
+            THUMBNAIL_DIR=self.thumbnail_dir,
+        ):
+
+            self.create_png_thumbnail_file(self.thumbnail_dir, 3)
+
+            self.performMigration()
+
+            run_convert_mock.assert_called()
+            self.assertEqual(run_convert_mock.call_count, 3)
+            self.assert_webp_file_count(self.thumbnail_dir, 0)
+            self.assert_png_file_count(self.thumbnail_dir, 3)
+
+    def test_convert_mixed(
+        self,
+        run_convert_mock: mock.MagicMock,
+        map_mock: mock.MagicMock,
+    ):
+        """
+        GIVEN:
+            - Thumbnail directory contains both PNG and WebP thumbnails
+        WHEN:
+            - Thumbnail conversion is attempted
+        THEN:
+            - Only the PNG thumbnails are converted, existing WebP files remain
+        """
+        map_mock.side_effect = self.pretend_map
+        run_convert_mock.side_effect = self.pretend_convert_output
+
+        with override_settings(
+            THUMBNAIL_DIR=self.thumbnail_dir,
+        ):
+
+            self.create_png_thumbnail_file(self.thumbnail_dir, 3)
+            self.create_webp_thumbnail_files(self.thumbnail_dir, 2, start_count=3)
+
+            self.performMigration()
+
+            run_convert_mock.assert_called()
+            self.assertEqual(run_convert_mock.call_count, 3)
+
+            self.assert_png_file_count(self.thumbnail_dir, 0)
+            self.assert_webp_file_count(self.thumbnail_dir, 5)