From a2d4d1686758365f41c0dd52dcaf8ab5a3369b33 Mon Sep 17 00:00:00 2001 From: Trenton H <797416+stumpylog@users.noreply.github.com> Date: Tue, 25 Apr 2023 12:17:37 -0700 Subject: [PATCH] Make the importer a little more robust against some types of errors --- .../management/commands/document_importer.py | 80 ++++++++++------- src/documents/tests/test_importer.py | 85 ++++++++++++++++++- .../tests/test_management_exporter.py | 10 +-- 3 files changed, 136 insertions(+), 39 deletions(-) diff --git a/src/documents/management/commands/document_importer.py b/src/documents/management/commands/document_importer.py index d68f04ac3..0676117e7 100644 --- a/src/documents/management/commands/document_importer.py +++ b/src/documents/management/commands/document_importer.py @@ -64,9 +64,9 @@ class Command(BaseCommand): logging.getLogger().handlers[0].level = logging.ERROR - self.source = options["source"] + self.source = Path(options["source"]).resolve() - if not os.path.exists(self.source): + if not self.source.exists(): raise CommandError("That path doesn't exist") if not os.access(self.source, os.R_OK): @@ -74,39 +74,39 @@ class Command(BaseCommand): manifest_paths = [] - main_manifest_path = os.path.normpath( - os.path.join(self.source, "manifest.json"), - ) + main_manifest_path = self.source / "manifest.json" + self._check_manifest_exists(main_manifest_path) - with open(main_manifest_path) as f: - self.manifest = json.load(f) + with main_manifest_path.open() as infile: + self.manifest = json.load(infile) manifest_paths.append(main_manifest_path) for file in Path(self.source).glob("**/*-manifest.json"): - with open(file) as f: - self.manifest += json.load(f) + with file.open() as infile: + self.manifest += json.load(infile) manifest_paths.append(file) - version_path = os.path.normpath(os.path.join(self.source, "version.json")) - if os.path.exists(version_path): - with open(version_path) as f: - self.version = json.load(f)["version"] - # Provide an initial warning if needed to the user - if self.version != version.__full_version_str__: - self.stdout.write( - self.style.WARNING( - "Version mismatch: " - f"Currently {version.__full_version_str__}," - f" importing {self.version}." - " Continuing, but import may fail.", - ), - ) + version_path = self.source / "version.json" + if version_path.exists(): + with version_path.open() as infile: + self.version = json.load(infile)["version"] + # Provide an initial warning if needed to the user + if self.version != version.__full_version_str__: + self.stdout.write( + self.style.WARNING( + "Version mismatch: " + f"Currently {version.__full_version_str__}," + f" importing {self.version}." + " Continuing, but import may fail.", + ), + ) else: self.stdout.write(self.style.NOTICE("No version.json file located")) - self._check_manifest() + self._check_manifest_valid() + with disable_signal( post_save, receiver=update_filename_and_move_files, @@ -150,14 +150,18 @@ class Command(BaseCommand): ) @staticmethod - def _check_manifest_exists(path): - if not os.path.exists(path): + def _check_manifest_exists(path: Path): + if not path.exists(): raise CommandError( "That directory doesn't appear to contain a manifest.json file.", ) - def _check_manifest(self): - + def _check_manifest_valid(self): + """ + Attempts to verify the manifest is valid. Namely checking the files + referred to exist and the files can be read from + """ + self.stdout.write("Checking the manifest") for record in self.manifest: if record["model"] != "documents.document": @@ -170,19 +174,35 @@ class Command(BaseCommand): ) doc_file = record[EXPORTER_FILE_NAME] - if not os.path.exists(os.path.join(self.source, doc_file)): + doc_path = self.source / doc_file + if not doc_path.exists(): raise CommandError( 'The manifest file refers to "{}" which does not ' "appear to be in the source directory.".format(doc_file), ) + try: + with doc_path.open(mode="rb") as infile: + infile.read(1) + except Exception as e: + raise CommandError( + f"Failed to read from original file {doc_path}", + ) from e if EXPORTER_ARCHIVE_NAME in record: archive_file = record[EXPORTER_ARCHIVE_NAME] - if not os.path.exists(os.path.join(self.source, archive_file)): + doc_archive_path = self.source / archive_file + if not doc_archive_path.exists(): raise CommandError( f"The manifest file refers to {archive_file} which " f"does not appear to be in the source directory.", ) + try: + with doc_archive_path.open(mode="rb") as infile: + infile.read(1) + except Exception as e: + raise CommandError( + f"Failed to read from archive file {doc_archive_path}", + ) from e def _import_files_from_manifest(self, progress_bar_disable): diff --git a/src/documents/tests/test_importer.py b/src/documents/tests/test_importer.py index 8a659dbb6..a98e7394b 100644 --- a/src/documents/tests/test_importer.py +++ b/src/documents/tests/test_importer.py @@ -1,6 +1,10 @@ from django.core.management.base import CommandError from django.test import TestCase from documents.settings import EXPORTER_FILE_NAME +from documents.settings import EXPORTER_ARCHIVE_NAME +from pathlib import Path +import tempfile +from django.core.management import call_command from documents.management.commands.document_importer import Command @@ -14,17 +18,17 @@ class TestImporter(TestCase): self.assertRaises( CommandError, cmd._check_manifest_exists, - "/tmp/manifest.json", + Path("/tmp/manifest.json"), ) def test_check_manifest(self): cmd = Command() - cmd.source = "/tmp" + cmd.source = Path("/tmp") cmd.manifest = [{"model": "documents.document"}] with self.assertRaises(CommandError) as cm: - cmd._check_manifest() + cmd._check_manifest_valid() self.assertIn("The manifest file contains a record", str(cm.exception)) cmd.manifest = [ @@ -32,8 +36,81 @@ class TestImporter(TestCase): ] # self.assertRaises(CommandError, cmd._check_manifest) with self.assertRaises(CommandError) as cm: - cmd._check_manifest() + cmd._check_manifest_valid() self.assertIn( 'The manifest file refers to "noexist.pdf"', str(cm.exception), ) + + def test_import_permission_error(self): + """ + GIVEN: + - Original file which cannot be read from + - Archive file which cannot be read from + WHEN: + - Import is attempted + THEN: + - CommandError is raised indicating the issue + """ + with tempfile.TemporaryDirectory() as temp_dir: + + # Create empty files + original_path = Path(temp_dir) / "original.pdf" + archive_path = Path(temp_dir) / "archive.pdf" + original_path.touch() + archive_path.touch() + + # No read permissions + original_path.chmod(0o222) + + cmd = Command() + cmd.source = Path(temp_dir) + cmd.manifest = [ + { + "model": "documents.document", + EXPORTER_FILE_NAME: "original.pdf", + EXPORTER_ARCHIVE_NAME: "archive.pdf", + }, + ] + with self.assertRaises(CommandError) as cm: + cmd._check_manifest_valid() + self.assertInt("Failed to read from original file", str(cm.exception)) + + original_path.chmod(0o444) + archive_path.chmod(0o222) + + with self.assertRaises(CommandError) as cm: + cmd._check_manifest_valid() + self.assertInt("Failed to read from archive file", str(cm.exception)) + + def test_import_source_not_existing(self): + """ + GIVEN: + - Source given doesn't exist + WHEN: + - Import is attempted + THEN: + - CommandError is raised indicating the issue + """ + with self.assertRaises(CommandError) as cm: + call_command("document_importer", Path("/tmp/notapath")) + self.assertInt("That path doesn't exist", str(cm.exception)) + + def test_import_source_not_readable(self): + """ + GIVEN: + - Source given isn't readable + WHEN: + - Import is attempted + THEN: + - CommandError is raised indicating the issue + """ + with tempfile.TemporaryDirectory() as temp_dir: + path = Path(temp_dir) + path.chmod(0o222) + with self.assertRaises(CommandError) as cm: + call_command("document_importer", path) + self.assertInt( + "That path doesn't appear to be readable", + str(cm.exception), + ) diff --git a/src/documents/tests/test_management_exporter.py b/src/documents/tests/test_management_exporter.py index 05b6db5b3..cc52dc9c1 100644 --- a/src/documents/tests/test_management_exporter.py +++ b/src/documents/tests/test_management_exporter.py @@ -212,7 +212,7 @@ class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase): Tag.objects.all().delete() self.assertEqual(Document.objects.count(), 0) - call_command("document_importer", self.target) + call_command("document_importer", "--no-progress-bar", self.target) self.assertEqual(Document.objects.count(), 4) self.assertEqual(Tag.objects.count(), 1) self.assertEqual(Correspondent.objects.count(), 1) @@ -541,7 +541,7 @@ class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase): self.assertEqual(Document.objects.count(), 4) Document.objects.all().delete() self.assertEqual(Document.objects.count(), 0) - call_command("document_importer", self.target) + call_command("document_importer", "--no-progress-bar", self.target) self.assertEqual(Document.objects.count(), 4) def test_no_thumbnail(self): @@ -584,7 +584,7 @@ class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase): self.assertEqual(Document.objects.count(), 4) Document.objects.all().delete() self.assertEqual(Document.objects.count(), 0) - call_command("document_importer", self.target) + call_command("document_importer", "--no-progress-bar", self.target) self.assertEqual(Document.objects.count(), 4) def test_split_manifest(self): @@ -613,7 +613,7 @@ class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase): self.assertEqual(Document.objects.count(), 4) Document.objects.all().delete() self.assertEqual(Document.objects.count(), 0) - call_command("document_importer", self.target) + call_command("document_importer", "--no-progress-bar", self.target) self.assertEqual(Document.objects.count(), 4) def test_folder_prefix(self): @@ -637,5 +637,5 @@ class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase): self.assertEqual(Document.objects.count(), 4) Document.objects.all().delete() self.assertEqual(Document.objects.count(), 0) - call_command("document_importer", self.target) + call_command("document_importer", "--no-progress-bar", self.target) self.assertEqual(Document.objects.count(), 4)