From ca355d585525d36a7aa2d640ee405060c10bcd2f Mon Sep 17 00:00:00 2001 From: Trenton H <797416+stumpylog@users.noreply.github.com> Date: Mon, 4 Dec 2023 19:39:59 -0800 Subject: [PATCH] Adds additional warnings during an import if it might fail due to reasons (#4814) --- .../management/commands/document_importer.py | 44 ++- src/documents/tests/test_importer.py | 115 -------- .../tests/test_management_importer.py | 274 ++++++++++++++++++ 3 files changed, 314 insertions(+), 119 deletions(-) delete mode 100644 src/documents/tests/test_importer.py create mode 100644 src/documents/tests/test_management_importer.py diff --git a/src/documents/management/commands/document_importer.py b/src/documents/management/commands/document_importer.py index 9895104c3..c166ec0cb 100644 --- a/src/documents/management/commands/document_importer.py +++ b/src/documents/management/commands/document_importer.py @@ -7,6 +7,7 @@ from pathlib import Path import tqdm from django.conf import settings from django.contrib.auth.models import Permission +from django.contrib.auth.models import User from django.contrib.contenttypes.models import ContentType from django.core.exceptions import FieldDoesNotExist from django.core.management import call_command @@ -60,10 +61,11 @@ class Command(BaseCommand): self.manifest = None self.version = None - def handle(self, *args, **options): - logging.getLogger().handlers[0].level = logging.ERROR - - self.source = Path(options["source"]).resolve() + def pre_check(self) -> None: + """ + Runs some initial checks against the source directory, including looking for + common mistakes like having files still and users other than expected + """ if not self.source.exists(): raise CommandError("That path doesn't exist") @@ -71,6 +73,40 @@ class Command(BaseCommand): if not os.access(self.source, os.R_OK): raise CommandError("That path doesn't appear to be readable") + for document_dir in [settings.ORIGINALS_DIR, settings.ARCHIVE_DIR]: + if document_dir.exists() and document_dir.is_dir(): + for entry in document_dir.glob("**/*"): + if entry.is_dir(): + continue + self.stdout.write( + self.style.WARNING( + f"Found file {entry.relative_to(document_dir)}, this might indicate a non-empty installation", + ), + ) + break + if ( + User.objects.exclude(username__in=["consumer", "AnonymousUser"]).count() + != 0 + ): + self.stdout.write( + self.style.WARNING( + "Found existing user(s), this might indicate a non-empty installation", + ), + ) + if Document.objects.count() != 0: + self.stdout.write( + self.style.WARNING( + "Found existing documents(s), this might indicate a non-empty installation", + ), + ) + + def handle(self, *args, **options): + logging.getLogger().handlers[0].level = logging.ERROR + + self.source = Path(options["source"]).resolve() + + self.pre_check() + manifest_paths = [] main_manifest_path = self.source / "manifest.json" diff --git a/src/documents/tests/test_importer.py b/src/documents/tests/test_importer.py deleted file mode 100644 index d86101fd8..000000000 --- a/src/documents/tests/test_importer.py +++ /dev/null @@ -1,115 +0,0 @@ -import tempfile -from pathlib import Path - -from django.core.management import call_command -from django.core.management.base import CommandError -from django.test import TestCase - -from documents.management.commands.document_importer import Command -from documents.settings import EXPORTER_ARCHIVE_NAME -from documents.settings import EXPORTER_FILE_NAME - - -class TestImporter(TestCase): - def __init__(self, *args, **kwargs): - TestCase.__init__(self, *args, **kwargs) - - def test_check_manifest_exists(self): - cmd = Command() - self.assertRaises( - CommandError, - cmd._check_manifest_exists, - Path("/tmp/manifest.json"), - ) - - def test_check_manifest(self): - cmd = Command() - cmd.source = Path("/tmp") - - cmd.manifest = [{"model": "documents.document"}] - with self.assertRaises(CommandError) as cm: - cmd._check_manifest_valid() - self.assertIn("The manifest file contains a record", str(cm.exception)) - - cmd.manifest = [ - {"model": "documents.document", EXPORTER_FILE_NAME: "noexist.pdf"}, - ] - # self.assertRaises(CommandError, cmd._check_manifest) - with self.assertRaises(CommandError) as cm: - cmd._check_manifest_valid() - self.assertIn( - 'The manifest file refers to "noexist.pdf"', - str(cm.exception), - ) - - def test_import_permission_error(self): - """ - GIVEN: - - Original file which cannot be read from - - Archive file which cannot be read from - WHEN: - - Import is attempted - THEN: - - CommandError is raised indicating the issue - """ - with tempfile.TemporaryDirectory() as temp_dir: - # Create empty files - original_path = Path(temp_dir) / "original.pdf" - archive_path = Path(temp_dir) / "archive.pdf" - original_path.touch() - archive_path.touch() - - # No read permissions - original_path.chmod(0o222) - - cmd = Command() - cmd.source = Path(temp_dir) - cmd.manifest = [ - { - "model": "documents.document", - EXPORTER_FILE_NAME: "original.pdf", - EXPORTER_ARCHIVE_NAME: "archive.pdf", - }, - ] - with self.assertRaises(CommandError) as cm: - cmd._check_manifest_valid() - self.assertInt("Failed to read from original file", str(cm.exception)) - - original_path.chmod(0o444) - archive_path.chmod(0o222) - - with self.assertRaises(CommandError) as cm: - cmd._check_manifest_valid() - self.assertInt("Failed to read from archive file", str(cm.exception)) - - def test_import_source_not_existing(self): - """ - GIVEN: - - Source given doesn't exist - WHEN: - - Import is attempted - THEN: - - CommandError is raised indicating the issue - """ - with self.assertRaises(CommandError) as cm: - call_command("document_importer", Path("/tmp/notapath")) - self.assertInt("That path doesn't exist", str(cm.exception)) - - def test_import_source_not_readable(self): - """ - GIVEN: - - Source given isn't readable - WHEN: - - Import is attempted - THEN: - - CommandError is raised indicating the issue - """ - with tempfile.TemporaryDirectory() as temp_dir: - path = Path(temp_dir) - path.chmod(0o222) - with self.assertRaises(CommandError) as cm: - call_command("document_importer", path) - self.assertInt( - "That path doesn't appear to be readable", - str(cm.exception), - ) diff --git a/src/documents/tests/test_management_importer.py b/src/documents/tests/test_management_importer.py new file mode 100644 index 000000000..c0d155d02 --- /dev/null +++ b/src/documents/tests/test_management_importer.py @@ -0,0 +1,274 @@ +import json +import tempfile +from io import StringIO +from pathlib import Path + +from django.contrib.auth.models import User +from django.core.management import call_command +from django.core.management.base import CommandError +from django.test import TestCase + +from documents.management.commands.document_importer import Command +from documents.models import Document +from documents.settings import EXPORTER_ARCHIVE_NAME +from documents.settings import EXPORTER_FILE_NAME +from documents.tests.utils import DirectoriesMixin +from documents.tests.utils import FileSystemAssertsMixin + + +class TestCommandImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase): + def test_check_manifest_exists(self): + """ + GIVEN: + - Source directory exists + - No manifest.json file exists in the directory + WHEN: + - Import is attempted + THEN: + - CommandError is raised indicating the issue + """ + with self.assertRaises(CommandError) as e: + call_command( + "document_importer", + "--no-progress-bar", + str(self.dirs.scratch_dir), + ) + self.assertIn( + "That directory doesn't appear to contain a manifest.json file.", + str(e), + ) + + def test_check_manifest_malformed(self): + """ + GIVEN: + - Source directory exists + - manifest.json file exists in the directory + - manifest.json is missing the documents exported name + WHEN: + - Import is attempted + THEN: + - CommandError is raised indicating the issue + """ + manifest_file = self.dirs.scratch_dir / "manifest.json" + with manifest_file.open("w") as outfile: + json.dump([{"model": "documents.document"}], outfile) + + with self.assertRaises(CommandError) as e: + call_command( + "document_importer", + "--no-progress-bar", + str(self.dirs.scratch_dir), + ) + self.assertIn( + "The manifest file contains a record which does not refer to an actual document file.", + str(e), + ) + + def test_check_manifest_file_not_found(self): + """ + GIVEN: + - Source directory exists + - manifest.json file exists in the directory + - manifest.json refers to a file which doesn't exist + WHEN: + - Import is attempted + THEN: + - CommandError is raised indicating the issue + """ + manifest_file = self.dirs.scratch_dir / "manifest.json" + with manifest_file.open("w") as outfile: + json.dump( + [{"model": "documents.document", EXPORTER_FILE_NAME: "noexist.pdf"}], + outfile, + ) + + with self.assertRaises(CommandError) as e: + call_command( + "document_importer", + "--no-progress-bar", + str(self.dirs.scratch_dir), + ) + self.assertIn('The manifest file refers to "noexist.pdf"', str(e)) + + def test_import_permission_error(self): + """ + GIVEN: + - Original file which cannot be read from + - Archive file which cannot be read from + WHEN: + - Import is attempted + THEN: + - CommandError is raised indicating the issue + """ + with tempfile.TemporaryDirectory() as temp_dir: + # Create empty files + original_path = Path(temp_dir) / "original.pdf" + archive_path = Path(temp_dir) / "archive.pdf" + original_path.touch() + archive_path.touch() + + # No read permissions + original_path.chmod(0o222) + + cmd = Command() + cmd.source = Path(temp_dir) + cmd.manifest = [ + { + "model": "documents.document", + EXPORTER_FILE_NAME: "original.pdf", + EXPORTER_ARCHIVE_NAME: "archive.pdf", + }, + ] + with self.assertRaises(CommandError) as cm: + cmd._check_manifest_valid() + self.assertInt("Failed to read from original file", str(cm.exception)) + + original_path.chmod(0o444) + archive_path.chmod(0o222) + + with self.assertRaises(CommandError) as cm: + cmd._check_manifest_valid() + self.assertInt("Failed to read from archive file", str(cm.exception)) + + def test_import_source_not_existing(self): + """ + GIVEN: + - Source given doesn't exist + WHEN: + - Import is attempted + THEN: + - CommandError is raised indicating the issue + """ + with self.assertRaises(CommandError) as cm: + call_command("document_importer", Path("/tmp/notapath")) + self.assertInt("That path doesn't exist", str(cm.exception)) + + def test_import_source_not_readable(self): + """ + GIVEN: + - Source given isn't readable + WHEN: + - Import is attempted + THEN: + - CommandError is raised indicating the issue + """ + with tempfile.TemporaryDirectory() as temp_dir: + path = Path(temp_dir) + path.chmod(0o222) + with self.assertRaises(CommandError) as cm: + call_command("document_importer", path) + self.assertInt( + "That path doesn't appear to be readable", + str(cm.exception), + ) + + def test_import_source_does_not_exist(self): + """ + GIVEN: + - Source directory does not exist + WHEN: + - Request to import documents from a directory + THEN: + - CommandError is raised indicating the folder doesn't exist + """ + path = Path("/tmp/should-not-exist") + + self.assertIsNotFile(path) + + with self.assertRaises(CommandError) as e: + call_command("document_importer", "--no-progress-bar", str(path)) + + self.assertIn("That path doesn't exist", str(e)) + + def test_import_files_exist(self): + """ + GIVEN: + - Source directory does exist + - A file exists in the originals directory + WHEN: + - Request to import documents from a directory + THEN: + - CommandError is raised indicating the file exists + """ + (self.dirs.originals_dir / "temp").mkdir() + + (self.dirs.originals_dir / "temp" / "file.pdf").touch() + + stdout = StringIO() + + with self.assertRaises(CommandError): + call_command( + "document_importer", + "--no-progress-bar", + str(self.dirs.scratch_dir), + stdout=stdout, + ) + stdout.seek(0) + self.assertIn( + "Found file temp/file.pdf, this might indicate a non-empty installation", + str(stdout.read()), + ) + + def test_import_with_user_exists(self): + """ + GIVEN: + - Source directory does exist + - At least 1 User exists in the database + WHEN: + - Request to import documents from a directory + THEN: + - A warning is output to stdout + """ + stdout = StringIO() + + User.objects.create() + + # Not creating a manifest, etc, so it errors + with self.assertRaises(CommandError): + call_command( + "document_importer", + "--no-progress-bar", + str(self.dirs.scratch_dir), + stdout=stdout, + ) + stdout.seek(0) + self.assertIn( + "Found existing user(s), this might indicate a non-empty installation", + str(stdout.read()), + ) + + def test_import_with_documents_exists(self): + """ + GIVEN: + - Source directory does exist + - At least 1 Document exists in the database + WHEN: + - Request to import documents from a directory + THEN: + - A warning is output to stdout + """ + stdout = StringIO() + + Document.objects.create( + content="Content", + checksum="42995833e01aea9b3edee44bbfdd7ce1", + archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b", + title="wow1", + filename="0000001.pdf", + mime_type="application/pdf", + archive_filename="0000001.pdf", + ) + + # Not creating a manifest, etc, so it errors + with self.assertRaises(CommandError): + call_command( + "document_importer", + "--no-progress-bar", + str(self.dirs.scratch_dir), + stdout=stdout, + ) + stdout.seek(0) + self.assertIn( + "Found existing documents(s), this might indicate a non-empty installation", + str(stdout.read()), + )