Adds additional warnings during an import if it might fail due to reasons (#4814)

This commit is contained in:
Trenton H 2023-12-04 19:39:59 -08:00 committed by GitHub
parent 826322b610
commit ca355d5855
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 314 additions and 119 deletions

View File

@ -7,6 +7,7 @@ from pathlib import Path
import tqdm import tqdm
from django.conf import settings from django.conf import settings
from django.contrib.auth.models import Permission from django.contrib.auth.models import Permission
from django.contrib.auth.models import User
from django.contrib.contenttypes.models import ContentType from django.contrib.contenttypes.models import ContentType
from django.core.exceptions import FieldDoesNotExist from django.core.exceptions import FieldDoesNotExist
from django.core.management import call_command from django.core.management import call_command
@ -60,10 +61,11 @@ class Command(BaseCommand):
self.manifest = None self.manifest = None
self.version = None self.version = None
def handle(self, *args, **options): def pre_check(self) -> None:
logging.getLogger().handlers[0].level = logging.ERROR """
Runs some initial checks against the source directory, including looking for
self.source = Path(options["source"]).resolve() common mistakes like having files still and users other than expected
"""
if not self.source.exists(): if not self.source.exists():
raise CommandError("That path doesn't exist") raise CommandError("That path doesn't exist")
@ -71,6 +73,40 @@ class Command(BaseCommand):
if not os.access(self.source, os.R_OK): if not os.access(self.source, os.R_OK):
raise CommandError("That path doesn't appear to be readable") raise CommandError("That path doesn't appear to be readable")
for document_dir in [settings.ORIGINALS_DIR, settings.ARCHIVE_DIR]:
if document_dir.exists() and document_dir.is_dir():
for entry in document_dir.glob("**/*"):
if entry.is_dir():
continue
self.stdout.write(
self.style.WARNING(
f"Found file {entry.relative_to(document_dir)}, this might indicate a non-empty installation",
),
)
break
if (
User.objects.exclude(username__in=["consumer", "AnonymousUser"]).count()
!= 0
):
self.stdout.write(
self.style.WARNING(
"Found existing user(s), this might indicate a non-empty installation",
),
)
if Document.objects.count() != 0:
self.stdout.write(
self.style.WARNING(
"Found existing documents(s), this might indicate a non-empty installation",
),
)
def handle(self, *args, **options):
logging.getLogger().handlers[0].level = logging.ERROR
self.source = Path(options["source"]).resolve()
self.pre_check()
manifest_paths = [] manifest_paths = []
main_manifest_path = self.source / "manifest.json" main_manifest_path = self.source / "manifest.json"

View File

@ -1,115 +0,0 @@
import tempfile
from pathlib import Path
from django.core.management import call_command
from django.core.management.base import CommandError
from django.test import TestCase
from documents.management.commands.document_importer import Command
from documents.settings import EXPORTER_ARCHIVE_NAME
from documents.settings import EXPORTER_FILE_NAME
class TestImporter(TestCase):
def __init__(self, *args, **kwargs):
TestCase.__init__(self, *args, **kwargs)
def test_check_manifest_exists(self):
cmd = Command()
self.assertRaises(
CommandError,
cmd._check_manifest_exists,
Path("/tmp/manifest.json"),
)
def test_check_manifest(self):
cmd = Command()
cmd.source = Path("/tmp")
cmd.manifest = [{"model": "documents.document"}]
with self.assertRaises(CommandError) as cm:
cmd._check_manifest_valid()
self.assertIn("The manifest file contains a record", str(cm.exception))
cmd.manifest = [
{"model": "documents.document", EXPORTER_FILE_NAME: "noexist.pdf"},
]
# self.assertRaises(CommandError, cmd._check_manifest)
with self.assertRaises(CommandError) as cm:
cmd._check_manifest_valid()
self.assertIn(
'The manifest file refers to "noexist.pdf"',
str(cm.exception),
)
def test_import_permission_error(self):
"""
GIVEN:
- Original file which cannot be read from
- Archive file which cannot be read from
WHEN:
- Import is attempted
THEN:
- CommandError is raised indicating the issue
"""
with tempfile.TemporaryDirectory() as temp_dir:
# Create empty files
original_path = Path(temp_dir) / "original.pdf"
archive_path = Path(temp_dir) / "archive.pdf"
original_path.touch()
archive_path.touch()
# No read permissions
original_path.chmod(0o222)
cmd = Command()
cmd.source = Path(temp_dir)
cmd.manifest = [
{
"model": "documents.document",
EXPORTER_FILE_NAME: "original.pdf",
EXPORTER_ARCHIVE_NAME: "archive.pdf",
},
]
with self.assertRaises(CommandError) as cm:
cmd._check_manifest_valid()
self.assertInt("Failed to read from original file", str(cm.exception))
original_path.chmod(0o444)
archive_path.chmod(0o222)
with self.assertRaises(CommandError) as cm:
cmd._check_manifest_valid()
self.assertInt("Failed to read from archive file", str(cm.exception))
def test_import_source_not_existing(self):
"""
GIVEN:
- Source given doesn't exist
WHEN:
- Import is attempted
THEN:
- CommandError is raised indicating the issue
"""
with self.assertRaises(CommandError) as cm:
call_command("document_importer", Path("/tmp/notapath"))
self.assertInt("That path doesn't exist", str(cm.exception))
def test_import_source_not_readable(self):
"""
GIVEN:
- Source given isn't readable
WHEN:
- Import is attempted
THEN:
- CommandError is raised indicating the issue
"""
with tempfile.TemporaryDirectory() as temp_dir:
path = Path(temp_dir)
path.chmod(0o222)
with self.assertRaises(CommandError) as cm:
call_command("document_importer", path)
self.assertInt(
"That path doesn't appear to be readable",
str(cm.exception),
)

View File

@ -0,0 +1,274 @@
import json
import tempfile
from io import StringIO
from pathlib import Path
from django.contrib.auth.models import User
from django.core.management import call_command
from django.core.management.base import CommandError
from django.test import TestCase
from documents.management.commands.document_importer import Command
from documents.models import Document
from documents.settings import EXPORTER_ARCHIVE_NAME
from documents.settings import EXPORTER_FILE_NAME
from documents.tests.utils import DirectoriesMixin
from documents.tests.utils import FileSystemAssertsMixin
class TestCommandImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
def test_check_manifest_exists(self):
"""
GIVEN:
- Source directory exists
- No manifest.json file exists in the directory
WHEN:
- Import is attempted
THEN:
- CommandError is raised indicating the issue
"""
with self.assertRaises(CommandError) as e:
call_command(
"document_importer",
"--no-progress-bar",
str(self.dirs.scratch_dir),
)
self.assertIn(
"That directory doesn't appear to contain a manifest.json file.",
str(e),
)
def test_check_manifest_malformed(self):
"""
GIVEN:
- Source directory exists
- manifest.json file exists in the directory
- manifest.json is missing the documents exported name
WHEN:
- Import is attempted
THEN:
- CommandError is raised indicating the issue
"""
manifest_file = self.dirs.scratch_dir / "manifest.json"
with manifest_file.open("w") as outfile:
json.dump([{"model": "documents.document"}], outfile)
with self.assertRaises(CommandError) as e:
call_command(
"document_importer",
"--no-progress-bar",
str(self.dirs.scratch_dir),
)
self.assertIn(
"The manifest file contains a record which does not refer to an actual document file.",
str(e),
)
def test_check_manifest_file_not_found(self):
"""
GIVEN:
- Source directory exists
- manifest.json file exists in the directory
- manifest.json refers to a file which doesn't exist
WHEN:
- Import is attempted
THEN:
- CommandError is raised indicating the issue
"""
manifest_file = self.dirs.scratch_dir / "manifest.json"
with manifest_file.open("w") as outfile:
json.dump(
[{"model": "documents.document", EXPORTER_FILE_NAME: "noexist.pdf"}],
outfile,
)
with self.assertRaises(CommandError) as e:
call_command(
"document_importer",
"--no-progress-bar",
str(self.dirs.scratch_dir),
)
self.assertIn('The manifest file refers to "noexist.pdf"', str(e))
def test_import_permission_error(self):
"""
GIVEN:
- Original file which cannot be read from
- Archive file which cannot be read from
WHEN:
- Import is attempted
THEN:
- CommandError is raised indicating the issue
"""
with tempfile.TemporaryDirectory() as temp_dir:
# Create empty files
original_path = Path(temp_dir) / "original.pdf"
archive_path = Path(temp_dir) / "archive.pdf"
original_path.touch()
archive_path.touch()
# No read permissions
original_path.chmod(0o222)
cmd = Command()
cmd.source = Path(temp_dir)
cmd.manifest = [
{
"model": "documents.document",
EXPORTER_FILE_NAME: "original.pdf",
EXPORTER_ARCHIVE_NAME: "archive.pdf",
},
]
with self.assertRaises(CommandError) as cm:
cmd._check_manifest_valid()
self.assertInt("Failed to read from original file", str(cm.exception))
original_path.chmod(0o444)
archive_path.chmod(0o222)
with self.assertRaises(CommandError) as cm:
cmd._check_manifest_valid()
self.assertInt("Failed to read from archive file", str(cm.exception))
def test_import_source_not_existing(self):
"""
GIVEN:
- Source given doesn't exist
WHEN:
- Import is attempted
THEN:
- CommandError is raised indicating the issue
"""
with self.assertRaises(CommandError) as cm:
call_command("document_importer", Path("/tmp/notapath"))
self.assertInt("That path doesn't exist", str(cm.exception))
def test_import_source_not_readable(self):
"""
GIVEN:
- Source given isn't readable
WHEN:
- Import is attempted
THEN:
- CommandError is raised indicating the issue
"""
with tempfile.TemporaryDirectory() as temp_dir:
path = Path(temp_dir)
path.chmod(0o222)
with self.assertRaises(CommandError) as cm:
call_command("document_importer", path)
self.assertInt(
"That path doesn't appear to be readable",
str(cm.exception),
)
def test_import_source_does_not_exist(self):
"""
GIVEN:
- Source directory does not exist
WHEN:
- Request to import documents from a directory
THEN:
- CommandError is raised indicating the folder doesn't exist
"""
path = Path("/tmp/should-not-exist")
self.assertIsNotFile(path)
with self.assertRaises(CommandError) as e:
call_command("document_importer", "--no-progress-bar", str(path))
self.assertIn("That path doesn't exist", str(e))
def test_import_files_exist(self):
"""
GIVEN:
- Source directory does exist
- A file exists in the originals directory
WHEN:
- Request to import documents from a directory
THEN:
- CommandError is raised indicating the file exists
"""
(self.dirs.originals_dir / "temp").mkdir()
(self.dirs.originals_dir / "temp" / "file.pdf").touch()
stdout = StringIO()
with self.assertRaises(CommandError):
call_command(
"document_importer",
"--no-progress-bar",
str(self.dirs.scratch_dir),
stdout=stdout,
)
stdout.seek(0)
self.assertIn(
"Found file temp/file.pdf, this might indicate a non-empty installation",
str(stdout.read()),
)
def test_import_with_user_exists(self):
"""
GIVEN:
- Source directory does exist
- At least 1 User exists in the database
WHEN:
- Request to import documents from a directory
THEN:
- A warning is output to stdout
"""
stdout = StringIO()
User.objects.create()
# Not creating a manifest, etc, so it errors
with self.assertRaises(CommandError):
call_command(
"document_importer",
"--no-progress-bar",
str(self.dirs.scratch_dir),
stdout=stdout,
)
stdout.seek(0)
self.assertIn(
"Found existing user(s), this might indicate a non-empty installation",
str(stdout.read()),
)
def test_import_with_documents_exists(self):
"""
GIVEN:
- Source directory does exist
- At least 1 Document exists in the database
WHEN:
- Request to import documents from a directory
THEN:
- A warning is output to stdout
"""
stdout = StringIO()
Document.objects.create(
content="Content",
checksum="42995833e01aea9b3edee44bbfdd7ce1",
archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b",
title="wow1",
filename="0000001.pdf",
mime_type="application/pdf",
archive_filename="0000001.pdf",
)
# Not creating a manifest, etc, so it errors
with self.assertRaises(CommandError):
call_command(
"document_importer",
"--no-progress-bar",
str(self.dirs.scratch_dir),
stdout=stdout,
)
stdout.seek(0)
self.assertIn(
"Found existing documents(s), this might indicate a non-empty installation",
str(stdout.read()),
)