mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-17 10:13:56 -05:00
Make the importer a little more robust against some types of errors
This commit is contained in:
parent
1fc9eaf360
commit
a2d4d16867
@ -64,9 +64,9 @@ class Command(BaseCommand):
|
|||||||
|
|
||||||
logging.getLogger().handlers[0].level = logging.ERROR
|
logging.getLogger().handlers[0].level = logging.ERROR
|
||||||
|
|
||||||
self.source = options["source"]
|
self.source = Path(options["source"]).resolve()
|
||||||
|
|
||||||
if not os.path.exists(self.source):
|
if not self.source.exists():
|
||||||
raise CommandError("That path doesn't exist")
|
raise CommandError("That path doesn't exist")
|
||||||
|
|
||||||
if not os.access(self.source, os.R_OK):
|
if not os.access(self.source, os.R_OK):
|
||||||
@ -74,39 +74,39 @@ class Command(BaseCommand):
|
|||||||
|
|
||||||
manifest_paths = []
|
manifest_paths = []
|
||||||
|
|
||||||
main_manifest_path = os.path.normpath(
|
main_manifest_path = self.source / "manifest.json"
|
||||||
os.path.join(self.source, "manifest.json"),
|
|
||||||
)
|
|
||||||
self._check_manifest_exists(main_manifest_path)
|
self._check_manifest_exists(main_manifest_path)
|
||||||
|
|
||||||
with open(main_manifest_path) as f:
|
with main_manifest_path.open() as infile:
|
||||||
self.manifest = json.load(f)
|
self.manifest = json.load(infile)
|
||||||
manifest_paths.append(main_manifest_path)
|
manifest_paths.append(main_manifest_path)
|
||||||
|
|
||||||
for file in Path(self.source).glob("**/*-manifest.json"):
|
for file in Path(self.source).glob("**/*-manifest.json"):
|
||||||
with open(file) as f:
|
with file.open() as infile:
|
||||||
self.manifest += json.load(f)
|
self.manifest += json.load(infile)
|
||||||
manifest_paths.append(file)
|
manifest_paths.append(file)
|
||||||
|
|
||||||
version_path = os.path.normpath(os.path.join(self.source, "version.json"))
|
version_path = self.source / "version.json"
|
||||||
if os.path.exists(version_path):
|
if version_path.exists():
|
||||||
with open(version_path) as f:
|
with version_path.open() as infile:
|
||||||
self.version = json.load(f)["version"]
|
self.version = json.load(infile)["version"]
|
||||||
# Provide an initial warning if needed to the user
|
# Provide an initial warning if needed to the user
|
||||||
if self.version != version.__full_version_str__:
|
if self.version != version.__full_version_str__:
|
||||||
self.stdout.write(
|
self.stdout.write(
|
||||||
self.style.WARNING(
|
self.style.WARNING(
|
||||||
"Version mismatch: "
|
"Version mismatch: "
|
||||||
f"Currently {version.__full_version_str__},"
|
f"Currently {version.__full_version_str__},"
|
||||||
f" importing {self.version}."
|
f" importing {self.version}."
|
||||||
" Continuing, but import may fail.",
|
" Continuing, but import may fail.",
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
self.stdout.write(self.style.NOTICE("No version.json file located"))
|
self.stdout.write(self.style.NOTICE("No version.json file located"))
|
||||||
|
|
||||||
self._check_manifest()
|
self._check_manifest_valid()
|
||||||
|
|
||||||
with disable_signal(
|
with disable_signal(
|
||||||
post_save,
|
post_save,
|
||||||
receiver=update_filename_and_move_files,
|
receiver=update_filename_and_move_files,
|
||||||
@ -150,14 +150,18 @@ class Command(BaseCommand):
|
|||||||
)
|
)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _check_manifest_exists(path):
|
def _check_manifest_exists(path: Path):
|
||||||
if not os.path.exists(path):
|
if not path.exists():
|
||||||
raise CommandError(
|
raise CommandError(
|
||||||
"That directory doesn't appear to contain a manifest.json file.",
|
"That directory doesn't appear to contain a manifest.json file.",
|
||||||
)
|
)
|
||||||
|
|
||||||
def _check_manifest(self):
|
def _check_manifest_valid(self):
|
||||||
|
"""
|
||||||
|
Attempts to verify the manifest is valid. Namely checking the files
|
||||||
|
referred to exist and the files can be read from
|
||||||
|
"""
|
||||||
|
self.stdout.write("Checking the manifest")
|
||||||
for record in self.manifest:
|
for record in self.manifest:
|
||||||
|
|
||||||
if record["model"] != "documents.document":
|
if record["model"] != "documents.document":
|
||||||
@ -170,19 +174,35 @@ class Command(BaseCommand):
|
|||||||
)
|
)
|
||||||
|
|
||||||
doc_file = record[EXPORTER_FILE_NAME]
|
doc_file = record[EXPORTER_FILE_NAME]
|
||||||
if not os.path.exists(os.path.join(self.source, doc_file)):
|
doc_path = self.source / doc_file
|
||||||
|
if not doc_path.exists():
|
||||||
raise CommandError(
|
raise CommandError(
|
||||||
'The manifest file refers to "{}" which does not '
|
'The manifest file refers to "{}" which does not '
|
||||||
"appear to be in the source directory.".format(doc_file),
|
"appear to be in the source directory.".format(doc_file),
|
||||||
)
|
)
|
||||||
|
try:
|
||||||
|
with doc_path.open(mode="rb") as infile:
|
||||||
|
infile.read(1)
|
||||||
|
except Exception as e:
|
||||||
|
raise CommandError(
|
||||||
|
f"Failed to read from original file {doc_path}",
|
||||||
|
) from e
|
||||||
|
|
||||||
if EXPORTER_ARCHIVE_NAME in record:
|
if EXPORTER_ARCHIVE_NAME in record:
|
||||||
archive_file = record[EXPORTER_ARCHIVE_NAME]
|
archive_file = record[EXPORTER_ARCHIVE_NAME]
|
||||||
if not os.path.exists(os.path.join(self.source, archive_file)):
|
doc_archive_path = self.source / archive_file
|
||||||
|
if not doc_archive_path.exists():
|
||||||
raise CommandError(
|
raise CommandError(
|
||||||
f"The manifest file refers to {archive_file} which "
|
f"The manifest file refers to {archive_file} which "
|
||||||
f"does not appear to be in the source directory.",
|
f"does not appear to be in the source directory.",
|
||||||
)
|
)
|
||||||
|
try:
|
||||||
|
with doc_archive_path.open(mode="rb") as infile:
|
||||||
|
infile.read(1)
|
||||||
|
except Exception as e:
|
||||||
|
raise CommandError(
|
||||||
|
f"Failed to read from archive file {doc_archive_path}",
|
||||||
|
) from e
|
||||||
|
|
||||||
def _import_files_from_manifest(self, progress_bar_disable):
|
def _import_files_from_manifest(self, progress_bar_disable):
|
||||||
|
|
||||||
|
@ -1,6 +1,10 @@
|
|||||||
from django.core.management.base import CommandError
|
from django.core.management.base import CommandError
|
||||||
from django.test import TestCase
|
from django.test import TestCase
|
||||||
from documents.settings import EXPORTER_FILE_NAME
|
from documents.settings import EXPORTER_FILE_NAME
|
||||||
|
from documents.settings import EXPORTER_ARCHIVE_NAME
|
||||||
|
from pathlib import Path
|
||||||
|
import tempfile
|
||||||
|
from django.core.management import call_command
|
||||||
|
|
||||||
from documents.management.commands.document_importer import Command
|
from documents.management.commands.document_importer import Command
|
||||||
|
|
||||||
@ -14,17 +18,17 @@ class TestImporter(TestCase):
|
|||||||
self.assertRaises(
|
self.assertRaises(
|
||||||
CommandError,
|
CommandError,
|
||||||
cmd._check_manifest_exists,
|
cmd._check_manifest_exists,
|
||||||
"/tmp/manifest.json",
|
Path("/tmp/manifest.json"),
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_check_manifest(self):
|
def test_check_manifest(self):
|
||||||
|
|
||||||
cmd = Command()
|
cmd = Command()
|
||||||
cmd.source = "/tmp"
|
cmd.source = Path("/tmp")
|
||||||
|
|
||||||
cmd.manifest = [{"model": "documents.document"}]
|
cmd.manifest = [{"model": "documents.document"}]
|
||||||
with self.assertRaises(CommandError) as cm:
|
with self.assertRaises(CommandError) as cm:
|
||||||
cmd._check_manifest()
|
cmd._check_manifest_valid()
|
||||||
self.assertIn("The manifest file contains a record", str(cm.exception))
|
self.assertIn("The manifest file contains a record", str(cm.exception))
|
||||||
|
|
||||||
cmd.manifest = [
|
cmd.manifest = [
|
||||||
@ -32,8 +36,81 @@ class TestImporter(TestCase):
|
|||||||
]
|
]
|
||||||
# self.assertRaises(CommandError, cmd._check_manifest)
|
# self.assertRaises(CommandError, cmd._check_manifest)
|
||||||
with self.assertRaises(CommandError) as cm:
|
with self.assertRaises(CommandError) as cm:
|
||||||
cmd._check_manifest()
|
cmd._check_manifest_valid()
|
||||||
self.assertIn(
|
self.assertIn(
|
||||||
'The manifest file refers to "noexist.pdf"',
|
'The manifest file refers to "noexist.pdf"',
|
||||||
str(cm.exception),
|
str(cm.exception),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def test_import_permission_error(self):
|
||||||
|
"""
|
||||||
|
GIVEN:
|
||||||
|
- Original file which cannot be read from
|
||||||
|
- Archive file which cannot be read from
|
||||||
|
WHEN:
|
||||||
|
- Import is attempted
|
||||||
|
THEN:
|
||||||
|
- CommandError is raised indicating the issue
|
||||||
|
"""
|
||||||
|
with tempfile.TemporaryDirectory() as temp_dir:
|
||||||
|
|
||||||
|
# Create empty files
|
||||||
|
original_path = Path(temp_dir) / "original.pdf"
|
||||||
|
archive_path = Path(temp_dir) / "archive.pdf"
|
||||||
|
original_path.touch()
|
||||||
|
archive_path.touch()
|
||||||
|
|
||||||
|
# No read permissions
|
||||||
|
original_path.chmod(0o222)
|
||||||
|
|
||||||
|
cmd = Command()
|
||||||
|
cmd.source = Path(temp_dir)
|
||||||
|
cmd.manifest = [
|
||||||
|
{
|
||||||
|
"model": "documents.document",
|
||||||
|
EXPORTER_FILE_NAME: "original.pdf",
|
||||||
|
EXPORTER_ARCHIVE_NAME: "archive.pdf",
|
||||||
|
},
|
||||||
|
]
|
||||||
|
with self.assertRaises(CommandError) as cm:
|
||||||
|
cmd._check_manifest_valid()
|
||||||
|
self.assertInt("Failed to read from original file", str(cm.exception))
|
||||||
|
|
||||||
|
original_path.chmod(0o444)
|
||||||
|
archive_path.chmod(0o222)
|
||||||
|
|
||||||
|
with self.assertRaises(CommandError) as cm:
|
||||||
|
cmd._check_manifest_valid()
|
||||||
|
self.assertInt("Failed to read from archive file", str(cm.exception))
|
||||||
|
|
||||||
|
def test_import_source_not_existing(self):
|
||||||
|
"""
|
||||||
|
GIVEN:
|
||||||
|
- Source given doesn't exist
|
||||||
|
WHEN:
|
||||||
|
- Import is attempted
|
||||||
|
THEN:
|
||||||
|
- CommandError is raised indicating the issue
|
||||||
|
"""
|
||||||
|
with self.assertRaises(CommandError) as cm:
|
||||||
|
call_command("document_importer", Path("/tmp/notapath"))
|
||||||
|
self.assertInt("That path doesn't exist", str(cm.exception))
|
||||||
|
|
||||||
|
def test_import_source_not_readable(self):
|
||||||
|
"""
|
||||||
|
GIVEN:
|
||||||
|
- Source given isn't readable
|
||||||
|
WHEN:
|
||||||
|
- Import is attempted
|
||||||
|
THEN:
|
||||||
|
- CommandError is raised indicating the issue
|
||||||
|
"""
|
||||||
|
with tempfile.TemporaryDirectory() as temp_dir:
|
||||||
|
path = Path(temp_dir)
|
||||||
|
path.chmod(0o222)
|
||||||
|
with self.assertRaises(CommandError) as cm:
|
||||||
|
call_command("document_importer", path)
|
||||||
|
self.assertInt(
|
||||||
|
"That path doesn't appear to be readable",
|
||||||
|
str(cm.exception),
|
||||||
|
)
|
||||||
|
@ -212,7 +212,7 @@ class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
Tag.objects.all().delete()
|
Tag.objects.all().delete()
|
||||||
self.assertEqual(Document.objects.count(), 0)
|
self.assertEqual(Document.objects.count(), 0)
|
||||||
|
|
||||||
call_command("document_importer", self.target)
|
call_command("document_importer", "--no-progress-bar", self.target)
|
||||||
self.assertEqual(Document.objects.count(), 4)
|
self.assertEqual(Document.objects.count(), 4)
|
||||||
self.assertEqual(Tag.objects.count(), 1)
|
self.assertEqual(Tag.objects.count(), 1)
|
||||||
self.assertEqual(Correspondent.objects.count(), 1)
|
self.assertEqual(Correspondent.objects.count(), 1)
|
||||||
@ -541,7 +541,7 @@ class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
self.assertEqual(Document.objects.count(), 4)
|
self.assertEqual(Document.objects.count(), 4)
|
||||||
Document.objects.all().delete()
|
Document.objects.all().delete()
|
||||||
self.assertEqual(Document.objects.count(), 0)
|
self.assertEqual(Document.objects.count(), 0)
|
||||||
call_command("document_importer", self.target)
|
call_command("document_importer", "--no-progress-bar", self.target)
|
||||||
self.assertEqual(Document.objects.count(), 4)
|
self.assertEqual(Document.objects.count(), 4)
|
||||||
|
|
||||||
def test_no_thumbnail(self):
|
def test_no_thumbnail(self):
|
||||||
@ -584,7 +584,7 @@ class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
self.assertEqual(Document.objects.count(), 4)
|
self.assertEqual(Document.objects.count(), 4)
|
||||||
Document.objects.all().delete()
|
Document.objects.all().delete()
|
||||||
self.assertEqual(Document.objects.count(), 0)
|
self.assertEqual(Document.objects.count(), 0)
|
||||||
call_command("document_importer", self.target)
|
call_command("document_importer", "--no-progress-bar", self.target)
|
||||||
self.assertEqual(Document.objects.count(), 4)
|
self.assertEqual(Document.objects.count(), 4)
|
||||||
|
|
||||||
def test_split_manifest(self):
|
def test_split_manifest(self):
|
||||||
@ -613,7 +613,7 @@ class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
self.assertEqual(Document.objects.count(), 4)
|
self.assertEqual(Document.objects.count(), 4)
|
||||||
Document.objects.all().delete()
|
Document.objects.all().delete()
|
||||||
self.assertEqual(Document.objects.count(), 0)
|
self.assertEqual(Document.objects.count(), 0)
|
||||||
call_command("document_importer", self.target)
|
call_command("document_importer", "--no-progress-bar", self.target)
|
||||||
self.assertEqual(Document.objects.count(), 4)
|
self.assertEqual(Document.objects.count(), 4)
|
||||||
|
|
||||||
def test_folder_prefix(self):
|
def test_folder_prefix(self):
|
||||||
@ -637,5 +637,5 @@ class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
self.assertEqual(Document.objects.count(), 4)
|
self.assertEqual(Document.objects.count(), 4)
|
||||||
Document.objects.all().delete()
|
Document.objects.all().delete()
|
||||||
self.assertEqual(Document.objects.count(), 0)
|
self.assertEqual(Document.objects.count(), 0)
|
||||||
call_command("document_importer", self.target)
|
call_command("document_importer", "--no-progress-bar", self.target)
|
||||||
self.assertEqual(Document.objects.count(), 4)
|
self.assertEqual(Document.objects.count(), 4)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user