add split-manifest option to administration exporter

This commit is contained in:
Matthieu Helleboid 2023-01-14 11:54:54 +01:00 committed by Trenton H
parent 896304ccaa
commit 4cb4bd13ad
4 changed files with 66 additions and 8 deletions

View File

@ -227,9 +227,10 @@ is not a TTY" errors. For example:
`docker-compose exec -T webserver document_exporter ../export`
```
document_exporter target [-c] [-f] [-p] [-d] [-na] [-nt]
document_exporter target [-sm] [-c] [-f] [-p] [-d] [-na] [-nt]
optional arguments:
-sm, --split-manifest
-c, --compare-checksums
-f, --use-filename-format
-p, --use-filename-prefix
@ -243,6 +244,9 @@ optional arguments:
documents, thumbnails and a `manifest.json` file. The manifest contains
all metadata from the database (correspondents, tags, etc).
If `-sm` or `--split-manifest` is provided, information about each document
will be placed in its own individual JSON file instead of in `manifest.json`.
When you use the provided docker compose script, specify `../export` as
the target. This path inside the container is automatically mounted on
your host on the folder `export`.
@ -279,8 +283,8 @@ paperless to use `PAPERLESS_FILENAME_FORMAT` for exported filenames
instead, specify `--use-filename-format`.
If `-p` or `--use-filename-prefix` is provided, files will be exported
in dedicated folders according to their nature: `archive`, `originals`
or `thumbnails`
in dedicated folders according to their nature: `archive`, `originals`,
`thumbnails` or `json`.
!!! warning

View File

@ -53,6 +53,14 @@ class Command(BaseCommand):
def add_arguments(self, parser):
parser.add_argument("target")
parser.add_argument(
"-sm",
"--split-manifest",
default=False,
action="store_true",
help="Export document information in individual manifest json files.",
)
parser.add_argument(
"-c",
"--compare-checksums",
@ -125,6 +133,7 @@ class Command(BaseCommand):
def __init__(self, *args, **kwargs):
BaseCommand.__init__(self, *args, **kwargs)
self.target: Path = None
self.split_manifest = None
self.files_in_export_dir: Set[Path] = set()
self.exported_files: List[Path] = []
self.compare_checksums = False
@ -137,6 +146,7 @@ class Command(BaseCommand):
def handle(self, *args, **options):
self.target = Path(options["target"]).resolve()
self.split_manifest = options["split_manifest"]
self.compare_checksums = options["compare_checksums"]
self.use_filename_format = options["use_filename_format"]
self.use_filename_prefix = options["use_filename_prefix"]
@ -217,7 +227,8 @@ class Command(BaseCommand):
documents = Document.objects.order_by("id")
document_map = {d.pk: d for d in documents}
document_manifest = json.loads(serializers.serialize("json", documents))
manifest += document_manifest
if not self.split_manifest:
manifest += document_manifest
manifest += json.loads(
serializers.serialize("json", MailAccount.objects.all()),
@ -334,6 +345,15 @@ class Command(BaseCommand):
archive_target,
)
if self.split_manifest:
manifest_name = base_name + "-manifest.json"
if self.use_filename_prefix:
manifest_name = os.path.join("json", manifest_name)
manifest_name = os.path.join(self.target, manifest_name)
os.makedirs(os.path.dirname(manifest_name), exist_ok=True)
with open(manifest_name, "w") as f:
json.dump([document_manifest[index]], f, indent=2)
# 4.1 write manifest to target folder
manifest_path = (self.target / Path("manifest.json")).resolve()
manifest_path.write_text(json.dumps(manifest, indent=2))

View File

@ -72,11 +72,24 @@ class Command(BaseCommand):
if not os.access(self.source, os.R_OK):
raise CommandError("That path doesn't appear to be readable")
manifest_path = os.path.normpath(os.path.join(self.source, "manifest.json"))
self._check_manifest_exists(manifest_path)
manifest_paths = []
with open(manifest_path) as f:
main_manifest_path = os.path.normpath(
os.path.join(self.source, "manifest.json"),
)
self._check_manifest_exists(main_manifest_path)
with open(main_manifest_path) as f:
self.manifest = json.load(f)
manifest_paths.append(main_manifest_path)
for root, dirs, files in os.walk(self.source):
for file in files:
if file.endswith("-manifest.json"):
doc_manifest_path = os.path.normpath(os.path.join(root, file))
with open(doc_manifest_path) as f:
self.manifest += json.load(f)
manifest_paths.append(doc_manifest_path)
version_path = os.path.normpath(os.path.join(self.source, "version.json"))
if os.path.exists(version_path):
@ -109,7 +122,8 @@ class Command(BaseCommand):
):
# Fill up the database with whatever is in the manifest
try:
call_command("loaddata", manifest_path)
for manifest_path in manifest_paths:
call_command("loaddata", manifest_path)
except (FieldDoesNotExist, DeserializationError) as e:
self.stdout.write(self.style.ERROR("Database import failed"))
if (

View File

@ -104,6 +104,7 @@ class TestExportImport(DirectoriesMixin, TestCase):
delete=False,
no_archive=False,
no_thumbnail=False,
split_manifest=False,
):
args = ["document_exporter", self.target]
if use_filename_format:
@ -116,6 +117,8 @@ class TestExportImport(DirectoriesMixin, TestCase):
args += ["--no-archive"]
if no_thumbnail:
args += ["--no-thumbnail"]
if split_manifest:
args += ["--split-manifest"]
call_command(*args)
@ -563,3 +566,20 @@ class TestExportImport(DirectoriesMixin, TestCase):
with paperless_environment() as dirs:
call_command("document_importer", self.target)
self.assertEqual(Document.objects.count(), 4)
def test_split_manifest(self):
    """
    Export with --split-manifest, then import the export again.

    Checks that the manifest returned by `_do_export` contains no
    "documents.document" entries, and that 4 documents exist after
    running the importer on the export target.
    """
    sample_documents = os.path.join(
        os.path.dirname(__file__),
        "samples",
        "documents",
    )
    media_documents = os.path.join(self.dirs.media_dir, "documents")

    # Swap the media documents folder for the bundled sample documents.
    shutil.rmtree(media_documents)
    shutil.copytree(sample_documents, media_documents)

    manifest = self._do_export(split_manifest=True)

    # No entry of the exported manifest may describe a document.
    self.assertFalse(
        any(entry["model"] == "documents.document" for entry in manifest),
    )

    with paperless_environment() as dirs:
        call_command("document_importer", self.target)
        self.assertEqual(Document.objects.count(), 4)