mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
add split-manifest option to administration exporter
This commit is contained in:
parent
896304ccaa
commit
4cb4bd13ad
@ -227,9 +227,10 @@ is not a TTY" errors. For example:
|
||||
`docker-compose exec -T webserver document_exporter ../export`
|
||||
|
||||
```
|
||||
document_exporter target [-c] [-f] [-p] [-d] [-na] [-nt]
|
||||
document_exporter target [-sm] [-c] [-f] [-p] [-d] [-na] [-nt]
|
||||
|
||||
optional arguments:
|
||||
-sm, --split-manifest
|
||||
-c, --compare-checksums
|
||||
-f, --use-filename-format
|
||||
-p, --use-filename-prefix
|
||||
@ -243,6 +244,9 @@ optional arguments:
|
||||
documents, thumbnails and a `manifest.json` file. The manifest contains
|
||||
all metadata from the database (correspondents, tags, etc).
|
||||
|
||||
If `-sm` or `--split-manifest` is provided, information about each document
|
||||
will be placed in its own individual JSON file instead of in `manifest.json`.
|
||||
|
||||
When you use the provided docker compose script, specify `../export` as
|
||||
the target. This path inside the container is automatically mounted on
|
||||
your host on the folder `export`.
|
||||
@ -279,8 +283,8 @@ paperless to use `PAPERLESS_FILENAME_FORMAT` for exported filenames
|
||||
instead, specify `--use-filename-format`.
|
||||
|
||||
If `-p` or `--use-filename-prefix` is provided, files will be exported
|
||||
in dedicated folders according to their nature: `archive`, `originals`
|
||||
or `thumbnails`
|
||||
in dedicated folders according to their nature: `archive`, `originals`,
|
||||
`thumbnails` or `json`.
|
||||
|
||||
!!! warning
|
||||
|
||||
|
@ -53,6 +53,14 @@ class Command(BaseCommand):
|
||||
def add_arguments(self, parser):
|
||||
parser.add_argument("target")
|
||||
|
||||
parser.add_argument(
|
||||
"-sm",
|
||||
"--split-manifest",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="Export document information in individual manifest json files.",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"-c",
|
||||
"--compare-checksums",
|
||||
@ -125,6 +133,7 @@ class Command(BaseCommand):
|
||||
def __init__(self, *args, **kwargs):
|
||||
BaseCommand.__init__(self, *args, **kwargs)
|
||||
self.target: Path = None
|
||||
self.split_manifest = None
|
||||
self.files_in_export_dir: Set[Path] = set()
|
||||
self.exported_files: List[Path] = []
|
||||
self.compare_checksums = False
|
||||
@ -137,6 +146,7 @@ class Command(BaseCommand):
|
||||
def handle(self, *args, **options):
|
||||
|
||||
self.target = Path(options["target"]).resolve()
|
||||
self.split_manifest = options["split_manifest"]
|
||||
self.compare_checksums = options["compare_checksums"]
|
||||
self.use_filename_format = options["use_filename_format"]
|
||||
self.use_filename_prefix = options["use_filename_prefix"]
|
||||
@ -217,7 +227,8 @@ class Command(BaseCommand):
|
||||
documents = Document.objects.order_by("id")
|
||||
document_map = {d.pk: d for d in documents}
|
||||
document_manifest = json.loads(serializers.serialize("json", documents))
|
||||
manifest += document_manifest
|
||||
if not self.split_manifest:
|
||||
manifest += document_manifest
|
||||
|
||||
manifest += json.loads(
|
||||
serializers.serialize("json", MailAccount.objects.all()),
|
||||
@ -334,6 +345,15 @@ class Command(BaseCommand):
|
||||
archive_target,
|
||||
)
|
||||
|
||||
if self.split_manifest:
|
||||
manifest_name = base_name + "-manifest.json"
|
||||
if self.use_filename_prefix:
|
||||
manifest_name = os.path.join("json", manifest_name)
|
||||
manifest_name = os.path.join(self.target, manifest_name)
|
||||
os.makedirs(os.path.dirname(manifest_name), exist_ok=True)
|
||||
with open(manifest_name, "w") as f:
|
||||
json.dump([document_manifest[index]], f, indent=2)
|
||||
|
||||
# 4.1 write manifest to target folder
|
||||
manifest_path = (self.target / Path("manifest.json")).resolve()
|
||||
manifest_path.write_text(json.dumps(manifest, indent=2))
|
||||
|
@ -72,11 +72,24 @@ class Command(BaseCommand):
|
||||
if not os.access(self.source, os.R_OK):
|
||||
raise CommandError("That path doesn't appear to be readable")
|
||||
|
||||
manifest_path = os.path.normpath(os.path.join(self.source, "manifest.json"))
|
||||
self._check_manifest_exists(manifest_path)
|
||||
manifest_paths = []
|
||||
|
||||
with open(manifest_path) as f:
|
||||
main_manifest_path = os.path.normpath(
|
||||
os.path.join(self.source, "manifest.json"),
|
||||
)
|
||||
self._check_manifest_exists(main_manifest_path)
|
||||
|
||||
with open(main_manifest_path) as f:
|
||||
self.manifest = json.load(f)
|
||||
manifest_paths.append(main_manifest_path)
|
||||
|
||||
for root, dirs, files in os.walk(self.source):
|
||||
for file in files:
|
||||
if file.endswith("-manifest.json"):
|
||||
doc_manifest_path = os.path.normpath(os.path.join(root, file))
|
||||
with open(doc_manifest_path) as f:
|
||||
self.manifest += json.load(f)
|
||||
manifest_paths.append(doc_manifest_path)
|
||||
|
||||
version_path = os.path.normpath(os.path.join(self.source, "version.json"))
|
||||
if os.path.exists(version_path):
|
||||
@ -109,7 +122,8 @@ class Command(BaseCommand):
|
||||
):
|
||||
# Fill up the database with whatever is in the manifest
|
||||
try:
|
||||
call_command("loaddata", manifest_path)
|
||||
for manifest_path in manifest_paths:
|
||||
call_command("loaddata", manifest_path)
|
||||
except (FieldDoesNotExist, DeserializationError) as e:
|
||||
self.stdout.write(self.style.ERROR("Database import failed"))
|
||||
if (
|
||||
|
@ -104,6 +104,7 @@ class TestExportImport(DirectoriesMixin, TestCase):
|
||||
delete=False,
|
||||
no_archive=False,
|
||||
no_thumbnail=False,
|
||||
split_manifest=False,
|
||||
):
|
||||
args = ["document_exporter", self.target]
|
||||
if use_filename_format:
|
||||
@ -116,6 +117,8 @@ class TestExportImport(DirectoriesMixin, TestCase):
|
||||
args += ["--no-archive"]
|
||||
if no_thumbnail:
|
||||
args += ["--no-thumbnail"]
|
||||
if split_manifest:
|
||||
args += ["--split-manifest"]
|
||||
|
||||
call_command(*args)
|
||||
|
||||
@ -563,3 +566,20 @@ class TestExportImport(DirectoriesMixin, TestCase):
|
||||
with paperless_environment() as dirs:
|
||||
call_command("document_importer", self.target)
|
||||
self.assertEqual(Document.objects.count(), 4)
|
||||
|
||||
def test_split_manifest(self):
|
||||
shutil.rmtree(os.path.join(self.dirs.media_dir, "documents"))
|
||||
shutil.copytree(
|
||||
os.path.join(os.path.dirname(__file__), "samples", "documents"),
|
||||
os.path.join(self.dirs.media_dir, "documents"),
|
||||
)
|
||||
|
||||
manifest = self._do_export(split_manifest=True)
|
||||
has_document = False
|
||||
for element in manifest:
|
||||
has_document = has_document or element["model"] == "documents.document"
|
||||
self.assertFalse(has_document)
|
||||
|
||||
with paperless_environment() as dirs:
|
||||
call_command("document_importer", self.target)
|
||||
self.assertEqual(Document.objects.count(), 4)
|
||||
|
Loading…
x
Reference in New Issue
Block a user