mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
add split-manifest option to administration exporter
This commit is contained in:
parent
896304ccaa
commit
4cb4bd13ad
@ -227,9 +227,10 @@ is not a TTY" errors. For example:
|
|||||||
`docker-compose exec -T webserver document_exporter ../export`
|
`docker-compose exec -T webserver document_exporter ../export`
|
||||||
|
|
||||||
```
|
```
|
||||||
document_exporter target [-c] [-f] [-p] [-d] [-na] [-nt]
|
document_exporter target [-sm] [-c] [-f] [-p] [-d] [-na] [-nt]
|
||||||
|
|
||||||
optional arguments:
|
optional arguments:
|
||||||
|
-sm, --split-manifest
|
||||||
-c, --compare-checksums
|
-c, --compare-checksums
|
||||||
-f, --use-filename-format
|
-f, --use-filename-format
|
||||||
-p, --use-filename-prefix
|
-p, --use-filename-prefix
|
||||||
@ -243,6 +244,9 @@ optional arguments:
|
|||||||
documents, thumbnails and a `manifest.json` file. The manifest contains
|
documents, thumbnails and a `manifest.json` file. The manifest contains
|
||||||
all metadata from the database (correspondents, tags, etc).
|
all metadata from the database (correspondents, tags, etc).
|
||||||
|
|
||||||
|
If `-sm` or `--split-manifest` is provided, information about each document
|
||||||
|
will be placed in individual JSON files.
|
||||||
|
|
||||||
When you use the provided docker compose script, specify `../export` as
|
When you use the provided docker compose script, specify `../export` as
|
||||||
the target. This path inside the container is automatically mounted on
|
the target. This path inside the container is automatically mounted on
|
||||||
your host on the folder `export`.
|
your host on the folder `export`.
|
||||||
@ -279,8 +283,8 @@ paperless to use `PAPERLESS_FILENAME_FORMAT` for exported filenames
|
|||||||
instead, specify `--use-filename-format`.
|
instead, specify `--use-filename-format`.
|
||||||
|
|
||||||
If `-p` or `--use-filename-prefix` is provided, files will be exported
|
If `-p` or `--use-filename-prefix` is provided, files will be exported
|
||||||
in dedicated folders according to their nature: `archive`, `originals`
|
in dedicated folders according to their nature: `archive`, `originals`,
|
||||||
or `thumbnails`
|
`thumbnails` or `json`.
|
||||||
|
|
||||||
!!! warning
|
!!! warning
|
||||||
|
|
||||||
|
@ -53,6 +53,14 @@ class Command(BaseCommand):
|
|||||||
def add_arguments(self, parser):
|
def add_arguments(self, parser):
|
||||||
parser.add_argument("target")
|
parser.add_argument("target")
|
||||||
|
|
||||||
|
parser.add_argument(
|
||||||
|
"-sm",
|
||||||
|
"--split-manifest",
|
||||||
|
default=False,
|
||||||
|
action="store_true",
|
||||||
|
help="Export document information in individual manifest json files.",
|
||||||
|
)
|
||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"-c",
|
"-c",
|
||||||
"--compare-checksums",
|
"--compare-checksums",
|
||||||
@ -125,6 +133,7 @@ class Command(BaseCommand):
|
|||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
BaseCommand.__init__(self, *args, **kwargs)
|
BaseCommand.__init__(self, *args, **kwargs)
|
||||||
self.target: Path = None
|
self.target: Path = None
|
||||||
|
self.split_manifest = None
|
||||||
self.files_in_export_dir: Set[Path] = set()
|
self.files_in_export_dir: Set[Path] = set()
|
||||||
self.exported_files: List[Path] = []
|
self.exported_files: List[Path] = []
|
||||||
self.compare_checksums = False
|
self.compare_checksums = False
|
||||||
@ -137,6 +146,7 @@ class Command(BaseCommand):
|
|||||||
def handle(self, *args, **options):
|
def handle(self, *args, **options):
|
||||||
|
|
||||||
self.target = Path(options["target"]).resolve()
|
self.target = Path(options["target"]).resolve()
|
||||||
|
self.split_manifest = options["split_manifest"]
|
||||||
self.compare_checksums = options["compare_checksums"]
|
self.compare_checksums = options["compare_checksums"]
|
||||||
self.use_filename_format = options["use_filename_format"]
|
self.use_filename_format = options["use_filename_format"]
|
||||||
self.use_filename_prefix = options["use_filename_prefix"]
|
self.use_filename_prefix = options["use_filename_prefix"]
|
||||||
@ -217,7 +227,8 @@ class Command(BaseCommand):
|
|||||||
documents = Document.objects.order_by("id")
|
documents = Document.objects.order_by("id")
|
||||||
document_map = {d.pk: d for d in documents}
|
document_map = {d.pk: d for d in documents}
|
||||||
document_manifest = json.loads(serializers.serialize("json", documents))
|
document_manifest = json.loads(serializers.serialize("json", documents))
|
||||||
manifest += document_manifest
|
if not self.split_manifest:
|
||||||
|
manifest += document_manifest
|
||||||
|
|
||||||
manifest += json.loads(
|
manifest += json.loads(
|
||||||
serializers.serialize("json", MailAccount.objects.all()),
|
serializers.serialize("json", MailAccount.objects.all()),
|
||||||
@ -334,6 +345,15 @@ class Command(BaseCommand):
|
|||||||
archive_target,
|
archive_target,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if self.split_manifest:
|
||||||
|
manifest_name = base_name + "-manifest.json"
|
||||||
|
if self.use_filename_prefix:
|
||||||
|
manifest_name = os.path.join("json", manifest_name)
|
||||||
|
manifest_name = os.path.join(self.target, manifest_name)
|
||||||
|
os.makedirs(os.path.dirname(manifest_name), exist_ok=True)
|
||||||
|
with open(manifest_name, "w") as f:
|
||||||
|
json.dump([document_manifest[index]], f, indent=2)
|
||||||
|
|
||||||
# 4.1 write manifest to target folder
|
# 4.1 write manifest to target folder
|
||||||
manifest_path = (self.target / Path("manifest.json")).resolve()
|
manifest_path = (self.target / Path("manifest.json")).resolve()
|
||||||
manifest_path.write_text(json.dumps(manifest, indent=2))
|
manifest_path.write_text(json.dumps(manifest, indent=2))
|
||||||
|
@ -72,11 +72,24 @@ class Command(BaseCommand):
|
|||||||
if not os.access(self.source, os.R_OK):
|
if not os.access(self.source, os.R_OK):
|
||||||
raise CommandError("That path doesn't appear to be readable")
|
raise CommandError("That path doesn't appear to be readable")
|
||||||
|
|
||||||
manifest_path = os.path.normpath(os.path.join(self.source, "manifest.json"))
|
manifest_paths = []
|
||||||
self._check_manifest_exists(manifest_path)
|
|
||||||
|
|
||||||
with open(manifest_path) as f:
|
main_manifest_path = os.path.normpath(
|
||||||
|
os.path.join(self.source, "manifest.json"),
|
||||||
|
)
|
||||||
|
self._check_manifest_exists(main_manifest_path)
|
||||||
|
|
||||||
|
with open(main_manifest_path) as f:
|
||||||
self.manifest = json.load(f)
|
self.manifest = json.load(f)
|
||||||
|
manifest_paths.append(main_manifest_path)
|
||||||
|
|
||||||
|
for root, dirs, files in os.walk(self.source):
|
||||||
|
for file in files:
|
||||||
|
if file.endswith("-manifest.json"):
|
||||||
|
doc_manifest_path = os.path.normpath(os.path.join(root, file))
|
||||||
|
with open(doc_manifest_path) as f:
|
||||||
|
self.manifest += json.load(f)
|
||||||
|
manifest_paths.append(doc_manifest_path)
|
||||||
|
|
||||||
version_path = os.path.normpath(os.path.join(self.source, "version.json"))
|
version_path = os.path.normpath(os.path.join(self.source, "version.json"))
|
||||||
if os.path.exists(version_path):
|
if os.path.exists(version_path):
|
||||||
@ -109,7 +122,8 @@ class Command(BaseCommand):
|
|||||||
):
|
):
|
||||||
# Fill up the database with whatever is in the manifest
|
# Fill up the database with whatever is in the manifest
|
||||||
try:
|
try:
|
||||||
call_command("loaddata", manifest_path)
|
for manifest_path in manifest_paths:
|
||||||
|
call_command("loaddata", manifest_path)
|
||||||
except (FieldDoesNotExist, DeserializationError) as e:
|
except (FieldDoesNotExist, DeserializationError) as e:
|
||||||
self.stdout.write(self.style.ERROR("Database import failed"))
|
self.stdout.write(self.style.ERROR("Database import failed"))
|
||||||
if (
|
if (
|
||||||
|
@ -104,6 +104,7 @@ class TestExportImport(DirectoriesMixin, TestCase):
|
|||||||
delete=False,
|
delete=False,
|
||||||
no_archive=False,
|
no_archive=False,
|
||||||
no_thumbnail=False,
|
no_thumbnail=False,
|
||||||
|
split_manifest=False,
|
||||||
):
|
):
|
||||||
args = ["document_exporter", self.target]
|
args = ["document_exporter", self.target]
|
||||||
if use_filename_format:
|
if use_filename_format:
|
||||||
@ -116,6 +117,8 @@ class TestExportImport(DirectoriesMixin, TestCase):
|
|||||||
args += ["--no-archive"]
|
args += ["--no-archive"]
|
||||||
if no_thumbnail:
|
if no_thumbnail:
|
||||||
args += ["--no-thumbnail"]
|
args += ["--no-thumbnail"]
|
||||||
|
if split_manifest:
|
||||||
|
args += ["--split-manifest"]
|
||||||
|
|
||||||
call_command(*args)
|
call_command(*args)
|
||||||
|
|
||||||
@ -563,3 +566,20 @@ class TestExportImport(DirectoriesMixin, TestCase):
|
|||||||
with paperless_environment() as dirs:
|
with paperless_environment() as dirs:
|
||||||
call_command("document_importer", self.target)
|
call_command("document_importer", self.target)
|
||||||
self.assertEqual(Document.objects.count(), 4)
|
self.assertEqual(Document.objects.count(), 4)
|
||||||
|
|
||||||
|
def test_split_manifest(self):
|
||||||
|
shutil.rmtree(os.path.join(self.dirs.media_dir, "documents"))
|
||||||
|
shutil.copytree(
|
||||||
|
os.path.join(os.path.dirname(__file__), "samples", "documents"),
|
||||||
|
os.path.join(self.dirs.media_dir, "documents"),
|
||||||
|
)
|
||||||
|
|
||||||
|
manifest = self._do_export(split_manifest=True)
|
||||||
|
has_document = False
|
||||||
|
for element in manifest:
|
||||||
|
has_document = has_document or element["model"] == "documents.document"
|
||||||
|
self.assertFalse(has_document)
|
||||||
|
|
||||||
|
with paperless_environment() as dirs:
|
||||||
|
call_command("document_importer", self.target)
|
||||||
|
self.assertEqual(Document.objects.count(), 4)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user