Mirror of https://github.com/paperless-ngx/paperless-ngx.git (synced 2025-10-30 03:56:23 -05:00)
			
		
		
		
	add split-manifest option to administration exporter
This commit is contained in:
Matthieu Helleboid, committed by Trenton H
					
				
			
			
				
	
			
			
			 Trenton H
						Trenton H
					
				
			
parent 896304ccaa
commit 4cb4bd13ad
				
			| @@ -53,6 +53,14 @@ class Command(BaseCommand): | ||||
|     def add_arguments(self, parser): | ||||
|         parser.add_argument("target") | ||||
|  | ||||
|         parser.add_argument( | ||||
|             "-sm", | ||||
|             "--split-manifest", | ||||
|             default=False, | ||||
|             action="store_true", | ||||
|             help="Export document information in individual manifest json files.", | ||||
|         ) | ||||
|  | ||||
|         parser.add_argument( | ||||
|             "-c", | ||||
|             "--compare-checksums", | ||||
| @@ -125,6 +133,7 @@ class Command(BaseCommand): | ||||
|     def __init__(self, *args, **kwargs): | ||||
|         BaseCommand.__init__(self, *args, **kwargs) | ||||
|         self.target: Path = None | ||||
|         self.split_manifest = None | ||||
|         self.files_in_export_dir: Set[Path] = set() | ||||
|         self.exported_files: List[Path] = [] | ||||
|         self.compare_checksums = False | ||||
| @@ -137,6 +146,7 @@ class Command(BaseCommand): | ||||
|     def handle(self, *args, **options): | ||||
|  | ||||
|         self.target = Path(options["target"]).resolve() | ||||
|         self.split_manifest = options["split_manifest"] | ||||
|         self.compare_checksums = options["compare_checksums"] | ||||
|         self.use_filename_format = options["use_filename_format"] | ||||
|         self.use_filename_prefix = options["use_filename_prefix"] | ||||
| @@ -217,7 +227,8 @@ class Command(BaseCommand): | ||||
|             documents = Document.objects.order_by("id") | ||||
|             document_map = {d.pk: d for d in documents} | ||||
|             document_manifest = json.loads(serializers.serialize("json", documents)) | ||||
|             manifest += document_manifest | ||||
|             if not self.split_manifest: | ||||
|                 manifest += document_manifest | ||||
|  | ||||
|             manifest += json.loads( | ||||
|                 serializers.serialize("json", MailAccount.objects.all()), | ||||
| @@ -334,6 +345,15 @@ class Command(BaseCommand): | ||||
|                         archive_target, | ||||
|                     ) | ||||
|  | ||||
|             if self.split_manifest: | ||||
|                 manifest_name = base_name + "-manifest.json" | ||||
|                 if self.use_filename_prefix: | ||||
|                     manifest_name = os.path.join("json", manifest_name) | ||||
|                 manifest_name = os.path.join(self.target, manifest_name) | ||||
|                 os.makedirs(os.path.dirname(manifest_name), exist_ok=True) | ||||
|                 with open(manifest_name, "w") as f: | ||||
|                     json.dump([document_manifest[index]], f, indent=2) | ||||
|  | ||||
|         # 4.1 write manifest to target folder | ||||
|         manifest_path = (self.target / Path("manifest.json")).resolve() | ||||
|         manifest_path.write_text(json.dumps(manifest, indent=2)) | ||||
|   | ||||
| @@ -72,11 +72,24 @@ class Command(BaseCommand): | ||||
|         if not os.access(self.source, os.R_OK): | ||||
|             raise CommandError("That path doesn't appear to be readable") | ||||
|  | ||||
|         manifest_path = os.path.normpath(os.path.join(self.source, "manifest.json")) | ||||
|         self._check_manifest_exists(manifest_path) | ||||
|         manifest_paths = [] | ||||
|  | ||||
|         with open(manifest_path) as f: | ||||
|         main_manifest_path = os.path.normpath( | ||||
|             os.path.join(self.source, "manifest.json"), | ||||
|         ) | ||||
|         self._check_manifest_exists(main_manifest_path) | ||||
|  | ||||
|         with open(main_manifest_path) as f: | ||||
|             self.manifest = json.load(f) | ||||
|         manifest_paths.append(main_manifest_path) | ||||
|  | ||||
|         for root, dirs, files in os.walk(self.source): | ||||
|             for file in files: | ||||
|                 if file.endswith("-manifest.json"): | ||||
|                     doc_manifest_path = os.path.normpath(os.path.join(root, file)) | ||||
|                     with open(doc_manifest_path) as f: | ||||
|                         self.manifest += json.load(f) | ||||
|                     manifest_paths.append(doc_manifest_path) | ||||
|  | ||||
|         version_path = os.path.normpath(os.path.join(self.source, "version.json")) | ||||
|         if os.path.exists(version_path): | ||||
| @@ -109,7 +122,8 @@ class Command(BaseCommand): | ||||
|             ): | ||||
|                 # Fill up the database with whatever is in the manifest | ||||
|                 try: | ||||
|                     call_command("loaddata", manifest_path) | ||||
|                     for manifest_path in manifest_paths: | ||||
|                         call_command("loaddata", manifest_path) | ||||
|                 except (FieldDoesNotExist, DeserializationError) as e: | ||||
|                     self.stdout.write(self.style.ERROR("Database import failed")) | ||||
|                     if ( | ||||
|   | ||||
| @@ -104,6 +104,7 @@ class TestExportImport(DirectoriesMixin, TestCase): | ||||
|         delete=False, | ||||
|         no_archive=False, | ||||
|         no_thumbnail=False, | ||||
|         split_manifest=False, | ||||
|     ): | ||||
|         args = ["document_exporter", self.target] | ||||
|         if use_filename_format: | ||||
| @@ -116,6 +117,8 @@ class TestExportImport(DirectoriesMixin, TestCase): | ||||
|             args += ["--no-archive"] | ||||
|         if no_thumbnail: | ||||
|             args += ["--no-thumbnail"] | ||||
|         if split_manifest: | ||||
|             args += ["--split-manifest"] | ||||
|  | ||||
|         call_command(*args) | ||||
|  | ||||
| @@ -563,3 +566,20 @@ class TestExportImport(DirectoriesMixin, TestCase): | ||||
|         with paperless_environment() as dirs: | ||||
|             call_command("document_importer", self.target) | ||||
|             self.assertEqual(Document.objects.count(), 4) | ||||
|  | ||||
|     def test_split_manifest(self): | ||||
|         shutil.rmtree(os.path.join(self.dirs.media_dir, "documents")) | ||||
|         shutil.copytree( | ||||
|             os.path.join(os.path.dirname(__file__), "samples", "documents"), | ||||
|             os.path.join(self.dirs.media_dir, "documents"), | ||||
|         ) | ||||
|  | ||||
|         manifest = self._do_export(split_manifest=True) | ||||
|         has_document = False | ||||
|         for element in manifest: | ||||
|             has_document = has_document or element["model"] == "documents.document" | ||||
|         self.assertFalse(has_document) | ||||
|  | ||||
|         with paperless_environment() as dirs: | ||||
|             call_command("document_importer", self.target) | ||||
|             self.assertEqual(Document.objects.count(), 4) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user