mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-10-30 03:56:23 -05:00 
			
		
		
		
	Enhancement: Add --compare-json option to document_exporter to write json files only if changed (#8261)
This commit is contained in:
		| @@ -241,6 +241,7 @@ document_exporter target [-c] [-d] [-f] [-na] [-nt] [-p] [-sm] [-z] | ||||
|  | ||||
| optional arguments: | ||||
| -c,  --compare-checksums | ||||
| -cj, --compare-json | ||||
| -d,  --delete | ||||
| -f,  --use-filename-format | ||||
| -na, --no-archive | ||||
| @@ -269,7 +270,8 @@ only export changed and added files. Paperless determines whether a file | ||||
| has changed by inspecting the file attributes "date/time modified" and | ||||
| "size". If that does not work out for you, specify `-c` or | ||||
| `--compare-checksums` and paperless will attempt to compare file | ||||
| checksums instead. This is slower. | ||||
| checksums instead. This is slower. The manifest and metadata json files | ||||
| are always updated, unless `-cj` or `--compare-json` is specified. | ||||
|  | ||||
| Paperless will not remove any existing files in the export directory. If | ||||
| you want paperless to also remove files that do not belong to the | ||||
|   | ||||
| @@ -82,6 +82,18 @@ class Command(CryptMixin, BaseCommand): | ||||
|             ), | ||||
|         ) | ||||
|  | ||||
|         parser.add_argument( | ||||
|             "-cj", | ||||
|             "--compare-json", | ||||
|             default=False, | ||||
|             action="store_true", | ||||
|             help=( | ||||
|                 "Compare json file checksums when determining whether to " | ||||
|                 "export a json file or not (manifest or metadata). " | ||||
|                 "If not specified, the file is always exported." | ||||
|             ), | ||||
|         ) | ||||
|  | ||||
|         parser.add_argument( | ||||
|             "-d", | ||||
|             "--delete", | ||||
| @@ -178,6 +190,7 @@ class Command(CryptMixin, BaseCommand): | ||||
|         self.target = Path(options["target"]).resolve() | ||||
|         self.split_manifest: bool = options["split_manifest"] | ||||
|         self.compare_checksums: bool = options["compare_checksums"] | ||||
|         self.compare_json: bool = options["compare_json"] | ||||
|         self.use_filename_format: bool = options["use_filename_format"] | ||||
|         self.use_folder_prefix: bool = options["use_folder_prefix"] | ||||
|         self.delete: bool = options["delete"] | ||||
| @@ -343,12 +356,11 @@ class Command(CryptMixin, BaseCommand): | ||||
|                         manifest_dict["custom_field_instances"], | ||||
|                     ), | ||||
|                 ) | ||||
|                 manifest_name.write_text( | ||||
|                     json.dumps(content, indent=2, ensure_ascii=False), | ||||
|                     encoding="utf-8", | ||||
|  | ||||
|                 self.check_and_write_json( | ||||
|                     content, | ||||
|                     manifest_name, | ||||
|                 ) | ||||
|                 if manifest_name in self.files_in_export_dir: | ||||
|                     self.files_in_export_dir.remove(manifest_name) | ||||
|  | ||||
|         # These were exported already | ||||
|         if self.split_manifest: | ||||
| @@ -361,12 +373,10 @@ class Command(CryptMixin, BaseCommand): | ||||
|         for key in manifest_dict: | ||||
|             manifest.extend(manifest_dict[key]) | ||||
|         manifest_path = (self.target / "manifest.json").resolve() | ||||
|         manifest_path.write_text( | ||||
|             json.dumps(manifest, indent=2, ensure_ascii=False), | ||||
|             encoding="utf-8", | ||||
|         self.check_and_write_json( | ||||
|             manifest, | ||||
|             manifest_path, | ||||
|         ) | ||||
|         if manifest_path in self.files_in_export_dir: | ||||
|             self.files_in_export_dir.remove(manifest_path) | ||||
|  | ||||
|         # 4.2 write version information to target folder | ||||
|         extra_metadata_path = (self.target / "metadata.json").resolve() | ||||
| @@ -378,16 +388,11 @@ class Command(CryptMixin, BaseCommand): | ||||
|         # Django stores most of these in the field itself, we store them once here | ||||
|         if self.passphrase: | ||||
|             metadata.update(self.get_crypt_params()) | ||||
|         extra_metadata_path.write_text( | ||||
|             json.dumps( | ||||
|                 metadata, | ||||
|                 indent=2, | ||||
|                 ensure_ascii=False, | ||||
|             ), | ||||
|             encoding="utf-8", | ||||
|  | ||||
|         self.check_and_write_json( | ||||
|             metadata, | ||||
|             extra_metadata_path, | ||||
|         ) | ||||
|         if extra_metadata_path in self.files_in_export_dir: | ||||
|             self.files_in_export_dir.remove(extra_metadata_path) | ||||
|  | ||||
|         if self.delete: | ||||
|             # 5. Remove files which we did not explicitly export in this run | ||||
| @@ -516,6 +521,35 @@ class Command(CryptMixin, BaseCommand): | ||||
|                     archive_target, | ||||
|                 ) | ||||
|  | ||||
|     def check_and_write_json( | ||||
|         self, | ||||
|         content: list[dict] | dict, | ||||
|         target: Path, | ||||
|     ): | ||||
|         """ | ||||
|         Writes the source content to the target json file. | ||||
|         If --compare-json arg was used, don't write to target file if | ||||
|         the file exists and checksum is identical to content checksum. | ||||
|         This preserves the file timestamps when no changes are made. | ||||
|         """ | ||||
|  | ||||
|         target = target.resolve() | ||||
|         perform_write = True | ||||
|         if target in self.files_in_export_dir: | ||||
|             self.files_in_export_dir.remove(target) | ||||
|             if self.compare_json: | ||||
|                 target_checksum = hashlib.md5(target.read_bytes()).hexdigest() | ||||
|                 src_str = json.dumps(content, indent=2, ensure_ascii=False) | ||||
|                 src_checksum = hashlib.md5(src_str.encode("utf-8")).hexdigest() | ||||
|                 if src_checksum == target_checksum: | ||||
|                     perform_write = False | ||||
|  | ||||
|         if perform_write: | ||||
|             target.write_text( | ||||
|                 json.dumps(content, indent=2, ensure_ascii=False), | ||||
|                 encoding="utf-8", | ||||
|             ) | ||||
|  | ||||
|     def check_and_copy( | ||||
|         self, | ||||
|         source: Path, | ||||
|   | ||||
| @@ -153,6 +153,7 @@ class TestExportImport( | ||||
|         *, | ||||
|         use_filename_format=False, | ||||
|         compare_checksums=False, | ||||
|         compare_json=False, | ||||
|         delete=False, | ||||
|         no_archive=False, | ||||
|         no_thumbnail=False, | ||||
| @@ -165,6 +166,8 @@ class TestExportImport( | ||||
|             args += ["--use-filename-format"] | ||||
|         if compare_checksums: | ||||
|             args += ["--compare-checksums"] | ||||
|         if compare_json: | ||||
|             args += ["--compare-json"] | ||||
|         if delete: | ||||
|             args += ["--delete"] | ||||
|         if no_archive: | ||||
| @@ -340,6 +343,10 @@ class TestExportImport( | ||||
|         self.assertNotEqual(st_mtime_1, st_mtime_2) | ||||
|         self.assertNotEqual(st_mtime_2, st_mtime_3) | ||||
|  | ||||
|         self._do_export(compare_json=True) | ||||
|         st_mtime_4 = os.stat(os.path.join(self.target, "manifest.json")).st_mtime | ||||
|         self.assertEqual(st_mtime_3, st_mtime_4) | ||||
|  | ||||
|     def test_update_export_changed_checksum(self): | ||||
|         shutil.rmtree(os.path.join(self.dirs.media_dir, "documents")) | ||||
|         shutil.copytree( | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Kevin Doren
					Kevin Doren