mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Enhancement: Add --compare-json option to document_exporter to write json files only if changed (#8261)
This commit is contained in:
parent
f0e71330ac
commit
827121808a
@ -241,6 +241,7 @@ document_exporter target [-c] [-d] [-f] [-na] [-nt] [-p] [-sm] [-z]
|
||||
|
||||
optional arguments:
|
||||
-c, --compare-checksums
|
||||
-cj, --compare-json
|
||||
-d, --delete
|
||||
-f, --use-filename-format
|
||||
-na, --no-archive
|
||||
@ -269,7 +270,8 @@ only export changed and added files. Paperless determines whether a file
|
||||
has changed by inspecting the file attributes "date/time modified" and
|
||||
"size". If that does not work out for you, specify `-c` or
|
||||
`--compare-checksums` and paperless will attempt to compare file
|
||||
checksums instead. This is slower.
|
||||
checksums instead. This is slower. The manifest and metadata json files
|
||||
are always updated, unless `-cj` or `--compare-json` is specified.
|
||||
|
||||
Paperless will not remove any existing files in the export directory. If
|
||||
you want paperless to also remove files that do not belong to the
|
||||
|
@ -82,6 +82,18 @@ class Command(CryptMixin, BaseCommand):
|
||||
),
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"-cj",
|
||||
"--compare-json",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help=(
|
||||
"Compare json file checksums when determining whether to "
|
||||
"export a json file or not (manifest or metadata). "
|
||||
"If not specified, the file is always exported."
|
||||
),
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"-d",
|
||||
"--delete",
|
||||
@ -178,6 +190,7 @@ class Command(CryptMixin, BaseCommand):
|
||||
self.target = Path(options["target"]).resolve()
|
||||
self.split_manifest: bool = options["split_manifest"]
|
||||
self.compare_checksums: bool = options["compare_checksums"]
|
||||
self.compare_json: bool = options["compare_json"]
|
||||
self.use_filename_format: bool = options["use_filename_format"]
|
||||
self.use_folder_prefix: bool = options["use_folder_prefix"]
|
||||
self.delete: bool = options["delete"]
|
||||
@ -343,12 +356,11 @@ class Command(CryptMixin, BaseCommand):
|
||||
manifest_dict["custom_field_instances"],
|
||||
),
|
||||
)
|
||||
manifest_name.write_text(
|
||||
json.dumps(content, indent=2, ensure_ascii=False),
|
||||
encoding="utf-8",
|
||||
|
||||
self.check_and_write_json(
|
||||
content,
|
||||
manifest_name,
|
||||
)
|
||||
if manifest_name in self.files_in_export_dir:
|
||||
self.files_in_export_dir.remove(manifest_name)
|
||||
|
||||
# These were exported already
|
||||
if self.split_manifest:
|
||||
@ -361,12 +373,10 @@ class Command(CryptMixin, BaseCommand):
|
||||
for key in manifest_dict:
|
||||
manifest.extend(manifest_dict[key])
|
||||
manifest_path = (self.target / "manifest.json").resolve()
|
||||
manifest_path.write_text(
|
||||
json.dumps(manifest, indent=2, ensure_ascii=False),
|
||||
encoding="utf-8",
|
||||
self.check_and_write_json(
|
||||
manifest,
|
||||
manifest_path,
|
||||
)
|
||||
if manifest_path in self.files_in_export_dir:
|
||||
self.files_in_export_dir.remove(manifest_path)
|
||||
|
||||
# 4.2 write version information to target folder
|
||||
extra_metadata_path = (self.target / "metadata.json").resolve()
|
||||
@ -378,16 +388,11 @@ class Command(CryptMixin, BaseCommand):
|
||||
# Django stores most of these in the field itself, we store them once here
|
||||
if self.passphrase:
|
||||
metadata.update(self.get_crypt_params())
|
||||
extra_metadata_path.write_text(
|
||||
json.dumps(
|
||||
metadata,
|
||||
indent=2,
|
||||
ensure_ascii=False,
|
||||
),
|
||||
encoding="utf-8",
|
||||
|
||||
self.check_and_write_json(
|
||||
metadata,
|
||||
extra_metadata_path,
|
||||
)
|
||||
if extra_metadata_path in self.files_in_export_dir:
|
||||
self.files_in_export_dir.remove(extra_metadata_path)
|
||||
|
||||
if self.delete:
|
||||
# 5. Remove files which we did not explicitly export in this run
|
||||
@ -516,6 +521,35 @@ class Command(CryptMixin, BaseCommand):
|
||||
archive_target,
|
||||
)
|
||||
|
||||
def check_and_write_json(
|
||||
self,
|
||||
content: list[dict] | dict,
|
||||
target: Path,
|
||||
):
|
||||
"""
|
||||
Writes the source content to the target json file.
|
||||
If --compare-json arg was used, don't write to target file if
|
||||
the file exists and checksum is identical to content checksum.
|
||||
This preserves the file timestamps when no changes are made.
|
||||
"""
|
||||
|
||||
target = target.resolve()
|
||||
perform_write = True
|
||||
if target in self.files_in_export_dir:
|
||||
self.files_in_export_dir.remove(target)
|
||||
if self.compare_json:
|
||||
target_checksum = hashlib.md5(target.read_bytes()).hexdigest()
|
||||
src_str = json.dumps(content, indent=2, ensure_ascii=False)
|
||||
src_checksum = hashlib.md5(src_str.encode("utf-8")).hexdigest()
|
||||
if src_checksum == target_checksum:
|
||||
perform_write = False
|
||||
|
||||
if perform_write:
|
||||
target.write_text(
|
||||
json.dumps(content, indent=2, ensure_ascii=False),
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
def check_and_copy(
|
||||
self,
|
||||
source: Path,
|
||||
|
@ -153,6 +153,7 @@ class TestExportImport(
|
||||
*,
|
||||
use_filename_format=False,
|
||||
compare_checksums=False,
|
||||
compare_json=False,
|
||||
delete=False,
|
||||
no_archive=False,
|
||||
no_thumbnail=False,
|
||||
@ -165,6 +166,8 @@ class TestExportImport(
|
||||
args += ["--use-filename-format"]
|
||||
if compare_checksums:
|
||||
args += ["--compare-checksums"]
|
||||
if compare_json:
|
||||
args += ["--compare-json"]
|
||||
if delete:
|
||||
args += ["--delete"]
|
||||
if no_archive:
|
||||
@ -340,6 +343,10 @@ class TestExportImport(
|
||||
self.assertNotEqual(st_mtime_1, st_mtime_2)
|
||||
self.assertNotEqual(st_mtime_2, st_mtime_3)
|
||||
|
||||
self._do_export(compare_json=True)
|
||||
st_mtime_4 = os.stat(os.path.join(self.target, "manifest.json")).st_mtime
|
||||
self.assertEqual(st_mtime_3, st_mtime_4)
|
||||
|
||||
def test_update_export_changed_checksum(self):
|
||||
shutil.rmtree(os.path.join(self.dirs.media_dir, "documents"))
|
||||
shutil.copytree(
|
||||
|
Loading…
x
Reference in New Issue
Block a user