mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Enhancement: Add --compare-json option to document_exporter to write json files only if changed (#8261)
This commit is contained in:
parent
f0e71330ac
commit
827121808a
@ -241,6 +241,7 @@ document_exporter target [-c] [-d] [-f] [-na] [-nt] [-p] [-sm] [-z]
|
|||||||
|
|
||||||
optional arguments:
|
optional arguments:
|
||||||
-c, --compare-checksums
|
-c, --compare-checksums
|
||||||
|
-cj, --compare-json
|
||||||
-d, --delete
|
-d, --delete
|
||||||
-f, --use-filename-format
|
-f, --use-filename-format
|
||||||
-na, --no-archive
|
-na, --no-archive
|
||||||
@ -269,7 +270,8 @@ only export changed and added files. Paperless determines whether a file
|
|||||||
has changed by inspecting the file attributes "date/time modified" and
|
has changed by inspecting the file attributes "date/time modified" and
|
||||||
"size". If that does not work out for you, specify `-c` or
|
"size". If that does not work out for you, specify `-c` or
|
||||||
`--compare-checksums` and paperless will attempt to compare file
|
`--compare-checksums` and paperless will attempt to compare file
|
||||||
checksums instead. This is slower.
|
checksums instead. This is slower. The manifest and metadata json files
|
||||||
|
are always updated, unless `-cj` or `--compare-json` is specified.
|
||||||
|
|
||||||
Paperless will not remove any existing files in the export directory. If
|
Paperless will not remove any existing files in the export directory. If
|
||||||
you want paperless to also remove files that do not belong to the
|
you want paperless to also remove files that do not belong to the
|
||||||
|
@ -82,6 +82,18 @@ class Command(CryptMixin, BaseCommand):
|
|||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
parser.add_argument(
|
||||||
|
"-cj",
|
||||||
|
"--compare-json",
|
||||||
|
default=False,
|
||||||
|
action="store_true",
|
||||||
|
help=(
|
||||||
|
"Compare json file checksums when determining whether to "
|
||||||
|
"export a json file or not (manifest or metadata). "
|
||||||
|
"If not specified, the file is always exported."
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"-d",
|
"-d",
|
||||||
"--delete",
|
"--delete",
|
||||||
@ -178,6 +190,7 @@ class Command(CryptMixin, BaseCommand):
|
|||||||
self.target = Path(options["target"]).resolve()
|
self.target = Path(options["target"]).resolve()
|
||||||
self.split_manifest: bool = options["split_manifest"]
|
self.split_manifest: bool = options["split_manifest"]
|
||||||
self.compare_checksums: bool = options["compare_checksums"]
|
self.compare_checksums: bool = options["compare_checksums"]
|
||||||
|
self.compare_json: bool = options["compare_json"]
|
||||||
self.use_filename_format: bool = options["use_filename_format"]
|
self.use_filename_format: bool = options["use_filename_format"]
|
||||||
self.use_folder_prefix: bool = options["use_folder_prefix"]
|
self.use_folder_prefix: bool = options["use_folder_prefix"]
|
||||||
self.delete: bool = options["delete"]
|
self.delete: bool = options["delete"]
|
||||||
@ -343,12 +356,11 @@ class Command(CryptMixin, BaseCommand):
|
|||||||
manifest_dict["custom_field_instances"],
|
manifest_dict["custom_field_instances"],
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
manifest_name.write_text(
|
|
||||||
json.dumps(content, indent=2, ensure_ascii=False),
|
self.check_and_write_json(
|
||||||
encoding="utf-8",
|
content,
|
||||||
|
manifest_name,
|
||||||
)
|
)
|
||||||
if manifest_name in self.files_in_export_dir:
|
|
||||||
self.files_in_export_dir.remove(manifest_name)
|
|
||||||
|
|
||||||
# These were exported already
|
# These were exported already
|
||||||
if self.split_manifest:
|
if self.split_manifest:
|
||||||
@ -361,12 +373,10 @@ class Command(CryptMixin, BaseCommand):
|
|||||||
for key in manifest_dict:
|
for key in manifest_dict:
|
||||||
manifest.extend(manifest_dict[key])
|
manifest.extend(manifest_dict[key])
|
||||||
manifest_path = (self.target / "manifest.json").resolve()
|
manifest_path = (self.target / "manifest.json").resolve()
|
||||||
manifest_path.write_text(
|
self.check_and_write_json(
|
||||||
json.dumps(manifest, indent=2, ensure_ascii=False),
|
manifest,
|
||||||
encoding="utf-8",
|
manifest_path,
|
||||||
)
|
)
|
||||||
if manifest_path in self.files_in_export_dir:
|
|
||||||
self.files_in_export_dir.remove(manifest_path)
|
|
||||||
|
|
||||||
# 4.2 write version information to target folder
|
# 4.2 write version information to target folder
|
||||||
extra_metadata_path = (self.target / "metadata.json").resolve()
|
extra_metadata_path = (self.target / "metadata.json").resolve()
|
||||||
@ -378,16 +388,11 @@ class Command(CryptMixin, BaseCommand):
|
|||||||
# Django stores most of these in the field itself, we store them once here
|
# Django stores most of these in the field itself, we store them once here
|
||||||
if self.passphrase:
|
if self.passphrase:
|
||||||
metadata.update(self.get_crypt_params())
|
metadata.update(self.get_crypt_params())
|
||||||
extra_metadata_path.write_text(
|
|
||||||
json.dumps(
|
self.check_and_write_json(
|
||||||
metadata,
|
metadata,
|
||||||
indent=2,
|
extra_metadata_path,
|
||||||
ensure_ascii=False,
|
|
||||||
),
|
|
||||||
encoding="utf-8",
|
|
||||||
)
|
)
|
||||||
if extra_metadata_path in self.files_in_export_dir:
|
|
||||||
self.files_in_export_dir.remove(extra_metadata_path)
|
|
||||||
|
|
||||||
if self.delete:
|
if self.delete:
|
||||||
# 5. Remove files which we did not explicitly export in this run
|
# 5. Remove files which we did not explicitly export in this run
|
||||||
@ -516,6 +521,35 @@ class Command(CryptMixin, BaseCommand):
|
|||||||
archive_target,
|
archive_target,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def check_and_write_json(
|
||||||
|
self,
|
||||||
|
content: list[dict] | dict,
|
||||||
|
target: Path,
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
Writes the source content to the target json file.
|
||||||
|
If --compare-json arg was used, don't write to target file if
|
||||||
|
the file exists and checksum is identical to content checksum.
|
||||||
|
This preserves the file timestamps when no changes are made.
|
||||||
|
"""
|
||||||
|
|
||||||
|
target = target.resolve()
|
||||||
|
perform_write = True
|
||||||
|
if target in self.files_in_export_dir:
|
||||||
|
self.files_in_export_dir.remove(target)
|
||||||
|
if self.compare_json:
|
||||||
|
target_checksum = hashlib.md5(target.read_bytes()).hexdigest()
|
||||||
|
src_str = json.dumps(content, indent=2, ensure_ascii=False)
|
||||||
|
src_checksum = hashlib.md5(src_str.encode("utf-8")).hexdigest()
|
||||||
|
if src_checksum == target_checksum:
|
||||||
|
perform_write = False
|
||||||
|
|
||||||
|
if perform_write:
|
||||||
|
target.write_text(
|
||||||
|
json.dumps(content, indent=2, ensure_ascii=False),
|
||||||
|
encoding="utf-8",
|
||||||
|
)
|
||||||
|
|
||||||
def check_and_copy(
|
def check_and_copy(
|
||||||
self,
|
self,
|
||||||
source: Path,
|
source: Path,
|
||||||
|
@ -153,6 +153,7 @@ class TestExportImport(
|
|||||||
*,
|
*,
|
||||||
use_filename_format=False,
|
use_filename_format=False,
|
||||||
compare_checksums=False,
|
compare_checksums=False,
|
||||||
|
compare_json=False,
|
||||||
delete=False,
|
delete=False,
|
||||||
no_archive=False,
|
no_archive=False,
|
||||||
no_thumbnail=False,
|
no_thumbnail=False,
|
||||||
@ -165,6 +166,8 @@ class TestExportImport(
|
|||||||
args += ["--use-filename-format"]
|
args += ["--use-filename-format"]
|
||||||
if compare_checksums:
|
if compare_checksums:
|
||||||
args += ["--compare-checksums"]
|
args += ["--compare-checksums"]
|
||||||
|
if compare_json:
|
||||||
|
args += ["--compare-json"]
|
||||||
if delete:
|
if delete:
|
||||||
args += ["--delete"]
|
args += ["--delete"]
|
||||||
if no_archive:
|
if no_archive:
|
||||||
@ -340,6 +343,10 @@ class TestExportImport(
|
|||||||
self.assertNotEqual(st_mtime_1, st_mtime_2)
|
self.assertNotEqual(st_mtime_1, st_mtime_2)
|
||||||
self.assertNotEqual(st_mtime_2, st_mtime_3)
|
self.assertNotEqual(st_mtime_2, st_mtime_3)
|
||||||
|
|
||||||
|
self._do_export(compare_json=True)
|
||||||
|
st_mtime_4 = os.stat(os.path.join(self.target, "manifest.json")).st_mtime
|
||||||
|
self.assertEqual(st_mtime_3, st_mtime_4)
|
||||||
|
|
||||||
def test_update_export_changed_checksum(self):
|
def test_update_export_changed_checksum(self):
|
||||||
shutil.rmtree(os.path.join(self.dirs.media_dir, "documents"))
|
shutil.rmtree(os.path.join(self.dirs.media_dir, "documents"))
|
||||||
shutil.copytree(
|
shutil.copytree(
|
||||||
|
Loading…
x
Reference in New Issue
Block a user