From 9ae186e6f9711162072ccd8b9257d111a3ae8059 Mon Sep 17 00:00:00 2001 From: Matthieu Helleboid Date: Fri, 13 Jan 2023 01:10:49 +0100 Subject: [PATCH] add no-archive and no-thumbnail options to administration exporter and importer --- docs/administration.md | 12 +++- .../management/commands/document_exporter.py | 42 +++++++++--- .../management/commands/document_importer.py | 35 +++++----- .../tests/test_management_exporter.py | 66 +++++++++++++++++++ 4 files changed, 130 insertions(+), 25 deletions(-) diff --git a/docs/administration.md b/docs/administration.md index c64c6c6ae..e7fa74fec 100644 --- a/docs/administration.md +++ b/docs/administration.md @@ -227,12 +227,14 @@ is not a TTY" errors. For example: `docker-compose exec -T webserver document_exporter ../export` ``` -document_exporter target [-c] [-f] [-d] +document_exporter target [-c] [-f] [-d] [-na] [-nt] optional arguments: -c, --compare-checksums -f, --use-filename-format -d, --delete +-na, --no-archive +-nt, --no-thumbnail -z --zip ``` @@ -259,6 +261,14 @@ current export such as files from deleted documents, specify `--delete`. Be careful when pointing paperless to a directory that already contains other files. +Paperless will not export archive files if you use `--no-archive`, or will +not export thumbnails if you use `--no-thumbnail`. After importing, these +files can be generated again by using `document_archiver` or +`document_thumbnails`. It can make sense to omit these files from backup +as their content and checksum can change (new archiver or thumbnail +generator algorithm) and may then cause additional used space in +a deduplicated backup. + If `-z` or `--zip` is provided, the export will be a zipfile in the target directory, named according to the current date. 
diff --git a/src/documents/management/commands/document_exporter.py b/src/documents/management/commands/document_exporter.py index 3cd028f01..b44317969 100644 --- a/src/documents/management/commands/document_exporter.py +++ b/src/documents/management/commands/document_exporter.py @@ -82,6 +82,21 @@ class Command(BaseCommand): "deleted documents.", ) + parser.add_argument( + "-na", + "--no-archive", + default=False, + action="store_true", + help="Avoid exporting archive files", + ) + + parser.add_argument( + "-nt", + "--no-thumbnail", + default=False, + action="store_true", + help="Avoid exporting thumbnail files", + ) parser.add_argument( "--no-progress-bar", default=False, @@ -105,6 +120,8 @@ class Command(BaseCommand): self.compare_checksums = False self.use_filename_format = False self.delete = False + self.no_archive = False + self.no_thumbnail = False def handle(self, *args, **options): @@ -112,6 +129,8 @@ class Command(BaseCommand): self.compare_checksums = options["compare_checksums"] self.use_filename_format = options["use_filename_format"] self.delete = options["delete"] + self.no_archive = options["no_archive"] + self.no_thumbnail = options["no_thumbnail"] zip_export: bool = options["zip"] # If zipping, save the original target for later and @@ -246,11 +265,14 @@ class Command(BaseCommand): original_target = (self.target / Path(original_name)).resolve() document_dict[EXPORTER_FILE_NAME] = original_name - thumbnail_name = base_name + "-thumbnail.webp" - thumbnail_target = (self.target / Path(thumbnail_name)).resolve() - document_dict[EXPORTER_THUMBNAIL_NAME] = thumbnail_name + if not self.no_thumbnail: + thumbnail_name = base_name + "-thumbnail.webp" + thumbnail_target = (self.target / Path(thumbnail_name)).resolve() + document_dict[EXPORTER_THUMBNAIL_NAME] = thumbnail_name + else: + thumbnail_target = None - if document.has_archive_version: + if not self.no_archive and document.has_archive_version: archive_name = base_name + "-archive.pdf" archive_target 
= (self.target / Path(archive_name)).resolve() document_dict[EXPORTER_ARCHIVE_NAME] = archive_name @@ -266,10 +288,11 @@ class Command(BaseCommand): original_target.write_bytes(GnuPG.decrypted(out_file)) os.utime(original_target, times=(t, t)) - thumbnail_target.parent.mkdir(parents=True, exist_ok=True) - with document.thumbnail_file as out_file: - thumbnail_target.write_bytes(GnuPG.decrypted(out_file)) - os.utime(thumbnail_target, times=(t, t)) + if thumbnail_target: + thumbnail_target.parent.mkdir(parents=True, exist_ok=True) + with document.thumbnail_file as out_file: + thumbnail_target.write_bytes(GnuPG.decrypted(out_file)) + os.utime(thumbnail_target, times=(t, t)) if archive_target: archive_target.parent.mkdir(parents=True, exist_ok=True) @@ -283,7 +306,8 @@ class Command(BaseCommand): original_target, ) - self.check_and_copy(document.thumbnail_path, None, thumbnail_target) + if thumbnail_target: + self.check_and_copy(document.thumbnail_path, None, thumbnail_target) if archive_target: self.check_and_copy( diff --git a/src/documents/management/commands/document_importer.py b/src/documents/management/commands/document_importer.py index db85460da..b62159d54 100644 --- a/src/documents/management/commands/document_importer.py +++ b/src/documents/management/commands/document_importer.py @@ -193,8 +193,11 @@ class Command(BaseCommand): doc_file = record[EXPORTER_FILE_NAME] document_path = os.path.join(self.source, doc_file) - thumb_file = record[EXPORTER_THUMBNAIL_NAME] - thumbnail_path = Path(os.path.join(self.source, thumb_file)).resolve() + if EXPORTER_THUMBNAIL_NAME in record: + thumb_file = record[EXPORTER_THUMBNAIL_NAME] + thumbnail_path = Path(os.path.join(self.source, thumb_file)).resolve() + else: + thumbnail_path = None if EXPORTER_ARCHIVE_NAME in record: archive_file = record[EXPORTER_ARCHIVE_NAME] @@ -212,19 +215,21 @@ class Command(BaseCommand): shutil.copy2(document_path, document.source_path) - if thumbnail_path.suffix in {".png", ".PNG"}: - 
run_convert( - density=300, - scale="500x5000>", - alpha="remove", - strip=True, - trim=False, - auto_orient=True, - input_file=f"{thumbnail_path}[0]", - output_file=str(document.thumbnail_path), - ) - else: - shutil.copy2(thumbnail_path, document.thumbnail_path) + if thumbnail_path: + if thumbnail_path.suffix in {".png", ".PNG"}: + run_convert( + density=300, + scale="500x5000>", + alpha="remove", + strip=True, + trim=False, + auto_orient=True, + input_file=f"{thumbnail_path}[0]", + output_file=str(document.thumbnail_path), + ) + else: + shutil.copy2(thumbnail_path, document.thumbnail_path) + if archive_path: create_source_path_directory(document.archive_path) # TODO: this assumes that the export is valid and diff --git a/src/documents/tests/test_management_exporter.py b/src/documents/tests/test_management_exporter.py index 5aff05793..8bf8def39 100644 --- a/src/documents/tests/test_management_exporter.py +++ b/src/documents/tests/test_management_exporter.py @@ -102,6 +102,8 @@ class TestExportImport(DirectoriesMixin, TestCase): use_filename_format=False, compare_checksums=False, delete=False, + no_archive=False, + no_thumbnail=False, ): args = ["document_exporter", self.target] if use_filename_format: @@ -110,6 +112,10 @@ class TestExportImport(DirectoriesMixin, TestCase): args += ["--compare-checksums"] if delete: args += ["--delete"] + if no_archive: + args += ["--no-archive"] + if no_thumbnail: + args += ["--no-thumbnail"] call_command(*args) @@ -497,3 +503,63 @@ class TestExportImport(DirectoriesMixin, TestCase): call_command(*args) self.assertEqual("That path doesn't appear to be writable", str(e)) + + def test_no_archive(self): + shutil.rmtree(os.path.join(self.dirs.media_dir, "documents")) + shutil.copytree( + os.path.join(os.path.dirname(__file__), "samples", "documents"), + os.path.join(self.dirs.media_dir, "documents"), + ) + + manifest = self._do_export() + has_archive = False + for element in manifest: + if element["model"] == "documents.document": + 
has_archive = ( + has_archive or document_exporter.EXPORTER_ARCHIVE_NAME in element + ) + self.assertTrue(has_archive) + + has_archive = False + manifest = self._do_export(no_archive=True) + for element in manifest: + if element["model"] == "documents.document": + has_archive = ( + has_archive or document_exporter.EXPORTER_ARCHIVE_NAME in element + ) + self.assertFalse(has_archive) + + with paperless_environment() as dirs: + call_command("document_importer", self.target) + self.assertEqual(Document.objects.count(), 4) + + def test_no_thumbnail(self): + shutil.rmtree(os.path.join(self.dirs.media_dir, "documents")) + shutil.copytree( + os.path.join(os.path.dirname(__file__), "samples", "documents"), + os.path.join(self.dirs.media_dir, "documents"), + ) + + manifest = self._do_export() + has_thumbnail = False + for element in manifest: + if element["model"] == "documents.document": + has_thumbnail = ( + has_thumbnail + or document_exporter.EXPORTER_THUMBNAIL_NAME in element + ) + self.assertTrue(has_thumbnail) + + has_thumbnail = False + manifest = self._do_export(no_thumbnail=True) + for element in manifest: + if element["model"] == "documents.document": + has_thumbnail = ( + has_thumbnail + or document_exporter.EXPORTER_THUMBNAIL_NAME in element + ) + self.assertFalse(has_thumbnail) + + with paperless_environment() as dirs: + call_command("document_importer", self.target) + self.assertEqual(Document.objects.count(), 4)