add no-archive and no-thumbnail options to administration exporter and importer

This commit is contained in:
Matthieu Helleboid 2023-01-13 01:10:49 +01:00 committed by Trenton H
parent c7690c05f5
commit 9ae186e6f9
4 changed files with 130 additions and 25 deletions

View File

@ -227,12 +227,14 @@ is not a TTY" errors. For example:
`docker-compose exec -T webserver document_exporter ../export`
```
document_exporter target [-c] [-f] [-d]
document_exporter target [-c] [-f] [-d] [-na] [-nt]
optional arguments:
-c, --compare-checksums
-f, --use-filename-format
-d, --delete
-na, --no-archive
-nt, --no-thumbnail
-z, --zip
```
@ -259,6 +261,14 @@ current export such as files from deleted documents, specify `--delete`.
Be careful when pointing paperless to a directory that already contains
other files.
Paperless will not export archive files if you use `--no-archive`, and will
not export thumbnails if you use `--no-thumbnail`. After importing, these
files can be generated again by using `document_archiver` or
`document_thumbnails`. It can make sense to omit these files from a backup,
as their content and checksum can change (for example, with a new archiver
or thumbnail generator algorithm) and may then take up additional space in
a deduplicated backup.
If `-z` or `--zip` is provided, the export will be a zipfile
in the target directory, named according to the current date.

View File

@ -82,6 +82,21 @@ class Command(BaseCommand):
"deleted documents.",
)
parser.add_argument(
"-na",
"--no-archive",
default=False,
action="store_true",
help="Avoid exporting archive files",
)
parser.add_argument(
"-nt",
"--no-thumbnail",
default=False,
action="store_true",
help="Avoid exporting thumbnail files",
)
parser.add_argument(
"--no-progress-bar",
default=False,
@ -105,6 +120,8 @@ class Command(BaseCommand):
self.compare_checksums = False
self.use_filename_format = False
self.delete = False
self.no_archive = False
self.no_thumbnail = False
def handle(self, *args, **options):
@ -112,6 +129,8 @@ class Command(BaseCommand):
self.compare_checksums = options["compare_checksums"]
self.use_filename_format = options["use_filename_format"]
self.delete = options["delete"]
self.no_archive = options["no_archive"]
self.no_thumbnail = options["no_thumbnail"]
zip_export: bool = options["zip"]
# If zipping, save the original target for later and
@ -246,11 +265,14 @@ class Command(BaseCommand):
original_target = (self.target / Path(original_name)).resolve()
document_dict[EXPORTER_FILE_NAME] = original_name
thumbnail_name = base_name + "-thumbnail.webp"
thumbnail_target = (self.target / Path(thumbnail_name)).resolve()
document_dict[EXPORTER_THUMBNAIL_NAME] = thumbnail_name
if not self.no_thumbnail:
thumbnail_name = base_name + "-thumbnail.webp"
thumbnail_target = (self.target / Path(thumbnail_name)).resolve()
document_dict[EXPORTER_THUMBNAIL_NAME] = thumbnail_name
else:
thumbnail_target = None
if document.has_archive_version:
if not self.no_archive and document.has_archive_version:
archive_name = base_name + "-archive.pdf"
archive_target = (self.target / Path(archive_name)).resolve()
document_dict[EXPORTER_ARCHIVE_NAME] = archive_name
@ -266,10 +288,11 @@ class Command(BaseCommand):
original_target.write_bytes(GnuPG.decrypted(out_file))
os.utime(original_target, times=(t, t))
thumbnail_target.parent.mkdir(parents=True, exist_ok=True)
with document.thumbnail_file as out_file:
thumbnail_target.write_bytes(GnuPG.decrypted(out_file))
os.utime(thumbnail_target, times=(t, t))
if thumbnail_target:
thumbnail_target.parent.mkdir(parents=True, exist_ok=True)
with document.thumbnail_file as out_file:
thumbnail_target.write_bytes(GnuPG.decrypted(out_file))
os.utime(thumbnail_target, times=(t, t))
if archive_target:
archive_target.parent.mkdir(parents=True, exist_ok=True)
@ -283,7 +306,8 @@ class Command(BaseCommand):
original_target,
)
self.check_and_copy(document.thumbnail_path, None, thumbnail_target)
if thumbnail_target:
self.check_and_copy(document.thumbnail_path, None, thumbnail_target)
if archive_target:
self.check_and_copy(

View File

@ -193,8 +193,11 @@ class Command(BaseCommand):
doc_file = record[EXPORTER_FILE_NAME]
document_path = os.path.join(self.source, doc_file)
thumb_file = record[EXPORTER_THUMBNAIL_NAME]
thumbnail_path = Path(os.path.join(self.source, thumb_file)).resolve()
if EXPORTER_THUMBNAIL_NAME in record:
thumb_file = record[EXPORTER_THUMBNAIL_NAME]
thumbnail_path = Path(os.path.join(self.source, thumb_file)).resolve()
else:
thumbnail_path = None
if EXPORTER_ARCHIVE_NAME in record:
archive_file = record[EXPORTER_ARCHIVE_NAME]
@ -212,19 +215,21 @@ class Command(BaseCommand):
shutil.copy2(document_path, document.source_path)
if thumbnail_path.suffix in {".png", ".PNG"}:
run_convert(
density=300,
scale="500x5000>",
alpha="remove",
strip=True,
trim=False,
auto_orient=True,
input_file=f"{thumbnail_path}[0]",
output_file=str(document.thumbnail_path),
)
else:
shutil.copy2(thumbnail_path, document.thumbnail_path)
if thumbnail_path:
if thumbnail_path.suffix in {".png", ".PNG"}:
run_convert(
density=300,
scale="500x5000>",
alpha="remove",
strip=True,
trim=False,
auto_orient=True,
input_file=f"{thumbnail_path}[0]",
output_file=str(document.thumbnail_path),
)
else:
shutil.copy2(thumbnail_path, document.thumbnail_path)
if archive_path:
create_source_path_directory(document.archive_path)
# TODO: this assumes that the export is valid and

View File

@ -102,6 +102,8 @@ class TestExportImport(DirectoriesMixin, TestCase):
use_filename_format=False,
compare_checksums=False,
delete=False,
no_archive=False,
no_thumbnail=False,
):
args = ["document_exporter", self.target]
if use_filename_format:
@ -110,6 +112,10 @@ class TestExportImport(DirectoriesMixin, TestCase):
args += ["--compare-checksums"]
if delete:
args += ["--delete"]
if no_archive:
args += ["--no-archive"]
if no_thumbnail:
args += ["--no-thumbnail"]
call_command(*args)
@ -497,3 +503,63 @@ class TestExportImport(DirectoriesMixin, TestCase):
call_command(*args)
self.assertEqual("That path doesn't appear to be writable", str(e))
def test_no_archive(self):
# Checks that exporting with no_archive=True leaves the archive-name key out
# of every document entry in the manifest, and that the resulting export can
# still be imported.
#
# Replace the media "documents" directory with the bundled sample documents
# so the export has files to work on.
shutil.rmtree(os.path.join(self.dirs.media_dir, "documents"))
shutil.copytree(
os.path.join(os.path.dirname(__file__), "samples", "documents"),
os.path.join(self.dirs.media_dir, "documents"),
)
# Baseline: a default export must mention an archive file for at least one
# "documents.document" manifest element.
manifest = self._do_export()
has_archive = False
for element in manifest:
if element["model"] == "documents.document":
has_archive = (
has_archive or document_exporter.EXPORTER_ARCHIVE_NAME in element
)
self.assertTrue(has_archive)
# Reset the flag, export again with no_archive=True, and require that no
# document element carries the archive-name key.
has_archive = False
manifest = self._do_export(no_archive=True)
for element in manifest:
if element["model"] == "documents.document":
has_archive = (
has_archive or document_exporter.EXPORTER_ARCHIVE_NAME in element
)
self.assertFalse(has_archive)
# Import the archive-less export and check the document count.
# NOTE(review): indentation is not visible in this view; presumably both
# statements below run inside the `with` block -- confirm. The `dirs` binding
# is not referenced by any line shown here.
with paperless_environment() as dirs:
call_command("document_importer", self.target)
self.assertEqual(Document.objects.count(), 4)
def test_no_thumbnail(self):
# Checks that exporting with no_thumbnail=True leaves the thumbnail-name key
# out of every document entry in the manifest, and that the resulting export
# can still be imported.
#
# Replace the media "documents" directory with the bundled sample documents
# so the export has files to work on.
shutil.rmtree(os.path.join(self.dirs.media_dir, "documents"))
shutil.copytree(
os.path.join(os.path.dirname(__file__), "samples", "documents"),
os.path.join(self.dirs.media_dir, "documents"),
)
# Baseline: a default export must mention a thumbnail for at least one
# "documents.document" manifest element.
manifest = self._do_export()
has_thumbnail = False
for element in manifest:
if element["model"] == "documents.document":
has_thumbnail = (
has_thumbnail
or document_exporter.EXPORTER_THUMBNAIL_NAME in element
)
self.assertTrue(has_thumbnail)
# Reset the flag, export again with no_thumbnail=True, and require that no
# document element carries the thumbnail-name key.
has_thumbnail = False
manifest = self._do_export(no_thumbnail=True)
for element in manifest:
if element["model"] == "documents.document":
has_thumbnail = (
has_thumbnail
or document_exporter.EXPORTER_THUMBNAIL_NAME in element
)
self.assertFalse(has_thumbnail)
# Import the thumbnail-less export and check the document count.
# NOTE(review): indentation is not visible in this view; presumably both
# statements below run inside the `with` block -- confirm. The `dirs` binding
# is not referenced by any line shown here.
with paperless_environment() as dirs:
call_command("document_importer", self.target)
self.assertEqual(Document.objects.count(), 4)