Feature: Allow encrypting sensitive fields in export (#6927)

Co-authored-by: shamoon <4887959+shamoon@users.noreply.github.com>
This commit is contained in:
Trenton H
2024-06-09 07:41:18 -07:00
committed by GitHub
parent 6ddb62bf3f
commit d9002005b1
8 changed files with 583 additions and 120 deletions

View File

@@ -31,6 +31,7 @@ if settings.AUDIT_LOG_ENABLED:
from documents.file_handling import delete_empty_directories
from documents.file_handling import generate_filename
from documents.management.commands.mixins import CryptMixin
from documents.models import Correspondent
from documents.models import CustomField
from documents.models import CustomFieldInstance
@@ -56,7 +57,7 @@ from paperless_mail.models import MailAccount
from paperless_mail.models import MailRule
class Command(BaseCommand):
class Command(CryptMixin, BaseCommand):
help = (
"Decrypt and rename all files in our collection into a given target "
"directory. And include a manifest file containing document data for "
@@ -165,6 +166,11 @@ class Command(BaseCommand):
help="If set, the progress bar will not be shown",
)
parser.add_argument(
"--passphrase",
help="If provided, is used to encrypt sensitive data in the export",
)
def handle(self, *args, **options):
self.target = Path(options["target"]).resolve()
self.split_manifest: bool = options["split_manifest"]
@@ -177,6 +183,7 @@ class Command(BaseCommand):
self.zip_export: bool = options["zip"]
self.data_only: bool = options["data_only"]
self.no_progress_bar: bool = options["no_progress_bar"]
self.passphrase: Optional[str] = options.get("passphrase")
self.files_in_export_dir: set[Path] = set()
self.exported_files: set[str] = set()
@@ -272,6 +279,8 @@ class Command(BaseCommand):
serializers.serialize("json", manifest_key_to_object_query[key]),
)
self.encrypt_secret_fields(manifest_dict)
# These are treated specially and included in the per-document manifest
# if that setting is enabled. Otherwise, they are just exported to the bulk
# manifest
@@ -353,17 +362,25 @@ class Command(BaseCommand):
self.files_in_export_dir.remove(manifest_path)
# 4.2 write version information to target folder
version_path = (self.target / "version.json").resolve()
version_path.write_text(
extra_metadata_path = (self.target / "metadata.json").resolve()
metadata: dict[str, str | int | dict[str, str | int]] = {
"version": version.__full_version_str__,
}
# 4.2.1 If needed, write the crypto values into the metadata
# Django stores most of these in the field itself, we store them once here
if self.passphrase:
metadata.update(self.get_crypt_params())
extra_metadata_path.write_text(
json.dumps(
{"version": version.__full_version_str__},
metadata,
indent=2,
ensure_ascii=False,
),
encoding="utf-8",
)
if version_path in self.files_in_export_dir:
self.files_in_export_dir.remove(version_path)
if extra_metadata_path in self.files_in_export_dir:
self.files_in_export_dir.remove(extra_metadata_path)
if self.delete:
# 5. Remove files which we did not explicitly export in this run
@@ -527,3 +544,29 @@ class Command(BaseCommand):
if perform_copy:
target.parent.mkdir(parents=True, exist_ok=True)
copy_file_with_basic_stats(source, target)
def encrypt_secret_fields(self, manifest: dict) -> None:
"""
Encrypts certain fields in the export. Currently limited to the mail account password
"""
if self.passphrase:
self.setup_crypto(passphrase=self.passphrase)
for crypt_config in self.CRYPT_FIELDS:
exporter_key = crypt_config["exporter_key"]
crypt_fields = crypt_config["fields"]
for manifest_record in manifest[exporter_key]:
for field in crypt_fields:
manifest_record["fields"][field] = self.encrypt_string(
value=manifest_record["fields"][field],
)
elif MailAccount.objects.count() > 0:
self.stdout.write(
self.style.NOTICE(
"You have configured mail accounts, "
"but no passphrase was given. "
"Passwords will be in plaintext",
),
)