From 28622d700dbaa94b35e48619ab2248d27ad52771 Mon Sep 17 00:00:00 2001 From: jonaswinkler Date: Sun, 6 Dec 2020 19:03:45 +0100 Subject: [PATCH] changed the way public filenames (i.e., for download and exporting) are generated. #94 --- .../management/commands/document_exporter.py | 32 +++++++++++------ src/documents/models.py | 34 ++++++++++++------- src/documents/signals/handlers.py | 4 +-- src/documents/tests/test_document_model.py | 10 +++--- .../tests/test_management_exporter.py | 5 ++- src/documents/views.py | 4 +-- 6 files changed, 55 insertions(+), 34 deletions(-) diff --git a/src/documents/management/commands/document_exporter.py b/src/documents/management/commands/document_exporter.py index f1ee74038..0e0b7901a 100644 --- a/src/documents/management/commands/document_exporter.py +++ b/src/documents/management/commands/document_exporter.py @@ -38,6 +38,9 @@ class Command(Renderable, BaseCommand): if not os.access(self.target, os.W_OK): raise CommandError("That path doesn't appear to be writable") + if os.listdir(self.target): + raise CommandError("That directory is not empty.") + self.dump() def dump(self): @@ -48,37 +51,44 @@ class Command(Renderable, BaseCommand): for index, document_dict in enumerate(manifest): + # Force output to unencrypted as that will be the current state. # The importer will make the decision to encrypt or not. manifest[index]["fields"]["storage_type"] = Document.STORAGE_TYPE_UNENCRYPTED # NOQA: E501 document = document_map[document_dict["pk"]] - unique_filename = f"{document.pk:07}_{document.file_name}" - file_target = os.path.join(self.target, unique_filename) + print(f"Exporting: {document}") - thumbnail_name = unique_filename + "-thumbnail.png" + filename_counter = 0 + while True: + original_name = document.get_public_filename(counter=filename_counter) + original_target = os.path.join(self.target, original_name) + + if not os.path.exists(original_target): + break + else: + filename_counter += 1 + + thumbnail_name = original_name + "-thumbnail.png" thumbnail_target = os.path.join(self.target, thumbnail_name) - document_dict[EXPORTER_FILE_NAME] = unique_filename + document_dict[EXPORTER_FILE_NAME] = original_name document_dict[EXPORTER_THUMBNAIL_NAME] = thumbnail_name if os.path.exists(document.archive_path): - archive_name = \ - f"{document.pk:07}_archive_{document.archive_file_name}" + archive_name = document.get_public_filename(archive=True, counter=filename_counter, suffix="_archive") archive_target = os.path.join(self.target, archive_name) document_dict[EXPORTER_ARCHIVE_NAME] = archive_name else: archive_target = None - print(f"Exporting: {file_target}") - t = int(time.mktime(document.created.timetuple())) if document.storage_type == Document.STORAGE_TYPE_GPG: - with open(file_target, "wb") as f: + with open(original_target, "wb") as f: f.write(GnuPG.decrypted(document.source_file)) - os.utime(file_target, times=(t, t)) + os.utime(original_target, times=(t, t)) with open(thumbnail_target, "wb") as f: f.write(GnuPG.decrypted(document.thumbnail_file)) @@ -90,7 +100,7 @@ class Command(Renderable, BaseCommand): os.utime(archive_target, times=(t, t)) else: - shutil.copy(document.source_path, file_target) + shutil.copy(document.source_path, original_target) shutil.copy(document.thumbnail_path, thumbnail_target) if archive_target: diff --git a/src/documents/models.py b/src/documents/models.py index a4f887d77..a410687f7 100755 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -1,10 +1,12 @@ # coding=utf-8 - +import datetime import logging import os import re from collections import OrderedDict +import pathvalidate + import dateutil.parser from django.conf import settings from django.db import models @@ -206,13 +208,11 @@ class Document(models.Model): ordering = ("correspondent", "title") def __str__(self): - created = self.created.strftime("%Y%m%d") + created = datetime.date.isoformat(self.created) if self.correspondent and self.title: - return "{}: {} - {}".format( - created, self.correspondent, self.title) - if self.correspondent or self.title: - return "{}: {}".format(created, self.correspondent or self.title) - return str(created) + return f"{created} {self.correspondent} {self.title}" + else: + return f"{created} {self.title}" @property def source_path(self): @@ -248,13 +248,21 @@ class Document(models.Model): def archive_file(self): return open(self.archive_path, "rb") - @property - def file_name(self): - return slugify(str(self)) + self.file_type + def get_public_filename(self, archive=False, counter=0, suffix=None): + result = str(self) - @property - def archive_file_name(self): - return slugify(str(self)) + ".pdf" + if counter: + result += f"_{counter:02}" + + if suffix: + result += suffix + + if archive: + result += ".pdf" + else: + result += self.file_type + + return pathvalidate.sanitize_filename(result, replacement_text="-") @property def file_type(self): diff --git a/src/documents/signals/handlers.py b/src/documents/signals/handlers.py index 4d9dc9ccd..32119a0a3 100755 --- a/src/documents/signals/handlers.py +++ b/src/documents/signals/handlers.py @@ -157,7 +157,7 @@ def run_post_consume_script(sender, document, **kwargs): Popen(( settings.POST_CONSUME_SCRIPT, str(document.pk), - document.file_name, + document.get_public_filename(), os.path.normpath(document.source_path), os.path.normpath(document.thumbnail_path), reverse("document-download", kwargs={"pk": document.pk}), @@ -179,7 +179,7 @@ def cleanup_document_deletion(sender, instance, using, **kwargs): f"Deleted file {f}.") except OSError as e: logging.getLogger(__name__).warning( - f"While deleting document {instance.file_name}, the file " + f"While deleting document {str(instance)}, the file " f"{f} could not be deleted: {e}" ) diff --git a/src/documents/tests/test_document_model.py b/src/documents/tests/test_document_model.py index 8764c7ec8..74bd9a2a7 100644 --- a/src/documents/tests/test_document_model.py +++ b/src/documents/tests/test_document_model.py @@ -48,19 +48,19 @@ class TestDocument(TestCase): def test_file_name(self): doc = Document(mime_type="application/pdf", title="test", created=datetime(2020, 12, 25)) - self.assertEqual(doc.file_name, "20201225-test.pdf") + self.assertEqual(doc.get_public_filename(), "2020-12-25 test.pdf") def test_file_name_jpg(self): doc = Document(mime_type="image/jpeg", title="test", created=datetime(2020, 12, 25)) - self.assertEqual(doc.file_name, "20201225-test.jpg") + self.assertEqual(doc.get_public_filename(), "2020-12-25 test.jpg") def test_file_name_unknown(self): doc = Document(mime_type="application/zip", title="test", created=datetime(2020, 12, 25)) - self.assertEqual(doc.file_name, "20201225-test.zip") + self.assertEqual(doc.get_public_filename(), "2020-12-25 test.zip") - def test_file_name_invalid(self): + def test_file_name_invalid_type(self): doc = Document(mime_type="image/jpegasd", title="test", created=datetime(2020, 12, 25)) - self.assertEqual(doc.file_name, "20201225-test") + self.assertEqual(doc.get_public_filename(), "2020-12-25 test") diff --git a/src/documents/tests/test_management_exporter.py b/src/documents/tests/test_management_exporter.py index 284d6108d..ab9733dc4 100644 --- a/src/documents/tests/test_management_exporter.py +++ b/src/documents/tests/test_management_exporter.py @@ -66,6 +66,9 @@ class TestExportImport(DirectoriesMixin, TestCase): def test_export_missing_files(self): target = tempfile.mkdtemp() - call_command('document_exporter', target) Document.objects.create(checksum="AAAAAAAAAAAAAAAAA", title="wow", filename="0000004.pdf", id=3, mime_type="application/pdf") self.assertRaises(FileNotFoundError, call_command, 'document_exporter', target) + + def test_duplicate_titles(self): + # TODO + pass diff --git a/src/documents/views.py b/src/documents/views.py index adef757ef..c6b4d4b35 100755 --- a/src/documents/views.py +++ b/src/documents/views.py @@ -145,11 +145,11 @@ class DocumentViewSet(RetrieveModelMixin, doc = Document.objects.get(id=pk) if not self.original_requested(request) and os.path.isfile(doc.archive_path): # NOQA: E501 file_handle = doc.archive_file - filename = doc.archive_file_name + filename = doc.get_public_filename(archive=True) mime_type = 'application/pdf' else: file_handle = doc.source_file - filename = doc.file_name + filename = doc.get_public_filename() mime_type = doc.mime_type if doc.storage_type == Document.STORAGE_TYPE_GPG: