Changed the way public filenames (i.e., for downloading and exporting) are generated. #94

This commit is contained in:
jonaswinkler 2020-12-06 19:03:45 +01:00
parent a079c310b4
commit 28622d700d
6 changed files with 55 additions and 34 deletions

View File

@ -38,6 +38,9 @@ class Command(Renderable, BaseCommand):
if not os.access(self.target, os.W_OK): if not os.access(self.target, os.W_OK):
raise CommandError("That path doesn't appear to be writable") raise CommandError("That path doesn't appear to be writable")
if os.listdir(self.target):
raise CommandError("That directory is not empty.")
self.dump() self.dump()
def dump(self): def dump(self):
@ -48,37 +51,44 @@ class Command(Renderable, BaseCommand):
for index, document_dict in enumerate(manifest): for index, document_dict in enumerate(manifest):
# Force output to unencrypted as that will be the current state. # Force output to unencrypted as that will be the current state.
# The importer will make the decision to encrypt or not. # The importer will make the decision to encrypt or not.
manifest[index]["fields"]["storage_type"] = Document.STORAGE_TYPE_UNENCRYPTED # NOQA: E501 manifest[index]["fields"]["storage_type"] = Document.STORAGE_TYPE_UNENCRYPTED # NOQA: E501
document = document_map[document_dict["pk"]] document = document_map[document_dict["pk"]]
unique_filename = f"{document.pk:07}_{document.file_name}" print(f"Exporting: {document}")
file_target = os.path.join(self.target, unique_filename)
thumbnail_name = unique_filename + "-thumbnail.png" filename_counter = 0
while True:
original_name = document.get_public_filename(counter=filename_counter)
original_target = os.path.join(self.target, original_name)
if not os.path.exists(original_target):
break
else:
filename_counter += 1
thumbnail_name = original_name + "-thumbnail.png"
thumbnail_target = os.path.join(self.target, thumbnail_name) thumbnail_target = os.path.join(self.target, thumbnail_name)
document_dict[EXPORTER_FILE_NAME] = unique_filename document_dict[EXPORTER_FILE_NAME] = original_name
document_dict[EXPORTER_THUMBNAIL_NAME] = thumbnail_name document_dict[EXPORTER_THUMBNAIL_NAME] = thumbnail_name
if os.path.exists(document.archive_path): if os.path.exists(document.archive_path):
archive_name = \ archive_name = document.get_public_filename(archive=True, counter=filename_counter, suffix="_archive")
f"{document.pk:07}_archive_{document.archive_file_name}"
archive_target = os.path.join(self.target, archive_name) archive_target = os.path.join(self.target, archive_name)
document_dict[EXPORTER_ARCHIVE_NAME] = archive_name document_dict[EXPORTER_ARCHIVE_NAME] = archive_name
else: else:
archive_target = None archive_target = None
print(f"Exporting: {file_target}")
t = int(time.mktime(document.created.timetuple())) t = int(time.mktime(document.created.timetuple()))
if document.storage_type == Document.STORAGE_TYPE_GPG: if document.storage_type == Document.STORAGE_TYPE_GPG:
with open(file_target, "wb") as f: with open(original_target, "wb") as f:
f.write(GnuPG.decrypted(document.source_file)) f.write(GnuPG.decrypted(document.source_file))
os.utime(file_target, times=(t, t)) os.utime(original_target, times=(t, t))
with open(thumbnail_target, "wb") as f: with open(thumbnail_target, "wb") as f:
f.write(GnuPG.decrypted(document.thumbnail_file)) f.write(GnuPG.decrypted(document.thumbnail_file))
@ -90,7 +100,7 @@ class Command(Renderable, BaseCommand):
os.utime(archive_target, times=(t, t)) os.utime(archive_target, times=(t, t))
else: else:
shutil.copy(document.source_path, file_target) shutil.copy(document.source_path, original_target)
shutil.copy(document.thumbnail_path, thumbnail_target) shutil.copy(document.thumbnail_path, thumbnail_target)
if archive_target: if archive_target:

View File

@ -1,10 +1,12 @@
# coding=utf-8 # coding=utf-8
import datetime
import logging import logging
import os import os
import re import re
from collections import OrderedDict from collections import OrderedDict
import pathvalidate
import dateutil.parser import dateutil.parser
from django.conf import settings from django.conf import settings
from django.db import models from django.db import models
@ -206,13 +208,11 @@ class Document(models.Model):
ordering = ("correspondent", "title") ordering = ("correspondent", "title")
def __str__(self): def __str__(self):
created = self.created.strftime("%Y%m%d") created = datetime.date.isoformat(self.created)
if self.correspondent and self.title: if self.correspondent and self.title:
return "{}: {} - {}".format( return f"{created} {self.correspondent} {self.title}"
created, self.correspondent, self.title) else:
if self.correspondent or self.title: return f"{created} {self.title}"
return "{}: {}".format(created, self.correspondent or self.title)
return str(created)
@property @property
def source_path(self): def source_path(self):
@ -248,13 +248,21 @@ class Document(models.Model):
def archive_file(self): def archive_file(self):
return open(self.archive_path, "rb") return open(self.archive_path, "rb")
@property def get_public_filename(self, archive=False, counter=0, suffix=None):
def file_name(self): result = str(self)
return slugify(str(self)) + self.file_type
@property if counter:
def archive_file_name(self): result += f"_{counter:02}"
return slugify(str(self)) + ".pdf"
if suffix:
result += suffix
if archive:
result += ".pdf"
else:
result += self.file_type
return pathvalidate.sanitize_filename(result, replacement_text="-")
@property @property
def file_type(self): def file_type(self):

View File

@ -157,7 +157,7 @@ def run_post_consume_script(sender, document, **kwargs):
Popen(( Popen((
settings.POST_CONSUME_SCRIPT, settings.POST_CONSUME_SCRIPT,
str(document.pk), str(document.pk),
document.file_name, document.get_public_filename(),
os.path.normpath(document.source_path), os.path.normpath(document.source_path),
os.path.normpath(document.thumbnail_path), os.path.normpath(document.thumbnail_path),
reverse("document-download", kwargs={"pk": document.pk}), reverse("document-download", kwargs={"pk": document.pk}),
@ -179,7 +179,7 @@ def cleanup_document_deletion(sender, instance, using, **kwargs):
f"Deleted file {f}.") f"Deleted file {f}.")
except OSError as e: except OSError as e:
logging.getLogger(__name__).warning( logging.getLogger(__name__).warning(
f"While deleting document {instance.file_name}, the file " f"While deleting document {str(instance)}, the file "
f"{f} could not be deleted: {e}" f"{f} could not be deleted: {e}"
) )

View File

@ -48,19 +48,19 @@ class TestDocument(TestCase):
def test_file_name(self): def test_file_name(self):
doc = Document(mime_type="application/pdf", title="test", created=datetime(2020, 12, 25)) doc = Document(mime_type="application/pdf", title="test", created=datetime(2020, 12, 25))
self.assertEqual(doc.file_name, "20201225-test.pdf") self.assertEqual(doc.get_public_filename(), "2020-12-25 test.pdf")
def test_file_name_jpg(self): def test_file_name_jpg(self):
doc = Document(mime_type="image/jpeg", title="test", created=datetime(2020, 12, 25)) doc = Document(mime_type="image/jpeg", title="test", created=datetime(2020, 12, 25))
self.assertEqual(doc.file_name, "20201225-test.jpg") self.assertEqual(doc.get_public_filename(), "2020-12-25 test.jpg")
def test_file_name_unknown(self): def test_file_name_unknown(self):
doc = Document(mime_type="application/zip", title="test", created=datetime(2020, 12, 25)) doc = Document(mime_type="application/zip", title="test", created=datetime(2020, 12, 25))
self.assertEqual(doc.file_name, "20201225-test.zip") self.assertEqual(doc.get_public_filename(), "2020-12-25 test.zip")
def test_file_name_invalid(self): def test_file_name_invalid_type(self):
doc = Document(mime_type="image/jpegasd", title="test", created=datetime(2020, 12, 25)) doc = Document(mime_type="image/jpegasd", title="test", created=datetime(2020, 12, 25))
self.assertEqual(doc.file_name, "20201225-test") self.assertEqual(doc.get_public_filename(), "2020-12-25 test")

View File

@ -66,6 +66,9 @@ class TestExportImport(DirectoriesMixin, TestCase):
def test_export_missing_files(self): def test_export_missing_files(self):
target = tempfile.mkdtemp() target = tempfile.mkdtemp()
call_command('document_exporter', target)
Document.objects.create(checksum="AAAAAAAAAAAAAAAAA", title="wow", filename="0000004.pdf", id=3, mime_type="application/pdf") Document.objects.create(checksum="AAAAAAAAAAAAAAAAA", title="wow", filename="0000004.pdf", id=3, mime_type="application/pdf")
self.assertRaises(FileNotFoundError, call_command, 'document_exporter', target) self.assertRaises(FileNotFoundError, call_command, 'document_exporter', target)
def test_duplicate_titles(self):
# TODO
pass

View File

@ -145,11 +145,11 @@ class DocumentViewSet(RetrieveModelMixin,
doc = Document.objects.get(id=pk) doc = Document.objects.get(id=pk)
if not self.original_requested(request) and os.path.isfile(doc.archive_path): # NOQA: E501 if not self.original_requested(request) and os.path.isfile(doc.archive_path): # NOQA: E501
file_handle = doc.archive_file file_handle = doc.archive_file
filename = doc.archive_file_name filename = doc.get_public_filename(archive=True)
mime_type = 'application/pdf' mime_type = 'application/pdf'
else: else:
file_handle = doc.source_file file_handle = doc.source_file
filename = doc.file_name filename = doc.get_public_filename()
mime_type = doc.mime_type mime_type = doc.mime_type
if doc.storage_type == Document.STORAGE_TYPE_GPG: if doc.storage_type == Document.STORAGE_TYPE_GPG: