Changed the way public filenames (i.e., those used for downloading and exporting) are generated. #94

This commit is contained in:
jonaswinkler 2020-12-06 19:03:45 +01:00
parent a079c310b4
commit 28622d700d
6 changed files with 55 additions and 34 deletions

View File

@ -38,6 +38,9 @@ class Command(Renderable, BaseCommand):
if not os.access(self.target, os.W_OK):
raise CommandError("That path doesn't appear to be writable")
if os.listdir(self.target):
raise CommandError("That directory is not empty.")
self.dump()
def dump(self):
@ -48,37 +51,44 @@ class Command(Renderable, BaseCommand):
for index, document_dict in enumerate(manifest):
# Force output to unencrypted as that will be the current state.
# The importer will make the decision to encrypt or not.
manifest[index]["fields"]["storage_type"] = Document.STORAGE_TYPE_UNENCRYPTED # NOQA: E501
document = document_map[document_dict["pk"]]
unique_filename = f"{document.pk:07}_{document.file_name}"
file_target = os.path.join(self.target, unique_filename)
print(f"Exporting: {document}")
thumbnail_name = unique_filename + "-thumbnail.png"
filename_counter = 0
while True:
original_name = document.get_public_filename(counter=filename_counter)
original_target = os.path.join(self.target, original_name)
if not os.path.exists(original_target):
break
else:
filename_counter += 1
thumbnail_name = original_name + "-thumbnail.png"
thumbnail_target = os.path.join(self.target, thumbnail_name)
document_dict[EXPORTER_FILE_NAME] = unique_filename
document_dict[EXPORTER_FILE_NAME] = original_name
document_dict[EXPORTER_THUMBNAIL_NAME] = thumbnail_name
if os.path.exists(document.archive_path):
archive_name = \
f"{document.pk:07}_archive_{document.archive_file_name}"
archive_name = document.get_public_filename(archive=True, counter=filename_counter, suffix="_archive")
archive_target = os.path.join(self.target, archive_name)
document_dict[EXPORTER_ARCHIVE_NAME] = archive_name
else:
archive_target = None
print(f"Exporting: {file_target}")
t = int(time.mktime(document.created.timetuple()))
if document.storage_type == Document.STORAGE_TYPE_GPG:
with open(file_target, "wb") as f:
with open(original_target, "wb") as f:
f.write(GnuPG.decrypted(document.source_file))
os.utime(file_target, times=(t, t))
os.utime(original_target, times=(t, t))
with open(thumbnail_target, "wb") as f:
f.write(GnuPG.decrypted(document.thumbnail_file))
@ -90,7 +100,7 @@ class Command(Renderable, BaseCommand):
os.utime(archive_target, times=(t, t))
else:
shutil.copy(document.source_path, file_target)
shutil.copy(document.source_path, original_target)
shutil.copy(document.thumbnail_path, thumbnail_target)
if archive_target:

View File

@ -1,10 +1,12 @@
# coding=utf-8
import datetime
import logging
import os
import re
from collections import OrderedDict
import pathvalidate
import dateutil.parser
from django.conf import settings
from django.db import models
@ -206,13 +208,11 @@ class Document(models.Model):
ordering = ("correspondent", "title")
def __str__(self):
created = self.created.strftime("%Y%m%d")
created = datetime.date.isoformat(self.created)
if self.correspondent and self.title:
return "{}: {} - {}".format(
created, self.correspondent, self.title)
if self.correspondent or self.title:
return "{}: {}".format(created, self.correspondent or self.title)
return str(created)
return f"{created} {self.correspondent} {self.title}"
else:
return f"{created} {self.title}"
@property
def source_path(self):
@ -248,13 +248,21 @@ class Document(models.Model):
def archive_file(self):
return open(self.archive_path, "rb")
@property
def file_name(self):
return slugify(str(self)) + self.file_type
def get_public_filename(self, archive=False, counter=0, suffix=None):
result = str(self)
@property
def archive_file_name(self):
return slugify(str(self)) + ".pdf"
if counter:
result += f"_{counter:02}"
if suffix:
result += suffix
if archive:
result += ".pdf"
else:
result += self.file_type
return pathvalidate.sanitize_filename(result, replacement_text="-")
@property
def file_type(self):

View File

@ -157,7 +157,7 @@ def run_post_consume_script(sender, document, **kwargs):
Popen((
settings.POST_CONSUME_SCRIPT,
str(document.pk),
document.file_name,
document.get_public_filename(),
os.path.normpath(document.source_path),
os.path.normpath(document.thumbnail_path),
reverse("document-download", kwargs={"pk": document.pk}),
@ -179,7 +179,7 @@ def cleanup_document_deletion(sender, instance, using, **kwargs):
f"Deleted file {f}.")
except OSError as e:
logging.getLogger(__name__).warning(
f"While deleting document {instance.file_name}, the file "
f"While deleting document {str(instance)}, the file "
f"{f} could not be deleted: {e}"
)

View File

@ -48,19 +48,19 @@ class TestDocument(TestCase):
def test_file_name(self):
doc = Document(mime_type="application/pdf", title="test", created=datetime(2020, 12, 25))
self.assertEqual(doc.file_name, "20201225-test.pdf")
self.assertEqual(doc.get_public_filename(), "2020-12-25 test.pdf")
def test_file_name_jpg(self):
doc = Document(mime_type="image/jpeg", title="test", created=datetime(2020, 12, 25))
self.assertEqual(doc.file_name, "20201225-test.jpg")
self.assertEqual(doc.get_public_filename(), "2020-12-25 test.jpg")
def test_file_name_unknown(self):
doc = Document(mime_type="application/zip", title="test", created=datetime(2020, 12, 25))
self.assertEqual(doc.file_name, "20201225-test.zip")
self.assertEqual(doc.get_public_filename(), "2020-12-25 test.zip")
def test_file_name_invalid(self):
def test_file_name_invalid_type(self):
doc = Document(mime_type="image/jpegasd", title="test", created=datetime(2020, 12, 25))
self.assertEqual(doc.file_name, "20201225-test")
self.assertEqual(doc.get_public_filename(), "2020-12-25 test")

View File

@ -66,6 +66,9 @@ class TestExportImport(DirectoriesMixin, TestCase):
def test_export_missing_files(self):
target = tempfile.mkdtemp()
call_command('document_exporter', target)
Document.objects.create(checksum="AAAAAAAAAAAAAAAAA", title="wow", filename="0000004.pdf", id=3, mime_type="application/pdf")
self.assertRaises(FileNotFoundError, call_command, 'document_exporter', target)
def test_duplicate_titles(self):
# TODO
pass

View File

@ -145,11 +145,11 @@ class DocumentViewSet(RetrieveModelMixin,
doc = Document.objects.get(id=pk)
if not self.original_requested(request) and os.path.isfile(doc.archive_path): # NOQA: E501
file_handle = doc.archive_file
filename = doc.archive_file_name
filename = doc.get_public_filename(archive=True)
mime_type = 'application/pdf'
else:
file_handle = doc.source_file
filename = doc.file_name
filename = doc.get_public_filename()
mime_type = doc.mime_type
if doc.storage_type == Document.STORAGE_TYPE_GPG: