diff --git a/paperless.conf.example b/paperless.conf.example index b04e93f94..67e1b1c89 100644 --- a/paperless.conf.example +++ b/paperless.conf.example @@ -54,6 +54,17 @@ PAPERLESS_CONSUME_MAIL_PASS="" # ignored. PAPERLESS_EMAIL_SECRET="" +# Specify a filename format for an (optional) subdirectory and the document itself +# Use the following placefolders: +# * {correspondent} +# * {title} +# * {created} +# * {added} +# * {tags[FILTER]} +# Uniqueness of filenames is ensured, as an incrementing counter is attached +# to each filename. +#PAPERLESS_DIRECTORY_FORMAT="" +#PAPERLESS_FILENAME_FORMAT="" ############################################################################### #### Security #### diff --git a/src/documents/consumer.py b/src/documents/consumer.py index 3cb484b2a..d846691f9 100644 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -234,6 +234,9 @@ class Consumer: self._write(document, doc, document.source_path) self._write(document, thumbnail, document.thumbnail_path) + document.set_filename(document.source_filename) + document.save() + self.log("info", "Completed") return document diff --git a/src/documents/migrations/0023_document_current_filename.py b/src/documents/migrations/0023_document_current_filename.py new file mode 100644 index 000000000..be78ea863 --- /dev/null +++ b/src/documents/migrations/0023_document_current_filename.py @@ -0,0 +1,37 @@ +# Generated by Django 2.0.10 on 2019-04-26 18:57 + +from django.db import migrations, models + + +def set_filename(apps, schema_editor): + Document = apps.get_model("documents", "Document") + for doc in Document.objects.all(): + file_name = "{:07}.{}".format(doc.pk, doc.file_type) + if doc.storage_type == "gpg": + file_name += ".gpg" + + # Set filename + doc.filename = file_name + + # Save document + doc.save() + + +class Migration(migrations.Migration): + + dependencies = [ + ('documents', '0022_auto_20181007_1420'), + ] + + operations = [ + migrations.AddField( + model_name='document', + name='filename', + field=models.FilePathField(default=None, + null=True, + editable=False, + help_text='Current filename in storage', + max_length=256), + ), + migrations.RunPython(set_filename) + ] diff --git a/src/documents/models.py b/src/documents/models.py index c6fc8191e..25e5621b3 100644 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -7,12 +7,14 @@ import uuid from collections import OrderedDict import dateutil.parser +from django.dispatch import receiver from django.conf import settings from django.db import models from django.template.defaultfilters import slugify from django.utils import timezone from django.utils.text import slugify from fuzzywuzzy import fuzz +from collections import defaultdict from .managers import LogManager @@ -254,6 +256,14 @@ class Document(models.Model): added = models.DateTimeField( default=timezone.now, editable=False, db_index=True) + filename = models.CharField( + max_length=256, + editable=False, + default=None, + null=True, + help_text="Current filename in storage" + ) + class Meta: ordering = ("correspondent", "title") @@ -267,17 +277,90 @@ class Document(models.Model): return str(created) @property - def source_path(self): + def source_filename(self): + if self.filename is None: + self.filename = self.source_filename_new() - file_name = "{:07}.{}".format(self.pk, self.file_type) + return self.filename + + def many_to_list(self, field): + mylist = [] + for t in field.all(): + mylist.append(t.name) + return mylist + + def many_to_dictionary(self, field): + mydictionary = dict() + for t in field.all(): + delimeter = t.name.find('_') + + if delimeter is -1: + continue + + key = t.name[:delimeter] + value = t.name[delimeter+1:] + + mydictionary[key] = value + + return mydictionary + + def source_filename_new(self): + # Create directory name based on configured format + if settings.PAPERLESS_DIRECTORY_FORMAT is not None: + directory = settings.PAPERLESS_DIRECTORY_FORMAT.format( + correspondent=self.correspondent, + title=self.title, + created=self.created, + added=self.added, + tags=defaultdict(str, + self.many_to_dictionary(self.tags))) + else: + directory = "" + + # Create filename based on configured format + if settings.PAPERLESS_FILENAME_FORMAT is not None: + filename = settings.PAPERLESS_FILENAME_FORMAT.format( + correspondent=self.correspondent, + title=self.title, + created=self.created, + added=self.added, + tags=defaultdict(str, + self.many_to_dictionary(self.tags))) + else: + filename = "" + + path = os.path.join(slugify(directory), slugify(filename)) + + # Always append the primary key to guarantee uniqueness of filename + if len(path) > 0: + filename = "%s-%07i.%s" % (path, self.pk, self.file_type) + else: + filename = "%07i.%s" % (self.pk, self.file_type) + + # Append .gpg for encrypted files if self.storage_type == self.STORAGE_TYPE_GPG: - file_name += ".gpg" + filename += ".gpg" + # Create directory for target + create_dir = self.filename_to_path(slugify(directory)) + try: + os.makedirs(create_dir) + except os.error: + # Directory existed already, ignore + pass + + return filename + + @property + def source_path(self): + return self.filename_to_path(self.source_filename) + + def filename_to_path(self, filename): return os.path.join( settings.MEDIA_ROOT, "documents", "originals", - file_name + filename ) @property @@ -314,6 +397,54 @@ class Document(models.Model): def thumbnail_url(self): return reverse("fetch", kwargs={"kind": "thumb", "pk": self.pk}) + def set_filename(self, filename): + if os.path.isfile(self.filename_to_path(filename)): + self.filename = filename + + +@receiver(models.signals.m2m_changed, sender=Document.tags.through) +@receiver(models.signals.post_save, sender=Document) +def update_filename(sender, instance, **kwargs): + if instance.filename is None: + return + + # Build the new filename + new_filename = instance.source_filename_new() + + # If the filename is the same, then nothing needs to be done + if instance.filename is None or \ + instance.filename == new_filename: + return + + # Check if filename needs changing + if new_filename != instance.filename: + # Determine the full "target" path + path_new = instance.filename_to_path(new_filename) + dir_new = instance.filename_to_path(os.path.dirname(new_filename)) + + # Determine the full "current" path + path_current = instance.filename_to_path(instance.filename) + + # Move file + os.rename(path_current, path_new) + + # Delete empty directory + old_dir = os.path.dirname(instance.filename) + old_path = instance.filename_to_path(old_dir) + if len(os.listdir(old_path)) == 0: + try: + os.rmdir(old_path) + except os.error: + # Directory not empty + pass + + instance.filename = new_filename + + # Save instance + # This will not cause a cascade of post_save signals, as next time + # nothing needs to be renamed + instance.save() + class Log(models.Model): diff --git a/src/paperless/settings.py b/src/paperless/settings.py index ddc903857..4a7317e02 100644 --- a/src/paperless/settings.py +++ b/src/paperless/settings.py @@ -334,3 +334,7 @@ for t in json.loads(os.getenv("PAPERLESS_FILENAME_PARSE_TRANSFORMS", "[]")): # well. Set to 0 to disable this filter. PAPERLESS_RECENT_CORRESPONDENT_YEARS = int(os.getenv( "PAPERLESS_RECENT_CORRESPONDENT_YEARS", 0)) + +# Specify the filename format for out files +PAPERLESS_DIRECTORY_FORMAT = os.getenv("PAPERLESS_DIRECTORY_FORMAT") +PAPERLESS_FILENAME_FORMAT = os.getenv("PAPERLESS_FILENAME_FORMAT")