From d1a54d65768c738a8602f6428036955a6d0ec7df Mon Sep 17 00:00:00 2001 From: Wolf-Bastian Poettner Date: Fri, 27 Dec 2019 14:13:18 +0000 Subject: [PATCH 01/38] Allows to configure directory and filename formats for documents stored in paperless Default configuration is as before (incrementing numbers), but additional fields can be added at will --- paperless.conf.example | 11 ++ src/documents/consumer.py | 3 + .../0023_document_current_filename.py | 37 +++++ src/documents/models.py | 139 +++++++++++++++++- src/paperless/settings.py | 4 + 5 files changed, 190 insertions(+), 4 deletions(-) create mode 100644 src/documents/migrations/0023_document_current_filename.py diff --git a/paperless.conf.example b/paperless.conf.example index b04e93f94..67e1b1c89 100644 --- a/paperless.conf.example +++ b/paperless.conf.example @@ -54,6 +54,17 @@ PAPERLESS_CONSUME_MAIL_PASS="" # ignored. PAPERLESS_EMAIL_SECRET="" +# Specify a filename format for an (optional) subdirectory and the document itself +# Use the following placefolders: +# * {correspondent} +# * {title} +# * {created} +# * {added} +# * {tags[FILTER]} +# Uniqueness of filenames is ensured, as an incrementing counter is attached +# to each filename. +#PAPERLESS_DIRECTORY_FORMAT="" +#PAPERLESS_FILENAME_FORMAT="" ############################################################################### #### Security #### diff --git a/src/documents/consumer.py b/src/documents/consumer.py index 3cb484b2a..d846691f9 100644 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -234,6 +234,9 @@ class Consumer: self._write(document, doc, document.source_path) self._write(document, thumbnail, document.thumbnail_path) + document.set_filename(document.source_filename) + document.save() + self.log("info", "Completed") return document diff --git a/src/documents/migrations/0023_document_current_filename.py b/src/documents/migrations/0023_document_current_filename.py new file mode 100644 index 000000000..be78ea863 --- /dev/null +++ b/src/documents/migrations/0023_document_current_filename.py @@ -0,0 +1,37 @@ +# Generated by Django 2.0.10 on 2019-04-26 18:57 + +from django.db import migrations, models + + +def set_filename(apps, schema_editor): + Document = apps.get_model("documents", "Document") + for doc in Document.objects.all(): + file_name = "{:07}.{}".format(doc.pk, doc.file_type) + if doc.storage_type == "gpg": + file_name += ".gpg" + + # Set filename + doc.filename = file_name + + # Save document + doc.save() + + +class Migration(migrations.Migration): + + dependencies = [ + ('documents', '0022_auto_20181007_1420'), + ] + + operations = [ + migrations.AddField( + model_name='document', + name='filename', + field=models.FilePathField(default=None, + null=True, + editable=False, + help_text='Current filename in storage', + max_length=256), + ), + migrations.RunPython(set_filename) + ] diff --git a/src/documents/models.py b/src/documents/models.py index c6fc8191e..25e5621b3 100644 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -7,12 +7,14 @@ import uuid from collections import OrderedDict import dateutil.parser +from django.dispatch import receiver from django.conf import settings from django.db import models from django.template.defaultfilters import slugify from django.utils import timezone from django.utils.text import slugify from fuzzywuzzy import fuzz +from collections import defaultdict from .managers import LogManager @@ -254,6 +256,14 @@ class Document(models.Model): added = models.DateTimeField( default=timezone.now, editable=False, db_index=True) + filename = models.CharField( + max_length=256, + editable=False, + default=None, + null=True, + help_text="Current filename in storage" + ) + class Meta: ordering = ("correspondent", "title") @@ -267,17 +277,90 @@ class Document(models.Model): return str(created) @property - def source_path(self): + def source_filename(self): + if self.filename is None: + self.filename = self.source_filename_new() - file_name = "{:07}.{}".format(self.pk, self.file_type) + return self.filename + + def many_to_list(self, field): + mylist = [] + for t in field.all(): + mylist.append(t.name) + return mylist + + def many_to_dictionary(self, field): + mydictionary = dict() + for t in field.all(): + delimeter = t.name.find('_') + + if delimeter is -1: + continue + + key = t.name[:delimeter] + value = t.name[delimeter+1:] + + mydictionary[key] = value + + return mydictionary + + def source_filename_new(self): + # Create directory name based on configured format + if settings.PAPERLESS_DIRECTORY_FORMAT is not None: + directory = settings.PAPERLESS_DIRECTORY_FORMAT.format( + correspondent=self.correspondent, + title=self.title, + created=self.created, + added=self.added, + tags=defaultdict(str, + self.many_to_dictionary(self.tags))) + else: + directory = "" + + # Create filename based on configured format + if settings.PAPERLESS_FILENAME_FORMAT is not None: + filename = settings.PAPERLESS_FILENAME_FORMAT.format( + correspondent=self.correspondent, + title=self.title, + created=self.created, + added=self.added, + tags=defaultdict(str, + self.many_to_dictionary(self.tags))) + else: + filename = "" + + path = os.path.join(slugify(directory), slugify(filename)) + + # Always append the primary key to guarantee uniqueness of filename + if len(path) > 0: + filename = "%s-%07i.%s" % (path, self.pk, self.file_type) + else: + filename = "%07i.%s" % (self.pk, self.file_type) + + # Append .gpg for encrypted files if self.storage_type == self.STORAGE_TYPE_GPG: - file_name += ".gpg" + filename += ".gpg" + # Create directory for target + create_dir = self.filename_to_path(slugify(directory)) + try: + os.makedirs(create_dir) + except os.error: + # Directory existed already, ignore + pass + + return filename + + @property + def source_path(self): + return self.filename_to_path(self.source_filename) + + def filename_to_path(self, filename): return os.path.join( settings.MEDIA_ROOT, "documents", "originals", - file_name + filename ) @property @@ -314,6 +397,54 @@ class Document(models.Model): def thumbnail_url(self): return reverse("fetch", kwargs={"kind": "thumb", "pk": self.pk}) + def set_filename(self, filename): + if os.path.isfile(self.filename_to_path(filename)): + self.filename = filename + + +@receiver(models.signals.m2m_changed, sender=Document.tags.through) +@receiver(models.signals.post_save, sender=Document) +def update_filename(sender, instance, **kwargs): + if instance.filename is None: + return + + # Build the new filename + new_filename = instance.source_filename_new() + + # If the filename is the same, then nothing needs to be done + if instance.filename is None or \ + instance.filename == new_filename: + return + + # Check if filename needs changing + if new_filename != instance.filename: + # Determine the full "target" path + path_new = instance.filename_to_path(new_filename) + dir_new = instance.filename_to_path(os.path.dirname(new_filename)) + + # Determine the full "current" path + path_current = instance.filename_to_path(instance.filename) + + # Move file + os.rename(path_current, path_new) + + # Delete empty directory + old_dir = os.path.dirname(instance.filename) + old_path = instance.filename_to_path(old_dir) + if len(os.listdir(old_path)) == 0: + try: + os.rmdir(old_path) + except os.error: + # Directory not empty + pass + + instance.filename = new_filename + + # Save instance + # This will not cause a cascade of post_save signals, as next time + # nothing needs to be renamed + instance.save() + class Log(models.Model): diff --git a/src/paperless/settings.py b/src/paperless/settings.py index ddc903857..4a7317e02 100644 --- a/src/paperless/settings.py +++ b/src/paperless/settings.py @@ -334,3 +334,7 @@ for t in json.loads(os.getenv("PAPERLESS_FILENAME_PARSE_TRANSFORMS", "[]")): # well. Set to 0 to disable this filter. PAPERLESS_RECENT_CORRESPONDENT_YEARS = int(os.getenv( "PAPERLESS_RECENT_CORRESPONDENT_YEARS", 0)) + +# Specify the filename format for out files +PAPERLESS_DIRECTORY_FORMAT = os.getenv("PAPERLESS_DIRECTORY_FORMAT") +PAPERLESS_FILENAME_FORMAT = os.getenv("PAPERLESS_FILENAME_FORMAT") From f65b90122e14c97df28e45b78abbef57e3a8cfae Mon Sep 17 00:00:00 2001 From: Wolf-Bastian Poettner Date: Fri, 27 Dec 2019 14:13:25 +0000 Subject: [PATCH 02/38] Added tool to rename all documents according to the lastest filename format --- .../management/commands/document_renamer.py | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 src/documents/management/commands/document_renamer.py diff --git a/src/documents/management/commands/document_renamer.py b/src/documents/management/commands/document_renamer.py new file mode 100644 index 000000000..d7d77a111 --- /dev/null +++ b/src/documents/management/commands/document_renamer.py @@ -0,0 +1,24 @@ +from django.core.management.base import BaseCommand + +from documents.models import Document, Tag + +from ...mixins import Renderable + + +class Command(Renderable, BaseCommand): + + help = """ + This will rename all documents to match the latest filename format. + """.replace(" ", "") + + def __init__(self, *args, **kwargs): + self.verbosity = 0 + BaseCommand.__init__(self, *args, **kwargs) + + def handle(self, *args, **options): + + self.verbosity = options["verbosity"] + + for document in Document.objects.all(): + # Saving the document again will generate a new filename and rename + document.save() From 6d93889801f35f3edc9b390c54ef11ab90642c20 Mon Sep 17 00:00:00 2001 From: Wolf-Bastian Poettner Date: Fri, 27 Dec 2019 14:13:28 +0000 Subject: [PATCH 03/38] Add unit tests for filename feature --- src/documents/models.py | 84 +++++++----- src/documents/tests/test_file_handling.py | 154 ++++++++++++++++++++++ 2 files changed, 207 insertions(+), 31 deletions(-) create mode 100644 src/documents/tests/test_file_handling.py diff --git a/src/documents/models.py b/src/documents/models.py index 25e5621b3..741b7079e 100644 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -256,7 +256,7 @@ class Document(models.Model): added = models.DateTimeField( default=timezone.now, editable=False, db_index=True) - filename = models.CharField( + filename = models.FilePathField( max_length=256, editable=False, default=None, @@ -402,48 +402,70 @@ class Document(models.Model): self.filename = filename +def delete_empty_directory(directory): + if len(os.listdir(directory)) == 0: + try: + os.rmdir(directory) + except os.error: + # Directory not empty + pass + @receiver(models.signals.m2m_changed, sender=Document.tags.through) @receiver(models.signals.post_save, sender=Document) def update_filename(sender, instance, **kwargs): - if instance.filename is None: - return + if instance.filename is None: + return - # Build the new filename - new_filename = instance.source_filename_new() + # Build the new filename + new_filename = instance.source_filename_new() - # If the filename is the same, then nothing needs to be done - if instance.filename is None or \ - instance.filename == new_filename: - return + # If the filename is the same, then nothing needs to be done + if instance.filename is None or \ + instance.filename == new_filename: + return - # Check if filename needs changing - if new_filename != instance.filename: - # Determine the full "target" path - path_new = instance.filename_to_path(new_filename) - dir_new = instance.filename_to_path(os.path.dirname(new_filename)) + # Check if filename needs changing + if new_filename != instance.filename: + # Determine the full "target" path + path_new = instance.filename_to_path(new_filename) + dir_new = instance.filename_to_path(os.path.dirname(new_filename)) - # Determine the full "current" path - path_current = instance.filename_to_path(instance.filename) + # Determine the full "current" path + path_current = instance.filename_to_path(instance.filename) - # Move file + # Move file + try: os.rename(path_current, path_new) + except PermissionError: + # Do not update filename in object + return - # Delete empty directory - old_dir = os.path.dirname(instance.filename) - old_path = instance.filename_to_path(old_dir) - if len(os.listdir(old_path)) == 0: - try: - os.rmdir(old_path) - except os.error: - # Directory not empty - pass + # Delete empty directory + old_dir = os.path.dirname(instance.filename) + old_path = instance.filename_to_path(old_dir) + delete_empty_directory(old_path) - instance.filename = new_filename + instance.filename = new_filename - # Save instance - # This will not cause a cascade of post_save signals, as next time - # nothing needs to be renamed - instance.save() + # Save instance + # This will not cause a cascade of post_save signals, as next time + # nothing needs to be renamed + instance.save() + + +@receiver(models.signals.post_delete, sender=Document) +def delete_files(sender, instance, **kwargs): + if instance.filename is None: + return + + # Remove the document + old_file = instance.filename_to_path(instance.filename) + os.remove(old_file) + + # And remove the directory (if applicable) + old_dir = os.path.dirname(instance.filename) + old_path = instance.filename_to_path(old_dir) + delete_empty_directory(old_path) class Log(models.Model): diff --git a/src/documents/tests/test_file_handling.py b/src/documents/tests/test_file_handling.py new file mode 100644 index 000000000..edeb1ab5e --- /dev/null +++ b/src/documents/tests/test_file_handling.py @@ -0,0 +1,154 @@ +import datetime +import os +import shutil +from unittest import mock +from uuid import uuid4 +from pathlib import Path + +from dateutil import tz +from django.test import TestCase, override_settings + +from django.utils.text import slugify +from ..models import Document, Correspondent +from django.conf import settings + + +class TestDate(TestCase): + @override_settings(PAPERLESS_DIRECTORY_FORMAT="") + @override_settings(PAPERLESS_FILENAME_FORMAT="") + def test_source_filename(self): + document = Document() + document.file_type = "pdf" + document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED + document.save() + + self.assertEqual(document.source_filename, "0000001.pdf") + + document.filename = "test.pdf" + self.assertEqual(document.source_filename, "test.pdf") + + @override_settings(PAPERLESS_DIRECTORY_FORMAT="") + @override_settings(PAPERLESS_FILENAME_FORMAT="") + def test_source_filename_new(self): + document = Document() + document.file_type = "pdf" + document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED + document.save() + + self.assertEqual(document.source_filename_new(), "0000001.pdf") + + document.storage_type = Document.STORAGE_TYPE_GPG + self.assertEqual(document.source_filename_new(), "0000001.pdf.gpg") + + @override_settings(MEDIA_ROOT="/tmp/paperless-tests-{}". + format(str(uuid4())[:8])) + @override_settings(PAPERLESS_DIRECTORY_FORMAT="{correspondent}") + @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}") + def test_file_renaming(self): + document = Document() + document.file_type = "pdf" + document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED + document.save() + + # Ensure that filename is properly generated + tmp = document.source_filename + self.assertEqual(document.source_filename_new(), + "none/none-0000001.pdf") + Path(document.source_path).touch() + + # Test source_path + self.assertEqual(document.source_path, settings.MEDIA_ROOT + + "/documents/originals/none/none-0000001.pdf") + + # Enable encryption and check again + document.storage_type = Document.STORAGE_TYPE_GPG + tmp = document.source_filename + self.assertEqual(document.source_filename_new(), + "none/none-0000001.pdf.gpg") + document.save() + + self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + + "/documents/originals/none"), True) + + # Set a correspondent and save the document + document.correspondent = Correspondent.objects.get_or_create( + name="test")[0] + document.save() + + # Check proper handling of files + self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + + "/documents/originals/test"), True) + self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + + "/documents/originals/none"), False) + self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" + + "originals/test/test-0000001.pdf.gpg"), True) + self.assertEqual(document.source_filename_new(), + "test/test-0000001.pdf.gpg") + + @override_settings(MEDIA_ROOT="/tmp/paperless-tests-{}". + format(str(uuid4())[:8])) + @override_settings(PAPERLESS_DIRECTORY_FORMAT="{correspondent}") + @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}") + def test_document_delete(self): + document = Document() + document.file_type = "pdf" + document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED + document.save() + + # Ensure that filename is properly generated + tmp = document.source_filename + self.assertEqual(document.source_filename_new(), + "none/none-0000001.pdf") + Path(document.source_path).touch() + + # Ensure file deletion after delete + document.delete() + self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + + "/documents/originals/none/none-0000001.pdf"), False) + self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + + "/documents/originals/none"), False) + + @override_settings(MEDIA_ROOT="/tmp/paperless-tests-{}". + format(str(uuid4())[:8])) + @override_settings(PAPERLESS_DIRECTORY_FORMAT="{correspondent}") + @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}") + def test_directory_not_empty(self): + document = Document() + document.file_type = "pdf" + document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED + document.save() + + # Ensure that filename is properly generated + tmp = document.source_filename + self.assertEqual(document.source_filename_new(), + "none/none-0000001.pdf") + Path(document.source_path).touch() + Path(document.source_path + "test").touch() + + # Set a correspondent and save the document + document.correspondent = Correspondent.objects.get_or_create( + name="test")[0] + document.save() + + # Check proper handling of files + self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + + "/documents/originals/test"), True) + self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + + "/documents/originals/none"), True) + + # Cleanup + os.remove(settings.MEDIA_ROOT + + "/documents/originals/none/none-0000001.pdftest") + os.rmdir(settings.MEDIA_ROOT + "/documents/originals/none") + + @override_settings(MEDIA_ROOT="/tmp/paperless-tests-{}". + format(str(uuid4())[:8])) + @override_settings(PAPERLESS_DIRECTORY_FORMAT=None) + @override_settings(PAPERLESS_FILENAME_FORMAT=None) + def test_format_none(self): + document = Document() + document.file_type = "pdf" + document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED + document.save() + + self.assertEqual(document.source_filename_new(), "0000001.pdf") From 94feaef5c49eee3b625c6dea1ef944025489822b Mon Sep 17 00:00:00 2001 From: Wolf-Bastian Poettner Date: Fri, 27 Dec 2019 14:13:29 +0000 Subject: [PATCH 04/38] Check if document file exist before deletion --- src/documents/models.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/documents/models.py b/src/documents/models.py index 741b7079e..0a69434e0 100644 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -410,6 +410,7 @@ def delete_empty_directory(directory): # Directory not empty pass + @receiver(models.signals.m2m_changed, sender=Document.tags.through) @receiver(models.signals.post_save, sender=Document) def update_filename(sender, instance, **kwargs): @@ -460,7 +461,13 @@ def delete_files(sender, instance, **kwargs): # Remove the document old_file = instance.filename_to_path(instance.filename) - os.remove(old_file) + + if os.path.isfile(old_file): + os.remove(old_file) + else: + logger = logging.getLogger(__name__) + logger.warning("Deleted document " + str(instance.id) + " but file " + + old_file + " was no longer present") # And remove the directory (if applicable) old_dir = os.path.dirname(instance.filename) From 2d98951d881693c67e19db7c5dad0aa7acbae4e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Wolf-Bastian=20P=C3=B6ttner?= Date: Sat, 1 Feb 2020 14:22:02 +0100 Subject: [PATCH 05/38] Update paperless.conf.example Co-Authored-By: Pit --- paperless.conf.example | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paperless.conf.example b/paperless.conf.example index 67e1b1c89..8f586e601 100644 --- a/paperless.conf.example +++ b/paperless.conf.example @@ -55,7 +55,7 @@ PAPERLESS_CONSUME_MAIL_PASS="" PAPERLESS_EMAIL_SECRET="" # Specify a filename format for an (optional) subdirectory and the document itself -# Use the following placefolders: +# Use the following placeholders: # * {correspondent} # * {title} # * {created} From c0f125060875bf180b25d3348430009be77b184b Mon Sep 17 00:00:00 2001 From: Wolf-Bastian Poettner Date: Sat, 1 Feb 2020 13:25:23 +0000 Subject: [PATCH 06/38] Removed unused function many_to_list --- src/documents/models.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/documents/models.py b/src/documents/models.py index 0a69434e0..a5e3a6361 100644 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -283,12 +283,6 @@ class Document(models.Model): return self.filename - def many_to_list(self, field): - mylist = [] - for t in field.all(): - mylist.append(t.name) - return mylist - def many_to_dictionary(self, field): mydictionary = dict() for t in field.all(): From 8ab4ac1b987f4f71319f9e6705792c982c6c0860 Mon Sep 17 00:00:00 2001 From: Wolf-Bastian Poettner Date: Sat, 1 Feb 2020 13:26:49 +0000 Subject: [PATCH 07/38] Removed try-catch around os.makedirs and used exist_ok=True instead --- src/documents/models.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/documents/models.py b/src/documents/models.py index a5e3a6361..1c7905643 100644 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -335,13 +335,9 @@ class Document(models.Model): if self.storage_type == self.STORAGE_TYPE_GPG: filename += ".gpg" - # Create directory for target + # Create directory for target (and ignore, if directory exists already) create_dir = self.filename_to_path(slugify(directory)) - try: - os.makedirs(create_dir) - except os.error: - # Directory existed already, ignore - pass + os.makedirs(create_dir, exist_ok=True) return filename From 985350d7158c5b5c47f2ee2bdea6831e8f5cd444 Mon Sep 17 00:00:00 2001 From: Wolf-Bastian Poettner Date: Sat, 1 Feb 2020 13:52:27 +0000 Subject: [PATCH 08/38] Refactored source_filename_new into generate_source_filename and create_source_directory --- src/documents/models.py | 22 +++++++++++++++------- src/documents/tests/test_file_handling.py | 22 +++++++++++++--------- 2 files changed, 28 insertions(+), 16 deletions(-) diff --git a/src/documents/models.py b/src/documents/models.py index 1c7905643..e37a4fbb5 100644 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -279,7 +279,7 @@ class Document(models.Model): @property def source_filename(self): if self.filename is None: - self.filename = self.source_filename_new() + self.filename = self.generate_source_filename() return self.filename @@ -298,7 +298,7 @@ class Document(models.Model): return mydictionary - def source_filename_new(self): + def generate_source_filename(self): # Create directory name based on configured format if settings.PAPERLESS_DIRECTORY_FORMAT is not None: directory = settings.PAPERLESS_DIRECTORY_FORMAT.format( @@ -335,12 +335,17 @@ class Document(models.Model): if self.storage_type == self.STORAGE_TYPE_GPG: filename += ".gpg" - # Create directory for target (and ignore, if directory exists already) - create_dir = self.filename_to_path(slugify(directory)) - os.makedirs(create_dir, exist_ok=True) - return filename + def create_source_directory(self): + new_filename = self.generate_source_filename() + + # Determine the full "target" path + dir_new = self.filename_to_path(os.path.dirname(new_filename)) + + # Create new path + os.makedirs(dir_new, exist_ok=True) + @property def source_path(self): return self.filename_to_path(self.source_filename) @@ -408,7 +413,7 @@ def update_filename(sender, instance, **kwargs): return # Build the new filename - new_filename = instance.source_filename_new() + new_filename = instance.generate_source_filename() # If the filename is the same, then nothing needs to be done if instance.filename is None or \ @@ -421,6 +426,9 @@ def update_filename(sender, instance, **kwargs): path_new = instance.filename_to_path(new_filename) dir_new = instance.filename_to_path(os.path.dirname(new_filename)) + # Create new path + instance.create_source_directory() + # Determine the full "current" path path_current = instance.filename_to_path(instance.filename) diff --git a/src/documents/tests/test_file_handling.py b/src/documents/tests/test_file_handling.py index edeb1ab5e..a4d842236 100644 --- a/src/documents/tests/test_file_handling.py +++ b/src/documents/tests/test_file_handling.py @@ -29,16 +29,17 @@ class TestDate(TestCase): @override_settings(PAPERLESS_DIRECTORY_FORMAT="") @override_settings(PAPERLESS_FILENAME_FORMAT="") - def test_source_filename_new(self): + def test_generate_source_filename(self): document = Document() document.file_type = "pdf" document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED document.save() - self.assertEqual(document.source_filename_new(), "0000001.pdf") + self.assertEqual(document.generate_source_filename(), "0000001.pdf") document.storage_type = Document.STORAGE_TYPE_GPG - self.assertEqual(document.source_filename_new(), "0000001.pdf.gpg") + self.assertEqual(document.generate_source_filename(), + "0000001.pdf.gpg") @override_settings(MEDIA_ROOT="/tmp/paperless-tests-{}". format(str(uuid4())[:8])) @@ -52,8 +53,9 @@ class TestDate(TestCase): # Ensure that filename is properly generated tmp = document.source_filename - self.assertEqual(document.source_filename_new(), + self.assertEqual(document.generate_source_filename(), "none/none-0000001.pdf") + document.create_source_directory() Path(document.source_path).touch() # Test source_path @@ -63,7 +65,7 @@ class TestDate(TestCase): # Enable encryption and check again document.storage_type = Document.STORAGE_TYPE_GPG tmp = document.source_filename - self.assertEqual(document.source_filename_new(), + self.assertEqual(document.generate_source_filename(), "none/none-0000001.pdf.gpg") document.save() @@ -82,7 +84,7 @@ class TestDate(TestCase): "/documents/originals/none"), False) self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" + "originals/test/test-0000001.pdf.gpg"), True) - self.assertEqual(document.source_filename_new(), + self.assertEqual(document.generate_source_filename(), "test/test-0000001.pdf.gpg") @override_settings(MEDIA_ROOT="/tmp/paperless-tests-{}". @@ -97,8 +99,9 @@ class TestDate(TestCase): # Ensure that filename is properly generated tmp = document.source_filename - self.assertEqual(document.source_filename_new(), + self.assertEqual(document.generate_source_filename(), "none/none-0000001.pdf") + document.create_source_directory() Path(document.source_path).touch() # Ensure file deletion after delete @@ -120,8 +123,9 @@ class TestDate(TestCase): # Ensure that filename is properly generated tmp = document.source_filename - self.assertEqual(document.source_filename_new(), + self.assertEqual(document.generate_source_filename(), "none/none-0000001.pdf") + document.create_source_directory() Path(document.source_path).touch() Path(document.source_path + "test").touch() @@ -151,4 +155,4 @@ class TestDate(TestCase): document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED document.save() - self.assertEqual(document.source_filename_new(), "0000001.pdf") + self.assertEqual(document.generate_source_filename(), "0000001.pdf") From 597057c3cfd512434e141363e8babb4af8764b97 Mon Sep 17 00:00:00 2001 From: Wolf-Bastian Poettner Date: Sat, 1 Feb 2020 13:54:15 +0000 Subject: [PATCH 09/38] Removed duplicate check if instance.filename --- src/documents/models.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/documents/models.py b/src/documents/models.py index e37a4fbb5..65beb80d4 100644 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -416,8 +416,7 @@ def update_filename(sender, instance, **kwargs): new_filename = instance.generate_source_filename() # If the filename is the same, then nothing needs to be done - if instance.filename is None or \ - instance.filename == new_filename: + if instance.filename == new_filename: return # Check if filename needs changing From 94e1ebc407c06ebbbf1eebf6494ec69ca7fe78d5 Mon Sep 17 00:00:00 2001 From: Wolf-Bastian Poettner Date: Sat, 1 Feb 2020 14:12:58 +0000 Subject: [PATCH 10/38] Added comment to update_document --- src/documents/models.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/documents/models.py b/src/documents/models.py index 65beb80d4..359960846 100644 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -409,6 +409,7 @@ def delete_empty_directory(directory): @receiver(models.signals.m2m_changed, sender=Document.tags.through) @receiver(models.signals.post_save, sender=Document) def update_filename(sender, instance, **kwargs): + # Skip if document has not been saved yet if instance.filename is None: return From f1a62a4b284d47706ca5840a72ae83c29ffeeabe Mon Sep 17 00:00:00 2001 From: Wolf-Bastian Poettner Date: Sat, 1 Feb 2020 14:14:13 +0000 Subject: [PATCH 11/38] Removed unnecessary check --- src/documents/models.py | 46 ++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/src/documents/models.py b/src/documents/models.py index 359960846..c20ecb93a 100644 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -420,36 +420,34 @@ def update_filename(sender, instance, **kwargs): if instance.filename == new_filename: return - # Check if filename needs changing - if new_filename != instance.filename: - # Determine the full "target" path - path_new = instance.filename_to_path(new_filename) - dir_new = instance.filename_to_path(os.path.dirname(new_filename)) + # Determine the full "target" path + path_new = instance.filename_to_path(new_filename) + dir_new = instance.filename_to_path(os.path.dirname(new_filename)) - # Create new path - instance.create_source_directory() + # Create new path + instance.create_source_directory() - # Determine the full "current" path - path_current = instance.filename_to_path(instance.filename) + # Determine the full "current" path + path_current = instance.filename_to_path(instance.filename) - # Move file - try: - os.rename(path_current, path_new) - except PermissionError: - # Do not update filename in object - return + # Move file + try: + os.rename(path_current, path_new) + except PermissionError: + # Do not update filename in object + return - # Delete empty directory - old_dir = os.path.dirname(instance.filename) - old_path = instance.filename_to_path(old_dir) - delete_empty_directory(old_path) + # Delete empty directory + old_dir = os.path.dirname(instance.filename) + old_path = instance.filename_to_path(old_dir) + delete_empty_directory(old_path) - instance.filename = new_filename + instance.filename = new_filename - # Save instance - # This will not cause a cascade of post_save signals, as next time - # nothing needs to be renamed - instance.save() + # Save instance + # This will not cause a cascade of post_save signals, as next time + # nothing needs to be renamed + instance.save() @receiver(models.signals.post_delete, sender=Document) From ac10f81696c94e5c776eb291a2afd69e893e6c6e Mon Sep 17 00:00:00 2001 From: Wolf-Bastian Poettner Date: Sat, 1 Feb 2020 14:18:33 +0000 Subject: [PATCH 12/38] Made filename_to_path a static method --- src/documents/models.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/documents/models.py b/src/documents/models.py index c20ecb93a..d016c32e7 100644 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -341,16 +341,17 @@ class Document(models.Model): new_filename = self.generate_source_filename() # Determine the full "target" path - dir_new = self.filename_to_path(os.path.dirname(new_filename)) + dir_new = Document.filename_to_path(os.path.dirname(new_filename)) # Create new path os.makedirs(dir_new, exist_ok=True) @property def source_path(self): - return self.filename_to_path(self.source_filename) + return Document.filename_to_path(self.source_filename) - def filename_to_path(self, filename): + @staticmethod + def filename_to_path(filename): return os.path.join( settings.MEDIA_ROOT, "documents", @@ -393,7 +394,7 @@ class Document(models.Model): return reverse("fetch", kwargs={"kind": "thumb", "pk": self.pk}) def set_filename(self, filename): - if os.path.isfile(self.filename_to_path(filename)): + if os.path.isfile(Document.filename_to_path(filename)): self.filename = filename From 64c1a748217fc4ff3181fac50a9f743c01cf8d87 Mon Sep 17 00:00:00 2001 From: Wolf-Bastian Poettner Date: Sat, 1 Feb 2020 20:03:20 +0000 Subject: [PATCH 13/38] Refactored delete_empty_directory into try_delete_empty_directories and implemented feature to ensure, that all created and now empty directories are really deleted --- src/documents/models.py | 37 +++++++++++++---------- src/documents/tests/test_file_handling.py | 31 +++++++++++++++++++ 2 files changed, 52 insertions(+), 16 deletions(-) diff --git a/src/documents/models.py b/src/documents/models.py index d016c32e7..58206ba11 100644 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -294,7 +294,7 @@ class Document(models.Model): key = t.name[:delimeter] value = t.name[delimeter+1:] - mydictionary[key] = value + mydictionary[key] = slugify(value) return mydictionary @@ -302,10 +302,10 @@ class Document(models.Model): # Create directory name based on configured format if settings.PAPERLESS_DIRECTORY_FORMAT is not None: directory = settings.PAPERLESS_DIRECTORY_FORMAT.format( - correspondent=self.correspondent, - title=self.title, - created=self.created, - added=self.added, + correspondent=slugify(self.correspondent), + title=slugify(self.title), + created=slugify(self.created), + added=slugify(self.added), tags=defaultdict(str, self.many_to_dictionary(self.tags))) else: @@ -314,16 +314,16 @@ class Document(models.Model): # Create filename based on configured format if settings.PAPERLESS_FILENAME_FORMAT is not None: filename = settings.PAPERLESS_FILENAME_FORMAT.format( - correspondent=self.correspondent, - title=self.title, - created=self.created, - added=self.added, + correspondent=slugify(self.correspondent), + title=slugify(self.title), + created=slugify(self.created), + added=slugify(self.added), tags=defaultdict(str, self.many_to_dictionary(self.tags))) else: filename = "" - path = os.path.join(slugify(directory), slugify(filename)) + path = os.path.join(directory, filename) # Always append the primary key to guarantee uniqueness of filename if len(path) > 0: @@ -398,13 +398,18 @@ class Document(models.Model): self.filename = filename -def delete_empty_directory(directory): - if len(os.listdir(directory)) == 0: +def try_delete_empty_directories(directory): + # Go up in the directory hierarchy and try to delete all directories + while directory != Document.filename_to_path(""): + # Try to delete the current directory try: os.rmdir(directory) except os.error: - # Directory not empty - pass + # Directory not empty, no need to go further up + return + + # Cut off actual directory and go one level up + directory, tmp = os.path.split(directory) @receiver(models.signals.m2m_changed, sender=Document.tags.through) @@ -441,7 +446,7 @@ def update_filename(sender, instance, **kwargs): # Delete empty directory old_dir = os.path.dirname(instance.filename) old_path = instance.filename_to_path(old_dir) - delete_empty_directory(old_path) + try_delete_empty_directories(old_path) instance.filename = new_filename @@ -469,7 +474,7 @@ def delete_files(sender, instance, **kwargs): # And remove the directory (if applicable) old_dir = os.path.dirname(instance.filename) old_path = instance.filename_to_path(old_dir) - delete_empty_directory(old_path) + try_delete_empty_directories(old_path) class Log(models.Model): diff --git a/src/documents/tests/test_file_handling.py b/src/documents/tests/test_file_handling.py index a4d842236..46f5bbc9d 100644 --- a/src/documents/tests/test_file_handling.py +++ b/src/documents/tests/test_file_handling.py @@ -145,6 +145,37 @@ class TestDate(TestCase): "/documents/originals/none/none-0000001.pdftest") os.rmdir(settings.MEDIA_ROOT + "/documents/originals/none") + + @override_settings(MEDIA_ROOT="/tmp/paperless-tests-{}". + format(str(uuid4())[:8])) + @override_settings(PAPERLESS_DIRECTORY_FORMAT="{correspondent}/{correspondent}") + @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}") + def test_nested_directory_cleanup(self): + document = Document() + document.file_type = "pdf" + document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED + document.save() + + # Ensure that filename is properly generated + tmp = document.source_filename + self.assertEqual(document.generate_source_filename(), + "none/none/none-0000001.pdf") + document.create_source_directory() + Path(document.source_path).touch() + + # Check proper handling of files + self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + + "/documents/originals/none/none"), True) + + document.delete() + + self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + + "/documents/originals/none/none/none-0000001.pdf"), False) + self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + + "/documents/originals/none/none"), False) + self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + + "/documents/originals/none"), False) + @override_settings(MEDIA_ROOT="/tmp/paperless-tests-{}". format(str(uuid4())[:8])) @override_settings(PAPERLESS_DIRECTORY_FORMAT=None) From a5518a307ca473ac2c3a883d316401c6f21969fd Mon Sep 17 00:00:00 2001 From: Wolf-Bastian Poettner Date: Sat, 1 Feb 2020 20:09:36 +0000 Subject: [PATCH 14/38] Unified PAPERLESS_DIRECTORY_FORMAT and PAPERLESS_FILENAME_FORMAT into one configuration setting --- paperless.conf.example | 3 +-- src/documents/models.py | 30 ++++++----------------- src/documents/tests/test_file_handling.py | 15 +++--------- src/paperless/settings.py | 1 - 4 files changed, 13 insertions(+), 36 deletions(-) diff --git a/paperless.conf.example b/paperless.conf.example index 8f586e601..ff96bfe20 100644 --- a/paperless.conf.example +++ b/paperless.conf.example @@ -54,7 +54,7 @@ PAPERLESS_CONSUME_MAIL_PASS="" # ignored. PAPERLESS_EMAIL_SECRET="" -# Specify a filename format for an (optional) subdirectory and the document itself +# Specify a filename format for the document (directories are supported) # Use the following placeholders: # * {correspondent} # * {title} @@ -63,7 +63,6 @@ PAPERLESS_EMAIL_SECRET="" # * {tags[FILTER]} # Uniqueness of filenames is ensured, as an incrementing counter is attached # to each filename. -#PAPERLESS_DIRECTORY_FORMAT="" #PAPERLESS_FILENAME_FORMAT="" ############################################################################### diff --git a/src/documents/models.py b/src/documents/models.py index 58206ba11..25caddebf 100644 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -299,31 +299,17 @@ class Document(models.Model): return mydictionary def generate_source_filename(self): - # Create directory name based on configured format - if settings.PAPERLESS_DIRECTORY_FORMAT is not None: - directory = settings.PAPERLESS_DIRECTORY_FORMAT.format( - correspondent=slugify(self.correspondent), - title=slugify(self.title), - created=slugify(self.created), - added=slugify(self.added), - tags=defaultdict(str, - self.many_to_dictionary(self.tags))) - else: - directory = "" - # Create filename based on configured format if settings.PAPERLESS_FILENAME_FORMAT is not None: - filename = settings.PAPERLESS_FILENAME_FORMAT.format( - correspondent=slugify(self.correspondent), - title=slugify(self.title), - created=slugify(self.created), - added=slugify(self.added), - tags=defaultdict(str, - self.many_to_dictionary(self.tags))) + path = settings.PAPERLESS_FILENAME_FORMAT.format( + correspondent=slugify(self.correspondent), + title=slugify(self.title), + created=slugify(self.created), + added=slugify(self.added), + tags=defaultdict(str, + self.many_to_dictionary(self.tags))) else: - filename = "" - - path = os.path.join(directory, filename) + path = "" # Always append the primary key to guarantee uniqueness of filename if len(path) > 0: diff --git a/src/documents/tests/test_file_handling.py b/src/documents/tests/test_file_handling.py index 46f5bbc9d..30dc8fd81 100644 --- a/src/documents/tests/test_file_handling.py +++ b/src/documents/tests/test_file_handling.py @@ -14,7 +14,6 @@ from django.conf import settings class TestDate(TestCase): - @override_settings(PAPERLESS_DIRECTORY_FORMAT="") @override_settings(PAPERLESS_FILENAME_FORMAT="") def test_source_filename(self): document = Document() @@ -27,7 +26,6 @@ class TestDate(TestCase): document.filename = "test.pdf" self.assertEqual(document.source_filename, "test.pdf") - @override_settings(PAPERLESS_DIRECTORY_FORMAT="") @override_settings(PAPERLESS_FILENAME_FORMAT="") def test_generate_source_filename(self): document = Document() @@ -43,8 +41,7 @@ class TestDate(TestCase): @override_settings(MEDIA_ROOT="/tmp/paperless-tests-{}". format(str(uuid4())[:8])) - @override_settings(PAPERLESS_DIRECTORY_FORMAT="{correspondent}") - @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}") + @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}") def test_file_renaming(self): document = Document() document.file_type = "pdf" @@ -89,8 +86,7 @@ class TestDate(TestCase): @override_settings(MEDIA_ROOT="/tmp/paperless-tests-{}". format(str(uuid4())[:8])) - @override_settings(PAPERLESS_DIRECTORY_FORMAT="{correspondent}") - @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}") + @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}") def test_document_delete(self): document = Document() document.file_type = "pdf" @@ -113,8 +109,7 @@ class TestDate(TestCase): @override_settings(MEDIA_ROOT="/tmp/paperless-tests-{}". format(str(uuid4())[:8])) - @override_settings(PAPERLESS_DIRECTORY_FORMAT="{correspondent}") - @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}") + @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}") def test_directory_not_empty(self): document = Document() document.file_type = "pdf" @@ -148,8 +143,7 @@ class TestDate(TestCase): @override_settings(MEDIA_ROOT="/tmp/paperless-tests-{}". format(str(uuid4())[:8])) - @override_settings(PAPERLESS_DIRECTORY_FORMAT="{correspondent}/{correspondent}") - @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}") + @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}/{correspondent}") def test_nested_directory_cleanup(self): document = Document() document.file_type = "pdf" @@ -178,7 +172,6 @@ class TestDate(TestCase): @override_settings(MEDIA_ROOT="/tmp/paperless-tests-{}". format(str(uuid4())[:8])) - @override_settings(PAPERLESS_DIRECTORY_FORMAT=None) @override_settings(PAPERLESS_FILENAME_FORMAT=None) def test_format_none(self): document = Document() diff --git a/src/paperless/settings.py b/src/paperless/settings.py index 4a7317e02..3cadc7d2b 100644 --- a/src/paperless/settings.py +++ b/src/paperless/settings.py @@ -336,5 +336,4 @@ PAPERLESS_RECENT_CORRESPONDENT_YEARS = int(os.getenv( "PAPERLESS_RECENT_CORRESPONDENT_YEARS", 0)) # Specify the filename format for out files -PAPERLESS_DIRECTORY_FORMAT = os.getenv("PAPERLESS_DIRECTORY_FORMAT") PAPERLESS_FILENAME_FORMAT = os.getenv("PAPERLESS_FILENAME_FORMAT") From 86de7847cf02e403b51d31a9b21296dffc675b32 Mon Sep 17 00:00:00 2001 From: Wolf-Bastian Poettner Date: Sat, 1 Feb 2020 20:13:05 +0000 Subject: [PATCH 15/38] Avoid TOCTOU error by rather catching an exception than checking beforehand --- src/documents/models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/documents/models.py b/src/documents/models.py index 25caddebf..c52b6ba0a 100644 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -450,9 +450,9 @@ def delete_files(sender, instance, **kwargs): # Remove the document old_file = instance.filename_to_path(instance.filename) - if os.path.isfile(old_file): + try: os.remove(old_file) - else: + except FileNotFoundError: logger = logging.getLogger(__name__) logger.warning("Deleted document " + str(instance.id) + " but file " + old_file + " was no longer present") From 7391f5c1a20d9d87451f168dcf7b735d4d0af12a Mon Sep 17 00:00:00 2001 From: Wolf-Bastian Poettner Date: Sat, 1 Feb 2020 20:58:44 +0000 Subject: [PATCH 16/38] Tags in filenames can now either be found by key (in case - or _ are used as delimeter) or by index. Added some more tests. --- paperless.conf.example | 3 +- src/documents/models.py | 26 +++++- src/documents/tests/test_file_handling.py | 107 ++++++++++++++++++++-- 3 files changed, 125 insertions(+), 11 deletions(-) diff --git a/paperless.conf.example b/paperless.conf.example index ff96bfe20..a375ae8ba 100644 --- a/paperless.conf.example +++ b/paperless.conf.example @@ -60,7 +60,8 @@ PAPERLESS_EMAIL_SECRET="" # * {title} # * {created} # * {added} -# * {tags[FILTER]} +# * {tag[KEY]} If your tags conform to key_value or key-value +# * {tags[INDEX]} If your tags are strings, select the tag by index # Uniqueness of filenames is ensured, as an incrementing counter is attached # to each filename. #PAPERLESS_FILENAME_FORMAT="" diff --git a/src/documents/models.py b/src/documents/models.py index c52b6ba0a..ae49c1518 100644 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -283,21 +283,39 @@ class Document(models.Model): return self.filename - def many_to_dictionary(self, field): + @staticmethod + def many_to_dictionary(field): + # Converts ManyToManyField to dictionary by assuming, that field + # entries contain an _ or - which will be used as a delimeter mydictionary = dict() + for t in field.all(): + # Find delimeter delimeter = t.name.find('_') + if delimeter is -1: + delimeter = t.name.find('-') + if delimeter is -1: continue key = t.name[:delimeter] value = t.name[delimeter+1:] - mydictionary[key] = slugify(value) + mydictionary[slugify(key)] = slugify(value) return mydictionary + @staticmethod + def many_to_list(field): + # Converts ManyToManyField to list + mylist = list() + + for t in field.all(): + mylist.append(slugify(t)) + + return mylist + def generate_source_filename(self): # Create filename based on configured format if settings.PAPERLESS_FILENAME_FORMAT is not None: @@ -306,8 +324,8 @@ class Document(models.Model): title=slugify(self.title), created=slugify(self.created), added=slugify(self.added), - tags=defaultdict(str, - self.many_to_dictionary(self.tags))) + tag=defaultdict(str, self.many_to_dictionary(self.tags)), + tags=self.many_to_list(self.tags)) else: path = "" diff --git a/src/documents/tests/test_file_handling.py b/src/documents/tests/test_file_handling.py index 30dc8fd81..912d02573 100644 --- a/src/documents/tests/test_file_handling.py +++ b/src/documents/tests/test_file_handling.py @@ -9,7 +9,7 @@ from dateutil import tz from django.test import TestCase, override_settings from django.utils.text import slugify -from ..models import Document, Correspondent +from ..models import Tag, Document, Correspondent from django.conf import settings @@ -41,7 +41,8 @@ class TestDate(TestCase): @override_settings(MEDIA_ROOT="/tmp/paperless-tests-{}". format(str(uuid4())[:8])) - @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}") + @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + + "{correspondent}") def test_file_renaming(self): document = Document() document.file_type = "pdf" @@ -86,7 +87,8 @@ class TestDate(TestCase): @override_settings(MEDIA_ROOT="/tmp/paperless-tests-{}". format(str(uuid4())[:8])) - @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}") + @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + + "{correspondent}") def test_document_delete(self): document = Document() document.file_type = "pdf" @@ -109,7 +111,8 @@ class TestDate(TestCase): @override_settings(MEDIA_ROOT="/tmp/paperless-tests-{}". format(str(uuid4())[:8])) - @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}") + @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + + "{correspondent}") def test_directory_not_empty(self): document = Document() document.file_type = "pdf" @@ -140,10 +143,101 @@ class TestDate(TestCase): "/documents/originals/none/none-0000001.pdftest") os.rmdir(settings.MEDIA_ROOT + "/documents/originals/none") + @override_settings(MEDIA_ROOT="/tmp/paperless-tests-{}". + format(str(uuid4())[:8])) + @override_settings(PAPERLESS_FILENAME_FORMAT="{tag[type]}") + def test_tags_with_underscore(self): + document = Document() + document.file_type = "pdf" + document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED + document.save() + + # Add tag to document + document.tags.create(name="type_demo") + document.tags.create(name="foo_bar") + document.save() + + # Ensure that filename is properly generated + tmp = document.source_filename + self.assertEqual(document.generate_source_filename(), + "demo-0000001.pdf") + document.create_source_directory() + Path(document.source_path).touch() + + document.delete() @override_settings(MEDIA_ROOT="/tmp/paperless-tests-{}". format(str(uuid4())[:8])) - @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}/{correspondent}") + @override_settings(PAPERLESS_FILENAME_FORMAT="{tag[type]}") + def test_tags_with_dash(self): + document = Document() + document.file_type = "pdf" + document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED + document.save() + + # Add tag to document + document.tags.create(name="type-demo") + document.tags.create(name="foo-bar") + document.save() + + # Ensure that filename is properly generated + tmp = document.source_filename + self.assertEqual(document.generate_source_filename(), + "demo-0000001.pdf") + document.create_source_directory() + Path(document.source_path).touch() + + document.delete() + + @override_settings(MEDIA_ROOT="/tmp/paperless-tests-{}". + format(str(uuid4())[:8])) + @override_settings(PAPERLESS_FILENAME_FORMAT="{tag[type]}") + def test_tags_malformed(self): + document = Document() + document.file_type = "pdf" + document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED + document.save() + + # Add tag to document + document.tags.create(name="type:demo") + document.tags.create(name="foo:bar") + document.save() + + # Ensure that filename is properly generated + tmp = document.source_filename + self.assertEqual(document.generate_source_filename(), + "0000001.pdf") + document.create_source_directory() + Path(document.source_path).touch() + + document.delete() + + @override_settings(MEDIA_ROOT="/tmp/paperless-tests-{}". + format(str(uuid4())[:8])) + @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[0]}") + def test_tags_all(self): + document = Document() + document.file_type = "pdf" + document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED + document.save() + + # Add tag to document + document.tags.create(name="demo") + document.save() + + # Ensure that filename is properly generated + tmp = document.source_filename + self.assertEqual(document.generate_source_filename(), + "demo-0000001.pdf") + document.create_source_directory() + Path(document.source_path).touch() + + document.delete() + + @override_settings(MEDIA_ROOT="/tmp/paperless-tests-{}". + format(str(uuid4())[:8])) + @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + + "{correspondent}/{correspondent}") def test_nested_directory_cleanup(self): document = Document() document.file_type = "pdf" @@ -164,7 +258,8 @@ class TestDate(TestCase): document.delete() self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + - "/documents/originals/none/none/none-0000001.pdf"), False) + "/documents/originals/none/none/none-0000001.pdf"), + False) self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + "/documents/originals/none/none"), False) self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + From bbcfba240507a82adc20c016471ceaef4838b025 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Wolf-Bastian=20P=C3=B6ttner?= Date: Mon, 17 Feb 2020 20:45:43 +0100 Subject: [PATCH 17/38] Update src/documents/models.py Co-Authored-By: Pit --- src/documents/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/documents/models.py b/src/documents/models.py index ae49c1518..fba81f697 100644 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -413,7 +413,7 @@ def try_delete_empty_directories(directory): return # Cut off actual directory and go one level up - directory, tmp = os.path.split(directory) + directory, _ = os.path.split(directory) @receiver(models.signals.m2m_changed, sender=Document.tags.through) From 354c8331086c91721d5d0fadb42b18e868cac15d Mon Sep 17 00:00:00 2001 From: Wolf-Bastian Poettner Date: Mon, 17 Feb 2020 19:49:47 +0000 Subject: [PATCH 18/38] Ensure documents directory still exists after nested deletion --- src/documents/tests/test_file_handling.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/documents/tests/test_file_handling.py b/src/documents/tests/test_file_handling.py index 912d02573..c4d5f7f32 100644 --- a/src/documents/tests/test_file_handling.py +++ b/src/documents/tests/test_file_handling.py @@ -264,6 +264,8 @@ class TestDate(TestCase): "/documents/originals/none/none"), False) self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + "/documents/originals/none"), False) + self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + + "/documents/originals"), False) @override_settings(MEDIA_ROOT="/tmp/paperless-tests-{}". format(str(uuid4())[:8])) From 7200f3c0c59e2472f29ee063f687a7de29c6ac8d Mon Sep 17 00:00:00 2001 From: Wolf-Bastian Poettner Date: Mon, 17 Feb 2020 20:58:55 +0000 Subject: [PATCH 19/38] Prevent index out of bounds for tag lists --- src/documents/models.py | 33 +++++++++++--- src/documents/tests/test_file_handling.py | 54 +++++++++++++++++++++++ 2 files changed, 80 insertions(+), 7 deletions(-) diff --git a/src/documents/models.py b/src/documents/models.py index fba81f697..89a9f5968 100644 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -316,16 +316,35 @@ class Document(models.Model): return mylist + @staticmethod + def fill_list(input_list, length, filler): + while len(input_list) < length: + input_list.append(slugify(filler)) + + return input_list + def generate_source_filename(self): # Create filename based on configured format if settings.PAPERLESS_FILENAME_FORMAT is not None: - path = settings.PAPERLESS_FILENAME_FORMAT.format( - correspondent=slugify(self.correspondent), - title=slugify(self.title), - created=slugify(self.created), - added=slugify(self.added), - tag=defaultdict(str, self.many_to_dictionary(self.tags)), - tags=self.many_to_list(self.tags)) + list_length = 10 + tags = self.many_to_list(self.tags) + while True: + tags = Document.fill_list(tags, list_length, None) + try: + path = settings.PAPERLESS_FILENAME_FORMAT.format( + correspondent=slugify(self.correspondent), + title=slugify(self.title), + created=slugify(self.created), + added=slugify(self.added), + tag=defaultdict(str, self.many_to_dictionary(self.tags)), + tags=tags) + break + except IndexError: + list_length *= 10 + + if list_length > 1000: + path = "" + break else: path = "" diff --git a/src/documents/tests/test_file_handling.py b/src/documents/tests/test_file_handling.py index c4d5f7f32..fb7f152fc 100644 --- a/src/documents/tests/test_file_handling.py +++ b/src/documents/tests/test_file_handling.py @@ -234,6 +234,60 @@ class TestDate(TestCase): document.delete() + @override_settings(MEDIA_ROOT="/tmp/paperless-tests-{}". + format(str(uuid4())[:8])) + @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[0]}") + def test_tags_out_of_bounds_0(self): + document = Document() + document.file_type = "pdf" + document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED + document.save() + + # Ensure that filename is properly generated + tmp = document.source_filename + self.assertEqual(document.generate_source_filename(), + "none-0000001.pdf") + document.create_source_directory() + Path(document.source_path).touch() + + document.delete() + + @override_settings(MEDIA_ROOT="/tmp/paperless-tests-{}". + format(str(uuid4())[:8])) + @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[10000000]}") + def test_tags_out_of_bounds_10000000(self): + document = Document() + document.file_type = "pdf" + document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED + document.save() + + # Ensure that filename is properly generated + tmp = document.source_filename + self.assertEqual(document.generate_source_filename(), + "0000001.pdf") + document.create_source_directory() + Path(document.source_path).touch() + + document.delete() + + @override_settings(MEDIA_ROOT="/tmp/paperless-tests-{}". + format(str(uuid4())[:8])) + @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[99]}") + def test_tags_out_of_bounds_99(self): + document = Document() + document.file_type = "pdf" + document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED + document.save() + + # Ensure that filename is properly generated + tmp = document.source_filename + self.assertEqual(document.generate_source_filename(), + "none-0000001.pdf") + document.create_source_directory() + Path(document.source_path).touch() + + document.delete() + @override_settings(MEDIA_ROOT="/tmp/paperless-tests-{}". format(str(uuid4())[:8])) @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + From d2cc93639238fd4768451dfd104ca867d83dc056 Mon Sep 17 00:00:00 2001 From: Wolf-Bastian Poettner Date: Mon, 17 Feb 2020 20:59:17 +0000 Subject: [PATCH 20/38] Ensure filling in "none" when a tag has not been found --- src/documents/models.py | 4 +++- src/documents/tests/test_file_handling.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/documents/models.py b/src/documents/models.py index 89a9f5968..69d508350 100644 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -326,6 +326,8 @@ class Document(models.Model): def generate_source_filename(self): # Create filename based on configured format if settings.PAPERLESS_FILENAME_FORMAT is not None: + tag = defaultdict(lambda: slugify(None), + self.many_to_dictionary(self.tags)) list_length = 10 tags = self.many_to_list(self.tags) while True: @@ -336,7 +338,7 @@ class Document(models.Model): title=slugify(self.title), created=slugify(self.created), added=slugify(self.added), - tag=defaultdict(str, self.many_to_dictionary(self.tags)), + tag=tag, tags=tags) break except IndexError: diff --git a/src/documents/tests/test_file_handling.py b/src/documents/tests/test_file_handling.py index fb7f152fc..3065948ea 100644 --- a/src/documents/tests/test_file_handling.py +++ b/src/documents/tests/test_file_handling.py @@ -206,7 +206,7 @@ class TestDate(TestCase): # Ensure that filename is properly generated tmp = document.source_filename self.assertEqual(document.generate_source_filename(), - "0000001.pdf") + "none-0000001.pdf") document.create_source_directory() Path(document.source_path).touch() From 0ebc64d9d7cc50f152e3e222d9dd640668d41f1c Mon Sep 17 00:00:00 2001 From: Wolf-Bastian Poettner Date: Mon, 17 Feb 2020 21:00:10 +0000 Subject: [PATCH 21/38] Fix typo in delimiter --- src/documents/models.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/documents/models.py b/src/documents/models.py index 69d508350..19f11336d 100644 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -286,21 +286,21 @@ class Document(models.Model): @staticmethod def many_to_dictionary(field): # Converts ManyToManyField to dictionary by assuming, that field - # entries contain an _ or - which will be used as a delimeter + # entries contain an _ or - which will be used as a delimiter mydictionary = dict() for t in field.all(): - # Find delimeter - delimeter = t.name.find('_') + # Find delimiter + delimiter = t.name.find('_') - if delimeter is -1: - delimeter = t.name.find('-') + if delimiter is -1: + delimiter = t.name.find('-') - if delimeter is -1: + if delimiter is -1: continue - key = t.name[:delimeter] - value = t.name[delimeter+1:] + key = t.name[:delimiter] + value = t.name[delimiter+1:] mydictionary[slugify(key)] = slugify(value) From cb1a6b86b48cfffbb5cf34dbfa0dde0c92c25a99 Mon Sep 17 00:00:00 2001 From: Wolf-Bastian Poettner Date: Mon, 17 Feb 2020 21:02:37 +0000 Subject: [PATCH 22/38] Ensure explicit use of field name --- src/documents/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/documents/models.py b/src/documents/models.py index 19f11336d..4929ea459 100644 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -312,7 +312,7 @@ class Document(models.Model): mylist = list() for t in field.all(): - mylist.append(slugify(t)) + mylist.append(slugify(t.name)) return mylist From 36af8c4050803b2c4ab7d3a68510f4d28a894c8c Mon Sep 17 00:00:00 2001 From: Wolf-Bastian Poettner Date: Sat, 28 Mar 2020 20:57:23 +0000 Subject: [PATCH 23/38] Get rid of fill_list and rather use defaultdict --- src/documents/models.py | 28 ++++++++--------------- src/documents/tests/test_file_handling.py | 2 +- 2 files changed, 10 insertions(+), 20 deletions(-) diff --git a/src/documents/models.py b/src/documents/models.py index 4929ea459..39dcc4a37 100644 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -328,25 +328,15 @@ class Document(models.Model): if settings.PAPERLESS_FILENAME_FORMAT is not None: tag = defaultdict(lambda: slugify(None), self.many_to_dictionary(self.tags)) - list_length = 10 - tags = self.many_to_list(self.tags) - while True: - tags = Document.fill_list(tags, list_length, None) - try: - path = settings.PAPERLESS_FILENAME_FORMAT.format( - correspondent=slugify(self.correspondent), - title=slugify(self.title), - created=slugify(self.created), - added=slugify(self.added), - tag=tag, - tags=tags) - break - except IndexError: - list_length *= 10 - - if list_length > 1000: - path = "" - break + tags = defaultdict(lambda: slugify(None), + enumerate(self.many_to_list(self.tags))) + path = settings.PAPERLESS_FILENAME_FORMAT.format( + correspondent=slugify(self.correspondent), + title=slugify(self.title), + created=slugify(self.created), + added=slugify(self.added), + tag=tag, + tags=tags) else: path = "" diff --git a/src/documents/tests/test_file_handling.py b/src/documents/tests/test_file_handling.py index 3065948ea..f6c8d0eba 100644 --- a/src/documents/tests/test_file_handling.py +++ b/src/documents/tests/test_file_handling.py @@ -264,7 +264,7 @@ class TestDate(TestCase): # Ensure that filename is properly generated tmp = document.source_filename self.assertEqual(document.generate_source_filename(), - "0000001.pdf") + "none-0000001.pdf") document.create_source_directory() Path(document.source_path).touch() From 4a988088587189cb6c374b4b4ea89bc63d2aa13b Mon Sep 17 00:00:00 2001 From: Wolf-Bastian Poettner Date: Sat, 28 Mar 2020 20:57:57 +0000 Subject: [PATCH 24/38] Ensure, that document root is not deleted --- src/documents/models.py | 6 +++++- src/documents/tests/test_file_handling.py | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/documents/models.py b/src/documents/models.py index 39dcc4a37..17357c1a7 100644 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -415,7 +415,10 @@ class Document(models.Model): def try_delete_empty_directories(directory): # Go up in the directory hierarchy and try to delete all directories - while directory != Document.filename_to_path(""): + directory = os.path.normpath(directory) + root = os.path.normpath(Document.filename_to_path("")) + + while directory != root: # Try to delete the current directory try: os.rmdir(directory) @@ -425,6 +428,7 @@ def try_delete_empty_directories(directory): # Cut off actual directory and go one level up directory, _ = os.path.split(directory) + directory = os.path.normpath(directory) @receiver(models.signals.m2m_changed, sender=Document.tags.through) diff --git a/src/documents/tests/test_file_handling.py b/src/documents/tests/test_file_handling.py index f6c8d0eba..fae1c0634 100644 --- a/src/documents/tests/test_file_handling.py +++ b/src/documents/tests/test_file_handling.py @@ -319,7 +319,7 @@ class TestDate(TestCase): self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + "/documents/originals/none"), False) self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + - "/documents/originals"), False) + "/documents/originals"), True) @override_settings(MEDIA_ROOT="/tmp/paperless-tests-{}". format(str(uuid4())[:8])) From 85a0560d77897a402e97e40672da579ad9963aac Mon Sep 17 00:00:00 2001 From: Wolf-Bastian Poettner Date: Sat, 28 Mar 2020 21:04:55 +0000 Subject: [PATCH 25/38] Allow filename to use tags[KEY] and tags[INDEX] --- paperless.conf.example | 2 +- src/documents/models.py | 20 +++++--------------- src/documents/tests/test_file_handling.py | 6 +++--- 3 files changed, 9 insertions(+), 19 deletions(-) diff --git a/paperless.conf.example b/paperless.conf.example index a375ae8ba..b99995b8f 100644 --- a/paperless.conf.example +++ b/paperless.conf.example @@ -60,7 +60,7 @@ PAPERLESS_EMAIL_SECRET="" # * {title} # * {created} # * {added} -# * {tag[KEY]} If your tags conform to key_value or key-value +# * {tags[KEY]} If your tags conform to key_value or key-value # * {tags[INDEX]} If your tags are strings, select the tag by index # Uniqueness of filenames is ensured, as an incrementing counter is attached # to each filename. diff --git a/src/documents/models.py b/src/documents/models.py index 17357c1a7..006c214fa 100644 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -289,7 +289,10 @@ class Document(models.Model): # entries contain an _ or - which will be used as a delimiter mydictionary = dict() - for t in field.all(): + for index, t in enumerate(field.all()): + # Populate tag names by index + mydictionary[index] = slugify(t.name) + # Find delimiter delimiter = t.name.find('_') @@ -306,16 +309,6 @@ class Document(models.Model): return mydictionary - @staticmethod - def many_to_list(field): - # Converts ManyToManyField to list - mylist = list() - - for t in field.all(): - mylist.append(slugify(t.name)) - - return mylist - @staticmethod def fill_list(input_list, length, filler): while len(input_list) < length: @@ -326,16 +319,13 @@ class Document(models.Model): def generate_source_filename(self): # Create filename based on configured format if settings.PAPERLESS_FILENAME_FORMAT is not None: - tag = defaultdict(lambda: slugify(None), - self.many_to_dictionary(self.tags)) tags = defaultdict(lambda: slugify(None), - enumerate(self.many_to_list(self.tags))) + self.many_to_dictionary(self.tags)) path = settings.PAPERLESS_FILENAME_FORMAT.format( correspondent=slugify(self.correspondent), title=slugify(self.title), created=slugify(self.created), added=slugify(self.added), - tag=tag, tags=tags) else: path = "" diff --git a/src/documents/tests/test_file_handling.py b/src/documents/tests/test_file_handling.py index fae1c0634..7af99adfc 100644 --- a/src/documents/tests/test_file_handling.py +++ b/src/documents/tests/test_file_handling.py @@ -145,7 +145,7 @@ class TestDate(TestCase): @override_settings(MEDIA_ROOT="/tmp/paperless-tests-{}". format(str(uuid4())[:8])) - @override_settings(PAPERLESS_FILENAME_FORMAT="{tag[type]}") + @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}") def test_tags_with_underscore(self): document = Document() document.file_type = "pdf" @@ -168,7 +168,7 @@ class TestDate(TestCase): @override_settings(MEDIA_ROOT="/tmp/paperless-tests-{}". format(str(uuid4())[:8])) - @override_settings(PAPERLESS_FILENAME_FORMAT="{tag[type]}") + @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}") def test_tags_with_dash(self): document = Document() document.file_type = "pdf" @@ -191,7 +191,7 @@ class TestDate(TestCase): @override_settings(MEDIA_ROOT="/tmp/paperless-tests-{}". format(str(uuid4())[:8])) - @override_settings(PAPERLESS_FILENAME_FORMAT="{tag[type]}") + @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}") def test_tags_malformed(self): document = Document() document.file_type = "pdf" From 40ed29d45b72b7a9dff329477207fb8a9a8eb9ce Mon Sep 17 00:00:00 2001 From: Wolf-Bastian Poettner Date: Wed, 1 Apr 2020 19:37:21 +0000 Subject: [PATCH 26/38] Remove now unused fill_list --- src/documents/models.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/documents/models.py b/src/documents/models.py index 006c214fa..9e57b7e7f 100644 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -309,13 +309,6 @@ class Document(models.Model): return mydictionary - @staticmethod - def fill_list(input_list, length, filler): - while len(input_list) < length: - input_list.append(slugify(filler)) - - return input_list - def generate_source_filename(self): # Create filename based on configured format if settings.PAPERLESS_FILENAME_FORMAT is not None: From 64b598a1921645c56fd54e7fa2472e32ae6acdea Mon Sep 17 00:00:00 2001 From: Wolf-Bastian Poettner Date: Fri, 10 Apr 2020 12:53:34 +0000 Subject: [PATCH 27/38] Fix: always use source_filename attribute when accessing a file --- src/documents/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/documents/models.py b/src/documents/models.py index 9e57b7e7f..b81b4d42d 100644 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -436,7 +436,7 @@ def update_filename(sender, instance, **kwargs): instance.create_source_directory() # Determine the full "current" path - path_current = instance.filename_to_path(instance.filename) + path_current = instance.filename_to_path(instance.source_filename) # Move file try: From c028f7dde5e9a5fb56ae52de3d49ce7037a3bce1 Mon Sep 17 00:00:00 2001 From: Wolf-Bastian Poettner Date: Fri, 10 Apr 2020 12:54:07 +0000 Subject: [PATCH 28/38] Recover from accidentally renamed files --- src/documents/models.py | 54 +++++++++++++ src/documents/tests/test_file_handling.py | 94 +++++++++++++++++++++++ 2 files changed, 148 insertions(+) diff --git a/src/documents/models.py b/src/documents/models.py index b81b4d42d..ba5b861c6 100644 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -276,11 +276,43 @@ class Document(models.Model): return "{}: {}".format(created, self.correspondent or self.title) return str(created) + def find_renamed_document(self, subdirectory=""): + suffix = "%07i.%s" % (self.pk, self.file_type) + + # Append .gpg for encrypted files + if self.storage_type == self.STORAGE_TYPE_GPG: + suffix += ".gpg" + + # Go up in the directory hierarchy and try to delete all directories + root = os.path.normpath(Document.filename_to_path(subdirectory)) + + for filename in os.listdir(root): + if filename.endswith(suffix): + return os.path.join(subdirectory, filename) + + fullname = os.path.join(subdirectory, filename) + if os.path.isdir(Document.filename_to_path(fullname)): + return self.find_renamed_document(fullname) + + return None + @property def source_filename(self): + # Initial filename generation (for new documents) if self.filename is None: self.filename = self.generate_source_filename() + # Check if document is still available under filename + elif not os.path.isfile(Document.filename_to_path(self.filename)): + recovered_filename = self.find_renamed_document() + + # If we have found the file, save filename and clean up empty dirs + if recovered_filename is not None: + self.filename = recovered_filename + self.save() + + delete_all_empty_subdirectories(Document.filename_to_path("")) + return self.filename @staticmethod @@ -414,6 +446,28 @@ def try_delete_empty_directories(directory): directory = os.path.normpath(directory) +def delete_all_empty_subdirectories(directory): + # Go through all folders and try to delete all directories + root = os.path.normpath(Document.filename_to_path(directory)) + + for filename in os.listdir(root): + fullname = os.path.join(directory, filename) + + if not os.path.isdir(Document.filename_to_path(fullname)): + continue + + # Try to delete the directory + try: + os.rmdir(Document.filename_to_path(fullname)) + continue + except os.error: + # Directory not empty, no need to go further up + continue + + # Go into subdirectory to see, if there is more to delete + delete_all_empty_subdirectories(os.path.join(directory, filename)) + + @receiver(models.signals.m2m_changed, sender=Document.tags.through) @receiver(models.signals.post_save, sender=Document) def update_filename(sender, instance, **kwargs): diff --git a/src/documents/tests/test_file_handling.py b/src/documents/tests/test_file_handling.py index 7af99adfc..0da2c27e7 100644 --- a/src/documents/tests/test_file_handling.py +++ b/src/documents/tests/test_file_handling.py @@ -331,3 +331,97 @@ class TestDate(TestCase): document.save() self.assertEqual(document.generate_source_filename(), "0000001.pdf") + + @override_settings(MEDIA_ROOT="/tmp/paperless-tests-{}". + format(str(uuid4())[:8])) + @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + + "{correspondent}") + def test_document_renamed(self): + document = Document() + document.file_type = "pdf" + document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED + document.save() + + # Ensure that filename is properly generated + tmp = document.source_filename + self.assertEqual(document.generate_source_filename(), + "none/none-0000001.pdf") + document.create_source_directory() + Path(document.source_path).touch() + + # Test source_path + self.assertEqual(document.source_path, settings.MEDIA_ROOT + + "/documents/originals/none/none-0000001.pdf") + + # Rename the document "illegaly" + os.makedirs(settings.MEDIA_ROOT + "/documents/originals/test") + os.rename(settings.MEDIA_ROOT + "/documents/originals/" + + "none/none-0000001.pdf", + settings.MEDIA_ROOT + "/documents/originals/" + + "test/test-0000001.pdf") + self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" + + "originals/test/test-0000001.pdf"), True) + + # Set new correspondent and expect document to be saved properly + document.correspondent = Correspondent.objects.get_or_create( + name="foo")[0] + document.save() + self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" + + "originals/foo/foo-0000001.pdf"), True) + + # Check proper handling of files + self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + + "/documents/originals/foo"), True) + self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + + "/documents/originals/none"), False) + self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + + "/documents/originals/test"), False) + self.assertEqual(document.generate_source_filename(), + "foo/foo-0000001.pdf") + + @override_settings(MEDIA_ROOT="/tmp/paperless-tests-{}". + format(str(uuid4())[:8])) + @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + + "{correspondent}") + def test_document_renamed_encrypted(self): + document = Document() + document.file_type = "pdf" + document.storage_type = Document.STORAGE_TYPE_GPG + document.save() + + # Ensure that filename is properly generated + tmp = document.source_filename + self.assertEqual(document.generate_source_filename(), + "none/none-0000001.pdf.gpg") + document.create_source_directory() + Path(document.source_path).touch() + + # Test source_path + self.assertEqual(document.source_path, settings.MEDIA_ROOT + + "/documents/originals/none/none-0000001.pdf.gpg") + + # Rename the document "illegaly" + os.makedirs(settings.MEDIA_ROOT + "/documents/originals/test") + os.rename(settings.MEDIA_ROOT + "/documents/originals/" + + "none/none-0000001.pdf.gpg", + settings.MEDIA_ROOT + "/documents/originals/" + + "test/test-0000001.pdf.gpg") + self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" + + "originals/test/test-0000001.pdf.gpg"), True) + + # Set new correspondent and expect document to be saved properly + document.correspondent = Correspondent.objects.get_or_create( + name="foo")[0] + document.save() + self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" + + "originals/foo/foo-0000001.pdf.gpg"), True) + + # Check proper handling of files + self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + + "/documents/originals/foo"), True) + self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + + "/documents/originals/none"), False) + self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + + "/documents/originals/test"), False) + self.assertEqual(document.generate_source_filename(), + "foo/foo-0000001.pdf.gpg") From c83f58eec42a6332c4a79ff86d2074e5397e8574 Mon Sep 17 00:00:00 2001 From: Wolf-Bastian Poettner Date: Fri, 10 Apr 2020 13:12:17 +0000 Subject: [PATCH 29/38] Remove temporary directories after failed tests --- src/documents/tests/test_file_handling.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/documents/tests/test_file_handling.py b/src/documents/tests/test_file_handling.py index 0da2c27e7..1dbe1ca18 100644 --- a/src/documents/tests/test_file_handling.py +++ b/src/documents/tests/test_file_handling.py @@ -4,6 +4,7 @@ import shutil from unittest import mock from uuid import uuid4 from pathlib import Path +from shutil import rmtree from dateutil import tz from django.test import TestCase, override_settings @@ -14,6 +15,19 @@ from django.conf import settings class TestDate(TestCase): + def tearDown(self): + # Delete all temporary directories from failed tests + root = os.path.normpath("/tmp") + + for filename in os.listdir(root): + fullname = os.path.join(root, filename) + + if not os.path.isdir(fullname): + continue + + if filename.startswith("paperless-tests-"): + shutil.rmtree(fullname, ignore_errors=True) + @override_settings(PAPERLESS_FILENAME_FORMAT="") def test_source_filename(self): document = Document() From 8651c5f3bbcf67aba4baa01cb0a8cdb1abac7d17 Mon Sep 17 00:00:00 2001 From: Wolf-Bastian Poettner Date: Mon, 13 Apr 2020 07:12:48 +0000 Subject: [PATCH 30/38] Add: Ability to specifically remove temporary directories that have been used by a unit test --- src/documents/tests/test_file_handling.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/src/documents/tests/test_file_handling.py b/src/documents/tests/test_file_handling.py index 1dbe1ca18..50f3fe485 100644 --- a/src/documents/tests/test_file_handling.py +++ b/src/documents/tests/test_file_handling.py @@ -15,18 +15,14 @@ from django.conf import settings class TestDate(TestCase): + deletion_list = [] + + def add_to_deletion_list(self, dirname): + self.deletion_list.append(dirname) + def tearDown(self): - # Delete all temporary directories from failed tests - root = os.path.normpath("/tmp") - - for filename in os.listdir(root): - fullname = os.path.join(root, filename) - - if not os.path.isdir(fullname): - continue - - if filename.startswith("paperless-tests-"): - shutil.rmtree(fullname, ignore_errors=True) + for dirname in self.deletion_list: + shutil.rmtree(dirname, ignore_errors=True) @override_settings(PAPERLESS_FILENAME_FORMAT="") def test_source_filename(self): From e2cc21f5bb685bbb8b96a7391d07a375a1f4c06d Mon Sep 17 00:00:00 2001 From: Wolf-Bastian Poettner Date: Mon, 13 Apr 2020 07:13:28 +0000 Subject: [PATCH 31/38] Removed MEDIA_ROOT setting override for test that did not require it --- src/documents/tests/test_file_handling.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/documents/tests/test_file_handling.py b/src/documents/tests/test_file_handling.py index 50f3fe485..b8d2b20f0 100644 --- a/src/documents/tests/test_file_handling.py +++ b/src/documents/tests/test_file_handling.py @@ -331,8 +331,6 @@ class TestDate(TestCase): self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + "/documents/originals"), True) - @override_settings(MEDIA_ROOT="/tmp/paperless-tests-{}". - format(str(uuid4())[:8])) @override_settings(PAPERLESS_FILENAME_FORMAT=None) def test_format_none(self): document = Document() From 679fdc407c36a068cd9d036854bc36bea14bff9a Mon Sep 17 00:00:00 2001 From: Wolf-Bastian Poettner Date: Mon, 13 Apr 2020 07:13:54 +0000 Subject: [PATCH 32/38] Add: make unit test add their temporary directory to the deletion list --- src/documents/tests/test_file_handling.py | 26 +++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/src/documents/tests/test_file_handling.py b/src/documents/tests/test_file_handling.py index b8d2b20f0..75ce80905 100644 --- a/src/documents/tests/test_file_handling.py +++ b/src/documents/tests/test_file_handling.py @@ -54,6 +54,8 @@ class TestDate(TestCase): @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + "{correspondent}") def test_file_renaming(self): + self.add_to_deletion_list(settings.MEDIA_ROOT) + document = Document() document.file_type = "pdf" document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED @@ -100,6 +102,8 @@ class TestDate(TestCase): @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + "{correspondent}") def test_document_delete(self): + self.add_to_deletion_list(settings.MEDIA_ROOT) + document = Document() document.file_type = "pdf" document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED @@ -124,6 +128,8 @@ class TestDate(TestCase): @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + "{correspondent}") def test_directory_not_empty(self): + self.add_to_deletion_list(settings.MEDIA_ROOT) + document = Document() document.file_type = "pdf" document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED @@ -157,6 +163,8 @@ class TestDate(TestCase): format(str(uuid4())[:8])) @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}") def test_tags_with_underscore(self): + self.add_to_deletion_list(settings.MEDIA_ROOT) + document = Document() document.file_type = "pdf" document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED @@ -180,6 +188,8 @@ class TestDate(TestCase): format(str(uuid4())[:8])) @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}") def test_tags_with_dash(self): + self.add_to_deletion_list(settings.MEDIA_ROOT) + document = Document() document.file_type = "pdf" document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED @@ -203,6 +213,8 @@ class TestDate(TestCase): format(str(uuid4())[:8])) @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}") def test_tags_malformed(self): + self.add_to_deletion_list(settings.MEDIA_ROOT) + document = Document() document.file_type = "pdf" document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED @@ -226,6 +238,8 @@ class TestDate(TestCase): format(str(uuid4())[:8])) @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[0]}") def test_tags_all(self): + self.add_to_deletion_list(settings.MEDIA_ROOT) + document = Document() document.file_type = "pdf" document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED @@ -248,6 +262,8 @@ class TestDate(TestCase): format(str(uuid4())[:8])) @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[0]}") def test_tags_out_of_bounds_0(self): + self.add_to_deletion_list(settings.MEDIA_ROOT) + document = Document() document.file_type = "pdf" document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED @@ -266,6 +282,8 @@ class TestDate(TestCase): format(str(uuid4())[:8])) @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[10000000]}") def test_tags_out_of_bounds_10000000(self): + self.add_to_deletion_list(settings.MEDIA_ROOT) + document = Document() document.file_type = "pdf" document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED @@ -284,6 +302,8 @@ class TestDate(TestCase): format(str(uuid4())[:8])) @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[99]}") def test_tags_out_of_bounds_99(self): + self.add_to_deletion_list(settings.MEDIA_ROOT) + document = Document() document.file_type = "pdf" document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED @@ -303,6 +323,8 @@ class TestDate(TestCase): @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + "{correspondent}/{correspondent}") def test_nested_directory_cleanup(self): + self.add_to_deletion_list(settings.MEDIA_ROOT) + document = Document() document.file_type = "pdf" document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED @@ -345,6 +367,8 @@ class TestDate(TestCase): @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + "{correspondent}") def test_document_renamed(self): + self.add_to_deletion_list(settings.MEDIA_ROOT) + document = Document() document.file_type = "pdf" document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED @@ -392,6 +416,8 @@ class TestDate(TestCase): @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + "{correspondent}") def test_document_renamed_encrypted(self): + self.add_to_deletion_list(settings.MEDIA_ROOT) + document = Document() document.file_type = "pdf" document.storage_type = Document.STORAGE_TYPE_GPG From dc507b6451c0b2751a61ebfe3fbbbe4efa919a57 Mon Sep 17 00:00:00 2001 From: Wolf-Bastian Poettner Date: Mon, 13 Apr 2020 13:26:05 +0000 Subject: [PATCH 33/38] Fix: made try_delete_empty_directories and delete_all_empty_subdirectories staticmethods --- src/documents/models.py | 68 +++++++++++++++++++++-------------------- 1 file changed, 35 insertions(+), 33 deletions(-) diff --git a/src/documents/models.py b/src/documents/models.py index ba5b861c6..9099803cf 100644 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -428,44 +428,46 @@ class Document(models.Model): self.filename = filename -def try_delete_empty_directories(directory): - # Go up in the directory hierarchy and try to delete all directories - directory = os.path.normpath(directory) - root = os.path.normpath(Document.filename_to_path("")) - - while directory != root: - # Try to delete the current directory - try: - os.rmdir(directory) - except os.error: - # Directory not empty, no need to go further up - return - - # Cut off actual directory and go one level up - directory, _ = os.path.split(directory) + @staticmethod + def try_delete_empty_directories(directory): + # Go up in the directory hierarchy and try to delete all directories directory = os.path.normpath(directory) + root = os.path.normpath(Document.filename_to_path("")) + + while directory != root: + # Try to delete the current directory + try: + os.rmdir(directory) + except os.error: + # Directory not empty, no need to go further up + return + + # Cut off actual directory and go one level up + directory, _ = os.path.split(directory) + directory = os.path.normpath(directory) -def delete_all_empty_subdirectories(directory): - # Go through all folders and try to delete all directories - root = os.path.normpath(Document.filename_to_path(directory)) + @staticmethod + def delete_all_empty_subdirectories(directory): + # Go through all folders and try to delete all directories + root = os.path.normpath(Document.filename_to_path(directory)) - for filename in os.listdir(root): - fullname = os.path.join(directory, filename) + for filename in os.listdir(root): + fullname = os.path.join(directory, filename) - if not os.path.isdir(Document.filename_to_path(fullname)): - continue + if not os.path.isdir(Document.filename_to_path(fullname)): + continue - # Try to delete the directory - try: - os.rmdir(Document.filename_to_path(fullname)) - continue - except os.error: - # Directory not empty, no need to go further up - continue + # Go into subdirectory to see, if there is more to delete + Document.delete_all_empty_subdirectories(os.path.join(directory, filename)) - # Go into subdirectory to see, if there is more to delete - delete_all_empty_subdirectories(os.path.join(directory, filename)) + # Try to delete the directory + try: + os.rmdir(Document.filename_to_path(fullname)) + continue + except os.error: + # Directory not empty, no need to go further up + continue @receiver(models.signals.m2m_changed, sender=Document.tags.through) @@ -502,7 +504,7 @@ def update_filename(sender, instance, **kwargs): # Delete empty directory old_dir = os.path.dirname(instance.filename) old_path = instance.filename_to_path(old_dir) - try_delete_empty_directories(old_path) + Document.try_delete_empty_directories(old_path) instance.filename = new_filename @@ -530,7 +532,7 @@ def delete_files(sender, instance, **kwargs): # And remove the directory (if applicable) old_dir = os.path.dirname(instance.filename) old_path = instance.filename_to_path(old_dir) - try_delete_empty_directories(old_path) + Document.try_delete_empty_directories(old_path) class Log(models.Model): From 429fb38ee987e18e3bb196c2a5d204842eb0fc95 Mon Sep 17 00:00:00 2001 From: Wolf-Bastian Poettner Date: Mon, 13 Apr 2020 13:31:29 +0000 Subject: [PATCH 34/38] Fix: made try_delete_empty_directories and delete_all_empty_subdirectories staticmethods --- src/documents/models.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/documents/models.py b/src/documents/models.py index 9099803cf..368d0f46c 100644 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -311,7 +311,8 @@ class Document(models.Model): self.filename = recovered_filename self.save() - delete_all_empty_subdirectories(Document.filename_to_path("")) + Document.delete_all_empty_subdirectories( + Document.filename_to_path("")) return self.filename @@ -427,7 +428,6 @@ class Document(models.Model): if os.path.isfile(Document.filename_to_path(filename)): self.filename = filename - @staticmethod def try_delete_empty_directories(directory): # Go up in the directory hierarchy and try to delete all directories @@ -446,7 +446,6 @@ class Document(models.Model): directory, _ = os.path.split(directory) directory = os.path.normpath(directory) - @staticmethod def delete_all_empty_subdirectories(directory): # Go through all folders and try to delete all directories @@ -459,7 +458,8 @@ class Document(models.Model): continue # Go into subdirectory to see, if there is more to delete - Document.delete_all_empty_subdirectories(os.path.join(directory, filename)) + Document.delete_all_empty_subdirectories( + os.path.join(directory, filename)) # Try to delete the directory try: From 17212b302bca7ef2c910f8f7d07f43fed1a04759 Mon Sep 17 00:00:00 2001 From: Wolf-Bastian Poettner Date: Mon, 13 Apr 2020 13:32:08 +0000 Subject: [PATCH 35/38] Add: more error handling --- src/documents/models.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/src/documents/models.py b/src/documents/models.py index 368d0f46c..518a7b617 100644 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -306,13 +306,20 @@ class Document(models.Model): elif not os.path.isfile(Document.filename_to_path(self.filename)): recovered_filename = self.find_renamed_document() - # If we have found the file, save filename and clean up empty dirs + # If we have found the file so update the filename if recovered_filename is not None: + logger = logging.getLogger(__name__) + logger.warning("Filename of document " + str(self.id) + + " has changed and was successfully updated") self.filename = recovered_filename - self.save() + # Remove all empty subdirectories from MEDIA_ROOT Document.delete_all_empty_subdirectories( Document.filename_to_path("")) + else: + logger = logging.getLogger(__name__) + logger.error("File of document " + str(self.id) + " has " + + "gone and could not be recovered") return self.filename @@ -477,6 +484,10 @@ def update_filename(sender, instance, **kwargs): if instance.filename is None: return + # Check is file exists and update filename otherwise + if not os.path.isfile(Document.filename_to_path(instance.filename)): + instance.filename = instance.source_filename + # Build the new filename new_filename = instance.generate_source_filename() @@ -500,6 +511,11 @@ def update_filename(sender, instance, **kwargs): except PermissionError: # Do not update filename in object return + except FileNotFoundError: + logger = logging.getLogger(__name__) + logger.error("Renaming of document " + str(instance.id) + " failed " + + "as file " + instance.filename + " was no longer present") + return # Delete empty directory old_dir = os.path.dirname(instance.filename) From 3365550515a4805a4b46bd2491106e1bc54baffc Mon Sep 17 00:00:00 2001 From: Wolf-Bastian Poettner Date: Mon, 13 Apr 2020 13:30:15 +0000 Subject: [PATCH 36/38] Add: more tests --- src/documents/tests/test_file_handling.py | 155 ++++++++++++++++++++++ 1 file changed, 155 insertions(+) diff --git a/src/documents/tests/test_file_handling.py b/src/documents/tests/test_file_handling.py index 75ce80905..b00ff68c7 100644 --- a/src/documents/tests/test_file_handling.py +++ b/src/documents/tests/test_file_handling.py @@ -97,6 +97,45 @@ class TestDate(TestCase): self.assertEqual(document.generate_source_filename(), "test/test-0000001.pdf.gpg") + @override_settings(MEDIA_ROOT="/tmp/paperless-tests-{}". + format(str(uuid4())[:8])) + @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + + "{correspondent}") + def test_file_renaming_missing_permissions(self): + self.add_to_deletion_list(settings.MEDIA_ROOT) + + document = Document() + document.file_type = "pdf" + document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED + document.save() + + # Ensure that filename is properly generated + tmp = document.source_filename + self.assertEqual(document.generate_source_filename(), + "none/none-0000001.pdf") + document.create_source_directory() + Path(document.source_path).touch() + + # Test source_path + self.assertEqual(document.source_path, settings.MEDIA_ROOT + + "/documents/originals/none/none-0000001.pdf") + + # Make the folder read- and execute-only (no writing and no renaming) + os.chmod(settings.MEDIA_ROOT + "/documents/originals/none", 0o555) + + # Set a correspondent and save the document + document.correspondent = Correspondent.objects.get_or_create( + name="test")[0] + document.save() + + # Check proper handling of files + self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" + + "originals/none/none-0000001.pdf"), True) + self.assertEqual(document.source_filename, + "none/none-0000001.pdf") + + os.chmod(settings.MEDIA_ROOT + "/documents/originals/none", 0o666) + @override_settings(MEDIA_ROOT="/tmp/paperless-tests-{}". format(str(uuid4())[:8])) @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + @@ -123,6 +162,16 @@ class TestDate(TestCase): self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + "/documents/originals/none"), False) + @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + + "{correspondent}") + def test_document_delete_nofile(self): + document = Document() + document.file_type = "pdf" + document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED + document.save() + + document.delete() + @override_settings(MEDIA_ROOT="/tmp/paperless-tests-{}". format(str(uuid4())[:8])) @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + @@ -393,6 +442,8 @@ class TestDate(TestCase): "test/test-0000001.pdf") self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" + "originals/test/test-0000001.pdf"), True) + self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" + + "originals/none/none-0000001.pdf"), False) # Set new correspondent and expect document to be saved properly document.correspondent = Correspondent.objects.get_or_create( @@ -442,6 +493,8 @@ class TestDate(TestCase): "test/test-0000001.pdf.gpg") self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" + "originals/test/test-0000001.pdf.gpg"), True) + self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" + + "originals/none/none-0000001.pdf"), False) # Set new correspondent and expect document to be saved properly document.correspondent = Correspondent.objects.get_or_create( @@ -459,3 +512,105 @@ class TestDate(TestCase): "/documents/originals/test"), False) self.assertEqual(document.generate_source_filename(), "foo/foo-0000001.pdf.gpg") + + def test_delete_all_empty_subdirectories(self): + # Create our working directory + tmp = "/tmp/paperless-tests-{}".format(str(uuid4())[:8]) + os.makedirs(tmp) + self.add_to_deletion_list(tmp) + + os.makedirs(os.path.join(tmp, "empty")) + os.makedirs(os.path.join(tmp, "empty", "subdirectory")) + + os.makedirs(os.path.join(tmp, "notempty")) + Path(os.path.join(tmp, "notempty", "file")).touch() + + Document.delete_all_empty_subdirectories(tmp) + + self.assertEqual(os.path.isdir(os.path.join(tmp, "notempty")), True) + self.assertEqual(os.path.isdir(os.path.join(tmp, "empty")), False) + self.assertEqual(os.path.isfile( + os.path.join(tmp, "notempty", "file")), True) + + def test_try_delete_empty_directories(self): + # Create our working directory + tmp = "/tmp/paperless-tests-{}".format(str(uuid4())[:8]) + os.makedirs(tmp) + self.add_to_deletion_list(tmp) + + os.makedirs(os.path.join(tmp, "notempty")) + Path(os.path.join(tmp, "notempty", "file")).touch() + os.makedirs(os.path.join(tmp, "notempty", "empty")) + + Document.try_delete_empty_directories( + os.path.join(tmp, "notempty", "empty")) + self.assertEqual(os.path.isdir(os.path.join(tmp, "notempty")), True) + self.assertEqual(os.path.isfile( + os.path.join(tmp, "notempty", "file")), True) + self.assertEqual(os.path.isdir( + os.path.join(tmp, "notempty", "empty")), False) + + @override_settings(MEDIA_ROOT="/tmp/paperless-tests-{}". + format(str(uuid4())[:8])) + @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + + "{correspondent}") + def test_document_accidentally_deleted(self): + self.add_to_deletion_list(settings.MEDIA_ROOT) + + document = Document() + document.file_type = "pdf" + document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED + document.save() + + # Ensure that filename is properly generated + tmp = document.source_filename + self.assertEqual(document.generate_source_filename(), + "none/none-0000001.pdf") + document.create_source_directory() + Path(document.source_path).touch() + + # Test source_path + self.assertEqual(document.source_path, settings.MEDIA_ROOT + + "/documents/originals/none/none-0000001.pdf") + + # Delete the document "illegaly" + os.remove(settings.MEDIA_ROOT + "/documents/originals/" + + "none/none-0000001.pdf") + + # Set new correspondent and expect document to be saved properly + document.correspondent = Correspondent.objects.get_or_create( + name="foo")[0] + document.save() + + # Check proper handling of files + self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + + "/documents/originals/none"), True) + self.assertEqual(document.source_filename, + "none/none-0000001.pdf") + + @override_settings(MEDIA_ROOT="/tmp/paperless-tests-{}". + format(str(uuid4())[:8])) + @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + + "{correspondent}") + def test_set_filename(self): + self.add_to_deletion_list(settings.MEDIA_ROOT) + + document = Document() + document.file_type = "pdf" + document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED + document.save() + + # Ensure that filename is properly generated + tmp = document.source_filename + self.assertEqual(document.generate_source_filename(), + "none/none-0000001.pdf") + document.create_source_directory() + Path(document.source_path).touch() + + # Set existing filename + document.set_filename(tmp) + self.assertEqual(document.source_filename, "none/none-0000001.pdf") + + # Set non-existing filename + document.set_filename("doesnotexist") + self.assertEqual(document.source_filename, "none/none-0000001.pdf") From 0a064bfd744d132793200a2a074b9ec1a3912765 Mon Sep 17 00:00:00 2001 From: Wolf-Bastian Poettner Date: Wed, 15 Apr 2020 20:16:25 +0000 Subject: [PATCH 37/38] Fix: testcase left unaccessible directory --- src/documents/tests/test_file_handling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/documents/tests/test_file_handling.py b/src/documents/tests/test_file_handling.py index b00ff68c7..f4e82bfb8 100644 --- a/src/documents/tests/test_file_handling.py +++ b/src/documents/tests/test_file_handling.py @@ -134,7 +134,7 @@ class TestDate(TestCase): self.assertEqual(document.source_filename, "none/none-0000001.pdf") - os.chmod(settings.MEDIA_ROOT + "/documents/originals/none", 0o666) + os.chmod(settings.MEDIA_ROOT + "/documents/originals/none", 0o777) @override_settings(MEDIA_ROOT="/tmp/paperless-tests-{}". format(str(uuid4())[:8])) From bb8e3ad6866287e5c39f290a77a90fa7be5a2089 Mon Sep 17 00:00:00 2001 From: Wolf-Bastian Poettner Date: Wed, 15 Apr 2020 20:17:58 +0000 Subject: [PATCH 38/38] Add: ensure original directory is created and removed for each test case --- src/documents/tests/test_file_handling.py | 71 +++-------------------- 1 file changed, 7 insertions(+), 64 deletions(-) diff --git a/src/documents/tests/test_file_handling.py b/src/documents/tests/test_file_handling.py index f4e82bfb8..d55a50cd2 100644 --- a/src/documents/tests/test_file_handling.py +++ b/src/documents/tests/test_file_handling.py @@ -20,6 +20,13 @@ class TestDate(TestCase): def add_to_deletion_list(self, dirname): self.deletion_list.append(dirname) + def setUp(self): + folder = "/tmp/paperless-tests-{}".format(str(uuid4())[:8]) + os.makedirs(folder + "/documents/originals") + storage_override = override_settings(MEDIA_ROOT=folder) + storage_override.enable() + self.add_to_deletion_list(folder) + def tearDown(self): for dirname in self.deletion_list: shutil.rmtree(dirname, ignore_errors=True) @@ -49,13 +56,9 @@ class TestDate(TestCase): self.assertEqual(document.generate_source_filename(), "0000001.pdf.gpg") - @override_settings(MEDIA_ROOT="/tmp/paperless-tests-{}". - format(str(uuid4())[:8])) @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + "{correspondent}") def test_file_renaming(self): - self.add_to_deletion_list(settings.MEDIA_ROOT) - document = Document() document.file_type = "pdf" document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED @@ -97,13 +100,9 @@ class TestDate(TestCase): self.assertEqual(document.generate_source_filename(), "test/test-0000001.pdf.gpg") - @override_settings(MEDIA_ROOT="/tmp/paperless-tests-{}". - format(str(uuid4())[:8])) @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + "{correspondent}") def test_file_renaming_missing_permissions(self): - self.add_to_deletion_list(settings.MEDIA_ROOT) - document = Document() document.file_type = "pdf" document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED @@ -136,13 +135,9 @@ class TestDate(TestCase): os.chmod(settings.MEDIA_ROOT + "/documents/originals/none", 0o777) - @override_settings(MEDIA_ROOT="/tmp/paperless-tests-{}". - format(str(uuid4())[:8])) @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + "{correspondent}") def test_document_delete(self): - self.add_to_deletion_list(settings.MEDIA_ROOT) - document = Document() document.file_type = "pdf" document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED @@ -172,13 +167,9 @@ class TestDate(TestCase): document.delete() - @override_settings(MEDIA_ROOT="/tmp/paperless-tests-{}". - format(str(uuid4())[:8])) @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + "{correspondent}") def test_directory_not_empty(self): - self.add_to_deletion_list(settings.MEDIA_ROOT) - document = Document() document.file_type = "pdf" document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED @@ -208,12 +199,8 @@ class TestDate(TestCase): "/documents/originals/none/none-0000001.pdftest") os.rmdir(settings.MEDIA_ROOT + "/documents/originals/none") - @override_settings(MEDIA_ROOT="/tmp/paperless-tests-{}". - format(str(uuid4())[:8])) @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}") def test_tags_with_underscore(self): - self.add_to_deletion_list(settings.MEDIA_ROOT) - document = Document() document.file_type = "pdf" document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED @@ -233,12 +220,8 @@ class TestDate(TestCase): document.delete() - @override_settings(MEDIA_ROOT="/tmp/paperless-tests-{}". - format(str(uuid4())[:8])) @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}") def test_tags_with_dash(self): - self.add_to_deletion_list(settings.MEDIA_ROOT) - document = Document() document.file_type = "pdf" document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED @@ -258,12 +241,8 @@ class TestDate(TestCase): document.delete() - @override_settings(MEDIA_ROOT="/tmp/paperless-tests-{}". - format(str(uuid4())[:8])) @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}") def test_tags_malformed(self): - self.add_to_deletion_list(settings.MEDIA_ROOT) - document = Document() document.file_type = "pdf" document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED @@ -283,12 +262,8 @@ class TestDate(TestCase): document.delete() - @override_settings(MEDIA_ROOT="/tmp/paperless-tests-{}". - format(str(uuid4())[:8])) @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[0]}") def test_tags_all(self): - self.add_to_deletion_list(settings.MEDIA_ROOT) - document = Document() document.file_type = "pdf" document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED @@ -307,12 +282,8 @@ class TestDate(TestCase): document.delete() - @override_settings(MEDIA_ROOT="/tmp/paperless-tests-{}". - format(str(uuid4())[:8])) @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[0]}") def test_tags_out_of_bounds_0(self): - self.add_to_deletion_list(settings.MEDIA_ROOT) - document = Document() document.file_type = "pdf" document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED @@ -327,12 +298,8 @@ class TestDate(TestCase): document.delete() - @override_settings(MEDIA_ROOT="/tmp/paperless-tests-{}". - format(str(uuid4())[:8])) @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[10000000]}") def test_tags_out_of_bounds_10000000(self): - self.add_to_deletion_list(settings.MEDIA_ROOT) - document = Document() document.file_type = "pdf" document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED @@ -347,12 +314,8 @@ class TestDate(TestCase): document.delete() - @override_settings(MEDIA_ROOT="/tmp/paperless-tests-{}". - format(str(uuid4())[:8])) @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[99]}") def test_tags_out_of_bounds_99(self): - self.add_to_deletion_list(settings.MEDIA_ROOT) - document = Document() document.file_type = "pdf" document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED @@ -367,13 +330,9 @@ class TestDate(TestCase): document.delete() - @override_settings(MEDIA_ROOT="/tmp/paperless-tests-{}". - format(str(uuid4())[:8])) @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + "{correspondent}/{correspondent}") def test_nested_directory_cleanup(self): - self.add_to_deletion_list(settings.MEDIA_ROOT) - document = Document() document.file_type = "pdf" document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED @@ -411,13 +370,9 @@ class TestDate(TestCase): self.assertEqual(document.generate_source_filename(), "0000001.pdf") - @override_settings(MEDIA_ROOT="/tmp/paperless-tests-{}". - format(str(uuid4())[:8])) @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + "{correspondent}") def test_document_renamed(self): - self.add_to_deletion_list(settings.MEDIA_ROOT) - document = Document() document.file_type = "pdf" document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED @@ -462,13 +417,9 @@ class TestDate(TestCase): self.assertEqual(document.generate_source_filename(), "foo/foo-0000001.pdf") - @override_settings(MEDIA_ROOT="/tmp/paperless-tests-{}". - format(str(uuid4())[:8])) @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + "{correspondent}") def test_document_renamed_encrypted(self): - self.add_to_deletion_list(settings.MEDIA_ROOT) - document = Document() document.file_type = "pdf" document.storage_type = Document.STORAGE_TYPE_GPG @@ -550,13 +501,9 @@ class TestDate(TestCase): self.assertEqual(os.path.isdir( os.path.join(tmp, "notempty", "empty")), False) - @override_settings(MEDIA_ROOT="/tmp/paperless-tests-{}". - format(str(uuid4())[:8])) @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + "{correspondent}") def test_document_accidentally_deleted(self): - self.add_to_deletion_list(settings.MEDIA_ROOT) - document = Document() document.file_type = "pdf" document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED @@ -588,13 +535,9 @@ class TestDate(TestCase): self.assertEqual(document.source_filename, "none/none-0000001.pdf") - @override_settings(MEDIA_ROOT="/tmp/paperless-tests-{}". - format(str(uuid4())[:8])) @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + "{correspondent}") def test_set_filename(self): - self.add_to_deletion_list(settings.MEDIA_ROOT) - document = Document() document.file_type = "pdf" document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED