added the filename handling back into the code

This commit is contained in:
Jonas Winkler 2020-11-08 13:00:45 +01:00
parent 7747e6512a
commit 9067a4f288
7 changed files with 841 additions and 62 deletions

View File

@ -26,6 +26,7 @@ This is a list of changes that have been made to the original project.
## Added
- **A new single page UI** built with bootstrap and Angular. Its much more responsive than the django admin pages. It features the follwing improvements over the old django admin interface:
- *Dashboard.* The landing page shows some useful information, such as statistics, recently scanned documents, file uploading, and possibly more in the future.
- *Document uploading on the web page.* This is very crude right now, but gets the job done. It simply uploads the documents and stores them in the configured consumer directory. The API for that has always been in the project, there simply was no form on the UI to support it.
- *Full text search* with a proper document indexer: The search feature sorts documents by relevance to the search query, highlights query terms in the found documents and provides autocomplete while typing the query. This is still very basic but will see extensions in the future.
- *Saveable filters.* Save filter and sorting presets and optionally display a couple documents of saved filters (i.e., your inbox sorted descending by added date, or tagged TODO, oldest to newest) on the dash board.
@ -49,21 +50,18 @@ This is a list of changes that have been made to the original project.
These features were removed each due to two reasons. First, I did not feel these features contributed all that much to the over project, and second, I don't want to maintain these features.
- **(BREAKING) Reminders.** I have no idea what they were used for and thus removed them from the project.
- **Filename handling (I'm sorry).** The master branch of the paperless project has seen some changes regarding the filename handling of stored documents. These changes allow you to change the filename of stored documents from their default form {id}.pdf. These changes have not made it into this project, since the whole point of paperless is that you don't have to access your documents on the disk anymore. If you are using version 2.7.0, this does not affect you. If you are on the most recent push on the master branch, the provided migration will revert these changes and rename all your files to their original file name.
- **Every customization made to the admin interface.** Since this is not the primary interface for the application anymore, there is no need to keep and maintain these. Besides, some changes were incompatible with the most recent versions of django. The interface is completely usable, though.
## Planned
These features will make it into the application at some point, sorted by priority.
- **Better tag editor.** The tag editor on the document detail page is not very convenient. This was put in there to get the project working but will be replaced with something nicer eventually.
- **More search.** The search backend is incredibly versatile and customizable. Searching is the most important feature of this project and thus, I want to implement things like:
- Group and limit search results by correspondent, show “more from this” links in the results.
- Ability to search for “Similar documents” in the search results
- Provide corrections for mispelled queries
- **More robust consumer** that shows its progress on the web page.
- **Arbitrary tag colors**. Allow the selection of any color with a color picker.
- **Dashboard**. The landing page is a little bleak right now but will feature status updates about the consumer, previews of saved filters and database statistics in the future.
## On the chopping block.

View File

@ -63,7 +63,19 @@ PAPERLESS_CONSUMPTION_DIR="../consume"
# Any email sent to the target account that does not contain this text will be
# ignored.
#PAPERLESS_EMAIL_SECRET=""
PAPERLESS_EMAIL_SECRET=""
# Specify a filename format for the document (directories are supported)
# Use the following placeholders:
# * {correspondent}
# * {title}
# * {created}
# * {added}
# * {tags[KEY]} If your tags conform to key_value or key-value
# * {tags[INDEX]} If your tags are strings, select the tag by index
# Uniqueness of filenames is ensured, as an incrementing counter is attached
# to each filename.
#PAPERLESS_FILENAME_FORMAT=""
###############################################################################
#### Security ####

View File

@ -0,0 +1,24 @@
from django.core.management.base import BaseCommand
from documents.models import Document, Tag
from ...mixins import Renderable
class Command(Renderable, BaseCommand):
help = """
This will rename all documents to match the latest filename format.
""".replace(" ", "")
def __init__(self, *args, **kwargs):
self.verbosity = 0
BaseCommand.__init__(self, *args, **kwargs)
def handle(self, *args, **options):
self.verbosity = options["verbosity"]
for document in Document.objects.all():
# Saving the document again will generate a new filename and rename
document.save()

View File

@ -23,48 +23,6 @@ def make_index(apps, schema_editor):
print(" --> Cannot create document index.")
def restore_filenames(apps, schema_editor):
Document = apps.get_model("documents", "Document")
rename_operations = []
for doc in Document.objects.all():
file_name = "{:07}.{}".format(doc.pk, doc.file_type)
if doc.storage_type == "gpg":
file_name += ".gpg"
if not doc.filename == file_name:
try:
src = os.path.join(settings.ORIGINALS_DIR, doc.filename)
dst = os.path.join(settings.ORIGINALS_DIR, file_name)
if os.path.exists(dst):
raise Exception("Cannot move {}, {} already exists!".format(src, dst))
if not os.path.exists(src):
raise Exception("Cannot move {}, file does not exist! (this is bad, one of your documents is missing".format(src))
rename_operations.append( (src,dst) )
except (PermissionError, FileNotFoundError) as e:
raise Exception(e)
for (src, dst) in rename_operations:
print("file was renamed, restoring {} to {}".format(src, dst))
os.rename(src, dst)
def initialize_document_classifier(apps, schema_editor):
try:
print("Initalizing document classifier...")
from documents.classifier import DocumentClassifier
classifier = DocumentClassifier()
try:
classifier.train()
classifier.save_classifier()
except Exception as e:
print("Classifier error: {}".format(e))
except ImportError:
print("Document classifier not found, skipping")
class Migration(migrations.Migration):
dependencies = [
@ -72,13 +30,6 @@ class Migration(migrations.Migration):
]
operations = [
migrations.RunPython(
code=restore_filenames,
),
migrations.RemoveField(
model_name='document',
name='filename',
),
migrations.AddField(
model_name='document',
name='archive_serial_number',
@ -141,8 +92,4 @@ class Migration(migrations.Migration):
code=make_index,
reverse_code=django.db.migrations.operations.special.RunPython.noop,
),
migrations.RunPython(
code=initialize_document_classifier,
reverse_code=django.db.migrations.operations.special.RunPython.noop,
),
]

View File

@ -3,11 +3,12 @@
import logging
import os
import re
from collections import OrderedDict
from collections import OrderedDict, defaultdict
import dateutil.parser
from django.conf import settings
from django.db import models
from django.dispatch import receiver
from django.template.defaultfilters import slugify
from django.utils import timezone
from django.utils.text import slugify
@ -190,6 +191,14 @@ class Document(models.Model):
added = models.DateTimeField(
default=timezone.now, editable=False, db_index=True)
filename = models.FilePathField(
max_length=256,
editable=False,
default=None,
null=True,
help_text="Current filename in storage"
)
archive_serial_number = models.IntegerField(
blank=True,
null=True,
@ -211,15 +220,123 @@ class Document(models.Model):
return "{}: {}".format(created, self.correspondent or self.title)
return str(created)
def find_renamed_document(self, subdirectory=""):
suffix = "%07i.%s" % (self.pk, self.file_type)
# Append .gpg for encrypted files
if self.storage_type == self.STORAGE_TYPE_GPG:
suffix += ".gpg"
# Go up in the directory hierarchy and try to delete all directories
root = os.path.normpath(Document.filename_to_path(subdirectory))
for filename in os.listdir(root):
if filename.endswith(suffix):
return os.path.join(subdirectory, filename)
fullname = os.path.join(subdirectory, filename)
if os.path.isdir(Document.filename_to_path(fullname)):
return self.find_renamed_document(fullname)
return None
@property
def source_filename(self):
# Initial filename generation (for new documents)
if self.filename is None:
self.filename = self.generate_source_filename()
# Check if document is still available under filename
elif not os.path.isfile(Document.filename_to_path(self.filename)):
recovered_filename = self.find_renamed_document()
# If we have found the file so update the filename
if recovered_filename is not None:
logger = logging.getLogger(__name__)
logger.warning("Filename of document " + str(self.id) +
" has changed and was successfully updated")
self.filename = recovered_filename
# Remove all empty subdirectories from MEDIA_ROOT
Document.delete_all_empty_subdirectories(
Document.filename_to_path(""))
else:
logger = logging.getLogger(__name__)
logger.error("File of document " + str(self.id) + " has " +
"gone and could not be recovered")
return self.filename
@staticmethod
def many_to_dictionary(field):
# Converts ManyToManyField to dictionary by assuming, that field
# entries contain an _ or - which will be used as a delimiter
mydictionary = dict()
for index, t in enumerate(field.all()):
# Populate tag names by index
mydictionary[index] = slugify(t.name)
# Find delimiter
delimiter = t.name.find('_')
if delimiter == -1:
delimiter = t.name.find('-')
if delimiter == -1:
continue
key = t.name[:delimiter]
value = t.name[delimiter+1:]
mydictionary[slugify(key)] = slugify(value)
return mydictionary
def generate_source_filename(self):
# Create filename based on configured format
if settings.PAPERLESS_FILENAME_FORMAT is not None:
tags = defaultdict(lambda: slugify(None),
self.many_to_dictionary(self.tags))
path = settings.PAPERLESS_FILENAME_FORMAT.format(
correspondent=slugify(self.correspondent),
title=slugify(self.title),
created=slugify(self.created),
added=slugify(self.added),
tags=tags)
else:
path = ""
# Always append the primary key to guarantee uniqueness of filename
if len(path) > 0:
filename = "%s-%07i.%s" % (path, self.pk, self.file_type)
else:
filename = "%07i.%s" % (self.pk, self.file_type)
# Append .gpg for encrypted files
if self.storage_type == self.STORAGE_TYPE_GPG:
filename += ".gpg"
return filename
def create_source_directory(self):
new_filename = self.generate_source_filename()
# Determine the full "target" path
dir_new = Document.filename_to_path(os.path.dirname(new_filename))
# Create new path
os.makedirs(dir_new, exist_ok=True)
@property
def source_path(self):
file_name = "{:07}.{}".format(self.pk, self.file_type)
if self.storage_type == self.STORAGE_TYPE_GPG:
file_name += ".gpg"
return Document.filename_to_path(self.source_filename)
@staticmethod
def filename_to_path(filename):
return os.path.join(
settings.ORIGINALS_DIR,
file_name
filename
)
@property
@ -245,6 +362,125 @@ class Document(models.Model):
def thumbnail_file(self):
return open(self.thumbnail_path, "rb")
def set_filename(self, filename):
if os.path.isfile(Document.filename_to_path(filename)):
self.filename = filename
@staticmethod
def try_delete_empty_directories(directory):
# Go up in the directory hierarchy and try to delete all directories
directory = os.path.normpath(directory)
root = os.path.normpath(Document.filename_to_path(""))
while directory != root:
# Try to delete the current directory
try:
os.rmdir(directory)
except os.error:
# Directory not empty, no need to go further up
return
# Cut off actual directory and go one level up
directory, _ = os.path.split(directory)
directory = os.path.normpath(directory)
@staticmethod
def delete_all_empty_subdirectories(directory):
# Go through all folders and try to delete all directories
root = os.path.normpath(Document.filename_to_path(directory))
for filename in os.listdir(root):
fullname = os.path.join(directory, filename)
if not os.path.isdir(Document.filename_to_path(fullname)):
continue
# Go into subdirectory to see, if there is more to delete
Document.delete_all_empty_subdirectories(
os.path.join(directory, filename))
# Try to delete the directory
try:
os.rmdir(Document.filename_to_path(fullname))
continue
except os.error:
# Directory not empty, no need to go further up
continue
@receiver(models.signals.m2m_changed, sender=Document.tags.through)
@receiver(models.signals.post_save, sender=Document)
def update_filename(sender, instance, **kwargs):
# Skip if document has not been saved yet
if instance.filename is None:
return
# Check is file exists and update filename otherwise
if not os.path.isfile(Document.filename_to_path(instance.filename)):
instance.filename = instance.source_filename
# Build the new filename
new_filename = instance.generate_source_filename()
# If the filename is the same, then nothing needs to be done
if instance.filename == new_filename:
return
# Determine the full "target" path
path_new = instance.filename_to_path(new_filename)
dir_new = instance.filename_to_path(os.path.dirname(new_filename))
# Create new path
instance.create_source_directory()
# Determine the full "current" path
path_current = instance.filename_to_path(instance.source_filename)
# Move file
try:
os.rename(path_current, path_new)
except PermissionError:
# Do not update filename in object
return
except FileNotFoundError:
logger = logging.getLogger(__name__)
logger.error("Renaming of document " + str(instance.id) + " failed " +
"as file " + instance.filename + " was no longer present")
return
# Delete empty directory
old_dir = os.path.dirname(instance.filename)
old_path = instance.filename_to_path(old_dir)
Document.try_delete_empty_directories(old_path)
instance.filename = new_filename
# Save instance
# This will not cause a cascade of post_save signals, as next time
# nothing needs to be renamed
instance.save()
@receiver(models.signals.post_delete, sender=Document)
def delete_files(sender, instance, **kwargs):
if instance.filename is None:
return
# Remove the document
old_file = instance.filename_to_path(instance.filename)
try:
os.remove(old_file)
except FileNotFoundError:
logger = logging.getLogger(__name__)
logger.warning("Deleted document " + str(instance.id) + " but file " +
old_file + " was no longer present")
# And remove the directory (if applicable)
old_dir = os.path.dirname(instance.filename)
old_path = instance.filename_to_path(old_dir)
Document.try_delete_empty_directories(old_path)
class Log(models.Model):

View File

@ -0,0 +1,559 @@
import datetime
import os
import shutil
from unittest import mock
from uuid import uuid4
from pathlib import Path
from shutil import rmtree
from dateutil import tz
from django.test import TestCase, override_settings
from django.utils.text import slugify
from ..models import Tag, Document, Correspondent
from django.conf import settings
class TestDate(TestCase):
deletion_list = []
def add_to_deletion_list(self, dirname):
self.deletion_list.append(dirname)
def setUp(self):
folder = "/tmp/paperless-tests-{}".format(str(uuid4())[:8])
os.makedirs(folder + "/documents/originals")
override_settings(MEDIA_ROOT=folder).enable()
override_settings(ORIGINALS_DIR=folder + "/documents/originals").enable()
self.add_to_deletion_list(folder)
def tearDown(self):
for dirname in self.deletion_list:
shutil.rmtree(dirname, ignore_errors=True)
@override_settings(PAPERLESS_FILENAME_FORMAT="")
def test_source_filename(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
self.assertEqual(document.source_filename, "0000001.pdf")
document.filename = "test.pdf"
self.assertEqual(document.source_filename, "test.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="")
def test_generate_source_filename(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
self.assertEqual(document.generate_source_filename(), "0000001.pdf")
document.storage_type = Document.STORAGE_TYPE_GPG
self.assertEqual(document.generate_source_filename(),
"0000001.pdf.gpg")
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
"{correspondent}")
def test_file_renaming(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none/none-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()
# Test source_path
self.assertEqual(document.source_path, settings.MEDIA_ROOT +
"/documents/originals/none/none-0000001.pdf")
# Enable encryption and check again
document.storage_type = Document.STORAGE_TYPE_GPG
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none/none-0000001.pdf.gpg")
document.save()
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/none"), True)
# Set a correspondent and save the document
document.correspondent = Correspondent.objects.get_or_create(
name="test")[0]
document.save()
# Check proper handling of files
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/test"), True)
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/none"), False)
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" +
"originals/test/test-0000001.pdf.gpg"), True)
self.assertEqual(document.generate_source_filename(),
"test/test-0000001.pdf.gpg")
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
"{correspondent}")
def test_file_renaming_missing_permissions(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none/none-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()
# Test source_path
self.assertEqual(document.source_path, settings.MEDIA_ROOT +
"/documents/originals/none/none-0000001.pdf")
# Make the folder read- and execute-only (no writing and no renaming)
os.chmod(settings.MEDIA_ROOT + "/documents/originals/none", 0o555)
# Set a correspondent and save the document
document.correspondent = Correspondent.objects.get_or_create(
name="test")[0]
document.save()
# Check proper handling of files
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" +
"originals/none/none-0000001.pdf"), True)
self.assertEqual(document.source_filename,
"none/none-0000001.pdf")
os.chmod(settings.MEDIA_ROOT + "/documents/originals/none", 0o777)
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
"{correspondent}")
def test_document_delete(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none/none-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()
# Ensure file deletion after delete
document.delete()
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT +
"/documents/originals/none/none-0000001.pdf"), False)
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/none"), False)
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
"{correspondent}")
def test_document_delete_nofile(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
document.delete()
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
"{correspondent}")
def test_directory_not_empty(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none/none-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()
Path(document.source_path + "test").touch()
# Set a correspondent and save the document
document.correspondent = Correspondent.objects.get_or_create(
name="test")[0]
document.save()
# Check proper handling of files
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/test"), True)
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/none"), True)
# Cleanup
os.remove(settings.MEDIA_ROOT +
"/documents/originals/none/none-0000001.pdftest")
os.rmdir(settings.MEDIA_ROOT + "/documents/originals/none")
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
def test_tags_with_underscore(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
# Add tag to document
document.tags.create(name="type_demo")
document.tags.create(name="foo_bar")
document.save()
# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"demo-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()
document.delete()
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
def test_tags_with_dash(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
# Add tag to document
document.tags.create(name="type-demo")
document.tags.create(name="foo-bar")
document.save()
# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"demo-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()
document.delete()
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
def test_tags_malformed(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
# Add tag to document
document.tags.create(name="type:demo")
document.tags.create(name="foo:bar")
document.save()
# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()
document.delete()
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[0]}")
def test_tags_all(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
# Add tag to document
document.tags.create(name="demo")
document.save()
# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"demo-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()
document.delete()
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[0]}")
def test_tags_out_of_bounds_0(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()
document.delete()
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[10000000]}")
def test_tags_out_of_bounds_10000000(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()
document.delete()
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[99]}")
def test_tags_out_of_bounds_99(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()
document.delete()
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
"{correspondent}/{correspondent}")
def test_nested_directory_cleanup(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none/none/none-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()
# Check proper handling of files
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/none/none"), True)
document.delete()
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT +
"/documents/originals/none/none/none-0000001.pdf"),
False)
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/none/none"), False)
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/none"), False)
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals"), True)
@override_settings(PAPERLESS_FILENAME_FORMAT=None)
def test_format_none(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
self.assertEqual(document.generate_source_filename(), "0000001.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
"{correspondent}")
def test_document_renamed(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none/none-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()
# Test source_path
self.assertEqual(document.source_path, settings.MEDIA_ROOT +
"/documents/originals/none/none-0000001.pdf")
# Rename the document "illegaly"
os.makedirs(settings.MEDIA_ROOT + "/documents/originals/test")
os.rename(settings.MEDIA_ROOT + "/documents/originals/" +
"none/none-0000001.pdf",
settings.MEDIA_ROOT + "/documents/originals/" +
"test/test-0000001.pdf")
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" +
"originals/test/test-0000001.pdf"), True)
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" +
"originals/none/none-0000001.pdf"), False)
# Set new correspondent and expect document to be saved properly
document.correspondent = Correspondent.objects.get_or_create(
name="foo")[0]
document.save()
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" +
"originals/foo/foo-0000001.pdf"), True)
# Check proper handling of files
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/foo"), True)
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/none"), False)
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/test"), False)
self.assertEqual(document.generate_source_filename(),
"foo/foo-0000001.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
"{correspondent}")
def test_document_renamed_encrypted(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_GPG
document.save()
# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none/none-0000001.pdf.gpg")
document.create_source_directory()
Path(document.source_path).touch()
# Test source_path
self.assertEqual(document.source_path, settings.MEDIA_ROOT +
"/documents/originals/none/none-0000001.pdf.gpg")
# Rename the document "illegaly"
os.makedirs(settings.MEDIA_ROOT + "/documents/originals/test")
os.rename(settings.MEDIA_ROOT + "/documents/originals/" +
"none/none-0000001.pdf.gpg",
settings.MEDIA_ROOT + "/documents/originals/" +
"test/test-0000001.pdf.gpg")
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" +
"originals/test/test-0000001.pdf.gpg"), True)
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" +
"originals/none/none-0000001.pdf"), False)
# Set new correspondent and expect document to be saved properly
document.correspondent = Correspondent.objects.get_or_create(
name="foo")[0]
document.save()
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" +
"originals/foo/foo-0000001.pdf.gpg"), True)
# Check proper handling of files
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/foo"), True)
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/none"), False)
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/test"), False)
self.assertEqual(document.generate_source_filename(),
"foo/foo-0000001.pdf.gpg")
def test_delete_all_empty_subdirectories(self):
# Create our working directory
tmp = "/tmp/paperless-tests-{}".format(str(uuid4())[:8])
os.makedirs(tmp)
self.add_to_deletion_list(tmp)
os.makedirs(os.path.join(tmp, "empty"))
os.makedirs(os.path.join(tmp, "empty", "subdirectory"))
os.makedirs(os.path.join(tmp, "notempty"))
Path(os.path.join(tmp, "notempty", "file")).touch()
Document.delete_all_empty_subdirectories(tmp)
self.assertEqual(os.path.isdir(os.path.join(tmp, "notempty")), True)
self.assertEqual(os.path.isdir(os.path.join(tmp, "empty")), False)
self.assertEqual(os.path.isfile(
os.path.join(tmp, "notempty", "file")), True)
def test_try_delete_empty_directories(self):
# Create our working directory
tmp = "/tmp/paperless-tests-{}".format(str(uuid4())[:8])
os.makedirs(tmp)
self.add_to_deletion_list(tmp)
os.makedirs(os.path.join(tmp, "notempty"))
Path(os.path.join(tmp, "notempty", "file")).touch()
os.makedirs(os.path.join(tmp, "notempty", "empty"))
Document.try_delete_empty_directories(
os.path.join(tmp, "notempty", "empty"))
self.assertEqual(os.path.isdir(os.path.join(tmp, "notempty")), True)
self.assertEqual(os.path.isfile(
os.path.join(tmp, "notempty", "file")), True)
self.assertEqual(os.path.isdir(
os.path.join(tmp, "notempty", "empty")), False)
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
"{correspondent}")
def test_document_accidentally_deleted(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none/none-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()
# Test source_path
self.assertEqual(document.source_path, settings.MEDIA_ROOT +
"/documents/originals/none/none-0000001.pdf")
# Delete the document "illegaly"
os.remove(settings.MEDIA_ROOT + "/documents/originals/" +
"none/none-0000001.pdf")
# Set new correspondent and expect document to be saved properly
document.correspondent = Correspondent.objects.get_or_create(
name="foo")[0]
document.save()
# Check proper handling of files
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/none"), True)
self.assertEqual(document.source_filename,
"none/none-0000001.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
"{correspondent}")
def test_set_filename(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none/none-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()
# Set existing filename
document.set_filename(tmp)
self.assertEqual(document.source_filename, "none/none-0000001.pdf")
# Set non-existing filename
document.set_filename("doesnotexist")
self.assertEqual(document.source_filename, "none/none-0000001.pdf")

View File

@ -299,3 +299,6 @@ FILENAME_DATE_ORDER = os.getenv("PAPERLESS_FILENAME_DATE_ORDER")
FILENAME_PARSE_TRANSFORMS = []
for t in json.loads(os.getenv("PAPERLESS_FILENAME_PARSE_TRANSFORMS", "[]")):
FILENAME_PARSE_TRANSFORMS.append((re.compile(t["pattern"]), t["repl"]))
# Specify the filename format for out files
PAPERLESS_FILENAME_FORMAT = os.getenv("PAPERLESS_FILENAME_FORMAT")