This commit is contained in:
jonaswinkler 2020-12-08 13:54:35 +01:00
parent c240fa1883
commit 9da11f29c7
10 changed files with 245 additions and 148 deletions

View File

@ -19,6 +19,7 @@ django-extensions = "*"
django-filter = "~=2.4.0" django-filter = "~=2.4.0"
django-q = "~=1.3.4" django-q = "~=1.3.4"
djangorestframework = "~=3.12.2" djangorestframework = "~=3.12.2"
filelock = "*"
fuzzywuzzy = "*" fuzzywuzzy = "*"
gunicorn = "*" gunicorn = "*"
imap-tools = "*" imap-tools = "*"

37
Pipfile.lock generated
View File

@ -1,7 +1,7 @@
{ {
"_meta": { "_meta": {
"hash": { "hash": {
"sha256": "b10db53eb22d917723aa6107ff0970dc4e2aa886ee03d3ae08a994a856d57986" "sha256": "3c187671ead11714d48b56f4714b145f68814e09edea818610b87f18b4f7f6fd"
}, },
"pipfile-spec": 6, "pipfile-spec": 6,
"requires": { "requires": {
@ -197,6 +197,14 @@
"index": "pypi", "index": "pypi",
"version": "==3.12.2" "version": "==3.12.2"
}, },
"filelock": {
"hashes": [
"sha256:18d82244ee114f543149c66a6e0c14e9c4f8a1044b5cdaadd0f82159d6a6ff59",
"sha256:929b7d63ec5b7d6b71b0fa5ac14e030b3f70b75747cef1b10da9b879fef15836"
],
"index": "pypi",
"version": "==3.0.12"
},
"fuzzywuzzy": { "fuzzywuzzy": {
"hashes": [ "hashes": [
"sha256:45016e92264780e58972dca1b3d939ac864b78437422beecebb3095f8efd00e8", "sha256:45016e92264780e58972dca1b3d939ac864b78437422beecebb3095f8efd00e8",
@ -858,10 +866,10 @@
}, },
"certifi": { "certifi": {
"hashes": [ "hashes": [
"sha256:1f422849db327d534e3d0c5f02a263458c3955ec0aae4ff09b95f195c59f4edd", "sha256:1a4995114262bffbc2413b159f2a1a480c969de6e6eb13ee966d470af86af59c",
"sha256:f05def092c44fbf25834a51509ef6e631dc19765ab8a57b4e7ab85531f0a9cf4" "sha256:719a74fb9e33b9bd44cc7f3a8d94bc35e4049deebe19ba7d8e108280cfd59830"
], ],
"version": "==2020.11.8" "version": "==2020.12.5"
}, },
"chardet": { "chardet": {
"hashes": [ "hashes": [
@ -961,17 +969,18 @@
}, },
"faker": { "faker": {
"hashes": [ "hashes": [
"sha256:7bca5b074299ac6532be2f72979e6793f1a2403ca8105cb4cf0b385a964469c4", "sha256:1fcb415562ee6e2395b041e85fa6901d4708d30b84d54015226fa754ed0822c3",
"sha256:fb21a76064847561033d8cab1cfd11af436ddf2c6fe72eb51b3cda51dff86bdc" "sha256:e8beccb398ee9b8cc1a91d9295121d66512b6753b4846eb1e7370545d46b3311"
], ],
"markers": "python_version >= '3.5'", "markers": "python_version >= '3.6'",
"version": "==5.0.0" "version": "==5.0.1"
}, },
"filelock": { "filelock": {
"hashes": [ "hashes": [
"sha256:18d82244ee114f543149c66a6e0c14e9c4f8a1044b5cdaadd0f82159d6a6ff59", "sha256:18d82244ee114f543149c66a6e0c14e9c4f8a1044b5cdaadd0f82159d6a6ff59",
"sha256:929b7d63ec5b7d6b71b0fa5ac14e030b3f70b75747cef1b10da9b879fef15836" "sha256:929b7d63ec5b7d6b71b0fa5ac14e030b3f70b75747cef1b10da9b879fef15836"
], ],
"index": "pypi",
"version": "==3.0.12" "version": "==3.0.12"
}, },
"idna": { "idna": {
@ -1100,11 +1109,11 @@
}, },
"pygments": { "pygments": {
"hashes": [ "hashes": [
"sha256:381985fcc551eb9d37c52088a32914e00517e57f4a21609f48141ba08e193fa0", "sha256:ccf3acacf3782cbed4a989426012f1c535c9a90d3a7fc3f16d231b9372d2b716",
"sha256:88a0bbcd659fcb9573703957c6b9cff9fab7295e6e76db54c9d00ae42df32773" "sha256:f275b6c0909e5dafd2d6269a656aa90fa58ebf4a74f8fcf9053195d226b24a08"
], ],
"markers": "python_version >= '3.5'", "markers": "python_version >= '3.5'",
"version": "==2.7.2" "version": "==2.7.3"
}, },
"pyparsing": { "pyparsing": {
"hashes": [ "hashes": [
@ -1313,11 +1322,11 @@
}, },
"virtualenv": { "virtualenv": {
"hashes": [ "hashes": [
"sha256:07cff122e9d343140366055f31be4dcd61fd598c69d11cd33a9d9c8df4546dd7", "sha256:54b05fc737ea9c9ee9f8340f579e5da5b09fb64fd010ab5757eb90268616907c",
"sha256:e0aac7525e880a429764cefd3aaaff54afb5d9f25c82627563603f5d7de5a6e5" "sha256:b7a8ec323ee02fb2312f098b6b4c9de99559b462775bc8fe3627a73706603c1b"
], ],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==20.2.1" "version": "==20.2.2"
}, },
"zipp": { "zipp": {
"hashes": [ "hashes": [

View File

@ -31,6 +31,8 @@ This release focusses primarily on many small issues with the UI.
* ``FILENAME_FORMAT`` placeholder for document types. * ``FILENAME_FORMAT`` placeholder for document types.
* The filename formatter is now less restrictive with file names and tries to * The filename formatter is now less restrictive with file names and tries to
conserve the original correspondents, types and titles as much as possible. conserve the original correspondents, types and titles as much as possible.
* The filename formatter no longer includes the document ID in filenames. Instead, it
appends ``_01``, ``_02``, etc. when it detects duplicate filenames.
paperless-ng 0.9.5 paperless-ng 0.9.5

View File

@ -8,13 +8,14 @@ from django.conf import settings
from django.db import transaction from django.db import transaction
from django.db.models import Q from django.db.models import Q
from django.utils import timezone from django.utils import timezone
from filelock import FileLock
from .classifier import DocumentClassifier, IncompatibleClassifierVersionError from .classifier import DocumentClassifier, IncompatibleClassifierVersionError
from .file_handling import create_source_path_directory from .file_handling import create_source_path_directory, \
generate_unique_filename
from .loggers import LoggingMixin from .loggers import LoggingMixin
from .models import Document, FileInfo, Correspondent, DocumentType, Tag from .models import Document, FileInfo, Correspondent, DocumentType, Tag
from .parsers import ParseError, get_parser_class_for_mime_type, \ from .parsers import ParseError, get_parser_class_for_mime_type, parse_date
get_supported_file_extensions, parse_date
from .signals import ( from .signals import (
document_consumption_finished, document_consumption_finished,
document_consumption_started document_consumption_started
@ -38,6 +39,10 @@ class Consumer(LoggingMixin):
def pre_check_file_exists(self): def pre_check_file_exists(self):
if not os.path.isfile(self.path): if not os.path.isfile(self.path):
self.log(
"error",
"Cannot consume {}: It is not a file.".format(self.path)
)
raise ConsumerError("Cannot consume {}: It is not a file".format( raise ConsumerError("Cannot consume {}: It is not a file".format(
self.path)) self.path))
@ -47,6 +52,10 @@ class Consumer(LoggingMixin):
if Document.objects.filter(Q(checksum=checksum) | Q(archive_checksum=checksum)).exists(): # NOQA: E501 if Document.objects.filter(Q(checksum=checksum) | Q(archive_checksum=checksum)).exists(): # NOQA: E501
if settings.CONSUMER_DELETE_DUPLICATES: if settings.CONSUMER_DELETE_DUPLICATES:
os.unlink(self.path) os.unlink(self.path)
self.log(
"error",
"Not consuming {}: It is a duplicate.".format(self.filename)
)
raise ConsumerError( raise ConsumerError(
"Not consuming {}: It is a duplicate.".format(self.filename) "Not consuming {}: It is a duplicate.".format(self.filename)
) )
@ -148,8 +157,9 @@ class Consumer(LoggingMixin):
classifier = DocumentClassifier() classifier = DocumentClassifier()
classifier.reload() classifier.reload()
except (FileNotFoundError, IncompatibleClassifierVersionError) as e: except (FileNotFoundError, IncompatibleClassifierVersionError) as e:
logging.getLogger(__name__).warning( self.log(
"Cannot classify documents: {}.".format(e)) "warning",
f"Cannot classify documents: {e}.")
classifier = None classifier = None
# now that everything is done, we can start to store the document # now that everything is done, we can start to store the document
@ -176,31 +186,26 @@ class Consumer(LoggingMixin):
# After everything is in the database, copy the files into # After everything is in the database, copy the files into
# place. If this fails, we'll also rollback the transaction. # place. If this fails, we'll also rollback the transaction.
with FileLock(settings.MEDIA_LOCK):
document.filename = generate_unique_filename(
document, settings.ORIGINALS_DIR)
create_source_path_directory(document.source_path)
# TODO: not required, since this is done by the file handling
# logic
create_source_path_directory(document.source_path)
self._write(document.storage_type,
self.path, document.source_path)
self._write(document.storage_type,
thumbnail, document.thumbnail_path)
if archive_path and os.path.isfile(archive_path):
self._write(document.storage_type, self._write(document.storage_type,
archive_path, document.archive_path) self.path, document.source_path)
with open(archive_path, 'rb') as f: self._write(document.storage_type,
document.archive_checksum = hashlib.md5( thumbnail, document.thumbnail_path)
f.read()).hexdigest()
document.save() if archive_path and os.path.isfile(archive_path):
create_source_path_directory(document.archive_path)
self._write(document.storage_type,
archive_path, document.archive_path)
with open(archive_path, 'rb') as f:
document.archive_checksum = hashlib.md5(
f.read()).hexdigest()
# After performing all database operations and moving files
# into place, tell paperless where the file is.
document.filename = os.path.basename(document.source_path)
# Saving the document now will trigger the filename handling
# logic.
document.save() document.save()
# Delete the file only if it was successfully consumed # Delete the file only if it was successfully consumed

View File

@ -70,7 +70,22 @@ def many_to_dictionary(field):
return mydictionary return mydictionary
def generate_filename(doc): def generate_unique_filename(doc, root):
counter = 0
while True:
new_filename = generate_filename(doc, counter)
if new_filename == doc.filename:
# still the same as before.
return new_filename
if os.path.exists(os.path.join(root, new_filename)):
counter += 1
else:
return new_filename
def generate_filename(doc, counter=0):
path = "" path = ""
try: try:
@ -112,11 +127,11 @@ def generate_filename(doc):
f"Invalid PAPERLESS_FILENAME_FORMAT: " f"Invalid PAPERLESS_FILENAME_FORMAT: "
f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default") f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default")
# Always append the primary key to guarantee uniqueness of filename counter_str = f"_{counter:02}" if counter else ""
if len(path) > 0: if len(path) > 0:
filename = "%s-%07i%s" % (path, doc.pk, doc.file_type) filename = f"{path}{counter_str}{doc.file_type}"
else: else:
filename = "%07i%s" % (doc.pk, doc.file_type) filename = f"{doc.pk:07}{counter_str}{doc.file_type}"
# Append .gpg for encrypted files # Append .gpg for encrypted files
if doc.storage_type == doc.STORAGE_TYPE_GPG: if doc.storage_type == doc.STORAGE_TYPE_GPG:

View File

@ -5,11 +5,13 @@ import shutil
from django.conf import settings from django.conf import settings
from django.core.management import call_command from django.core.management import call_command
from django.core.management.base import BaseCommand, CommandError from django.core.management.base import BaseCommand, CommandError
from filelock import FileLock
from documents.models import Document from documents.models import Document
from documents.settings import EXPORTER_FILE_NAME, EXPORTER_THUMBNAIL_NAME, \ from documents.settings import EXPORTER_FILE_NAME, EXPORTER_THUMBNAIL_NAME, \
EXPORTER_ARCHIVE_NAME EXPORTER_ARCHIVE_NAME
from ...file_handling import generate_filename, create_source_path_directory from ...file_handling import create_source_path_directory, \
generate_unique_filename
from ...mixins import Renderable from ...mixins import Renderable
@ -114,17 +116,20 @@ class Command(Renderable, BaseCommand):
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.filename = generate_filename(document) with FileLock(settings.MEDIA_LOCK):
document.filename = generate_unique_filename(
document, settings.ORIGINALS_DIR)
if os.path.isfile(document.source_path): if os.path.isfile(document.source_path):
raise FileExistsError(document.source_path) raise FileExistsError(document.source_path)
create_source_path_directory(document.source_path) create_source_path_directory(document.source_path)
print(f"Moving {document_path} to {document.source_path}") print(f"Moving {document_path} to {document.source_path}")
shutil.copy(document_path, document.source_path) shutil.copy(document_path, document.source_path)
shutil.copy(thumbnail_path, document.thumbnail_path) shutil.copy(thumbnail_path, document.thumbnail_path)
if archive_path: if archive_path:
shutil.copy(archive_path, document.archive_path) create_source_path_directory(document.archive_path)
shutil.copy(archive_path, document.archive_path)
document.save() document.save()

View File

@ -9,11 +9,13 @@ from django.contrib.contenttypes.models import ContentType
from django.db import models, DatabaseError from django.db import models, DatabaseError
from django.dispatch import receiver from django.dispatch import receiver
from django.utils import timezone from django.utils import timezone
from filelock import FileLock
from rest_framework.reverse import reverse from rest_framework.reverse import reverse
from .. import index, matching from .. import index, matching
from ..file_handling import delete_empty_directories, generate_filename, \ from ..file_handling import delete_empty_directories, \
create_source_path_directory, archive_name_from_filename create_source_path_directory, archive_name_from_filename, \
generate_unique_filename
from ..models import Document, Tag from ..models import Document, Tag
@ -226,81 +228,94 @@ def update_filename_and_move_files(sender, instance, **kwargs):
# This will in turn cause this logic to move the file where it belongs. # This will in turn cause this logic to move the file where it belongs.
return return
old_filename = instance.filename with FileLock(settings.MEDIA_LOCK):
new_filename = generate_filename(instance) old_filename = instance.filename
new_filename = generate_unique_filename(
instance, settings.ORIGINALS_DIR)
if new_filename == instance.filename: if new_filename == instance.filename:
# Don't do anything if it's the same. # Don't do anything if it's the same.
return
old_source_path = instance.source_path
new_source_path = os.path.join(settings.ORIGINALS_DIR, new_filename)
if not validate_move(instance, old_source_path, new_source_path):
return
# archive files are optional, archive checksum tells us if we have one,
# since this is None for documents without archived files.
if instance.archive_checksum:
new_archive_filename = archive_name_from_filename(new_filename)
old_archive_path = instance.archive_path
new_archive_path = os.path.join(settings.ARCHIVE_DIR,
new_archive_filename)
if not validate_move(instance, old_archive_path, new_archive_path):
return return
create_source_path_directory(new_archive_path) old_source_path = instance.source_path
else: new_source_path = os.path.join(settings.ORIGINALS_DIR, new_filename)
old_archive_path = None
new_archive_path = None
create_source_path_directory(new_source_path) if not validate_move(instance, old_source_path, new_source_path):
return
try: # archive files are optional, archive checksum tells us if we have one,
os.rename(old_source_path, new_source_path) # since this is None for documents without archived files.
if instance.archive_checksum: if instance.archive_checksum:
os.rename(old_archive_path, new_archive_path) new_archive_filename = archive_name_from_filename(new_filename)
instance.filename = new_filename old_archive_path = instance.archive_path
# Don't save here to prevent infinite recursion. new_archive_path = os.path.join(settings.ARCHIVE_DIR,
Document.objects.filter(pk=instance.pk).update(filename=new_filename) new_archive_filename)
logging.getLogger(__name__).debug( if not validate_move(instance, old_archive_path, new_archive_path):
f"Moved file {old_source_path} to {new_source_path}.") return
if instance.archive_checksum: create_source_path_directory(new_archive_path)
logging.getLogger(__name__).debug( else:
f"Moved file {old_archive_path} to {new_archive_path}.") old_archive_path = None
new_archive_path = None
create_source_path_directory(new_source_path)
except OSError as e:
instance.filename = old_filename
# this happens when we can't move a file. If that's the case for the
# archive file, we try our best to revert the changes.
try: try:
os.rename(old_source_path, new_source_path)
if instance.archive_checksum:
os.rename(old_archive_path, new_archive_path)
instance.filename = new_filename
# Don't save() here to prevent infinite recursion.
Document.objects.filter(pk=instance.pk).update(
filename=new_filename)
logging.getLogger(__name__).debug(
f"Moved file {old_source_path} to {new_source_path}.")
if instance.archive_checksum:
logging.getLogger(__name__).debug(
f"Moved file {old_archive_path} to {new_archive_path}.")
except OSError as e:
instance.filename = old_filename
# this happens when we can't move a file. If that's the case for
# the archive file, we try our best to revert the changes.
# no need to save the instance, the update() has not happened yet.
try:
os.rename(new_source_path, old_source_path)
os.rename(new_archive_path, old_archive_path)
except Exception as e:
# This is fine, since:
# A: if we managed to move source from A to B, we will also
# manage to move it from B to A. If not, we have a serious
# issue that's going to get caught by the sanity checker.
# All files remain in place and will never be overwritten,
# so this is not the end of the world.
# B: if moving the original file failed, nothing has changed
# anyway.
pass
except DatabaseError as e:
# this happens after moving files, so move them back into place.
# since moving them once succeeded, it's very likely going to
# succeed again.
os.rename(new_source_path, old_source_path) os.rename(new_source_path, old_source_path)
os.rename(new_archive_path, old_archive_path) if instance.archive_checksum:
except Exception as e: os.rename(new_archive_path, old_archive_path)
# This is fine, since: instance.filename = old_filename
# A: if we managed to move source from A to B, we will also manage # again, no need to save the instance, since the actual update()
# to move it from B to A. If not, we have a serious issue # operation failed.
# that's going to get caught by the sanity checker.
# all files remain in place and will never be overwritten,
# so this is not the end of the world.
# B: if moving the original file failed, nothing has changed anyway.
pass
except DatabaseError as e:
os.rename(new_source_path, old_source_path)
if instance.archive_checksum:
os.rename(new_archive_path, old_archive_path)
instance.filename = old_filename
if not os.path.isfile(old_source_path): # finally, remove any empty sub folders. This will do nothing if
delete_empty_directories(os.path.dirname(old_source_path), # something has failed above.
root=settings.ORIGINALS_DIR) if not os.path.isfile(old_source_path):
delete_empty_directories(os.path.dirname(old_source_path),
root=settings.ORIGINALS_DIR)
if old_archive_path and not os.path.isfile(old_archive_path): if old_archive_path and not os.path.isfile(old_archive_path):
delete_empty_directories(os.path.dirname(old_archive_path), delete_empty_directories(os.path.dirname(old_archive_path),
root=settings.ARCHIVE_DIR) root=settings.ARCHIVE_DIR)
def set_log_entry(sender, document=None, logging_group=None, **kwargs): def set_log_entry(sender, document=None, logging_group=None, **kwargs):

View File

@ -598,10 +598,10 @@ class TestConsumer(DirectoriesMixin, TestCase):
self.assertEqual(document.title, "new docs") self.assertEqual(document.title, "new docs")
self.assertEqual(document.correspondent.name, "Bank") self.assertEqual(document.correspondent.name, "Bank")
self.assertEqual(document.filename, "Bank/new docs-0000001.pdf") self.assertEqual(document.filename, "Bank/new docs.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}") @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
@mock.patch("documents.signals.handlers.generate_filename") @mock.patch("documents.signals.handlers.generate_unique_filename")
def testFilenameHandlingUnstableFormat(self, m): def testFilenameHandlingUnstableFormat(self, m):
filenames = ["this", "that", "now this", "i cant decide"] filenames = ["this", "that", "now this", "i cant decide"]
@ -611,7 +611,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
filenames.insert(0, f) filenames.insert(0, f)
return f return f
m.side_effect = lambda f: get_filename() m.side_effect = lambda f, root: get_filename()
filename = self.get_test_file() filename = self.get_test_file()

View File

@ -40,13 +40,13 @@ class TestFileHandling(DirectoriesMixin, TestCase):
document.filename = generate_filename(document) document.filename = generate_filename(document)
# Ensure that filename is properly generated # Ensure that filename is properly generated
self.assertEqual(document.filename, "none/none-{:07d}.pdf".format(document.pk)) self.assertEqual(document.filename, "none/none.pdf")
# Enable encryption and check again # Enable encryption and check again
document.storage_type = Document.STORAGE_TYPE_GPG document.storage_type = Document.STORAGE_TYPE_GPG
document.filename = generate_filename(document) document.filename = generate_filename(document)
self.assertEqual(document.filename, self.assertEqual(document.filename,
"none/none-{:07d}.pdf.gpg".format(document.pk)) "none/none.pdf.gpg")
document.save() document.save()
@ -62,7 +62,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Check proper handling of files # Check proper handling of files
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/test"), True) self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/test"), True)
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False) self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/test/test-{:07d}.pdf.gpg".format(document.pk)), True) self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/test/test.pdf.gpg"), True)
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}") @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
def test_file_renaming_missing_permissions(self): def test_file_renaming_missing_permissions(self):
@ -74,12 +74,12 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Ensure that filename is properly generated # Ensure that filename is properly generated
document.filename = generate_filename(document) document.filename = generate_filename(document)
self.assertEqual(document.filename, self.assertEqual(document.filename,
"none/none-{:07d}.pdf".format(document.pk)) "none/none.pdf")
create_source_path_directory(document.source_path) create_source_path_directory(document.source_path)
Path(document.source_path).touch() Path(document.source_path).touch()
# Test source_path # Test source_path
self.assertEqual(document.source_path, settings.ORIGINALS_DIR + "/none/none-{:07d}.pdf".format(document.pk)) self.assertEqual(document.source_path, settings.ORIGINALS_DIR + "/none/none.pdf")
# Make the folder read- and execute-only (no writing and no renaming) # Make the folder read- and execute-only (no writing and no renaming)
os.chmod(settings.ORIGINALS_DIR + "/none", 0o555) os.chmod(settings.ORIGINALS_DIR + "/none", 0o555)
@ -89,8 +89,8 @@ class TestFileHandling(DirectoriesMixin, TestCase):
document.save() document.save()
# Check proper handling of files # Check proper handling of files
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none-{:07d}.pdf".format(document.pk)), True) self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), True)
self.assertEqual(document.filename, "none/none-{:07d}.pdf".format(document.pk)) self.assertEqual(document.filename, "none/none.pdf")
os.chmod(settings.ORIGINALS_DIR + "/none", 0o777) os.chmod(settings.ORIGINALS_DIR + "/none", 0o777)
@ -108,7 +108,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Ensure that filename is properly generated # Ensure that filename is properly generated
document.filename = generate_filename(document) document.filename = generate_filename(document)
self.assertEqual(document.filename, self.assertEqual(document.filename,
"none/none-{:07d}.pdf".format(document.pk)) "none/none.pdf")
create_source_path_directory(document.source_path) create_source_path_directory(document.source_path)
Path(document.source_path).touch() Path(document.source_path).touch()
@ -125,8 +125,8 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Check proper handling of files # Check proper handling of files
self.assertTrue(os.path.isfile(document.source_path)) self.assertTrue(os.path.isfile(document.source_path))
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none-{:07d}.pdf".format(document.pk)), True) self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), True)
self.assertEqual(document.filename, "none/none-{:07d}.pdf".format(document.pk)) self.assertEqual(document.filename, "none/none.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}") @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
def test_document_delete(self): def test_document_delete(self):
@ -138,7 +138,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Ensure that filename is properly generated # Ensure that filename is properly generated
document.filename = generate_filename(document) document.filename = generate_filename(document)
self.assertEqual(document.filename, self.assertEqual(document.filename,
"none/none-{:07d}.pdf".format(document.pk)) "none/none.pdf")
create_source_path_directory(document.source_path) create_source_path_directory(document.source_path)
Path(document.source_path).touch() Path(document.source_path).touch()
@ -146,7 +146,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Ensure file deletion after delete # Ensure file deletion after delete
pk = document.pk pk = document.pk
document.delete() document.delete()
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none-{:07d}.pdf".format(pk)), False) self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), False)
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False) self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}") @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
@ -168,7 +168,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Ensure that filename is properly generated # Ensure that filename is properly generated
document.filename = generate_filename(document) document.filename = generate_filename(document)
self.assertEqual(document.filename, self.assertEqual(document.filename,
"none/none-{:07d}.pdf".format(document.pk)) "none/none.pdf")
create_source_path_directory(document.source_path) create_source_path_directory(document.source_path)
@ -199,7 +199,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Ensure that filename is properly generated # Ensure that filename is properly generated
self.assertEqual(generate_filename(document), self.assertEqual(generate_filename(document),
"demo-{:07d}.pdf".format(document.pk)) "demo.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}") @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
def test_tags_with_dash(self): def test_tags_with_dash(self):
@ -215,7 +215,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Ensure that filename is properly generated # Ensure that filename is properly generated
self.assertEqual(generate_filename(document), self.assertEqual(generate_filename(document),
"demo-{:07d}.pdf".format(document.pk)) "demo.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}") @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
def test_tags_malformed(self): def test_tags_malformed(self):
@ -231,7 +231,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Ensure that filename is properly generated # Ensure that filename is properly generated
self.assertEqual(generate_filename(document), self.assertEqual(generate_filename(document),
"none-{:07d}.pdf".format(document.pk)) "none.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[0]}") @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[0]}")
def test_tags_all(self): def test_tags_all(self):
@ -246,7 +246,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Ensure that filename is properly generated # Ensure that filename is properly generated
self.assertEqual(generate_filename(document), self.assertEqual(generate_filename(document),
"demo-{:07d}.pdf".format(document.pk)) "demo.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[1]}") @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[1]}")
def test_tags_out_of_bounds(self): def test_tags_out_of_bounds(self):
@ -261,7 +261,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Ensure that filename is properly generated # Ensure that filename is properly generated
self.assertEqual(generate_filename(document), self.assertEqual(generate_filename(document),
"none-{:07d}.pdf".format(document.pk)) "none.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}/{correspondent}") @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}/{correspondent}")
def test_nested_directory_cleanup(self): def test_nested_directory_cleanup(self):
@ -272,7 +272,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Ensure that filename is properly generated # Ensure that filename is properly generated
document.filename = generate_filename(document) document.filename = generate_filename(document)
self.assertEqual(document.filename, "none/none/none-{:07d}.pdf".format(document.pk)) self.assertEqual(document.filename, "none/none/none.pdf")
create_source_path_directory(document.source_path) create_source_path_directory(document.source_path)
Path(document.source_path).touch() Path(document.source_path).touch()
@ -282,7 +282,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
pk = document.pk pk = document.pk
document.delete() document.delete()
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none/none-{:07d}.pdf".format(pk)), False) self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none/none.pdf"), False)
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none/none"), False) self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none/none"), False)
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False) self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR), True) self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR), True)
@ -330,6 +330,48 @@ class TestFileHandling(DirectoriesMixin, TestCase):
self.assertEqual(generate_filename(document), "0000001.pdf") self.assertEqual(generate_filename(document), "0000001.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
def test_duplicates(self):
document = Document.objects.create(mime_type="application/pdf", title="qwe", checksum="A", pk=1)
document2 = Document.objects.create(mime_type="application/pdf", title="qwe", checksum="B", pk=2)
Path(document.source_path).touch()
Path(document2.source_path).touch()
document.filename = "0000001.pdf"
document.save()
self.assertTrue(os.path.isfile(document.source_path))
self.assertEqual(document.filename, "qwe.pdf")
document2.filename = "0000002.pdf"
document2.save()
self.assertTrue(os.path.isfile(document.source_path))
self.assertEqual(document2.filename, "qwe_01.pdf")
# saving should not change the file names.
document.save()
self.assertTrue(os.path.isfile(document.source_path))
self.assertEqual(document.filename, "qwe.pdf")
document2.save()
self.assertTrue(os.path.isfile(document.source_path))
self.assertEqual(document2.filename, "qwe_01.pdf")
document.delete()
self.assertFalse(os.path.isfile(document.source_path))
# filename free, should remove _01 suffix
document2.save()
self.assertTrue(os.path.isfile(document.source_path))
self.assertEqual(document2.filename, "qwe.pdf")
class TestFileHandlingWithArchive(DirectoriesMixin, TestCase): class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
@ -358,15 +400,14 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
self.assertFalse(os.path.isfile(archive)) self.assertFalse(os.path.isfile(archive))
self.assertTrue(os.path.isfile(doc.source_path)) self.assertTrue(os.path.isfile(doc.source_path))
self.assertTrue(os.path.isfile(doc.archive_path)) self.assertTrue(os.path.isfile(doc.archive_path))
self.assertEqual(doc.source_path, os.path.join(settings.ORIGINALS_DIR, "none", "my_doc-0000001.pdf")) self.assertEqual(doc.source_path, os.path.join(settings.ORIGINALS_DIR, "none", "my_doc.pdf"))
self.assertEqual(doc.archive_path, os.path.join(settings.ARCHIVE_DIR, "none", "my_doc-0000001.pdf")) self.assertEqual(doc.archive_path, os.path.join(settings.ARCHIVE_DIR, "none", "my_doc.pdf"))
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}") @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
def test_move_archive_gone(self): def test_move_archive_gone(self):
original = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf") original = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf") archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
Path(original).touch() Path(original).touch()
#Path(archive).touch()
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B") doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B")
self.assertTrue(os.path.isfile(original)) self.assertTrue(os.path.isfile(original))
@ -381,7 +422,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
Path(original).touch() Path(original).touch()
Path(archive).touch() Path(archive).touch()
os.makedirs(os.path.join(settings.ARCHIVE_DIR, "none")) os.makedirs(os.path.join(settings.ARCHIVE_DIR, "none"))
Path(os.path.join(settings.ARCHIVE_DIR, "none", "my_doc-0000001.pdf")).touch() Path(os.path.join(settings.ARCHIVE_DIR, "none", "my_doc.pdf")).touch()
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B") doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B")
self.assertTrue(os.path.isfile(original)) self.assertTrue(os.path.isfile(original))
@ -494,14 +535,14 @@ class TestFilenameGeneration(TestCase):
def test_invalid_characters(self): def test_invalid_characters(self):
doc = Document.objects.create(title="This. is the title.", mime_type="application/pdf", pk=1, checksum="1") doc = Document.objects.create(title="This. is the title.", mime_type="application/pdf", pk=1, checksum="1")
self.assertEqual(generate_filename(doc), "This. is the title-0000001.pdf") self.assertEqual(generate_filename(doc), "This. is the title.pdf")
doc = Document.objects.create(title="my\\invalid/../title:yay", mime_type="application/pdf", pk=2, checksum="2") doc = Document.objects.create(title="my\\invalid/../title:yay", mime_type="application/pdf", pk=2, checksum="2")
self.assertEqual(generate_filename(doc), "my-invalid-..-title-yay-0000002.pdf") self.assertEqual(generate_filename(doc), "my-invalid-..-title-yay.pdf")
@override_settings( @override_settings(
PAPERLESS_FILENAME_FORMAT="{created}" PAPERLESS_FILENAME_FORMAT="{created}"
) )
def test_date(self): def test_date(self):
doc = Document.objects.create(title="does not matter", created=datetime.datetime(2020,5,21, 7,36,51, 153), mime_type="application/pdf", pk=2, checksum="2") doc = Document.objects.create(title="does not matter", created=datetime.datetime(2020,5,21, 7,36,51, 153), mime_type="application/pdf", pk=2, checksum="2")
self.assertEqual(generate_filename(doc), "2020-05-21-0000002.pdf") self.assertEqual(generate_filename(doc), "2020-05-21.pdf")

View File

@ -53,6 +53,10 @@ ARCHIVE_DIR = os.path.join(MEDIA_ROOT, "documents", "archive")
THUMBNAIL_DIR = os.path.join(MEDIA_ROOT, "documents", "thumbnails") THUMBNAIL_DIR = os.path.join(MEDIA_ROOT, "documents", "thumbnails")
DATA_DIR = os.getenv('PAPERLESS_DATA_DIR', os.path.join(BASE_DIR, "..", "data")) DATA_DIR = os.getenv('PAPERLESS_DATA_DIR', os.path.join(BASE_DIR, "..", "data"))
# Lock file for synchronizing changes to the MEDIA directory across multiple
# threads and processes.
MEDIA_LOCK = os.path.join(MEDIA_ROOT, "media.lock")
INDEX_DIR = os.path.join(DATA_DIR, "index") INDEX_DIR = os.path.join(DATA_DIR, "index")
MODEL_FILE = os.path.join(DATA_DIR, "classification_model.pickle") MODEL_FILE = os.path.join(DATA_DIR, "classification_model.pickle")