From 9da11f29c7ca1f2a9df5feb5e386015ef78c6e3f Mon Sep 17 00:00:00 2001 From: jonaswinkler Date: Tue, 8 Dec 2020 13:54:35 +0100 Subject: [PATCH] fixes #90 --- Pipfile | 1 + Pipfile.lock | 37 +++-- docs/changelog.rst | 2 + src/documents/consumer.py | 57 +++---- src/documents/file_handling.py | 23 ++- .../management/commands/document_importer.py | 25 +-- src/documents/signals/handlers.py | 143 ++++++++++-------- src/documents/tests/test_consumer.py | 6 +- src/documents/tests/test_file_handling.py | 95 ++++++++---- src/paperless/settings.py | 4 + 10 files changed, 245 insertions(+), 148 deletions(-) diff --git a/Pipfile b/Pipfile index 2e86f2a42..830604a8d 100644 --- a/Pipfile +++ b/Pipfile @@ -19,6 +19,7 @@ django-extensions = "*" django-filter = "~=2.4.0" django-q = "~=1.3.4" djangorestframework = "~=3.12.2" +filelock = "*" fuzzywuzzy = "*" gunicorn = "*" imap-tools = "*" diff --git a/Pipfile.lock b/Pipfile.lock index 6158a70e0..198351237 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "b10db53eb22d917723aa6107ff0970dc4e2aa886ee03d3ae08a994a856d57986" + "sha256": "3c187671ead11714d48b56f4714b145f68814e09edea818610b87f18b4f7f6fd" }, "pipfile-spec": 6, "requires": { @@ -197,6 +197,14 @@ "index": "pypi", "version": "==3.12.2" }, + "filelock": { + "hashes": [ + "sha256:18d82244ee114f543149c66a6e0c14e9c4f8a1044b5cdaadd0f82159d6a6ff59", + "sha256:929b7d63ec5b7d6b71b0fa5ac14e030b3f70b75747cef1b10da9b879fef15836" + ], + "index": "pypi", + "version": "==3.0.12" + }, "fuzzywuzzy": { "hashes": [ "sha256:45016e92264780e58972dca1b3d939ac864b78437422beecebb3095f8efd00e8", @@ -858,10 +866,10 @@ }, "certifi": { "hashes": [ - "sha256:1f422849db327d534e3d0c5f02a263458c3955ec0aae4ff09b95f195c59f4edd", - "sha256:f05def092c44fbf25834a51509ef6e631dc19765ab8a57b4e7ab85531f0a9cf4" + "sha256:1a4995114262bffbc2413b159f2a1a480c969de6e6eb13ee966d470af86af59c", + "sha256:719a74fb9e33b9bd44cc7f3a8d94bc35e4049deebe19ba7d8e108280cfd59830" ], - "version": "==2020.11.8" + "version": "==2020.12.5" }, "chardet": { "hashes": [ @@ -961,17 +969,18 @@ }, "faker": { "hashes": [ - "sha256:7bca5b074299ac6532be2f72979e6793f1a2403ca8105cb4cf0b385a964469c4", - "sha256:fb21a76064847561033d8cab1cfd11af436ddf2c6fe72eb51b3cda51dff86bdc" + "sha256:1fcb415562ee6e2395b041e85fa6901d4708d30b84d54015226fa754ed0822c3", + "sha256:e8beccb398ee9b8cc1a91d9295121d66512b6753b4846eb1e7370545d46b3311" ], - "markers": "python_version >= '3.5'", - "version": "==5.0.0" + "markers": "python_version >= '3.6'", + "version": "==5.0.1" }, "filelock": { "hashes": [ "sha256:18d82244ee114f543149c66a6e0c14e9c4f8a1044b5cdaadd0f82159d6a6ff59", "sha256:929b7d63ec5b7d6b71b0fa5ac14e030b3f70b75747cef1b10da9b879fef15836" ], + "index": "pypi", "version": "==3.0.12" }, "idna": { @@ -1100,11 +1109,11 @@ }, "pygments": { "hashes": [ - "sha256:381985fcc551eb9d37c52088a32914e00517e57f4a21609f48141ba08e193fa0", - "sha256:88a0bbcd659fcb9573703957c6b9cff9fab7295e6e76db54c9d00ae42df32773" + "sha256:ccf3acacf3782cbed4a989426012f1c535c9a90d3a7fc3f16d231b9372d2b716", + "sha256:f275b6c0909e5dafd2d6269a656aa90fa58ebf4a74f8fcf9053195d226b24a08" ], "markers": "python_version >= '3.5'", - "version": "==2.7.2" + "version": "==2.7.3" }, "pyparsing": { "hashes": [ @@ -1313,11 +1322,11 @@ }, "virtualenv": { "hashes": [ - "sha256:07cff122e9d343140366055f31be4dcd61fd598c69d11cd33a9d9c8df4546dd7", - "sha256:e0aac7525e880a429764cefd3aaaff54afb5d9f25c82627563603f5d7de5a6e5" + "sha256:54b05fc737ea9c9ee9f8340f579e5da5b09fb64fd010ab5757eb90268616907c", + "sha256:b7a8ec323ee02fb2312f098b6b4c9de99559b462775bc8fe3627a73706603c1b" ], "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", - "version": "==20.2.1" + "version": "==20.2.2" }, "zipp": { "hashes": [ diff --git a/docs/changelog.rst b/docs/changelog.rst index b6f4295b1..2e3ed07f6 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -31,6 +31,8 @@ This release focusses primarily on many small issues with the UI. * ``FILENAME_FORMAT`` placeholder for document types. * The filename formatter is now less restrictive with file names and tries to conserve the original correspondents, types and titles as much as possible. + * The filename formatter does not include the document ID in filenames anymore. It will + rather append ``_01``, ``_02``, etc when it detects duplicate filenames. paperless-ng 0.9.5 diff --git a/src/documents/consumer.py b/src/documents/consumer.py index 7bae5c2a9..23d17abc9 100755 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -8,13 +8,14 @@ from django.conf import settings from django.db import transaction from django.db.models import Q from django.utils import timezone +from filelock import FileLock from .classifier import DocumentClassifier, IncompatibleClassifierVersionError -from .file_handling import create_source_path_directory +from .file_handling import create_source_path_directory, \ + generate_unique_filename from .loggers import LoggingMixin from .models import Document, FileInfo, Correspondent, DocumentType, Tag -from .parsers import ParseError, get_parser_class_for_mime_type, \ - get_supported_file_extensions, parse_date +from .parsers import ParseError, get_parser_class_for_mime_type, parse_date from .signals import ( document_consumption_finished, document_consumption_started @@ -38,6 +39,10 @@ class Consumer(LoggingMixin): def pre_check_file_exists(self): if not os.path.isfile(self.path): + self.log( + "error", + "Cannot consume {}: It is not a file.".format(self.path) + ) raise ConsumerError("Cannot consume {}: It is not a file".format( self.path)) @@ -47,6 +52,10 @@ class Consumer(LoggingMixin): if Document.objects.filter(Q(checksum=checksum) | Q(archive_checksum=checksum)).exists(): # NOQA: E501 if settings.CONSUMER_DELETE_DUPLICATES: os.unlink(self.path) + self.log( + "error", + "Not consuming {}: It is a duplicate.".format(self.filename) + ) raise ConsumerError( "Not consuming {}: It is a duplicate.".format(self.filename) ) @@ -148,8 +157,9 @@ class Consumer(LoggingMixin): classifier = DocumentClassifier() classifier.reload() except (FileNotFoundError, IncompatibleClassifierVersionError) as e: - logging.getLogger(__name__).warning( - "Cannot classify documents: {}.".format(e)) + self.log( + "warning", + f"Cannot classify documents: {e}.") classifier = None # now that everything is done, we can start to store the document @@ -176,31 +186,26 @@ class Consumer(LoggingMixin): # After everything is in the database, copy the files into # place. If this fails, we'll also rollback the transaction. + with FileLock(settings.MEDIA_LOCK): + document.filename = generate_unique_filename( + document, settings.ORIGINALS_DIR) + create_source_path_directory(document.source_path) - # TODO: not required, since this is done by the file handling - # logic - create_source_path_directory(document.source_path) - - self._write(document.storage_type, - self.path, document.source_path) - - self._write(document.storage_type, - thumbnail, document.thumbnail_path) - - if archive_path and os.path.isfile(archive_path): self._write(document.storage_type, - archive_path, document.archive_path) + self.path, document.source_path) - with open(archive_path, 'rb') as f: - document.archive_checksum = hashlib.md5( - f.read()).hexdigest() - document.save() + self._write(document.storage_type, + thumbnail, document.thumbnail_path) + + if archive_path and os.path.isfile(archive_path): + create_source_path_directory(document.archive_path) + self._write(document.storage_type, + archive_path, document.archive_path) + + with open(archive_path, 'rb') as f: + document.archive_checksum = hashlib.md5( + f.read()).hexdigest() - # Afte performing all database operations and moving files - # into place, tell paperless where the file is. - document.filename = os.path.basename(document.source_path) - # Saving the document now will trigger the filename handling - # logic. document.save() # Delete the file only if it was successfully consumed diff --git a/src/documents/file_handling.py b/src/documents/file_handling.py index a6d2f3ef4..c5efc33e4 100644 --- a/src/documents/file_handling.py +++ b/src/documents/file_handling.py @@ -70,7 +70,22 @@ def many_to_dictionary(field): return mydictionary -def generate_filename(doc): +def generate_unique_filename(doc, root): + counter = 0 + + while True: + new_filename = generate_filename(doc, counter) + if new_filename == doc.filename: + # still the same as before. + return new_filename + + if os.path.exists(os.path.join(root, new_filename)): + counter += 1 + else: + return new_filename + + +def generate_filename(doc, counter=0): path = "" try: @@ -112,11 +127,11 @@ def generate_filename(doc): f"Invalid PAPERLESS_FILENAME_FORMAT: " f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default") - # Always append the primary key to guarantee uniqueness of filename + counter_str = f"_{counter:02}" if counter else "" if len(path) > 0: - filename = "%s-%07i%s" % (path, doc.pk, doc.file_type) + filename = f"{path}{counter_str}{doc.file_type}" else: - filename = "%07i%s" % (doc.pk, doc.file_type) + filename = f"{doc.pk:07}{counter_str}{doc.file_type}" # Append .gpg for encrypted files if doc.storage_type == doc.STORAGE_TYPE_GPG: diff --git a/src/documents/management/commands/document_importer.py b/src/documents/management/commands/document_importer.py index ca8c8bf06..70d05d98b 100644 --- a/src/documents/management/commands/document_importer.py +++ b/src/documents/management/commands/document_importer.py @@ -5,11 +5,13 @@ import shutil from django.conf import settings from django.core.management import call_command from django.core.management.base import BaseCommand, CommandError +from filelock import FileLock from documents.models import Document from documents.settings import EXPORTER_FILE_NAME, EXPORTER_THUMBNAIL_NAME, \ EXPORTER_ARCHIVE_NAME -from ...file_handling import generate_filename, create_source_path_directory +from ...file_handling import create_source_path_directory, \ + generate_unique_filename from ...mixins import Renderable @@ -114,17 +116,20 @@ class Command(Renderable, BaseCommand): document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED - document.filename = generate_filename(document) + with FileLock(settings.MEDIA_LOCK): + document.filename = generate_unique_filename( + document, settings.ORIGINALS_DIR) - if os.path.isfile(document.source_path): - raise FileExistsError(document.source_path) + if os.path.isfile(document.source_path): + raise FileExistsError(document.source_path) - create_source_path_directory(document.source_path) + create_source_path_directory(document.source_path) - print(f"Moving {document_path} to {document.source_path}") - shutil.copy(document_path, document.source_path) - shutil.copy(thumbnail_path, document.thumbnail_path) - if archive_path: - shutil.copy(archive_path, document.archive_path) + print(f"Moving {document_path} to {document.source_path}") + shutil.copy(document_path, document.source_path) + shutil.copy(thumbnail_path, document.thumbnail_path) + if archive_path: + create_source_path_directory(document.archive_path) + shutil.copy(archive_path, document.archive_path) document.save() diff --git a/src/documents/signals/handlers.py b/src/documents/signals/handlers.py index 32119a0a3..8a9ce18d7 100755 --- a/src/documents/signals/handlers.py +++ b/src/documents/signals/handlers.py @@ -9,11 +9,13 @@ from django.contrib.contenttypes.models import ContentType from django.db import models, DatabaseError from django.dispatch import receiver from django.utils import timezone +from filelock import FileLock from rest_framework.reverse import reverse from .. import index, matching -from ..file_handling import delete_empty_directories, generate_filename, \ - create_source_path_directory, archive_name_from_filename +from ..file_handling import delete_empty_directories, \ + create_source_path_directory, archive_name_from_filename, \ + generate_unique_filename from ..models import Document, Tag @@ -226,81 +228,94 @@ def update_filename_and_move_files(sender, instance, **kwargs): # This will in turn cause this logic to move the file where it belongs. return - old_filename = instance.filename - new_filename = generate_filename(instance) + with FileLock(settings.MEDIA_LOCK): + old_filename = instance.filename + new_filename = generate_unique_filename( + instance, settings.ORIGINALS_DIR) - if new_filename == instance.filename: - # Don't do anything if its the same. - return - - old_source_path = instance.source_path - new_source_path = os.path.join(settings.ORIGINALS_DIR, new_filename) - - if not validate_move(instance, old_source_path, new_source_path): - return - - # archive files are optional, archive checksum tells us if we have one, - # since this is None for documents without archived files. - if instance.archive_checksum: - new_archive_filename = archive_name_from_filename(new_filename) - old_archive_path = instance.archive_path - new_archive_path = os.path.join(settings.ARCHIVE_DIR, - new_archive_filename) - - if not validate_move(instance, old_archive_path, new_archive_path): + if new_filename == instance.filename: + # Don't do anything if its the same. return - create_source_path_directory(new_archive_path) - else: - old_archive_path = None - new_archive_path = None + old_source_path = instance.source_path + new_source_path = os.path.join(settings.ORIGINALS_DIR, new_filename) - create_source_path_directory(new_source_path) + if not validate_move(instance, old_source_path, new_source_path): + return - try: - os.rename(old_source_path, new_source_path) + # archive files are optional, archive checksum tells us if we have one, + # since this is None for documents without archived files. if instance.archive_checksum: - os.rename(old_archive_path, new_archive_path) - instance.filename = new_filename - # Don't save here to prevent infinite recursion. - Document.objects.filter(pk=instance.pk).update(filename=new_filename) + new_archive_filename = archive_name_from_filename(new_filename) + old_archive_path = instance.archive_path + new_archive_path = os.path.join(settings.ARCHIVE_DIR, + new_archive_filename) - logging.getLogger(__name__).debug( - f"Moved file {old_source_path} to {new_source_path}.") + if not validate_move(instance, old_archive_path, new_archive_path): + return - if instance.archive_checksum: - logging.getLogger(__name__).debug( - f"Moved file {old_archive_path} to {new_archive_path}.") + create_source_path_directory(new_archive_path) + else: + old_archive_path = None + new_archive_path = None + + create_source_path_directory(new_source_path) - except OSError as e: - instance.filename = old_filename - # this happens when we can't move a file. If that's the case for the - # archive file, we try our best to revert the changes. try: + os.rename(old_source_path, new_source_path) + if instance.archive_checksum: + os.rename(old_archive_path, new_archive_path) + instance.filename = new_filename + + # Don't save() here to prevent infinite recursion. + Document.objects.filter(pk=instance.pk).update( + filename=new_filename) + + logging.getLogger(__name__).debug( + f"Moved file {old_source_path} to {new_source_path}.") + + if instance.archive_checksum: + logging.getLogger(__name__).debug( + f"Moved file {old_archive_path} to {new_archive_path}.") + + except OSError as e: + instance.filename = old_filename + # this happens when we can't move a file. If that's the case for + # the archive file, we try our best to revert the changes. + # no need to save the instance, the update() has not happened yet. + try: + os.rename(new_source_path, old_source_path) + os.rename(new_archive_path, old_archive_path) + except Exception as e: + # This is fine, since: + # A: if we managed to move source from A to B, we will also + # manage to move it from B to A. If not, we have a serious + # issue that's going to get caught by the santiy checker. + # All files remain in place and will never be overwritten, + # so this is not the end of the world. + # B: if moving the orignal file failed, nothing has changed + # anyway. + pass + except DatabaseError as e: + # this happens after moving files, so move them back into place. + # since moving them once succeeded, it's very likely going to + # succeed again. os.rename(new_source_path, old_source_path) - os.rename(new_archive_path, old_archive_path) - except Exception as e: - # This is fine, since: - # A: if we managed to move source from A to B, we will also manage - # to move it from B to A. If not, we have a serious issue - # that's going to get caught by the santiy checker. - # all files remain in place and will never be overwritten, - # so this is not the end of the world. - # B: if moving the orignal file failed, nothing has changed anyway. - pass - except DatabaseError as e: - os.rename(new_source_path, old_source_path) - if instance.archive_checksum: - os.rename(new_archive_path, old_archive_path) - instance.filename = old_filename + if instance.archive_checksum: + os.rename(new_archive_path, old_archive_path) + instance.filename = old_filename + # again, no need to save the instance, since the actual update() + # operation failed. - if not os.path.isfile(old_source_path): - delete_empty_directories(os.path.dirname(old_source_path), - root=settings.ORIGINALS_DIR) + # finally, remove any empty sub folders. This will do nothing if + # something has failed above. + if not os.path.isfile(old_source_path): + delete_empty_directories(os.path.dirname(old_source_path), + root=settings.ORIGINALS_DIR) - if old_archive_path and not os.path.isfile(old_archive_path): - delete_empty_directories(os.path.dirname(old_archive_path), - root=settings.ARCHIVE_DIR) + if old_archive_path and not os.path.isfile(old_archive_path): + delete_empty_directories(os.path.dirname(old_archive_path), + root=settings.ARCHIVE_DIR) def set_log_entry(sender, document=None, logging_group=None, **kwargs): diff --git a/src/documents/tests/test_consumer.py b/src/documents/tests/test_consumer.py index f785bc695..f828d3e11 100644 --- a/src/documents/tests/test_consumer.py +++ b/src/documents/tests/test_consumer.py @@ -598,10 +598,10 @@ class TestConsumer(DirectoriesMixin, TestCase): self.assertEqual(document.title, "new docs") self.assertEqual(document.correspondent.name, "Bank") - self.assertEqual(document.filename, "Bank/new docs-0000001.pdf") + self.assertEqual(document.filename, "Bank/new docs.pdf") @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}") - @mock.patch("documents.signals.handlers.generate_filename") + @mock.patch("documents.signals.handlers.generate_unique_filename") def testFilenameHandlingUnstableFormat(self, m): filenames = ["this", "that", "now this", "i cant decide"] @@ -611,7 +611,7 @@ class TestConsumer(DirectoriesMixin, TestCase): filenames.insert(0, f) return f - m.side_effect = lambda f: get_filename() + m.side_effect = lambda f, root: get_filename() filename = self.get_test_file() diff --git a/src/documents/tests/test_file_handling.py b/src/documents/tests/test_file_handling.py index 4ed93d1d4..f0a74ca4f 100644 --- a/src/documents/tests/test_file_handling.py +++ b/src/documents/tests/test_file_handling.py @@ -40,13 +40,13 @@ class TestFileHandling(DirectoriesMixin, TestCase): document.filename = generate_filename(document) # Ensure that filename is properly generated - self.assertEqual(document.filename, "none/none-{:07d}.pdf".format(document.pk)) + self.assertEqual(document.filename, "none/none.pdf") # Enable encryption and check again document.storage_type = Document.STORAGE_TYPE_GPG document.filename = generate_filename(document) self.assertEqual(document.filename, - "none/none-{:07d}.pdf.gpg".format(document.pk)) + "none/none.pdf.gpg") document.save() @@ -62,7 +62,7 @@ class TestFileHandling(DirectoriesMixin, TestCase): # Check proper handling of files self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/test"), True) self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False) - self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/test/test-{:07d}.pdf.gpg".format(document.pk)), True) + self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/test/test.pdf.gpg"), True) @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}") def test_file_renaming_missing_permissions(self): @@ -74,12 +74,12 @@ class TestFileHandling(DirectoriesMixin, TestCase): # Ensure that filename is properly generated document.filename = generate_filename(document) self.assertEqual(document.filename, - "none/none-{:07d}.pdf".format(document.pk)) + "none/none.pdf") create_source_path_directory(document.source_path) Path(document.source_path).touch() # Test source_path - self.assertEqual(document.source_path, settings.ORIGINALS_DIR + "/none/none-{:07d}.pdf".format(document.pk)) + self.assertEqual(document.source_path, settings.ORIGINALS_DIR + "/none/none.pdf") # Make the folder read- and execute-only (no writing and no renaming) os.chmod(settings.ORIGINALS_DIR + "/none", 0o555) @@ -89,8 +89,8 @@ class TestFileHandling(DirectoriesMixin, TestCase): document.save() # Check proper handling of files - self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none-{:07d}.pdf".format(document.pk)), True) - self.assertEqual(document.filename, "none/none-{:07d}.pdf".format(document.pk)) + self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), True) + self.assertEqual(document.filename, "none/none.pdf") os.chmod(settings.ORIGINALS_DIR + "/none", 0o777) @@ -108,7 +108,7 @@ class TestFileHandling(DirectoriesMixin, TestCase): # Ensure that filename is properly generated document.filename = generate_filename(document) self.assertEqual(document.filename, - "none/none-{:07d}.pdf".format(document.pk)) + "none/none.pdf") create_source_path_directory(document.source_path) Path(document.source_path).touch() @@ -125,8 +125,8 @@ class TestFileHandling(DirectoriesMixin, TestCase): # Check proper handling of files self.assertTrue(os.path.isfile(document.source_path)) - self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none-{:07d}.pdf".format(document.pk)), True) - self.assertEqual(document.filename, "none/none-{:07d}.pdf".format(document.pk)) + self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), True) + self.assertEqual(document.filename, "none/none.pdf") @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}") def test_document_delete(self): @@ -138,7 +138,7 @@ class TestFileHandling(DirectoriesMixin, TestCase): # Ensure that filename is properly generated document.filename = generate_filename(document) self.assertEqual(document.filename, - "none/none-{:07d}.pdf".format(document.pk)) + "none/none.pdf") create_source_path_directory(document.source_path) Path(document.source_path).touch() @@ -146,7 +146,7 @@ class TestFileHandling(DirectoriesMixin, TestCase): # Ensure file deletion after delete pk = document.pk document.delete() - self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none-{:07d}.pdf".format(pk)), False) + self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), False) self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False) @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}") @@ -168,7 +168,7 @@ class TestFileHandling(DirectoriesMixin, TestCase): # Ensure that filename is properly generated document.filename = generate_filename(document) self.assertEqual(document.filename, - "none/none-{:07d}.pdf".format(document.pk)) + "none/none.pdf") create_source_path_directory(document.source_path) @@ -199,7 +199,7 @@ class TestFileHandling(DirectoriesMixin, TestCase): # Ensure that filename is properly generated self.assertEqual(generate_filename(document), - "demo-{:07d}.pdf".format(document.pk)) + "demo.pdf") @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}") def test_tags_with_dash(self): @@ -215,7 +215,7 @@ class TestFileHandling(DirectoriesMixin, TestCase): # Ensure that filename is properly generated self.assertEqual(generate_filename(document), - "demo-{:07d}.pdf".format(document.pk)) + "demo.pdf") @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}") def test_tags_malformed(self): @@ -231,7 +231,7 @@ class TestFileHandling(DirectoriesMixin, TestCase): # Ensure that filename is properly generated self.assertEqual(generate_filename(document), - "none-{:07d}.pdf".format(document.pk)) + "none.pdf") @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[0]}") def test_tags_all(self): @@ -246,7 +246,7 @@ class TestFileHandling(DirectoriesMixin, TestCase): # Ensure that filename is properly generated self.assertEqual(generate_filename(document), - "demo-{:07d}.pdf".format(document.pk)) + "demo.pdf") @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[1]}") def test_tags_out_of_bounds(self): @@ -261,7 +261,7 @@ class TestFileHandling(DirectoriesMixin, TestCase): # Ensure that filename is properly generated self.assertEqual(generate_filename(document), - "none-{:07d}.pdf".format(document.pk)) + "none.pdf") @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}/{correspondent}") def test_nested_directory_cleanup(self): @@ -272,7 +272,7 @@ class TestFileHandling(DirectoriesMixin, TestCase): # Ensure that filename is properly generated document.filename = generate_filename(document) - self.assertEqual(document.filename, "none/none/none-{:07d}.pdf".format(document.pk)) + self.assertEqual(document.filename, "none/none/none.pdf") create_source_path_directory(document.source_path) Path(document.source_path).touch() @@ -282,7 +282,7 @@ class TestFileHandling(DirectoriesMixin, TestCase): pk = document.pk document.delete() - self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none/none-{:07d}.pdf".format(pk)), False) + self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none/none.pdf"), False) self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none/none"), False) self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False) self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR), True) @@ -330,6 +330,48 @@ class TestFileHandling(DirectoriesMixin, TestCase): self.assertEqual(generate_filename(document), "0000001.pdf") + @override_settings(PAPERLESS_FILENAME_FORMAT="{title}") + def test_duplicates(self): + document = Document.objects.create(mime_type="application/pdf", title="qwe", checksum="A", pk=1) + document2 = Document.objects.create(mime_type="application/pdf", title="qwe", checksum="B", pk=2) + Path(document.source_path).touch() + Path(document2.source_path).touch() + document.filename = "0000001.pdf" + document.save() + + self.assertTrue(os.path.isfile(document.source_path)) + self.assertEqual(document.filename, "qwe.pdf") + + document2.filename = "0000002.pdf" + document2.save() + + self.assertTrue(os.path.isfile(document.source_path)) + self.assertEqual(document2.filename, "qwe_01.pdf") + + # saving should not change the file names. + + document.save() + + self.assertTrue(os.path.isfile(document.source_path)) + self.assertEqual(document.filename, "qwe.pdf") + + document2.save() + + self.assertTrue(os.path.isfile(document.source_path)) + self.assertEqual(document2.filename, "qwe_01.pdf") + + document.delete() + + self.assertFalse(os.path.isfile(document.source_path)) + + # filename free, should remove _01 suffix + + document2.save() + + self.assertTrue(os.path.isfile(document.source_path)) + self.assertEqual(document2.filename, "qwe.pdf") + + class TestFileHandlingWithArchive(DirectoriesMixin, TestCase): @@ -358,15 +400,14 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase): self.assertFalse(os.path.isfile(archive)) self.assertTrue(os.path.isfile(doc.source_path)) self.assertTrue(os.path.isfile(doc.archive_path)) - self.assertEqual(doc.source_path, os.path.join(settings.ORIGINALS_DIR, "none", "my_doc-0000001.pdf")) - self.assertEqual(doc.archive_path, os.path.join(settings.ARCHIVE_DIR, "none", "my_doc-0000001.pdf")) + self.assertEqual(doc.source_path, os.path.join(settings.ORIGINALS_DIR, "none", "my_doc.pdf")) + self.assertEqual(doc.archive_path, os.path.join(settings.ARCHIVE_DIR, "none", "my_doc.pdf")) @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}") def test_move_archive_gone(self): original = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf") archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf") Path(original).touch() - #Path(archive).touch() doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B") self.assertTrue(os.path.isfile(original)) @@ -381,7 +422,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase): Path(original).touch() Path(archive).touch() os.makedirs(os.path.join(settings.ARCHIVE_DIR, "none")) - Path(os.path.join(settings.ARCHIVE_DIR, "none", "my_doc-0000001.pdf")).touch() + Path(os.path.join(settings.ARCHIVE_DIR, "none", "my_doc.pdf")).touch() doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B") self.assertTrue(os.path.isfile(original)) @@ -494,14 +535,14 @@ class TestFilenameGeneration(TestCase): def test_invalid_characters(self): doc = Document.objects.create(title="This. is the title.", mime_type="application/pdf", pk=1, checksum="1") - self.assertEqual(generate_filename(doc), "This. is the title-0000001.pdf") + self.assertEqual(generate_filename(doc), "This. is the title.pdf") doc = Document.objects.create(title="my\\invalid/../title:yay", mime_type="application/pdf", pk=2, checksum="2") - self.assertEqual(generate_filename(doc), "my-invalid-..-title-yay-0000002.pdf") + self.assertEqual(generate_filename(doc), "my-invalid-..-title-yay.pdf") @override_settings( PAPERLESS_FILENAME_FORMAT="{created}" ) def test_date(self): doc = Document.objects.create(title="does not matter", created=datetime.datetime(2020,5,21, 7,36,51, 153), mime_type="application/pdf", pk=2, checksum="2") - self.assertEqual(generate_filename(doc), "2020-05-21-0000002.pdf") + self.assertEqual(generate_filename(doc), "2020-05-21.pdf") diff --git a/src/paperless/settings.py b/src/paperless/settings.py index c7ecf7645..cf0c3e28d 100644 --- a/src/paperless/settings.py +++ b/src/paperless/settings.py @@ -53,6 +53,10 @@ ARCHIVE_DIR = os.path.join(MEDIA_ROOT, "documents", "archive") THUMBNAIL_DIR = os.path.join(MEDIA_ROOT, "documents", "thumbnails") DATA_DIR = os.getenv('PAPERLESS_DATA_DIR', os.path.join(BASE_DIR, "..", "data")) + +# Lock file for synchronizing changes to the MEDIA directory across multiple +# threads. +MEDIA_LOCK = os.path.join(MEDIA_ROOT, "media.lock") INDEX_DIR = os.path.join(DATA_DIR, "index") MODEL_FILE = os.path.join(DATA_DIR, "classification_model.pickle")