This commit is contained in:
jonaswinkler 2020-12-08 13:54:35 +01:00
parent c240fa1883
commit 9da11f29c7
10 changed files with 245 additions and 148 deletions

View File

@ -19,6 +19,7 @@ django-extensions = "*"
django-filter = "~=2.4.0" django-filter = "~=2.4.0"
django-q = "~=1.3.4" django-q = "~=1.3.4"
djangorestframework = "~=3.12.2" djangorestframework = "~=3.12.2"
filelock = "*"
fuzzywuzzy = "*" fuzzywuzzy = "*"
gunicorn = "*" gunicorn = "*"
imap-tools = "*" imap-tools = "*"

37
Pipfile.lock generated
View File

@ -1,7 +1,7 @@
{ {
"_meta": { "_meta": {
"hash": { "hash": {
"sha256": "b10db53eb22d917723aa6107ff0970dc4e2aa886ee03d3ae08a994a856d57986" "sha256": "3c187671ead11714d48b56f4714b145f68814e09edea818610b87f18b4f7f6fd"
}, },
"pipfile-spec": 6, "pipfile-spec": 6,
"requires": { "requires": {
@ -197,6 +197,14 @@
"index": "pypi", "index": "pypi",
"version": "==3.12.2" "version": "==3.12.2"
}, },
"filelock": {
"hashes": [
"sha256:18d82244ee114f543149c66a6e0c14e9c4f8a1044b5cdaadd0f82159d6a6ff59",
"sha256:929b7d63ec5b7d6b71b0fa5ac14e030b3f70b75747cef1b10da9b879fef15836"
],
"index": "pypi",
"version": "==3.0.12"
},
"fuzzywuzzy": { "fuzzywuzzy": {
"hashes": [ "hashes": [
"sha256:45016e92264780e58972dca1b3d939ac864b78437422beecebb3095f8efd00e8", "sha256:45016e92264780e58972dca1b3d939ac864b78437422beecebb3095f8efd00e8",
@ -858,10 +866,10 @@
}, },
"certifi": { "certifi": {
"hashes": [ "hashes": [
"sha256:1f422849db327d534e3d0c5f02a263458c3955ec0aae4ff09b95f195c59f4edd", "sha256:1a4995114262bffbc2413b159f2a1a480c969de6e6eb13ee966d470af86af59c",
"sha256:f05def092c44fbf25834a51509ef6e631dc19765ab8a57b4e7ab85531f0a9cf4" "sha256:719a74fb9e33b9bd44cc7f3a8d94bc35e4049deebe19ba7d8e108280cfd59830"
], ],
"version": "==2020.11.8" "version": "==2020.12.5"
}, },
"chardet": { "chardet": {
"hashes": [ "hashes": [
@ -961,17 +969,18 @@
}, },
"faker": { "faker": {
"hashes": [ "hashes": [
"sha256:7bca5b074299ac6532be2f72979e6793f1a2403ca8105cb4cf0b385a964469c4", "sha256:1fcb415562ee6e2395b041e85fa6901d4708d30b84d54015226fa754ed0822c3",
"sha256:fb21a76064847561033d8cab1cfd11af436ddf2c6fe72eb51b3cda51dff86bdc" "sha256:e8beccb398ee9b8cc1a91d9295121d66512b6753b4846eb1e7370545d46b3311"
], ],
"markers": "python_version >= '3.5'", "markers": "python_version >= '3.6'",
"version": "==5.0.0" "version": "==5.0.1"
}, },
"filelock": { "filelock": {
"hashes": [ "hashes": [
"sha256:18d82244ee114f543149c66a6e0c14e9c4f8a1044b5cdaadd0f82159d6a6ff59", "sha256:18d82244ee114f543149c66a6e0c14e9c4f8a1044b5cdaadd0f82159d6a6ff59",
"sha256:929b7d63ec5b7d6b71b0fa5ac14e030b3f70b75747cef1b10da9b879fef15836" "sha256:929b7d63ec5b7d6b71b0fa5ac14e030b3f70b75747cef1b10da9b879fef15836"
], ],
"index": "pypi",
"version": "==3.0.12" "version": "==3.0.12"
}, },
"idna": { "idna": {
@ -1100,11 +1109,11 @@
}, },
"pygments": { "pygments": {
"hashes": [ "hashes": [
"sha256:381985fcc551eb9d37c52088a32914e00517e57f4a21609f48141ba08e193fa0", "sha256:ccf3acacf3782cbed4a989426012f1c535c9a90d3a7fc3f16d231b9372d2b716",
"sha256:88a0bbcd659fcb9573703957c6b9cff9fab7295e6e76db54c9d00ae42df32773" "sha256:f275b6c0909e5dafd2d6269a656aa90fa58ebf4a74f8fcf9053195d226b24a08"
], ],
"markers": "python_version >= '3.5'", "markers": "python_version >= '3.5'",
"version": "==2.7.2" "version": "==2.7.3"
}, },
"pyparsing": { "pyparsing": {
"hashes": [ "hashes": [
@ -1313,11 +1322,11 @@
}, },
"virtualenv": { "virtualenv": {
"hashes": [ "hashes": [
"sha256:07cff122e9d343140366055f31be4dcd61fd598c69d11cd33a9d9c8df4546dd7", "sha256:54b05fc737ea9c9ee9f8340f579e5da5b09fb64fd010ab5757eb90268616907c",
"sha256:e0aac7525e880a429764cefd3aaaff54afb5d9f25c82627563603f5d7de5a6e5" "sha256:b7a8ec323ee02fb2312f098b6b4c9de99559b462775bc8fe3627a73706603c1b"
], ],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==20.2.1" "version": "==20.2.2"
}, },
"zipp": { "zipp": {
"hashes": [ "hashes": [

View File

@ -31,6 +31,8 @@ This release focusses primarily on many small issues with the UI.
* ``FILENAME_FORMAT`` placeholder for document types. * ``FILENAME_FORMAT`` placeholder for document types.
* The filename formatter is now less restrictive with file names and tries to * The filename formatter is now less restrictive with file names and tries to
conserve the original correspondents, types and titles as much as possible. conserve the original correspondents, types and titles as much as possible.
* The filename formatter no longer includes the document ID in filenames. Instead,
it appends ``_01``, ``_02``, etc. when it detects duplicate filenames.
paperless-ng 0.9.5 paperless-ng 0.9.5

View File

@ -8,13 +8,14 @@ from django.conf import settings
from django.db import transaction from django.db import transaction
from django.db.models import Q from django.db.models import Q
from django.utils import timezone from django.utils import timezone
from filelock import FileLock
from .classifier import DocumentClassifier, IncompatibleClassifierVersionError from .classifier import DocumentClassifier, IncompatibleClassifierVersionError
from .file_handling import create_source_path_directory from .file_handling import create_source_path_directory, \
generate_unique_filename
from .loggers import LoggingMixin from .loggers import LoggingMixin
from .models import Document, FileInfo, Correspondent, DocumentType, Tag from .models import Document, FileInfo, Correspondent, DocumentType, Tag
from .parsers import ParseError, get_parser_class_for_mime_type, \ from .parsers import ParseError, get_parser_class_for_mime_type, parse_date
get_supported_file_extensions, parse_date
from .signals import ( from .signals import (
document_consumption_finished, document_consumption_finished,
document_consumption_started document_consumption_started
@ -38,6 +39,10 @@ class Consumer(LoggingMixin):
def pre_check_file_exists(self): def pre_check_file_exists(self):
if not os.path.isfile(self.path): if not os.path.isfile(self.path):
self.log(
"error",
"Cannot consume {}: It is not a file.".format(self.path)
)
raise ConsumerError("Cannot consume {}: It is not a file".format( raise ConsumerError("Cannot consume {}: It is not a file".format(
self.path)) self.path))
@ -47,6 +52,10 @@ class Consumer(LoggingMixin):
if Document.objects.filter(Q(checksum=checksum) | Q(archive_checksum=checksum)).exists(): # NOQA: E501 if Document.objects.filter(Q(checksum=checksum) | Q(archive_checksum=checksum)).exists(): # NOQA: E501
if settings.CONSUMER_DELETE_DUPLICATES: if settings.CONSUMER_DELETE_DUPLICATES:
os.unlink(self.path) os.unlink(self.path)
self.log(
"error",
"Not consuming {}: It is a duplicate.".format(self.filename)
)
raise ConsumerError( raise ConsumerError(
"Not consuming {}: It is a duplicate.".format(self.filename) "Not consuming {}: It is a duplicate.".format(self.filename)
) )
@ -148,8 +157,9 @@ class Consumer(LoggingMixin):
classifier = DocumentClassifier() classifier = DocumentClassifier()
classifier.reload() classifier.reload()
except (FileNotFoundError, IncompatibleClassifierVersionError) as e: except (FileNotFoundError, IncompatibleClassifierVersionError) as e:
logging.getLogger(__name__).warning( self.log(
"Cannot classify documents: {}.".format(e)) "warning",
f"Cannot classify documents: {e}.")
classifier = None classifier = None
# now that everything is done, we can start to store the document # now that everything is done, we can start to store the document
@ -176,9 +186,9 @@ class Consumer(LoggingMixin):
# After everything is in the database, copy the files into # After everything is in the database, copy the files into
# place. If this fails, we'll also rollback the transaction. # place. If this fails, we'll also rollback the transaction.
with FileLock(settings.MEDIA_LOCK):
# TODO: not required, since this is done by the file handling document.filename = generate_unique_filename(
# logic document, settings.ORIGINALS_DIR)
create_source_path_directory(document.source_path) create_source_path_directory(document.source_path)
self._write(document.storage_type, self._write(document.storage_type,
@ -188,19 +198,14 @@ class Consumer(LoggingMixin):
thumbnail, document.thumbnail_path) thumbnail, document.thumbnail_path)
if archive_path and os.path.isfile(archive_path): if archive_path and os.path.isfile(archive_path):
create_source_path_directory(document.archive_path)
self._write(document.storage_type, self._write(document.storage_type,
archive_path, document.archive_path) archive_path, document.archive_path)
with open(archive_path, 'rb') as f: with open(archive_path, 'rb') as f:
document.archive_checksum = hashlib.md5( document.archive_checksum = hashlib.md5(
f.read()).hexdigest() f.read()).hexdigest()
document.save()
# Afte performing all database operations and moving files
# into place, tell paperless where the file is.
document.filename = os.path.basename(document.source_path)
# Saving the document now will trigger the filename handling
# logic.
document.save() document.save()
# Delete the file only if it was successfully consumed # Delete the file only if it was successfully consumed

View File

@ -70,7 +70,22 @@ def many_to_dictionary(field):
return mydictionary return mydictionary
def generate_filename(doc): def generate_unique_filename(doc, root):
counter = 0
while True:
new_filename = generate_filename(doc, counter)
if new_filename == doc.filename:
# still the same as before.
return new_filename
if os.path.exists(os.path.join(root, new_filename)):
counter += 1
else:
return new_filename
def generate_filename(doc, counter=0):
path = "" path = ""
try: try:
@ -112,11 +127,11 @@ def generate_filename(doc):
f"Invalid PAPERLESS_FILENAME_FORMAT: " f"Invalid PAPERLESS_FILENAME_FORMAT: "
f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default") f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default")
# Always append the primary key to guarantee uniqueness of filename counter_str = f"_{counter:02}" if counter else ""
if len(path) > 0: if len(path) > 0:
filename = "%s-%07i%s" % (path, doc.pk, doc.file_type) filename = f"{path}{counter_str}{doc.file_type}"
else: else:
filename = "%07i%s" % (doc.pk, doc.file_type) filename = f"{doc.pk:07}{counter_str}{doc.file_type}"
# Append .gpg for encrypted files # Append .gpg for encrypted files
if doc.storage_type == doc.STORAGE_TYPE_GPG: if doc.storage_type == doc.STORAGE_TYPE_GPG:

View File

@ -5,11 +5,13 @@ import shutil
from django.conf import settings from django.conf import settings
from django.core.management import call_command from django.core.management import call_command
from django.core.management.base import BaseCommand, CommandError from django.core.management.base import BaseCommand, CommandError
from filelock import FileLock
from documents.models import Document from documents.models import Document
from documents.settings import EXPORTER_FILE_NAME, EXPORTER_THUMBNAIL_NAME, \ from documents.settings import EXPORTER_FILE_NAME, EXPORTER_THUMBNAIL_NAME, \
EXPORTER_ARCHIVE_NAME EXPORTER_ARCHIVE_NAME
from ...file_handling import generate_filename, create_source_path_directory from ...file_handling import create_source_path_directory, \
generate_unique_filename
from ...mixins import Renderable from ...mixins import Renderable
@ -114,7 +116,9 @@ class Command(Renderable, BaseCommand):
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.filename = generate_filename(document) with FileLock(settings.MEDIA_LOCK):
document.filename = generate_unique_filename(
document, settings.ORIGINALS_DIR)
if os.path.isfile(document.source_path): if os.path.isfile(document.source_path):
raise FileExistsError(document.source_path) raise FileExistsError(document.source_path)
@ -125,6 +129,7 @@ class Command(Renderable, BaseCommand):
shutil.copy(document_path, document.source_path) shutil.copy(document_path, document.source_path)
shutil.copy(thumbnail_path, document.thumbnail_path) shutil.copy(thumbnail_path, document.thumbnail_path)
if archive_path: if archive_path:
create_source_path_directory(document.archive_path)
shutil.copy(archive_path, document.archive_path) shutil.copy(archive_path, document.archive_path)
document.save() document.save()

View File

@ -9,11 +9,13 @@ from django.contrib.contenttypes.models import ContentType
from django.db import models, DatabaseError from django.db import models, DatabaseError
from django.dispatch import receiver from django.dispatch import receiver
from django.utils import timezone from django.utils import timezone
from filelock import FileLock
from rest_framework.reverse import reverse from rest_framework.reverse import reverse
from .. import index, matching from .. import index, matching
from ..file_handling import delete_empty_directories, generate_filename, \ from ..file_handling import delete_empty_directories, \
create_source_path_directory, archive_name_from_filename create_source_path_directory, archive_name_from_filename, \
generate_unique_filename
from ..models import Document, Tag from ..models import Document, Tag
@ -226,8 +228,10 @@ def update_filename_and_move_files(sender, instance, **kwargs):
# This will in turn cause this logic to move the file where it belongs. # This will in turn cause this logic to move the file where it belongs.
return return
with FileLock(settings.MEDIA_LOCK):
old_filename = instance.filename old_filename = instance.filename
new_filename = generate_filename(instance) new_filename = generate_unique_filename(
instance, settings.ORIGINALS_DIR)
if new_filename == instance.filename: if new_filename == instance.filename:
# Don't do anything if it's the same. # Don't do anything if it's the same.
@ -262,8 +266,10 @@ def update_filename_and_move_files(sender, instance, **kwargs):
if instance.archive_checksum: if instance.archive_checksum:
os.rename(old_archive_path, new_archive_path) os.rename(old_archive_path, new_archive_path)
instance.filename = new_filename instance.filename = new_filename
# Don't save here to prevent infinite recursion.
Document.objects.filter(pk=instance.pk).update(filename=new_filename) # Don't save() here to prevent infinite recursion.
Document.objects.filter(pk=instance.pk).update(
filename=new_filename)
logging.getLogger(__name__).debug( logging.getLogger(__name__).debug(
f"Moved file {old_source_path} to {new_source_path}.") f"Moved file {old_source_path} to {new_source_path}.")
@ -274,26 +280,35 @@ def update_filename_and_move_files(sender, instance, **kwargs):
except OSError as e: except OSError as e:
instance.filename = old_filename instance.filename = old_filename
# this happens when we can't move a file. If that's the case for the # this happens when we can't move a file. If that's the case for
# archive file, we try our best to revert the changes. # the archive file, we try our best to revert the changes.
# no need to save the instance, the update() has not happened yet.
try: try:
os.rename(new_source_path, old_source_path) os.rename(new_source_path, old_source_path)
os.rename(new_archive_path, old_archive_path) os.rename(new_archive_path, old_archive_path)
except Exception as e: except Exception as e:
# This is fine, since: # This is fine, since:
# A: if we managed to move source from A to B, we will also manage # A: if we managed to move source from A to B, we will also
# to move it from B to A. If not, we have a serious issue # manage to move it from B to A. If not, we have a serious
# that's going to get caught by the sanity checker. # issue that's going to get caught by the sanity checker.
# all files remain in place and will never be overwritten, # All files remain in place and will never be overwritten,
# so this is not the end of the world. # so this is not the end of the world.
# B: if moving the original file failed, nothing has changed anyway. # B: if moving the original file failed, nothing has changed
# anyway.
pass pass
except DatabaseError as e: except DatabaseError as e:
# this happens after moving files, so move them back into place.
# since moving them once succeeded, it's very likely going to
# succeed again.
os.rename(new_source_path, old_source_path) os.rename(new_source_path, old_source_path)
if instance.archive_checksum: if instance.archive_checksum:
os.rename(new_archive_path, old_archive_path) os.rename(new_archive_path, old_archive_path)
instance.filename = old_filename instance.filename = old_filename
# again, no need to save the instance, since the actual update()
# operation failed.
# finally, remove any empty sub folders. This will do nothing if
# something has failed above.
if not os.path.isfile(old_source_path): if not os.path.isfile(old_source_path):
delete_empty_directories(os.path.dirname(old_source_path), delete_empty_directories(os.path.dirname(old_source_path),
root=settings.ORIGINALS_DIR) root=settings.ORIGINALS_DIR)

View File

@ -598,10 +598,10 @@ class TestConsumer(DirectoriesMixin, TestCase):
self.assertEqual(document.title, "new docs") self.assertEqual(document.title, "new docs")
self.assertEqual(document.correspondent.name, "Bank") self.assertEqual(document.correspondent.name, "Bank")
self.assertEqual(document.filename, "Bank/new docs-0000001.pdf") self.assertEqual(document.filename, "Bank/new docs.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}") @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
@mock.patch("documents.signals.handlers.generate_filename") @mock.patch("documents.signals.handlers.generate_unique_filename")
def testFilenameHandlingUnstableFormat(self, m): def testFilenameHandlingUnstableFormat(self, m):
filenames = ["this", "that", "now this", "i cant decide"] filenames = ["this", "that", "now this", "i cant decide"]
@ -611,7 +611,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
filenames.insert(0, f) filenames.insert(0, f)
return f return f
m.side_effect = lambda f: get_filename() m.side_effect = lambda f, root: get_filename()
filename = self.get_test_file() filename = self.get_test_file()

View File

@ -40,13 +40,13 @@ class TestFileHandling(DirectoriesMixin, TestCase):
document.filename = generate_filename(document) document.filename = generate_filename(document)
# Ensure that filename is properly generated # Ensure that filename is properly generated
self.assertEqual(document.filename, "none/none-{:07d}.pdf".format(document.pk)) self.assertEqual(document.filename, "none/none.pdf")
# Enable encryption and check again # Enable encryption and check again
document.storage_type = Document.STORAGE_TYPE_GPG document.storage_type = Document.STORAGE_TYPE_GPG
document.filename = generate_filename(document) document.filename = generate_filename(document)
self.assertEqual(document.filename, self.assertEqual(document.filename,
"none/none-{:07d}.pdf.gpg".format(document.pk)) "none/none.pdf.gpg")
document.save() document.save()
@ -62,7 +62,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Check proper handling of files # Check proper handling of files
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/test"), True) self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/test"), True)
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False) self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/test/test-{:07d}.pdf.gpg".format(document.pk)), True) self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/test/test.pdf.gpg"), True)
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}") @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
def test_file_renaming_missing_permissions(self): def test_file_renaming_missing_permissions(self):
@ -74,12 +74,12 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Ensure that filename is properly generated # Ensure that filename is properly generated
document.filename = generate_filename(document) document.filename = generate_filename(document)
self.assertEqual(document.filename, self.assertEqual(document.filename,
"none/none-{:07d}.pdf".format(document.pk)) "none/none.pdf")
create_source_path_directory(document.source_path) create_source_path_directory(document.source_path)
Path(document.source_path).touch() Path(document.source_path).touch()
# Test source_path # Test source_path
self.assertEqual(document.source_path, settings.ORIGINALS_DIR + "/none/none-{:07d}.pdf".format(document.pk)) self.assertEqual(document.source_path, settings.ORIGINALS_DIR + "/none/none.pdf")
# Make the folder read- and execute-only (no writing and no renaming) # Make the folder read- and execute-only (no writing and no renaming)
os.chmod(settings.ORIGINALS_DIR + "/none", 0o555) os.chmod(settings.ORIGINALS_DIR + "/none", 0o555)
@ -89,8 +89,8 @@ class TestFileHandling(DirectoriesMixin, TestCase):
document.save() document.save()
# Check proper handling of files # Check proper handling of files
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none-{:07d}.pdf".format(document.pk)), True) self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), True)
self.assertEqual(document.filename, "none/none-{:07d}.pdf".format(document.pk)) self.assertEqual(document.filename, "none/none.pdf")
os.chmod(settings.ORIGINALS_DIR + "/none", 0o777) os.chmod(settings.ORIGINALS_DIR + "/none", 0o777)
@ -108,7 +108,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Ensure that filename is properly generated # Ensure that filename is properly generated
document.filename = generate_filename(document) document.filename = generate_filename(document)
self.assertEqual(document.filename, self.assertEqual(document.filename,
"none/none-{:07d}.pdf".format(document.pk)) "none/none.pdf")
create_source_path_directory(document.source_path) create_source_path_directory(document.source_path)
Path(document.source_path).touch() Path(document.source_path).touch()
@ -125,8 +125,8 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Check proper handling of files # Check proper handling of files
self.assertTrue(os.path.isfile(document.source_path)) self.assertTrue(os.path.isfile(document.source_path))
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none-{:07d}.pdf".format(document.pk)), True) self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), True)
self.assertEqual(document.filename, "none/none-{:07d}.pdf".format(document.pk)) self.assertEqual(document.filename, "none/none.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}") @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
def test_document_delete(self): def test_document_delete(self):
@ -138,7 +138,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Ensure that filename is properly generated # Ensure that filename is properly generated
document.filename = generate_filename(document) document.filename = generate_filename(document)
self.assertEqual(document.filename, self.assertEqual(document.filename,
"none/none-{:07d}.pdf".format(document.pk)) "none/none.pdf")
create_source_path_directory(document.source_path) create_source_path_directory(document.source_path)
Path(document.source_path).touch() Path(document.source_path).touch()
@ -146,7 +146,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Ensure file deletion after delete # Ensure file deletion after delete
pk = document.pk pk = document.pk
document.delete() document.delete()
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none-{:07d}.pdf".format(pk)), False) self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), False)
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False) self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}") @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
@ -168,7 +168,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Ensure that filename is properly generated # Ensure that filename is properly generated
document.filename = generate_filename(document) document.filename = generate_filename(document)
self.assertEqual(document.filename, self.assertEqual(document.filename,
"none/none-{:07d}.pdf".format(document.pk)) "none/none.pdf")
create_source_path_directory(document.source_path) create_source_path_directory(document.source_path)
@ -199,7 +199,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Ensure that filename is properly generated # Ensure that filename is properly generated
self.assertEqual(generate_filename(document), self.assertEqual(generate_filename(document),
"demo-{:07d}.pdf".format(document.pk)) "demo.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}") @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
def test_tags_with_dash(self): def test_tags_with_dash(self):
@ -215,7 +215,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Ensure that filename is properly generated # Ensure that filename is properly generated
self.assertEqual(generate_filename(document), self.assertEqual(generate_filename(document),
"demo-{:07d}.pdf".format(document.pk)) "demo.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}") @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
def test_tags_malformed(self): def test_tags_malformed(self):
@ -231,7 +231,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Ensure that filename is properly generated # Ensure that filename is properly generated
self.assertEqual(generate_filename(document), self.assertEqual(generate_filename(document),
"none-{:07d}.pdf".format(document.pk)) "none.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[0]}") @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[0]}")
def test_tags_all(self): def test_tags_all(self):
@ -246,7 +246,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Ensure that filename is properly generated # Ensure that filename is properly generated
self.assertEqual(generate_filename(document), self.assertEqual(generate_filename(document),
"demo-{:07d}.pdf".format(document.pk)) "demo.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[1]}") @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[1]}")
def test_tags_out_of_bounds(self): def test_tags_out_of_bounds(self):
@ -261,7 +261,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Ensure that filename is properly generated # Ensure that filename is properly generated
self.assertEqual(generate_filename(document), self.assertEqual(generate_filename(document),
"none-{:07d}.pdf".format(document.pk)) "none.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}/{correspondent}") @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}/{correspondent}")
def test_nested_directory_cleanup(self): def test_nested_directory_cleanup(self):
@ -272,7 +272,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Ensure that filename is properly generated # Ensure that filename is properly generated
document.filename = generate_filename(document) document.filename = generate_filename(document)
self.assertEqual(document.filename, "none/none/none-{:07d}.pdf".format(document.pk)) self.assertEqual(document.filename, "none/none/none.pdf")
create_source_path_directory(document.source_path) create_source_path_directory(document.source_path)
Path(document.source_path).touch() Path(document.source_path).touch()
@ -282,7 +282,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
pk = document.pk pk = document.pk
document.delete() document.delete()
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none/none-{:07d}.pdf".format(pk)), False) self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none/none.pdf"), False)
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none/none"), False) self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none/none"), False)
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False) self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR), True) self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR), True)
@ -330,6 +330,48 @@ class TestFileHandling(DirectoriesMixin, TestCase):
self.assertEqual(generate_filename(document), "0000001.pdf") self.assertEqual(generate_filename(document), "0000001.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
def test_duplicates(self):
document = Document.objects.create(mime_type="application/pdf", title="qwe", checksum="A", pk=1)
document2 = Document.objects.create(mime_type="application/pdf", title="qwe", checksum="B", pk=2)
Path(document.source_path).touch()
Path(document2.source_path).touch()
document.filename = "0000001.pdf"
document.save()
self.assertTrue(os.path.isfile(document.source_path))
self.assertEqual(document.filename, "qwe.pdf")
document2.filename = "0000002.pdf"
document2.save()
self.assertTrue(os.path.isfile(document.source_path))
self.assertEqual(document2.filename, "qwe_01.pdf")
# saving should not change the file names.
document.save()
self.assertTrue(os.path.isfile(document.source_path))
self.assertEqual(document.filename, "qwe.pdf")
document2.save()
self.assertTrue(os.path.isfile(document.source_path))
self.assertEqual(document2.filename, "qwe_01.pdf")
document.delete()
self.assertFalse(os.path.isfile(document.source_path))
# filename free, should remove _01 suffix
document2.save()
self.assertTrue(os.path.isfile(document.source_path))
self.assertEqual(document2.filename, "qwe.pdf")
class TestFileHandlingWithArchive(DirectoriesMixin, TestCase): class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
@ -358,15 +400,14 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
self.assertFalse(os.path.isfile(archive)) self.assertFalse(os.path.isfile(archive))
self.assertTrue(os.path.isfile(doc.source_path)) self.assertTrue(os.path.isfile(doc.source_path))
self.assertTrue(os.path.isfile(doc.archive_path)) self.assertTrue(os.path.isfile(doc.archive_path))
self.assertEqual(doc.source_path, os.path.join(settings.ORIGINALS_DIR, "none", "my_doc-0000001.pdf")) self.assertEqual(doc.source_path, os.path.join(settings.ORIGINALS_DIR, "none", "my_doc.pdf"))
self.assertEqual(doc.archive_path, os.path.join(settings.ARCHIVE_DIR, "none", "my_doc-0000001.pdf")) self.assertEqual(doc.archive_path, os.path.join(settings.ARCHIVE_DIR, "none", "my_doc.pdf"))
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}") @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
def test_move_archive_gone(self): def test_move_archive_gone(self):
original = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf") original = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf") archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
Path(original).touch() Path(original).touch()
#Path(archive).touch()
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B") doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B")
self.assertTrue(os.path.isfile(original)) self.assertTrue(os.path.isfile(original))
@ -381,7 +422,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
Path(original).touch() Path(original).touch()
Path(archive).touch() Path(archive).touch()
os.makedirs(os.path.join(settings.ARCHIVE_DIR, "none")) os.makedirs(os.path.join(settings.ARCHIVE_DIR, "none"))
Path(os.path.join(settings.ARCHIVE_DIR, "none", "my_doc-0000001.pdf")).touch() Path(os.path.join(settings.ARCHIVE_DIR, "none", "my_doc.pdf")).touch()
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B") doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B")
self.assertTrue(os.path.isfile(original)) self.assertTrue(os.path.isfile(original))
@ -494,14 +535,14 @@ class TestFilenameGeneration(TestCase):
def test_invalid_characters(self): def test_invalid_characters(self):
doc = Document.objects.create(title="This. is the title.", mime_type="application/pdf", pk=1, checksum="1") doc = Document.objects.create(title="This. is the title.", mime_type="application/pdf", pk=1, checksum="1")
self.assertEqual(generate_filename(doc), "This. is the title-0000001.pdf") self.assertEqual(generate_filename(doc), "This. is the title.pdf")
doc = Document.objects.create(title="my\\invalid/../title:yay", mime_type="application/pdf", pk=2, checksum="2") doc = Document.objects.create(title="my\\invalid/../title:yay", mime_type="application/pdf", pk=2, checksum="2")
self.assertEqual(generate_filename(doc), "my-invalid-..-title-yay-0000002.pdf") self.assertEqual(generate_filename(doc), "my-invalid-..-title-yay.pdf")
@override_settings( @override_settings(
PAPERLESS_FILENAME_FORMAT="{created}" PAPERLESS_FILENAME_FORMAT="{created}"
) )
def test_date(self): def test_date(self):
doc = Document.objects.create(title="does not matter", created=datetime.datetime(2020,5,21, 7,36,51, 153), mime_type="application/pdf", pk=2, checksum="2") doc = Document.objects.create(title="does not matter", created=datetime.datetime(2020,5,21, 7,36,51, 153), mime_type="application/pdf", pk=2, checksum="2")
self.assertEqual(generate_filename(doc), "2020-05-21-0000002.pdf") self.assertEqual(generate_filename(doc), "2020-05-21.pdf")

View File

@ -53,6 +53,10 @@ ARCHIVE_DIR = os.path.join(MEDIA_ROOT, "documents", "archive")
THUMBNAIL_DIR = os.path.join(MEDIA_ROOT, "documents", "thumbnails") THUMBNAIL_DIR = os.path.join(MEDIA_ROOT, "documents", "thumbnails")
DATA_DIR = os.getenv('PAPERLESS_DATA_DIR', os.path.join(BASE_DIR, "..", "data")) DATA_DIR = os.getenv('PAPERLESS_DATA_DIR', os.path.join(BASE_DIR, "..", "data"))
# Lock file for synchronizing changes to the MEDIA directory across multiple
# threads.
MEDIA_LOCK = os.path.join(MEDIA_ROOT, "media.lock")
INDEX_DIR = os.path.join(DATA_DIR, "index") INDEX_DIR = os.path.join(DATA_DIR, "index")
MODEL_FILE = os.path.join(DATA_DIR, "classification_model.pickle") MODEL_FILE = os.path.join(DATA_DIR, "classification_model.pickle")