This commit is contained in:
jonaswinkler 2020-12-08 13:54:35 +01:00
parent c240fa1883
commit 9da11f29c7
10 changed files with 245 additions and 148 deletions

View File

@ -19,6 +19,7 @@ django-extensions = "*"
django-filter = "~=2.4.0"
django-q = "~=1.3.4"
djangorestframework = "~=3.12.2"
filelock = "*"
fuzzywuzzy = "*"
gunicorn = "*"
imap-tools = "*"

37
Pipfile.lock generated
View File

@ -1,7 +1,7 @@
{
"_meta": {
"hash": {
"sha256": "b10db53eb22d917723aa6107ff0970dc4e2aa886ee03d3ae08a994a856d57986"
"sha256": "3c187671ead11714d48b56f4714b145f68814e09edea818610b87f18b4f7f6fd"
},
"pipfile-spec": 6,
"requires": {
@ -197,6 +197,14 @@
"index": "pypi",
"version": "==3.12.2"
},
"filelock": {
"hashes": [
"sha256:18d82244ee114f543149c66a6e0c14e9c4f8a1044b5cdaadd0f82159d6a6ff59",
"sha256:929b7d63ec5b7d6b71b0fa5ac14e030b3f70b75747cef1b10da9b879fef15836"
],
"index": "pypi",
"version": "==3.0.12"
},
"fuzzywuzzy": {
"hashes": [
"sha256:45016e92264780e58972dca1b3d939ac864b78437422beecebb3095f8efd00e8",
@ -858,10 +866,10 @@
},
"certifi": {
"hashes": [
"sha256:1f422849db327d534e3d0c5f02a263458c3955ec0aae4ff09b95f195c59f4edd",
"sha256:f05def092c44fbf25834a51509ef6e631dc19765ab8a57b4e7ab85531f0a9cf4"
"sha256:1a4995114262bffbc2413b159f2a1a480c969de6e6eb13ee966d470af86af59c",
"sha256:719a74fb9e33b9bd44cc7f3a8d94bc35e4049deebe19ba7d8e108280cfd59830"
],
"version": "==2020.11.8"
"version": "==2020.12.5"
},
"chardet": {
"hashes": [
@ -961,17 +969,18 @@
},
"faker": {
"hashes": [
"sha256:7bca5b074299ac6532be2f72979e6793f1a2403ca8105cb4cf0b385a964469c4",
"sha256:fb21a76064847561033d8cab1cfd11af436ddf2c6fe72eb51b3cda51dff86bdc"
"sha256:1fcb415562ee6e2395b041e85fa6901d4708d30b84d54015226fa754ed0822c3",
"sha256:e8beccb398ee9b8cc1a91d9295121d66512b6753b4846eb1e7370545d46b3311"
],
"markers": "python_version >= '3.5'",
"version": "==5.0.0"
"markers": "python_version >= '3.6'",
"version": "==5.0.1"
},
"filelock": {
"hashes": [
"sha256:18d82244ee114f543149c66a6e0c14e9c4f8a1044b5cdaadd0f82159d6a6ff59",
"sha256:929b7d63ec5b7d6b71b0fa5ac14e030b3f70b75747cef1b10da9b879fef15836"
],
"index": "pypi",
"version": "==3.0.12"
},
"idna": {
@ -1100,11 +1109,11 @@
},
"pygments": {
"hashes": [
"sha256:381985fcc551eb9d37c52088a32914e00517e57f4a21609f48141ba08e193fa0",
"sha256:88a0bbcd659fcb9573703957c6b9cff9fab7295e6e76db54c9d00ae42df32773"
"sha256:ccf3acacf3782cbed4a989426012f1c535c9a90d3a7fc3f16d231b9372d2b716",
"sha256:f275b6c0909e5dafd2d6269a656aa90fa58ebf4a74f8fcf9053195d226b24a08"
],
"markers": "python_version >= '3.5'",
"version": "==2.7.2"
"version": "==2.7.3"
},
"pyparsing": {
"hashes": [
@ -1313,11 +1322,11 @@
},
"virtualenv": {
"hashes": [
"sha256:07cff122e9d343140366055f31be4dcd61fd598c69d11cd33a9d9c8df4546dd7",
"sha256:e0aac7525e880a429764cefd3aaaff54afb5d9f25c82627563603f5d7de5a6e5"
"sha256:54b05fc737ea9c9ee9f8340f579e5da5b09fb64fd010ab5757eb90268616907c",
"sha256:b7a8ec323ee02fb2312f098b6b4c9de99559b462775bc8fe3627a73706603c1b"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==20.2.1"
"version": "==20.2.2"
},
"zipp": {
"hashes": [

View File

@ -31,6 +31,8 @@ This release focusses primarily on many small issues with the UI.
* ``FILENAME_FORMAT`` placeholder for document types.
* The filename formatter is now less restrictive with file names and tries to
conserve the original correspondents, types and titles as much as possible.
* The filename formatter does not include the document ID in filenames anymore. It will
rather append ``_01``, ``_02``, etc when it detects duplicate filenames.
paperless-ng 0.9.5

View File

@ -8,13 +8,14 @@ from django.conf import settings
from django.db import transaction
from django.db.models import Q
from django.utils import timezone
from filelock import FileLock
from .classifier import DocumentClassifier, IncompatibleClassifierVersionError
from .file_handling import create_source_path_directory
from .file_handling import create_source_path_directory, \
generate_unique_filename
from .loggers import LoggingMixin
from .models import Document, FileInfo, Correspondent, DocumentType, Tag
from .parsers import ParseError, get_parser_class_for_mime_type, \
get_supported_file_extensions, parse_date
from .parsers import ParseError, get_parser_class_for_mime_type, parse_date
from .signals import (
document_consumption_finished,
document_consumption_started
@ -38,6 +39,10 @@ class Consumer(LoggingMixin):
def pre_check_file_exists(self):
if not os.path.isfile(self.path):
self.log(
"error",
"Cannot consume {}: It is not a file.".format(self.path)
)
raise ConsumerError("Cannot consume {}: It is not a file".format(
self.path))
@ -47,6 +52,10 @@ class Consumer(LoggingMixin):
if Document.objects.filter(Q(checksum=checksum) | Q(archive_checksum=checksum)).exists(): # NOQA: E501
if settings.CONSUMER_DELETE_DUPLICATES:
os.unlink(self.path)
self.log(
"error",
"Not consuming {}: It is a duplicate.".format(self.filename)
)
raise ConsumerError(
"Not consuming {}: It is a duplicate.".format(self.filename)
)
@ -148,8 +157,9 @@ class Consumer(LoggingMixin):
classifier = DocumentClassifier()
classifier.reload()
except (FileNotFoundError, IncompatibleClassifierVersionError) as e:
logging.getLogger(__name__).warning(
"Cannot classify documents: {}.".format(e))
self.log(
"warning",
f"Cannot classify documents: {e}.")
classifier = None
# now that everything is done, we can start to store the document
@ -176,31 +186,26 @@ class Consumer(LoggingMixin):
# After everything is in the database, copy the files into
# place. If this fails, we'll also rollback the transaction.
with FileLock(settings.MEDIA_LOCK):
document.filename = generate_unique_filename(
document, settings.ORIGINALS_DIR)
create_source_path_directory(document.source_path)
# TODO: not required, since this is done by the file handling
# logic
create_source_path_directory(document.source_path)
self._write(document.storage_type,
self.path, document.source_path)
self._write(document.storage_type,
thumbnail, document.thumbnail_path)
if archive_path and os.path.isfile(archive_path):
self._write(document.storage_type,
archive_path, document.archive_path)
self.path, document.source_path)
with open(archive_path, 'rb') as f:
document.archive_checksum = hashlib.md5(
f.read()).hexdigest()
document.save()
self._write(document.storage_type,
thumbnail, document.thumbnail_path)
if archive_path and os.path.isfile(archive_path):
create_source_path_directory(document.archive_path)
self._write(document.storage_type,
archive_path, document.archive_path)
with open(archive_path, 'rb') as f:
document.archive_checksum = hashlib.md5(
f.read()).hexdigest()
# Afte performing all database operations and moving files
# into place, tell paperless where the file is.
document.filename = os.path.basename(document.source_path)
# Saving the document now will trigger the filename handling
# logic.
document.save()
# Delete the file only if it was successfully consumed

View File

@ -70,7 +70,22 @@ def many_to_dictionary(field):
return mydictionary
def generate_filename(doc):
def generate_unique_filename(doc, root):
counter = 0
while True:
new_filename = generate_filename(doc, counter)
if new_filename == doc.filename:
# still the same as before.
return new_filename
if os.path.exists(os.path.join(root, new_filename)):
counter += 1
else:
return new_filename
def generate_filename(doc, counter=0):
path = ""
try:
@ -112,11 +127,11 @@ def generate_filename(doc):
f"Invalid PAPERLESS_FILENAME_FORMAT: "
f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default")
# Always append the primary key to guarantee uniqueness of filename
counter_str = f"_{counter:02}" if counter else ""
if len(path) > 0:
filename = "%s-%07i%s" % (path, doc.pk, doc.file_type)
filename = f"{path}{counter_str}{doc.file_type}"
else:
filename = "%07i%s" % (doc.pk, doc.file_type)
filename = f"{doc.pk:07}{counter_str}{doc.file_type}"
# Append .gpg for encrypted files
if doc.storage_type == doc.STORAGE_TYPE_GPG:

View File

@ -5,11 +5,13 @@ import shutil
from django.conf import settings
from django.core.management import call_command
from django.core.management.base import BaseCommand, CommandError
from filelock import FileLock
from documents.models import Document
from documents.settings import EXPORTER_FILE_NAME, EXPORTER_THUMBNAIL_NAME, \
EXPORTER_ARCHIVE_NAME
from ...file_handling import generate_filename, create_source_path_directory
from ...file_handling import create_source_path_directory, \
generate_unique_filename
from ...mixins import Renderable
@ -114,17 +116,20 @@ class Command(Renderable, BaseCommand):
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.filename = generate_filename(document)
with FileLock(settings.MEDIA_LOCK):
document.filename = generate_unique_filename(
document, settings.ORIGINALS_DIR)
if os.path.isfile(document.source_path):
raise FileExistsError(document.source_path)
if os.path.isfile(document.source_path):
raise FileExistsError(document.source_path)
create_source_path_directory(document.source_path)
create_source_path_directory(document.source_path)
print(f"Moving {document_path} to {document.source_path}")
shutil.copy(document_path, document.source_path)
shutil.copy(thumbnail_path, document.thumbnail_path)
if archive_path:
shutil.copy(archive_path, document.archive_path)
print(f"Moving {document_path} to {document.source_path}")
shutil.copy(document_path, document.source_path)
shutil.copy(thumbnail_path, document.thumbnail_path)
if archive_path:
create_source_path_directory(document.archive_path)
shutil.copy(archive_path, document.archive_path)
document.save()

View File

@ -9,11 +9,13 @@ from django.contrib.contenttypes.models import ContentType
from django.db import models, DatabaseError
from django.dispatch import receiver
from django.utils import timezone
from filelock import FileLock
from rest_framework.reverse import reverse
from .. import index, matching
from ..file_handling import delete_empty_directories, generate_filename, \
create_source_path_directory, archive_name_from_filename
from ..file_handling import delete_empty_directories, \
create_source_path_directory, archive_name_from_filename, \
generate_unique_filename
from ..models import Document, Tag
@ -226,81 +228,94 @@ def update_filename_and_move_files(sender, instance, **kwargs):
# This will in turn cause this logic to move the file where it belongs.
return
old_filename = instance.filename
new_filename = generate_filename(instance)
with FileLock(settings.MEDIA_LOCK):
old_filename = instance.filename
new_filename = generate_unique_filename(
instance, settings.ORIGINALS_DIR)
if new_filename == instance.filename:
# Don't do anything if its the same.
return
old_source_path = instance.source_path
new_source_path = os.path.join(settings.ORIGINALS_DIR, new_filename)
if not validate_move(instance, old_source_path, new_source_path):
return
# archive files are optional, archive checksum tells us if we have one,
# since this is None for documents without archived files.
if instance.archive_checksum:
new_archive_filename = archive_name_from_filename(new_filename)
old_archive_path = instance.archive_path
new_archive_path = os.path.join(settings.ARCHIVE_DIR,
new_archive_filename)
if not validate_move(instance, old_archive_path, new_archive_path):
if new_filename == instance.filename:
# Don't do anything if its the same.
return
create_source_path_directory(new_archive_path)
else:
old_archive_path = None
new_archive_path = None
old_source_path = instance.source_path
new_source_path = os.path.join(settings.ORIGINALS_DIR, new_filename)
create_source_path_directory(new_source_path)
if not validate_move(instance, old_source_path, new_source_path):
return
try:
os.rename(old_source_path, new_source_path)
# archive files are optional, archive checksum tells us if we have one,
# since this is None for documents without archived files.
if instance.archive_checksum:
os.rename(old_archive_path, new_archive_path)
instance.filename = new_filename
# Don't save here to prevent infinite recursion.
Document.objects.filter(pk=instance.pk).update(filename=new_filename)
new_archive_filename = archive_name_from_filename(new_filename)
old_archive_path = instance.archive_path
new_archive_path = os.path.join(settings.ARCHIVE_DIR,
new_archive_filename)
logging.getLogger(__name__).debug(
f"Moved file {old_source_path} to {new_source_path}.")
if not validate_move(instance, old_archive_path, new_archive_path):
return
if instance.archive_checksum:
logging.getLogger(__name__).debug(
f"Moved file {old_archive_path} to {new_archive_path}.")
create_source_path_directory(new_archive_path)
else:
old_archive_path = None
new_archive_path = None
create_source_path_directory(new_source_path)
except OSError as e:
instance.filename = old_filename
# this happens when we can't move a file. If that's the case for the
# archive file, we try our best to revert the changes.
try:
os.rename(old_source_path, new_source_path)
if instance.archive_checksum:
os.rename(old_archive_path, new_archive_path)
instance.filename = new_filename
# Don't save() here to prevent infinite recursion.
Document.objects.filter(pk=instance.pk).update(
filename=new_filename)
logging.getLogger(__name__).debug(
f"Moved file {old_source_path} to {new_source_path}.")
if instance.archive_checksum:
logging.getLogger(__name__).debug(
f"Moved file {old_archive_path} to {new_archive_path}.")
except OSError as e:
instance.filename = old_filename
# this happens when we can't move a file. If that's the case for
# the archive file, we try our best to revert the changes.
# no need to save the instance, the update() has not happened yet.
try:
os.rename(new_source_path, old_source_path)
os.rename(new_archive_path, old_archive_path)
except Exception as e:
# This is fine, since:
# A: if we managed to move source from A to B, we will also
# manage to move it from B to A. If not, we have a serious
# issue that's going to get caught by the santiy checker.
# All files remain in place and will never be overwritten,
# so this is not the end of the world.
# B: if moving the orignal file failed, nothing has changed
# anyway.
pass
except DatabaseError as e:
# this happens after moving files, so move them back into place.
# since moving them once succeeded, it's very likely going to
# succeed again.
os.rename(new_source_path, old_source_path)
os.rename(new_archive_path, old_archive_path)
except Exception as e:
# This is fine, since:
# A: if we managed to move source from A to B, we will also manage
# to move it from B to A. If not, we have a serious issue
# that's going to get caught by the santiy checker.
# all files remain in place and will never be overwritten,
# so this is not the end of the world.
# B: if moving the orignal file failed, nothing has changed anyway.
pass
except DatabaseError as e:
os.rename(new_source_path, old_source_path)
if instance.archive_checksum:
os.rename(new_archive_path, old_archive_path)
instance.filename = old_filename
if instance.archive_checksum:
os.rename(new_archive_path, old_archive_path)
instance.filename = old_filename
# again, no need to save the instance, since the actual update()
# operation failed.
if not os.path.isfile(old_source_path):
delete_empty_directories(os.path.dirname(old_source_path),
root=settings.ORIGINALS_DIR)
# finally, remove any empty sub folders. This will do nothing if
# something has failed above.
if not os.path.isfile(old_source_path):
delete_empty_directories(os.path.dirname(old_source_path),
root=settings.ORIGINALS_DIR)
if old_archive_path and not os.path.isfile(old_archive_path):
delete_empty_directories(os.path.dirname(old_archive_path),
root=settings.ARCHIVE_DIR)
if old_archive_path and not os.path.isfile(old_archive_path):
delete_empty_directories(os.path.dirname(old_archive_path),
root=settings.ARCHIVE_DIR)
def set_log_entry(sender, document=None, logging_group=None, **kwargs):

View File

@ -598,10 +598,10 @@ class TestConsumer(DirectoriesMixin, TestCase):
self.assertEqual(document.title, "new docs")
self.assertEqual(document.correspondent.name, "Bank")
self.assertEqual(document.filename, "Bank/new docs-0000001.pdf")
self.assertEqual(document.filename, "Bank/new docs.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
@mock.patch("documents.signals.handlers.generate_filename")
@mock.patch("documents.signals.handlers.generate_unique_filename")
def testFilenameHandlingUnstableFormat(self, m):
filenames = ["this", "that", "now this", "i cant decide"]
@ -611,7 +611,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
filenames.insert(0, f)
return f
m.side_effect = lambda f: get_filename()
m.side_effect = lambda f, root: get_filename()
filename = self.get_test_file()

View File

@ -40,13 +40,13 @@ class TestFileHandling(DirectoriesMixin, TestCase):
document.filename = generate_filename(document)
# Ensure that filename is properly generated
self.assertEqual(document.filename, "none/none-{:07d}.pdf".format(document.pk))
self.assertEqual(document.filename, "none/none.pdf")
# Enable encryption and check again
document.storage_type = Document.STORAGE_TYPE_GPG
document.filename = generate_filename(document)
self.assertEqual(document.filename,
"none/none-{:07d}.pdf.gpg".format(document.pk))
"none/none.pdf.gpg")
document.save()
@ -62,7 +62,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Check proper handling of files
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/test"), True)
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/test/test-{:07d}.pdf.gpg".format(document.pk)), True)
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/test/test.pdf.gpg"), True)
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
def test_file_renaming_missing_permissions(self):
@ -74,12 +74,12 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Ensure that filename is properly generated
document.filename = generate_filename(document)
self.assertEqual(document.filename,
"none/none-{:07d}.pdf".format(document.pk))
"none/none.pdf")
create_source_path_directory(document.source_path)
Path(document.source_path).touch()
# Test source_path
self.assertEqual(document.source_path, settings.ORIGINALS_DIR + "/none/none-{:07d}.pdf".format(document.pk))
self.assertEqual(document.source_path, settings.ORIGINALS_DIR + "/none/none.pdf")
# Make the folder read- and execute-only (no writing and no renaming)
os.chmod(settings.ORIGINALS_DIR + "/none", 0o555)
@ -89,8 +89,8 @@ class TestFileHandling(DirectoriesMixin, TestCase):
document.save()
# Check proper handling of files
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none-{:07d}.pdf".format(document.pk)), True)
self.assertEqual(document.filename, "none/none-{:07d}.pdf".format(document.pk))
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), True)
self.assertEqual(document.filename, "none/none.pdf")
os.chmod(settings.ORIGINALS_DIR + "/none", 0o777)
@ -108,7 +108,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Ensure that filename is properly generated
document.filename = generate_filename(document)
self.assertEqual(document.filename,
"none/none-{:07d}.pdf".format(document.pk))
"none/none.pdf")
create_source_path_directory(document.source_path)
Path(document.source_path).touch()
@ -125,8 +125,8 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Check proper handling of files
self.assertTrue(os.path.isfile(document.source_path))
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none-{:07d}.pdf".format(document.pk)), True)
self.assertEqual(document.filename, "none/none-{:07d}.pdf".format(document.pk))
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), True)
self.assertEqual(document.filename, "none/none.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
def test_document_delete(self):
@ -138,7 +138,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Ensure that filename is properly generated
document.filename = generate_filename(document)
self.assertEqual(document.filename,
"none/none-{:07d}.pdf".format(document.pk))
"none/none.pdf")
create_source_path_directory(document.source_path)
Path(document.source_path).touch()
@ -146,7 +146,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Ensure file deletion after delete
pk = document.pk
document.delete()
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none-{:07d}.pdf".format(pk)), False)
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), False)
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
@ -168,7 +168,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Ensure that filename is properly generated
document.filename = generate_filename(document)
self.assertEqual(document.filename,
"none/none-{:07d}.pdf".format(document.pk))
"none/none.pdf")
create_source_path_directory(document.source_path)
@ -199,7 +199,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Ensure that filename is properly generated
self.assertEqual(generate_filename(document),
"demo-{:07d}.pdf".format(document.pk))
"demo.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
def test_tags_with_dash(self):
@ -215,7 +215,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Ensure that filename is properly generated
self.assertEqual(generate_filename(document),
"demo-{:07d}.pdf".format(document.pk))
"demo.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
def test_tags_malformed(self):
@ -231,7 +231,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Ensure that filename is properly generated
self.assertEqual(generate_filename(document),
"none-{:07d}.pdf".format(document.pk))
"none.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[0]}")
def test_tags_all(self):
@ -246,7 +246,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Ensure that filename is properly generated
self.assertEqual(generate_filename(document),
"demo-{:07d}.pdf".format(document.pk))
"demo.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[1]}")
def test_tags_out_of_bounds(self):
@ -261,7 +261,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Ensure that filename is properly generated
self.assertEqual(generate_filename(document),
"none-{:07d}.pdf".format(document.pk))
"none.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}/{correspondent}")
def test_nested_directory_cleanup(self):
@ -272,7 +272,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Ensure that filename is properly generated
document.filename = generate_filename(document)
self.assertEqual(document.filename, "none/none/none-{:07d}.pdf".format(document.pk))
self.assertEqual(document.filename, "none/none/none.pdf")
create_source_path_directory(document.source_path)
Path(document.source_path).touch()
@ -282,7 +282,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
pk = document.pk
document.delete()
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none/none-{:07d}.pdf".format(pk)), False)
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none/none.pdf"), False)
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none/none"), False)
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR), True)
@ -330,6 +330,48 @@ class TestFileHandling(DirectoriesMixin, TestCase):
self.assertEqual(generate_filename(document), "0000001.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
def test_duplicates(self):
document = Document.objects.create(mime_type="application/pdf", title="qwe", checksum="A", pk=1)
document2 = Document.objects.create(mime_type="application/pdf", title="qwe", checksum="B", pk=2)
Path(document.source_path).touch()
Path(document2.source_path).touch()
document.filename = "0000001.pdf"
document.save()
self.assertTrue(os.path.isfile(document.source_path))
self.assertEqual(document.filename, "qwe.pdf")
document2.filename = "0000002.pdf"
document2.save()
self.assertTrue(os.path.isfile(document.source_path))
self.assertEqual(document2.filename, "qwe_01.pdf")
# saving should not change the file names.
document.save()
self.assertTrue(os.path.isfile(document.source_path))
self.assertEqual(document.filename, "qwe.pdf")
document2.save()
self.assertTrue(os.path.isfile(document.source_path))
self.assertEqual(document2.filename, "qwe_01.pdf")
document.delete()
self.assertFalse(os.path.isfile(document.source_path))
# filename free, should remove _01 suffix
document2.save()
self.assertTrue(os.path.isfile(document.source_path))
self.assertEqual(document2.filename, "qwe.pdf")
class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
@ -358,15 +400,14 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
self.assertFalse(os.path.isfile(archive))
self.assertTrue(os.path.isfile(doc.source_path))
self.assertTrue(os.path.isfile(doc.archive_path))
self.assertEqual(doc.source_path, os.path.join(settings.ORIGINALS_DIR, "none", "my_doc-0000001.pdf"))
self.assertEqual(doc.archive_path, os.path.join(settings.ARCHIVE_DIR, "none", "my_doc-0000001.pdf"))
self.assertEqual(doc.source_path, os.path.join(settings.ORIGINALS_DIR, "none", "my_doc.pdf"))
self.assertEqual(doc.archive_path, os.path.join(settings.ARCHIVE_DIR, "none", "my_doc.pdf"))
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
def test_move_archive_gone(self):
original = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
Path(original).touch()
#Path(archive).touch()
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B")
self.assertTrue(os.path.isfile(original))
@ -381,7 +422,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
Path(original).touch()
Path(archive).touch()
os.makedirs(os.path.join(settings.ARCHIVE_DIR, "none"))
Path(os.path.join(settings.ARCHIVE_DIR, "none", "my_doc-0000001.pdf")).touch()
Path(os.path.join(settings.ARCHIVE_DIR, "none", "my_doc.pdf")).touch()
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B")
self.assertTrue(os.path.isfile(original))
@ -494,14 +535,14 @@ class TestFilenameGeneration(TestCase):
def test_invalid_characters(self):
doc = Document.objects.create(title="This. is the title.", mime_type="application/pdf", pk=1, checksum="1")
self.assertEqual(generate_filename(doc), "This. is the title-0000001.pdf")
self.assertEqual(generate_filename(doc), "This. is the title.pdf")
doc = Document.objects.create(title="my\\invalid/../title:yay", mime_type="application/pdf", pk=2, checksum="2")
self.assertEqual(generate_filename(doc), "my-invalid-..-title-yay-0000002.pdf")
self.assertEqual(generate_filename(doc), "my-invalid-..-title-yay.pdf")
@override_settings(
PAPERLESS_FILENAME_FORMAT="{created}"
)
def test_date(self):
doc = Document.objects.create(title="does not matter", created=datetime.datetime(2020,5,21, 7,36,51, 153), mime_type="application/pdf", pk=2, checksum="2")
self.assertEqual(generate_filename(doc), "2020-05-21-0000002.pdf")
self.assertEqual(generate_filename(doc), "2020-05-21.pdf")

View File

@ -53,6 +53,10 @@ ARCHIVE_DIR = os.path.join(MEDIA_ROOT, "documents", "archive")
THUMBNAIL_DIR = os.path.join(MEDIA_ROOT, "documents", "thumbnails")
DATA_DIR = os.getenv('PAPERLESS_DATA_DIR', os.path.join(BASE_DIR, "..", "data"))
# Lock file for synchronizing changes to the MEDIA directory across multiple
# threads.
MEDIA_LOCK = os.path.join(MEDIA_ROOT, "media.lock")
INDEX_DIR = os.path.join(DATA_DIR, "index")
MODEL_FILE = os.path.join(DATA_DIR, "classification_model.pickle")