merge file handling

This commit is contained in:
shamoon
2025-04-08 16:34:43 -07:00
parent 2bb0a137f0
commit ea5ec58967
9 changed files with 17 additions and 17 deletions

View File

@@ -1,150 +0,0 @@
import os
from django.conf import settings
from documents.templating.filepath import validate_filepath_template_and_render
from documents.templating.utils import convert_format_str_to_template_format
from paperless.models import Document
def create_source_path_directory(source_path):
    """
    Make sure the directory that will contain source_path exists.

    Missing intermediate directories are created as needed and an already
    existing directory is not treated as an error. source_path is the path
    of the file itself, not of the directory.
    """
    parent_dir = os.path.dirname(source_path)
    # A bare filename has no directory component. os.makedirs("") would raise
    # FileNotFoundError, and there is nothing to create in that case anyway.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
def delete_empty_directories(directory, root):
    """
    Remove directory and then each of its parents for as long as they are
    empty, walking upwards and stopping before root.

    root itself is never removed, and nothing is touched when directory does
    not exist or is not located below root. This is a best-effort cleanup:
    any OSError while inspecting or removing a directory simply ends the
    walk.
    """
    if not os.path.isdir(directory):
        return

    directory = os.path.normpath(directory)
    root = os.path.normpath(root)

    # Don't do anything outside the root folder. The comparison uses root
    # with a trailing os.path.sep so that we avoid cases like:
    #   directory = /home/originals2/test
    #   root = /home/originals
    # normpath() only leaves a trailing separator on a filesystem root, so
    # take care not to double it there.
    root_prefix = root if root.endswith(os.path.sep) else root + os.path.sep
    if not directory.startswith(root_prefix):
        return

    # Go up in the directory hierarchy and try to delete all directories.
    while directory != root:
        try:
            if os.listdir(directory):
                # it's not empty.
                return
            os.rmdir(directory)
        except OSError:
            # The directory vanished, is not accessible or could not be
            # removed. Empty directories aren't that bad anyway.
            return

        # go one level up
        directory = os.path.normpath(os.path.dirname(directory))
def generate_unique_filename(doc, *, archive_filename=False):
    """
    Pick a filename for doc that does not clash with an existing file.

    The result is either the filename the document already has (when it
    would not change) or a new filename that does not correspond to any
    existing file below the relevant root directory. Conflicts are resolved
    by letting generate_filename add _01, _02, etc. before the extension.

    With archive_filename=True the archive filename is produced instead and
    files are looked up in settings.ARCHIVE_DIR rather than
    settings.ORIGINALS_DIR.
    """
    if archive_filename:
        current_name, base_dir = doc.archive_filename, settings.ARCHIVE_DIR
    else:
        current_name, base_dir = doc.filename, settings.ORIGINALS_DIR

    # For archive files, first try a name that mirrors the original's
    # filename, with the extension swapped for .pdf.
    if archive_filename and doc.filename:
        candidate = os.path.splitext(doc.filename)[0] + ".pdf"
        if candidate == current_name:
            return candidate
        if not os.path.exists(os.path.join(base_dir, candidate)):
            return candidate

    attempt = 0
    while True:
        candidate = generate_filename(
            doc,
            counter=attempt,
            archive_filename=archive_filename,
        )
        # Accept the candidate when it is what the document already uses, or
        # when no file of that name exists yet; otherwise bump the counter.
        unchanged = candidate == current_name
        if unchanged or not os.path.exists(os.path.join(base_dir, candidate)):
            return candidate
        attempt += 1
def generate_filename(
    doc: Document,
    *,
    counter=0,
    append_gpg=True,
    archive_filename=False,
):
    """
    Build the filename for doc from its storage path or the global format.

    The format string is taken from doc.storage_path.path when the document
    has a storage path, otherwise from settings.FILENAME_FORMAT (converted to
    the template format). If there is no format, or rendering it yields
    nothing, the zero-padded primary key is used as the name instead.

    A non-zero counter is appended as _NN. The extension is ".pdf" when
    archive_filename is set and doc.file_type otherwise, and ".gpg" is added
    for GPG-stored documents unless append_gpg is False.
    """
    # Determine the source of the format string
    if doc.storage_path is not None:
        template = doc.storage_path.path
    elif settings.FILENAME_FORMAT is not None:
        # Maybe convert old to new style
        template = convert_format_str_to_template_format(
            settings.FILENAME_FORMAT,
        )
    else:
        template = None

    # If we have one, render it
    rendered = None
    if template is not None:
        rendered = validate_filepath_template_and_render(template, doc)
        if rendered is not None:
            # Apply this setting. It could become a filter in the future
            # (or users could use |default)
            if settings.FILENAME_FORMAT_REMOVE_NONE:
                # Order matters: the more specific patterns go first.
                for needle, substitute in (
                    ("/-none-/", "/"),
                    (" -none-", ""),
                    ("-none-", ""),
                ):
                    rendered = rendered.replace(needle, substitute)
                rendered = rendered.strip(os.sep)

            # backward compatibility
            rendered = rendered.replace("-none-", "none")

    counter_part = f"_{counter:02}" if counter else ""
    extension = ".pdf" if archive_filename else doc.file_type

    # Fall back to the primary key when no usable path was rendered.
    stem = rendered if rendered else f"{doc.pk:07}"
    result = f"{stem}{counter_part}{extension}"

    if append_gpg and doc.storage_type == doc.STORAGE_TYPE_GPG:
        result += ".gpg"

    return result

View File

@@ -32,14 +32,14 @@ if TYPE_CHECKING:
if settings.AUDIT_LOG_ENABLED:
from auditlog.models import LogEntry
from documents.file_handling import delete_empty_directories
from documents.file_handling import generate_filename
from documents.management.commands.mixins import CryptMixin
from documents.settings import EXPORTER_ARCHIVE_NAME
from documents.settings import EXPORTER_FILE_NAME
from documents.settings import EXPORTER_THUMBNAIL_NAME
from paperless import version
from paperless.db import GnuPG
from paperless.file_handling import delete_empty_directories
from paperless.file_handling import generate_filename
from paperless.models import ApplicationConfiguration
from paperless.models import Correspondent
from paperless.models import CustomField

View File

@@ -21,7 +21,6 @@ from django.db.models.signals import m2m_changed
from django.db.models.signals import post_save
from filelock import FileLock
from documents.file_handling import create_source_path_directory
from documents.management.commands.mixins import CryptMixin
from documents.parsers import run_convert
from documents.settings import EXPORTER_ARCHIVE_NAME
@@ -31,6 +30,7 @@ from documents.settings import EXPORTER_THUMBNAIL_NAME
from documents.signals.handlers import check_paths_and_prune_custom_fields
from documents.signals.handlers import update_filename_and_move_files
from paperless import version
from paperless.file_handling import create_source_path_directory
from paperless.models import Correspondent
from paperless.models import CustomField
from paperless.models import CustomFieldInstance

View File

@@ -24,12 +24,12 @@ from filelock import FileLock
from guardian.shortcuts import remove_perm
from documents.caching import clear_document_caches
from documents.file_handling import create_source_path_directory
from documents.file_handling import delete_empty_directories
from documents.file_handling import generate_unique_filename
from documents.mail import send_email
from documents.templating.workflows import parse_w_workflow_placeholders
from paperless import matching
from paperless.file_handling import create_source_path_directory
from paperless.file_handling import delete_empty_directories
from paperless.file_handling import generate_unique_filename
from paperless.models import Correspondent
from paperless.models import CustomField
from paperless.models import CustomFieldInstance

View File

@@ -21,8 +21,6 @@ from whoosh.writing import AsyncWriter
from documents import sanity_checker
from documents.caching import clear_document_caches
from documents.file_handling import create_source_path_directory
from documents.file_handling import generate_unique_filename
from documents.parsers import DocumentParser
from documents.parsers import get_parser_class_for_mime_type
from documents.plugins.base import ConsumeTaskPlugin
@@ -42,6 +40,8 @@ from paperless.consumer import WorkflowTriggerPlugin
from paperless.data_models import ConsumableDocument
from paperless.data_models import DocumentMetadataOverrides
from paperless.double_sided import CollatePlugin
from paperless.file_handling import create_source_path_directory
from paperless.file_handling import generate_unique_filename
from paperless.models import Correspondent
from paperless.models import CustomFieldInstance
from paperless.models import Document

View File

@@ -13,12 +13,12 @@ from django.test import TestCase
from django.test import override_settings
from django.utils import timezone
from documents.file_handling import create_source_path_directory
from documents.file_handling import delete_empty_directories
from documents.file_handling import generate_filename
from documents.tasks import empty_trash
from documents.tests.utils import DirectoriesMixin
from documents.tests.utils import FileSystemAssertsMixin
from paperless.file_handling import create_source_path_directory
from paperless.file_handling import delete_empty_directories
from paperless.file_handling import generate_filename
from paperless.models import Correspondent
from paperless.models import CustomField
from paperless.models import CustomFieldInstance

View File

@@ -13,10 +13,10 @@ from django.core.management import call_command
from django.test import TestCase
from django.test import override_settings
from documents.file_handling import generate_filename
from documents.tasks import update_document_content_maybe_archive_file
from documents.tests.utils import DirectoriesMixin
from documents.tests.utils import FileSystemAssertsMixin
from paperless.file_handling import generate_filename
from paperless.models import Document
sample_file = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")