mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-09-03 01:56:16 +00:00
merge file handling
This commit is contained in:
@@ -1,150 +0,0 @@
|
||||
import os
|
||||
|
||||
from django.conf import settings
|
||||
|
||||
from documents.templating.filepath import validate_filepath_template_and_render
|
||||
from documents.templating.utils import convert_format_str_to_template_format
|
||||
from paperless.models import Document
|
||||
|
||||
|
||||
def create_source_path_directory(source_path):
|
||||
os.makedirs(os.path.dirname(source_path), exist_ok=True)
|
||||
|
||||
|
||||
def delete_empty_directories(directory, root):
|
||||
if not os.path.isdir(directory):
|
||||
return
|
||||
|
||||
# Go up in the directory hierarchy and try to delete all directories
|
||||
directory = os.path.normpath(directory)
|
||||
root = os.path.normpath(root)
|
||||
|
||||
if not directory.startswith(root + os.path.sep):
|
||||
# don't do anything outside our originals folder.
|
||||
|
||||
# append os.path.set so that we avoid these cases:
|
||||
# directory = /home/originals2/test
|
||||
# root = /home/originals ("/" gets appended and startswith fails)
|
||||
return
|
||||
|
||||
while directory != root:
|
||||
if not os.listdir(directory):
|
||||
# it's empty
|
||||
try:
|
||||
os.rmdir(directory)
|
||||
except OSError:
|
||||
# whatever. empty directories aren't that bad anyway.
|
||||
return
|
||||
else:
|
||||
# it's not empty.
|
||||
return
|
||||
|
||||
# go one level up
|
||||
directory = os.path.normpath(os.path.dirname(directory))
|
||||
|
||||
|
||||
def generate_unique_filename(doc, *, archive_filename=False):
|
||||
"""
|
||||
Generates a unique filename for doc in settings.ORIGINALS_DIR.
|
||||
|
||||
The returned filename is guaranteed to be either the current filename
|
||||
of the document if unchanged, or a new filename that does not correspondent
|
||||
to any existing files. The function will append _01, _02, etc to the
|
||||
filename before the extension to avoid conflicts.
|
||||
|
||||
If archive_filename is True, return a unique archive filename instead.
|
||||
|
||||
"""
|
||||
if archive_filename:
|
||||
old_filename = doc.archive_filename
|
||||
root = settings.ARCHIVE_DIR
|
||||
else:
|
||||
old_filename = doc.filename
|
||||
root = settings.ORIGINALS_DIR
|
||||
|
||||
# If generating archive filenames, try to make a name that is similar to
|
||||
# the original filename first.
|
||||
|
||||
if archive_filename and doc.filename:
|
||||
new_filename = os.path.splitext(doc.filename)[0] + ".pdf"
|
||||
if new_filename == old_filename or not os.path.exists(
|
||||
os.path.join(root, new_filename),
|
||||
):
|
||||
return new_filename
|
||||
|
||||
counter = 0
|
||||
|
||||
while True:
|
||||
new_filename = generate_filename(
|
||||
doc,
|
||||
counter=counter,
|
||||
archive_filename=archive_filename,
|
||||
)
|
||||
if new_filename == old_filename:
|
||||
# still the same as before.
|
||||
return new_filename
|
||||
|
||||
if os.path.exists(os.path.join(root, new_filename)):
|
||||
counter += 1
|
||||
else:
|
||||
return new_filename
|
||||
|
||||
|
||||
def generate_filename(
|
||||
doc: Document,
|
||||
*,
|
||||
counter=0,
|
||||
append_gpg=True,
|
||||
archive_filename=False,
|
||||
):
|
||||
path = ""
|
||||
|
||||
def format_filename(document: Document, template_str: str) -> str | None:
|
||||
rendered_filename = validate_filepath_template_and_render(
|
||||
template_str,
|
||||
document,
|
||||
)
|
||||
if rendered_filename is None:
|
||||
return None
|
||||
|
||||
# Apply this setting. It could become a filter in the future (or users could use |default)
|
||||
if settings.FILENAME_FORMAT_REMOVE_NONE:
|
||||
rendered_filename = rendered_filename.replace("/-none-/", "/")
|
||||
rendered_filename = rendered_filename.replace(" -none-", "")
|
||||
rendered_filename = rendered_filename.replace("-none-", "")
|
||||
rendered_filename = rendered_filename.strip(os.sep)
|
||||
|
||||
rendered_filename = rendered_filename.replace(
|
||||
"-none-",
|
||||
"none",
|
||||
) # backward compatibility
|
||||
|
||||
return rendered_filename
|
||||
|
||||
# Determine the source of the format string
|
||||
if doc.storage_path is not None:
|
||||
filename_format = doc.storage_path.path
|
||||
elif settings.FILENAME_FORMAT is not None:
|
||||
# Maybe convert old to new style
|
||||
filename_format = convert_format_str_to_template_format(
|
||||
settings.FILENAME_FORMAT,
|
||||
)
|
||||
else:
|
||||
filename_format = None
|
||||
|
||||
# If we have one, render it
|
||||
if filename_format is not None:
|
||||
path = format_filename(doc, filename_format)
|
||||
|
||||
counter_str = f"_{counter:02}" if counter else ""
|
||||
filetype_str = ".pdf" if archive_filename else doc.file_type
|
||||
|
||||
if path:
|
||||
filename = f"{path}{counter_str}{filetype_str}"
|
||||
else:
|
||||
filename = f"{doc.pk:07}{counter_str}{filetype_str}"
|
||||
|
||||
if append_gpg and doc.storage_type == doc.STORAGE_TYPE_GPG:
|
||||
filename += ".gpg"
|
||||
|
||||
return filename
|
@@ -32,14 +32,14 @@ if TYPE_CHECKING:
|
||||
if settings.AUDIT_LOG_ENABLED:
|
||||
from auditlog.models import LogEntry
|
||||
|
||||
from documents.file_handling import delete_empty_directories
|
||||
from documents.file_handling import generate_filename
|
||||
from documents.management.commands.mixins import CryptMixin
|
||||
from documents.settings import EXPORTER_ARCHIVE_NAME
|
||||
from documents.settings import EXPORTER_FILE_NAME
|
||||
from documents.settings import EXPORTER_THUMBNAIL_NAME
|
||||
from paperless import version
|
||||
from paperless.db import GnuPG
|
||||
from paperless.file_handling import delete_empty_directories
|
||||
from paperless.file_handling import generate_filename
|
||||
from paperless.models import ApplicationConfiguration
|
||||
from paperless.models import Correspondent
|
||||
from paperless.models import CustomField
|
||||
|
@@ -21,7 +21,6 @@ from django.db.models.signals import m2m_changed
|
||||
from django.db.models.signals import post_save
|
||||
from filelock import FileLock
|
||||
|
||||
from documents.file_handling import create_source_path_directory
|
||||
from documents.management.commands.mixins import CryptMixin
|
||||
from documents.parsers import run_convert
|
||||
from documents.settings import EXPORTER_ARCHIVE_NAME
|
||||
@@ -31,6 +30,7 @@ from documents.settings import EXPORTER_THUMBNAIL_NAME
|
||||
from documents.signals.handlers import check_paths_and_prune_custom_fields
|
||||
from documents.signals.handlers import update_filename_and_move_files
|
||||
from paperless import version
|
||||
from paperless.file_handling import create_source_path_directory
|
||||
from paperless.models import Correspondent
|
||||
from paperless.models import CustomField
|
||||
from paperless.models import CustomFieldInstance
|
||||
|
@@ -24,12 +24,12 @@ from filelock import FileLock
|
||||
from guardian.shortcuts import remove_perm
|
||||
|
||||
from documents.caching import clear_document_caches
|
||||
from documents.file_handling import create_source_path_directory
|
||||
from documents.file_handling import delete_empty_directories
|
||||
from documents.file_handling import generate_unique_filename
|
||||
from documents.mail import send_email
|
||||
from documents.templating.workflows import parse_w_workflow_placeholders
|
||||
from paperless import matching
|
||||
from paperless.file_handling import create_source_path_directory
|
||||
from paperless.file_handling import delete_empty_directories
|
||||
from paperless.file_handling import generate_unique_filename
|
||||
from paperless.models import Correspondent
|
||||
from paperless.models import CustomField
|
||||
from paperless.models import CustomFieldInstance
|
||||
|
@@ -21,8 +21,6 @@ from whoosh.writing import AsyncWriter
|
||||
|
||||
from documents import sanity_checker
|
||||
from documents.caching import clear_document_caches
|
||||
from documents.file_handling import create_source_path_directory
|
||||
from documents.file_handling import generate_unique_filename
|
||||
from documents.parsers import DocumentParser
|
||||
from documents.parsers import get_parser_class_for_mime_type
|
||||
from documents.plugins.base import ConsumeTaskPlugin
|
||||
@@ -42,6 +40,8 @@ from paperless.consumer import WorkflowTriggerPlugin
|
||||
from paperless.data_models import ConsumableDocument
|
||||
from paperless.data_models import DocumentMetadataOverrides
|
||||
from paperless.double_sided import CollatePlugin
|
||||
from paperless.file_handling import create_source_path_directory
|
||||
from paperless.file_handling import generate_unique_filename
|
||||
from paperless.models import Correspondent
|
||||
from paperless.models import CustomFieldInstance
|
||||
from paperless.models import Document
|
||||
|
@@ -13,12 +13,12 @@ from django.test import TestCase
|
||||
from django.test import override_settings
|
||||
from django.utils import timezone
|
||||
|
||||
from documents.file_handling import create_source_path_directory
|
||||
from documents.file_handling import delete_empty_directories
|
||||
from documents.file_handling import generate_filename
|
||||
from documents.tasks import empty_trash
|
||||
from documents.tests.utils import DirectoriesMixin
|
||||
from documents.tests.utils import FileSystemAssertsMixin
|
||||
from paperless.file_handling import create_source_path_directory
|
||||
from paperless.file_handling import delete_empty_directories
|
||||
from paperless.file_handling import generate_filename
|
||||
from paperless.models import Correspondent
|
||||
from paperless.models import CustomField
|
||||
from paperless.models import CustomFieldInstance
|
||||
|
@@ -13,10 +13,10 @@ from django.core.management import call_command
|
||||
from django.test import TestCase
|
||||
from django.test import override_settings
|
||||
|
||||
from documents.file_handling import generate_filename
|
||||
from documents.tasks import update_document_content_maybe_archive_file
|
||||
from documents.tests.utils import DirectoriesMixin
|
||||
from documents.tests.utils import FileSystemAssertsMixin
|
||||
from paperless.file_handling import generate_filename
|
||||
from paperless.models import Document
|
||||
|
||||
sample_file = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
|
||||
|
Reference in New Issue
Block a user