Fixhancement: refactor email attachment logic

This commit is contained in:
shamoon
2025-11-10 11:28:31 -08:00
parent 44f0191bfb
commit f6705ae036
4 changed files with 143 additions and 51 deletions

View File

@@ -1,25 +1,26 @@
from __future__ import annotations
from dataclasses import dataclass
from email import message_from_bytes
from typing import TYPE_CHECKING
from pathlib import Path
from django.conf import settings
from django.core.mail import EmailMessage
from filelock import FileLock
from documents.data_models import ConsumableDocument
if TYPE_CHECKING:
from documents.models import Document
@dataclass(frozen=True)
class EmailAttachment:
path: Path
mime_type: str
friendly_name: str
def send_email(
subject: str,
body: str,
to: list[str],
attachments: list[Document | ConsumableDocument],
*,
use_archive: bool,
attachments: list[EmailAttachment],
) -> int:
"""
Send an email with attachments.
@@ -28,8 +29,7 @@ def send_email(
subject: Email subject
body: Email body text
to: List of recipient email addresses
attachments: List of documents to attach (the list may be empty)
use_archive: Whether to attach archive versions when available
attachments: List of attachments
Returns:
Number of emails sent
@@ -46,47 +46,41 @@ def send_email(
# Something could be renaming the file concurrently so it can't be attached
with FileLock(settings.MEDIA_LOCK):
for document in attachments:
if isinstance(document, ConsumableDocument):
attachment_path = document.original_file
friendly_filename = document.original_file.name
else:
attachment_path = (
document.archive_path
if use_archive and document.has_archive_version
else document.source_path
)
friendly_filename = _get_unique_filename(
document,
used_filenames,
archive=use_archive and document.has_archive_version,
)
used_filenames.add(friendly_filename)
for attachment in attachments:
filename = _get_unique_filename(
attachment.friendly_name,
used_filenames,
)
used_filenames.add(filename)
with attachment_path.open("rb") as f:
with attachment.path.open("rb") as f:
content = f.read()
if document.mime_type == "message/rfc822":
if attachment.mime_type == "message/rfc822":
# See https://forum.djangoproject.com/t/using-emailmessage-with-an-attached-email-file-crashes-due-to-non-ascii/37981
content = message_from_bytes(content)
email.attach(
filename=friendly_filename,
filename=filename,
content=content,
mimetype=document.mime_type,
mimetype=attachment.mime_type,
)
return email.send()
def _get_unique_filename(doc: Document, used_names: set[str], *, archive: bool) -> str:
def _get_unique_filename(friendly_name: str, used_names: set[str]) -> str:
"""
Constructs a unique friendly filename for the given document.
Constructs a unique friendly filename for the given document, append a counter if needed.
"""
if friendly_name not in used_names:
return friendly_name
The filename might not be unique enough, so a counter is appended if needed.
"""
counter = 0
stem = Path(friendly_name).stem
suffix = "".join(Path(friendly_name).suffixes)
counter = 1
while True:
filename = doc.get_public_filename(archive=archive, counter=counter)
filename = f"{stem}_{counter:02}{suffix}"
if filename not in used_names:
return filename
counter += 1

View File

@@ -31,11 +31,10 @@ from guardian.shortcuts import remove_perm
from documents import matching
from documents.caching import clear_document_caches
from documents.data_models import ConsumableDocument
from documents.data_models import DocumentSource
from documents.file_handling import create_source_path_directory
from documents.file_handling import delete_empty_directories
from documents.file_handling import generate_unique_filename
from documents.mail import EmailAttachment
from documents.mail import send_email
from documents.models import Correspondent
from documents.models import CustomField
@@ -57,6 +56,7 @@ from documents.templating.workflows import parse_w_workflow_placeholders
if TYPE_CHECKING:
from documents.classifier import DocumentClassifier
from documents.data_models import ConsumableDocument
from documents.data_models import DocumentMetadataOverrides
logger = logging.getLogger("paperless.handlers")
@@ -1163,28 +1163,41 @@ def run_workflows(
else ""
)
try:
attachments = []
attachments: list[EmailAttachment] = []
if action.email.include_document:
attachment: EmailAttachment | None = None
if trigger_type in [
WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED,
WorkflowTrigger.WorkflowTriggerType.SCHEDULED,
]:
# Updated and scheduled can pass the document directly
attachments = [document]
] and isinstance(document, Document):
friendly_name = (
Path(current_filename).name
if current_filename
else document.source_path.name
)
attachment = EmailAttachment(
path=document.source_path,
mime_type=document.mime_type,
friendly_name=friendly_name,
)
elif original_file:
# For consumed and added document is not yet saved, so pass the original file
attachments = [
ConsumableDocument(
source=DocumentSource.ApiUpload,
original_file=original_file,
),
]
friendly_name = (
Path(current_filename).name
if current_filename
else original_file.name
)
attachment = EmailAttachment(
path=original_file,
mime_type=document.mime_type,
friendly_name=friendly_name,
)
if attachment:
attachments = [attachment]
n_messages = send_email(
subject=subject,
body=body,
to=action.email.to.split(","),
attachments=attachments,
use_archive=False,
)
logger.debug(
f"Sent {n_messages} notification email(s) to {action.email.to}",

View File

@@ -8,8 +8,10 @@ from typing import TYPE_CHECKING
from unittest import mock
import pytest
from django.conf import settings
from django.contrib.auth.models import Group
from django.contrib.auth.models import User
from django.core import mail
from django.test import override_settings
from django.utils import timezone
from guardian.shortcuts import assign_perm
@@ -21,6 +23,8 @@ from pytest_httpx import HTTPXMock
from rest_framework.test import APIClient
from rest_framework.test import APITestCase
from documents.file_handling import create_source_path_directory
from documents.file_handling import generate_unique_filename
from documents.signals.handlers import run_workflows
from documents.signals.handlers import send_webhook
@@ -2989,6 +2993,70 @@ class TestWorkflows(
mock_email_send.assert_called_once()
@override_settings(
PAPERLESS_EMAIL_HOST="localhost",
EMAIL_ENABLED=True,
PAPERLESS_URL="http://localhost:8000",
EMAIL_BACKEND="django.core.mail.backends.locmem.EmailBackend",
)
def test_workflow_email_attachment_uses_storage_filename(self):
"""
GIVEN:
- Document updated workflow with include document action
- Document stored with formatted storage-path filename
WHEN:
- Workflow sends an email
THEN:
- Attachment filename matches the stored filename
"""
trigger = WorkflowTrigger.objects.create(
type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED,
)
email_action = WorkflowActionEmail.objects.create(
subject="Test Notification: {doc_title}",
body="Test message: {doc_url}",
to="me@example.com",
include_document=True,
)
action = WorkflowAction.objects.create(
type=WorkflowAction.WorkflowActionType.EMAIL,
email=email_action,
)
workflow = Workflow.objects.create(
name="Workflow attachment filename",
order=0,
)
workflow.triggers.add(trigger)
workflow.actions.add(action)
workflow.save()
storage_path = StoragePath.objects.create(
name="Fancy Path",
path="formatted/{{ document.pk }}/{{ title }}",
)
doc = Document.objects.create(
title="workflow doc",
correspondent=self.c,
checksum="workflow-email-attachment",
mime_type="application/pdf",
storage_path=storage_path,
original_filename="workflow-orig.pdf",
)
# eg what happens in update_filename_and_move_files
generated = generate_unique_filename(doc)
destination = (settings.ORIGINALS_DIR / generated).resolve()
create_source_path_directory(destination)
shutil.copy(self.SAMPLE_DIR / "simple.pdf", destination)
Document.objects.filter(pk=doc.pk).update(filename=generated.as_posix())
run_workflows(WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED, doc)
self.assertEqual(len(mail.outbox), 1)
attachment_names = [att[0] for att in mail.outbox[0].attachments]
self.assertEqual(attachment_names, [Path(generated).name])
@override_settings(
EMAIL_ENABLED=False,
)

View File

@@ -120,6 +120,7 @@ from documents.filters import PaperlessTaskFilterSet
from documents.filters import ShareLinkFilterSet
from documents.filters import StoragePathFilterSet
from documents.filters import TagFilterSet
from documents.mail import EmailAttachment
from documents.mail import send_email
from documents.matching import match_correspondents
from documents.matching import match_document_types
@@ -1216,12 +1217,28 @@ class DocumentViewSet(
return HttpResponseForbidden("Insufficient permissions")
try:
attachments: list[EmailAttachment] = []
for doc in documents:
attachment_path = (
doc.archive_path
if use_archive_version and doc.has_archive_version
else doc.source_path
)
attachments.append(
EmailAttachment(
path=attachment_path,
mime_type=doc.mime_type,
friendly_name=doc.get_public_filename(
archive=use_archive_version and doc.has_archive_version,
),
),
)
send_email(
subject=subject,
body=message,
to=addresses,
attachments=documents,
use_archive=use_archive_version,
attachments=attachments,
)
logger.debug(