mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
split handle_message function
This commit is contained in:
parent
1fa735eb23
commit
df101f5e7a
@ -351,11 +351,15 @@ class MailAccountHandler(LoggingMixin):
|
|||||||
return total_processed_files
|
return total_processed_files
|
||||||
|
|
||||||
def handle_message(self, message, rule: MailRule) -> int:
|
def handle_message(self, message, rule: MailRule) -> int:
|
||||||
|
processed_elements = 0
|
||||||
|
|
||||||
|
# Skip Message handling when only attachments are to be processed but
|
||||||
|
# message doesn't have any.
|
||||||
if (
|
if (
|
||||||
not message.attachments
|
not message.attachments
|
||||||
and rule.consumption_scope == MailRule.ConsumptionScope.ATTACHMENTS_ONLY
|
and rule.consumption_scope == MailRule.ConsumptionScope.ATTACHMENTS_ONLY
|
||||||
):
|
):
|
||||||
return 0
|
return processed_elements
|
||||||
|
|
||||||
self.log(
|
self.log(
|
||||||
"debug",
|
"debug",
|
||||||
@ -368,130 +372,162 @@ class MailAccountHandler(LoggingMixin):
|
|||||||
tag_ids = [tag.id for tag in rule.assign_tags.all()]
|
tag_ids = [tag.id for tag in rule.assign_tags.all()]
|
||||||
doc_type = rule.assign_document_type
|
doc_type = rule.assign_document_type
|
||||||
|
|
||||||
processed_attachments = 0
|
|
||||||
|
|
||||||
if (
|
if (
|
||||||
rule.consumption_scope == MailRule.ConsumptionScope.EML_ONLY
|
rule.consumption_scope == MailRule.ConsumptionScope.EML_ONLY
|
||||||
or rule.consumption_scope == MailRule.ConsumptionScope.EVERYTHING
|
or rule.consumption_scope == MailRule.ConsumptionScope.EVERYTHING
|
||||||
):
|
):
|
||||||
os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
|
processed_elements += self.process_eml(
|
||||||
_, temp_filename = tempfile.mkstemp(
|
message,
|
||||||
prefix="paperless-mail-",
|
rule,
|
||||||
dir=settings.SCRATCH_DIR,
|
correspondent,
|
||||||
suffix=".eml",
|
tag_ids,
|
||||||
|
doc_type,
|
||||||
)
|
)
|
||||||
with open(temp_filename, "wb") as f:
|
|
||||||
# Move "From"-header to beginning of file
|
|
||||||
# TODO: This ugly workaround is needed because the parser is
|
|
||||||
# chosen only by the mime_type detected via magic
|
|
||||||
# (see documents/consumer.py "mime_type = magic.from_file")
|
|
||||||
# Unfortunately magic sometimes fails to detect the mime
|
|
||||||
# type of .eml files correctly as message/rfc822 and instead
|
|
||||||
# detects text/plain.
|
|
||||||
# This also affects direct file consumption of .eml files
|
|
||||||
# which are not treated with this workaround.
|
|
||||||
from_element = None
|
|
||||||
for i, header in enumerate(message.obj._headers):
|
|
||||||
if header[0] == "From":
|
|
||||||
from_element = i
|
|
||||||
if from_element:
|
|
||||||
new_headers = [message.obj._headers.pop(from_element)]
|
|
||||||
new_headers += message.obj._headers
|
|
||||||
message.obj._headers = new_headers
|
|
||||||
|
|
||||||
f.write(message.obj.as_bytes())
|
|
||||||
|
|
||||||
self.log(
|
|
||||||
"info",
|
|
||||||
f"Rule {rule}: "
|
|
||||||
f"Consuming eml from mail "
|
|
||||||
f"{message.subject} from {message.from_}",
|
|
||||||
)
|
|
||||||
|
|
||||||
consume_file.delay(
|
|
||||||
path=temp_filename,
|
|
||||||
override_filename=pathvalidate.sanitize_filename(
|
|
||||||
message.subject + ".eml",
|
|
||||||
),
|
|
||||||
override_title=message.subject,
|
|
||||||
override_correspondent_id=correspondent.id if correspondent else None,
|
|
||||||
override_document_type_id=doc_type.id if doc_type else None,
|
|
||||||
override_tag_ids=tag_ids,
|
|
||||||
)
|
|
||||||
processed_attachments += 1
|
|
||||||
|
|
||||||
if (
|
if (
|
||||||
rule.consumption_scope == MailRule.ConsumptionScope.ATTACHMENTS_ONLY
|
rule.consumption_scope == MailRule.ConsumptionScope.ATTACHMENTS_ONLY
|
||||||
or rule.consumption_scope == MailRule.ConsumptionScope.EVERYTHING
|
or rule.consumption_scope == MailRule.ConsumptionScope.EVERYTHING
|
||||||
):
|
):
|
||||||
for att in message.attachments:
|
processed_elements += self.process_attachments(
|
||||||
|
message,
|
||||||
|
rule,
|
||||||
|
correspondent,
|
||||||
|
tag_ids,
|
||||||
|
doc_type,
|
||||||
|
)
|
||||||
|
|
||||||
if (
|
return processed_elements
|
||||||
not att.content_disposition == "attachment"
|
|
||||||
and rule.attachment_type
|
def process_attachments(
|
||||||
== MailRule.AttachmentProcessing.ATTACHMENTS_ONLY
|
self,
|
||||||
|
message: MailMessage,
|
||||||
|
rule: MailRule,
|
||||||
|
correspondent,
|
||||||
|
tag_ids,
|
||||||
|
doc_type,
|
||||||
|
):
|
||||||
|
processed_attachments = 0
|
||||||
|
for att in message.attachments:
|
||||||
|
|
||||||
|
if (
|
||||||
|
not att.content_disposition == "attachment"
|
||||||
|
and rule.attachment_type
|
||||||
|
== MailRule.AttachmentProcessing.ATTACHMENTS_ONLY
|
||||||
|
):
|
||||||
|
self.log(
|
||||||
|
"debug",
|
||||||
|
f"Rule {rule}: "
|
||||||
|
f"Skipping attachment {att.filename} "
|
||||||
|
f"with content disposition {att.content_disposition}",
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
|
||||||
|
if rule.filter_attachment_filename:
|
||||||
|
# Force the filename and pattern to the lowercase
|
||||||
|
# as this is system dependent otherwise
|
||||||
|
if not fnmatch(
|
||||||
|
att.filename.lower(),
|
||||||
|
rule.filter_attachment_filename.lower(),
|
||||||
):
|
):
|
||||||
self.log(
|
|
||||||
"debug",
|
|
||||||
f"Rule {rule}: "
|
|
||||||
f"Skipping attachment {att.filename} "
|
|
||||||
f"with content disposition {att.content_disposition}",
|
|
||||||
)
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if rule.filter_attachment_filename:
|
title = self.get_title(message, att, rule)
|
||||||
# Force the filename and pattern to the lowercase
|
|
||||||
# as this is system dependent otherwise
|
|
||||||
if not fnmatch(
|
|
||||||
att.filename.lower(),
|
|
||||||
rule.filter_attachment_filename.lower(),
|
|
||||||
):
|
|
||||||
continue
|
|
||||||
|
|
||||||
title = self.get_title(message, att, rule)
|
# don't trust the content type of the attachment. Could be
|
||||||
|
# generic application/octet-stream.
|
||||||
|
mime_type = magic.from_buffer(att.payload, mime=True)
|
||||||
|
|
||||||
# don't trust the content type of the attachment. Could be
|
if is_mime_type_supported(mime_type):
|
||||||
# generic application/octet-stream.
|
|
||||||
mime_type = magic.from_buffer(att.payload, mime=True)
|
|
||||||
|
|
||||||
if is_mime_type_supported(mime_type):
|
os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
|
||||||
|
_, temp_filename = tempfile.mkstemp(
|
||||||
|
prefix="paperless-mail-",
|
||||||
|
dir=settings.SCRATCH_DIR,
|
||||||
|
)
|
||||||
|
with open(temp_filename, "wb") as f:
|
||||||
|
f.write(att.payload)
|
||||||
|
|
||||||
os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
|
self.log(
|
||||||
_, temp_filename = tempfile.mkstemp(
|
"info",
|
||||||
prefix="paperless-mail-",
|
f"Rule {rule}: "
|
||||||
dir=settings.SCRATCH_DIR,
|
f"Consuming attachment {att.filename} from mail "
|
||||||
)
|
f"{message.subject} from {message.from_}",
|
||||||
with open(temp_filename, "wb") as f:
|
)
|
||||||
f.write(att.payload)
|
|
||||||
|
|
||||||
self.log(
|
consume_file.delay(
|
||||||
"info",
|
path=temp_filename,
|
||||||
f"Rule {rule}: "
|
override_filename=pathvalidate.sanitize_filename(
|
||||||
f"Consuming attachment {att.filename} from mail "
|
att.filename,
|
||||||
f"{message.subject} from {message.from_}",
|
),
|
||||||
)
|
override_title=title,
|
||||||
|
override_correspondent_id=correspondent.id
|
||||||
|
if correspondent
|
||||||
|
else None,
|
||||||
|
override_document_type_id=doc_type.id if doc_type else None,
|
||||||
|
override_tag_ids=tag_ids,
|
||||||
|
)
|
||||||
|
|
||||||
consume_file.delay(
|
processed_attachments += 1
|
||||||
path=temp_filename,
|
else:
|
||||||
override_filename=pathvalidate.sanitize_filename(
|
self.log(
|
||||||
att.filename,
|
"debug",
|
||||||
),
|
f"Rule {rule}: "
|
||||||
override_title=title,
|
f"Skipping attachment {att.filename} "
|
||||||
override_correspondent_id=correspondent.id
|
f"since guessed mime type {mime_type} is not supported "
|
||||||
if correspondent
|
f"by paperless",
|
||||||
else None,
|
)
|
||||||
override_document_type_id=doc_type.id if doc_type else None,
|
|
||||||
override_tag_ids=tag_ids,
|
|
||||||
)
|
|
||||||
|
|
||||||
processed_attachments += 1
|
def process_eml(
|
||||||
else:
|
self,
|
||||||
self.log(
|
message: MailMessage,
|
||||||
"debug",
|
rule: MailRule,
|
||||||
f"Rule {rule}: "
|
correspondent,
|
||||||
f"Skipping attachment {att.filename} "
|
tag_ids,
|
||||||
f"since guessed mime type {mime_type} is not supported "
|
doc_type,
|
||||||
f"by paperless",
|
):
|
||||||
)
|
os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
|
||||||
|
_, temp_filename = tempfile.mkstemp(
|
||||||
|
prefix="paperless-mail-",
|
||||||
|
dir=settings.SCRATCH_DIR,
|
||||||
|
suffix=".eml",
|
||||||
|
)
|
||||||
|
with open(temp_filename, "wb") as f:
|
||||||
|
# Move "From"-header to beginning of file
|
||||||
|
# TODO: This ugly workaround is needed because the parser is
|
||||||
|
# chosen only by the mime_type detected via magic
|
||||||
|
# (see documents/consumer.py "mime_type = magic.from_file")
|
||||||
|
# Unfortunately magic sometimes fails to detect the mime
|
||||||
|
# type of .eml files correctly as message/rfc822 and instead
|
||||||
|
# detects text/plain.
|
||||||
|
# This also affects direct file consumption of .eml files
|
||||||
|
# which are not treated with this workaround.
|
||||||
|
from_element = None
|
||||||
|
for i, header in enumerate(message.obj._headers):
|
||||||
|
if header[0] == "From":
|
||||||
|
from_element = i
|
||||||
|
if from_element:
|
||||||
|
new_headers = [message.obj._headers.pop(from_element)]
|
||||||
|
new_headers += message.obj._headers
|
||||||
|
message.obj._headers = new_headers
|
||||||
|
|
||||||
return processed_attachments
|
f.write(message.obj.as_bytes())
|
||||||
|
|
||||||
|
self.log(
|
||||||
|
"info",
|
||||||
|
f"Rule {rule}: "
|
||||||
|
f"Consuming eml from mail "
|
||||||
|
f"{message.subject} from {message.from_}",
|
||||||
|
)
|
||||||
|
|
||||||
|
consume_file.delay(
|
||||||
|
path=temp_filename,
|
||||||
|
override_filename=pathvalidate.sanitize_filename(
|
||||||
|
message.subject + ".eml",
|
||||||
|
),
|
||||||
|
override_title=message.subject,
|
||||||
|
override_correspondent_id=correspondent.id if correspondent else None,
|
||||||
|
override_document_type_id=doc_type.id if doc_type else None,
|
||||||
|
override_tag_ids=tag_ids,
|
||||||
|
)
|
||||||
|
processed_elements = 1
|
||||||
|
return processed_elements
|
||||||
|
Loading…
x
Reference in New Issue
Block a user