Only get or create a Correspondent if the email or attachment matches rule filters (#4431)

This commit is contained in:
Trenton H 2023-10-25 17:34:52 -07:00 committed by GitHub
parent 7769b8968c
commit 4dbdb216f9

View File

@@ -8,6 +8,7 @@ import traceback
from datetime import date from datetime import date
from datetime import timedelta from datetime import timedelta
from fnmatch import fnmatch from fnmatch import fnmatch
from typing import Optional
from typing import Union from typing import Union
import magic import magic
@@ -21,6 +22,7 @@ from django.utils.timezone import is_naive
from django.utils.timezone import make_aware from django.utils.timezone import make_aware
from imap_tools import AND from imap_tools import AND
from imap_tools import NOT from imap_tools import NOT
from imap_tools import MailAttachment
from imap_tools import MailBox from imap_tools import MailBox
from imap_tools import MailboxFolderSelectError from imap_tools import MailboxFolderSelectError
from imap_tools import MailBoxUnencrypted from imap_tools import MailBoxUnencrypted
@@ -422,14 +424,19 @@ class MailAccountHandler(LoggingMixin):
logging_name = "paperless_mail" logging_name = "paperless_mail"
def _correspondent_from_name(self, name): def _correspondent_from_name(self, name: str) -> Optional[Correspondent]:
try: try:
return Correspondent.objects.get_or_create(name=name)[0] return Correspondent.objects.get_or_create(name=name)[0]
except DatabaseError as e: except DatabaseError as e:
self.log.error(f"Error while retrieving correspondent {name}: {e}") self.log.error(f"Error while retrieving correspondent {name}: {e}")
return None return None
def _get_title(self, message, att, rule): def _get_title(
self,
message: MailMessage,
att: MailAttachment,
rule: MailRule,
) -> Optional[str]:
if rule.assign_title_from == MailRule.TitleSource.FROM_SUBJECT: if rule.assign_title_from == MailRule.TitleSource.FROM_SUBJECT:
return message.subject return message.subject
@@ -444,7 +451,11 @@ class MailAccountHandler(LoggingMixin):
"Unknown title selector.", "Unknown title selector.",
) # pragma: nocover ) # pragma: nocover
def _get_correspondent(self, message: MailMessage, rule): def _get_correspondent(
self,
message: MailMessage,
rule: MailRule,
) -> Optional[Correspondent]:
c_from = rule.assign_correspondent_from c_from = rule.assign_correspondent_from
if c_from == MailRule.CorrespondentSource.FROM_NOTHING: if c_from == MailRule.CorrespondentSource.FROM_NOTHING:
@@ -606,7 +617,6 @@ class MailAccountHandler(LoggingMixin):
f"{len(message.attachments)} attachment(s)", f"{len(message.attachments)} attachment(s)",
) )
correspondent = self._get_correspondent(message, rule)
tag_ids = [tag.id for tag in rule.assign_tags.all()] tag_ids = [tag.id for tag in rule.assign_tags.all()]
doc_type = rule.assign_document_type doc_type = rule.assign_document_type
@@ -617,7 +627,6 @@ class MailAccountHandler(LoggingMixin):
processed_elements += self._process_eml( processed_elements += self._process_eml(
message, message,
rule, rule,
correspondent,
tag_ids, tag_ids,
doc_type, doc_type,
) )
@@ -629,7 +638,6 @@ class MailAccountHandler(LoggingMixin):
processed_elements += self._process_attachments( processed_elements += self._process_attachments(
message, message,
rule, rule,
correspondent,
tag_ids, tag_ids,
doc_type, doc_type,
) )
@@ -640,7 +648,6 @@ class MailAccountHandler(LoggingMixin):
self, self,
message: MailMessage, message: MailMessage,
rule: MailRule, rule: MailRule,
correspondent,
tag_ids, tag_ids,
doc_type, doc_type,
): ):
@@ -669,6 +676,8 @@ class MailAccountHandler(LoggingMixin):
# as this is system dependent otherwise # as this is system dependent otherwise
continue continue
correspondent = self._get_correspondent(message, rule)
title = self._get_title(message, att, rule) title = self._get_title(message, att, rule)
# don't trust the content type of the attachment. Could be # don't trust the content type of the attachment. Could be
@@ -750,7 +759,6 @@ class MailAccountHandler(LoggingMixin):
self, self,
message: MailMessage, message: MailMessage,
rule: MailRule, rule: MailRule,
correspondent,
tag_ids, tag_ids,
doc_type, doc_type,
): ):
@@ -781,6 +789,8 @@ class MailAccountHandler(LoggingMixin):
f.write(message.obj.as_bytes()) f.write(message.obj.as_bytes())
correspondent = self._get_correspondent(message, rule)
self.log.info( self.log.info(
f"Rule {rule}: " f"Rule {rule}: "
f"Consuming eml from mail " f"Consuming eml from mail "