EML parsing requires Tika

This commit is contained in:
phail 2022-11-20 14:22:30 +01:00
parent d132eba143
commit ebe21a0114
2 changed files with 9 additions and 8 deletions

View File

@ -1,4 +1,5 @@
from django.apps import AppConfig
from django.conf import settings
from django.utils.translation import gettext_lazy as _
from paperless_mail.signals import mail_consumer_declaration
@ -11,5 +12,6 @@ class PaperlessMailConfig(AppConfig):
def ready(self):
from documents.signals import document_consumer_declaration
document_consumer_declaration.connect(mail_consumer_declaration)
if settings.TIKA_ENABLED:
document_consumer_declaration.connect(mail_consumer_declaration)
AppConfig.ready(self)

View File

@ -159,7 +159,12 @@ class MailDocumentParser(DocumentParser):
pdf_collection.append(("1_mail.pdf", self.generate_pdf_from_mail(mail)))
if mail.html != "":
if mail.html == "":
with open(pdf_path, "wb") as file:
file.write(pdf_collection[0][1])
file.close()
return pdf_path
else:
pdf_collection.append(
(
"2_html.pdf",
@ -167,12 +172,6 @@ class MailDocumentParser(DocumentParser):
),
)
if len(pdf_collection) == 1:
with open(pdf_path, "wb") as file:
file.write(pdf_collection[0][1])
file.close()
return pdf_path
files = {}
for name, content in pdf_collection:
files[name] = (name, BytesIO(content))