The mailconsumer isn't a consumer at all. Best fix that.

This commit is contained in:
Daniel Quinn 2016-02-05 20:15:08 +00:00
parent d8ad6b589b
commit 71075a691a
4 changed files with 65 additions and 24 deletions

View File

@ -28,6 +28,7 @@ class Consumer(object):
SCRATCH = settings.SCRATCH_DIR SCRATCH = settings.SCRATCH_DIR
CONVERT = settings.CONVERT_BINARY CONVERT = settings.CONVERT_BINARY
CONSUME = settings.CONSUMPTION_DIR
OCR = pyocr.get_available_tools()[0] OCR = pyocr.get_available_tools()[0]
DEFAULT_OCR_LANGUAGE = settings.OCR_LANGUAGE DEFAULT_OCR_LANGUAGE = settings.OCR_LANGUAGE

View File

@ -1,8 +1,6 @@
import os import os
import re import re
from django.conf import settings
from .base import Consumer, OCRError from .base import Consumer, OCRError
@ -12,8 +10,6 @@ class FileConsumerError(Exception):
class FileConsumer(Consumer): class FileConsumer(Consumer):
CONSUME = settings.CONSUMPTION_DIR
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
Consumer.__init__(self, *args, **kwargs) Consumer.__init__(self, *args, **kwargs)

View File

@ -1,9 +1,10 @@
import datetime import datetime
import email import email
import imaplib import imaplib
import os
import re
from base64 import b64decode from base64 import b64decode
from io import BytesIO
from django.conf import settings from django.conf import settings
@ -14,11 +15,48 @@ class MailConsumerError(Exception):
pass pass
class MailConsumer(Consumer): class Message(object):
"""
A crude, but simple email message class. We assume that there's a subject
and exactly one attachment, and that we don't care about the message body.
"""
def __init__(self, *args, **kwargs): SAFE_SUBJECT_REGEX = re.compile(r"^[\w\- ,.]+$")
SAFE_SUFFIX_REGEX = re.compile(
r"^(application/(pdf))|(image/(png|jpg|gif|tiff))$")
Consumer.__init__(self, *args, **kwargs) def __init__(self, subject, attachment):
self.subject = subject
self.attachment = attachment
self.suffix = None
m = self.SAFE_SUFFIX_REGEX.match(attachment.content_type)
if not m:
raise MailConsumerError(
"Not-awesome file type: {}".format(attachment.content_type))
self.suffix = m.group(1) or m.group(3)
@property
def file_name(self):
    """
    Build a file name from the message subject and the attachment suffix.

    Returns ``"<subject>.<suffix>"`` when the subject consists only of
    characters allowed by ``SAFE_SUBJECT_REGEX``; returns ``None`` when the
    subject is missing or contains anything else.
    """
    subject = self.subject

    # Validate the *subject* with the subject pattern.  Matching it against
    # SAFE_SUFFIX_REGEX (a content-type pattern) rejects every ordinary
    # subject line, so no usable name would ever be produced.
    if subject is None or not self.SAFE_SUBJECT_REGEX.match(subject):
        return None

    return "{}.{}".format(subject, self.suffix)
class Attachment(object):
    """
    Plain holder for one attachment: the payload passed to the constructor
    plus metadata attributes that all start out as ``None``.
    """

    def __init__(self, data):
        # Metadata is unknown at construction time; every field starts
        # empty.  Targets are bound left to right, so attribute order is
        # content_type, size, name, created, modified.
        self.content_type = self.size = self.name = None
        self.created = self.modified = None

        # The payload handed in by the caller.
        self.data = data
class MailFetcher(object):
def __init__(self):
self._connection = None self._connection = None
self._host = settings.MAIL_CONSUMPTION["HOST"] self._host = settings.MAIL_CONSUMPTION["HOST"]
@ -51,10 +89,25 @@ class MailConsumer(Consumer):
yield data[0][1] yield data[0][1]
def consume(self): def consume(self):
"""
We don't actually consume here 'cause it's much easier to do that with
files and we already have a FileConsumer. So instead, we simply write
the attachment to the consumption directory as a file with the proper
format so the FileConsumer can do its job.
"""
if self._enabled: if self._enabled:
for message in self.get_messages(): for message in self.get_messages():
pass
t = message.attachment.created or \
message.attachment.modified or \
datetime.datetime.now()
file_name = os.path.join(Consumer.CONSUME, message.file_name)
with open(file_name, "wb") as f:
f.write(message.attachment.data)
os.utime(file_name, times=(t, t))
self.last_checked = datetime.datetime.now() self.last_checked = datetime.datetime.now()
@ -96,13 +149,9 @@ class MailConsumer(Consumer):
continue continue
file_data = part.get_payload() file_data = part.get_payload()
attachment = BytesIO(b64decode(file_data)) attachment = Attachment(b64decode(file_data))
attachment.content_type = part.get_content_type() attachment.content_type = part.get_content_type()
attachment.size = len(file_data) attachment.size = len(file_data)
attachment.name = None
attachment.create_date = None
attachment.mod_date = None
attachment.read_date = None
for param in dispositions[1:]: for param in dispositions[1:]:
@ -112,15 +161,10 @@ class MailConsumer(Consumer):
if name == "filename": if name == "filename":
attachment.name = value attachment.name = value
elif name == "create-date": elif name == "create-date":
attachment.create_date = value attachment.created = value
elif name == "modification-date": elif name == "modification-date":
attachment.mod_date = value attachment.modified = value
elif name == "read-date":
attachment.read_date = value
r.append({ r.append(Message(message.get("Subject"), attachment))
"subject": message.get("Subject"),
"attachment": attachment,
})
return r return r

View File

@ -61,8 +61,8 @@ class Command(BaseCommand):
self.file_consumer.consume() self.file_consumer.consume()
now = datetime.datetime.now() delta = self.mail_consumer.last_checked + self.MAIL_DELTA
if self.mail_consumer.last_checked + self.MAIL_DELTA > now: if delta > datetime.datetime.now():
self.mail_consumer.consume() self.mail_consumer.consume()
def _render(self, text, verbosity): def _render(self, text, verbosity):