mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Image imports and consumption by mail work
This commit is contained in:
parent
71075a691a
commit
48761911b3
@ -16,15 +16,27 @@ from django.template.defaultfilters import slugify
|
||||
|
||||
from paperless.db import GnuPG
|
||||
|
||||
from ..models import Sender, Tag, Document
|
||||
from ..languages import ISO639
|
||||
from .models import Sender, Tag, Document
|
||||
from .languages import ISO639
|
||||
|
||||
|
||||
class OCRError(Exception):
    """Signals that OCR could not produce usable text for a document."""
|
||||
|
||||
|
||||
class ConsumerError(Exception):
    """Signals that the consumer cannot run, e.g. a bad consumption dir."""
|
||||
|
||||
|
||||
class Consumer(object):
|
||||
"""
|
||||
Loop over every file found in CONSUMPTION_DIR and:
|
||||
1. Convert it to a greyscale png
|
||||
2. Use tesseract on the png
|
||||
3. Encrypt and store the document in the MEDIA_ROOT
|
||||
4. Store the OCR'd text in the database
|
||||
5. Delete the document and image(s)
|
||||
"""
|
||||
|
||||
SCRATCH = settings.SCRATCH_DIR
|
||||
CONVERT = settings.CONVERT_BINARY
|
||||
@ -34,15 +46,15 @@ class Consumer(object):
|
||||
DEFAULT_OCR_LANGUAGE = settings.OCR_LANGUAGE
|
||||
|
||||
REGEX_TITLE = re.compile(
|
||||
r"^.*/(.*)\.(pdf|jpe?g|png|gif|tiff)$",
|
||||
r"^.*/([^/]*)\.(pdf|jpe?g|png|gif|tiff)$",
|
||||
flags=re.IGNORECASE
|
||||
)
|
||||
REGEX_SENDER_TITLE = re.compile(
|
||||
r"^.*/(.*) - (.*)\.(pdf|jpe?g|png|gif|tiff)",
|
||||
r"^[^/]*/(.+) - ([^/]+)\.(pdf|jpe?g|png|gif|tiff)$",
|
||||
flags=re.IGNORECASE
|
||||
)
|
||||
REGEX_SENDER_TITLE_TAGS = re.compile(
|
||||
r"^.*/(.*) - (.*) - ([a-z\-,])\.(pdf|jpe?g|png|gif|tiff)",
|
||||
r"^.*/([^/]+) - ([^/]+) - ([a-z\-,]+)\.(pdf|jpe?g|png|gif|tiff)$",
|
||||
flags=re.IGNORECASE
|
||||
)
|
||||
|
||||
@ -55,6 +67,51 @@ class Consumer(object):
|
||||
except FileExistsError:
|
||||
pass
|
||||
|
||||
self.stats = {}
|
||||
self._ignore = []
|
||||
|
||||
if not self.CONSUME:
|
||||
raise ConsumerError(
|
||||
"The CONSUMPTION_DIR settings variable does not appear to be "
|
||||
"set."
|
||||
)
|
||||
|
||||
if not os.path.exists(self.CONSUME):
|
||||
raise ConsumerError(
|
||||
"Consumption directory {} does not exist".format(self.CONSUME))
|
||||
|
||||
def consume(self):
    """
    Make one pass over the consumption directory and consume every document
    that is ready.

    A directory entry is skipped when it is not a regular file, when its
    path does not match REGEX_TITLE, when a previous OCR attempt on it
    failed (it is listed in self._ignore), or when _is_ready() reports that
    it is not ready yet.  Everything else is converted, OCR'd, stored and
    cleaned up.
    """

    for entry in os.listdir(self.CONSUME):

        doc = os.path.join(self.CONSUME, entry)

        if not os.path.isfile(doc):
            continue

        if not re.match(self.REGEX_TITLE, doc):
            continue

        if doc in self._ignore:
            continue

        # _is_ready() returns True once the file is ready, so the documents
        # to skip are the ones for which it returns False.  The check used
        # to be inverted, which consumed files on first sight and skipped
        # the ones that had settled.
        if not self._is_ready(doc):
            continue

        self._render("Consuming {}".format(doc), 1)

        pngs = self._get_greyscale(doc)

        try:
            text = self._get_ocr(pngs)
        except OCRError:
            # Remember the failure so the same file isn't retried on every
            # pass.
            self._ignore.append(doc)
            self._render("OCR FAILURE: {}".format(doc), 0)
            continue

        self._store(text, doc)
        self._cleanup(pngs, doc)
|
||||
|
||||
def _get_greyscale(self, doc):
|
||||
|
||||
self._render(" Generating greyscale image", 2)
|
||||
@ -69,17 +126,27 @@ class Consumer(object):
|
||||
|
||||
return sorted(glob.glob(os.path.join(self.SCRATCH, "{}*".format(i))))
|
||||
|
||||
def _guess_language(self, text):
|
||||
try:
|
||||
guess = langdetect.detect(text)
|
||||
self._render(" Language detected: {}".format(guess), 2)
|
||||
return guess
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
def _get_ocr(self, pngs):
|
||||
"""
|
||||
Attempts to do the best job possible OCR'ing the document based on
|
||||
simple language detection trial & error.
|
||||
"""
|
||||
|
||||
self._render(" OCRing the document", 2)
|
||||
|
||||
raw_text = self._ocr(pngs, self.DEFAULT_OCR_LANGUAGE)
|
||||
|
||||
guessed_language = langdetect.detect(raw_text)
|
||||
guessed_language = self._guess_language(raw_text)
|
||||
|
||||
self._render(" Language detected: {}".format(guessed_language), 2)
|
||||
|
||||
if guessed_language not in ISO639:
|
||||
if not guessed_language or guessed_language not in ISO639:
|
||||
self._render("Language detection failed!", 0)
|
||||
if settings.FORGIVING_OCR:
|
||||
self._render(
|
||||
@ -108,6 +175,9 @@ class Consumer(object):
|
||||
raise OCRError
|
||||
|
||||
def _ocr(self, pngs, lang):
|
||||
"""
|
||||
Performs a single OCR attempt.
|
||||
"""
|
||||
|
||||
self._render(" Parsing for {}".format(lang), 2)
|
||||
|
||||
@ -161,10 +231,11 @@ class Consumer(object):
|
||||
|
||||
def _store(self, text, doc):
|
||||
|
||||
sender, title, file_type = self._guess_attributes_from_name(doc)
|
||||
sender, title, tags, file_type = self._guess_attributes_from_name(doc)
|
||||
|
||||
lower_text = text.lower()
|
||||
relevant_tags = [t for t in Tag.objects.all() if t.matches(lower_text)]
|
||||
relevant_tags = set(
|
||||
[t for t in Tag.objects.all() if t.matches(lower_text)] + tags)
|
||||
|
||||
stats = os.stat(doc)
|
||||
|
||||
@ -205,3 +276,19 @@ class Consumer(object):
|
||||
def _render(self, text, verbosity):
|
||||
if self.verbosity >= verbosity:
|
||||
print(text)
|
||||
|
||||
def _is_ready(self, doc):
|
||||
"""
|
||||
Detect whether `doc` is ready to consume or if it's still being written
|
||||
to by the uploader.
|
||||
"""
|
||||
|
||||
t = os.stat(doc).st_mtime
|
||||
|
||||
if self.stats.get(doc) == t:
|
||||
del(self.stats[doc])
|
||||
return True
|
||||
|
||||
self.stats[doc] = t
|
||||
|
||||
return False
|
@ -1,3 +0,0 @@
|
||||
from .base import Consumer
|
||||
from .file import FileConsumer, FileConsumerError
|
||||
from .mail import MailConsumer, MailConsumerError
|
@ -1,76 +0,0 @@
|
||||
import os
|
||||
import re
|
||||
|
||||
from .base import Consumer, OCRError
|
||||
|
||||
|
||||
class FileConsumerError(Exception):
    """Signals that the file consumer cannot use the consumption dir."""
|
||||
|
||||
|
||||
class FileConsumer(Consumer):
    """
    Consumer that picks its documents up from the CONSUME directory.
    """

    def __init__(self, *args, **kwargs):
        """
        Set up bookkeeping and verify that the consumption directory is
        configured and exists.

        Raises FileConsumerError when CONSUME is unset or does not exist.
        """

        Consumer.__init__(self, *args, **kwargs)

        # Last seen modification time per path; see _is_ready().
        self.stats = {}

        # Documents whose OCR failed, so they aren't retried on every pass.
        self._ignore = []

        if not self.CONSUME:
            raise FileConsumerError(
                "The CONSUMPTION_DIR settings variable does not appear to be "
                "set."
            )

        if not os.path.exists(self.CONSUME):
            raise FileConsumerError(
                "Consumption directory {} does not exist".format(self.CONSUME))

    def consume(self):
        """
        Make one pass over the consumption directory and consume every
        document that matches REGEX_TITLE, has not failed OCR before and is
        ready according to _is_ready().
        """

        for entry in os.listdir(self.CONSUME):

            doc = os.path.join(self.CONSUME, entry)

            if not os.path.isfile(doc):
                continue

            if not re.match(self.REGEX_TITLE, doc):
                continue

            if doc in self._ignore:
                continue

            # _is_ready() returns True once the file is ready; the check
            # used to be inverted and skipped exactly those files.
            if not self._is_ready(doc):
                continue

            self._render("Consuming {}".format(doc), 1)

            pngs = self._get_greyscale(doc)

            try:
                text = self._get_ocr(pngs)
            except OCRError:
                self._ignore.append(doc)
                self._render("OCR FAILURE: {}".format(doc), 0)
                continue

            self._store(text, doc)
            self._cleanup(pngs, doc)

    def _is_ready(self, doc):
        """
        Detect whether `doc` is ready to consume or if it's still being
        written to by the uploader.

        Returns True when the modification time equals the one recorded on
        the previous call (and forgets the record), otherwise records the
        current modification time and returns False.
        """

        t = os.stat(doc).st_mtime

        if self.stats.get(doc) == t:
            del self.stats[doc]
            return True

        self.stats[doc] = t

        return False
|
@ -1,170 +0,0 @@
|
||||
import datetime
|
||||
import email
|
||||
import imaplib
|
||||
import os
|
||||
import re
|
||||
|
||||
from base64 import b64decode
|
||||
|
||||
from django.conf import settings
|
||||
|
||||
from . import Consumer
|
||||
|
||||
|
||||
class MailConsumerError(Exception):
    """Signals a problem while logging into mail or handling a message."""
|
||||
|
||||
|
||||
class Message(object):
    """
    A crude, but simple email message class.  We assume that there's a subject
    and exactly one attachment, and that we don't care about the message body.
    """

    SAFE_SUBJECT_REGEX = re.compile(r"^[\w\- ,.]+$")

    # The alternation is wrapped in a non-capturing group so that ^ and $
    # apply to both alternatives.  Group numbering is kept: groups 1 and 3
    # are the full MIME type, groups 2 and 4 the bare subtype.
    SAFE_SUFFIX_REGEX = re.compile(
        r"^(?:(application/(pdf))|(image/(png|jpe?g|gif|tiff)))$")

    def __init__(self, subject, attachment):
        """
        :param subject: the message subject, used to build the file name
        :param attachment: object exposing a `content_type` MIME string
        :raises MailConsumerError: if the attachment type is not supported
        """

        self.subject = subject
        self.attachment = attachment
        self.suffix = None

        m = self.SAFE_SUFFIX_REGEX.match(attachment.content_type)
        if not m:
            raise MailConsumerError(
                "Not-awesome file type: {}".format(attachment.content_type))

        # Use the subtype ("pdf", "png", ...), not the whole MIME type:
        # "application/pdf" would put a path separator into the file name.
        self.suffix = m.group(2) or m.group(4)

    @property
    def file_name(self):
        """
        "<subject>.<suffix>" when the subject is safe to use as a file name,
        otherwise None.
        """
        # The subject has to be checked against the *subject* regex; it was
        # previously matched against the MIME type regex and never passed.
        if self.subject and self.SAFE_SUBJECT_REGEX.match(self.subject):
            return "{}.{}".format(self.subject, self.suffix)
        return None
|
||||
|
||||
|
||||
class Attachment(object):
    """Plain holder for an attachment's payload and its metadata."""

    def __init__(self, data):
        # The payload is the only thing known at construction time.
        self.data = data

        # Metadata starts out unset; callers assign it afterwards.
        self.content_type = self.size = self.name = None
        self.created = self.modified = None
|
||||
|
||||
|
||||
class MailFetcher(object):
    """
    Fetches mail over IMAP and drops the attachments into the consumption
    directory.
    """

    def __init__(self):

        self._connection = None
        self._host = settings.MAIL_CONSUMPTION["HOST"]
        self._port = settings.MAIL_CONSUMPTION["PORT"]
        self._username = settings.MAIL_CONSUMPTION["USERNAME"]
        self._password = settings.MAIL_CONSUMPTION["PASSWORD"]
        # NOTE(review): stored but not used below; _login() selects the
        # literal "INBOX".  Confirm which one is intended.
        self._inbox = settings.MAIL_CONSUMPTION["INBOX"]

        # Mail fetching is switched off when no host is configured.
        self._enabled = bool(self._host)

        self.last_checked = datetime.datetime.now()

    def _connect(self):
        self._connection = imaplib.IMAP4_SSL(self._host, self._port)

    def _login(self):
        """
        Authenticate and select the inbox.

        Raises MailConsumerError when either step does not answer "OK".
        """

        login = self._connection.login(self._username, self._password)
        if not login[0] == "OK":
            raise MailConsumerError("Can't log into mail: {}".format(login[1]))

        inbox = self._connection.select("INBOX")
        if not inbox[0] == "OK":
            raise MailConsumerError("Can't find the inbox: {}".format(inbox[1]))

    def _fetch(self):
        """Yield the raw RFC822 payload of every message in the mailbox."""
        for num in self._connection.search(None, "ALL")[1][0].split():
            typ, data = self._connection.fetch(num, "(RFC822)")
            # NOTE: messages are not flagged \Deleted here, so the expunge
            # in get_messages() has nothing to remove.
            yield data[0][1]

    @staticmethod
    def _to_timestamp(value):
        """
        Turn a datetime, or a date string taken from a Content-Disposition
        parameter, into the numeric timestamp os.utime() requires.  Falls
        back to "now" when the string cannot be parsed.
        """
        if isinstance(value, datetime.datetime):
            return value.timestamp()

        from email.utils import parsedate_to_datetime

        try:
            return parsedate_to_datetime(
                value.strip().strip('"')).timestamp()
        except (TypeError, ValueError, AttributeError):
            return datetime.datetime.now().timestamp()

    def consume(self):
        """
        We don't actually consume here 'cause it's much easier to do that with
        files and we already have a FileConsumer.  So instead, we simply write
        the attachment to the consumption directory as a file with the proper
        format so the FileConsumer can do its job.
        """

        if self._enabled:

            for message in self.get_messages():

                t = self._to_timestamp(
                    message.attachment.created or
                    message.attachment.modified or
                    datetime.datetime.now()
                )

                file_name = os.path.join(Consumer.CONSUME, message.file_name)
                with open(file_name, "wb") as f:
                    f.write(message.attachment.data)

                # Set the times only after the file is closed, otherwise the
                # final flush would overwrite the modification time again.
                os.utime(file_name, times=(t, t))

        self.last_checked = datetime.datetime.now()

    def get_messages(self):
        """
        Connect, log in and return a flat list of Message objects for every
        attachment found in the mailbox.
        """

        self._connect()

        try:
            self._login()

            messages = []
            for data in self._fetch():
                # _parse_message() returns a list, so extend rather than
                # append; consume() expects Message objects, not lists.
                messages.extend(self._parse_message(data))

            self._connection.expunge()
            self._connection.close()
        finally:
            # Always release the connection, even if something failed.
            self._connection.logout()

        return messages

    @staticmethod
    def _parse_message(data):
        """
        Build one Message per attachment found in the raw message `data`
        (bytes as returned by IMAP, or str).

        Cribbed heavily from
        https://www.ianlewis.org/en/parsing-email-attachments-python
        """

        r = []

        if isinstance(data, bytes):
            message = email.message_from_bytes(data)
        else:
            message = email.message_from_string(data)

        for part in message.walk():

            content_disposition = part.get("Content-Disposition")
            if not content_disposition:
                continue

            # Strip every piece: "attachment; filename=..." leaves leading
            # whitespace on the parameters after splitting on ";".
            dispositions = [
                d.strip() for d in content_disposition.strip().split(";")]
            if not dispositions[0].lower() == "attachment":
                continue

            file_data = part.get_payload()
            attachment = Attachment(b64decode(file_data))
            attachment.content_type = part.get_content_type()
            # Length of the still-encoded payload, not of the decoded data.
            attachment.size = len(file_data)

            for param in dispositions[1:]:

                # partition() tolerates "=" inside the value and parameters
                # without a value.
                name, sep, value = param.partition("=")
                if not sep:
                    continue
                name = name.strip().lower()

                if name == "filename":
                    attachment.name = value
                elif name == "create-date":
                    attachment.created = value
                elif name == "modification-date":
                    attachment.modified = value

            r.append(Message(message.get("Subject"), attachment))

        return r
|
208
src/documents/mail.py
Normal file
208
src/documents/mail.py
Normal file
@ -0,0 +1,208 @@
|
||||
import datetime
|
||||
import email
|
||||
import imaplib
|
||||
import os
|
||||
import random
|
||||
import re
|
||||
import time
|
||||
|
||||
from base64 import b64decode
|
||||
from dateutil import parser
|
||||
|
||||
from django.conf import settings
|
||||
|
||||
from .consumer import Consumer
|
||||
|
||||
|
||||
class MailFetcherError(Exception):
    """Signals a mail login/inbox failure or an unsupported attachment."""
|
||||
|
||||
|
||||
class InvalidMessageError(Exception):
    """Signals that a fetched message cannot be turned into a document."""
|
||||
|
||||
|
||||
class Message(object):
    """
    A crude, but simple email message class.  We assume that there's a subject
    and exactly one attachment, and that we don't care about the message body.
    """

    # This regex is probably more restrictive than it needs to be, but it's
    # better safe than sorry.
    SAFE_SUBJECT_REGEX = re.compile(r"^[\w\- ,.']+$")

    def _set_time(self, message):
        """
        Set self.time from the message's Date header, falling back to the
        current time when the header is missing or cannot be parsed.
        """
        self.time = datetime.datetime.now()
        message_time = message.get("Date")
        if message_time:
            try:
                self.time = parser.parse(message_time)
            except (ValueError, AttributeError, OverflowError):
                pass  # We assume that "now" is ok

    def __init__(self, data):
        """
        Parse the raw message bytes in `data`.

        Raises InvalidMessageError when the subject is missing or unsafe, or
        when the message does not carry exactly one attachment.

        Cribbed heavily from
        https://www.ianlewis.org/en/parsing-email-attachments-python
        """

        self.subject = None
        self.time = None
        self.attachment = None

        message = email.message_from_bytes(data)
        self.subject = message.get("Subject")

        self._set_time(message)

        if self.subject is None:
            raise InvalidMessageError("Message does not have a subject")
        if not self.SAFE_SUBJECT_REGEX.match(self.subject):
            raise InvalidMessageError("Message subject is unsafe")

        print('Fetching email: "{}"'.format(self.subject))

        attachments = []
        for part in message.walk():

            content_disposition = part.get("Content-Disposition")
            if not content_disposition:
                continue

            # Only the token before the first ";" names the disposition.
            # Strip it so that "attachment ; filename=..." is recognised.
            disposition = content_disposition.split(";")[0].strip().lower()
            if disposition != "attachment":
                continue

            # decode=True honours the part's Content-Transfer-Encoding
            # instead of assuming that every attachment is base64.
            file_data = part.get_payload(decode=True)

            attachments.append(Attachment(
                file_data, content_type=part.get_content_type()))

        if len(attachments) == 0:
            raise InvalidMessageError(
                "There don't appear to be any attachments to this message")

        if len(attachments) > 1:
            raise InvalidMessageError(
                "There's more than one attachment to this message. It cannot "
                "be indexed automatically."
            )

        self.attachment = attachments[0]

    def __bool__(self):
        return bool(self.attachment)

    @property
    def file_name(self):
        """
        File name to store the attachment under: the subject when it is
        safe, otherwise a random six-digit number, plus the attachment's
        suffix.
        """

        prefix = str(random.randint(100000, 999999))
        if self.SAFE_SUBJECT_REGEX.match(self.subject):
            prefix = self.subject

        return "{}.{}".format(prefix, self.attachment.suffix)
|
||||
|
||||
|
||||
class Attachment(object):
    """
    A single mail attachment: its raw data, its MIME type and the file
    suffix derived from that type.
    """

    # The alternation is wrapped in a non-capturing group so that ^ and $
    # anchor both alternatives; without it "application/pdf-anything" was
    # accepted.  Group numbering is unchanged: 2 and 4 hold the suffix.
    SAFE_SUFFIX_REGEX = re.compile(
        r"^(?:(application/(pdf))|(image/(png|jpeg|gif|tiff)))$")

    def __init__(self, data, content_type):
        """
        :param data: the attachment payload
        :param content_type: MIME type string of the attachment
        :raises MailFetcherError: if `content_type` is not a supported type
        """

        self.content_type = content_type
        self.data = data
        self.suffix = None

        m = self.SAFE_SUFFIX_REGEX.match(self.content_type)
        if not m:
            raise MailFetcherError(
                "Not-awesome file type: {}".format(self.content_type))
        self.suffix = m.group(2) or m.group(4)

    def read(self):
        """Return the attachment payload."""
        return self.data
|
||||
|
||||
|
||||
class MailFetcher(object):
    """
    Fetches mail over IMAP and stores each message's attachment in the
    consumption directory.
    """

    def __init__(self):

        self._connection = None
        self._host = settings.MAIL_CONSUMPTION["HOST"]
        self._port = settings.MAIL_CONSUMPTION["PORT"]
        self._username = settings.MAIL_CONSUMPTION["USERNAME"]
        self._password = settings.MAIL_CONSUMPTION["PASSWORD"]
        # NOTE(review): stored but not used below; _login() selects the
        # literal "INBOX".  Confirm which one is intended.
        self._inbox = settings.MAIL_CONSUMPTION["INBOX"]

        # Mail fetching is switched off when no host is configured.
        self._enabled = bool(self._host)

        self.last_checked = datetime.datetime.now()

    def pull(self):
        """
        Fetch all available mail at the target address and store it locally in
        the consumption directory so that the file consumer can pick it up and
        do its thing.
        """

        if self._enabled:

            for message in self._get_messages():

                print("Storing email: \"{}\"".format(message.subject))

                # timestamp() respects the UTC offset of an aware datetime
                # (as parsed from a Date header) and treats a naive one as
                # local time.  time.mktime(timetuple()) ignored the offset.
                t = int(message.time.timestamp())

                file_name = os.path.join(Consumer.CONSUME, message.file_name)
                with open(file_name, "wb") as f:
                    f.write(message.attachment.data)

                # Set the times only after the file is closed, otherwise the
                # final flush would overwrite the modification time again.
                os.utime(file_name, times=(t, t))

        self.last_checked = datetime.datetime.now()

    def _get_messages(self):
        """
        Connect, log in and return the list of valid messages found in the
        mailbox.
        """

        self._connect()

        try:
            self._login()

            r = [message for message in self._fetch() if message]

            self._connection.expunge()
            self._connection.close()
        finally:
            # Always release the connection.  On failure we deliberately
            # skip expunge()/close() so that nothing is expunged for
            # messages that were never handed back to the caller.
            self._connection.logout()

        return r

    def _connect(self):
        self._connection = imaplib.IMAP4_SSL(self._host, self._port)

    def _login(self):
        """
        Authenticate and select the inbox.

        Raises MailFetcherError when either step does not answer "OK".
        """

        login = self._connection.login(self._username, self._password)
        if not login[0] == "OK":
            raise MailFetcherError("Can't log into mail: {}".format(login[1]))

        inbox = self._connection.select("INBOX")
        if not inbox[0] == "OK":
            raise MailFetcherError("Can't find the inbox: {}".format(inbox[1]))

    def _fetch(self):
        """
        Yield a Message for every mail that could be parsed.  Every mail,
        valid or not, is flagged as deleted on the server.
        """

        for num in self._connection.search(None, "ALL")[1][0].split():

            __, data = self._connection.fetch(num, "(RFC822)")

            message = None
            try:
                message = Message(data[0][1])
            except (InvalidMessageError, MailFetcherError) as e:
                # MailFetcherError is what Attachment raises for an
                # unsupported file type; one such mail must not abort the
                # whole run.
                print(e)

            self._connection.store(num, "+FLAGS", "\\Deleted")
            if message:
                yield message
|
@ -5,18 +5,14 @@ import time
|
||||
from django.conf import settings
|
||||
from django.core.management.base import BaseCommand, CommandError
|
||||
|
||||
from ...consumers import (
|
||||
FileConsumer, FileConsumerError, MailConsumer, MailConsumerError)
|
||||
from ...consumer import Consumer, ConsumerError
|
||||
from ...mail import MailFetcher, MailFetcherError
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
"""
|
||||
Loop over every file found in CONSUMPTION_DIR and:
|
||||
1. Convert it to a greyscale png
|
||||
2. Use tesseract on the png
|
||||
3. Encrypt and store the document in the MEDIA_ROOT
|
||||
4. Store the OCR'd text in the database
|
||||
5. Delete the document and image(s)
|
||||
On every iteration of an infinite loop, consume what we can from the
|
||||
consumption directory, and fetch any mail available.
|
||||
"""
|
||||
|
||||
LOOP_TIME = 10 # Seconds
|
||||
@ -29,7 +25,7 @@ class Command(BaseCommand):
|
||||
self.verbosity = 0
|
||||
|
||||
self.file_consumer = None
|
||||
self.mail_consumer = None
|
||||
self.mail_fetcher = None
|
||||
|
||||
BaseCommand.__init__(self, *args, **kwargs)
|
||||
|
||||
@ -38,9 +34,9 @@ class Command(BaseCommand):
|
||||
self.verbosity = options["verbosity"]
|
||||
|
||||
try:
|
||||
self.file_consumer = FileConsumer(verbosity=self.verbosity)
|
||||
self.mail_consumer = MailConsumer(verbosity=self.verbosity)
|
||||
except (FileConsumerError, MailConsumerError) as e:
|
||||
self.file_consumer = Consumer(verbosity=self.verbosity)
|
||||
self.mail_fetcher = MailFetcher()
|
||||
except (ConsumerError, MailFetcherError) as e:
|
||||
raise CommandError(e)
|
||||
|
||||
try:
|
||||
@ -59,11 +55,13 @@ class Command(BaseCommand):
|
||||
|
||||
def loop(self):
    """
    Run one iteration: consume the files that are available, then pull mail
    if at least MAIL_DELTA has passed since the fetcher last checked.
    """

    # Consume whatever files we can
    self.file_consumer.consume()

    # Occasionally fetch mail and store it to be consumed on the next loop.
    # Mail is due once last_checked + MAIL_DELTA lies in the past; the
    # comparison used to be inverted and pulled only while it was NOT due.
    next_check = self.mail_fetcher.last_checked + self.MAIL_DELTA
    if next_check <= datetime.datetime.now():
        self.mail_fetcher.pull()
|
||||
|
||||
def _render(self, text, verbosity):
|
||||
if self.verbosity >= verbosity:
|
||||
|
@ -1,3 +1,4 @@
|
||||
import base64
|
||||
import os
|
||||
import magic
|
||||
|
||||
@ -6,10 +7,10 @@ from hashlib import md5
|
||||
from django.conf import settings
|
||||
from django.test import TestCase
|
||||
|
||||
from ...consumers.mail import MailConsumer
|
||||
from ...mail import Message, Attachment
|
||||
|
||||
|
||||
class TestMailConsumer(TestCase):
|
||||
class TestMessage(TestCase):
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
|
||||
@ -23,21 +24,33 @@ class TestMailConsumer(TestCase):
|
||||
"mail.txt"
|
||||
)
|
||||
|
||||
def test_parse(self):
|
||||
consumer = MailConsumer()
|
||||
with open(self.sample) as f:
|
||||
def test_init(self):
|
||||
|
||||
messages = consumer._parse_message(f.read())
|
||||
with open(self.sample, "rb") as f:
|
||||
|
||||
self.assertTrue(len(messages), 1)
|
||||
self.assertEqual(messages[0]["subject"], "Test 0")
|
||||
message = Message(f.read())
|
||||
|
||||
attachment = messages[0]["attachment"]
|
||||
data = attachment.read()
|
||||
self.assertTrue(message)
|
||||
self.assertEqual(message.subject, "Test 0")
|
||||
|
||||
data = message.attachment.read()
|
||||
|
||||
self.assertEqual(
|
||||
md5(data).hexdigest(), "7c89655f9e9eb7dd8cde8568e8115d59")
|
||||
|
||||
self.assertEqual(attachment.content_type, "application/pdf")
|
||||
self.assertEqual(
|
||||
message.attachment.content_type, "application/pdf")
|
||||
with magic.Magic(flags=magic.MAGIC_MIME_TYPE) as m:
|
||||
self.assertEqual(m.id_buffer(data), "application/pdf")
|
||||
|
||||
|
||||
class TestAttachment(TestCase):
    """Checks the suffix Attachment derives from each supported MIME type."""

    def test_init(self):
        data = base64.encodebytes(b"0")

        # (content type, suffix the attachment is expected to report)
        expected_suffixes = (
            ("application/pdf", "pdf"),
            ("image/png", "png"),
            ("image/jpeg", "jpeg"),
            ("image/gif", "gif"),
            ("image/tiff", "tiff"),
        )
        for content_type, suffix in expected_suffixes:
            self.assertEqual(Attachment(data, content_type).suffix, suffix)

        # read() hands back exactly what was passed in.
        self.assertEqual(Attachment(data, "image/png").read(), data)
|
||||
|
@ -1 +1 @@
|
||||
from .consumers.mail import TestMailConsumer
|
||||
from .consumers.mail import TestMailFetcher
|
||||
|
Loading…
x
Reference in New Issue
Block a user