diff --git a/docs/consumption.rst b/docs/consumption.rst index 03430b2e0..8b9b35433 100644 --- a/docs/consumption.rst +++ b/docs/consumption.rst @@ -90,13 +90,16 @@ So, with all that in mind, here's what you do to get it running: folder in an existing email box and note the path to that folder. 2. In ``settings.py`` set all of the appropriate values in ``MAIL_CONSUMPTION``. If you decided to use a subfolder of an existing account, then make sure you - set ``INBOX`` accordingly here. + set ``INBOX`` accordingly here. You also have to set + ``UPLOAD_SHARED_SECRET`` to something you can remember, because you'll have + to include it in the body of every email you send. 3. Restart the :ref:`consumer `. The consumer will check the configured email account every 10 minutes for something new and pull down whatever it finds. 4. Send yourself an email! Note that the subject is treated as the file name, so if you set the subject to ``Sender - Title - tag,tag,tag``, you'll get - what you expect. + what you expect. Also, you must include the aforementioned secret string in + the body of every email so the fetcher knows that it's safe to import. 5. After a few minutes, the consumer will poll your mailbox, pull down the message, and place the attachment in the consumption directory with the appropriate name. A few minutes later, the consumer will import it like any diff --git a/src/documents/mail.py b/src/documents/mail.py index bea54a0ac..550c4bd83 100644 --- a/src/documents/mail.py +++ b/src/documents/mail.py @@ -1,11 +1,12 @@ import datetime -import email import imaplib import os import re import time from base64 import b64decode +from email import policy +from email.parser import BytesParser from dateutil import parser from django.conf import settings @@ -29,14 +30,7 @@ class Message(Renderable): and n attachments, and that we don't care about the message body. 
""" - def _set_time(self, message): - self.time = datetime.datetime.now() - message_time = message.get("Date") - if message_time: - try: - self.time = parser.parse(message_time) - except (ValueError, AttributeError): - pass # We assume that "now" is ok + SECRET = settings.UPLOAD_SHARED_SECRET def __init__(self, data, verbosity=1): """ @@ -50,17 +44,15 @@ class Message(Renderable): self.time = None self.attachment = None - message = email.message_from_bytes(data) - self.subject = message.get("Subject").replace("\r\n", "") + message = BytesParser(policy=policy.default).parsebytes(data) + self.subject = str(message["Subject"]).replace("\r\n", "") + self.body = str(message.get_body()) + + self.check_subject() + self.check_body() self._set_time(message) - if self.subject is None: - raise InvalidMessageError("Message does not have a subject") - if not Sender.SAFE_REGEX.match(self.subject): - raise InvalidMessageError("Message subject is unsafe: {}".format( - self.subject)) - self._render('Fetching email: "{}"'.format(self.subject), 1) attachments = [] @@ -94,6 +86,26 @@ class Message(Renderable): def __bool__(self): return bool(self.attachment) + def check_subject(self): + if self.subject is None: + raise InvalidMessageError("Message does not have a subject") + if not Sender.SAFE_REGEX.match(self.subject): + raise InvalidMessageError("Message subject is unsafe: {}".format( + self.subject)) + + def check_body(self): + if self.SECRET not in self.body: + raise InvalidMessageError("The secret wasn't in the body") + + def _set_time(self, message): + self.time = datetime.datetime.now() + message_time = message.get("Date") + if message_time: + try: + self.time = parser.parse(message_time) + except (ValueError, AttributeError): + pass # We assume that "now" is ok + @property def file_name(self): return "{}.{}".format(self.subject, self.attachment.suffix) diff --git a/src/documents/tests/samples/mail.txt b/src/documents/tests/samples/mail.txt index a4e2a267d..b7712eee5 100644 --- 
a/src/documents/tests/samples/mail.txt +++ b/src/documents/tests/samples/mail.txt @@ -38,7 +38,7 @@ This is a multi-part message in MIME format. Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 7bit -This is the test body. +The secret word is "paperless" :-) --------------090701020702030809070008 Content-Type: application/pdf;