The 'API' is written but untested

This commit is contained in:
Daniel Quinn 2016-02-08 23:46:16 +00:00
parent 212752f46e
commit 0eaed36420
6 changed files with 127 additions and 11 deletions

86
src/documents/forms.py Normal file
View File

@ -0,0 +1,86 @@
import magic
import os
from datetime import datetime
from hashlib import sha256
from time import mktime
from django import forms
from django.conf import settings
from .models import Document, Sender
from .consumer import Consumer
class UploadForm(forms.Form):
    """
    Validate a document pushed through the upload "API" and queue it.

    The client submits an optional sender and title, the file itself, and a
    signature which must equal the hex SHA-256 digest of
    ``sender + title + SECRET``.  Once the form is valid, ``save()`` writes
    the file into the consumer's directory for the consumer to pick up.
    """

    # Shared secret mixed into the signature; read from settings.
    SECRET = settings.UPLOAD_SHARED_SECRET

    # Accepted MIME types mapped to the matching ``Document`` type constant.
    TYPE_LOOKUP = {
        "application/pdf": Document.TYPE_PDF,
        "image/png": Document.TYPE_PNG,
        "image/jpeg": Document.TYPE_JPG,
        "image/gif": Document.TYPE_GIF,
        "image/tiff": Document.TYPE_TIF,
    }

    # Field lengths mirror the model columns the values relate to.
    sender = forms.CharField(
        max_length=Sender._meta.get_field("name").max_length, required=False)
    title = forms.CharField(
        max_length=Document._meta.get_field("title").max_length,
        required=False)
    document = forms.FileField()
    signature = forms.CharField(max_length=256)

    def clean_sender(self):
        """
        Return the sender name, or ``None`` when none was supplied.

        I suppose it might look cleaner to use .get_or_create() here, but that
        would also allow someone to fill up the db with bogus senders before
        all validation was met.

        Raises ``forms.ValidationError`` if the name does not match
        ``Sender.SAFE_REGEX`` or contains the `` - `` separator that
        ``save()`` uses when building the file name.
        """
        sender = self.cleaned_data.get("sender")
        if not sender:
            return None
        if not Sender.SAFE_REGEX.match(sender) or " - " in sender:
            raise forms.ValidationError("That sender name is suspicious.")
        return sender

    def clean_title(self):
        """
        Return the title, or ``None`` when none was supplied.

        Raises ``forms.ValidationError`` if the title does not match
        ``Sender.SAFE_REGEX`` or contains the `` - `` separator that
        ``save()`` uses when building the file name.
        """
        title = self.cleaned_data.get("title")
        if not title:
            return None
        if not Sender.SAFE_REGEX.match(title) or " - " in title:
            raise forms.ValidationError("That title is suspicious.")
        # Without this return the cleaned title would always be None.
        return title

    def clean_document(self):
        """
        Read the upload and return ``(raw_bytes, document_type)``.

        Raises ``forms.ValidationError`` if the detected MIME type is not a
        key of ``TYPE_LOOKUP``.
        """
        document = self.cleaned_data.get("document").read()
        # NOTE(review): assumes the installed ``magic`` binding exposes
        # Magic(flags=...) as a context manager with id_buffer() -- confirm.
        with magic.Magic(flags=magic.MAGIC_MIME_TYPE) as m:
            file_type = m.id_buffer(document)
        if file_type not in self.TYPE_LOOKUP:
            raise forms.ValidationError("The file type is invalid.")
        return document, self.TYPE_LOOKUP[file_type]

    def clean(self):
        """
        Check the signature against the submitted sender and title.

        Returns ``self.cleaned_data`` on success and raises
        ``forms.ValidationError`` when the signature does not match.  The
        check is skipped if a field has already failed validation, since the
        form is invalid regardless and the inputs may be missing.
        """
        # Local import: this module does not import hmac at the top.
        from hmac import compare_digest

        if self.errors:
            return self.cleaned_data

        # clean_sender()/clean_title() return None for blank values; treat
        # those as empty strings so the digest input is always text.
        sender = self.cleaned_data.get("sender") or ""
        title = self.cleaned_data.get("title") or ""
        signature = self.cleaned_data.get("signature") or ""

        payload = (sender + title + self.SECRET).encode("utf-8")
        expected = sha256(payload).hexdigest()

        # Compare as bytes in constant time; the submitted value is
        # untrusted and may contain non-ASCII characters.
        if not compare_digest(
                expected.encode("ascii"), signature.encode("utf-8")):
            raise forms.ValidationError("The signature is invalid.")

        return self.cleaned_data

    def save(self):
        """
        Since the consumer already does a lot of work, it's easier just to save
        to-be-consumed files to the consumption directory rather than have the
        form do that as well.  Think of it as a poor-man's queue server.

        Must only be called after the form has validated.
        """
        sender = self.cleaned_data.get("sender")
        title = self.cleaned_data.get("title")
        document, file_type = self.cleaned_data.get("document")

        # mktime() wants a time tuple, not a datetime instance.
        t = int(mktime(datetime.now().timetuple()))

        # NOTE(review): sender and title are optional, so either may be None
        # here and would be rendered literally as "None" -- confirm what file
        # name the consumer expects in that case.
        file_name = os.path.join(
            Consumer.CONSUME, "{} - {}.{}".format(sender, title, file_type))

        with open(file_name, "wb") as f:
            f.write(document)

        # Stamp the file only after it is closed so the final flush cannot
        # overwrite the timestamps.
        os.utime(file_name, times=(t, t))

View File

@ -12,6 +12,7 @@ from dateutil import parser
from django.conf import settings
from .consumer import Consumer
from .models import Sender
class MailFetcherError(Exception):
@ -28,10 +29,6 @@ class Message(object):
and n attachments, and that we don't care about the message body.
"""
# This regex is probably more restrictive than it needs to be, but it's
# better safe than sorry.
SAFE_SUBJECT_REGEX = re.compile(r"^[\w\- ,.']+$")
def _set_time(self, message):
self.time = datetime.datetime.now()
message_time = message.get("Date")
@ -58,7 +55,7 @@ class Message(object):
if self.subject is None:
raise InvalidMessageError("Message does not have a subject")
if not self.SAFE_SUBJECT_REGEX.match(self.subject):
if not Sender.SAFE_REGEX.match(self.subject):
raise InvalidMessageError("Message subject is unsafe")
print('Fetching email: "{}"'.format(self.subject))

View File

@ -26,6 +26,10 @@ class SluggedModel(models.Model):
class Sender(SluggedModel):
# This regex is probably more restrictive than it needs to be, but it's
# better safe than sorry.
SAFE_REGEX = re.compile(r"^[\w\- ,.']+$")
class Meta(object):
ordering = ("name",)
@ -72,7 +76,7 @@ class Tag(SluggedModel):
"appear in the PDF, albeit not in the order provided. A "
"\"literal\" match means that the text you enter must appear in "
"the PDF exactly as you've entered it, and \"regular expression\" "
"uses a regex to match the PDF. If you don't know what a regex"
"uses a regex to match the PDF. If you don't know what a regex "
"is, you probably don't want this option."
)
)
@ -127,7 +131,8 @@ class Document(models.Model):
editable=False,
choices=tuple([(t, t.upper()) for t in TYPES])
)
tags = models.ManyToManyField(Tag, related_name="documents")
tags = models.ManyToManyField(
Tag, related_name="documents", blank=True)
created = models.DateTimeField(default=timezone.now, editable=False)
modified = models.DateTimeField(auto_now=True, editable=False)

View File

@ -1,10 +1,12 @@
from django.http import HttpResponse
from django.template.defaultfilters import slugify
from django.views.generic.detail import DetailView
from django.views.decorators.csrf import csrf_exempt
from django.views.generic import FormView, DetailView
from paperless.db import GnuPG
from .models import Document
from .forms import UploadForm
class PdfView(DetailView):
@ -32,3 +34,21 @@ class PdfView(DetailView):
slugify(str(self.object)) + "." + self.object.file_type)
return response
class PushView(FormView):
    """
    A crude REST API for creating documents.

    The submitted data is validated with ``UploadForm``.  The response body
    is "1" when the form is valid and the upload has been saved, and "0"
    when validation fails.
    """

    form_class = UploadForm

    @classmethod
    def as_view(cls, **kwargs):
        """
        Return the view callable for this class, exempt from CSRF checks.
        """
        # Go through super() so the view is built from *this* class.  Calling
        # FormView.as_view() directly binds ``cls`` to FormView, which drops
        # form_class and the form_valid/form_invalid overrides below.
        return csrf_exempt(super().as_view(**kwargs))

    def form_valid(self, form):
        """
        Save the validated upload and acknowledge it with "1".
        """
        # Nothing else calls save(), so without this the accepted file would
        # never be written.
        form.save()
        return HttpResponse("1")

    def form_invalid(self, form):
        """
        Report a rejected upload with "0".
        """
        return HttpResponse("0")

View File

@ -176,7 +176,12 @@ MAIL_CONSUMPTION = {
# want to download them. Set it and change the permissions on this file to
# 0600, or set it to `None` and you'll be prompted for the passphrase at
# runtime. The default looks for an environment variable.
# DON'T FORGET TO SET THIS as leaving it blank may cause some strange things with
# GPG, including an interesting case where it may "encrypt" zero-byte files.
# DON'T FORGET TO SET THIS as leaving it blank may cause some strange things
# with GPG, including an interesting case where it may "encrypt" zero-byte
# files.
PASSPHRASE = os.environ.get("PAPERLESS_PASSPHRASE")
# If you intend to use the "API" to push files into the consumer, you'll need to
# provide a shared secret here. Leaving this as the default will disable the
# API.
UPLOAD_SHARED_SECRET = os.environ.get("PAPERLESS_SECRET", "")

View File

@ -18,9 +18,12 @@ from django.conf import settings
from django.conf.urls import url, static
from django.contrib import admin
from documents.views import PdfView
from documents.views import PdfView, PushView
# Patterns are tried in list order.  The admin entry uses an empty regex, so
# more specific routes are listed ahead of it.
urlpatterns = [
    url(r"^fetch/(?P<pk>\d+)$", PdfView.as_view(), name="fetch"),
    url(r'', admin.site.urls),
]
urlpatterns += static.static(
    settings.MEDIA_URL, document_root=settings.MEDIA_ROOT)

# The push endpoint is only registered when a shared secret is configured.
# It goes to the front of the list so it is tried before the admin pattern.
if settings.UPLOAD_SHARED_SECRET:
    push_route = url(r"^push$", PushView.as_view(), name="push")
    urlpatterns.insert(0, push_route)