class UploadForm(forms.Form):
    """
    Validate a document pushed over HTTP and queue it for the consumer.

    The client submits an optional ``sender`` and ``title``, the ``document``
    file itself, and a ``signature``.  The signature must equal the hex
    SHA-256 digest of ``sender + title + SECRET`` (UTF-8 encoded), where
    missing optional values count as the empty string.
    """

    SECRET = settings.UPLOAD_SHARED_SECRET

    # Maps the MIME type sniffed from the upload to the Document type
    # constant that is later used as the file extension in ``save()``.
    TYPE_LOOKUP = {
        "application/pdf": Document.TYPE_PDF,
        "image/png": Document.TYPE_PNG,
        "image/jpeg": Document.TYPE_JPG,
        "image/gif": Document.TYPE_GIF,
        "image/tiff": Document.TYPE_TIF,
    }

    sender = forms.CharField(
        max_length=Sender._meta.get_field("name").max_length, required=False)
    title = forms.CharField(
        max_length=Document._meta.get_field("title").max_length,
        required=False)
    document = forms.FileField()
    signature = forms.CharField(max_length=256)

    def clean_sender(self):
        """
        Return the sender name, or ``None`` when it was left blank.

        I suppose it might look cleaner to use .get_or_create() here, but that
        would also allow someone to fill up the db with bogus senders before
        all validation was met.

        Raises ``forms.ValidationError`` when the name fails
        ``Sender.SAFE_REGEX`` or contains " - ", the separator ``save()``
        uses when building the file name.
        """
        sender = self.cleaned_data.get("sender")
        if not sender:
            return None
        if not Sender.SAFE_REGEX.match(sender) or " - " in sender:
            raise forms.ValidationError("That sender name is suspicious.")
        return sender

    def clean_title(self):
        """
        Return the title, or ``None`` when it was left blank.

        Raises ``forms.ValidationError`` under the same rules as
        ``clean_sender()``.
        """
        title = self.cleaned_data.get("title")
        if not title:
            return None
        if not Sender.SAFE_REGEX.match(title) or " - " in title:
            raise forms.ValidationError("That title is suspicious.")
        # Django stores whatever clean_<field>() returns, so the value has to
        # be handed back explicitly or the title would be replaced by None.
        return title

    def clean_document(self):
        """
        Return ``(raw_bytes, document_type)`` for the uploaded file.

        The type is decided by sniffing the content with libmagic rather than
        trusting the client.  Raises ``forms.ValidationError`` when the
        sniffed MIME type is not listed in ``TYPE_LOOKUP``.
        """
        document = self.cleaned_data.get("document").read()
        with magic.Magic(flags=magic.MAGIC_MIME_TYPE) as m:
            file_type = m.id_buffer(document)
        if file_type not in self.TYPE_LOOKUP:
            raise forms.ValidationError("The file type is invalid.")
        return document, self.TYPE_LOOKUP[file_type]

    def clean(self):
        """
        Check the request signature against the shared secret.

        Returns the cleaned data when the signature matches and raises
        ``forms.ValidationError`` otherwise.
        """
        # Imported here so the fix does not depend on the module import block.
        import hmac

        # Fields that already failed validation are absent from cleaned_data,
        # and blank optional fields are None, hence .get() and the "or".
        sender = self.cleaned_data.get("sender") or ""
        title = self.cleaned_data.get("title") or ""
        signature = self.cleaned_data.get("signature") or ""

        payload = (sender + title + self.SECRET).encode("utf-8")
        expected = sha256(payload).hexdigest()

        # Compare as bytes and in constant time: compare_digest() rejects
        # non-ASCII str input, and a plain == would leak timing information.
        matches = hmac.compare_digest(
            expected.encode("utf-8"), signature.encode("utf-8"))
        if not matches:
            raise forms.ValidationError("The signature is invalid.")
        return self.cleaned_data

    def save(self):
        """
        Since the consumer already does a lot of work, it's easier just to save
        to-be-consumed files to the consumption directory rather than have the
        form do that as well.  Think of it as a poor-man's queue server.

        Must only be called after the form has validated successfully.
        """
        sender = self.cleaned_data.get("sender")
        title = self.cleaned_data.get("title")
        document, file_type = self.cleaned_data.get("document")

        # mktime() takes a struct_time, not a datetime.
        t = int(mktime(datetime.now().timetuple()))

        # NOTE(review): a blank sender or title is formatted as the literal
        # text "None" here -- confirm how the consumer expects such files to
        # be named.
        file_name = os.path.join(
            Consumer.CONSUME, "{} - {}.{}".format(sender, title, file_type))

        with open(file_name, "wb") as f:
            f.write(document)
        os.utime(file_name, times=(t, t))
class PushView(FormView):
    """
    A crude REST API for creating documents.

    The request is validated with ``UploadForm``.  The response body is "1"
    when the upload validated and was saved, and "0" when validation failed.
    """

    form_class = UploadForm

    @classmethod
    def as_view(cls, **kwargs):
        """
        Return the view callable for this class, exempt from CSRF checks.
        """
        # Resolve as_view() through ``cls``: calling FormView.as_view()
        # directly would build a plain FormView and silently ignore this
        # class's form_class and form_valid()/form_invalid() overrides.
        return csrf_exempt(super(PushView, cls).as_view(**kwargs))

    def form_valid(self, form):
        """
        Save the validated upload, then report success.
        """
        # Without this call the accepted document would simply be discarded.
        form.save()
        return HttpResponse("1")

    def form_invalid(self, form):
        """
        Report failure without exposing the validation errors.
        """
        return HttpResponse("0")
# NOTE(review): the named group is assumed to be "pk", the keyword a
# DetailView looks up by default -- confirm against PdfView.
urlpatterns = [
    url(r"^fetch/(?P<pk>\d+)$", PdfView.as_view(), name="fetch"),
    url(r'', admin.site.urls),
] + static.static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT)

# The push endpoint only exists when a shared secret has been configured.
# It is inserted at the front because the admin pattern above is an empty
# regex that matches every path, so anything listed after it is unreachable.
if settings.UPLOAD_SHARED_SECRET:
    urlpatterns.insert(0, url(r"^push$", PushView.as_view(), name="push"))