Fix HTTP POST of documents

After tinkering with this for about 2 hours, I'm reasonably sure this
never worked.  This feature was added by me in haste and poked at by the
occasional contributor, and it suffered from neglect.

* Removed the requirement for signature generation in favour of simply
  requiring BasicAuth or a valid session id.
* Fixed a number of bugs in the form itself that would have ensured that
  the form never accepted anything.
* Documented it all properly so now (hopefully) people will have less
  trouble figuring it out in the future.
This commit is contained in:
Daniel Quinn 2017-06-11 01:23:37 +01:00
parent c82d45689c
commit 6f635c74fc
4 changed files with 73 additions and 49 deletions

View File

@ -147,46 +147,83 @@ So, with all that in mind, here's what you do to get it running:
HTTP POST HTTP POST
========= =========
You can also submit a document via HTTP POST. It doesn't do tags yet, and the You can also submit a document via HTTP POST, so long as you do so after
URL schema isn't concrete, but it's a start. authenticating. To push your document to Paperless, send an HTTP POST to the
server with the following name/value pairs:
To push your document to Paperless, send an HTTP POST to the server with the
following name/value pairs:
* ``correspondent``: The name of the document's correspondent. Note that there * ``correspondent``: The name of the document's correspondent. Note that there
are restrictions on what characters you can use here. Specifically, are restrictions on what characters you can use here. Specifically,
alphanumeric characters, `-`, `,`, `.`, and `'` are ok, everything else it alphanumeric characters, `-`, `,`, `.`, and `'` are ok, everything else is
out. You also can't use the sequence ` - ` (space, dash, space). out. You also can't use the sequence ` - ` (space, dash, space).
* ``title``: The title of the document. The rules for characters is the same * ``title``: The title of the document. The rules for characters is the same
here as the correspondent. here as the correspondent.
* ``signature``: For security reasons, we have the correspondent send a * ``document``: The file you're uploading
signature using a "shared secret" method to make sure that random strangers
don't start uploading stuff to your server. The means of generating this
signature is defined below.
Specify ``enctype="multipart/form-data"``, and then POST your file with:: Specify ``enctype="multipart/form-data"``, and then POST your file with::
Content-Disposition: form-data; name="document"; filename="whatever.pdf" Content-Disposition: form-data; name="document"; filename="whatever.pdf"
An example of this in HTML is a typical form:
.. _consumption-http-signature: .. code:: html
Generating the Signature <form method="post" enctype="multipart/form-data">
------------------------ <input type="text" name="correspondent" value="My Correspondent" />
<input type="text" name="title" value="My Title" />
<input type="file" name="document" />
<input type="submit" name="go" value="Do the thing" />
</form>
Generating a signature based a shared secret is pretty simple: define a secret, But a potentially more useful way to do this would be in Python. Here we use
and store it on the server and the client. Then use that secret, along with the requests library to handle basic authentication and to send the POST data
the text you want to verify to generate a string that you can use for to the URL.
verification.
In the case of Paperless, you configure the server with the secret by setting
``UPLOAD_SHARED_SECRET``. Then on your client, you generate your signature by
concatenating the correspondent, title, and the secret, and then using sha256
to generate a hexdigest.
If you're using Python, this is what that looks like:
.. code:: python .. code:: python
import os
from hashlib import sha256 from hashlib import sha256
signature = sha256(correspondent + title + secret).hexdigest()
import requests
from requests.auth import HTTPBasicAuth
# You authenticate via BasicAuth or with a session id.
# We use BasicAuth here
username = "my-username"
password = "my-super-secret-password"
# Where you have Paperless installed and listening
url = "http://localhost:8000/push"
# Document metadata
correspondent = "Test Correspondent"
title = "Test Title"
# The local file you want to push
path = "/path/to/some/directory/my-document.pdf"
with open(path, "rb") as f:
response = requests.post(
url=url,
data={"title": title, "correspondent": correspondent},
files={"document": (os.path.basename(path), f, "application/pdf")},
auth=HTTPBasicAuth(username, password),
allow_redirects=False
)
if response.status_code == 202:
# Everything worked out ok
print("Upload successful")
else:
# If you don't get a 202, it's probably because your credentials
# are wrong or something. This will give you a rough idea of what
# happened.
print("We got HTTP status code: {}".format(response.status_code))
for k, v in response.headers.items():
print("{}: {}".format(k, v))

View File

@ -2,7 +2,6 @@ import magic
import os import os
from datetime import datetime from datetime import datetime
from hashlib import sha256
from time import mktime from time import mktime
from django import forms from django import forms
@ -32,10 +31,9 @@ class UploadForm(forms.Form):
required=False required=False
) )
document = forms.FileField() document = forms.FileField()
signature = forms.CharField(max_length=256)
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
forms.Form.__init__(*args, **kwargs) forms.Form.__init__(self, *args, **kwargs)
self._file_type = None self._file_type = None
def clean_correspondent(self): def clean_correspondent(self):
@ -82,17 +80,6 @@ class UploadForm(forms.Form):
return document return document
def clean(self):
corresp = self.cleaned_data.get("correspondent")
title = self.cleaned_data.get("title")
signature = self.cleaned_data.get("signature")
if sha256(corresp + title + self.SECRET).hexdigest() == signature:
return self.cleaned_data
raise forms.ValidationError("The signature provided did not validate")
def save(self): def save(self):
""" """
Since the consumer already does a lot of work, it's easier just to save Since the consumer already does a lot of work, it's easier just to save
@ -104,7 +91,7 @@ class UploadForm(forms.Form):
title = self.cleaned_data.get("title") title = self.cleaned_data.get("title")
document = self.cleaned_data.get("document") document = self.cleaned_data.get("document")
t = int(mktime(datetime.now())) t = int(mktime(datetime.now().timetuple()))
file_name = os.path.join( file_name = os.path.join(
Consumer.CONSUME, Consumer.CONSUME,
"{} - {}.{}".format(correspondent, title, self._file_type) "{} - {}.{}".format(correspondent, title, self._file_type)

View File

@ -1,5 +1,4 @@
from django.http import HttpResponse from django.http import HttpResponse, HttpResponseBadRequest
from django.views.decorators.csrf import csrf_exempt
from django.views.generic import DetailView, FormView, TemplateView from django.views.generic import DetailView, FormView, TemplateView
from django_filters.rest_framework import DjangoFilterBackend from django_filters.rest_framework import DjangoFilterBackend
from paperless.db import GnuPG from paperless.db import GnuPG
@ -81,15 +80,12 @@ class PushView(SessionOrBasicAuthMixin, FormView):
form_class = UploadForm form_class = UploadForm
@classmethod
def as_view(cls, **kwargs):
return csrf_exempt(FormView.as_view(**kwargs))
def form_valid(self, form): def form_valid(self, form):
return HttpResponse("1") form.save()
return HttpResponse("1", status=202)
def form_invalid(self, form): def form_invalid(self, form):
return HttpResponse("0") return HttpResponseBadRequest(str(form.errors))
class CorrespondentViewSet(ModelViewSet): class CorrespondentViewSet(ModelViewSet):

View File

@ -1,6 +1,7 @@
from django.conf import settings from django.conf import settings
from django.conf.urls import url, static, include from django.conf.urls import url, static, include
from django.contrib import admin from django.contrib import admin
from django.views.decorators.csrf import csrf_exempt
from rest_framework.routers import DefaultRouter from rest_framework.routers import DefaultRouter
@ -40,7 +41,10 @@ urlpatterns = [
] + static.static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT) ] + static.static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT)
if settings.SHARED_SECRET: if settings.SHARED_SECRET:
urlpatterns.insert(0, url(r"^push$", PushView.as_view(), name="push")) urlpatterns.insert(
0,
url(r"^push$", csrf_exempt(PushView.as_view()), name="push")
)
# Text in each page's <h1> (and above login form). # Text in each page's <h1> (and above login form).
admin.site.site_header = 'Paperless' admin.site.site_header = 'Paperless'