Updated the API: it now supports tags, correspondents, document types and a title when uploading documents.

This commit is contained in:
jonaswinkler 2020-12-03 18:36:23 +01:00
parent 20fc065567
commit 8b16cd99dc
7 changed files with 302 additions and 82 deletions

View File

@ -38,6 +38,50 @@ individual documents:
are in place. However, if you use these old URLs to access documents, you are in place. However, if you use these old URLs to access documents, you
should update your app or script to use the new URLs. should update your app or script to use the new URLs.
.. note::
The document endpoint provides tags, document types and correspondents as
ids in their corresponding fields. These are writeable. Paperless also
offers read-only objects for assigned tags, types and correspondents;
however, these might be removed in the future. For now, the front end
requires them.
Authorization
#############
The REST API provides three different forms of authentication.
1. Basic authentication
Authorize by providing an HTTP header in the form
.. code::
Authorization: Basic <credentials>
where ``credentials`` is a base64-encoded string of ``<username>:<password>``
2. Session authentication
When you're logged into paperless in your browser, you're automatically
logged into the API as well and don't need to provide any authorization
headers.
3. Token authentication
Paperless also offers an endpoint to acquire authentication tokens.
POST a username and password as a form or JSON string to ``/api/token/``
and paperless will respond with a token if the login data is correct.
This token can be used to authenticate other requests with the
following HTTP header:
.. code::
Authorization: Token <token>
Tokens can be managed and revoked in the paperless admin.
Searching for documents Searching for documents
####################### #######################
@ -166,8 +210,19 @@ The API provides a special endpoint for file uploads:
POST a multipart form to this endpoint, where the form field ``document`` contains POST a multipart form to this endpoint, where the form field ``document`` contains
the document that you want to upload to paperless. The filename is sanitized and the document that you want to upload to paperless. The filename is sanitized and
then used to store the document in the consumption folder, where the consumer will then used to store the document in a temporary directory, and the consumer will
detect the document and process it as any other document. be instructed to consume the document from there.
The endpoint will immediately return "OK." if the document was stored in the The endpoint supports the following optional form fields:
consumption directory.
* ``title``: Specify a title that the consumer should use for the document.
* ``correspondent``: Specify a correspondent that the consumer should use for the document.
Case sensitive. If the specified correspondent does not exist, it will be created with this
name and default settings.
* ``document_type``: Similar to correspondent.
* ``tags``: Similar to correspondent. Specify this multiple times to have multiple tags added
to the document.
The endpoint will immediately return "OK" if the document consumption process
was started successfully. No additional status information about the consumption
process itself is available, since that happens in a different process.

View File

@ -1,59 +0,0 @@
import os
import tempfile
from datetime import datetime
from time import mktime
import magic
from django import forms
from django.conf import settings
from django_q.tasks import async_task
from pathvalidate import validate_filename, ValidationError
from documents.parsers import is_mime_type_supported
class UploadForm(forms.Form):
    """
    Form that accepts a single uploaded document, validates it and queues it
    for consumption.
    """

    document = forms.FileField()

    def clean_document(self):
        """
        Validate the uploaded file's name and content.

        Returns a ``(filename, data)`` tuple; this tuple is what ends up in
        ``cleaned_data["document"]`` and is unpacked again by ``save()``.

        Raises ``forms.ValidationError`` if ``validate_filename`` rejects the
        name, or if the mime type sniffed from the content is not supported.
        """
        upload = self.cleaned_data.get("document")
        document_name = upload.name

        try:
            validate_filename(document_name)
        except ValidationError:
            raise forms.ValidationError("That filename is suspicious.")

        document_data = upload.read()

        # The mime type is detected from the file content, not taken from
        # the client-supplied name.
        mime_type = magic.from_buffer(document_data, mime=True)

        if not is_mime_type_supported(mime_type):
            raise forms.ValidationError("This mime type is not supported.")

        return document_name, document_data

    def save(self):
        """
        Write the validated upload to a scratch file and queue a
        ``documents.tasks.consume_file`` task for it.

        The data is stored under ``settings.SCRATCH_DIR`` in a temporary file
        that is not deleted on close. The original filename is handed to the
        task as ``override_filename``.
        """
        original_filename, data = self.cleaned_data.get("document")

        t = int(mktime(datetime.now().timetuple()))

        os.makedirs(settings.SCRATCH_DIR, exist_ok=True)

        with tempfile.NamedTemporaryFile(prefix="paperless-upload-",
                                         dir=settings.SCRATCH_DIR,
                                         delete=False) as f:
            f.write(data)
            temp_filename = f.name

        # Only touch the timestamps and hand the path over once the file is
        # flushed and closed: closing after os.utime() would bump the mtime
        # again, and the task must not see a partially written file.
        os.utime(temp_filename, times=(t, t))

        async_task("documents.tasks.consume_file",
                   temp_filename,
                   override_filename=original_filename,
                   task_name=os.path.basename(original_filename)[:100])

View File

@ -1,6 +1,9 @@
import magic
from pathvalidate import validate_filename, ValidationError
from rest_framework import serializers from rest_framework import serializers
from .models import Correspondent, Tag, Document, Log, DocumentType from .models import Correspondent, Tag, Document, Log, DocumentType
from .parsers import is_mime_type_supported
class CorrespondentSerializer(serializers.HyperlinkedModelSerializer): class CorrespondentSerializer(serializers.HyperlinkedModelSerializer):
@ -113,3 +116,85 @@ class LogSerializer(serializers.ModelSerializer):
"group", "group",
"level" "level"
) )
class PostDocumentSerializer(serializers.Serializer):
    """
    Serializer for document uploads: one required file plus optional title,
    correspondent, document type and tags, all given by name.
    """

    document = serializers.FileField(label="Document", write_only=True)

    title = serializers.CharField(
        label="Title", write_only=True, required=False)

    correspondent = serializers.CharField(
        label="Correspondent", write_only=True, required=False)

    document_type = serializers.CharField(
        label="Document type", write_only=True, required=False)

    # Note: because of ``source="tag"`` the submitted list shows up in the
    # validated attributes under the key ``tag``, not ``tags``.
    tags = serializers.ListField(
        child=serializers.CharField(),
        label="Tags",
        source="tag",
        write_only=True,
        required=False,
    )

    def validate(self, attrs):
        """
        Check the uploaded file and resolve the given names to object ids.

        Adds ``document_data``, ``correspondent_id``, ``document_type_id``
        and ``tag_ids`` to ``attrs`` and normalises a missing/empty ``title``
        to ``None``. Correspondents, document types and tags are looked up
        by name with ``get_or_create``, so unknown names are created.

        Raises ``serializers.ValidationError`` for a rejected filename or an
        unsupported mime type.
        """
        upload = attrs.get('document')

        try:
            validate_filename(upload.name)
        except ValidationError:
            raise serializers.ValidationError("Invalid filename.")

        payload = upload.file.read()
        if not is_mime_type_supported(magic.from_buffer(payload, mime=True)):
            raise serializers.ValidationError(
                "This mime type is not supported.")
        attrs['document_data'] = payload

        attrs['title'] = attrs.get('title') or None

        correspondent_name = attrs.get('correspondent')
        attrs['correspondent_id'] = (
            Correspondent.objects.get_or_create(name=correspondent_name)[0].id
            if correspondent_name else None
        )

        type_name = attrs.get('document_type')
        attrs['document_type_id'] = (
            DocumentType.objects.get_or_create(name=type_name)[0].id
            if type_name else None
        )

        tag_names = attrs.get('tag')
        attrs['tag_ids'] = (
            [Tag.objects.get_or_create(name=name)[0].id for name in tag_names]
            if tag_names else None
        )

        return attrs

View File

@ -358,7 +358,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(response.data['documents_total'], 3) self.assertEqual(response.data['documents_total'], 3)
self.assertEqual(response.data['documents_inbox'], 1) self.assertEqual(response.data['documents_inbox'], 1)
@mock.patch("documents.forms.async_task") @mock.patch("documents.views.async_task")
def test_upload(self, m): def test_upload(self, m):
with open(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb") as f: with open(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb") as f:
@ -370,8 +370,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
args, kwargs = m.call_args args, kwargs = m.call_args
self.assertEqual(kwargs['override_filename'], "simple.pdf") self.assertEqual(kwargs['override_filename'], "simple.pdf")
self.assertIsNone(kwargs['override_title'])
self.assertIsNone(kwargs['override_correspondent_id'])
self.assertIsNone(kwargs['override_document_type_id'])
self.assertIsNone(kwargs['override_tag_ids'])
@mock.patch("documents.forms.async_task") @mock.patch("documents.views.async_task")
def test_upload_invalid_form(self, m): def test_upload_invalid_form(self, m):
with open(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb") as f: with open(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb") as f:
@ -379,7 +383,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(response.status_code, 400) self.assertEqual(response.status_code, 400)
m.assert_not_called() m.assert_not_called()
@mock.patch("documents.forms.async_task") @mock.patch("documents.views.async_task")
def test_upload_invalid_file(self, m): def test_upload_invalid_file(self, m):
with open(os.path.join(os.path.dirname(__file__), "samples", "simple.zip"), "rb") as f: with open(os.path.join(os.path.dirname(__file__), "samples", "simple.zip"), "rb") as f:
@ -387,8 +391,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(response.status_code, 400) self.assertEqual(response.status_code, 400)
m.assert_not_called() m.assert_not_called()
@mock.patch("documents.forms.async_task") @mock.patch("documents.views.async_task")
@mock.patch("documents.forms.validate_filename") @mock.patch("documents.serialisers.validate_filename")
def test_upload_invalid_filename(self, validate_filename, async_task): def test_upload_invalid_filename(self, validate_filename, async_task):
validate_filename.side_effect = ValidationError() validate_filename.side_effect = ValidationError()
with open(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb") as f: with open(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb") as f:
@ -396,3 +400,83 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(response.status_code, 400) self.assertEqual(response.status_code, 400)
async_task.assert_not_called() async_task.assert_not_called()
@mock.patch("documents.views.async_task")
def test_upload_with_title(self, async_task):
    """A ``title`` form field is passed to the task as ``override_title``."""
    sample = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
    with open(sample, "rb") as f:
        form = {"document": f, "title": "my custom title"}
        response = self.client.post("/api/documents/post_document/", form)

    self.assertEqual(response.status_code, 200)

    async_task.assert_called_once()

    task_kwargs = async_task.call_args[1]
    self.assertEqual(task_kwargs['override_title'], "my custom title")
@mock.patch("documents.views.async_task")
def test_upload_with_correspondent(self, async_task):
    """An existing correspondent's name resolves to that correspondent's id."""
    existing = Correspondent.objects.create(name="test-corres")
    sample = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
    with open(sample, "rb") as f:
        form = {"document": f, "correspondent": "test-corres"}
        response = self.client.post("/api/documents/post_document/", form)

    self.assertEqual(response.status_code, 200)

    async_task.assert_called_once()

    task_kwargs = async_task.call_args[1]
    self.assertEqual(task_kwargs['override_correspondent_id'], existing.id)
@mock.patch("documents.views.async_task")
def test_upload_with_new_correspondent(self, async_task):
    """An unknown correspondent name is created and its id is passed on."""
    sample = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
    with open(sample, "rb") as f:
        form = {"document": f, "correspondent": "test-corres2"}
        response = self.client.post("/api/documents/post_document/", form)

    self.assertEqual(response.status_code, 200)

    async_task.assert_called_once()

    task_kwargs = async_task.call_args[1]

    # The lookup raises if the upload did not create the correspondent.
    created = Correspondent.objects.get(name="test-corres2")
    self.assertEqual(task_kwargs['override_correspondent_id'], created.id)
@mock.patch("documents.views.async_task")
def test_upload_with_document_type(self, async_task):
    """An existing document type's name resolves to that type's id."""
    existing = DocumentType.objects.create(name="invoice")
    sample = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
    with open(sample, "rb") as f:
        form = {"document": f, "document_type": "invoice"}
        response = self.client.post("/api/documents/post_document/", form)

    self.assertEqual(response.status_code, 200)

    async_task.assert_called_once()

    task_kwargs = async_task.call_args[1]
    self.assertEqual(task_kwargs['override_document_type_id'], existing.id)
@mock.patch("documents.views.async_task")
def test_upload_with_new_document_type(self, async_task):
    """An unknown document type name is created and its id is passed on."""
    sample = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
    with open(sample, "rb") as f:
        form = {"document": f, "document_type": "invoice2"}
        response = self.client.post("/api/documents/post_document/", form)

    self.assertEqual(response.status_code, 200)

    async_task.assert_called_once()

    task_kwargs = async_task.call_args[1]

    # The lookup raises if the upload did not create the document type.
    created = DocumentType.objects.get(name="invoice2")
    self.assertEqual(task_kwargs['override_document_type_id'], created.id)
@mock.patch("documents.views.async_task")
def test_upload_with_tags(self, async_task):
    """Several tags may be given; existing ones are reused, new ones created."""
    known_tag = Tag.objects.create(name="tag1")
    sample = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
    with open(sample, "rb") as f:
        form = {"document": f, "tags": ["tag1", "tag2"]}
        response = self.client.post("/api/documents/post_document/", form)

    self.assertEqual(response.status_code, 200)

    async_task.assert_called_once()

    task_kwargs = async_task.call_args[1]

    # "tag2" did not exist before the upload; the lookup raises if it was
    # not created.
    new_tag = Tag.objects.get(name="tag2")
    self.assertCountEqual(
        task_kwargs['override_tag_ids'], [known_tag.id, new_tag.id])

View File

@ -1,10 +1,16 @@
import os import os
import tempfile
from datetime import datetime
from time import mktime
from django.conf import settings
from django.db.models import Count, Max from django.db.models import Count, Max
from django.http import HttpResponse, HttpResponseBadRequest, Http404 from django.http import HttpResponse, HttpResponseBadRequest, Http404
from django.views.decorators.cache import cache_control from django.views.decorators.cache import cache_control
from django.views.generic import TemplateView from django.views.generic import TemplateView
from django_filters.rest_framework import DjangoFilterBackend from django_filters.rest_framework import DjangoFilterBackend
from django_q.tasks import async_task
from rest_framework import parsers
from rest_framework.decorators import action from rest_framework.decorators import action
from rest_framework.filters import OrderingFilter, SearchFilter from rest_framework.filters import OrderingFilter, SearchFilter
from rest_framework.mixins import ( from rest_framework.mixins import (
@ -32,14 +38,14 @@ from .filters import (
DocumentTypeFilterSet, DocumentTypeFilterSet,
LogFilterSet LogFilterSet
) )
from .forms import UploadForm
from .models import Correspondent, Document, Log, Tag, DocumentType from .models import Correspondent, Document, Log, Tag, DocumentType
from .serialisers import ( from .serialisers import (
CorrespondentSerializer, CorrespondentSerializer,
DocumentSerializer, DocumentSerializer,
LogSerializer, LogSerializer,
TagSerializer, TagSerializer,
DocumentTypeSerializer DocumentTypeSerializer,
PostDocumentSerializer
) )
@ -154,16 +160,6 @@ class DocumentViewSet(RetrieveModelMixin,
disposition, filename) disposition, filename)
return response return response
@action(methods=['post'], detail=False)
def post_document(self, request, pk=None):
# TODO: is this a good implementation?
form = UploadForm(data=request.POST, files=request.FILES)
if form.is_valid():
form.save()
return Response("OK")
else:
return HttpResponseBadRequest(str(form.errors))
@action(methods=['get'], detail=True) @action(methods=['get'], detail=True)
def metadata(self, request, pk=None): def metadata(self, request, pk=None):
try: try:
@ -217,6 +213,56 @@ class LogViewSet(ReadOnlyModelViewSet):
ordering_fields = ("created",) ordering_fields = ("created",)
class PostDocumentView(APIView):
    """
    Endpoint that accepts a multipart document upload, stores it in a
    scratch file and queues a ``documents.tasks.consume_file`` task for it.

    Only authenticated users may post. The request is validated with
    ``PostDocumentSerializer``.
    """

    permission_classes = (IsAuthenticated,)
    serializer_class = PostDocumentSerializer
    parser_classes = (parsers.MultiPartParser,)

    def get_serializer_context(self):
        """Return the context dict (request, format, view) for the serializer."""
        return {
            'request': self.request,
            'format': self.format_kwarg,
            'view': self
        }

    def get_serializer(self, *args, **kwargs):
        """Instantiate ``serializer_class`` with this view's context attached."""
        kwargs['context'] = self.get_serializer_context()
        return self.serializer_class(*args, **kwargs)

    def post(self, request, *args, **kwargs):
        """
        Validate the upload, write it to ``settings.SCRATCH_DIR`` and enqueue
        its consumption.

        Invalid input raises from ``is_valid(raise_exception=True)``. On
        success the response body is ``"OK"``; it only signals that the task
        was queued.
        """
        serializer = self.get_serializer(data=request.data)
        serializer.is_valid(raise_exception=True)

        validated = serializer.validated_data
        document = validated['document']
        document_data = validated['document_data']
        correspondent_id = validated['correspondent_id']
        document_type_id = validated['document_type_id']
        tag_ids = validated['tag_ids']
        title = validated['title']

        t = int(mktime(datetime.now().timetuple()))

        os.makedirs(settings.SCRATCH_DIR, exist_ok=True)

        # delete=False: the file must outlive this request, since the queued
        # task is given its path.
        with tempfile.NamedTemporaryFile(prefix="paperless-upload-",
                                         dir=settings.SCRATCH_DIR,
                                         delete=False) as f:
            f.write(document_data)
            temp_filename = f.name

        # Only touch the timestamps and hand the path over once the file is
        # flushed and closed: closing after os.utime() would bump the mtime
        # again, and the task must not see a partially written file.
        os.utime(temp_filename, times=(t, t))

        async_task("documents.tasks.consume_file",
                   temp_filename,
                   override_filename=document.name,
                   override_title=title,
                   override_correspondent_id=correspondent_id,
                   override_document_type_id=document_type_id,
                   override_tag_ids=tag_ids,
                   task_name=os.path.basename(document.name)[:100])

        return Response("OK")
class SearchView(APIView): class SearchView(APIView):
permission_classes = (IsAuthenticated,) permission_classes = (IsAuthenticated,)

View File

@ -86,6 +86,7 @@ INSTALLED_APPS = [
"django.contrib.admin", "django.contrib.admin",
"rest_framework", "rest_framework",
"rest_framework.authtoken",
"django_filters", "django_filters",
"django_q", "django_q",
@ -95,7 +96,8 @@ INSTALLED_APPS = [
REST_FRAMEWORK = { REST_FRAMEWORK = {
'DEFAULT_AUTHENTICATION_CLASSES': [ 'DEFAULT_AUTHENTICATION_CLASSES': [
'rest_framework.authentication.BasicAuthentication', 'rest_framework.authentication.BasicAuthentication',
'rest_framework.authentication.SessionAuthentication' 'rest_framework.authentication.SessionAuthentication',
'rest_framework.authentication.TokenAuthentication'
] ]
} }

View File

@ -4,6 +4,7 @@ from django.contrib.auth.decorators import login_required
from django.urls import path, re_path from django.urls import path, re_path
from django.views.decorators.csrf import csrf_exempt from django.views.decorators.csrf import csrf_exempt
from django.views.generic import RedirectView from django.views.generic import RedirectView
from rest_framework.authtoken import views
from rest_framework.routers import DefaultRouter from rest_framework.routers import DefaultRouter
from documents.views import ( from documents.views import (
@ -15,7 +16,8 @@ from documents.views import (
SearchView, SearchView,
IndexView, IndexView,
SearchAutoCompleteView, SearchAutoCompleteView,
StatisticsView StatisticsView,
PostDocumentView
) )
from paperless.views import FaviconView from paperless.views import FaviconView
@ -45,6 +47,11 @@ urlpatterns = [
StatisticsView.as_view(), StatisticsView.as_view(),
name="statistics"), name="statistics"),
re_path(r"^documents/post_document/", PostDocumentView.as_view(),
name="post_document"),
path('token/', views.obtain_auth_token)
] + api_router.urls)), ] + api_router.urls)),
re_path(r"^favicon.ico$", FaviconView.as_view(), name="favicon"), re_path(r"^favicon.ico$", FaviconView.as_view(), name="favicon"),