updated the API, it now supports tags, correspondents, types and title when uploading documents.

This commit is contained in:
jonaswinkler 2020-12-03 18:36:23 +01:00
parent 20fc065567
commit 8b16cd99dc
7 changed files with 302 additions and 82 deletions

View File

@ -38,6 +38,50 @@ individual documents:
are in place. However, if you use these old URLs to access documents, you
should update your app or script to use the new URLs.
.. note::
The document endpoint provides tags, document types and correspondents as
ids in their corresponding fields. These are writeable. Paperless also
offers read-only objects for assigned tags, types and correspondents,
however, these might be removed in the future. For now, the front end still
requires them.
Authorization
#############
The REST API provides three different forms of authentication.
1. Basic authentication
Authorize by providing an HTTP header in the form
.. code::
Authorization: Basic <credentials>
where ``credentials`` is a base64-encoded string of ``<username>:<password>``
2. Session authentication
When you're logged into paperless in your browser, you're automatically
logged into the API as well and don't need to provide any authorization
headers.
3. Token authentication
Paperless also offers an endpoint to acquire authentication tokens.
POST a username and password as a form or json string to ``/api/token/``
and paperless will respond with a token, if the login data is correct.
This token can be used to authenticate other requests with the
following HTTP header:
.. code::
Authorization: Token <token>
Tokens can be managed and revoked in the paperless admin.
Searching for documents
#######################
@ -166,8 +210,19 @@ The API provides a special endpoint for file uploads:
POST a multipart form to this endpoint, where the form field ``document`` contains
the document that you want to upload to paperless. The filename is sanitized and
then used to store the document in the consumption folder, where the consumer will
detect the document and process it as any other document.
then used to store the document in a temporary directory, and the consumer will
be instructed to consume the document from there.
The endpoint will immediately return "OK." if the document was stored in the
consumption directory.
The endpoint supports the following optional form fields:
* ``title``: Specify a title that the consumer should use for the document.
* ``correspondent``: Specify a correspondent that the consumer should use for the document.
Case sensitive. If the specified correspondent does not exist, it will be created with this
name and default settings.
* ``document_type``: Similar to correspondent.
* ``tags``: Similar to correspondent. Specify this multiple times to have multiple tags added
to the document.
The endpoint will immediately return "OK" if the document consumption process
was started successfully. No additional status information about the consumption
process itself is available, since that happens in a different process.

View File

@ -1,59 +0,0 @@
import os
import tempfile
from datetime import datetime
from time import mktime
import magic
from django import forms
from django.conf import settings
from django_q.tasks import async_task
from pathvalidate import validate_filename, ValidationError
from documents.parsers import is_mime_type_supported
class UploadForm(forms.Form):
    """
    Form that accepts a single uploaded document, validates it and hands it
    over to the consumer as a background task.
    """

    document = forms.FileField()

    def clean_document(self):
        """
        Validate the uploaded file's name and mime type.

        Returns a ``(filename, data)`` tuple, where ``data`` is the complete
        file content read into memory. Raises ``forms.ValidationError`` if
        the filename does not pass ``validate_filename`` or the detected mime
        type is not supported.
        """
        document_name = self.cleaned_data.get("document").name

        try:
            validate_filename(document_name)
        except ValidationError:
            raise forms.ValidationError("That filename is suspicious.")

        document_data = self.cleaned_data.get("document").read()

        # The mime type is detected from the content, not from the file name.
        mime_type = magic.from_buffer(document_data, mime=True)

        if not is_mime_type_supported(mime_type):
            raise forms.ValidationError("This mime type is not supported.")

        return document_name, document_data

    def save(self):
        """
        Write the uploaded data to a temporary file in ``SCRATCH_DIR`` and
        queue a ``documents.tasks.consume_file`` task for it, passing the
        original file name along as ``override_filename``.
        """
        original_filename, data = self.cleaned_data.get("document")

        # Current time, truncated to whole seconds.
        t = int(mktime(datetime.now().timetuple()))

        os.makedirs(settings.SCRATCH_DIR, exist_ok=True)

        with tempfile.NamedTemporaryFile(prefix="paperless-upload-",
                                         dir=settings.SCRATCH_DIR,
                                         delete=False) as f:
            f.write(data)

        # The file is closed (and therefore fully flushed) at this point.
        # Setting the timestamp and queueing the task only now ensures that
        # the flush on close cannot overwrite the mtime and that the task
        # never sees a partially written file.
        os.utime(f.name, times=(t, t))

        async_task("documents.tasks.consume_file",
                   f.name,
                   override_filename=original_filename,
                   task_name=os.path.basename(original_filename)[:100])

View File

@ -1,6 +1,9 @@
import magic
from pathvalidate import validate_filename, ValidationError
from rest_framework import serializers
from .models import Correspondent, Tag, Document, Log, DocumentType
from .parsers import is_mime_type_supported
class CorrespondentSerializer(serializers.HyperlinkedModelSerializer):
@ -113,3 +116,85 @@ class LogSerializer(serializers.ModelSerializer):
"group",
"level"
)
class PostDocumentSerializer(serializers.Serializer):
    """
    Serializer for document uploads.

    Besides the mandatory ``document`` file, the optional ``title``,
    ``correspondent``, ``document_type`` and ``tags`` fields are accepted.
    Correspondent, document type and tags are given by name and are created
    if no object with that name exists yet.
    """

    document = serializers.FileField(label="Document", write_only=True)

    title = serializers.CharField(
        label="Title", write_only=True, required=False)

    correspondent = serializers.CharField(
        label="Correspondent", write_only=True, required=False)

    document_type = serializers.CharField(
        label="Document type", write_only=True, required=False)

    # Note: because of ``source="tag"``, the submitted tag names show up
    # under the key ``tag`` (not ``tags``) in the validated attributes.
    tags = serializers.ListField(
        child=serializers.CharField(),
        label="Tags",
        source="tag",
        write_only=True,
        required=False,
    )

    def validate(self, attrs):
        """
        Check the uploaded file and resolve the given names to object ids.

        Adds ``document_data``, ``correspondent_id``, ``document_type_id``
        and ``tag_ids`` to ``attrs`` and normalises an empty or missing
        ``title`` to ``None``. Raises ``serializers.ValidationError`` for an
        invalid file name or an unsupported mime type.
        """
        upload = attrs.get('document')

        try:
            validate_filename(upload.name)
        except ValidationError:
            raise serializers.ValidationError("Invalid filename.")

        raw = upload.file.read()
        # The mime type is detected from the content, not from the file name.
        if not is_mime_type_supported(magic.from_buffer(raw, mime=True)):
            raise serializers.ValidationError(
                "This mime type is not supported.")
        attrs['document_data'] = raw

        if not attrs.get('title'):
            attrs['title'] = None

        correspondent_name = attrs.get('correspondent')
        attrs['correspondent_id'] = (
            Correspondent.objects.get_or_create(name=correspondent_name)[0].id
            if correspondent_name else None
        )

        type_name = attrs.get('document_type')
        attrs['document_type_id'] = (
            DocumentType.objects.get_or_create(name=type_name)[0].id
            if type_name else None
        )

        tag_names = attrs.get('tag')
        attrs['tag_ids'] = (
            [Tag.objects.get_or_create(name=name)[0].id for name in tag_names]
            if tag_names else None
        )

        return attrs

View File

@ -358,7 +358,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(response.data['documents_total'], 3)
self.assertEqual(response.data['documents_inbox'], 1)
@mock.patch("documents.forms.async_task")
@mock.patch("documents.views.async_task")
def test_upload(self, m):
with open(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb") as f:
@ -370,8 +370,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
args, kwargs = m.call_args
self.assertEqual(kwargs['override_filename'], "simple.pdf")
self.assertIsNone(kwargs['override_title'])
self.assertIsNone(kwargs['override_correspondent_id'])
self.assertIsNone(kwargs['override_document_type_id'])
self.assertIsNone(kwargs['override_tag_ids'])
@mock.patch("documents.forms.async_task")
@mock.patch("documents.views.async_task")
def test_upload_invalid_form(self, m):
with open(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb") as f:
@ -379,7 +383,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(response.status_code, 400)
m.assert_not_called()
@mock.patch("documents.forms.async_task")
@mock.patch("documents.views.async_task")
def test_upload_invalid_file(self, m):
with open(os.path.join(os.path.dirname(__file__), "samples", "simple.zip"), "rb") as f:
@ -387,8 +391,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(response.status_code, 400)
m.assert_not_called()
@mock.patch("documents.forms.async_task")
@mock.patch("documents.forms.validate_filename")
@mock.patch("documents.views.async_task")
@mock.patch("documents.serialisers.validate_filename")
def test_upload_invalid_filename(self, validate_filename, async_task):
validate_filename.side_effect = ValidationError()
with open(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb") as f:
@ -396,3 +400,83 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(response.status_code, 400)
async_task.assert_not_called()
@mock.patch("documents.views.async_task")
def test_upload_with_title(self, async_task):
    """A posted ``title`` is handed to the task as ``override_title``."""
    sample = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
    with open(sample, "rb") as pdf:
        payload = {"document": pdf, "title": "my custom title"}
        response = self.client.post("/api/documents/post_document/", payload)

    self.assertEqual(response.status_code, 200)
    async_task.assert_called_once()

    _, task_kwargs = async_task.call_args
    self.assertEqual(task_kwargs['override_title'], "my custom title")
@mock.patch("documents.views.async_task")
def test_upload_with_correspondent(self, async_task):
    """The name of an existing correspondent is resolved to its id."""
    existing = Correspondent.objects.create(name="test-corres")
    sample = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
    with open(sample, "rb") as pdf:
        payload = {"document": pdf, "correspondent": "test-corres"}
        response = self.client.post("/api/documents/post_document/", payload)

    self.assertEqual(response.status_code, 200)
    async_task.assert_called_once()

    _, task_kwargs = async_task.call_args
    self.assertEqual(task_kwargs['override_correspondent_id'], existing.id)
@mock.patch("documents.views.async_task")
def test_upload_with_new_correspondent(self, async_task):
    """An unknown correspondent name leads to a new correspondent object."""
    sample = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
    with open(sample, "rb") as pdf:
        payload = {"document": pdf, "correspondent": "test-corres2"}
        response = self.client.post("/api/documents/post_document/", payload)

    self.assertEqual(response.status_code, 200)
    async_task.assert_called_once()

    _, task_kwargs = async_task.call_args
    # The object must exist after the request; get() fails otherwise.
    created = Correspondent.objects.get(name="test-corres2")
    self.assertEqual(task_kwargs['override_correspondent_id'], created.id)
@mock.patch("documents.views.async_task")
def test_upload_with_document_type(self, async_task):
    """The name of an existing document type is resolved to its id."""
    existing = DocumentType.objects.create(name="invoice")
    sample = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
    with open(sample, "rb") as pdf:
        payload = {"document": pdf, "document_type": "invoice"}
        response = self.client.post("/api/documents/post_document/", payload)

    self.assertEqual(response.status_code, 200)
    async_task.assert_called_once()

    _, task_kwargs = async_task.call_args
    self.assertEqual(task_kwargs['override_document_type_id'], existing.id)
@mock.patch("documents.views.async_task")
def test_upload_with_new_document_type(self, async_task):
    """An unknown document type name leads to a new document type object."""
    sample = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
    with open(sample, "rb") as pdf:
        payload = {"document": pdf, "document_type": "invoice2"}
        response = self.client.post("/api/documents/post_document/", payload)

    self.assertEqual(response.status_code, 200)
    async_task.assert_called_once()

    _, task_kwargs = async_task.call_args
    # The object must exist after the request; get() fails otherwise.
    created = DocumentType.objects.get(name="invoice2")
    self.assertEqual(task_kwargs['override_document_type_id'], created.id)
@mock.patch("documents.views.async_task")
def test_upload_with_tags(self, async_task):
    """Several tags may be posted; one exists already, one gets created."""
    existing = Tag.objects.create(name="tag1")
    sample = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
    with open(sample, "rb") as pdf:
        payload = {"document": pdf, "tags": ["tag1", "tag2"]}
        response = self.client.post("/api/documents/post_document/", payload)

    self.assertEqual(response.status_code, 200)
    async_task.assert_called_once()

    _, task_kwargs = async_task.call_args
    # "tag2" did not exist before the request; get() fails if it was not
    # created by the upload.
    created = Tag.objects.get(name="tag2")
    self.assertCountEqual(
        task_kwargs['override_tag_ids'], [existing.id, created.id])

View File

@ -1,10 +1,16 @@
import os
import tempfile
from datetime import datetime
from time import mktime
from django.conf import settings
from django.db.models import Count, Max
from django.http import HttpResponse, HttpResponseBadRequest, Http404
from django.views.decorators.cache import cache_control
from django.views.generic import TemplateView
from django_filters.rest_framework import DjangoFilterBackend
from django_q.tasks import async_task
from rest_framework import parsers
from rest_framework.decorators import action
from rest_framework.filters import OrderingFilter, SearchFilter
from rest_framework.mixins import (
@ -32,14 +38,14 @@ from .filters import (
DocumentTypeFilterSet,
LogFilterSet
)
from .forms import UploadForm
from .models import Correspondent, Document, Log, Tag, DocumentType
from .serialisers import (
CorrespondentSerializer,
DocumentSerializer,
LogSerializer,
TagSerializer,
DocumentTypeSerializer
DocumentTypeSerializer,
PostDocumentSerializer
)
@ -154,16 +160,6 @@ class DocumentViewSet(RetrieveModelMixin,
disposition, filename)
return response
@action(methods=['post'], detail=False)
def post_document(self, request, pk=None):
    """
    Accept a document upload via ``UploadForm``.

    Responds with "OK" once the form has been saved, or with a 400 response
    containing the form errors if validation fails.
    """
    # TODO: is this a good implementation?
    upload = UploadForm(data=request.POST, files=request.FILES)
    if not upload.is_valid():
        return HttpResponseBadRequest(str(upload.errors))

    upload.save()
    return Response("OK")
@action(methods=['get'], detail=True)
def metadata(self, request, pk=None):
try:
@ -217,6 +213,56 @@ class LogViewSet(ReadOnlyModelViewSet):
ordering_fields = ("created",)
class PostDocumentView(APIView):
    """
    Upload endpoint for documents.

    Accepts a multipart form (see ``PostDocumentSerializer`` for the
    supported fields), stores the uploaded file in ``SCRATCH_DIR`` and queues
    a ``documents.tasks.consume_file`` task for it. Only authenticated users
    may use this view.
    """

    permission_classes = (IsAuthenticated,)
    serializer_class = PostDocumentSerializer
    parser_classes = (parsers.MultiPartParser,)

    def get_serializer_context(self):
        """Return the context dictionary that is passed to the serializer."""
        return {
            'request': self.request,
            'format': self.format_kwarg,
            'view': self
        }

    def get_serializer(self, *args, **kwargs):
        """Instantiate ``serializer_class`` with this view's context."""
        kwargs['context'] = self.get_serializer_context()
        return self.serializer_class(*args, **kwargs)

    def post(self, request, *args, **kwargs):
        """
        Validate the upload, write it to a temporary file and queue the
        consumption task.

        Returns the response "OK" once the task has been queued. Invalid
        input is rejected by ``is_valid(raise_exception=True)`` before
        anything is written to disk.
        """
        serializer = self.get_serializer(data=request.data)
        serializer.is_valid(raise_exception=True)

        document = serializer.validated_data['document']
        document_data = serializer.validated_data['document_data']
        correspondent_id = serializer.validated_data['correspondent_id']
        document_type_id = serializer.validated_data['document_type_id']
        tag_ids = serializer.validated_data['tag_ids']
        title = serializer.validated_data['title']

        # Current time, truncated to whole seconds.
        t = int(mktime(datetime.now().timetuple()))

        os.makedirs(settings.SCRATCH_DIR, exist_ok=True)

        with tempfile.NamedTemporaryFile(prefix="paperless-upload-",
                                         dir=settings.SCRATCH_DIR,
                                         delete=False) as f:
            f.write(document_data)

        # The file is closed (and therefore fully flushed) at this point.
        # Setting the timestamp and queueing the task only now ensures that
        # the flush on close cannot overwrite the mtime and that the task
        # never sees a partially written file.
        os.utime(f.name, times=(t, t))

        async_task("documents.tasks.consume_file",
                   f.name,
                   override_filename=document.name,
                   override_title=title,
                   override_correspondent_id=correspondent_id,
                   override_document_type_id=document_type_id,
                   override_tag_ids=tag_ids,
                   task_name=os.path.basename(document.name)[:100])

        return Response("OK")
class SearchView(APIView):
permission_classes = (IsAuthenticated,)

View File

@ -86,6 +86,7 @@ INSTALLED_APPS = [
"django.contrib.admin",
"rest_framework",
"rest_framework.authtoken",
"django_filters",
"django_q",
@ -95,7 +96,8 @@ INSTALLED_APPS = [
REST_FRAMEWORK = {
'DEFAULT_AUTHENTICATION_CLASSES': [
'rest_framework.authentication.BasicAuthentication',
'rest_framework.authentication.SessionAuthentication'
'rest_framework.authentication.SessionAuthentication',
'rest_framework.authentication.TokenAuthentication'
]
}

View File

@ -4,6 +4,7 @@ from django.contrib.auth.decorators import login_required
from django.urls import path, re_path
from django.views.decorators.csrf import csrf_exempt
from django.views.generic import RedirectView
from rest_framework.authtoken import views
from rest_framework.routers import DefaultRouter
from documents.views import (
@ -15,7 +16,8 @@ from documents.views import (
SearchView,
IndexView,
SearchAutoCompleteView,
StatisticsView
StatisticsView,
PostDocumentView
)
from paperless.views import FaviconView
@ -45,6 +47,11 @@ urlpatterns = [
StatisticsView.as_view(),
name="statistics"),
re_path(r"^documents/post_document/", PostDocumentView.as_view(),
name="post_document"),
path('token/', views.obtain_auth_token)
] + api_router.urls)),
re_path(r"^favicon.ico$", FaviconView.as_view(), name="favicon"),