Updated the API: it now supports tags, correspondents, document types and a title when uploading documents.

2026-01-10 21:34:20 -06:00 · 2020-12-03 18:36:23 +01:00
parent 20fc065567
commit 8b16cd99dc
7 changed files with 302 additions and 82 deletions
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -38,6 +38,50 @@ individual documents:
    are in place. However, if you use these old URLs to access documents, you
    should update your app or script to use the new URLs.

+.. note::
+
+    The document endpoint provides tags, document types and correspondents as
+    ids in their corresponding fields. These are writeable. Paperless also
+    offers read-only objects for assigned tags, types and correspondents;
+    however, these might be removed in the future. For now, the front end
+    requires them.
+
+Authorization
+#############
+
+The REST API provides three different forms of authentication.
+
+1.  Basic authentication
+
+    Authorize by providing an HTTP header in the form
+    
+    .. code::
+
+        Authorization: Basic <credentials>
+    
+    where ``credentials`` is a base64-encoded string of ``<username>:<password>``.
+
+2.  Session authentication
+
+    When you're logged into paperless in your browser, you're automatically
+    logged into the API as well and don't need to provide any authorization
+    headers.
+
+3.  Token authentication
+
+    Paperless also offers an endpoint to acquire authentication tokens.
+
+    POST a username and password as a form or JSON string to ``/api/token/``
+    and paperless will respond with a token if the login data is correct.
+    This token can be used to authenticate other requests with the
+    following HTTP header:
+
+    .. code::
+
+        Authorization: Token <token>
+    
+    Tokens can be managed and revoked in the paperless admin.
+
 Searching for documents
 #######################

@@ -166,8 +210,19 @@ The API provides a special endpoint for file uploads:

 POST a multipart form to this endpoint, where the form field ``document`` contains
 the document that you want to upload to paperless. The filename is sanitized and
-then used to store the document in the consumption folder, where the consumer will
-detect the document and process it as any other document.
+then used to store the document in a temporary directory, and the consumer will
+be instructed to consume the document from there.

-The endpoint will immediately return "OK." if the document was stored in the
-consumption directory.
+The endpoint supports the following optional form fields:
+
+*   ``title``: Specify a title that the consumer should use for the document.
+*   ``correspondent``: Specify a correspondent that the consumer should use for the document.
+    Case sensitive. If the specified correspondent does not exist, it will be created with this
+    name and default settings.
+*   ``document_type``: Similar to correspondent.
+*   ``tags``: Similar to correspondent. Specify this multiple times to have multiple tags added
+    to the document.
+
+The endpoint will immediately return "OK" if the document consumption process
+was started successfully. No additional status information about the consumption
+process itself is available, since that happens in a different process.
--- a/src/documents/forms.py
+++ b/src/documents/forms.py
@@ -1,59 +0,0 @@
-import os
-import tempfile
-from datetime import datetime
-from time import mktime
-
-import magic
-from django import forms
-from django.conf import settings
-from django_q.tasks import async_task
-from pathvalidate import validate_filename, ValidationError
-
-from documents.parsers import is_mime_type_supported
-
-
-class UploadForm(forms.Form):
-
-    document = forms.FileField()
-
-    def clean_document(self):
-        document_name = self.cleaned_data.get("document").name
-
-        try:
-            validate_filename(document_name)
-        except ValidationError:
-            raise forms.ValidationError("That filename is suspicious.")
-
-        document_data = self.cleaned_data.get("document").read()
-
-        mime_type = magic.from_buffer(document_data, mime=True)
-
-        if not is_mime_type_supported(mime_type):
-            raise forms.ValidationError("This mime type is not supported.")
-
-        return document_name, document_data
-
-    def save(self):
-        """
-        Since the consumer already does a lot of work, it's easier just to save
-        to-be-consumed files to the consumption directory rather than have the
-        form do that as well.  Think of it as a poor-man's queue server.
-        """
-
-        original_filename, data = self.cleaned_data.get("document")
-
-        t = int(mktime(datetime.now().timetuple()))
-
-        os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
-
-        with tempfile.NamedTemporaryFile(prefix="paperless-upload-",
-                                         dir=settings.SCRATCH_DIR,
-                                         delete=False) as f:
-
-            f.write(data)
-            os.utime(f.name, times=(t, t))
-
-            async_task("documents.tasks.consume_file",
-                       f.name,
-                       override_filename=original_filename,
-                       task_name=os.path.basename(original_filename)[:100])
--- a/src/documents/serialisers.py
+++ b/src/documents/serialisers.py
@@ -1,6 +1,9 @@
+import magic
+from pathvalidate import validate_filename, ValidationError
 from rest_framework import serializers

 from .models import Correspondent, Tag, Document, Log, DocumentType
+from .parsers import is_mime_type_supported


 class CorrespondentSerializer(serializers.HyperlinkedModelSerializer):
@@ -113,3 +116,85 @@ class LogSerializer(serializers.ModelSerializer):
            "group",
            "level"
        )
+
+
+class PostDocumentSerializer(serializers.Serializer):
+
+    document = serializers.FileField(
+        label="Document",
+        write_only=True,
+    )
+
+    title = serializers.CharField(
+        label="Title",
+        write_only=True,
+        required=False,
+    )
+
+    correspondent = serializers.CharField(
+        label="Correspondent",
+        write_only=True,
+        required=False,
+    )
+
+    document_type = serializers.CharField(
+        label="Document type",
+        write_only=True,
+        required=False,
+    )
+
+    tags = serializers.ListField(
+        child=serializers.CharField(),
+        label="Tags",
+        source="tag",
+        write_only=True,
+        required=False,
+    )
+
+    def validate(self, attrs):
+        document = attrs.get('document')
+
+        try:
+            validate_filename(document.name)
+        except ValidationError:
+            raise serializers.ValidationError("Invalid filename.")
+
+        document_data = document.file.read()
+        mime_type = magic.from_buffer(document_data, mime=True)
+
+        if not is_mime_type_supported(mime_type):
+            raise serializers.ValidationError(
+                "This mime type is not supported.")
+
+        attrs['document_data'] = document_data
+
+        title = attrs.get('title')
+
+        if not title:
+            attrs['title'] = None
+
+        correspondent = attrs.get('correspondent')
+        if correspondent:
+            c, _ = Correspondent.objects.get_or_create(name=correspondent)
+            attrs['correspondent_id'] = c.id
+        else:
+            attrs['correspondent_id'] = None
+
+        document_type = attrs.get('document_type')
+        if document_type:
+            dt, _ = DocumentType.objects.get_or_create(name=document_type)
+            attrs['document_type_id'] = dt.id
+        else:
+            attrs['document_type_id'] = None
+
+        tags = attrs.get('tag')
+        if tags:
+            tag_ids = []
+            for tag in tags:
+                tag, _ = Tag.objects.get_or_create(name=tag)
+                tag_ids.append(tag.id)
+            attrs['tag_ids'] = tag_ids
+        else:
+            attrs['tag_ids'] = None
+
+        return attrs
--- a/src/documents/tests/test_api.py
+++ b/src/documents/tests/test_api.py
@@ -358,7 +358,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
        self.assertEqual(response.data['documents_total'], 3)
        self.assertEqual(response.data['documents_inbox'], 1)

-    @mock.patch("documents.forms.async_task")
+    @mock.patch("documents.views.async_task")
    def test_upload(self, m):

        with open(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb") as f:
@@ -370,8 +370,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):

        args, kwargs = m.call_args
        self.assertEqual(kwargs['override_filename'], "simple.pdf")
+        self.assertIsNone(kwargs['override_title'])
+        self.assertIsNone(kwargs['override_correspondent_id'])
+        self.assertIsNone(kwargs['override_document_type_id'])
+        self.assertIsNone(kwargs['override_tag_ids'])

-    @mock.patch("documents.forms.async_task")
+    @mock.patch("documents.views.async_task")
    def test_upload_invalid_form(self, m):

        with open(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb") as f:
@@ -379,7 +383,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
        self.assertEqual(response.status_code, 400)
        m.assert_not_called()

-    @mock.patch("documents.forms.async_task")
+    @mock.patch("documents.views.async_task")
    def test_upload_invalid_file(self, m):

        with open(os.path.join(os.path.dirname(__file__), "samples", "simple.zip"), "rb") as f:
@@ -387,8 +391,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
        self.assertEqual(response.status_code, 400)
        m.assert_not_called()

-    @mock.patch("documents.forms.async_task")
-    @mock.patch("documents.forms.validate_filename")
+    @mock.patch("documents.views.async_task")
+    @mock.patch("documents.serialisers.validate_filename")
    def test_upload_invalid_filename(self, validate_filename, async_task):
        validate_filename.side_effect = ValidationError()
        with open(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb") as f:
@@ -396,3 +400,83 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
        self.assertEqual(response.status_code, 400)

        async_task.assert_not_called()
+
+    @mock.patch("documents.views.async_task")
+    def test_upload_with_title(self, async_task):
+        with open(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb") as f:
+            response = self.client.post("/api/documents/post_document/", {"document": f, "title": "my custom title"})
+        self.assertEqual(response.status_code, 200)
+
+        async_task.assert_called_once()
+
+        args, kwargs = async_task.call_args
+
+        self.assertEqual(kwargs['override_title'], "my custom title")
+
+    @mock.patch("documents.views.async_task")
+    def test_upload_with_correspondent(self, async_task):
+        c = Correspondent.objects.create(name="test-corres")
+        with open(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb") as f:
+            response = self.client.post("/api/documents/post_document/", {"document": f, "correspondent": "test-corres"})
+        self.assertEqual(response.status_code, 200)
+
+        async_task.assert_called_once()
+
+        args, kwargs = async_task.call_args
+
+        self.assertEqual(kwargs['override_correspondent_id'], c.id)
+
+    @mock.patch("documents.views.async_task")
+    def test_upload_with_new_correspondent(self, async_task):
+        with open(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb") as f:
+            response = self.client.post("/api/documents/post_document/", {"document": f, "correspondent": "test-corres2"})
+        self.assertEqual(response.status_code, 200)
+
+        async_task.assert_called_once()
+
+        args, kwargs = async_task.call_args
+
+        c = Correspondent.objects.get(name="test-corres2")
+        self.assertEqual(kwargs['override_correspondent_id'], c.id)
+
+    @mock.patch("documents.views.async_task")
+    def test_upload_with_document_type(self, async_task):
+        dt = DocumentType.objects.create(name="invoice")
+        with open(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb") as f:
+            response = self.client.post("/api/documents/post_document/", {"document": f, "document_type": "invoice"})
+        self.assertEqual(response.status_code, 200)
+
+        async_task.assert_called_once()
+
+        args, kwargs = async_task.call_args
+
+        self.assertEqual(kwargs['override_document_type_id'], dt.id)
+
+    @mock.patch("documents.views.async_task")
+    def test_upload_with_new_document_type(self, async_task):
+        with open(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb") as f:
+            response = self.client.post("/api/documents/post_document/", {"document": f, "document_type": "invoice2"})
+        self.assertEqual(response.status_code, 200)
+
+        async_task.assert_called_once()
+
+        args, kwargs = async_task.call_args
+
+        dt = DocumentType.objects.get(name="invoice2")
+        self.assertEqual(kwargs['override_document_type_id'], dt.id)
+
+    @mock.patch("documents.views.async_task")
+    def test_upload_with_tags(self, async_task):
+        t1 = Tag.objects.create(name="tag1")
+        with open(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb") as f:
+            response = self.client.post(
+                "/api/documents/post_document/",
+                {"document": f, "tags": ["tag1", "tag2"]})
+        self.assertEqual(response.status_code, 200)
+
+        async_task.assert_called_once()
+
+        args, kwargs = async_task.call_args
+
+        t2 = Tag.objects.get(name="tag2")
+        self.assertCountEqual(kwargs['override_tag_ids'], [t1.id, t2.id])
--- a/src/documents/views.py
+++ b/src/documents/views.py
@@ -1,10 +1,16 @@
 import os
+import tempfile
+from datetime import datetime
+from time import mktime

+from django.conf import settings
 from django.db.models import Count, Max
 from django.http import HttpResponse, HttpResponseBadRequest, Http404
 from django.views.decorators.cache import cache_control
 from django.views.generic import TemplateView
 from django_filters.rest_framework import DjangoFilterBackend
+from django_q.tasks import async_task
+from rest_framework import parsers
 from rest_framework.decorators import action
 from rest_framework.filters import OrderingFilter, SearchFilter
 from rest_framework.mixins import (
@@ -32,14 +38,14 @@ from .filters import (
    DocumentTypeFilterSet,
    LogFilterSet
 )
-from .forms import UploadForm
 from .models import Correspondent, Document, Log, Tag, DocumentType
 from .serialisers import (
    CorrespondentSerializer,
    DocumentSerializer,
    LogSerializer,
    TagSerializer,
-    DocumentTypeSerializer
+    DocumentTypeSerializer,
+    PostDocumentSerializer
 )


@@ -154,16 +160,6 @@ class DocumentViewSet(RetrieveModelMixin,
            disposition, filename)
        return response

-    @action(methods=['post'], detail=False)
-    def post_document(self, request, pk=None):
-        # TODO: is this a good implementation?
-        form = UploadForm(data=request.POST, files=request.FILES)
-        if form.is_valid():
-            form.save()
-            return Response("OK")
-        else:
-            return HttpResponseBadRequest(str(form.errors))
-
    @action(methods=['get'], detail=True)
    def metadata(self, request, pk=None):
        try:
@@ -217,6 +213,56 @@ class LogViewSet(ReadOnlyModelViewSet):
    ordering_fields = ("created",)


+class PostDocumentView(APIView):
+
+    permission_classes = (IsAuthenticated,)
+    serializer_class = PostDocumentSerializer
+    parser_classes = (parsers.MultiPartParser,)
+
+    def get_serializer_context(self):
+        return {
+            'request': self.request,
+            'format': self.format_kwarg,
+            'view': self
+        }
+
+    def get_serializer(self, *args, **kwargs):
+        kwargs['context'] = self.get_serializer_context()
+        return self.serializer_class(*args, **kwargs)
+
+    def post(self, request, *args, **kwargs):
+
+        serializer = self.get_serializer(data=request.data)
+        serializer.is_valid(raise_exception=True)
+
+        document = serializer.validated_data['document']
+        document_data = serializer.validated_data['document_data']
+        correspondent_id = serializer.validated_data['correspondent_id']
+        document_type_id = serializer.validated_data['document_type_id']
+        tag_ids = serializer.validated_data['tag_ids']
+        title = serializer.validated_data['title']
+
+        t = int(mktime(datetime.now().timetuple()))
+
+        os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
+
+        with tempfile.NamedTemporaryFile(prefix="paperless-upload-",
+                                         dir=settings.SCRATCH_DIR,
+                                         delete=False) as f:
+            f.write(document_data)
+            os.utime(f.name, times=(t, t))
+
+            async_task("documents.tasks.consume_file",
+                       f.name,
+                       override_filename=document.name,
+                       override_title=title,
+                       override_correspondent_id=correspondent_id,
+                       override_document_type_id=document_type_id,
+                       override_tag_ids=tag_ids,
+                       task_name=os.path.basename(document.name)[:100])
+        return Response("OK")
+
+
 class SearchView(APIView):

    permission_classes = (IsAuthenticated,)
--- a/src/paperless/settings.py
+++ b/src/paperless/settings.py
@@ -86,6 +86,7 @@ INSTALLED_APPS = [
    "django.contrib.admin",

    "rest_framework",
+    "rest_framework.authtoken",
    "django_filters",

    "django_q",
@@ -95,7 +96,8 @@ INSTALLED_APPS = [
 REST_FRAMEWORK = {
    'DEFAULT_AUTHENTICATION_CLASSES': [
        'rest_framework.authentication.BasicAuthentication',
-        'rest_framework.authentication.SessionAuthentication'
+        'rest_framework.authentication.SessionAuthentication',
+        'rest_framework.authentication.TokenAuthentication'
    ]
 }

--- a/src/paperless/urls.py
+++ b/src/paperless/urls.py
@@ -4,6 +4,7 @@ from django.contrib.auth.decorators import login_required
 from django.urls import path, re_path
 from django.views.decorators.csrf import csrf_exempt
 from django.views.generic import RedirectView
+from rest_framework.authtoken import views
 from rest_framework.routers import DefaultRouter

 from documents.views import (
@@ -15,7 +16,8 @@ from documents.views import (
    SearchView,
    IndexView,
    SearchAutoCompleteView,
-    StatisticsView
+    StatisticsView,
+    PostDocumentView
 )
 from paperless.views import FaviconView

@@ -45,6 +47,11 @@ urlpatterns = [
                StatisticsView.as_view(),
                name="statistics"),

+        re_path(r"^documents/post_document/", PostDocumentView.as_view(),
+                name="post_document"),
+
+        path('token/', views.obtain_auth_token)
+
    ] + api_router.urls)),

    re_path(r"^favicon.ico$", FaviconView.as_view(), name="favicon"),