mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
updated the API, it now supports tags, correspondents, types and title when uploading documents.
This commit is contained in:
parent
20fc065567
commit
8b16cd99dc
63
docs/api.rst
63
docs/api.rst
@ -38,6 +38,50 @@ individual documents:
|
||||
are in place. However, if you use these old URLs to access documents, you
|
||||
should update your app or script to use the new URLs.
|
||||
|
||||
.. note::
|
||||
|
||||
The document endpoint provides tags, document types and correspondents as
|
||||
ids in their corresponding fields. These are writeable. Paperless also
|
||||
offers read-only objects for assigned tags, types and correspondents,
|
||||
however, these might be removed in the future. For now, the front end
|
||||
requires them.
|
||||
|
||||
Authorization
|
||||
#############
|
||||
|
||||
The REST API provides three different forms of authentication.
|
||||
|
||||
1. Basic authentication
|
||||
|
||||
Authorize by providing an HTTP header in the form
|
||||
|
||||
.. code::
|
||||
|
||||
Authorization: Basic <credentials>
|
||||
|
||||
where ``credentials`` is a base64-encoded string of ``<username>:<password>``
|
||||
|
||||
2. Session authentication
|
||||
|
||||
When you're logged into paperless in your browser, you're automatically
|
||||
logged into the API as well and don't need to provide any authorization
|
||||
headers.
|
||||
|
||||
3. Token authentication
|
||||
|
||||
Paperless also offers an endpoint to acquire authentication tokens.
|
||||
|
||||
POST a username and password as a form or JSON string to ``/api/token/``
|
||||
and paperless will respond with a token if the login data is correct.
|
||||
This token can be used to authenticate other requests with the
|
||||
following HTTP header:
|
||||
|
||||
.. code::
|
||||
|
||||
Authorization: Token <token>
|
||||
|
||||
Tokens can be managed and revoked in the paperless admin.
|
||||
|
||||
Searching for documents
|
||||
#######################
|
||||
|
||||
@ -166,8 +210,19 @@ The API provides a special endpoint for file uploads:
|
||||
|
||||
POST a multipart form to this endpoint, where the form field ``document`` contains
|
||||
the document that you want to upload to paperless. The filename is sanitized and
|
||||
then used to store the document in the consumption folder, where the consumer will
|
||||
detect the document and process it as any other document.
|
||||
then used to store the document in a temporary directory, and the consumer will
|
||||
be instructed to consume the document from there.
|
||||
|
||||
The endpoint will immediately return "OK." if the document was stored in the
|
||||
consumption directory.
|
||||
The endpoint supports the following optional form fields:
|
||||
|
||||
* ``title``: Specify a title that the consumer should use for the document.
|
||||
* ``correspondent``: Specify a correspondent that the consumer should use for the document.
|
||||
Case sensitive. If the specified correspondent does not exist, it will be created with this
|
||||
name and default settings.
|
||||
* ``document_type``: Similar to correspondent.
|
||||
* ``tags``: Similar to correspondent. Specify this multiple times to have multiple tags added
|
||||
to the document.
|
||||
|
||||
The endpoint will immediately return "OK" if the document consumption process
|
||||
was started successfully. No additional status information about the consumption
|
||||
process itself is available, since that happens in a different process.
|
||||
|
@ -1,59 +0,0 @@
|
||||
import os
|
||||
import tempfile
|
||||
from datetime import datetime
|
||||
from time import mktime
|
||||
|
||||
import magic
|
||||
from django import forms
|
||||
from django.conf import settings
|
||||
from django_q.tasks import async_task
|
||||
from pathvalidate import validate_filename, ValidationError
|
||||
|
||||
from documents.parsers import is_mime_type_supported
|
||||
|
||||
|
||||
class UploadForm(forms.Form):
    """Form that accepts one uploaded document and queues it for consumption."""

    document = forms.FileField()

    def clean_document(self):
        """Validate the uploaded file's name and sniffed mime type.

        Returns:
            A ``(filename, data)`` tuple: the name reported for the upload
            and the complete file content read into memory.

        Raises:
            forms.ValidationError: if ``validate_filename`` rejects the name
                or ``is_mime_type_supported`` rejects the detected mime type.
        """
        document = self.cleaned_data.get("document")
        document_name = document.name

        try:
            validate_filename(document_name)
        except ValidationError:
            raise forms.ValidationError("That filename is suspicious.")

        document_data = document.read()

        # The mime type is detected from the content itself, not taken from
        # the file name or the client-supplied content type.
        mime_type = magic.from_buffer(document_data, mime=True)

        if not is_mime_type_supported(mime_type):
            raise forms.ValidationError("This mime type is not supported.")

        return document_name, document_data

    def save(self):
        """Store the upload in the scratch directory and queue its consumption.

        The cleaned data is written to a new ``paperless-upload-*`` file in
        ``settings.SCRATCH_DIR`` and a ``documents.tasks.consume_file`` task
        is queued for that file, with the original file name passed along as
        ``override_filename``.
        """
        original_filename, data = self.cleaned_data.get("document")

        # Current time, truncated to whole seconds.
        t = int(mktime(datetime.now().timetuple()))

        os.makedirs(settings.SCRATCH_DIR, exist_ok=True)

        with tempfile.NamedTemporaryFile(prefix="paperless-upload-",
                                         dir=settings.SCRATCH_DIR,
                                         delete=False) as f:
            f.write(data)

        # Everything below runs only after the file has been flushed and
        # closed: the final flush would otherwise bump the mtime again, and
        # the queued task must never see a partially written file.
        os.utime(f.name, times=(t, t))

        async_task("documents.tasks.consume_file",
                   f.name,
                   override_filename=original_filename,
                   task_name=os.path.basename(original_filename)[:100])
|
@ -1,6 +1,9 @@
|
||||
import magic
|
||||
from pathvalidate import validate_filename, ValidationError
|
||||
from rest_framework import serializers
|
||||
|
||||
from .models import Correspondent, Tag, Document, Log, DocumentType
|
||||
from .parsers import is_mime_type_supported
|
||||
|
||||
|
||||
class CorrespondentSerializer(serializers.HyperlinkedModelSerializer):
|
||||
@ -113,3 +116,85 @@ class LogSerializer(serializers.ModelSerializer):
|
||||
"group",
|
||||
"level"
|
||||
)
|
||||
|
||||
|
||||
class PostDocumentSerializer(serializers.Serializer):
    # Validates a document upload together with its optional metadata
    # (title, correspondent, document type, tags) and resolves the metadata
    # names to database ids.

    document = serializers.FileField(label="Document", write_only=True)

    title = serializers.CharField(
        label="Title", write_only=True, required=False)

    correspondent = serializers.CharField(
        label="Correspondent", write_only=True, required=False)

    document_type = serializers.CharField(
        label="Document type", write_only=True, required=False)

    tags = serializers.ListField(
        child=serializers.CharField(),
        label="Tags",
        source="tag",
        write_only=True,
        required=False,
    )

    def validate(self, attrs):
        # Checks the file name and sniffed mime type, then adds these keys
        # to attrs: document_data, title, correspondent_id,
        # document_type_id and tag_ids (None wherever nothing was given).
        upload = attrs.get('document')

        try:
            validate_filename(upload.name)
        except ValidationError:
            raise serializers.ValidationError("Invalid filename.")

        payload = upload.file.read()
        detected_type = magic.from_buffer(payload, mime=True)

        if not is_mime_type_supported(detected_type):
            raise serializers.ValidationError(
                "This mime type is not supported.")

        attrs['document_data'] = payload

        # A missing or empty title is normalised to None.
        attrs['title'] = attrs.get('title') or None

        # Names are looked up exactly as given; unknown ones are created.
        correspondent_name = attrs.get('correspondent')
        attrs['correspondent_id'] = (
            Correspondent.objects.get_or_create(
                name=correspondent_name)[0].id
            if correspondent_name else None
        )

        type_name = attrs.get('document_type')
        attrs['document_type_id'] = (
            DocumentType.objects.get_or_create(name=type_name)[0].id
            if type_name else None
        )

        # The "tags" field declares source="tag", so its values arrive
        # under the 'tag' key.
        tag_names = attrs.get('tag')
        attrs['tag_ids'] = (
            [Tag.objects.get_or_create(name=tag_name)[0].id
             for tag_name in tag_names]
            if tag_names else None
        )

        return attrs
|
||||
|
@ -358,7 +358,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
|
||||
self.assertEqual(response.data['documents_total'], 3)
|
||||
self.assertEqual(response.data['documents_inbox'], 1)
|
||||
|
||||
@mock.patch("documents.forms.async_task")
|
||||
@mock.patch("documents.views.async_task")
|
||||
def test_upload(self, m):
|
||||
|
||||
with open(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb") as f:
|
||||
@ -370,8 +370,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
|
||||
|
||||
args, kwargs = m.call_args
|
||||
self.assertEqual(kwargs['override_filename'], "simple.pdf")
|
||||
self.assertIsNone(kwargs['override_title'])
|
||||
self.assertIsNone(kwargs['override_correspondent_id'])
|
||||
self.assertIsNone(kwargs['override_document_type_id'])
|
||||
self.assertIsNone(kwargs['override_tag_ids'])
|
||||
|
||||
@mock.patch("documents.forms.async_task")
|
||||
@mock.patch("documents.views.async_task")
|
||||
def test_upload_invalid_form(self, m):
|
||||
|
||||
with open(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb") as f:
|
||||
@ -379,7 +383,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
|
||||
self.assertEqual(response.status_code, 400)
|
||||
m.assert_not_called()
|
||||
|
||||
@mock.patch("documents.forms.async_task")
|
||||
@mock.patch("documents.views.async_task")
|
||||
def test_upload_invalid_file(self, m):
|
||||
|
||||
with open(os.path.join(os.path.dirname(__file__), "samples", "simple.zip"), "rb") as f:
|
||||
@ -387,8 +391,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
|
||||
self.assertEqual(response.status_code, 400)
|
||||
m.assert_not_called()
|
||||
|
||||
@mock.patch("documents.forms.async_task")
|
||||
@mock.patch("documents.forms.validate_filename")
|
||||
@mock.patch("documents.views.async_task")
|
||||
@mock.patch("documents.serialisers.validate_filename")
|
||||
def test_upload_invalid_filename(self, validate_filename, async_task):
|
||||
validate_filename.side_effect = ValidationError()
|
||||
with open(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb") as f:
|
||||
@ -396,3 +400,83 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
|
||||
self.assertEqual(response.status_code, 400)
|
||||
|
||||
async_task.assert_not_called()
|
||||
|
||||
@mock.patch("documents.views.async_task")
def test_upload_with_title(self, async_task):
    # A "title" form field must reach the consume task as override_title.
    sample = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
    with open(sample, "rb") as pdf:
        payload = {"document": pdf, "title": "my custom title"}
        response = self.client.post("/api/documents/post_document/", payload)

    self.assertEqual(response.status_code, 200)
    async_task.assert_called_once()

    _, task_kwargs = async_task.call_args
    self.assertEqual(task_kwargs['override_title'], "my custom title")
|
||||
|
||||
@mock.patch("documents.views.async_task")
def test_upload_with_correspondent(self, async_task):
    # Naming an existing correspondent must pass that correspondent's id on.
    existing = Correspondent.objects.create(name="test-corres")
    sample = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
    with open(sample, "rb") as pdf:
        payload = {"document": pdf, "correspondent": "test-corres"}
        response = self.client.post("/api/documents/post_document/", payload)

    self.assertEqual(response.status_code, 200)
    async_task.assert_called_once()

    _, task_kwargs = async_task.call_args
    self.assertEqual(task_kwargs['override_correspondent_id'], existing.id)
|
||||
|
||||
@mock.patch("documents.views.async_task")
def test_upload_with_new_correspondent(self, async_task):
    # Naming an unknown correspondent must create it and pass its id on.
    sample = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
    with open(sample, "rb") as pdf:
        payload = {"document": pdf, "correspondent": "test-corres2"}
        response = self.client.post("/api/documents/post_document/", payload)

    self.assertEqual(response.status_code, 200)
    async_task.assert_called_once()

    _, task_kwargs = async_task.call_args
    created = Correspondent.objects.get(name="test-corres2")
    self.assertEqual(task_kwargs['override_correspondent_id'], created.id)
|
||||
|
||||
@mock.patch("documents.views.async_task")
def test_upload_with_document_type(self, async_task):
    # Naming an existing document type must pass that type's id on.
    existing = DocumentType.objects.create(name="invoice")
    sample = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
    with open(sample, "rb") as pdf:
        payload = {"document": pdf, "document_type": "invoice"}
        response = self.client.post("/api/documents/post_document/", payload)

    self.assertEqual(response.status_code, 200)
    async_task.assert_called_once()

    _, task_kwargs = async_task.call_args
    self.assertEqual(task_kwargs['override_document_type_id'], existing.id)
|
||||
|
||||
@mock.patch("documents.views.async_task")
def test_upload_with_new_document_type(self, async_task):
    # Naming an unknown document type must create it and pass its id on.
    sample = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
    with open(sample, "rb") as pdf:
        payload = {"document": pdf, "document_type": "invoice2"}
        response = self.client.post("/api/documents/post_document/", payload)

    self.assertEqual(response.status_code, 200)
    async_task.assert_called_once()

    _, task_kwargs = async_task.call_args
    created = DocumentType.objects.get(name="invoice2")
    self.assertEqual(task_kwargs['override_document_type_id'], created.id)
|
||||
|
||||
@mock.patch("documents.views.async_task")
def test_upload_with_tags(self, async_task):
    # One tag exists beforehand and one does not; both ids must be passed
    # on to the consume task.
    existing_tag = Tag.objects.create(name="tag1")
    sample = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
    with open(sample, "rb") as pdf:
        payload = {"document": pdf, "tags": ["tag1", "tag2"]}
        response = self.client.post("/api/documents/post_document/", payload)

    self.assertEqual(response.status_code, 200)
    async_task.assert_called_once()

    _, task_kwargs = async_task.call_args
    created_tag = Tag.objects.get(name="tag2")
    self.assertCountEqual(
        task_kwargs['override_tag_ids'],
        [existing_tag.id, created_tag.id])
|
||||
|
@ -1,10 +1,16 @@
|
||||
import os
|
||||
import tempfile
|
||||
from datetime import datetime
|
||||
from time import mktime
|
||||
|
||||
from django.conf import settings
|
||||
from django.db.models import Count, Max
|
||||
from django.http import HttpResponse, HttpResponseBadRequest, Http404
|
||||
from django.views.decorators.cache import cache_control
|
||||
from django.views.generic import TemplateView
|
||||
from django_filters.rest_framework import DjangoFilterBackend
|
||||
from django_q.tasks import async_task
|
||||
from rest_framework import parsers
|
||||
from rest_framework.decorators import action
|
||||
from rest_framework.filters import OrderingFilter, SearchFilter
|
||||
from rest_framework.mixins import (
|
||||
@ -32,14 +38,14 @@ from .filters import (
|
||||
DocumentTypeFilterSet,
|
||||
LogFilterSet
|
||||
)
|
||||
from .forms import UploadForm
|
||||
from .models import Correspondent, Document, Log, Tag, DocumentType
|
||||
from .serialisers import (
|
||||
CorrespondentSerializer,
|
||||
DocumentSerializer,
|
||||
LogSerializer,
|
||||
TagSerializer,
|
||||
DocumentTypeSerializer
|
||||
DocumentTypeSerializer,
|
||||
PostDocumentSerializer
|
||||
)
|
||||
|
||||
|
||||
@ -154,16 +160,6 @@ class DocumentViewSet(RetrieveModelMixin,
|
||||
disposition, filename)
|
||||
return response
|
||||
|
||||
@action(methods=['post'], detail=False)
def post_document(self, request, pk=None):
    # TODO: is this a good implementation?
    # Validate the upload with UploadForm; answer 400 with the form errors
    # when it is rejected, otherwise save it and answer "OK".
    upload_form = UploadForm(data=request.POST, files=request.FILES)

    if not upload_form.is_valid():
        return HttpResponseBadRequest(str(upload_form.errors))

    upload_form.save()
    return Response("OK")
|
||||
|
||||
@action(methods=['get'], detail=True)
|
||||
def metadata(self, request, pk=None):
|
||||
try:
|
||||
@ -217,6 +213,56 @@ class LogViewSet(ReadOnlyModelViewSet):
|
||||
ordering_fields = ("created",)
|
||||
|
||||
|
||||
class PostDocumentView(APIView):
    """Accept a multipart document upload and queue it for consumption.

    The request is validated by ``PostDocumentSerializer``; the uploaded
    content is then written to a scratch file and handed to the
    ``documents.tasks.consume_file`` task together with the optional
    title, correspondent, document type and tag overrides.
    """

    permission_classes = (IsAuthenticated,)
    serializer_class = PostDocumentSerializer
    parser_classes = (parsers.MultiPartParser,)

    def get_serializer_context(self):
        """Return the context dict handed to the serializer."""
        return {
            'request': self.request,
            'format': self.format_kwarg,
            'view': self
        }

    def get_serializer(self, *args, **kwargs):
        """Instantiate ``serializer_class`` with this view's context."""
        kwargs['context'] = self.get_serializer_context()
        return self.serializer_class(*args, **kwargs)

    def post(self, request, *args, **kwargs):
        """Validate the upload, store it and queue the consume task.

        Returns:
            ``Response("OK")`` once the task has been handed to
            ``async_task``.

        Raises:
            The validation exception raised by
            ``is_valid(raise_exception=True)`` when the upload is rejected.
        """
        serializer = self.get_serializer(data=request.data)
        serializer.is_valid(raise_exception=True)

        validated = serializer.validated_data
        document = validated['document']
        document_data = validated['document_data']
        correspondent_id = validated['correspondent_id']
        document_type_id = validated['document_type_id']
        tag_ids = validated['tag_ids']
        title = validated['title']

        # Current time, truncated to whole seconds.
        t = int(mktime(datetime.now().timetuple()))

        os.makedirs(settings.SCRATCH_DIR, exist_ok=True)

        with tempfile.NamedTemporaryFile(prefix="paperless-upload-",
                                         dir=settings.SCRATCH_DIR,
                                         delete=False) as f:
            f.write(document_data)

        # Everything below runs only after the file has been flushed and
        # closed: the final flush would otherwise bump the mtime again, and
        # the queued task must never see a partially written file.
        os.utime(f.name, times=(t, t))

        async_task("documents.tasks.consume_file",
                   f.name,
                   override_filename=document.name,
                   override_title=title,
                   override_correspondent_id=correspondent_id,
                   override_document_type_id=document_type_id,
                   override_tag_ids=tag_ids,
                   task_name=os.path.basename(document.name)[:100])

        return Response("OK")
|
||||
|
||||
|
||||
class SearchView(APIView):
|
||||
|
||||
permission_classes = (IsAuthenticated,)
|
||||
|
@ -86,6 +86,7 @@ INSTALLED_APPS = [
|
||||
"django.contrib.admin",
|
||||
|
||||
"rest_framework",
|
||||
"rest_framework.authtoken",
|
||||
"django_filters",
|
||||
|
||||
"django_q",
|
||||
@ -95,7 +96,8 @@ INSTALLED_APPS = [
|
||||
REST_FRAMEWORK = {
|
||||
'DEFAULT_AUTHENTICATION_CLASSES': [
|
||||
'rest_framework.authentication.BasicAuthentication',
|
||||
'rest_framework.authentication.SessionAuthentication'
|
||||
'rest_framework.authentication.SessionAuthentication',
|
||||
'rest_framework.authentication.TokenAuthentication'
|
||||
]
|
||||
}
|
||||
|
||||
|
@ -4,6 +4,7 @@ from django.contrib.auth.decorators import login_required
|
||||
from django.urls import path, re_path
|
||||
from django.views.decorators.csrf import csrf_exempt
|
||||
from django.views.generic import RedirectView
|
||||
from rest_framework.authtoken import views
|
||||
from rest_framework.routers import DefaultRouter
|
||||
|
||||
from documents.views import (
|
||||
@ -15,7 +16,8 @@ from documents.views import (
|
||||
SearchView,
|
||||
IndexView,
|
||||
SearchAutoCompleteView,
|
||||
StatisticsView
|
||||
StatisticsView,
|
||||
PostDocumentView
|
||||
)
|
||||
from paperless.views import FaviconView
|
||||
|
||||
@ -45,6 +47,11 @@ urlpatterns = [
|
||||
StatisticsView.as_view(),
|
||||
name="statistics"),
|
||||
|
||||
re_path(r"^documents/post_document/", PostDocumentView.as_view(),
|
||||
name="post_document"),
|
||||
|
||||
path('token/', views.obtain_auth_token)
|
||||
|
||||
] + api_router.urls)),
|
||||
|
||||
re_path(r"^favicon.ico$", FaviconView.as_view(), name="favicon"),
|
||||
|
Loading…
x
Reference in New Issue
Block a user