Updated the API: it now supports tags, correspondents, document types, and a title when uploading documents.

This commit is contained in:
jonaswinkler
2020-12-03 18:36:23 +01:00
parent cb92d4c691
commit 9546d6bf8c
7 changed files with 302 additions and 82 deletions

View File

@@ -1,59 +0,0 @@
import os
import tempfile
from datetime import datetime
from time import mktime
import magic
from django import forms
from django.conf import settings
from django_q.tasks import async_task
from pathvalidate import validate_filename, ValidationError
from documents.parsers import is_mime_type_supported
class UploadForm(forms.Form):
    """Form that validates an uploaded document and queues it for consumption."""

    document = forms.FileField()

    def clean_document(self):
        """Validate the uploaded file's name and MIME type.

        Returns:
            A ``(filename, data)`` tuple holding the upload's name and the
            bytes read from it.

        Raises:
            forms.ValidationError: If ``validate_filename`` rejects the name,
                or ``is_mime_type_supported`` rejects the MIME type detected
                from the file contents.
        """
        upload = self.cleaned_data.get("document")
        document_name = upload.name

        try:
            validate_filename(document_name)
        except ValidationError:
            raise forms.ValidationError("That filename is suspicious.")

        document_data = upload.read()

        # The MIME type is sniffed from the content itself.
        mime_type = magic.from_buffer(document_data, mime=True)
        if not is_mime_type_supported(mime_type):
            raise forms.ValidationError("This mime type is not supported.")

        return document_name, document_data

    def save(self):
        """
        Since the consumer already does a lot of work, it's easier just to save
        to-be-consumed files to the consumption directory rather than have the
        form do that as well.  Think of it as a poor-man's queue server.

        The data is written to a temporary file in ``settings.SCRATCH_DIR``
        and a ``documents.tasks.consume_file`` task is queued for that file.
        """
        original_filename, data = self.cleaned_data.get("document")

        t = int(mktime(datetime.now().timetuple()))

        os.makedirs(settings.SCRATCH_DIR, exist_ok=True)

        # delete=False: the file must outlive this request so that the
        # queued task can read it.
        with tempfile.NamedTemporaryFile(prefix="paperless-upload-",
                                         dir=settings.SCRATCH_DIR,
                                         delete=False) as f:
            f.write(data)
            temp_filename = f.name

        # Only touch the timestamp and queue the task once the file has been
        # flushed and closed: a flush on close would otherwise overwrite the
        # mtime set here, and a worker could pick up a partially written file.
        os.utime(temp_filename, times=(t, t))

        async_task("documents.tasks.consume_file",
                   temp_filename,
                   override_filename=original_filename,
                   task_name=os.path.basename(original_filename)[:100])

View File

@@ -1,6 +1,9 @@
import magic
from pathvalidate import validate_filename, ValidationError
from rest_framework import serializers
from .models import Correspondent, Tag, Document, Log, DocumentType
from .parsers import is_mime_type_supported
class CorrespondentSerializer(serializers.HyperlinkedModelSerializer):
@@ -113,3 +116,85 @@ class LogSerializer(serializers.ModelSerializer):
"group",
"level"
)
class PostDocumentSerializer(serializers.Serializer):
    """Serializer for document uploads with optional metadata overrides.

    Besides the mandatory ``document`` file, callers may supply a title, a
    correspondent name, a document type name and a list of tag names.
    """

    document = serializers.FileField(label="Document", write_only=True)

    title = serializers.CharField(
        label="Title", write_only=True, required=False)

    correspondent = serializers.CharField(
        label="Correspondent", write_only=True, required=False)

    document_type = serializers.CharField(
        label="Document type", write_only=True, required=False)

    # source="tag": validate() below reads the submitted names from
    # attrs['tag'], not attrs['tags'].
    tags = serializers.ListField(
        child=serializers.CharField(),
        label="Tags",
        source="tag",
        write_only=True,
        required=False,
    )

    def validate(self, attrs):
        """Check the upload and resolve the supplied names to object ids.

        Adds ``document_data``, ``correspondent_id``, ``document_type_id``
        and ``tag_ids`` to ``attrs`` and normalises an empty ``title`` to
        ``None``.  Names are resolved with ``get_or_create``.  Ids for
        metadata that was not supplied are set to ``None``.

        Raises:
            serializers.ValidationError: If the filename is rejected by
                ``validate_filename`` or the detected MIME type is rejected
                by ``is_mime_type_supported``.
        """
        upload = attrs.get('document')

        try:
            validate_filename(upload.name)
        except ValidationError:
            raise serializers.ValidationError("Invalid filename.")

        document_data = upload.file.read()
        detected_type = magic.from_buffer(document_data, mime=True)
        if not is_mime_type_supported(detected_type):
            raise serializers.ValidationError(
                "This mime type is not supported.")
        attrs['document_data'] = document_data

        if not attrs.get('title'):
            attrs['title'] = None

        correspondent_name = attrs.get('correspondent')
        if not correspondent_name:
            attrs['correspondent_id'] = None
        else:
            correspondent, _ = Correspondent.objects.get_or_create(
                name=correspondent_name)
            attrs['correspondent_id'] = correspondent.id

        type_name = attrs.get('document_type')
        if not type_name:
            attrs['document_type_id'] = None
        else:
            doc_type, _ = DocumentType.objects.get_or_create(name=type_name)
            attrs['document_type_id'] = doc_type.id

        tag_names = attrs.get('tag')
        if not tag_names:
            attrs['tag_ids'] = None
        else:
            attrs['tag_ids'] = [
                Tag.objects.get_or_create(name=tag_name)[0].id
                for tag_name in tag_names
            ]

        return attrs

View File

@@ -358,7 +358,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(response.data['documents_total'], 3)
self.assertEqual(response.data['documents_inbox'], 1)
@mock.patch("documents.forms.async_task")
@mock.patch("documents.views.async_task")
def test_upload(self, m):
with open(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb") as f:
@@ -370,8 +370,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
args, kwargs = m.call_args
self.assertEqual(kwargs['override_filename'], "simple.pdf")
self.assertIsNone(kwargs['override_title'])
self.assertIsNone(kwargs['override_correspondent_id'])
self.assertIsNone(kwargs['override_document_type_id'])
self.assertIsNone(kwargs['override_tag_ids'])
@mock.patch("documents.forms.async_task")
@mock.patch("documents.views.async_task")
def test_upload_invalid_form(self, m):
with open(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb") as f:
@@ -379,7 +383,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(response.status_code, 400)
m.assert_not_called()
@mock.patch("documents.forms.async_task")
@mock.patch("documents.views.async_task")
def test_upload_invalid_file(self, m):
with open(os.path.join(os.path.dirname(__file__), "samples", "simple.zip"), "rb") as f:
@@ -387,8 +391,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(response.status_code, 400)
m.assert_not_called()
@mock.patch("documents.forms.async_task")
@mock.patch("documents.forms.validate_filename")
@mock.patch("documents.views.async_task")
@mock.patch("documents.serialisers.validate_filename")
def test_upload_invalid_filename(self, validate_filename, async_task):
validate_filename.side_effect = ValidationError()
with open(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb") as f:
@@ -396,3 +400,83 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(response.status_code, 400)
async_task.assert_not_called()
@mock.patch("documents.views.async_task")
def test_upload_with_title(self, async_task):
    """A ``title`` sent with the upload is forwarded as ``override_title``."""
    sample_path = os.path.join(
        os.path.dirname(__file__), "samples", "simple.pdf")

    with open(sample_path, "rb") as sample:
        payload = {"document": sample, "title": "my custom title"}
        response = self.client.post("/api/documents/post_document/", payload)

    self.assertEqual(response.status_code, 200)
    async_task.assert_called_once()

    _, task_kwargs = async_task.call_args
    self.assertEqual(task_kwargs['override_title'], "my custom title")
@mock.patch("documents.views.async_task")
def test_upload_with_correspondent(self, async_task):
    """Naming an existing correspondent passes that correspondent's id."""
    existing = Correspondent.objects.create(name="test-corres")
    sample_path = os.path.join(
        os.path.dirname(__file__), "samples", "simple.pdf")

    with open(sample_path, "rb") as sample:
        payload = {"document": sample, "correspondent": "test-corres"}
        response = self.client.post("/api/documents/post_document/", payload)

    self.assertEqual(response.status_code, 200)
    async_task.assert_called_once()

    _, task_kwargs = async_task.call_args
    self.assertEqual(task_kwargs['override_correspondent_id'], existing.id)
@mock.patch("documents.views.async_task")
def test_upload_with_new_correspondent(self, async_task):
    """An unknown correspondent name is created and its id is passed on."""
    sample_path = os.path.join(
        os.path.dirname(__file__), "samples", "simple.pdf")

    with open(sample_path, "rb") as sample:
        payload = {"document": sample, "correspondent": "test-corres2"}
        response = self.client.post("/api/documents/post_document/", payload)

    self.assertEqual(response.status_code, 200)
    async_task.assert_called_once()

    _, task_kwargs = async_task.call_args
    # The lookup raises if the upload did not create the correspondent.
    created = Correspondent.objects.get(name="test-corres2")
    self.assertEqual(task_kwargs['override_correspondent_id'], created.id)
@mock.patch("documents.views.async_task")
def test_upload_with_document_type(self, async_task):
    """Naming an existing document type passes that type's id."""
    existing = DocumentType.objects.create(name="invoice")
    sample_path = os.path.join(
        os.path.dirname(__file__), "samples", "simple.pdf")

    with open(sample_path, "rb") as sample:
        payload = {"document": sample, "document_type": "invoice"}
        response = self.client.post("/api/documents/post_document/", payload)

    self.assertEqual(response.status_code, 200)
    async_task.assert_called_once()

    _, task_kwargs = async_task.call_args
    self.assertEqual(task_kwargs['override_document_type_id'], existing.id)
@mock.patch("documents.views.async_task")
def test_upload_with_new_document_type(self, async_task):
    """An unknown document type name is created and its id is passed on."""
    sample_path = os.path.join(
        os.path.dirname(__file__), "samples", "simple.pdf")

    with open(sample_path, "rb") as sample:
        payload = {"document": sample, "document_type": "invoice2"}
        response = self.client.post("/api/documents/post_document/", payload)

    self.assertEqual(response.status_code, 200)
    async_task.assert_called_once()

    _, task_kwargs = async_task.call_args
    # The lookup raises if the upload did not create the document type.
    created = DocumentType.objects.get(name="invoice2")
    self.assertEqual(task_kwargs['override_document_type_id'], created.id)
@mock.patch("documents.views.async_task")
def test_upload_with_tags(self, async_task):
    """Tag names resolve to ids, mixing an existing tag with a new one."""
    existing_tag = Tag.objects.create(name="tag1")
    sample_path = os.path.join(
        os.path.dirname(__file__), "samples", "simple.pdf")

    with open(sample_path, "rb") as sample:
        payload = {"document": sample, "tags": ["tag1", "tag2"]}
        response = self.client.post("/api/documents/post_document/", payload)

    self.assertEqual(response.status_code, 200)
    async_task.assert_called_once()

    _, task_kwargs = async_task.call_args
    # "tag2" did not exist before the upload; the lookup raises if it was
    # not created.
    created_tag = Tag.objects.get(name="tag2")
    expected_ids = [existing_tag.id, created_tag.id]
    self.assertCountEqual(task_kwargs['override_tag_ids'], expected_ids)

View File

@@ -1,10 +1,16 @@
import os
import tempfile
from datetime import datetime
from time import mktime
from django.conf import settings
from django.db.models import Count, Max
from django.http import HttpResponse, HttpResponseBadRequest, Http404
from django.views.decorators.cache import cache_control
from django.views.generic import TemplateView
from django_filters.rest_framework import DjangoFilterBackend
from django_q.tasks import async_task
from rest_framework import parsers
from rest_framework.decorators import action
from rest_framework.filters import OrderingFilter, SearchFilter
from rest_framework.mixins import (
@@ -32,14 +38,14 @@ from .filters import (
DocumentTypeFilterSet,
LogFilterSet
)
from .forms import UploadForm
from .models import Correspondent, Document, Log, Tag, DocumentType
from .serialisers import (
CorrespondentSerializer,
DocumentSerializer,
LogSerializer,
TagSerializer,
DocumentTypeSerializer
DocumentTypeSerializer,
PostDocumentSerializer
)
@@ -154,16 +160,6 @@ class DocumentViewSet(RetrieveModelMixin,
disposition, filename)
return response
@action(methods=['post'], detail=False)
def post_document(self, request, pk=None):
    """Validate an upload with ``UploadForm`` and save it.

    Responds with "OK" when the form validates; otherwise returns a bad
    request response whose body is the stringified form errors.
    """
    # TODO: is this a good implementation?
    upload_form = UploadForm(data=request.POST, files=request.FILES)

    if not upload_form.is_valid():
        return HttpResponseBadRequest(str(upload_form.errors))

    upload_form.save()
    return Response("OK")
@action(methods=['get'], detail=True)
def metadata(self, request, pk=None):
try:
@@ -217,6 +213,56 @@ class LogViewSet(ReadOnlyModelViewSet):
ordering_fields = ("created",)
class PostDocumentView(APIView):
    """Accept a multipart document upload and queue it for consumption.

    The request is validated by ``PostDocumentSerializer``.  The uploaded
    bytes are written to a temporary file in ``settings.SCRATCH_DIR`` and a
    ``documents.tasks.consume_file`` task is queued with the optional
    title, correspondent, document type and tag overrides.
    """

    permission_classes = (IsAuthenticated,)
    serializer_class = PostDocumentSerializer
    parser_classes = (parsers.MultiPartParser,)

    def get_serializer_context(self):
        """Return the extra context handed to the serializer."""
        return {
            'request': self.request,
            'format': self.format_kwarg,
            'view': self
        }

    def get_serializer(self, *args, **kwargs):
        """Instantiate ``serializer_class`` with this view's context."""
        kwargs['context'] = self.get_serializer_context()
        return self.serializer_class(*args, **kwargs)

    def post(self, request, *args, **kwargs):
        """Validate the upload, store it on disk and queue the consume task.

        Returns:
            ``Response("OK")`` once the task has been queued.  Invalid input
            raises from ``serializer.is_valid(raise_exception=True)``.
        """
        serializer = self.get_serializer(data=request.data)
        serializer.is_valid(raise_exception=True)

        validated = serializer.validated_data
        document = validated['document']
        document_data = validated['document_data']
        correspondent_id = validated['correspondent_id']
        document_type_id = validated['document_type_id']
        tag_ids = validated['tag_ids']
        title = validated['title']

        t = int(mktime(datetime.now().timetuple()))

        os.makedirs(settings.SCRATCH_DIR, exist_ok=True)

        # delete=False: the file must outlive this request so that the
        # queued task can read it.
        with tempfile.NamedTemporaryFile(prefix="paperless-upload-",
                                         dir=settings.SCRATCH_DIR,
                                         delete=False) as f:
            f.write(document_data)
            temp_filename = f.name

        # Only touch the timestamp and queue the task once the file has been
        # flushed and closed: a flush on close would otherwise overwrite the
        # mtime set here, and a worker could pick up a partially written file.
        os.utime(temp_filename, times=(t, t))

        async_task("documents.tasks.consume_file",
                   temp_filename,
                   override_filename=document.name,
                   override_title=title,
                   override_correspondent_id=correspondent_id,
                   override_document_type_id=document_type_id,
                   override_tag_ids=tag_ids,
                   task_name=os.path.basename(document.name)[:100])

        return Response("OK")
class SearchView(APIView):
permission_classes = (IsAuthenticated,)