2020-12-17 21:36:21 +01:00

406 lines
13 KiB
Python
Executable File

import os
import tempfile
from datetime import datetime
from time import mktime
from django.conf import settings
from django.db.models import Count, Max
from django.http import HttpResponse, HttpResponseBadRequest, Http404
from django.views.decorators.cache import cache_control
from django.views.generic import TemplateView
from django_filters.rest_framework import DjangoFilterBackend
from django_q.tasks import async_task
from rest_framework import parsers
from rest_framework.decorators import action
from rest_framework.filters import OrderingFilter, SearchFilter
from rest_framework.mixins import (
DestroyModelMixin,
ListModelMixin,
RetrieveModelMixin,
UpdateModelMixin
)
from rest_framework.permissions import IsAuthenticated
from rest_framework.response import Response
from rest_framework.views import APIView
from rest_framework.viewsets import (
GenericViewSet,
ModelViewSet,
ReadOnlyModelViewSet
)
import documents.index as index
from paperless.db import GnuPG
from paperless.views import StandardPagination
from .filters import (
CorrespondentFilterSet,
DocumentFilterSet,
TagFilterSet,
DocumentTypeFilterSet,
LogFilterSet
)
from .models import Correspondent, Document, Log, Tag, DocumentType, SavedView
from .parsers import get_parser_class_for_mime_type
from .serialisers import (
CorrespondentSerializer,
DocumentSerializer,
LogSerializer,
TagSerializer,
DocumentTypeSerializer,
PostDocumentSerializer,
SavedViewSerializer
)
class IndexView(TemplateView):
template_name = "index.html"
class CorrespondentViewSet(ModelViewSet):
model = Correspondent
queryset = Correspondent.objects.annotate(
document_count=Count('documents'),
last_correspondence=Max('documents__created')).order_by('name')
serializer_class = CorrespondentSerializer
pagination_class = StandardPagination
permission_classes = (IsAuthenticated,)
filter_backends = (DjangoFilterBackend, OrderingFilter)
filterset_class = CorrespondentFilterSet
ordering_fields = (
"name",
"matching_algorithm",
"match",
"document_count",
"last_correspondence")
class TagViewSet(ModelViewSet):
model = Tag
queryset = Tag.objects.annotate(
document_count=Count('documents')).order_by('name')
serializer_class = TagSerializer
pagination_class = StandardPagination
permission_classes = (IsAuthenticated,)
filter_backends = (DjangoFilterBackend, OrderingFilter)
filterset_class = TagFilterSet
ordering_fields = ("name", "matching_algorithm", "match", "document_count")
class DocumentTypeViewSet(ModelViewSet):
model = DocumentType
queryset = DocumentType.objects.annotate(
document_count=Count('documents')).order_by('name')
serializer_class = DocumentTypeSerializer
pagination_class = StandardPagination
permission_classes = (IsAuthenticated,)
filter_backends = (DjangoFilterBackend, OrderingFilter)
filterset_class = DocumentTypeFilterSet
ordering_fields = ("name", "matching_algorithm", "match", "document_count")
class DocumentViewSet(RetrieveModelMixin,
UpdateModelMixin,
DestroyModelMixin,
ListModelMixin,
GenericViewSet):
model = Document
queryset = Document.objects.all()
serializer_class = DocumentSerializer
pagination_class = StandardPagination
permission_classes = (IsAuthenticated,)
filter_backends = (DjangoFilterBackend, SearchFilter, OrderingFilter)
filterset_class = DocumentFilterSet
search_fields = ("title", "correspondent__name", "content")
ordering_fields = (
"id",
"title",
"correspondent__name",
"document_type__name",
"created",
"modified",
"added",
"archive_serial_number")
def update(self, request, *args, **kwargs):
response = super(DocumentViewSet, self).update(
request, *args, **kwargs)
index.add_or_update_document(self.get_object())
return response
def destroy(self, request, *args, **kwargs):
index.remove_document_from_index(self.get_object())
return super(DocumentViewSet, self).destroy(request, *args, **kwargs)
@staticmethod
def original_requested(request):
return (
'original' in request.query_params and
request.query_params['original'] == 'true'
)
def file_response(self, pk, request, disposition):
doc = Document.objects.get(id=pk)
if not self.original_requested(request) and os.path.isfile(doc.archive_path): # NOQA: E501
file_handle = doc.archive_file
filename = doc.get_public_filename(archive=True)
mime_type = 'application/pdf'
else:
file_handle = doc.source_file
filename = doc.get_public_filename()
mime_type = doc.mime_type
if doc.storage_type == Document.STORAGE_TYPE_GPG:
file_handle = GnuPG.decrypted(file_handle)
response = HttpResponse(file_handle, content_type=mime_type)
response["Content-Disposition"] = '{}; filename="{}"'.format(
disposition, filename)
return response
def get_metadata(self, file, mime_type):
if not os.path.isfile(file):
return None
parser_class = get_parser_class_for_mime_type(mime_type)
if parser_class:
parser = parser_class(logging_group=None)
try:
return parser.extract_metadata(file, mime_type)
except Exception as e:
# TODO: cover GPG errors, remove later.
return []
else:
return []
@action(methods=['get'], detail=True)
def metadata(self, request, pk=None):
try:
doc = Document.objects.get(pk=pk)
meta = {
"original_checksum": doc.checksum,
"original_size": os.stat(doc.source_path).st_size,
"original_mime_type": doc.mime_type,
"media_filename": doc.filename,
"has_archive_version": os.path.isfile(doc.archive_path),
"original_metadata": self.get_metadata(
doc.source_path, doc.mime_type)
}
if doc.archive_checksum and os.path.isfile(doc.archive_path):
meta['archive_checksum'] = doc.archive_checksum
meta['archive_size'] = os.stat(doc.archive_path).st_size,
meta['archive_metadata'] = self.get_metadata(
doc.archive_path, "application/pdf")
else:
meta['archive_checksum'] = None
meta['archive_size'] = None
meta['archive_metadata'] = None
return Response(meta)
except Document.DoesNotExist:
raise Http404()
@action(methods=['get'], detail=True)
def preview(self, request, pk=None):
try:
response = self.file_response(
pk, request, "inline")
return response
except (FileNotFoundError, Document.DoesNotExist):
raise Http404()
@action(methods=['get'], detail=True)
@cache_control(public=False, max_age=315360000)
def thumb(self, request, pk=None):
try:
doc = Document.objects.get(id=pk)
if doc.storage_type == Document.STORAGE_TYPE_GPG:
handle = GnuPG.decrypted(doc.thumbnail_file)
else:
handle = doc.thumbnail_file
return HttpResponse(handle,
content_type='image/png')
except (FileNotFoundError, Document.DoesNotExist):
raise Http404()
@action(methods=['get'], detail=True)
def download(self, request, pk=None):
try:
return self.file_response(
pk, request, "attachment")
except (FileNotFoundError, Document.DoesNotExist):
raise Http404()
class LogViewSet(ReadOnlyModelViewSet):
model = Log
queryset = Log.objects.all()
serializer_class = LogSerializer
pagination_class = StandardPagination
permission_classes = (IsAuthenticated,)
filter_backends = (DjangoFilterBackend, OrderingFilter)
filterset_class = LogFilterSet
ordering_fields = ("created",)
class SavedViewViewSet(ModelViewSet):
model = SavedView
queryset = SavedView.objects.all()
serializer_class = SavedViewSerializer
pagination_class = StandardPagination
permission_classes = (IsAuthenticated,)
def get_queryset(self):
user = self.request.user
return SavedView.objects.filter(user=user)
def perform_create(self, serializer):
serializer.save(user=self.request.user)
class PostDocumentView(APIView):
permission_classes = (IsAuthenticated,)
serializer_class = PostDocumentSerializer
parser_classes = (parsers.MultiPartParser,)
def get_serializer_context(self):
return {
'request': self.request,
'format': self.format_kwarg,
'view': self
}
def get_serializer(self, *args, **kwargs):
kwargs['context'] = self.get_serializer_context()
return self.serializer_class(*args, **kwargs)
def post(self, request, *args, **kwargs):
serializer = self.get_serializer(data=request.data)
serializer.is_valid(raise_exception=True)
doc_name, doc_data = serializer.validated_data.get('document')
correspondent_id = serializer.validated_data.get('correspondent')
document_type_id = serializer.validated_data.get('document_type')
tag_ids = serializer.validated_data.get('tags')
title = serializer.validated_data.get('title')
t = int(mktime(datetime.now().timetuple()))
os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
with tempfile.NamedTemporaryFile(prefix="paperless-upload-",
dir=settings.SCRATCH_DIR,
delete=False) as f:
f.write(doc_data)
os.utime(f.name, times=(t, t))
async_task("documents.tasks.consume_file",
f.name,
override_filename=doc_name,
override_title=title,
override_correspondent_id=correspondent_id,
override_document_type_id=document_type_id,
override_tag_ids=tag_ids,
task_name=os.path.basename(doc_name)[:100])
return Response("OK")
class SearchView(APIView):
permission_classes = (IsAuthenticated,)
def __init__(self, *args, **kwargs):
super(SearchView, self).__init__(*args, **kwargs)
self.ix = index.open_index()
def add_infos_to_hit(self, r):
doc = Document.objects.get(id=r['id'])
return {'id': r['id'],
'highlights': r.highlights("content", text=doc.content),
'score': r.score,
'rank': r.rank,
'document': DocumentSerializer(doc).data,
'title': r['title']
}
def get(self, request, format=None):
if 'query' in request.query_params:
query = request.query_params['query']
else:
query = None
if 'more_like' in request.query_params:
more_like_id = request.query_params['more_like']
more_like_content = Document.objects.get(id=more_like_id).content
else:
more_like_id = None
more_like_content = None
try:
page = int(request.query_params.get('page', 1))
except (ValueError, TypeError):
page = 1
if page < 1:
page = 1
try:
with index.query_page(self.ix, page, query, more_like_id, more_like_content) as (result_page,
corrected_query):
return Response(
{'count': len(result_page),
'page': result_page.pagenum,
'page_count': result_page.pagecount,
'corrected_query': corrected_query,
'results': list(map(self.add_infos_to_hit, result_page))})
except Exception as e:
return HttpResponseBadRequest(str(e))
class SearchAutoCompleteView(APIView):
permission_classes = (IsAuthenticated,)
def __init__(self, *args, **kwargs):
super(SearchAutoCompleteView, self).__init__(*args, **kwargs)
self.ix = index.open_index()
def get(self, request, format=None):
if 'term' in request.query_params:
term = request.query_params['term']
else:
return HttpResponseBadRequest("Term required")
if 'limit' in request.query_params:
limit = int(request.query_params['limit'])
if limit <= 0:
return HttpResponseBadRequest("Invalid limit")
else:
limit = 10
return Response(index.autocomplete(self.ix, term, limit))
class StatisticsView(APIView):
permission_classes = (IsAuthenticated,)
def get(self, request, format=None):
return Response({
'documents_total': Document.objects.all().count(),
'documents_inbox': Document.objects.filter(
tags__is_inbox_tag=True).distinct().count()
})