mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
424 lines
14 KiB
Python
Executable File
424 lines
14 KiB
Python
Executable File
import os
|
|
import tempfile
|
|
from datetime import datetime
|
|
from time import mktime
|
|
|
|
from django.conf import settings
|
|
from django.db.models import Count, Max
|
|
from django.http import HttpResponse, HttpResponseBadRequest, Http404
|
|
from django.views.decorators.cache import cache_control
|
|
from django.views.generic import TemplateView
|
|
from django_filters.rest_framework import DjangoFilterBackend
|
|
from django_q.tasks import async_task
|
|
from rest_framework import parsers
|
|
from rest_framework.decorators import action
|
|
from rest_framework.filters import OrderingFilter, SearchFilter
|
|
from rest_framework.mixins import (
|
|
DestroyModelMixin,
|
|
ListModelMixin,
|
|
RetrieveModelMixin,
|
|
UpdateModelMixin
|
|
)
|
|
from rest_framework.permissions import IsAuthenticated
|
|
from rest_framework.response import Response
|
|
from rest_framework.views import APIView
|
|
from rest_framework.viewsets import (
|
|
GenericViewSet,
|
|
ModelViewSet,
|
|
ReadOnlyModelViewSet
|
|
)
|
|
|
|
import documents.index as index
|
|
from paperless.db import GnuPG
|
|
from paperless.views import StandardPagination
|
|
from .bulk_edit import perform_bulk_edit
|
|
from .filters import (
|
|
CorrespondentFilterSet,
|
|
DocumentFilterSet,
|
|
TagFilterSet,
|
|
DocumentTypeFilterSet,
|
|
LogFilterSet
|
|
)
|
|
from .models import Correspondent, Document, Log, Tag, DocumentType
|
|
from .parsers import get_parser_class_for_mime_type
|
|
from .serialisers import (
|
|
CorrespondentSerializer,
|
|
DocumentSerializer,
|
|
LogSerializer,
|
|
TagSerializer,
|
|
DocumentTypeSerializer,
|
|
PostDocumentSerializer,
|
|
BulkEditSerializer
|
|
)
|
|
|
|
|
|
class IndexView(TemplateView):
    """Template view that renders ``index.html``."""

    template_name = "index.html"
|
|
|
|
|
|
class CorrespondentViewSet(ModelViewSet):
    """Model view set exposing ``Correspondent`` objects.

    Every correspondent is annotated with ``document_count`` (count of the
    related ``documents``) and ``last_correspondence`` (maximum ``created``
    value of the related documents). The base queryset is ordered by name;
    ``ordering_fields`` lists the fields made available to
    ``OrderingFilter``.
    """

    model = Correspondent
    serializer_class = CorrespondentSerializer
    pagination_class = StandardPagination
    permission_classes = (IsAuthenticated,)

    queryset = (
        Correspondent.objects
        .annotate(
            document_count=Count('documents'),
            last_correspondence=Max('documents__created'),
        )
        .order_by('name')
    )

    filter_backends = (DjangoFilterBackend, OrderingFilter)
    filterset_class = CorrespondentFilterSet
    ordering_fields = (
        "name",
        "matching_algorithm",
        "match",
        "document_count",
        "last_correspondence",
    )
|
|
|
|
|
|
class TagViewSet(ModelViewSet):
    """Model view set exposing ``Tag`` objects.

    Every tag is annotated with ``document_count`` (count of the related
    ``documents``) and the base queryset is ordered by name.
    """

    model = Tag
    serializer_class = TagSerializer
    pagination_class = StandardPagination
    permission_classes = (IsAuthenticated,)

    queryset = (
        Tag.objects
        .annotate(document_count=Count('documents'))
        .order_by('name')
    )

    filter_backends = (DjangoFilterBackend, OrderingFilter)
    filterset_class = TagFilterSet
    ordering_fields = (
        "name",
        "matching_algorithm",
        "match",
        "document_count",
    )
|
|
|
|
|
|
class DocumentTypeViewSet(ModelViewSet):
    """Model view set exposing ``DocumentType`` objects.

    Every document type is annotated with ``document_count`` (count of the
    related ``documents``) and the base queryset is ordered by name.
    """

    model = DocumentType
    serializer_class = DocumentTypeSerializer
    pagination_class = StandardPagination
    permission_classes = (IsAuthenticated,)

    queryset = (
        DocumentType.objects
        .annotate(document_count=Count('documents'))
        .order_by('name')
    )

    filter_backends = (DjangoFilterBackend, OrderingFilter)
    filterset_class = DocumentTypeFilterSet
    ordering_fields = (
        "name",
        "matching_algorithm",
        "match",
        "document_count",
    )
|
|
|
|
|
|
class BulkEditForm:
    """Empty placeholder class; it defines no attributes or methods."""
|
|
|
|
|
|
class DocumentViewSet(RetrieveModelMixin,
                      UpdateModelMixin,
                      DestroyModelMixin,
                      ListModelMixin,
                      GenericViewSet):
    """View set for ``Document`` objects.

    Combines the retrieve, update, destroy and list mixins and adds detail
    actions for file metadata, preview, thumbnail and download. Updates and
    deletions are mirrored into the search index via ``documents.index``.
    """

    model = Document
    queryset = Document.objects.all()
    serializer_class = DocumentSerializer
    pagination_class = StandardPagination
    permission_classes = (IsAuthenticated,)
    filter_backends = (DjangoFilterBackend, SearchFilter, OrderingFilter)
    filterset_class = DocumentFilterSet
    search_fields = ("title", "correspondent__name", "content")
    ordering_fields = (
        "id",
        "title",
        "correspondent__name",
        "document_type__name",
        "created",
        "modified",
        "added",
        "archive_serial_number")

    def get_serializer(self, *args, **kwargs):
        """Instantiate the serializer, optionally limited to some fields.

        A comma separated ``fields`` query parameter is split and handed to
        the serializer as the ``fields`` keyword argument; ``None`` is used
        when the parameter is missing or empty. ``context`` and ``fields``
        keyword arguments passed in by the caller are left untouched.
        """
        fields_param = self.request.query_params.get('fields', None)
        fields = fields_param.split(",") if fields_param else None

        serializer_class = self.get_serializer_class()
        kwargs.setdefault('context', self.get_serializer_context())
        kwargs.setdefault('fields', fields)
        return serializer_class(*args, **kwargs)

    def update(self, request, *args, **kwargs):
        """Update the document, then add or update it in the index."""
        response = super().update(request, *args, **kwargs)
        index.add_or_update_document(self.get_object())
        return response

    def destroy(self, request, *args, **kwargs):
        """Remove the document from the index, then delete it."""
        # The index entry is removed first, while the object can still be
        # looked up with get_object().
        index.remove_document_from_index(self.get_object())
        return super().destroy(request, *args, **kwargs)

    @staticmethod
    def original_requested(request):
        """Return True when the query string carries ``original=true``."""
        return request.query_params.get('original') == 'true'

    def file_response(self, pk, request, disposition):
        """Build an ``HttpResponse`` that serves a document's file.

        The archive file is served as ``application/pdf`` when it exists on
        disk and the original was not explicitly requested; otherwise the
        source file is served with the document's own MIME type. Documents
        whose storage type is ``STORAGE_TYPE_GPG`` are passed through
        ``GnuPG.decrypted`` first.

        ``disposition`` is the first part of the Content-Disposition header
        (the callers in this class pass "inline" or "attachment").

        ``Document.DoesNotExist`` is not handled here; the callers in this
        class catch it together with ``FileNotFoundError``.
        """
        doc = Document.objects.get(id=pk)

        serve_archive = (
            not self.original_requested(request)
            and os.path.isfile(doc.archive_path)
        )
        if serve_archive:
            file_handle = doc.archive_file
            filename = doc.get_public_filename(archive=True)
            mime_type = 'application/pdf'
        else:
            file_handle = doc.source_file
            filename = doc.get_public_filename()
            mime_type = doc.mime_type

        if doc.storage_type == Document.STORAGE_TYPE_GPG:
            file_handle = GnuPG.decrypted(file_handle)

        response = HttpResponse(file_handle, content_type=mime_type)
        response["Content-Disposition"] = '{}; filename="{}"'.format(
            disposition, filename)
        return response

    def get_metadata(self, file, mime_type):
        """Extract metadata from ``file`` using the parser for ``mime_type``.

        Returns ``None`` when ``file`` is not an existing regular file, an
        empty list when no parser class is found for the MIME type, and
        otherwise whatever the parser's ``extract_metadata`` returns.
        """
        if not os.path.isfile(file):
            return None

        parser_class = get_parser_class_for_mime_type(mime_type)
        if not parser_class:
            return []

        parser = parser_class(logging_group=None)
        return parser.extract_metadata(file, mime_type)

    @action(methods=['get'], detail=True)
    def metadata(self, request, pk=None):
        """Return checksums, sizes and parsed metadata for a document.

        The ``archive_*`` entries are filled in only when the document has
        an archive checksum and the archive file exists; otherwise they are
        ``None``. Responds with 404 when the document does not exist or a
        file that is being inspected is missing from disk.
        """
        try:
            doc = Document.objects.get(pk=pk)

            meta = {
                "original_checksum": doc.checksum,
                "original_size": os.stat(doc.source_path).st_size,
                "original_mime_type": doc.mime_type,
                "media_filename": doc.filename,
                "has_archive_version": os.path.isfile(doc.archive_path),
                "original_metadata": self.get_metadata(
                    doc.source_path, doc.mime_type)
            }

            if doc.archive_checksum and os.path.isfile(doc.archive_path):
                meta['archive_checksum'] = doc.archive_checksum
                # Plain integer, like "original_size". A trailing comma here
                # would turn the value into a one-element tuple.
                meta['archive_size'] = os.stat(doc.archive_path).st_size
                meta['archive_metadata'] = self.get_metadata(
                    doc.archive_path, "application/pdf")
            else:
                meta['archive_checksum'] = None
                meta['archive_size'] = None
                meta['archive_metadata'] = None

            return Response(meta)
        except (FileNotFoundError, Document.DoesNotExist):
            # Same mapping as preview/thumb/download: a missing database
            # row or a missing file both result in a 404.
            raise Http404()

    @action(methods=['get'], detail=True)
    def preview(self, request, pk=None):
        """Serve the document with an "inline" content disposition."""
        try:
            response = self.file_response(
                pk, request, "inline")
            return response
        except (FileNotFoundError, Document.DoesNotExist):
            raise Http404()

    @action(methods=['get'], detail=True)
    @cache_control(public=False, max_age=315360000)
    def thumb(self, request, pk=None):
        """Serve the document's thumbnail file as ``image/png``."""
        try:
            return HttpResponse(Document.objects.get(id=pk).thumbnail_file,
                                content_type='image/png')
        except (FileNotFoundError, Document.DoesNotExist):
            raise Http404()

    @action(methods=['get'], detail=True)
    def download(self, request, pk=None):
        """Serve the document with an "attachment" content disposition."""
        try:
            return self.file_response(
                pk, request, "attachment")
        except (FileNotFoundError, Document.DoesNotExist):
            raise Http404()
|
|
|
|
|
|
class LogViewSet(ReadOnlyModelViewSet):
    """Read-only model view set exposing ``Log`` objects.

    ``created`` is the only field made available to ``OrderingFilter``.
    """

    model = Log
    queryset = Log.objects.all()

    serializer_class = LogSerializer
    pagination_class = StandardPagination
    permission_classes = (IsAuthenticated,)

    filter_backends = (DjangoFilterBackend, OrderingFilter)
    filterset_class = LogFilterSet
    ordering_fields = ("created",)
|
|
|
|
|
|
class BulkEditView(APIView):
    """Endpoint that applies one bulk operation to a set of documents.

    The JSON payload is validated by ``BulkEditSerializer``; the validated
    ``method`` is then called with the validated ``documents`` and the
    validated ``parameters`` expanded as keyword arguments.
    """

    permission_classes = (IsAuthenticated,)
    serializer_class = BulkEditSerializer
    parser_classes = (parsers.JSONParser,)

    def get_serializer_context(self):
        """Return the context dict handed to the serializer."""
        return dict(
            request=self.request,
            format=self.format_kwarg,
            view=self,
        )

    def get_serializer(self, *args, **kwargs):
        """Instantiate ``serializer_class`` with this view's context.

        Any ``context`` keyword argument supplied by the caller is replaced.
        """
        kwargs.update(context=self.get_serializer_context())
        return self.serializer_class(*args, **kwargs)

    def post(self, request, *args, **kwargs):
        """Validate the request and run the requested bulk operation.

        Responds with ``{"result": ...}`` on success. Any exception raised
        while running the operation is reported as a 400 response whose
        body is the exception message.
        """
        serializer = self.get_serializer(data=request.data)
        serializer.is_valid(raise_exception=True)

        validated = serializer.validated_data
        method = validated.get("method")
        parameters = validated.get("parameters")
        documents = validated.get("documents")

        try:
            # TODO: parameter validation
            outcome = method(documents, **parameters)
            return Response({"result": outcome})
        except Exception as error:
            return HttpResponseBadRequest(str(error))
|
|
|
|
|
|
class PostDocumentView(APIView):
    """Endpoint that accepts a multipart document upload.

    The upload is written to a temporary file in ``settings.SCRATCH_DIR``
    and a ``documents.tasks.consume_file`` task is queued for it.
    """

    permission_classes = (IsAuthenticated,)
    serializer_class = PostDocumentSerializer
    parser_classes = (parsers.MultiPartParser,)

    def get_serializer_context(self):
        """Return the context dict handed to the serializer."""
        return {
            'request': self.request,
            'format': self.format_kwarg,
            'view': self
        }

    def get_serializer(self, *args, **kwargs):
        """Instantiate ``serializer_class`` with this view's context.

        Any ``context`` keyword argument supplied by the caller is replaced.
        """
        kwargs['context'] = self.get_serializer_context()
        return self.serializer_class(*args, **kwargs)

    def post(self, request, *args, **kwargs):
        """Store the uploaded document and queue it for consumption.

        Responds with ``"OK"`` once the task has been queued; validation
        errors are raised by the serializer.
        """
        serializer = self.get_serializer(data=request.data)
        serializer.is_valid(raise_exception=True)

        doc_name, doc_data = serializer.validated_data.get('document')
        correspondent_id = serializer.validated_data.get('correspondent')
        document_type_id = serializer.validated_data.get('document_type')
        tag_ids = serializer.validated_data.get('tags')
        title = serializer.validated_data.get('title')

        # Current local time as a whole-second timestamp.
        t = int(mktime(datetime.now().timetuple()))

        os.makedirs(settings.SCRATCH_DIR, exist_ok=True)

        # delete=False: the file must outlive this request so that the
        # queued task can read it.
        with tempfile.NamedTemporaryFile(prefix="paperless-upload-",
                                         dir=settings.SCRATCH_DIR,
                                         delete=False) as f:
            f.write(doc_data)
            temp_filename = f.name

        # Everything below runs after the file has been closed, so that
        # (a) buffered data written on close cannot reset the timestamps
        # set here, and (b) the task is never queued for a file whose
        # contents have not been flushed to disk yet.
        os.utime(temp_filename, times=(t, t))

        async_task("documents.tasks.consume_file",
                   temp_filename,
                   override_filename=doc_name,
                   override_title=title,
                   override_correspondent_id=correspondent_id,
                   override_document_type_id=document_type_id,
                   override_tag_ids=tag_ids,
                   task_name=os.path.basename(doc_name)[:100])
        return Response("OK")
|
|
|
|
|
|
class SearchView(APIView):
    """Endpoint that runs a paginated query against the document index."""

    permission_classes = (IsAuthenticated,)

    def __init__(self, *args, **kwargs):
        """Initialise the view and open the index."""
        super().__init__(*args, **kwargs)
        self.ix = index.open_index()

    def add_infos_to_hit(self, r):
        """Turn one search hit into the dict returned to the client.

        The matching ``Document`` is loaded from the database; its content
        is used for the highlights and its serialized form is embedded.
        """
        hit_id = r['id']
        doc = Document.objects.get(id=hit_id)
        return {
            'id': hit_id,
            'highlights': r.highlights("content", text=doc.content),
            'score': r.score,
            'rank': r.rank,
            'document': DocumentSerializer(doc).data,
            'title': r['title'],
        }

    def get(self, request, format=None):
        """Run the ``query`` parameter against the index.

        Without a ``query`` parameter an empty result is returned. ``page``
        defaults to 1 and is forced to 1 when it cannot be parsed as an
        integer or is smaller than 1. Any exception raised while querying
        is reported as a 400 response whose body is the exception message.
        """
        params = request.query_params

        if 'query' not in params:
            return Response({
                'count': 0,
                'page': 0,
                'page_count': 0,
                'results': []})

        query = params['query']

        try:
            page = max(int(params.get('page', 1)), 1)
        except (ValueError, TypeError):
            page = 1

        try:
            with index.query_page(self.ix, query, page) as (result_page,
                                                            corrected_query):
                return Response({
                    'count': len(result_page),
                    'page': result_page.pagenum,
                    'page_count': result_page.pagecount,
                    'corrected_query': corrected_query,
                    'results': [
                        self.add_infos_to_hit(hit) for hit in result_page
                    ],
                })
        except Exception as error:
            return HttpResponseBadRequest(str(error))
|
|
|
|
|
|
class SearchAutoCompleteView(APIView):
    """Endpoint returning autocomplete suggestions from the index."""

    permission_classes = (IsAuthenticated,)

    def __init__(self, *args, **kwargs):
        """Initialise the view and open the index."""
        super().__init__(*args, **kwargs)
        self.ix = index.open_index()

    def get(self, request, format=None):
        """Return the result of ``index.autocomplete`` for ``term``.

        Query parameters:
          * ``term``  -- required; a 400 "Term required" is returned
            without it.
          * ``limit`` -- optional positive integer, defaults to 10. A value
            that is not an integer, or is not greater than zero, yields a
            400 "Invalid limit".
        """
        params = request.query_params

        if 'term' not in params:
            return HttpResponseBadRequest("Term required")
        term = params['term']

        if 'limit' in params:
            try:
                limit = int(params['limit'])
            except (ValueError, TypeError):
                # A non-numeric limit is a client error, not a server error.
                return HttpResponseBadRequest("Invalid limit")
            if limit <= 0:
                return HttpResponseBadRequest("Invalid limit")
        else:
            limit = 10

        return Response(index.autocomplete(self.ix, term, limit))
|
|
|
|
|
|
class StatisticsView(APIView):
    """Endpoint returning document counts."""

    permission_classes = (IsAuthenticated,)

    def get(self, request, format=None):
        """Return the total document count and the inbox document count.

        ``documents_inbox`` counts distinct documents that have at least
        one tag with ``is_inbox_tag=True``.
        """
        total = Document.objects.all().count()
        inbox = (
            Document.objects
            .filter(tags__is_inbox_tag=True)
            .distinct()
            .count()
        )
        return Response({
            'documents_total': total,
            'documents_inbox': inbox,
        })
|