add support for suggestions

This commit is contained in:
jonaswinkler 2021-01-29 16:45:23 +01:00
parent 05866da04b
commit ddcc0883eb
2 changed files with 79 additions and 22 deletions

View File

@ -590,6 +590,10 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(len(meta['original_metadata']), 0) self.assertEqual(len(meta['original_metadata']), 0)
self.assertGreater(len(meta['archive_metadata']), 0) self.assertGreater(len(meta['archive_metadata']), 0)
def test_get_metadata_invalid_doc(self):
    """The metadata endpoint answers 404 for an id this test never creates."""
    # Plain literal: the URL contains no placeholders, so an f-string
    # prefix would be misleading (pyflakes F541).
    response = self.client.get("/api/documents/34576/metadata/")
    self.assertEqual(response.status_code, 404)
def test_get_metadata_no_archive(self): def test_get_metadata_no_archive(self):
doc = Document.objects.create(title="test", filename="file.pdf", mime_type="application/pdf") doc = Document.objects.create(title="test", filename="file.pdf", mime_type="application/pdf")
@ -605,6 +609,30 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertGreater(len(meta['original_metadata']), 0) self.assertGreater(len(meta['original_metadata']), 0)
self.assertIsNone(meta['archive_metadata']) self.assertIsNone(meta['archive_metadata'])
def test_get_empty_suggestions(self):
    """A document created with only title and mime type gets empty lists."""
    document = Document.objects.create(
        title="test",
        mime_type="application/pdf",
    )
    url = f"/api/documents/{document.pk}/suggestions/"
    expected = {'correspondents': [], 'tags': [], 'document_types': []}

    result = self.client.get(url)

    self.assertEqual(result.status_code, 200)
    self.assertEqual(result.data, expected)
def test_get_suggestions_invalid_doc(self):
    """The suggestions endpoint answers 404 for an id never created here."""
    # Plain literal: the URL contains no placeholders, so an f-string
    # prefix would be misleading (pyflakes F541).
    response = self.client.get("/api/documents/34676/suggestions/")
    self.assertEqual(response.status_code, 404)
@mock.patch("documents.views.match_correspondents")
@mock.patch("documents.views.match_tags")
@mock.patch("documents.views.match_document_types")
def test_get_suggestions(self, match_document_types, match_tags,
                         match_correspondents):
    """The endpoint returns the ids of whatever the matchers report.

    The three matching functions are patched where the view module looks
    them up.  Decorators are applied bottom-up, so the mock arguments
    arrive in the reverse order of the ``@mock.patch`` lines.
    """
    doc = Document.objects.create(
        title="test",
        mime_type="application/pdf",
        content="this is an invoice!",
    )

    match_tags.return_value = [Tag(id=56), Tag(id=123)]
    match_document_types.return_value = [DocumentType(id=23)]
    match_correspondents.return_value = [
        Correspondent(id=88),
        Correspondent(id=2),
    ]

    response = self.client.get(f"/api/documents/{doc.pk}/suggestions/")

    # Check the status first (as test_get_empty_suggestions does) so a
    # failing request is reported as such rather than as a data mismatch.
    self.assertEqual(response.status_code, 200)
    self.assertEqual(
        response.data,
        {
            'correspondents': [88, 2],
            'tags': [56, 123],
            'document_types': [23],
        },
    )
def test_saved_views(self): def test_saved_views(self):
u1 = User.objects.create_user("user1") u1 = User.objects.create_user("user1")
u2 = User.objects.create_user("user2") u2 = User.objects.create_user("user2")

View File

@ -34,6 +34,7 @@ from rest_framework.viewsets import (
import documents.index as index import documents.index as index
from paperless.db import GnuPG from paperless.db import GnuPG
from paperless.views import StandardPagination from paperless.views import StandardPagination
from .classifier import DocumentClassifier, IncompatibleClassifierVersionError
from .filters import ( from .filters import (
CorrespondentFilterSet, CorrespondentFilterSet,
DocumentFilterSet, DocumentFilterSet,
@ -41,6 +42,7 @@ from .filters import (
DocumentTypeFilterSet, DocumentTypeFilterSet,
LogFilterSet LogFilterSet
) )
from .matching import match_correspondents, match_tags, match_document_types
from .models import Correspondent, Document, Log, Tag, DocumentType, SavedView from .models import Correspondent, Document, Log, Tag, DocumentType, SavedView
from .parsers import get_parser_class_for_mime_type from .parsers import get_parser_class_for_mime_type
from .serialisers import ( from .serialisers import (
@ -225,31 +227,58 @@ class DocumentViewSet(RetrieveModelMixin,
def metadata(self, request, pk=None):
    """Return file-level metadata for the document with primary key ``pk``.

    The response always carries the original file's checksum, size on
    disk, mime type, stored filename, whether an archive file exists, and
    the result of ``self.get_metadata`` for the original file.  The three
    ``archive_*`` keys are filled in only when the document has an archive
    checksum and the archive file is present; otherwise they are ``None``.

    Raises:
        Http404: if no document with the given primary key exists.
    """
    # Keep the try body minimal so only the lookup is translated to 404.
    try:
        doc = Document.objects.get(pk=pk)
    except Document.DoesNotExist:
        raise Http404()

    meta = {
        "original_checksum": doc.checksum,
        "original_size": os.stat(doc.source_path).st_size,
        "original_mime_type": doc.mime_type,
        "media_filename": doc.filename,
        "has_archive_version": os.path.isfile(doc.archive_path),
        "original_metadata": self.get_metadata(
            doc.source_path, doc.mime_type)
    }

    if doc.archive_checksum and os.path.isfile(doc.archive_path):
        meta['archive_checksum'] = doc.archive_checksum
        # No trailing comma here: a stray comma would store a 1-tuple,
        # which serialises as a JSON list instead of an integer.
        meta['archive_size'] = os.stat(doc.archive_path).st_size
        meta['archive_metadata'] = self.get_metadata(
            doc.archive_path, "application/pdf")
    else:
        meta['archive_checksum'] = None
        meta['archive_size'] = None
        meta['archive_metadata'] = None

    return Response(meta)
@action(methods=['get'], detail=True)
def suggestions(self, request, pk=None):
    """Return suggested correspondent, tag and document type ids.

    The document is passed to ``match_correspondents``, ``match_tags`` and
    ``match_document_types`` together with a freshly loaded
    ``DocumentClassifier``.  If the classifier cannot be loaded, a warning
    is logged and the matchers are called with ``None`` instead.

    Raises:
        Http404: if no document with the given primary key exists.
    """
    try:
        doc = Document.objects.get(pk=pk)
    except Document.DoesNotExist:
        raise Http404()

    try:
        classifier = DocumentClassifier()
        classifier.reload()
    except (OSError, EOFError, IncompatibleClassifierVersionError):
        # Best effort: a missing or incompatible classifier must not
        # fail the request, so fall back to calling the matchers with None.
        logging.getLogger(__name__).warning(
            "Cannot load classifier: Not providing auto matching "
            "suggestions"
        )
        classifier = None

    return Response({
        "correspondents": [
            c.id for c in match_correspondents(doc, classifier)
        ],
        "tags": [t.id for t in match_tags(doc, classifier)],
        "document_types": [
            dt.id for dt in match_document_types(doc, classifier)
        ]
    })
@action(methods=['get'], detail=True) @action(methods=['get'], detail=True)
def preview(self, request, pk=None): def preview(self, request, pk=None):
try: try: