add support for suggestions

This commit is contained in:
jonaswinkler 2021-01-29 16:45:23 +01:00
parent 05866da04b
commit ddcc0883eb
2 changed files with 79 additions and 22 deletions
src/documents

@ -590,6 +590,10 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(len(meta['original_metadata']), 0)
self.assertGreater(len(meta['archive_metadata']), 0)
def test_get_metadata_invalid_doc(self):
response = self.client.get(f"/api/documents/34576/metadata/")
self.assertEqual(response.status_code, 404)
def test_get_metadata_no_archive(self):
doc = Document.objects.create(title="test", filename="file.pdf", mime_type="application/pdf")
@ -605,6 +609,30 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertGreater(len(meta['original_metadata']), 0)
self.assertIsNone(meta['archive_metadata'])
def test_get_empty_suggestions(self):
doc = Document.objects.create(title="test", mime_type="application/pdf")
response = self.client.get(f"/api/documents/{doc.pk}/suggestions/")
self.assertEqual(response.status_code, 200)
self.assertEqual(response.data, {'correspondents': [], 'tags': [], 'document_types': []})
def test_get_suggestions_invalid_doc(self):
response = self.client.get(f"/api/documents/34676/suggestions/")
self.assertEqual(response.status_code, 404)
@mock.patch("documents.views.match_correspondents")
@mock.patch("documents.views.match_tags")
@mock.patch("documents.views.match_document_types")
def test_get_suggestions(self, match_document_types, match_tags, match_correspondents):
doc = Document.objects.create(title="test", mime_type="application/pdf", content="this is an invoice!")
match_tags.return_value = [Tag(id=56), Tag(id=123)]
match_document_types.return_value = [DocumentType(id=23)]
match_correspondents.return_value = [Correspondent(id=88), Correspondent(id=2)]
response = self.client.get(f"/api/documents/{doc.pk}/suggestions/")
self.assertEqual(response.data, {'correspondents': [88,2], 'tags': [56,123], 'document_types': [23]})
def test_saved_views(self):
u1 = User.objects.create_user("user1")
u2 = User.objects.create_user("user2")

@ -34,6 +34,7 @@ from rest_framework.viewsets import (
import documents.index as index
from paperless.db import GnuPG
from paperless.views import StandardPagination
from .classifier import DocumentClassifier, IncompatibleClassifierVersionError
from .filters import (
CorrespondentFilterSet,
DocumentFilterSet,
@ -41,6 +42,7 @@ from .filters import (
DocumentTypeFilterSet,
LogFilterSet
)
from .matching import match_correspondents, match_tags, match_document_types
from .models import Correspondent, Document, Log, Tag, DocumentType, SavedView
from .parsers import get_parser_class_for_mime_type
from .serialisers import (
@ -225,31 +227,58 @@ class DocumentViewSet(RetrieveModelMixin,
def metadata(self, request, pk=None):
try:
doc = Document.objects.get(pk=pk)
meta = {
"original_checksum": doc.checksum,
"original_size": os.stat(doc.source_path).st_size,
"original_mime_type": doc.mime_type,
"media_filename": doc.filename,
"has_archive_version": os.path.isfile(doc.archive_path),
"original_metadata": self.get_metadata(
doc.source_path, doc.mime_type)
}
if doc.archive_checksum and os.path.isfile(doc.archive_path):
meta['archive_checksum'] = doc.archive_checksum
meta['archive_size'] = os.stat(doc.archive_path).st_size,
meta['archive_metadata'] = self.get_metadata(
doc.archive_path, "application/pdf")
else:
meta['archive_checksum'] = None
meta['archive_size'] = None
meta['archive_metadata'] = None
return Response(meta)
except Document.DoesNotExist:
raise Http404()
meta = {
"original_checksum": doc.checksum,
"original_size": os.stat(doc.source_path).st_size,
"original_mime_type": doc.mime_type,
"media_filename": doc.filename,
"has_archive_version": os.path.isfile(doc.archive_path),
"original_metadata": self.get_metadata(
doc.source_path, doc.mime_type)
}
if doc.archive_checksum and os.path.isfile(doc.archive_path):
meta['archive_checksum'] = doc.archive_checksum
meta['archive_size'] = os.stat(doc.archive_path).st_size,
meta['archive_metadata'] = self.get_metadata(
doc.archive_path, "application/pdf")
else:
meta['archive_checksum'] = None
meta['archive_size'] = None
meta['archive_metadata'] = None
return Response(meta)
@action(methods=['get'], detail=True)
def suggestions(self, request, pk=None):
try:
doc = Document.objects.get(pk=pk)
except Document.DoesNotExist:
raise Http404()
try:
classifier = DocumentClassifier()
classifier.reload()
except (OSError, EOFError, IncompatibleClassifierVersionError) as e:
logging.getLogger(__name__).warning(
"Cannot load classifier: Not providing auto matching "
"suggestions"
)
classifier = None
return Response({
"correspondents": [
c.id for c in match_correspondents(doc, classifier)
],
"tags": [t.id for t in match_tags(doc, classifier)],
"document_types": [
dt.id for dt in match_document_types(doc, classifier)
]
})
@action(methods=['get'], detail=True)
def preview(self, request, pk=None):
try: