Review notes (text before the first "diff --git" header is skipped by
git-apply and patch):
- The moved metadata code no longer stores archive_size as a 1-tuple.
- Added lines were edited after the patch was generated, so the post-image
  blob ids on the "index" lines no longer describe the result.

diff --git a/src/documents/tests/test_api.py b/src/documents/tests/test_api.py
index 2b332a873..9e4b77189 100644
--- a/src/documents/tests/test_api.py
+++ b/src/documents/tests/test_api.py
@@ -590,6 +590,11 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
         self.assertEqual(len(meta['original_metadata']), 0)
         self.assertGreater(len(meta['archive_metadata']), 0)
 
+    def test_get_metadata_invalid_doc(self):
+        # An unknown primary key is expected to yield 404.
+        response = self.client.get("/api/documents/34576/metadata/")
+        self.assertEqual(response.status_code, 404)
+
     def test_get_metadata_no_archive(self):
         doc = Document.objects.create(title="test", filename="file.pdf", mime_type="application/pdf")
 
@@ -605,6 +610,35 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
         self.assertGreater(len(meta['original_metadata']), 0)
         self.assertIsNone(meta['archive_metadata'])
 
+    def test_get_empty_suggestions(self):
+        # A freshly created document is expected to get empty suggestion lists.
+        doc = Document.objects.create(title="test", mime_type="application/pdf")
+
+        response = self.client.get(f"/api/documents/{doc.pk}/suggestions/")
+
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(response.data, {'correspondents': [], 'tags': [], 'document_types': []})
+
+    def test_get_suggestions_invalid_doc(self):
+        # An unknown primary key is expected to yield 404.
+        response = self.client.get("/api/documents/34676/suggestions/")
+        self.assertEqual(response.status_code, 404)
+
+    @mock.patch("documents.views.match_correspondents")
+    @mock.patch("documents.views.match_tags")
+    @mock.patch("documents.views.match_document_types")
+    def test_get_suggestions(self, match_document_types, match_tags, match_correspondents):
+        # The matchers are patched under documents.views, where the view looks
+        # them up; the response must contain exactly the ids they return.
+        doc = Document.objects.create(title="test", mime_type="application/pdf", content="this is an invoice!")
+        match_tags.return_value = [Tag(id=56), Tag(id=123)]
+        match_document_types.return_value = [DocumentType(id=23)]
+        match_correspondents.return_value = [Correspondent(id=88), Correspondent(id=2)]
+
+        response = self.client.get(f"/api/documents/{doc.pk}/suggestions/")
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(response.data, {'correspondents': [88, 2], 'tags': [56, 123], 'document_types': [23]})
+
     def test_saved_views(self):
         u1 = User.objects.create_user("user1")
         u2 = User.objects.create_user("user2")
diff --git a/src/documents/views.py b/src/documents/views.py
index d6a894db6..43ae2b103 100755
--- a/src/documents/views.py
+++ b/src/documents/views.py
@@ -34,6 +34,7 @@ from rest_framework.viewsets import (
 import documents.index as index
 from paperless.db import GnuPG
 from paperless.views import StandardPagination
+from .classifier import DocumentClassifier, IncompatibleClassifierVersionError
 from .filters import (
     CorrespondentFilterSet,
     DocumentFilterSet,
@@ -41,6 +42,7 @@ from .filters import (
     DocumentTypeFilterSet,
     LogFilterSet
 )
+from .matching import match_correspondents, match_tags, match_document_types
 from .models import Correspondent, Document, Log, Tag, DocumentType, SavedView
 from .parsers import get_parser_class_for_mime_type
 from .serialisers import (
@@ -225,31 +227,67 @@ class DocumentViewSet(RetrieveModelMixin,
     def metadata(self, request, pk=None):
         try:
             doc = Document.objects.get(pk=pk)
-
-            meta = {
-                "original_checksum": doc.checksum,
-                "original_size": os.stat(doc.source_path).st_size,
-                "original_mime_type": doc.mime_type,
-                "media_filename": doc.filename,
-                "has_archive_version": os.path.isfile(doc.archive_path),
-                "original_metadata": self.get_metadata(
-                    doc.source_path, doc.mime_type)
-            }
-
-            if doc.archive_checksum and os.path.isfile(doc.archive_path):
-                meta['archive_checksum'] = doc.archive_checksum
-                meta['archive_size'] = os.stat(doc.archive_path).st_size,
-                meta['archive_metadata'] = self.get_metadata(
-                    doc.archive_path, "application/pdf")
-            else:
-                meta['archive_checksum'] = None
-                meta['archive_size'] = None
-                meta['archive_metadata'] = None
-
-            return Response(meta)
         except Document.DoesNotExist:
             raise Http404()
 
+        meta = {
+            "original_checksum": doc.checksum,
+            "original_size": os.stat(doc.source_path).st_size,
+            "original_mime_type": doc.mime_type,
+            "media_filename": doc.filename,
+            "has_archive_version": os.path.isfile(doc.archive_path),
+            "original_metadata": self.get_metadata(
+                doc.source_path, doc.mime_type)
+        }
+
+        if doc.archive_checksum and os.path.isfile(doc.archive_path):
+            meta['archive_checksum'] = doc.archive_checksum
+            # No trailing comma here: it would wrap the size in a 1-tuple.
+            meta['archive_size'] = os.stat(doc.archive_path).st_size
+            meta['archive_metadata'] = self.get_metadata(
+                doc.archive_path, "application/pdf")
+        else:
+            meta['archive_checksum'] = None
+            meta['archive_size'] = None
+            meta['archive_metadata'] = None
+
+        return Response(meta)
+
+    @action(methods=['get'], detail=True)
+    def suggestions(self, request, pk=None):
+        """Return ids of suggested correspondents, tags and document types.
+
+        Responds with 404 if no document with the given pk exists. If the
+        classifier cannot be loaded, ``None`` is passed to the matching
+        functions in its place.
+        """
+        try:
+            doc = Document.objects.get(pk=pk)
+        except Document.DoesNotExist:
+            raise Http404()
+
+        try:
+            classifier = DocumentClassifier()
+            classifier.reload()
+        except (OSError, EOFError, IncompatibleClassifierVersionError):
+            # Best effort: a classifier that cannot be loaded must not
+            # break the endpoint.
+            logging.getLogger(__name__).warning(
+                "Cannot load classifier: Not providing auto matching "
+                "suggestions"
+            )
+            classifier = None
+
+        return Response({
+            "correspondents": [
+                c.id for c in match_correspondents(doc, classifier)
+            ],
+            "tags": [t.id for t in match_tags(doc, classifier)],
+            "document_types": [
+                dt.id for dt in match_document_types(doc, classifier)
+            ]
+        })
+
     @action(methods=['get'], detail=True)
     def preview(self, request, pk=None):
         try: