From 74102a8c30e76135e9194c4c0cf887394aee2b1f Mon Sep 17 00:00:00 2001
From: shamoon <4887959+shamoon@users.noreply.github.com>
Date: Thu, 24 Apr 2025 23:56:51 -0700
Subject: [PATCH] Individual doc chat [ci skip]

---
 src/documents/views.py   | 15 ++++++++++++++-
 src/paperless/ai/chat.py | 37 +++++++++++++++++++++++++++++++++++++
 2 files changed, 51 insertions(+), 1 deletion(-)

diff --git a/src/documents/views.py b/src/documents/views.py
index c2bf79f43..f36d05cac 100644
--- a/src/documents/views.py
+++ b/src/documents/views.py
@@ -172,6 +172,7 @@ from documents.templating.filepath import validate_filepath_template_and_render
 from paperless import version
 from paperless.ai.ai_classifier import get_ai_document_classification
 from paperless.ai.chat import chat_with_documents
+from paperless.ai.chat import chat_with_single_document
 from paperless.ai.matching import extract_unmatched_names
 from paperless.ai.matching import match_correspondents_by_name
 from paperless.ai.matching import match_document_types_by_name
@@ -1142,7 +1143,19 @@ class DocumentViewSet(
             return HttpResponseBadRequest("AI is required for this feature")
 
         question = request.data["q"]
-        result = chat_with_documents(question, request.user)
+        doc_id = request.data.get("document_id", None)
+        if doc_id:
+            try:
+                document = Document.objects.get(id=doc_id)
+            except (Document.DoesNotExist, ValueError):
+                return HttpResponseBadRequest("Document not found")
+
+            if not has_perms_owner_aware(request.user, "view_document", document):
+                return HttpResponseForbidden("Insufficient permissions")
+
+            result = chat_with_single_document(document, question, request.user)
+        else:
+            result = chat_with_documents(question, request.user)
 
         return Response({"answer": result})
 
diff --git a/src/paperless/ai/chat.py b/src/paperless/ai/chat.py
index eb485b641..6e75884d9 100644
--- a/src/paperless/ai/chat.py
+++ b/src/paperless/ai/chat.py
@@ -1,10 +1,12 @@
 import logging
 
 from django.contrib.auth.models import User
+from llama_index.core import VectorStoreIndex
 from llama_index.core.query_engine import RetrieverQueryEngine
 
 from paperless.ai.client import AIClient
 from paperless.ai.indexing import get_document_retriever
+from paperless.ai.indexing import load_index
 
 logger = logging.getLogger("paperless.ai.chat")
 
@@ -22,3 +24,38 @@ def chat_with_documents(prompt: str, user: User) -> str:
     response = query_engine.query(prompt)
     logger.debug("Document chat response: %s", response)
     return str(response)
+
+
+def chat_with_single_document(document, question: str, user: User) -> str:
+    """
+    Answer a question using only the indexed nodes of a single document.
+
+    Raises an Exception if none of the document's nodes are in the index.
+    The user argument is currently unused; the caller checks permissions.
+    """
+    index = load_index()
+
+    # Filter only the node(s) belonging to this doc
+    nodes = [
+        node
+        for node in index.docstore.docs.values()
+        if node.metadata.get("document_id") == str(document.id)
+    ]
+
+    if not nodes:
+        raise Exception("This document is not indexed yet.")
+
+    # The docstore holds already-parsed nodes, so build the index from them
+    # directly rather than sending them through the Document ingestion path
+    local_index = VectorStoreIndex(nodes=nodes)
+
+    client = AIClient()
+
+    engine = RetrieverQueryEngine.from_args(
+        retriever=local_index.as_retriever(similarity_top_k=3),
+        llm=client.llm,
+    )
+
+    response = engine.query(question)
+    logger.debug("Single document chat response: %s", response)
+    return str(response)