mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-05-23 12:58:18 -05:00
Unify prompts, cover
This commit is contained in:
parent
014eafe3d1
commit
62fd722019
@ -21,6 +21,7 @@ def build_prompt_without_rag(document: Document) -> str:
|
||||
Never ask for further information, additional content or ask questions. Never include any other text.
|
||||
Suggested tags and document types must be strictly based on the content of the document.
|
||||
Do not change the field names or the JSON structure, only provide the values. Use double quotes and proper JSON syntax.
|
||||
Each field must be a list of plain strings.
|
||||
|
||||
The JSON object must contain the following fields:
|
||||
- title: A short, descriptive title
|
||||
@ -30,8 +31,6 @@ def build_prompt_without_rag(document: Document) -> str:
|
||||
- storage_paths: Suggested folder paths (e.g. "Medical/Insurance")
|
||||
- dates: List up to 3 relevant dates in YYYY-MM-DD format
|
||||
|
||||
Respond ONLY in JSON.
|
||||
Each field must be a list of plain strings.
|
||||
The format of the JSON object is as follows:
|
||||
{{
|
||||
"title": "xxxxx",
|
||||
@ -43,7 +42,6 @@ def build_prompt_without_rag(document: Document) -> str:
|
||||
}}
|
||||
---
|
||||
|
||||
|
||||
FILENAME:
|
||||
{filename}
|
||||
|
||||
@ -56,41 +54,9 @@ def build_prompt_without_rag(document: Document) -> str:
|
||||
|
||||
def build_prompt_with_rag(document: Document) -> str:
|
||||
context = get_context_for_document(document)
|
||||
content = document.content or ""
|
||||
filename = document.filename or ""
|
||||
prompt = build_prompt_without_rag(document)
|
||||
|
||||
prompt = f"""
|
||||
You are a helpful assistant that extracts structured information from documents.
|
||||
You have access to similar documents as context to help improve suggestions.
|
||||
|
||||
Only output valid JSON in the format below. No additional explanations.
|
||||
|
||||
The JSON object must contain:
|
||||
- title: A short, human-readable, descriptive title based on the content
|
||||
- tags: A list of relevant topics
|
||||
- correspondents: People or organizations involved
|
||||
- document_types: Type or category of the document
|
||||
- storage_paths: Suggested folder paths
|
||||
- dates: Up to 3 relevant dates in YYYY-MM-DD
|
||||
|
||||
Respond ONLY in JSON.
|
||||
Each field must be a list of plain strings.
|
||||
The format of the JSON object is as follows:
|
||||
{{
|
||||
"title": "xxxxx",
|
||||
"tags": ["xxxx", "xxxx"],
|
||||
"correspondents": ["xxxx", "xxxx"],
|
||||
"document_types": ["xxxx", "xxxx"],
|
||||
"storage_paths": ["xxxx", "xxxx"],
|
||||
"dates": ["YYYY-MM-DD", "YYYY-MM-DD", "YYYY-MM-DD"],
|
||||
}}
|
||||
|
||||
Here is the document:
|
||||
FILENAME:
|
||||
{filename}
|
||||
|
||||
CONTENT:
|
||||
{content[:4000]}
|
||||
prompt += f"""
|
||||
|
||||
CONTEXT FROM SIMILAR DOCUMENTS:
|
||||
{context[:4000]}
|
||||
|
@ -6,6 +6,8 @@ import pytest
|
||||
from django.test import override_settings
|
||||
|
||||
from documents.models import Document
|
||||
from paperless.ai.ai_classifier import build_prompt_with_rag
|
||||
from paperless.ai.ai_classifier import build_prompt_without_rag
|
||||
from paperless.ai.ai_classifier import get_ai_document_classification
|
||||
from paperless.ai.ai_classifier import parse_ai_response
|
||||
|
||||
@ -101,3 +103,15 @@ def test_use_without_rag_if_not_configured(
|
||||
mock_run_llm_query.return_value.text = json.dumps({})
|
||||
get_ai_document_classification(mock_document)
|
||||
mock_build_prompt_without_rag.assert_called_once()
|
||||
|
||||
|
||||
@override_settings(
    LLM_BACKEND="ollama",
    LLM_MODEL="some_model",
)
def test_prompt_with_without_rag(mock_document):
    """Check that only the RAG prompt contains the similar-documents section.

    The plain prompt is built first and must not mention the context
    header; the RAG prompt is built second and must include it.
    """
    context_header = "CONTEXT FROM SIMILAR DOCUMENTS:"

    plain_prompt = build_prompt_without_rag(mock_document)
    assert context_header not in plain_prompt

    rag_prompt = build_prompt_with_rag(mock_document)
    assert context_header in rag_prompt
|
||||
|
Loading…
x
Reference in New Issue
Block a user