diff --git a/docs/configuration.md b/docs/configuration.md
index 5135168bb..85c54def0 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -1718,9 +1718,9 @@ suggestions. This setting is required to be set to true in order to use the AI f
 
 #### [`PAPERLESS_LLM_EMBEDDING_BACKEND=`](#PAPERLESS_LLM_EMBEDDING_BACKEND) {#PAPERLESS_LLM_EMBEDDING_BACKEND}
 
-: The embedding backend to use for RAG. This can be either "openai" or "local".
+: The embedding backend to use for RAG. This can be either "openai" or "huggingface".
 
-    Defaults to "local".
+    Defaults to None.
 
 #### [`PAPERLESS_LLM_EMBEDDING_MODEL=`](#PAPERLESS_LLM_EMBEDDING_MODEL) {#PAPERLESS_LLM_EMBEDDING_MODEL}
 
diff --git a/src/paperless/ai/embedding.py b/src/paperless/ai/embedding.py
index 1c33f197c..9d6a5faef 100644
--- a/src/paperless/ai/embedding.py
+++ b/src/paperless/ai/embedding.py
@@ -4,6 +4,7 @@ from llama_index.embeddings.openai import OpenAIEmbedding
 from documents.models import Document
 from documents.models import Note
 from paperless.config import AIConfig
+from paperless.models import LLMEmbeddingBackend
 
 EMBEDDING_DIMENSIONS = {
     "text-embedding-3-small": 1536,
@@ -15,12 +16,12 @@ def get_embedding_model():
     config = AIConfig()
 
     match config.llm_embedding_backend:
-        case "openai":
+        case LLMEmbeddingBackend.OPENAI:
             return OpenAIEmbedding(
                 model=config.llm_embedding_model or "text-embedding-3-small",
                 api_key=config.llm_api_key,
             )
-        case "local":
+        case LLMEmbeddingBackend.HUGGINGFACE:
             return HuggingFaceEmbedding(
                 model_name=config.llm_embedding_model
                 or "sentence-transformers/all-MiniLM-L6-v2",
diff --git a/src/paperless/models.py b/src/paperless/models.py
index fb8a44986..54fcacd7b 100644
--- a/src/paperless/models.py
+++ b/src/paperless/models.py
@@ -76,7 +76,7 @@ class ColorConvertChoices(models.TextChoices):
 
 class LLMEmbeddingBackend(models.TextChoices):
     OPENAI = ("openai", _("OpenAI"))
-    LOCAL = ("local", _("Local"))
+    HUGGINGFACE = ("huggingface", _("Huggingface"))
 
 
 class LLMBackend(models.TextChoices):
diff --git a/src/paperless/settings.py b/src/paperless/settings.py
index eb1a42fb8..e78567729 100644
--- a/src/paperless/settings.py
+++ b/src/paperless/settings.py
@@ -1284,7 +1284,7 @@ OUTLOOK_OAUTH_ENABLED = bool(
 AI_ENABLED = __get_boolean("PAPERLESS_AI_ENABLED", "NO")
 LLM_EMBEDDING_BACKEND = os.getenv(
     "PAPERLESS_LLM_EMBEDDING_BACKEND",
-)  # "local" or "openai"
+)  # "huggingface" or "openai"
 LLM_EMBEDDING_MODEL = os.getenv("PAPERLESS_LLM_EMBEDDING_MODEL")
 LLM_BACKEND = os.getenv("PAPERLESS_LLM_BACKEND")  # "ollama" or "openai"
 LLM_MODEL = os.getenv("PAPERLESS_LLM_MODEL")
diff --git a/src/paperless/tests/test_ai_classifier.py b/src/paperless/tests/test_ai_classifier.py
index a473652fc..9302d6fd2 100644
--- a/src/paperless/tests/test_ai_classifier.py
+++ b/src/paperless/tests/test_ai_classifier.py
@@ -66,7 +66,7 @@ def test_parse_llm_classification_response_invalid_json():
 @patch("paperless.ai.client.AIClient.run_llm_query")
 @patch("paperless.ai.ai_classifier.build_prompt_with_rag")
 @override_settings(
-    LLM_EMBEDDING_BACKEND="local",
+    LLM_EMBEDDING_BACKEND="huggingface",
     LLM_EMBEDDING_MODEL="some_model",
     LLM_BACKEND="ollama",
     LLM_MODEL="some_model",