diff --git a/src/documents/tasks.py b/src/documents/tasks.py
index c97e54f45..5a37cb94d 100644
--- a/src/documents/tasks.py
+++ b/src/documents/tasks.py
@@ -514,12 +514,20 @@ def check_scheduled_workflows():
 
 
 @shared_task
-def llmindex_index(*, progress_bar_disable=True, rebuild=False, scheduled=True):
+def llmindex_index(
+    *,
+    progress_bar_disable=True,
+    rebuild=False,
+    scheduled=True,
+    auto=False,
+):
     ai_config = AIConfig()
     if ai_config.llm_index_enabled():
         task = PaperlessTask.objects.create(
             type=PaperlessTask.TaskType.SCHEDULED_TASK
             if scheduled
+            else PaperlessTask.TaskType.AUTO
+            if auto
             else PaperlessTask.TaskType.MANUAL_TASK,
             task_id=uuid.uuid4(),
             task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE,
diff --git a/src/paperless/views.py b/src/paperless/views.py
index 050bb3f61..820817312 100644
--- a/src/paperless/views.py
+++ b/src/paperless/views.py
@@ -34,6 +34,7 @@ from rest_framework.viewsets import ModelViewSet
 
 from documents.index import DelayedQuery
 from documents.permissions import PaperlessObjectPermissions
+from documents.tasks import llmindex_index
 from paperless.filters import GroupFilterSet
 from paperless.filters import UserFilterSet
 from paperless.models import ApplicationConfiguration
@@ -42,6 +43,7 @@ from paperless.serialisers import GroupSerializer
 from paperless.serialisers import PaperlessAuthTokenSerializer
 from paperless.serialisers import ProfileSerializer
 from paperless.serialisers import UserSerializer
+from paperless_ai.indexing import vector_store_file_exists
 
 
 class PaperlessObtainAuthTokenView(ObtainAuthToken):
@@ -345,6 +347,43 @@ class ApplicationConfigurationViewSet(ModelViewSet):
     serializer_class = ApplicationConfigurationSerializer
     permission_classes = (IsAuthenticated, DjangoModelPermissions)
 
+    def perform_update(self, serializer):
+        """
+        Save the configuration and queue an initial LLM index build when
+        the AI index has just been switched on.
+
+        The index counts as enabled when both ``ai_enabled`` and
+        ``llm_embedding_backend`` are truthy. A build is queued only on the
+        disabled -> enabled transition and only if no vector store file
+        exists yet.
+        """
+
+        def ai_index_enabled(config) -> bool:
+            # Treat a missing configuration row as "disabled" instead of
+            # raising AttributeError on None.
+            return bool(
+                config is not None
+                and config.ai_enabled
+                and config.llm_embedding_backend,
+            )
+
+        # Read the stored state before saving so it reflects the old values.
+        was_enabled = ai_index_enabled(ApplicationConfiguration.objects.first())
+        new_instance: ApplicationConfiguration = serializer.save()
+
+        if (
+            not was_enabled
+            and ai_index_enabled(new_instance)
+            and not vector_store_file_exists()
+        ):
+            # AI index was just enabled and vector store file does not exist
+            llmindex_index.delay(
+                progress_bar_disable=True,
+                rebuild=True,
+                scheduled=False,
+                auto=True,
+            )
+
 
 @extend_schema_view(
     post=extend_schema(
diff --git a/src/paperless_ai/indexing.py b/src/paperless_ai/indexing.py
index 548b6ba51..4e0f2ffdf 100644
--- a/src/paperless_ai/indexing.py
+++ b/src/paperless_ai/indexing.py
@@ -115,6 +115,13 @@ def remove_document_docstore_nodes(document: Document, index: VectorStoreIndex):
         index.docstore.delete_document(node_id)
 
 
+def vector_store_file_exists() -> bool:
+    """
+    Check if the vector store file exists in the LLM index directory.
+    """
+    return (Path(settings.LLM_INDEX_DIR) / "default__vector_store.json").exists()
+
+
 def update_llm_index(*, progress_bar_disable=False, rebuild=False) -> str:
     """
     Rebuild or update the LLM index.
@@ -127,10 +134,7 @@ def update_llm_index(*, progress_bar_disable=False, rebuild=False) -> str:
         logger.warning(msg)
         return msg
 
-    if (
-        rebuild
-        or not Path(settings.LLM_INDEX_DIR / "default__vector_store.json").exists()
-    ):
+    if rebuild or not vector_store_file_exists():
         # Rebuild index from scratch
         logger.info("Rebuilding LLM index.")
         embed_model = get_embedding_model()