Auto-trigger llmindex rebuild when enabled

This commit is contained in:
shamoon 2025-04-29 20:16:02 -07:00
parent 374596b1bc
commit aa0599774b
No known key found for this signature in database
3 changed files with 43 additions and 5 deletions

View File

@ -514,12 +514,20 @@ def check_scheduled_workflows():
@shared_task
def llmindex_index(*, progress_bar_disable=True, rebuild=False, scheduled=True):
def llmindex_index(
*,
progress_bar_disable=True,
rebuild=False,
scheduled=True,
auto=False,
):
ai_config = AIConfig()
if ai_config.llm_index_enabled():
task = PaperlessTask.objects.create(
type=PaperlessTask.TaskType.SCHEDULED_TASK
if scheduled
else PaperlessTask.TaskType.AUTO
if auto
else PaperlessTask.TaskType.MANUAL_TASK,
task_id=uuid.uuid4(),
task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE,

View File

@ -34,6 +34,7 @@ from rest_framework.viewsets import ModelViewSet
from documents.index import DelayedQuery
from documents.permissions import PaperlessObjectPermissions
from documents.tasks import llmindex_index
from paperless.filters import GroupFilterSet
from paperless.filters import UserFilterSet
from paperless.models import ApplicationConfiguration
@ -42,6 +43,7 @@ from paperless.serialisers import GroupSerializer
from paperless.serialisers import PaperlessAuthTokenSerializer
from paperless.serialisers import ProfileSerializer
from paperless.serialisers import UserSerializer
from paperless_ai.indexing import vector_store_file_exists
class PaperlessObtainAuthTokenView(ObtainAuthToken):
@ -345,6 +347,30 @@ class ApplicationConfigurationViewSet(ModelViewSet):
serializer_class = ApplicationConfigurationSerializer
permission_classes = (IsAuthenticated, DjangoModelPermissions)
def perform_update(self, serializer):
    """
    Save the application configuration and queue an LLM index rebuild
    when the AI index has just been switched on.

    A rebuild is queued only if the AI index was not enabled before the
    save, is enabled after it, and no vector store file exists yet.

    Args:
        serializer: Serializer for the update; the object returned by its
            ``save()`` is treated as the new configuration.
    """
    # first() returns None when no configuration row exists yet. Treat
    # that as "AI index was not enabled" rather than failing with an
    # AttributeError before the update is even saved.
    old_instance = ApplicationConfiguration.objects.all().first()
    old_ai_index_enabled = bool(
        old_instance is not None
        and old_instance.ai_enabled
        and old_instance.llm_embedding_backend
    )

    new_instance: ApplicationConfiguration = serializer.save()
    new_ai_index_enabled = bool(
        new_instance.ai_enabled and new_instance.llm_embedding_backend
    )

    if (
        not old_ai_index_enabled
        and new_ai_index_enabled
        and not vector_store_file_exists()
    ):
        # AI index was just enabled and vector store file does not exist
        llmindex_index.delay(
            progress_bar_disable=True,
            rebuild=True,
            scheduled=False,
            auto=True,
        )
@extend_schema_view(
post=extend_schema(

View File

@ -115,6 +115,13 @@ def remove_document_docstore_nodes(document: Document, index: VectorStoreIndex):
index.docstore.delete_document(node_id)
def vector_store_file_exists():
    """
    Report whether ``default__vector_store.json`` is present under
    ``settings.LLM_INDEX_DIR``.

    Returns:
        True if a filesystem entry exists at that path, False otherwise.
    """
    store_path = Path(settings.LLM_INDEX_DIR / "default__vector_store.json")
    return store_path.exists()
def update_llm_index(*, progress_bar_disable=False, rebuild=False) -> str:
"""
Rebuild or update the LLM index.
@ -127,10 +134,7 @@ def update_llm_index(*, progress_bar_disable=False, rebuild=False) -> str:
logger.warning(msg)
return msg
if (
rebuild
or not Path(settings.LLM_INDEX_DIR / "default__vector_store.json").exists()
):
if rebuild or not vector_store_file_exists():
# Rebuild index from scratch
logger.info("Rebuilding LLM index.")
embed_model = get_embedding_model()