Some cleanup, typing

This commit is contained in:
shamoon 2025-04-28 08:37:02 -07:00
parent fd8ffa62b0
commit e3d1ca77dc
No known key found for this signature in database
2 changed files with 11 additions and 15 deletions

View File

@ -61,7 +61,7 @@ def get_vector_store_index(storage_context, embed_model):
) )
def build_document_node(document) -> list[BaseNode]: def build_document_node(document: Document) -> list[BaseNode]:
""" """
Given a Document, returns parsed Nodes ready for indexing. Given a Document, returns parsed Nodes ready for indexing.
""" """
@ -109,7 +109,7 @@ def load_or_build_index(storage_context, embed_model, nodes=None):
raise raise
def remove_existing_document_nodes(document, index): def remove_document_docstore_nodes(document: Document, index: VectorStoreIndex):
""" """
Removes existing documents from docstore for a given document from the index. Removes existing documents from docstore for a given document from the index.
This is necessary because FAISS IndexFlatL2 is append-only. This is necessary because FAISS IndexFlatL2 is append-only.
@ -153,7 +153,7 @@ def rebuild_llm_index(*, progress_bar_disable=False, rebuild=False):
storage_context.persist(persist_dir=settings.LLM_INDEX_DIR) storage_context.persist(persist_dir=settings.LLM_INDEX_DIR)
def llm_index_add_or_update_document(document): def llm_index_add_or_update_document(document: Document):
""" """
Adds or updates a document in the LLM index. Adds or updates a document in the LLM index.
If the document already exists, it will be replaced. If the document already exists, it will be replaced.
@ -168,18 +168,19 @@ def llm_index_add_or_update_document(document):
index = load_or_build_index(storage_context, embed_model, nodes=new_nodes) index = load_or_build_index(storage_context, embed_model, nodes=new_nodes)
if index is None: if index is None:
# Nothing to index
return return
# Remove old nodes remove_document_docstore_nodes(document, index)
remove_existing_document_nodes(document, index)
index.insert_nodes(new_nodes) index.insert_nodes(new_nodes)
storage_context.persist(persist_dir=settings.LLM_INDEX_DIR) storage_context.persist(persist_dir=settings.LLM_INDEX_DIR)
def llm_index_remove_document(document): def llm_index_remove_document(document: Document):
"""
Removes a document from the LLM index.
"""
embed_model = get_embedding_model() embed_model = get_embedding_model()
llama_settings.embed_model = embed_model llama_settings.embed_model = embed_model
@ -187,10 +188,9 @@ def llm_index_remove_document(document):
index = load_or_build_index(storage_context, embed_model) index = load_or_build_index(storage_context, embed_model)
if index is None: if index is None:
return # Nothing to remove return
# Remove old nodes remove_document_docstore_nodes(document, index)
remove_existing_document_nodes(document, index)
storage_context.persist(persist_dir=settings.LLM_INDEX_DIR) storage_context.persist(persist_dir=settings.LLM_INDEX_DIR)
@ -202,11 +202,9 @@ def query_similar_documents(document: Document, top_k: int = 5) -> list[Document
index = load_or_build_index() index = load_or_build_index()
retriever = VectorIndexRetriever(index=index, similarity_top_k=top_k) retriever = VectorIndexRetriever(index=index, similarity_top_k=top_k)
# Build query from the document text
query_text = (document.title or "") + "\n" + (document.content or "") query_text = (document.title or "") + "\n" + (document.content or "")
results = retriever.retrieve(query_text) results = retriever.retrieve(query_text)
# Each result.node.metadata["document_id"] should match our stored doc
document_ids = [ document_ids = [
int(node.metadata["document_id"]) int(node.metadata["document_id"])
for node in results for node in results

View File

@ -202,7 +202,5 @@ class AIConfig(BaseConfig):
def llm_index_enabled(self) -> bool: def llm_index_enabled(self) -> bool:
return ( return (
self.ai_enabled self.ai_enabled and self.llm_embedding_backend and self.llm_embedding_model
and self.llm_embedding_backend
and self.llm_embedding_backend
) )