mirror of https://github.com/paperless-ngx/paperless-ngx.git (synced 2025-05-23 12:58:18 -05:00)
Some cleanup, typing
commit e3d1ca77dc
parent fd8ffa62b0
@@ -61,7 +61,7 @@ def get_vector_store_index(storage_context, embed_model):
     )


-def build_document_node(document) -> list[BaseNode]:
+def build_document_node(document: Document) -> list[BaseNode]:
     """
     Given a Document, returns parsed Nodes ready for indexing.
     """
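The hunk only shows the new signature and docstring for build_document_node. As a rough sketch of what "parsed Nodes ready for indexing" can look like with llama-index (an assumption for illustration, not the function body from this commit), each chunk of the document text becomes a TextNode carrying the paperless document id in its metadata:

# Sketch only: not the body from this commit. Assumes llama-index's
# SentenceSplitter and TextNode; the chunk sizes are made-up defaults.
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.schema import TextNode


def build_document_node_sketch(doc_id: int, title: str, content: str) -> list[TextNode]:
    splitter = SentenceSplitter(chunk_size=512, chunk_overlap=50)
    chunks = splitter.split_text((title or "") + "\n" + (content or ""))
    # Each chunk remembers which paperless Document it came from, so nodes
    # can later be removed from the docstore or matched back to the Document.
    return [
        TextNode(text=chunk, metadata={"document_id": str(doc_id)})
        for chunk in chunks
    ]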
@@ -109,7 +109,7 @@ def load_or_build_index(storage_context, embed_model, nodes=None):
         raise


-def remove_existing_document_nodes(document, index):
+def remove_document_docstore_nodes(document: Document, index: VectorStoreIndex):
     """
     Removes existing documents from docstore for a given document from the index.
     This is necessary because FAISS IndexFlatL2 is append-only.
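The docstring states the constraint behind the rename: FAISS IndexFlatL2 cannot delete vectors in place, so only the docstore entries for the document are removed. A minimal sketch of that idea, assuming nodes carry a "document_id" metadata key as above (the helper name and loop are illustrative, not the commit's code):

# Hedged sketch: drop the docstore entries whose metadata points at this
# document. The FAISS vectors themselves stay behind, because IndexFlatL2
# is append-only; only the docstore bookkeeping is cleaned up.
from llama_index.core import VectorStoreIndex


def remove_docstore_nodes_sketch(index: VectorStoreIndex, document_id: int) -> None:
    stale_ids = [
        node_id
        for node_id, node in index.docstore.docs.items()
        if node.metadata.get("document_id") == str(document_id)
    ]
    for node_id in stale_ids:
        index.docstore.delete_document(node_id)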
@@ -153,7 +153,7 @@ def rebuild_llm_index(*, progress_bar_disable=False, rebuild=False):
     storage_context.persist(persist_dir=settings.LLM_INDEX_DIR)


-def llm_index_add_or_update_document(document):
+def llm_index_add_or_update_document(document: Document):
     """
     Adds or updates a document in the LLM index.
     If the document already exists, it will be replaced.
@@ -168,18 +168,19 @@ def llm_index_add_or_update_document(document):
     index = load_or_build_index(storage_context, embed_model, nodes=new_nodes)

     if index is None:
-        # Nothing to index
         return

-    # Remove old nodes
-    remove_existing_document_nodes(document, index)
+    remove_document_docstore_nodes(document, index)

     index.insert_nodes(new_nodes)

     storage_context.persist(persist_dir=settings.LLM_INDEX_DIR)


-def llm_index_remove_document(document):
+def llm_index_remove_document(document: Document):
+    """
+    Removes a document from the LLM index.
+    """
     embed_model = get_embedding_model()
     llama_settings.embed_model = embed_model
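For context, one way this add-or-update entry point could be driven is from a Django post_save signal. The wiring below is an illustration only; the import path and receiver are assumptions, and the commit does not show how paperless-ngx actually triggers indexing.

# Illustration only. The indexing module path is assumed; adjust it to
# wherever llm_index_add_or_update_document actually lives.
from django.db.models.signals import post_save
from django.dispatch import receiver

from documents.models import Document
from paperless.ai.indexing import llm_index_add_or_update_document  # assumed path


@receiver(post_save, sender=Document)
def _update_llm_index(sender, instance: Document, **kwargs):
    llm_index_add_or_update_document(instance)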
@@ -187,10 +188,9 @@ def llm_index_remove_document(document):

     index = load_or_build_index(storage_context, embed_model)
     if index is None:
-        return  # Nothing to remove
+        return

-    # Remove old nodes
-    remove_existing_document_nodes(document, index)
+    remove_document_docstore_nodes(document, index)

     storage_context.persist(persist_dir=settings.LLM_INDEX_DIR)

@@ -202,11 +202,9 @@ def query_similar_documents(document: Document, top_k: int = 5) -> list[Document]:
     index = load_or_build_index()
     retriever = VectorIndexRetriever(index=index, similarity_top_k=top_k)

-    # Build query from the document text
     query_text = (document.title or "") + "\n" + (document.content or "")
     results = retriever.retrieve(query_text)

-    # Each result.node.metadata["document_id"] should match our stored doc
     document_ids = [
         int(node.metadata["document_id"])
         for node in results
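The hunk is cut off inside the document_ids list comprehension. A hedged sketch of how the retrieved metadata is typically mapped back to Document rows (the queryset filter is an assumption, not code shown in this diff):

# Sketch: turn retriever results back into Document objects. Assumes every
# node stored a "document_id" metadata key when it was built.
from documents.models import Document


def documents_from_results_sketch(results) -> list[Document]:
    document_ids = [
        int(node.metadata["document_id"])
        for node in results
        if "document_id" in node.metadata
    ]
    return list(Document.objects.filter(pk__in=document_ids))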
@@ -202,7 +202,5 @@ class AIConfig(BaseConfig):

     def llm_index_enabled(self) -> bool:
         return (
-            self.ai_enabled
-            and self.llm_embedding_backend
-            and self.llm_embedding_model
+            self.ai_enabled and self.llm_embedding_backend and self.llm_embedding_model
         )
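The collapsed return relies on truthiness: the method is only true when the AI flag is set and both embedding settings are non-empty. A quick illustrative check, with made-up example values:

# Standalone illustration of the gate; the backend and model strings below
# are example values, not defaults from the project.
def llm_index_enabled(ai_enabled: bool, backend: str, model: str) -> bool:
    return bool(ai_enabled and backend and model)


assert llm_index_enabled(True, "openai", "text-embedding-3-small")
assert not llm_index_enabled(True, "", "text-embedding-3-small")
assert not llm_index_enabled(False, "openai", "text-embedding-3-small")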