mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-05-23 12:58:18 -05:00
Cover partial indexing
This commit is contained in:
parent
a1fb3ee7de
commit
07a7d7b815
@ -147,8 +147,6 @@ def update_llm_index(*, progress_bar_disable=False, rebuild=False):
|
|||||||
for node in index.docstore.get_nodes(all_node_ids)
|
for node in index.docstore.get_nodes(all_node_ids)
|
||||||
}
|
}
|
||||||
|
|
||||||
node_ids_to_remove = []
|
|
||||||
|
|
||||||
for document in tqdm.tqdm(documents, disable=progress_bar_disable):
|
for document in tqdm.tqdm(documents, disable=progress_bar_disable):
|
||||||
doc_id = str(document.id)
|
doc_id = str(document.id)
|
||||||
document_modified = document.modified.isoformat()
|
document_modified = document.modified.isoformat()
|
||||||
@ -160,22 +158,19 @@ def update_llm_index(*, progress_bar_disable=False, rebuild=False):
|
|||||||
if node_modified == document_modified:
|
if node_modified == document_modified:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
node_ids_to_remove.append(node.node_id)
|
# Again, delete from the docstore; FAISS IndexFlatL2 indexes are append-only
|
||||||
|
index.docstore.delete_document(node.node_id)
|
||||||
nodes.extend(build_document_node(document))
|
nodes.extend(build_document_node(document))
|
||||||
else:
|
else:
|
||||||
# New document, add it
|
# New document, add it
|
||||||
nodes.extend(build_document_node(document))
|
nodes.extend(build_document_node(document))
|
||||||
|
|
||||||
if node_ids_to_remove or nodes:
|
if nodes:
|
||||||
logger.info(
|
logger.info(
|
||||||
"Updating LLM index with %d new nodes and removing %d old nodes.",
|
"Updating %d nodes in LLM index.",
|
||||||
len(nodes),
|
len(nodes),
|
||||||
len(node_ids_to_remove),
|
|
||||||
)
|
)
|
||||||
if node_ids_to_remove:
|
index.insert_nodes(nodes)
|
||||||
index.delete_nodes(node_ids_to_remove)
|
|
||||||
if nodes:
|
|
||||||
index.insert_nodes(nodes)
|
|
||||||
else:
|
else:
|
||||||
logger.info("No changes detected, skipping llm index rebuild.")
|
logger.info("No changes detected, skipping llm index rebuild.")
|
||||||
|
|
||||||
|
@ -72,6 +72,57 @@ def test_update_llm_index(
|
|||||||
assert any(temp_llm_index_dir.glob("*.json"))
|
assert any(temp_llm_index_dir.glob("*.json"))
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db
|
||||||
|
def test_update_llm_index_partial_update(
|
||||||
|
temp_llm_index_dir,
|
||||||
|
real_document,
|
||||||
|
mock_embed_model,
|
||||||
|
):
|
||||||
|
doc2 = Document.objects.create(
|
||||||
|
title="Test Document 2",
|
||||||
|
content="This is some test content 2.",
|
||||||
|
added=timezone.now(),
|
||||||
|
checksum="1234567890abcdef",
|
||||||
|
)
|
||||||
|
# Initial index
|
||||||
|
with patch("documents.models.Document.objects.all") as mock_all:
|
||||||
|
mock_queryset = MagicMock()
|
||||||
|
mock_queryset.exists.return_value = True
|
||||||
|
mock_queryset.__iter__.return_value = iter([real_document, doc2])
|
||||||
|
mock_all.return_value = mock_queryset
|
||||||
|
|
||||||
|
indexing.update_llm_index(rebuild=True)
|
||||||
|
|
||||||
|
# modify document
|
||||||
|
updated_document = real_document
|
||||||
|
updated_document.modified = timezone.now() # simulate modification
|
||||||
|
|
||||||
|
# new doc
|
||||||
|
doc3 = Document.objects.create(
|
||||||
|
title="Test Document 3",
|
||||||
|
content="This is some test content 3.",
|
||||||
|
added=timezone.now(),
|
||||||
|
checksum="abcdef1234567890",
|
||||||
|
)
|
||||||
|
|
||||||
|
with patch("documents.models.Document.objects.all") as mock_all:
|
||||||
|
mock_queryset = MagicMock()
|
||||||
|
mock_queryset.exists.return_value = True
|
||||||
|
mock_queryset.__iter__.return_value = iter([updated_document, doc2, doc3])
|
||||||
|
mock_all.return_value = mock_queryset
|
||||||
|
|
||||||
|
# assert logs "Updating %d nodes in LLM index."
|
||||||
|
with patch("paperless.ai.indexing.logger") as mock_logger:
|
||||||
|
indexing.update_llm_index(rebuild=False)
|
||||||
|
mock_logger.info.assert_called_once_with(
|
||||||
|
"Updating %d nodes in LLM index.",
|
||||||
|
2,
|
||||||
|
)
|
||||||
|
indexing.update_llm_index(rebuild=False)
|
||||||
|
|
||||||
|
assert any(temp_llm_index_dir.glob("*.json"))
|
||||||
|
|
||||||
|
|
||||||
def test_get_or_create_storage_context_raises_exception(
|
def test_get_or_create_storage_context_raises_exception(
|
||||||
temp_llm_index_dir,
|
temp_llm_index_dir,
|
||||||
mock_embed_model,
|
mock_embed_model,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user