mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-05-21 12:52:13 -05:00
Cover partial indexing
This commit is contained in:
parent
a1fb3ee7de
commit
07a7d7b815
@ -147,8 +147,6 @@ def update_llm_index(*, progress_bar_disable=False, rebuild=False):
|
||||
for node in index.docstore.get_nodes(all_node_ids)
|
||||
}
|
||||
|
||||
node_ids_to_remove = []
|
||||
|
||||
for document in tqdm.tqdm(documents, disable=progress_bar_disable):
|
||||
doc_id = str(document.id)
|
||||
document_modified = document.modified.isoformat()
|
||||
@ -160,22 +158,19 @@ def update_llm_index(*, progress_bar_disable=False, rebuild=False):
|
||||
if node_modified == document_modified:
|
||||
continue
|
||||
|
||||
node_ids_to_remove.append(node.node_id)
|
||||
# Again, delete from docstore; FAISS IndexFlatL2 indexes are append-only
|
||||
index.docstore.delete_document(node.node_id)
|
||||
nodes.extend(build_document_node(document))
|
||||
else:
|
||||
# New document, add it
|
||||
nodes.extend(build_document_node(document))
|
||||
|
||||
if node_ids_to_remove or nodes:
|
||||
if nodes:
|
||||
logger.info(
|
||||
"Updating LLM index with %d new nodes and removing %d old nodes.",
|
||||
"Updating %d nodes in LLM index.",
|
||||
len(nodes),
|
||||
len(node_ids_to_remove),
|
||||
)
|
||||
if node_ids_to_remove:
|
||||
index.delete_nodes(node_ids_to_remove)
|
||||
if nodes:
|
||||
index.insert_nodes(nodes)
|
||||
index.insert_nodes(nodes)
|
||||
else:
|
||||
logger.info("No changes detected, skipping llm index rebuild.")
|
||||
|
||||
|
@ -72,6 +72,57 @@ def test_update_llm_index(
|
||||
assert any(temp_llm_index_dir.glob("*.json"))
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_update_llm_index_partial_update(
|
||||
temp_llm_index_dir,
|
||||
real_document,
|
||||
mock_embed_model,
|
||||
):
|
||||
doc2 = Document.objects.create(
|
||||
title="Test Document 2",
|
||||
content="This is some test content 2.",
|
||||
added=timezone.now(),
|
||||
checksum="1234567890abcdef",
|
||||
)
|
||||
# Initial index
|
||||
with patch("documents.models.Document.objects.all") as mock_all:
|
||||
mock_queryset = MagicMock()
|
||||
mock_queryset.exists.return_value = True
|
||||
mock_queryset.__iter__.return_value = iter([real_document, doc2])
|
||||
mock_all.return_value = mock_queryset
|
||||
|
||||
indexing.update_llm_index(rebuild=True)
|
||||
|
||||
# modify document
|
||||
updated_document = real_document
|
||||
updated_document.modified = timezone.now() # simulate modification
|
||||
|
||||
# new doc
|
||||
doc3 = Document.objects.create(
|
||||
title="Test Document 3",
|
||||
content="This is some test content 3.",
|
||||
added=timezone.now(),
|
||||
checksum="abcdef1234567890",
|
||||
)
|
||||
|
||||
with patch("documents.models.Document.objects.all") as mock_all:
|
||||
mock_queryset = MagicMock()
|
||||
mock_queryset.exists.return_value = True
|
||||
mock_queryset.__iter__.return_value = iter([updated_document, doc2, doc3])
|
||||
mock_all.return_value = mock_queryset
|
||||
|
||||
# assert logs "Updating %d nodes in LLM index."
|
||||
with patch("paperless.ai.indexing.logger") as mock_logger:
|
||||
indexing.update_llm_index(rebuild=False)
|
||||
mock_logger.info.assert_called_once_with(
|
||||
"Updating %d nodes in LLM index.",
|
||||
2,
|
||||
)
|
||||
indexing.update_llm_index(rebuild=False)
|
||||
|
||||
assert any(temp_llm_index_dir.glob("*.json"))
|
||||
|
||||
|
||||
def test_get_or_create_storage_context_raises_exception(
|
||||
temp_llm_index_dir,
|
||||
mock_embed_model,
|
||||
|
Loading…
x
Reference in New Issue
Block a user