Mirror of https://github.com/paperless-ngx/paperless-ngx.git, synced 2025-05-21 12:52:13 -05:00
Use PaperlessTask for llmindex
This commit is contained in:
parent 51a7581860
commit 374596b1bc
@@ -11,6 +11,7 @@ export enum PaperlessTaskName {
   TrainClassifier = 'train_classifier',
   SanityCheck = 'check_sanity',
   IndexOptimize = 'index_optimize',
+  LLMIndexUpdate = 'llmindex_update',
 }
 
 export enum PaperlessTaskStatus {
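Note: the new frontend enum member must stay byte-identical to the backend task-name string. A quick illustrative check on the backend side (not part of this commit; assumes TaskName behaves like Django's TextChoices, whose members compare equal to their values):

    from documents.models import PaperlessTask

    assert PaperlessTask.TaskName.LLMINDEX_UPDATE == "llmindex_update"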
@@ -18,4 +18,5 @@ class Command(ProgressBarMixin, BaseCommand):
         llmindex_index(
             progress_bar_disable=self.no_progress_bar,
             rebuild=options["command"] == "rebuild",
+            scheduled=False,
         )
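Note: scheduled=False is what marks a CLI-triggered run as manual rather than beat-scheduled. An equivalent direct call might look like this (hypothetical usage sketch, not part of the diff):

    from documents.tasks import llmindex_index

    # Runs synchronously with a visible progress bar, recorded as a manual task.
    llmindex_index(progress_bar_disable=False, rebuild=True, scheduled=False)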
@@ -0,0 +1,30 @@
+# Generated by Django 5.1.8 on 2025-04-30 02:38
+
+from django.db import migrations
+from django.db import models
+
+
+class Migration(migrations.Migration):
+    dependencies = [
+        ("documents", "1065_workflowaction_assign_custom_fields_values"),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name="paperlesstask",
+            name="task_name",
+            field=models.CharField(
+                choices=[
+                    ("consume_file", "Consume File"),
+                    ("train_classifier", "Train Classifier"),
+                    ("check_sanity", "Check Sanity"),
+                    ("index_optimize", "Index Optimize"),
+                    ("llmindex_update", "LLM Index Update"),
+                ],
+                help_text="Name of the task that was run",
+                max_length=255,
+                null=True,
+                verbose_name="Task Name",
+            ),
+        ),
+    ]
@@ -543,6 +543,7 @@ class PaperlessTask(ModelWithOwner):
         TRAIN_CLASSIFIER = ("train_classifier", _("Train Classifier"))
         CHECK_SANITY = ("check_sanity", _("Check Sanity"))
         INDEX_OPTIMIZE = ("index_optimize", _("Index Optimize"))
+        LLMINDEX_UPDATE = ("llmindex_update", _("LLM Index Update"))
 
     task_id = models.CharField(
         max_length=255,
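Note: together with the migration above, the new TaskName member makes LLM index runs queryable like any other tracked task. A hedged sketch (field names taken from this diff):

    from documents.models import PaperlessTask

    latest_run = (
        PaperlessTask.objects.filter(
            task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE,
        )
        .order_by("-date_created")
        .first()
    )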
@@ -514,13 +514,29 @@ def check_scheduled_workflows():
 
 
 @shared_task
-def llmindex_index(*, progress_bar_disable=False, rebuild=False):
+def llmindex_index(*, progress_bar_disable=True, rebuild=False, scheduled=True):
     ai_config = AIConfig()
     if ai_config.llm_index_enabled():
-        update_llm_index(
+        task = PaperlessTask.objects.create(
+            type=PaperlessTask.TaskType.SCHEDULED_TASK
+            if scheduled
+            else PaperlessTask.TaskType.MANUAL_TASK,
+            task_id=uuid.uuid4(),
+            task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE,
+            status=states.STARTED,
+            date_created=timezone.now(),
+            date_started=timezone.now(),
+        )
+        from paperless_ai.indexing import update_llm_index
+
+        result = update_llm_index(
             progress_bar_disable=progress_bar_disable,
             rebuild=rebuild,
         )
+        task.status = states.SUCCESS
+        task.result = result
+        task.date_done = timezone.now()
+        task.save(update_fields=["status", "result", "date_done"])
 
 
 @shared_task
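Note: the hunk above gives the index update a full PaperlessTask lifecycle: a row is opened in STARTED state, update_llm_index() runs, and the row is closed with the returned message as its result. Dispatching from application code might look like this (sketch; .delay() is standard Celery, the argument values are illustrative):

    from documents.tasks import llmindex_index

    # A user-triggered run: recorded as MANUAL_TASK instead of SCHEDULED_TASK.
    llmindex_index.delay(rebuild=False, scheduled=False)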
@@ -531,11 +547,3 @@ def update_document_in_llm_index(document):
 @shared_task
 def remove_document_from_llm_index(document):
     llm_index_remove_document(document)
-
-
-# TODO: schedule to run periodically
-@shared_task
-def rebuild_llm_index_task():
-    from paperless_ai.indexing import update_llm_index
-
-    update_llm_index(rebuild=True)
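Note: the ad-hoc rebuild_llm_index_task is dropped because a full rebuild now goes through the same tracked entry point (sketch, using the signature introduced above):

    llmindex_index.delay(rebuild=True)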
@@ -1,4 +1,4 @@
-# Generated by Django 5.1.7 on 2025-04-24 02:09
+# Generated by Django 5.1.8 on 2025-04-30 02:38
 
 from django.db import migrations
 from django.db import models
@@ -19,27 +19,6 @@ class Migration(migrations.Migration):
                 verbose_name="Enables AI features",
             ),
         ),
-        migrations.AddField(
-            model_name="applicationconfiguration",
-            name="llm_embedding_backend",
-            field=models.CharField(
-                blank=True,
-                choices=[("openai", "OpenAI"), ("local", "Local")],
-                max_length=32,
-                null=True,
-                verbose_name="Sets the LLM Embedding backend",
-            ),
-        ),
-        migrations.AddField(
-            model_name="applicationconfiguration",
-            name="llm_embedding_model",
-            field=models.CharField(
-                blank=True,
-                max_length=32,
-                null=True,
-                verbose_name="Sets the LLM Embedding model",
-            ),
-        ),
         migrations.AddField(
             model_name="applicationconfiguration",
             name="llm_api_key",
@@ -61,6 +40,27 @@ class Migration(migrations.Migration):
                 verbose_name="Sets the LLM backend",
             ),
         ),
+        migrations.AddField(
+            model_name="applicationconfiguration",
+            name="llm_embedding_backend",
+            field=models.CharField(
+                blank=True,
+                choices=[("openai", "OpenAI"), ("huggingface", "Huggingface")],
+                max_length=32,
+                null=True,
+                verbose_name="Sets the LLM embedding backend",
+            ),
+        ),
+        migrations.AddField(
+            model_name="applicationconfiguration",
+            name="llm_embedding_model",
+            field=models.CharField(
+                blank=True,
+                max_length=32,
+                null=True,
+                verbose_name="Sets the LLM embedding model",
+            ),
+        ),
         migrations.AddField(
             model_name="applicationconfiguration",
             name="llm_model",
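Note: compared with the blocks removed above, the re-added fields rename the embedding backend choice "local" to "huggingface" and lowercase "Embedding" in the verbose names. Restated for clarity (the constant name is illustrative):

    LLM_EMBEDDING_BACKEND_CHOICES = [
        ("openai", "OpenAI"),
        ("huggingface", "Huggingface"),
    ]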
@@ -236,9 +236,6 @@ def _parse_beat_schedule() -> dict:
             "options": {
                 # 1 hour before default schedule sends again
                 "expires": 23.0 * 60.0 * 60.0,
-                "kwargs": {
-                    "progress_bar_disable": True,
-                },
             },
         },
     ]
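Note: the kwargs block can go because the task's new keyword defaults (progress_bar_disable=True, scheduled=True) already describe a beat-scheduled run. Sketch of the effect:

    # With the new defaults, a scheduled invocation needs no kwargs at all:
    llmindex_index.delay()  # scheduled=True, progress bar disabled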
@@ -208,9 +208,6 @@ class TestCeleryScheduleParsing(TestCase):
                     "schedule": crontab(minute=10, hour=2),
                     "options": {
                         "expires": self.LLM_INDEX_EXPIRE_TIME,
-                        "kwargs": {
-                            "progress_bar_disable": True,
-                        },
                     },
                 },
             },
@@ -270,9 +267,6 @@ class TestCeleryScheduleParsing(TestCase):
                     "schedule": crontab(minute=10, hour=2),
                     "options": {
                         "expires": self.LLM_INDEX_EXPIRE_TIME,
-                        "kwargs": {
-                            "progress_bar_disable": True,
-                        },
                     },
                 },
             },
@@ -324,9 +318,6 @@ class TestCeleryScheduleParsing(TestCase):
                     "schedule": crontab(minute=10, hour=2),
                     "options": {
                         "expires": self.LLM_INDEX_EXPIRE_TIME,
-                        "kwargs": {
-                            "progress_bar_disable": True,
-                        },
                     },
                 },
             },
|
@ -115,7 +115,7 @@ def remove_document_docstore_nodes(document: Document, index: VectorStoreIndex):
|
||||
index.docstore.delete_document(node_id)
|
||||
|
||||
|
||||
def update_llm_index(*, progress_bar_disable=False, rebuild=False):
|
||||
def update_llm_index(*, progress_bar_disable=False, rebuild=False) -> str:
|
||||
"""
|
||||
Rebuild or update the LLM index.
|
||||
"""
|
||||
@@ -123,8 +123,9 @@ def update_llm_index(*, progress_bar_disable=False, rebuild=False):
 
     documents = Document.objects.all()
     if not documents.exists():
-        logger.warning("No documents found to index.")
-        return
+        msg = "No documents found to index."
+        logger.warning(msg)
+        return msg
 
     if (
         rebuild
@@ -145,6 +146,7 @@ def update_llm_index(*, progress_bar_disable=False, rebuild=False):
             embed_model=embed_model,
             show_progress=not progress_bar_disable,
         )
+        msg = "LLM index rebuilt successfully."
     else:
         # Update existing index
         index = load_or_build_index()
@@ -173,15 +175,18 @@ def update_llm_index(*, progress_bar_disable=False, rebuild=False):
             nodes.extend(build_document_node(document))
 
     if nodes:
+        msg = "LLM index updated successfully."
         logger.info(
             "Updating %d nodes in LLM index.",
             len(nodes),
         )
         index.insert_nodes(nodes)
     else:
-        logger.info("No changes detected, skipping llm index rebuild.")
+        msg = "No changes detected in LLM index."
+        logger.info(msg)
 
     index.storage_context.persist(persist_dir=settings.LLM_INDEX_DIR)
+    return msg
 
 
 def llm_index_add_or_update_document(document: Document):
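Note: update_llm_index() now returns a short status message instead of None; the Celery task above stores it as the PaperlessTask result. Hedged usage sketch:

    from paperless_ai.indexing import update_llm_index

    result = update_llm_index(progress_bar_disable=True, rebuild=False)
    # One of: "LLM index rebuilt successfully.", "LLM index updated successfully.",
    # "No changes detected in LLM index.", or "No documents found to index."
    print(result)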