diff --git a/src-ui/src/app/data/paperless-task.ts b/src-ui/src/app/data/paperless-task.ts
index 1bec277eb..b30af7cdd 100644
--- a/src-ui/src/app/data/paperless-task.ts
+++ b/src-ui/src/app/data/paperless-task.ts
@@ -11,6 +11,7 @@ export enum PaperlessTaskName {
   TrainClassifier = 'train_classifier',
   SanityCheck = 'check_sanity',
   IndexOptimize = 'index_optimize',
+  LLMIndexUpdate = 'llmindex_update',
 }
 
 export enum PaperlessTaskStatus {
diff --git a/src/documents/management/commands/document_llmindex.py b/src/documents/management/commands/document_llmindex.py
index 74c5c4d69..d2df02ed9 100644
--- a/src/documents/management/commands/document_llmindex.py
+++ b/src/documents/management/commands/document_llmindex.py
@@ -18,4 +18,5 @@ class Command(ProgressBarMixin, BaseCommand):
             llmindex_index(
                 progress_bar_disable=self.no_progress_bar,
                 rebuild=options["command"] == "rebuild",
+                scheduled=False,
             )
diff --git a/src/documents/migrations/1066_alter_paperlesstask_task_name.py b/src/documents/migrations/1066_alter_paperlesstask_task_name.py
new file mode 100644
index 000000000..38fa5d46e
--- /dev/null
+++ b/src/documents/migrations/1066_alter_paperlesstask_task_name.py
@@ -0,0 +1,30 @@
+# Generated by Django 5.1.8 on 2025-04-30 02:38
+
+from django.db import migrations
+from django.db import models
+
+
+class Migration(migrations.Migration):
+    dependencies = [
+        ("documents", "1065_workflowaction_assign_custom_fields_values"),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name="paperlesstask",
+            name="task_name",
+            field=models.CharField(
+                choices=[
+                    ("consume_file", "Consume File"),
+                    ("train_classifier", "Train Classifier"),
+                    ("check_sanity", "Check Sanity"),
+                    ("index_optimize", "Index Optimize"),
+                    ("llmindex_update", "LLM Index Update"),
+                ],
+                help_text="Name of the task that was run",
+                max_length=255,
+                null=True,
+                verbose_name="Task Name",
+            ),
+        ),
+    ]
diff --git a/src/documents/models.py b/src/documents/models.py
index 17d1035dd..993ece836 100644
--- a/src/documents/models.py
+++ b/src/documents/models.py
@@ -543,6 +543,7 @@ class PaperlessTask(ModelWithOwner):
         TRAIN_CLASSIFIER = ("train_classifier", _("Train Classifier"))
         CHECK_SANITY = ("check_sanity", _("Check Sanity"))
         INDEX_OPTIMIZE = ("index_optimize", _("Index Optimize"))
+        LLMINDEX_UPDATE = ("llmindex_update", _("LLM Index Update"))
 
     task_id = models.CharField(
         max_length=255,
diff --git a/src/documents/tasks.py b/src/documents/tasks.py
index fd0522d13..c97e54f45 100644
--- a/src/documents/tasks.py
+++ b/src/documents/tasks.py
@@ -514,13 +514,44 @@ def check_scheduled_workflows():
 
 
 @shared_task
-def llmindex_index(*, progress_bar_disable=False, rebuild=False):
+def llmindex_index(*, progress_bar_disable=True, rebuild=False, scheduled=True):
+    """
+    Update (or rebuild) the LLM index and record the run as a PaperlessTask.
+
+    Does nothing when the LLM index is disabled. `scheduled` selects whether
+    the run is recorded as a scheduled or a manual task.
+    """
     ai_config = AIConfig()
     if ai_config.llm_index_enabled():
-        update_llm_index(
-            progress_bar_disable=progress_bar_disable,
-            rebuild=rebuild,
-        )
+        task = PaperlessTask.objects.create(
+            type=PaperlessTask.TaskType.SCHEDULED_TASK
+            if scheduled
+            else PaperlessTask.TaskType.MANUAL_TASK,
+            task_id=uuid.uuid4(),
+            task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE,
+            status=states.STARTED,
+            date_created=timezone.now(),
+            date_started=timezone.now(),
+        )
+        from paperless_ai.indexing import update_llm_index
+
+        try:
+            result = update_llm_index(
+                progress_bar_disable=progress_bar_disable,
+                rebuild=rebuild,
+            )
+        except Exception as e:
+            # Mark the task as failed instead of leaving it in STARTED
+            # forever, then re-raise so the caller still sees the error.
+            task.status = states.FAILURE
+            task.result = str(e)
+            task.date_done = timezone.now()
+            task.save(update_fields=["status", "result", "date_done"])
+            raise
+        task.status = states.SUCCESS
+        task.result = result
+        task.date_done = timezone.now()
+        task.save(update_fields=["status", "result", "date_done"])
 
 
 @shared_task
@@ -531,11 +562,3 @@ def update_document_in_llm_index(document):
 @shared_task
 def remove_document_from_llm_index(document):
     llm_index_remove_document(document)
-
-
-# TODO: schedule to run periodically
-@shared_task
-def rebuild_llm_index_task():
-    from paperless_ai.indexing import update_llm_index
-
-    update_llm_index(rebuild=True)
diff --git a/src/paperless/migrations/0004_applicationconfiguration_ai_enabled_and_more.py b/src/paperless/migrations/0004_applicationconfiguration_ai_enabled_and_more.py
index da5180bf2..28350e3b1 100644
--- a/src/paperless/migrations/0004_applicationconfiguration_ai_enabled_and_more.py
+++ b/src/paperless/migrations/0004_applicationconfiguration_ai_enabled_and_more.py
@@ -1,4 +1,4 @@
-# Generated by Django 5.1.7 on 2025-04-24 02:09
+# Generated by Django 5.1.8 on 2025-04-30 02:38
 
 from django.db import migrations
 from django.db import models
@@ -19,27 +19,6 @@ class Migration(migrations.Migration):
                 verbose_name="Enables AI features",
             ),
         ),
-        migrations.AddField(
-            model_name="applicationconfiguration",
-            name="llm_embedding_backend",
-            field=models.CharField(
-                blank=True,
-                choices=[("openai", "OpenAI"), ("local", "Local")],
-                max_length=32,
-                null=True,
-                verbose_name="Sets the LLM Embedding backend",
-            ),
-        ),
-        migrations.AddField(
-            model_name="applicationconfiguration",
-            name="llm_embedding_model",
-            field=models.CharField(
-                blank=True,
-                max_length=32,
-                null=True,
-                verbose_name="Sets the LLM Embedding model",
-            ),
-        ),
         migrations.AddField(
             model_name="applicationconfiguration",
             name="llm_api_key",
@@ -61,6 +40,27 @@ class Migration(migrations.Migration):
                 verbose_name="Sets the LLM backend",
             ),
         ),
+        migrations.AddField(
+            model_name="applicationconfiguration",
+            name="llm_embedding_backend",
+            field=models.CharField(
+                blank=True,
+                choices=[("openai", "OpenAI"), ("huggingface", "Huggingface")],
+                max_length=32,
+                null=True,
+                verbose_name="Sets the LLM embedding backend",
+            ),
+        ),
+        migrations.AddField(
+            model_name="applicationconfiguration",
+            name="llm_embedding_model",
+            field=models.CharField(
+                blank=True,
+                max_length=32,
+                null=True,
+                verbose_name="Sets the LLM embedding model",
+            ),
+        ),
         migrations.AddField(
             model_name="applicationconfiguration",
             name="llm_model",
diff --git a/src/paperless/settings.py b/src/paperless/settings.py
index a63403602..1ba942a93 100644
--- a/src/paperless/settings.py
+++ b/src/paperless/settings.py
@@ -236,9 +236,6 @@ def _parse_beat_schedule() -> dict:
             "options": {
                 # 1 hour before default schedule sends again
                 "expires": 23.0 * 60.0 * 60.0,
-                "kwargs": {
-                    "progress_bar_disable": True,
-                },
             },
         },
     ]
diff --git a/src/paperless/tests/test_settings.py b/src/paperless/tests/test_settings.py
index d9d7425a2..ab4b50ce7 100644
--- a/src/paperless/tests/test_settings.py
+++ b/src/paperless/tests/test_settings.py
@@ -208,9 +208,6 @@ class TestCeleryScheduleParsing(TestCase):
                     "schedule": crontab(minute=10, hour=2),
                     "options": {
                         "expires": self.LLM_INDEX_EXPIRE_TIME,
-                        "kwargs": {
-                            "progress_bar_disable": True,
-                        },
                     },
                 },
             },
@@ -270,9 +267,6 @@ class TestCeleryScheduleParsing(TestCase):
                     "schedule": crontab(minute=10, hour=2),
                     "options": {
                         "expires": self.LLM_INDEX_EXPIRE_TIME,
-                        "kwargs": {
-                            "progress_bar_disable": True,
-                        },
                     },
                 },
             },
@@ -324,9 +318,6 @@ class TestCeleryScheduleParsing(TestCase):
                     "schedule": crontab(minute=10, hour=2),
                     "options": {
                         "expires": self.LLM_INDEX_EXPIRE_TIME,
-                        "kwargs": {
-                            "progress_bar_disable": True,
-                        },
                     },
                 },
             },
diff --git a/src/paperless_ai/indexing.py b/src/paperless_ai/indexing.py
index afc0abb46..548b6ba51 100644
--- a/src/paperless_ai/indexing.py
+++ b/src/paperless_ai/indexing.py
@@ -115,7 +115,7 @@ def remove_document_docstore_nodes(document: Document, index: VectorStoreIndex):
         index.docstore.delete_document(node_id)
 
 
-def update_llm_index(*, progress_bar_disable=False, rebuild=False):
+def update_llm_index(*, progress_bar_disable=False, rebuild=False) -> str:
     """
     Rebuild or update the LLM index.
     """
@@ -123,8 +123,9 @@ def update_llm_index(*, progress_bar_disable=False, rebuild=False):
 
     documents = Document.objects.all()
     if not documents.exists():
-        logger.warning("No documents found to index.")
-        return
+        msg = "No documents found to index."
+        logger.warning(msg)
+        return msg
 
     if (
         rebuild
@@ -145,6 +146,7 @@ def update_llm_index(*, progress_bar_disable=False, rebuild=False):
             embed_model=embed_model,
             show_progress=not progress_bar_disable,
         )
+        msg = "LLM index rebuilt successfully."
     else:
         # Update existing index
         index = load_or_build_index()
@@ -173,15 +175,18 @@ def update_llm_index(*, progress_bar_disable=False, rebuild=False):
                 nodes.extend(build_document_node(document))
 
         if nodes:
+            msg = "LLM index updated successfully."
             logger.info(
                 "Updating %d nodes in LLM index.",
                 len(nodes),
             )
             index.insert_nodes(nodes)
         else:
-            logger.info("No changes detected, skipping llm index rebuild.")
+            msg = "No changes detected in LLM index."
+            logger.info(msg)
 
     index.storage_context.persist(persist_dir=settings.LLM_INDEX_DIR)
+    return msg
 
 
 def llm_index_add_or_update_document(document: Document):