Use PaperlessTask for llmindex

2025-05-23 12:58:18 -05:00 · 2025-04-29 19:40:05 -07:00 · 2025-04-29 19:40:05 -07:00 · 374596b1bc
commit 374596b1bc
parent 51a7581860
9 changed files with 82 additions and 48 deletions
--- a/src-ui/src/app/data/paperless-task.ts
+++ b/src-ui/src/app/data/paperless-task.ts
@ -11,6 +11,7 @@ export enum PaperlessTaskName {
  TrainClassifier = 'train_classifier',
  SanityCheck = 'check_sanity',
  IndexOptimize = 'index_optimize',
  LLMIndexUpdate = 'llmindex_update',
 }
 export enum PaperlessTaskStatus {
--- a/src/documents/management/commands/document_llmindex.py
+++ b/src/documents/management/commands/document_llmindex.py
@ -18,4 +18,5 @@ class Command(ProgressBarMixin, BaseCommand):
            llmindex_index(
                progress_bar_disable=self.no_progress_bar,
                rebuild=options["command"] == "rebuild",
                scheduled=False,
            )
--- a/src/documents/migrations/1066_alter_paperlesstask_task_name.py
+++ b/src/documents/migrations/1066_alter_paperlesstask_task_name.py
@ -0,0 +1,30 @@
 # Generated by Django 5.1.8 on 2025-04-30 02:38
 from django.db import migrations
 from django.db import models
 class Migration(migrations.Migration):
    """
    Alter the ``task_name`` field of the ``paperlesstask`` model.

    The field is redefined as a nullable ``CharField`` (max length 255) whose
    choice list is given below and includes ``llmindex_update``.
    """

    # Must be applied after migration 1065 of the "documents" app.
    dependencies = [
        ("documents", "1065_workflowaction_assign_custom_fields_values"),
    ]
    operations = [
        migrations.AlterField(
            model_name="paperlesstask",
            name="task_name",
            field=models.CharField(
                # (stored value, human-readable label) pairs
                choices=[
                    ("consume_file", "Consume File"),
                    ("train_classifier", "Train Classifier"),
                    ("check_sanity", "Check Sanity"),
                    ("index_optimize", "Index Optimize"),
                    ("llmindex_update", "LLM Index Update"),
                ],
                help_text="Name of the task that was run",
                max_length=255,
                null=True,
                verbose_name="Task Name",
            ),
        ),
    ]
--- a/src/documents/models.py
+++ b/src/documents/models.py
@ -543,6 +543,7 @@ class PaperlessTask(ModelWithOwner):
        TRAIN_CLASSIFIER = ("train_classifier", _("Train Classifier"))
        CHECK_SANITY = ("check_sanity", _("Check Sanity"))
        INDEX_OPTIMIZE = ("index_optimize", _("Index Optimize"))
        LLMINDEX_UPDATE = ("llmindex_update", _("LLM Index Update"))
    task_id = models.CharField(
        max_length=255,
--- a/src/documents/tasks.py
+++ b/src/documents/tasks.py
@ -514,13 +514,29 @@ def check_scheduled_workflows():
@shared_task
def llmindex_index(*, progress_bar_disable=True, rebuild=False, scheduled=True):
    """
    Update (or rebuild) the LLM index and record the run as a PaperlessTask.

    Does nothing when the LLM index is disabled in the AI configuration.

    Args:
        progress_bar_disable: Passed through to ``update_llm_index``.
        rebuild: Passed through to ``update_llm_index``.
        scheduled: When true the PaperlessTask is recorded as a
            ``SCHEDULED_TASK``; otherwise as a ``MANUAL_TASK``.

    Raises:
        Any exception raised while importing or running ``update_llm_index``
        is re-raised after the task row has been marked as failed.
    """
    ai_config = AIConfig()
    if not ai_config.llm_index_enabled():
        return

    # Use one timestamp so date_created and date_started are identical.
    started_at = timezone.now()
    task = PaperlessTask.objects.create(
        type=PaperlessTask.TaskType.SCHEDULED_TASK
        if scheduled
        else PaperlessTask.TaskType.MANUAL_TASK,
        task_id=uuid.uuid4(),
        task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE,
        status=states.STARTED,
        date_created=started_at,
        date_started=started_at,
    )

    try:
        # Imported lazily, inside the try, so that an import failure is also
        # recorded on the task instead of leaving it in STARTED.
        from paperless_ai.indexing import update_llm_index

        result = update_llm_index(
            progress_bar_disable=progress_bar_disable,
            rebuild=rebuild,
        )
    except Exception as e:
        # Without this the task row would remain STARTED forever.
        task.status = states.FAILURE
        task.result = str(e)
        task.date_done = timezone.now()
        task.save(update_fields=["status", "result", "date_done"])
        raise

    task.status = states.SUCCESS
    task.result = result
    task.date_done = timezone.now()
    task.save(update_fields=["status", "result", "date_done"])
@shared_task
@ -531,11 +547,3 @@ def update_document_in_llm_index(document):
@shared_task
 def remove_document_from_llm_index(document):
    """Shared task that forwards ``document`` to ``llm_index_remove_document``."""
    llm_index_remove_document(document)
 # TODO: schedule to run periodically
@shared_task
 def rebuild_llm_index_task():
    from paperless_ai.indexing import update_llm_index
    update_llm_index(rebuild=True)
--- a/src/paperless/migrations/0004_applicationconfiguration_ai_enabled_and_more.py
+++ b/src/paperless/migrations/0004_applicationconfiguration_ai_enabled_and_more.py
@ -1,4 +1,4 @@
-# Generated by Django 5.1.7 on 2025-04-24 02:09
+# Generated by Django 5.1.8 on 2025-04-30 02:38
 from django.db import migrations
 from django.db import models
@ -19,27 +19,6 @@ class Migration(migrations.Migration):
                verbose_name="Enables AI features",
            ),
        ),
        migrations.AddField(
            model_name="applicationconfiguration",
            name="llm_embedding_backend",
            field=models.CharField(
                blank=True,
                choices=[("openai", "OpenAI"), ("local", "Local")],
                max_length=32,
                null=True,
                verbose_name="Sets the LLM Embedding backend",
            ),
        ),
        migrations.AddField(
            model_name="applicationconfiguration",
            name="llm_embedding_model",
            field=models.CharField(
                blank=True,
                max_length=32,
                null=True,
                verbose_name="Sets the LLM Embedding model",
            ),
        ),
        migrations.AddField(
            model_name="applicationconfiguration",
            name="llm_api_key",
@ -61,6 +40,27 @@ class Migration(migrations.Migration):
                verbose_name="Sets the LLM backend",
            ),
        ),
        migrations.AddField(
            model_name="applicationconfiguration",
            name="llm_embedding_backend",
            field=models.CharField(
                blank=True,
                choices=[("openai", "OpenAI"), ("huggingface", "Huggingface")],
                max_length=32,
                null=True,
                verbose_name="Sets the LLM embedding backend",
            ),
        ),
        migrations.AddField(
            model_name="applicationconfiguration",
            name="llm_embedding_model",
            field=models.CharField(
                blank=True,
                max_length=32,
                null=True,
                verbose_name="Sets the LLM embedding model",
            ),
        ),
        migrations.AddField(
            model_name="applicationconfiguration",
            name="llm_model",
--- a/src/paperless/settings.py
+++ b/src/paperless/settings.py
@ -236,9 +236,6 @@ def _parse_beat_schedule() -> dict:
            "options": {
                # 1 hour before default schedule sends again
                "expires": 23.0 * 60.0 * 60.0,
                "kwargs": {
                    "progress_bar_disable": True,
                },
            },
        },
    ]
--- a/src/paperless/tests/test_settings.py
+++ b/src/paperless/tests/test_settings.py
@ -208,9 +208,6 @@ class TestCeleryScheduleParsing(TestCase):
                    "schedule": crontab(minute=10, hour=2),
                    "options": {
                        "expires": self.LLM_INDEX_EXPIRE_TIME,
                        "kwargs": {
                            "progress_bar_disable": True,
                        },
                    },
                },
            },
@ -270,9 +267,6 @@ class TestCeleryScheduleParsing(TestCase):
                    "schedule": crontab(minute=10, hour=2),
                    "options": {
                        "expires": self.LLM_INDEX_EXPIRE_TIME,
                        "kwargs": {
                            "progress_bar_disable": True,
                        },
                    },
                },
            },
@ -324,9 +318,6 @@ class TestCeleryScheduleParsing(TestCase):
                    "schedule": crontab(minute=10, hour=2),
                    "options": {
                        "expires": self.LLM_INDEX_EXPIRE_TIME,
                        "kwargs": {
                            "progress_bar_disable": True,
                        },
                    },
                },
            },
--- a/src/paperless_ai/indexing.py
+++ b/src/paperless_ai/indexing.py
@ -115,7 +115,7 @@ def remove_document_docstore_nodes(document: Document, index: VectorStoreIndex):
        index.docstore.delete_document(node_id)
-def update_llm_index(*, progress_bar_disable=False, rebuild=False):
+def update_llm_index(*, progress_bar_disable=False, rebuild=False) -> str:
    """
    Rebuild or update the LLM index.
    """
@ -123,8 +123,9 @@ def update_llm_index(*, progress_bar_disable=False, rebuild=False):
    documents = Document.objects.all()
    if not documents.exists():
-        logger.warning("No documents found to index.")
+        msg = "No documents found to index."
-        return
+        logger.warning(msg)
        return msg
    if (
        rebuild
@ -145,6 +146,7 @@ def update_llm_index(*, progress_bar_disable=False, rebuild=False):
            embed_model=embed_model,
            show_progress=not progress_bar_disable,
        )
        msg = "LLM index rebuilt successfully."
    else:
        # Update existing index
        index = load_or_build_index()
@ -173,15 +175,18 @@ def update_llm_index(*, progress_bar_disable=False, rebuild=False):
                nodes.extend(build_document_node(document))
        if nodes:
            msg = "LLM index updated successfully."
            logger.info(
                "Updating %d nodes in LLM index.",
                len(nodes),
            )
            index.insert_nodes(nodes)
        else:
-            logger.info("No changes detected, skipping llm index rebuild.")
+            msg = "No changes detected in LLM index."
            logger.info(msg)
    index.storage_context.persist(persist_dir=settings.LLM_INDEX_DIR)
    return msg
 def llm_index_add_or_update_document(document: Document):