Feature: Paperless AI (#10319)
@@ -11,6 +11,7 @@ class DocumentsConfig(AppConfig):
        from documents.signals import document_consumption_finished
        from documents.signals import document_updated
        from documents.signals.handlers import add_inbox_tags
        from documents.signals.handlers import add_or_update_document_in_llm_index
        from documents.signals.handlers import add_to_index
        from documents.signals.handlers import run_workflows_added
        from documents.signals.handlers import run_workflows_updated
@@ -26,6 +27,7 @@ class DocumentsConfig(AppConfig):
        document_consumption_finished.connect(set_storage_path)
        document_consumption_finished.connect(add_to_index)
        document_consumption_finished.connect(run_workflows_added)
        document_consumption_finished.connect(add_or_update_document_in_llm_index)
        document_updated.connect(run_workflows_updated)

        import documents.schema  # noqa: F401
@@ -41,6 +41,7 @@ class SuggestionCacheData:
CLASSIFIER_VERSION_KEY: Final[str] = "classifier_version"
CLASSIFIER_HASH_KEY: Final[str] = "classifier_hash"
CLASSIFIER_MODIFIED_KEY: Final[str] = "classifier_modified"
LLM_CACHE_CLASSIFIER_VERSION: Final[int] = 1000  # Marker distinguishing LLM suggestions from classifier ones

CACHE_1_MINUTE: Final[int] = 60
CACHE_5_MINUTES: Final[int] = 5 * CACHE_1_MINUTE
@@ -196,6 +197,54 @@ def refresh_suggestions_cache(
    cache.touch(doc_key, timeout)


def get_llm_suggestion_cache(
    document_id: int,
    backend: str,
) -> SuggestionCacheData | None:
    doc_key = get_suggestion_cache_key(document_id)
    data: SuggestionCacheData = cache.get(doc_key)

    if data and data.classifier_hash == backend:
        return data

    return None


def set_llm_suggestions_cache(
    document_id: int,
    suggestions: dict,
    *,
    backend: str,
    timeout: int = CACHE_50_MINUTES,
) -> None:
    """
    Cache LLM-generated suggestions using a backend-specific identifier (e.g. 'openai:gpt-4').
    """
    doc_key = get_suggestion_cache_key(document_id)
    cache.set(
        doc_key,
        SuggestionCacheData(
            classifier_version=LLM_CACHE_CLASSIFIER_VERSION,
            classifier_hash=backend,
            suggestions=suggestions,
        ),
        timeout,
    )


def invalidate_llm_suggestions_cache(
    document_id: int,
) -> None:
    """
    Invalidate the LLM suggestions cache for a specific document.
    """
    doc_key = get_suggestion_cache_key(document_id)
    data: SuggestionCacheData = cache.get(doc_key)

    if data:
        cache.delete(doc_key)


def get_metadata_cache_key(document_id: int) -> str:
    """
    Returns the basic key for a document's metadata
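A quick sketch of how these helpers fit together (assuming `doc_id` refers to an existing document): entries are keyed per backend string, so a cache hit requires the same `backend` value that wrote the entry.

```python
from documents.caching import (
    get_llm_suggestion_cache,
    invalidate_llm_suggestions_cache,
    set_llm_suggestions_cache,
)

set_llm_suggestions_cache(doc_id, {"tags": ["invoice"]}, backend="openai")
cached = get_llm_suggestion_cache(doc_id, backend="openai")  # hit: same backend
assert cached.suggestions == {"tags": ["invoice"]}
get_llm_suggestion_cache(doc_id, backend="ollama")  # None: backend mismatch
invalidate_llm_suggestions_cache(doc_id)  # dropped, e.g. after the document changes
```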
src/documents/management/commands/document_llmindex.py (new file, 22 lines)
@@ -0,0 +1,22 @@
from django.core.management import BaseCommand
from django.db import transaction

from documents.management.commands.mixins import ProgressBarMixin
from documents.tasks import llmindex_index


class Command(ProgressBarMixin, BaseCommand):
    help = "Manages the LLM-based vector index for Paperless."

    def add_arguments(self, parser):
        parser.add_argument("command", choices=["rebuild", "update"])
        self.add_argument_progress_bar_mixin(parser)

    def handle(self, *args, **options):
        self.handle_progress_bar_mixin(**options)
        with transaction.atomic():
            llmindex_index(
                progress_bar_disable=self.no_progress_bar,
                rebuild=options["command"] == "rebuild",
                scheduled=False,
            )
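For reference, a hedged sketch of driving the new command from code or a test via Django's standard `call_command` (assuming the progress-bar mixin registers its usual `--no-progress-bar` flag):

```python
from django.core.management import call_command

# "rebuild" recreates the vector index from scratch; "update" only syncs changes.
call_command("document_llmindex", "rebuild", "--no-progress-bar")
```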
@@ -0,0 +1,30 @@
# Generated by Django 5.1.8 on 2025-04-30 02:38

from django.db import migrations
from django.db import models


class Migration(migrations.Migration):
    dependencies = [
        ("documents", "1074_workflowrun_deleted_at_workflowrun_restored_at_and_more"),
    ]

    operations = [
        migrations.AlterField(
            model_name="paperlesstask",
            name="task_name",
            field=models.CharField(
                choices=[
                    ("consume_file", "Consume File"),
                    ("train_classifier", "Train Classifier"),
                    ("check_sanity", "Check Sanity"),
                    ("index_optimize", "Index Optimize"),
                    ("llmindex_update", "LLM Index Update"),
                ],
                help_text="Name of the task that was run",
                max_length=255,
                null=True,
                verbose_name="Task Name",
            ),
        ),
    ]
@@ -598,6 +598,7 @@ class PaperlessTask(ModelWithOwner):
        TRAIN_CLASSIFIER = ("train_classifier", _("Train Classifier"))
        CHECK_SANITY = ("check_sanity", _("Check Sanity"))
        INDEX_OPTIMIZE = ("index_optimize", _("Index Optimize"))
        LLMINDEX_UPDATE = ("llmindex_update", _("LLM Index Update"))

    task_id = models.CharField(
        max_length=255,
@@ -26,6 +26,8 @@ from filelock import FileLock

from documents import matching
from documents.caching import clear_document_caches
from documents.caching import invalidate_llm_suggestions_cache
from documents.data_models import ConsumableDocument
from documents.file_handling import create_source_path_directory
from documents.file_handling import delete_empty_directories
from documents.file_handling import generate_filename
@@ -52,6 +54,7 @@ from documents.workflows.mutations import apply_assignment_to_overrides
from documents.workflows.mutations import apply_removal_to_document
from documents.workflows.mutations import apply_removal_to_overrides
from documents.workflows.utils import get_workflows_for_trigger
from paperless.config import AIConfig

if TYPE_CHECKING:
    from documents.classifier import DocumentClassifier
@@ -638,6 +641,15 @@ def cleanup_custom_field_deletion(sender, instance: CustomField, **kwargs):
    )


@receiver(models.signals.post_save, sender=Document)
def update_llm_suggestions_cache(sender, instance, **kwargs):
    """
    Invalidate the LLM suggestions cache when a document is saved.
    """
    # Invalidate the cache for the document
    invalidate_llm_suggestions_cache(instance.pk)


@receiver(models.signals.post_delete, sender=User)
@receiver(models.signals.post_delete, sender=Group)
def cleanup_user_deletion(sender, instance: User | Group, **kwargs):
@@ -944,3 +956,26 @@ def close_connection_pool_on_worker_init(**kwargs):
    for conn in connections.all(initialized_only=True):
        if conn.alias == "default" and hasattr(conn, "pool") and conn.pool:
            conn.close_pool()


def add_or_update_document_in_llm_index(sender, document, **kwargs):
    """
    Add or update a document in the LLM index when it is created or updated.
    """
    ai_config = AIConfig()
    if ai_config.llm_index_enabled:
        from documents.tasks import update_document_in_llm_index

        update_document_in_llm_index.delay(document)


@receiver(models.signals.post_delete, sender=Document)
def delete_document_from_llm_index(sender, instance: Document, **kwargs):
    """
    Delete a document from the LLM index when it is deleted.
    """
    ai_config = AIConfig()
    if ai_config.llm_index_enabled:
        from documents.tasks import remove_document_from_llm_index

        remove_document_from_llm_index.delay(instance)
@@ -54,6 +54,10 @@ from documents.signals import document_updated
from documents.signals.handlers import cleanup_document_deletion
from documents.signals.handlers import run_workflows
from documents.workflows.utils import get_workflows_for_trigger
from paperless.config import AIConfig
from paperless_ai.indexing import llm_index_add_or_update_document
from paperless_ai.indexing import llm_index_remove_document
from paperless_ai.indexing import update_llm_index

if settings.AUDIT_LOG_ENABLED:
    from auditlog.models import LogEntry
@@ -242,6 +246,13 @@ def bulk_update_documents(document_ids):
        for doc in documents:
            index.update_document(writer, doc)

    ai_config = AIConfig()
    if ai_config.llm_index_enabled:
        update_llm_index(
            progress_bar_disable=True,
            rebuild=False,
        )


@shared_task
def update_document_content_maybe_archive_file(document_id):
@@ -341,6 +352,10 @@ def update_document_content_maybe_archive_file(document_id):
        with index.open_index_writer() as writer:
            index.update_document(writer, document)

        ai_config = AIConfig()
        if ai_config.llm_index_enabled:
            llm_index_add_or_update_document(document)

        clear_document_caches(document.pk)

    except Exception:
@@ -558,3 +573,55 @@ def update_document_parent_tags(tag: Tag, new_parent: Tag) -> None:

    if affected:
        bulk_update_documents.delay(document_ids=list(affected))


@shared_task
def llmindex_index(
    *,
    progress_bar_disable=True,
    rebuild=False,
    scheduled=True,
    auto=False,
):
    ai_config = AIConfig()
    if ai_config.llm_index_enabled:
        task = PaperlessTask.objects.create(
            type=PaperlessTask.TaskType.SCHEDULED_TASK
            if scheduled
            else PaperlessTask.TaskType.AUTO
            if auto
            else PaperlessTask.TaskType.MANUAL_TASK,
            task_id=uuid.uuid4(),
            task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE,
            status=states.STARTED,
            date_created=timezone.now(),
            date_started=timezone.now(),
        )
        from paperless_ai.indexing import update_llm_index

        try:
            result = update_llm_index(
                progress_bar_disable=progress_bar_disable,
                rebuild=rebuild,
            )
            task.status = states.SUCCESS
            task.result = result
        except Exception as e:
            logger.error("LLM index error: " + str(e))
            task.status = states.FAILURE
            task.result = str(e)

        task.date_done = timezone.now()
        task.save(update_fields=["status", "result", "date_done"])
    else:
        logger.info("LLM index is disabled, skipping update.")


@shared_task
def update_document_in_llm_index(document):
    llm_index_add_or_update_document(document)


@shared_task
def remove_document_from_llm_index(document):
    llm_index_remove_document(document)
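One readability note on the task-type selection above: the stacked conditional expression is equivalent to the following spelled-out form, which may be easier to scan.

```python
# Equivalent spelled-out form of the nested conditional (illustrative only):
if scheduled:
    task_type = PaperlessTask.TaskType.SCHEDULED_TASK
elif auto:
    task_type = PaperlessTask.TaskType.AUTO
else:
    task_type = PaperlessTask.TaskType.MANUAL_TASK
```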
@@ -1,6 +1,7 @@
import json
from io import BytesIO
from pathlib import Path
from unittest.mock import patch

from django.contrib.auth.models import User
from django.core.files.uploadedfile import SimpleUploadedFile
@@ -66,6 +67,13 @@ class TestApiAppConfig(DirectoriesMixin, APITestCase):
                "barcode_max_pages": None,
                "barcode_enable_tag": None,
                "barcode_tag_mapping": None,
                "ai_enabled": False,
                "llm_embedding_backend": None,
                "llm_embedding_model": None,
                "llm_backend": None,
                "llm_model": None,
                "llm_api_key": None,
                "llm_endpoint": None,
            },
        )
@@ -611,3 +619,76 @@ class TestApiAppConfig(DirectoriesMixin, APITestCase):
        )
        self.assertEqual(response.status_code, status.HTTP_405_METHOD_NOT_ALLOWED)
        self.assertEqual(ApplicationConfiguration.objects.count(), 1)

    def test_update_llm_api_key(self):
        """
        GIVEN:
            - Existing config with llm_api_key specified
        WHEN:
            - API to update llm_api_key is called with all *s
            - API to update llm_api_key is called with empty string
        THEN:
            - llm_api_key is unchanged
            - llm_api_key is set to None
        """
        config = ApplicationConfiguration.objects.first()
        config.llm_api_key = "1234567890"
        config.save()

        # Test with all *
        response = self.client.patch(
            f"{self.ENDPOINT}1/",
            json.dumps(
                {
                    "llm_api_key": "*" * 32,
                },
            ),
            content_type="application/json",
        )
        self.assertEqual(response.status_code, status.HTTP_200_OK)
        config.refresh_from_db()
        self.assertEqual(config.llm_api_key, "1234567890")
        # Test with empty string
        response = self.client.patch(
            f"{self.ENDPOINT}1/",
            json.dumps(
                {
                    "llm_api_key": "",
                },
            ),
            content_type="application/json",
        )
        self.assertEqual(response.status_code, status.HTTP_200_OK)
        config.refresh_from_db()
        self.assertEqual(config.llm_api_key, None)

    def test_enable_ai_index_triggers_update(self):
        """
        GIVEN:
            - Existing config with AI disabled
        WHEN:
            - Config is updated to enable AI with llm_embedding_backend
        THEN:
            - LLM index is triggered to update
        """
        config = ApplicationConfiguration.objects.first()
        config.ai_enabled = False
        config.llm_embedding_backend = None
        config.save()

        with (
            patch("documents.tasks.llmindex_index.delay") as mock_update,
            patch("paperless_ai.indexing.vector_store_file_exists") as mock_exists,
        ):
            mock_exists.return_value = False
            self.client.patch(
                f"{self.ENDPOINT}1/",
                json.dumps(
                    {
                        "ai_enabled": True,
                        "llm_embedding_backend": "openai",
                    },
                ),
                content_type="application/json",
            )
            mock_update.assert_called_once()
@@ -310,3 +310,69 @@ class TestSystemStatus(APITestCase):
            "ERROR",
        )
        self.assertIsNotNone(response.data["tasks"]["sanity_check_error"])

    def test_system_status_ai_disabled(self):
        """
        GIVEN:
            - The AI feature is disabled
        WHEN:
            - The user requests the system status
        THEN:
            - The response contains the correct AI status
        """
        with override_settings(AI_ENABLED=False):
            self.client.force_login(self.user)
            response = self.client.get(self.ENDPOINT)
            self.assertEqual(response.status_code, status.HTTP_200_OK)
            self.assertEqual(response.data["tasks"]["llmindex_status"], "DISABLED")
            self.assertIsNone(response.data["tasks"]["llmindex_error"])

    def test_system_status_ai_enabled(self):
        """
        GIVEN:
            - The AI index feature is enabled, but no tasks are found
            - The AI index feature is enabled and a task is found
        WHEN:
            - The user requests the system status
        THEN:
            - The response contains the correct AI status
        """
        with override_settings(AI_ENABLED=True, LLM_EMBEDDING_BACKEND="openai"):
            self.client.force_login(self.user)

            # No tasks found
            response = self.client.get(self.ENDPOINT)
            self.assertEqual(response.status_code, status.HTTP_200_OK)
            self.assertEqual(response.data["tasks"]["llmindex_status"], "WARNING")

            PaperlessTask.objects.create(
                type=PaperlessTask.TaskType.SCHEDULED_TASK,
                status=states.SUCCESS,
                task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE,
            )
            response = self.client.get(self.ENDPOINT)
            self.assertEqual(response.status_code, status.HTTP_200_OK)
            self.assertEqual(response.data["tasks"]["llmindex_status"], "OK")
            self.assertIsNone(response.data["tasks"]["llmindex_error"])

    def test_system_status_ai_error(self):
        """
        GIVEN:
            - The AI index feature is enabled and a task is found with an error
        WHEN:
            - The user requests the system status
        THEN:
            - The response contains the correct AI status
        """
        with override_settings(AI_ENABLED=True, LLM_EMBEDDING_BACKEND="openai"):
            PaperlessTask.objects.create(
                type=PaperlessTask.TaskType.SCHEDULED_TASK,
                status=states.FAILURE,
                task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE,
                result="AI index update failed",
            )
            self.client.force_login(self.user)
            response = self.client.get(self.ENDPOINT)
            self.assertEqual(response.status_code, status.HTTP_200_OK)
            self.assertEqual(response.data["tasks"]["llmindex_status"], "ERROR")
            self.assertIsNotNone(response.data["tasks"]["llmindex_error"])
@@ -49,6 +49,7 @@ class TestApiUiSettings(DirectoriesMixin, APITestCase):
                    "backend_setting": "default",
                },
                "email_enabled": False,
                "ai_enabled": False,
            },
        )
@@ -3,14 +3,17 @@ from datetime import timedelta
from pathlib import Path
from unittest import mock

from celery import states
from django.conf import settings
from django.test import TestCase
from django.test import override_settings
from django.utils import timezone

from documents import tasks
from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
from documents.models import PaperlessTask
from documents.models import Tag
from documents.sanity_checker import SanityCheckFailedException
from documents.sanity_checker import SanityCheckMessages
@@ -270,3 +273,103 @@ class TestUpdateContent(DirectoriesMixin, TestCase):

        tasks.update_document_content_maybe_archive_file(doc.pk)
        self.assertNotEqual(Document.objects.get(pk=doc.pk).content, "test")


class TestAIIndex(DirectoriesMixin, TestCase):
    @override_settings(
        AI_ENABLED=True,
        LLM_EMBEDDING_BACKEND="huggingface",
    )
    def test_ai_index_success(self):
        """
        GIVEN:
            - Document exists, AI is enabled, llm index backend is set
        WHEN:
            - llmindex_index task is called
        THEN:
            - update_llm_index is called, and the task is marked as success
        """
        Document.objects.create(
            title="test",
            content="my document",
            checksum="wow",
        )
        # lazy-loaded so mock the actual function
        with mock.patch("paperless_ai.indexing.update_llm_index") as update_llm_index:
            update_llm_index.return_value = "LLM index updated successfully."
            tasks.llmindex_index()
            update_llm_index.assert_called_once()
            task = PaperlessTask.objects.get(
                task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE,
            )
            self.assertEqual(task.status, states.SUCCESS)
            self.assertEqual(task.result, "LLM index updated successfully.")

    @override_settings(
        AI_ENABLED=True,
        LLM_EMBEDDING_BACKEND="huggingface",
    )
    def test_ai_index_failure(self):
        """
        GIVEN:
            - Document exists, AI is enabled, llm index backend is set
        WHEN:
            - llmindex_index task is called
        THEN:
            - update_llm_index raises an exception, and the task is marked as failure
        """
        Document.objects.create(
            title="test",
            content="my document",
            checksum="wow",
        )
        # lazy-loaded so mock the actual function
        with mock.patch("paperless_ai.indexing.update_llm_index") as update_llm_index:
            update_llm_index.side_effect = Exception("LLM index update failed.")
            tasks.llmindex_index()
            update_llm_index.assert_called_once()
            task = PaperlessTask.objects.get(
                task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE,
            )
            self.assertEqual(task.status, states.FAILURE)
            self.assertIn("LLM index update failed.", task.result)

    def test_update_document_in_llm_index(self):
        """
        GIVEN:
            - Nothing
        WHEN:
            - update_document_in_llm_index task is called
        THEN:
            - llm_index_add_or_update_document is called
        """
        doc = Document.objects.create(
            title="test",
            content="my document",
            checksum="wow",
        )
        with mock.patch(
            "documents.tasks.llm_index_add_or_update_document",
        ) as llm_index_add_or_update_document:
            tasks.update_document_in_llm_index(doc)
            llm_index_add_or_update_document.assert_called_once_with(doc)

    def test_remove_document_from_llm_index(self):
        """
        GIVEN:
            - Nothing
        WHEN:
            - remove_document_from_llm_index task is called
        THEN:
            - llm_index_remove_document is called
        """
        doc = Document.objects.create(
            title="test",
            content="my document",
            checksum="wow",
        )
        with mock.patch(
            "documents.tasks.llm_index_remove_document",
        ) as llm_index_remove_document:
            tasks.remove_document_from_llm_index(doc)
            llm_index_remove_document.assert_called_once_with(doc)
@@ -2,6 +2,8 @@ import json
import tempfile
from datetime import timedelta
from pathlib import Path
from unittest.mock import MagicMock
from unittest.mock import patch

from django.conf import settings
from django.contrib.auth.models import Group
@@ -15,9 +17,15 @@ from django.utils import timezone
from guardian.shortcuts import assign_perm
from rest_framework import status

from documents.caching import get_llm_suggestion_cache
from documents.caching import set_llm_suggestions_cache
from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
from documents.models import ShareLink
from documents.models import StoragePath
from documents.models import Tag
from documents.signals.handlers import update_llm_suggestions_cache
from documents.tests.utils import DirectoriesMixin
from paperless.models import ApplicationConfiguration
@@ -270,3 +278,176 @@ class TestViews(DirectoriesMixin, TestCase):
            f"Possible N+1 queries detected: {num_queries_small} queries for 2 tags, "
            f"but {num_queries_large} queries for 50 tags"
        )


class TestAISuggestions(DirectoriesMixin, TestCase):
    def setUp(self):
        self.user = User.objects.create_superuser(username="testuser")
        self.document = Document.objects.create(
            title="Test Document",
            filename="test.pdf",
            mime_type="application/pdf",
        )
        self.tag1 = Tag.objects.create(name="tag1")
        self.correspondent1 = Correspondent.objects.create(name="correspondent1")
        self.document_type1 = DocumentType.objects.create(name="type1")
        self.path1 = StoragePath.objects.create(name="path1")
        super().setUp()

    @patch("documents.views.get_llm_suggestion_cache")
    @patch("documents.views.refresh_suggestions_cache")
    @override_settings(
        AI_ENABLED=True,
        LLM_BACKEND="mock_backend",
    )
    def test_suggestions_with_cached_llm(self, mock_refresh_cache, mock_get_cache):
        mock_get_cache.return_value = MagicMock(suggestions={"tags": ["tag1", "tag2"]})

        self.client.force_login(user=self.user)
        response = self.client.get(f"/api/documents/{self.document.pk}/suggestions/")
        self.assertEqual(response.status_code, status.HTTP_200_OK)
        self.assertEqual(response.json(), {"tags": ["tag1", "tag2"]})
        mock_refresh_cache.assert_called_once_with(self.document.pk)

    @patch("documents.views.get_ai_document_classification")
    @override_settings(
        AI_ENABLED=True,
        LLM_BACKEND="mock_backend",
    )
    def test_suggestions_with_ai_enabled(
        self,
        mock_get_ai_classification,
    ):
        mock_get_ai_classification.return_value = {
            "title": "AI Title",
            "tags": ["tag1", "tag2"],
            "correspondents": ["correspondent1"],
            "document_types": ["type1"],
            "storage_paths": ["path1"],
            "dates": ["2023-01-01"],
        }

        self.client.force_login(user=self.user)
        response = self.client.get(f"/api/documents/{self.document.pk}/suggestions/")
        self.assertEqual(response.status_code, status.HTTP_200_OK)
        self.assertEqual(
            response.json(),
            {
                "title": "AI Title",
                "tags": [self.tag1.pk],
                "suggested_tags": ["tag2"],
                "correspondents": [self.correspondent1.pk],
                "suggested_correspondents": [],
                "document_types": [self.document_type1.pk],
                "suggested_document_types": [],
                "storage_paths": [self.path1.pk],
                "suggested_storage_paths": [],
                "dates": ["2023-01-01"],
            },
        )

    def test_invalidate_suggestions_cache(self):
        self.client.force_login(user=self.user)
        suggestions = {
            "title": "AI Title",
            "tags": ["tag1", "tag2"],
            "correspondents": ["correspondent1"],
            "document_types": ["type1"],
            "storage_paths": ["path1"],
            "dates": ["2023-01-01"],
        }
        set_llm_suggestions_cache(
            self.document.pk,
            suggestions,
            backend="mock_backend",
        )
        self.assertEqual(
            get_llm_suggestion_cache(
                self.document.pk,
                backend="mock_backend",
            ).suggestions,
            suggestions,
        )
        # post_save signal triggered
        update_llm_suggestions_cache(
            sender=None,
            instance=self.document,
        )
        self.assertIsNone(
            get_llm_suggestion_cache(
                self.document.pk,
                backend="mock_backend",
            ),
        )


class TestAIChatStreamingView(DirectoriesMixin, TestCase):
    ENDPOINT = "/api/documents/chat/"

    def setUp(self):
        self.user = User.objects.create_user(username="testuser", password="pass")
        self.client.force_login(user=self.user)
        self.document = Document.objects.create(
            title="Test Document",
            filename="test.pdf",
            mime_type="application/pdf",
        )
        super().setUp()

    @override_settings(AI_ENABLED=False)
    def test_post_ai_disabled(self):
        response = self.client.post(
            self.ENDPOINT,
            data='{"q": "question"}',
            content_type="application/json",
        )
        self.assertEqual(response.status_code, 400)
        self.assertIn(b"AI is required for this feature", response.content)

    @patch("documents.views.stream_chat_with_documents")
    @patch("documents.views.get_objects_for_user_owner_aware")
    @override_settings(AI_ENABLED=True)
    def test_post_no_document_id(self, mock_get_objects, mock_stream_chat):
        mock_get_objects.return_value = [self.document]
        mock_stream_chat.return_value = iter([b"data"])
        response = self.client.post(
            self.ENDPOINT,
            data='{"q": "question"}',
            content_type="application/json",
        )
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response["Content-Type"], "text/event-stream")

    @patch("documents.views.stream_chat_with_documents")
    @override_settings(AI_ENABLED=True)
    def test_post_with_document_id(self, mock_stream_chat):
        mock_stream_chat.return_value = iter([b"data"])
        response = self.client.post(
            self.ENDPOINT,
            data=f'{{"q": "question", "document_id": {self.document.pk}}}',
            content_type="application/json",
        )
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response["Content-Type"], "text/event-stream")

    @override_settings(AI_ENABLED=True)
    def test_post_with_invalid_document_id(self):
        response = self.client.post(
            self.ENDPOINT,
            data='{"q": "question", "document_id": 999999}',
            content_type="application/json",
        )
        self.assertEqual(response.status_code, 400)
        self.assertIn(b"Document not found", response.content)

    @patch("documents.views.has_perms_owner_aware")
    @override_settings(AI_ENABLED=True)
    def test_post_with_document_id_no_permission(self, mock_has_perms):
        mock_has_perms.return_value = False
        response = self.client.post(
            self.ENDPOINT,
            data=f'{{"q": "question", "document_id": {self.document.pk}}}',
            content_type="application/json",
        )
        self.assertEqual(response.status_code, 403)
        self.assertIn(b"Insufficient permissions", response.content)
@@ -45,6 +45,7 @@ from django.http import HttpResponseBadRequest
from django.http import HttpResponseForbidden
from django.http import HttpResponseRedirect
from django.http import HttpResponseServerError
from django.http import StreamingHttpResponse
from django.shortcuts import get_object_or_404
from django.utils import timezone
from django.utils.decorators import method_decorator
@@ -52,6 +53,7 @@ from django.utils.timezone import make_aware
from django.utils.translation import get_language
from django.views import View
from django.views.decorators.cache import cache_control
from django.views.decorators.csrf import ensure_csrf_cookie
from django.views.decorators.http import condition
from django.views.decorators.http import last_modified
from django.views.generic import TemplateView
@@ -91,10 +93,12 @@ from documents import index
from documents.bulk_download import ArchiveOnlyStrategy
from documents.bulk_download import OriginalAndArchiveStrategy
from documents.bulk_download import OriginalsOnlyStrategy
from documents.caching import get_llm_suggestion_cache
from documents.caching import get_metadata_cache
from documents.caching import get_suggestion_cache
from documents.caching import refresh_metadata_cache
from documents.caching import refresh_suggestions_cache
from documents.caching import set_llm_suggestions_cache
from documents.caching import set_metadata_cache
from documents.caching import set_suggestions_cache
from documents.classifier import load_classifier
@@ -182,18 +186,27 @@ from documents.signals import document_updated
from documents.tasks import consume_file
from documents.tasks import empty_trash
from documents.tasks import index_optimize
from documents.tasks import llmindex_index
from documents.tasks import sanity_check
from documents.tasks import train_classifier
from documents.tasks import update_document_parent_tags
from documents.utils import get_boolean
from paperless import version
from paperless.celery import app as celery_app
from paperless.config import AIConfig
from paperless.config import GeneralConfig
from paperless.db import GnuPG
from paperless.models import ApplicationConfiguration
from paperless.serialisers import GroupSerializer
from paperless.serialisers import UserSerializer
from paperless.views import StandardPagination
from paperless_ai.ai_classifier import get_ai_document_classification
from paperless_ai.chat import stream_chat_with_documents
from paperless_ai.matching import extract_unmatched_names
from paperless_ai.matching import match_correspondents_by_name
from paperless_ai.matching import match_document_types_by_name
from paperless_ai.matching import match_storage_paths_by_name
from paperless_ai.matching import match_tags_by_name
from paperless_mail.models import MailAccount
from paperless_mail.models import MailRule
from paperless_mail.oauth import PaperlessMailOAuth2Manager
@@ -934,37 +947,103 @@ class DocumentViewSet(
        ):
            return HttpResponseForbidden("Insufficient permissions")

        ai_config = AIConfig()

        if ai_config.ai_enabled:
            cached_llm_suggestions = get_llm_suggestion_cache(
                doc.pk,
                backend=ai_config.llm_backend,
            )

            if cached_llm_suggestions:
                refresh_suggestions_cache(doc.pk)
                return Response(cached_llm_suggestions.suggestions)

            llm_suggestions = get_ai_document_classification(doc, request.user)

            matched_tags = match_tags_by_name(
                llm_suggestions.get("tags", []),
                request.user,
            )
            matched_correspondents = match_correspondents_by_name(
                llm_suggestions.get("correspondents", []),
                request.user,
            )
            matched_types = match_document_types_by_name(
                llm_suggestions.get("document_types", []),
                request.user,
            )
            matched_paths = match_storage_paths_by_name(
                llm_suggestions.get("storage_paths", []),
                request.user,
            )

            resp_data = {
                "title": llm_suggestions.get("title"),
                "tags": [t.id for t in matched_tags],
                "suggested_tags": extract_unmatched_names(
                    llm_suggestions.get("tags", []),
                    matched_tags,
                ),
                "correspondents": [c.id for c in matched_correspondents],
                "suggested_correspondents": extract_unmatched_names(
                    llm_suggestions.get("correspondents", []),
                    matched_correspondents,
                ),
                "document_types": [d.id for d in matched_types],
                "suggested_document_types": extract_unmatched_names(
                    llm_suggestions.get("document_types", []),
                    matched_types,
                ),
                "storage_paths": [s.id for s in matched_paths],
                "suggested_storage_paths": extract_unmatched_names(
                    llm_suggestions.get("storage_paths", []),
                    matched_paths,
                ),
                "dates": llm_suggestions.get("dates", []),
            }

            set_llm_suggestions_cache(doc.pk, resp_data, backend=ai_config.llm_backend)
        else:
            document_suggestions = get_suggestion_cache(doc.pk)

            if document_suggestions is not None:
                refresh_suggestions_cache(doc.pk)
                return Response(document_suggestions.suggestions)

            classifier = load_classifier()

            dates = []
            if settings.NUMBER_OF_SUGGESTED_DATES > 0:
                gen = parse_date_generator(doc.filename, doc.content)
                dates = sorted(
                    {
                        i
                        for i in itertools.islice(
                            gen,
                            settings.NUMBER_OF_SUGGESTED_DATES,
                        )
                    },
                )

            resp_data = {
                "correspondents": [
                    c.id for c in match_correspondents(doc, classifier, request.user)
                ],
                "tags": [t.id for t in match_tags(doc, classifier, request.user)],
                "document_types": [
                    dt.id for dt in match_document_types(doc, classifier, request.user)
                ],
                "storage_paths": [
                    dt.id for dt in match_storage_paths(doc, classifier, request.user)
                ],
                "dates": [
                    date.strftime("%Y-%m-%d") for date in dates if date is not None
                ],
            }

            # Cache the suggestions and the classifier hash for later
            set_suggestions_cache(doc.pk, resp_data, classifier)

        return Response(resp_data)
@@ -1288,6 +1367,59 @@ class DocumentViewSet(
        )


class ChatStreamingSerializer(serializers.Serializer):
    q = serializers.CharField(required=True)
    document_id = serializers.IntegerField(required=False, allow_null=True)


@method_decorator(
    [
        ensure_csrf_cookie,
        cache_control(no_cache=True),
    ],
    name="dispatch",
)
class ChatStreamingView(GenericAPIView):
    permission_classes = (IsAuthenticated,)
    serializer_class = ChatStreamingSerializer

    def post(self, request, *args, **kwargs):
        request.compress_exempt = True
        ai_config = AIConfig()
        if not ai_config.ai_enabled:
            return HttpResponseBadRequest("AI is required for this feature")

        try:
            question = request.data["q"]
        except KeyError:
            return HttpResponseBadRequest("Invalid request")

        doc_id = request.data.get("document_id")

        if doc_id:
            try:
                document = Document.objects.get(id=doc_id)
            except Document.DoesNotExist:
                return HttpResponseBadRequest("Document not found")

            if not has_perms_owner_aware(request.user, "view_document", document):
                return HttpResponseForbidden("Insufficient permissions")

            documents = [document]
        else:
            documents = get_objects_for_user_owner_aware(
                request.user,
                "view_document",
                Document,
            )

        response = StreamingHttpResponse(
            stream_chat_with_documents(query_str=question, documents=documents),
            content_type="text/event-stream",
        )
        return response
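A hedged sketch of consuming the endpoint from Python (URL and payload shape taken from the tests earlier in this diff; session authentication and CSRF handling are omitted for brevity):

```python
import requests

# Illustrative client: stream the server-sent response as it arrives.
with requests.post(
    "http://localhost:8000/api/documents/chat/",
    json={"q": "What is this document about?", "document_id": 123},  # id hypothetical
    stream=True,
) as resp:
    for chunk in resp.iter_content(chunk_size=None):
        print(chunk.decode(), end="")
```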
@extend_schema_view(
    list=extend_schema(
        description="Document views including search",
@@ -2446,6 +2578,10 @@ class UiSettingsView(GenericAPIView):

        ui_settings["email_enabled"] = settings.EMAIL_ENABLED

        ai_config = AIConfig()

        ui_settings["ai_enabled"] = ai_config.ai_enabled

        user_resp = {
            "id": user.id,
            "username": user.username,
@@ -2587,6 +2723,10 @@ class TasksViewSet(ReadOnlyModelViewSet):
            sanity_check,
            {"scheduled": False, "raise_on_error": False},
        ),
        PaperlessTask.TaskName.LLMINDEX_UPDATE: (
            llmindex_index,
            {"scheduled": False, "rebuild": False},
        ),
    }

    def get_queryset(self):
@@ -3106,6 +3246,31 @@ class SystemStatusView(PassUserMixin):
            last_sanity_check.date_done if last_sanity_check else None
        )

        ai_config = AIConfig()
        if not ai_config.llm_index_enabled:
            llmindex_status = "DISABLED"
            llmindex_error = None
            llmindex_last_modified = None
        else:
            last_llmindex_update = (
                PaperlessTask.objects.filter(
                    task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE,
                )
                .order_by("-date_done")
                .first()
            )
            llmindex_status = "OK"
            llmindex_error = None
            if last_llmindex_update is None:
                llmindex_status = "WARNING"
                llmindex_error = "No LLM index update tasks found"
            elif last_llmindex_update.status == states.FAILURE:
                llmindex_status = "ERROR"
                llmindex_error = last_llmindex_update.result
            llmindex_last_modified = (
                last_llmindex_update.date_done if last_llmindex_update else None
            )

        return Response(
            {
                "pngx_version": current_version,
@@ -3143,6 +3308,9 @@ class SystemStatusView(PassUserMixin):
                    "sanity_check_status": sanity_check_status,
                    "sanity_check_last_run": sanity_check_last_run,
                    "sanity_check_error": sanity_check_error,
                    "llmindex_status": llmindex_status,
                    "llmindex_last_modified": llmindex_last_modified,
                    "llmindex_error": llmindex_error,
                },
            },
        )
@@ -169,3 +169,37 @@ class GeneralConfig(BaseConfig):

        self.app_title = app_config.app_title or None
        self.app_logo = app_config.app_logo.url if app_config.app_logo else None


@dataclasses.dataclass
class AIConfig(BaseConfig):
    """
    AI related settings that require global scope
    """

    ai_enabled: bool = dataclasses.field(init=False)
    llm_embedding_backend: str = dataclasses.field(init=False)
    llm_embedding_model: str = dataclasses.field(init=False)
    llm_backend: str = dataclasses.field(init=False)
    llm_model: str = dataclasses.field(init=False)
    llm_api_key: str = dataclasses.field(init=False)
    llm_endpoint: str = dataclasses.field(init=False)

    def __post_init__(self) -> None:
        app_config = self._get_config_instance()

        self.ai_enabled = app_config.ai_enabled or settings.AI_ENABLED
        self.llm_embedding_backend = (
            app_config.llm_embedding_backend or settings.LLM_EMBEDDING_BACKEND
        )
        self.llm_embedding_model = (
            app_config.llm_embedding_model or settings.LLM_EMBEDDING_MODEL
        )
        self.llm_backend = app_config.llm_backend or settings.LLM_BACKEND
        self.llm_model = app_config.llm_model or settings.LLM_MODEL
        self.llm_api_key = app_config.llm_api_key or settings.LLM_API_KEY
        self.llm_endpoint = app_config.llm_endpoint or settings.LLM_ENDPOINT

    @property
    def llm_index_enabled(self) -> bool:
        return bool(self.ai_enabled and self.llm_embedding_backend)
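In short: each field prefers the database-backed `ApplicationConfiguration` value and falls back to the corresponding environment setting. A minimal usage sketch:

```python
from paperless.config import AIConfig

config = AIConfig()  # resolves DB value or settings fallback per field
if config.llm_index_enabled:  # True only with ai_enabled AND an embedding backend
    print(config.llm_backend, config.llm_embedding_backend)
```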
@@ -0,0 +1,84 @@
# Generated by Django 5.2.6 on 2025-09-30 17:43

from django.db import migrations
from django.db import models


class Migration(migrations.Migration):
    dependencies = [
        ("paperless", "0004_applicationconfiguration_barcode_asn_prefix_and_more"),
    ]

    operations = [
        migrations.AddField(
            model_name="applicationconfiguration",
            name="ai_enabled",
            field=models.BooleanField(
                default=False,
                null=True,
                verbose_name="Enables AI features",
            ),
        ),
        migrations.AddField(
            model_name="applicationconfiguration",
            name="llm_api_key",
            field=models.CharField(
                blank=True,
                max_length=1024,
                null=True,
                verbose_name="Sets the LLM API key",
            ),
        ),
        migrations.AddField(
            model_name="applicationconfiguration",
            name="llm_backend",
            field=models.CharField(
                blank=True,
                choices=[("openai", "OpenAI"), ("ollama", "Ollama")],
                max_length=128,
                null=True,
                verbose_name="Sets the LLM backend",
            ),
        ),
        migrations.AddField(
            model_name="applicationconfiguration",
            name="llm_embedding_backend",
            field=models.CharField(
                blank=True,
                choices=[("openai", "OpenAI"), ("huggingface", "Huggingface")],
                max_length=128,
                null=True,
                verbose_name="Sets the LLM embedding backend",
            ),
        ),
        migrations.AddField(
            model_name="applicationconfiguration",
            name="llm_embedding_model",
            field=models.CharField(
                blank=True,
                max_length=128,
                null=True,
                verbose_name="Sets the LLM embedding model",
            ),
        ),
        migrations.AddField(
            model_name="applicationconfiguration",
            name="llm_endpoint",
            field=models.CharField(
                blank=True,
                max_length=256,
                null=True,
                verbose_name="Sets the LLM endpoint, optional",
            ),
        ),
        migrations.AddField(
            model_name="applicationconfiguration",
            name="llm_model",
            field=models.CharField(
                blank=True,
                max_length=128,
                null=True,
                verbose_name="Sets the LLM model",
            ),
        ),
    ]
@@ -74,6 +74,20 @@ class ColorConvertChoices(models.TextChoices):
    CMYK = ("CMYK", _("CMYK"))


class LLMEmbeddingBackend(models.TextChoices):
    OPENAI = ("openai", _("OpenAI"))
    HUGGINGFACE = ("huggingface", _("Huggingface"))


class LLMBackend(models.TextChoices):
    """
    Matches to --llm-backend
    """

    OPENAI = ("openai", _("OpenAI"))
    OLLAMA = ("ollama", _("Ollama"))


class ApplicationConfiguration(AbstractSingletonModel):
    """
    Settings which are common across more than 1 parser
@@ -265,6 +279,60 @@ class ApplicationConfiguration(AbstractSingletonModel):
        null=True,
    )

    """
    AI related settings
    """

    ai_enabled = models.BooleanField(
        verbose_name=_("Enables AI features"),
        null=True,
        default=False,
    )

    llm_embedding_backend = models.CharField(
        verbose_name=_("Sets the LLM embedding backend"),
        blank=True,
        null=True,
        max_length=128,
        choices=LLMEmbeddingBackend.choices,
    )

    llm_embedding_model = models.CharField(
        verbose_name=_("Sets the LLM embedding model"),
        blank=True,
        null=True,
        max_length=128,
    )

    llm_backend = models.CharField(
        verbose_name=_("Sets the LLM backend"),
        blank=True,
        null=True,
        max_length=128,
        choices=LLMBackend.choices,
    )

    llm_model = models.CharField(
        verbose_name=_("Sets the LLM model"),
        blank=True,
        null=True,
        max_length=128,
    )

    llm_api_key = models.CharField(
        verbose_name=_("Sets the LLM API key"),
        blank=True,
        null=True,
        max_length=1024,
    )

    llm_endpoint = models.CharField(
        verbose_name=_("Sets the LLM endpoint, optional"),
        blank=True,
        null=True,
        max_length=256,
    )

    class Meta:
        verbose_name = _("paperless application settings")
@@ -206,6 +206,10 @@ class ProfileSerializer(PasswordValidationMixin, serializers.ModelSerializer):
class ApplicationConfigurationSerializer(serializers.ModelSerializer):
    user_args = serializers.JSONField(binary=True, allow_null=True)
    barcode_tag_mapping = serializers.JSONField(binary=True, allow_null=True)
    llm_api_key = ObfuscatedPasswordField(
        required=False,
        allow_null=True,
    )

    def run_validation(self, data):
        # Empty strings treated as None to avoid unexpected behavior
@@ -215,6 +219,11 @@ class ApplicationConfigurationSerializer(serializers.ModelSerializer):
            data["barcode_tag_mapping"] = None
        if "language" in data and data["language"] == "":
            data["language"] = None
        if "llm_api_key" in data and data["llm_api_key"] is not None:
            if data["llm_api_key"] == "":
                data["llm_api_key"] = None
            elif len(data["llm_api_key"].replace("*", "")) == 0:
                del data["llm_api_key"]
        return super().run_validation(data)

    def update(self, instance, validated_data):
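The API-key handling above yields three distinct PATCH behaviors, matching `test_update_llm_api_key`: an empty string clears the stored key, an all-asterisk value (the obfuscated placeholder the API echoes back) is dropped so the stored key survives a round-trip, and any other string replaces it. Illustrative payloads:

```python
# Hypothetical request bodies for a PATCH to the config endpoint
# (behavior per run_validation above):
clears_key = {"llm_api_key": ""}          # stored key becomes None
keeps_key = {"llm_api_key": "*" * 32}     # placeholder: field removed, key unchanged
replaces_key = {"llm_api_key": "sk-new"}  # hypothetical new key is stored
```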
@@ -12,6 +12,7 @@ from typing import Final
from urllib.parse import urlparse

from celery.schedules import crontab
from compression_middleware.middleware import CompressionMiddleware
from dateparser.languages.loader import LocaleDataLoader
from django.utils.translation import gettext_lazy as _
from dotenv import load_dotenv
@@ -229,6 +230,17 @@ def _parse_beat_schedule() -> dict:
                "expires": 59.0 * 60.0,
            },
        },
        {
            "name": "Rebuild LLM index",
            "env_key": "PAPERLESS_LLM_INDEX_TASK_CRON",
            # Default daily at 02:10
            "env_default": "10 2 * * *",
            "task": "documents.tasks.llmindex_index",
            "options": {
                # 1 hour before default schedule sends again
                "expires": 23.0 * 60.0 * 60.0,
            },
        },
    ]
    for task in tasks:
        # Either get the environment setting or use the default
@@ -287,6 +299,7 @@ MODEL_FILE = __get_path(
    "PAPERLESS_MODEL_FILE",
    DATA_DIR / "classification_model.pickle",
)
LLM_INDEX_DIR = DATA_DIR / "llm_index"

LOGGING_DIR = __get_path("PAPERLESS_LOGGING_DIR", DATA_DIR / "log")

@@ -380,6 +393,19 @@ MIDDLEWARE = [
if __get_boolean("PAPERLESS_ENABLE_COMPRESSION", "yes"):  # pragma: no cover
    MIDDLEWARE.insert(0, "compression_middleware.middleware.CompressionMiddleware")

# Workaround to not compress streaming responses (e.g. chat).
# See https://github.com/friedelwolff/django-compression-middleware/pull/7
original_process_response = CompressionMiddleware.process_response


def patched_process_response(self, request, response):
    if getattr(request, "compress_exempt", False):
        return response
    return original_process_response(self, request, response)


CompressionMiddleware.process_response = patched_process_response

ROOT_URLCONF = "paperless.urls"

@@ -585,6 +611,10 @@ X_FRAME_OPTIONS = "SAMEORIGIN"
# The next 3 settings can also be set using just PAPERLESS_URL
CSRF_TRUSTED_ORIGINS = __get_list("PAPERLESS_CSRF_TRUSTED_ORIGINS")

if DEBUG:
    # Allow access from the angular development server during debugging
    CSRF_TRUSTED_ORIGINS.append("http://localhost:4200")

# We allow CORS from localhost:8000
CORS_ALLOWED_ORIGINS = __get_list(
    "PAPERLESS_CORS_ALLOWED_HOSTS",
@@ -595,6 +625,8 @@ if DEBUG:
    # Allow access from the angular development server during debugging
    CORS_ALLOWED_ORIGINS.append("http://localhost:4200")

CORS_ALLOW_CREDENTIALS = True

CORS_EXPOSE_HEADERS = [
    "Content-Disposition",
]
@@ -868,6 +900,7 @@ LOGGING = {
    "loggers": {
        "paperless": {"handlers": ["file_paperless"], "level": "DEBUG"},
        "paperless_mail": {"handlers": ["file_mail"], "level": "DEBUG"},
        "paperless_ai": {"handlers": ["file_paperless"], "level": "DEBUG"},
        "ocrmypdf": {"handlers": ["file_paperless"], "level": "INFO"},
        "celery": {"handlers": ["file_celery"], "level": "DEBUG"},
        "kombu": {"handlers": ["file_celery"], "level": "DEBUG"},
@@ -1404,3 +1437,16 @@ WEBHOOKS_ALLOW_INTERNAL_REQUESTS = __get_boolean(
REMOTE_OCR_ENGINE = os.getenv("PAPERLESS_REMOTE_OCR_ENGINE")
REMOTE_OCR_API_KEY = os.getenv("PAPERLESS_REMOTE_OCR_API_KEY")
REMOTE_OCR_ENDPOINT = os.getenv("PAPERLESS_REMOTE_OCR_ENDPOINT")

################################################################################
# AI Settings                                                                  #
################################################################################
AI_ENABLED = __get_boolean("PAPERLESS_AI_ENABLED", "NO")
LLM_EMBEDDING_BACKEND = os.getenv(
    "PAPERLESS_AI_LLM_EMBEDDING_BACKEND",
)  # "huggingface" or "openai"
LLM_EMBEDDING_MODEL = os.getenv("PAPERLESS_AI_LLM_EMBEDDING_MODEL")
LLM_BACKEND = os.getenv("PAPERLESS_AI_LLM_BACKEND")  # "ollama" or "openai"
LLM_MODEL = os.getenv("PAPERLESS_AI_LLM_MODEL")
LLM_API_KEY = os.getenv("PAPERLESS_AI_LLM_API_KEY")
LLM_ENDPOINT = os.getenv("PAPERLESS_AI_LLM_ENDPOINT")
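For example, setting `PAPERLESS_AI_ENABLED=true`, `PAPERLESS_AI_LLM_BACKEND=ollama`, and `PAPERLESS_AI_LLM_MODEL=llama3` (model name illustrative) enables the chat and suggestion features, while additionally setting `PAPERLESS_AI_LLM_EMBEDDING_BACKEND=huggingface` turns on the vector index, since `AIConfig.llm_index_enabled` requires both `ai_enabled` and an embedding backend.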
@@ -160,6 +160,7 @@ class TestCeleryScheduleParsing(TestCase):
    SANITY_EXPIRE_TIME = ((7.0 * 24.0) - 1.0) * 60.0 * 60.0
    EMPTY_TRASH_EXPIRE_TIME = 23.0 * 60.0 * 60.0
    RUN_SCHEDULED_WORKFLOWS_EXPIRE_TIME = 59.0 * 60.0
    LLM_INDEX_EXPIRE_TIME = 23.0 * 60.0 * 60.0

    def test_schedule_configuration_default(self):
        """
@@ -204,6 +205,13 @@ class TestCeleryScheduleParsing(TestCase):
                    "schedule": crontab(minute="5", hour="*/1"),
                    "options": {"expires": self.RUN_SCHEDULED_WORKFLOWS_EXPIRE_TIME},
                },
                "Rebuild LLM index": {
                    "task": "documents.tasks.llmindex_index",
                    "schedule": crontab(minute=10, hour=2),
                    "options": {
                        "expires": self.LLM_INDEX_EXPIRE_TIME,
                    },
                },
            },
            schedule,
        )
@@ -256,6 +264,13 @@ class TestCeleryScheduleParsing(TestCase):
                    "schedule": crontab(minute="5", hour="*/1"),
                    "options": {"expires": self.RUN_SCHEDULED_WORKFLOWS_EXPIRE_TIME},
                },
                "Rebuild LLM index": {
                    "task": "documents.tasks.llmindex_index",
                    "schedule": crontab(minute=10, hour=2),
                    "options": {
                        "expires": self.LLM_INDEX_EXPIRE_TIME,
                    },
                },
            },
            schedule,
        )
@@ -300,6 +315,13 @@ class TestCeleryScheduleParsing(TestCase):
                    "schedule": crontab(minute="5", hour="*/1"),
                    "options": {"expires": self.RUN_SCHEDULED_WORKFLOWS_EXPIRE_TIME},
                },
                "Rebuild LLM index": {
                    "task": "documents.tasks.llmindex_index",
                    "schedule": crontab(minute=10, hour=2),
                    "options": {
                        "expires": self.LLM_INDEX_EXPIRE_TIME,
                    },
                },
            },
            schedule,
        )
@@ -322,6 +344,7 @@ class TestCeleryScheduleParsing(TestCase):
                "PAPERLESS_INDEX_TASK_CRON": "disable",
                "PAPERLESS_EMPTY_TRASH_TASK_CRON": "disable",
                "PAPERLESS_WORKFLOW_SCHEDULED_TASK_CRON": "disable",
                "PAPERLESS_LLM_INDEX_TASK_CRON": "disable",
            },
        ):
            schedule = _parse_beat_schedule()
@@ -18,6 +18,7 @@ from rest_framework.routers import DefaultRouter
from documents.views import BulkDownloadView
from documents.views import BulkEditObjectsView
from documents.views import BulkEditView
from documents.views import ChatStreamingView
from documents.views import CorrespondentViewSet
from documents.views import CustomFieldViewSet
from documents.views import DocumentTypeViewSet
@@ -139,6 +140,11 @@ urlpatterns = [
                    SelectionDataView.as_view(),
                    name="selection_data",
                ),
                re_path(
                    "^chat/",
                    ChatStreamingView.as_view(),
                    name="chat_streaming_view",
                ),
            ],
        ),
    ),
@@ -35,6 +35,7 @@ from rest_framework.viewsets import ModelViewSet

from documents.index import DelayedQuery
from documents.permissions import PaperlessObjectPermissions
from documents.tasks import llmindex_index
from paperless.filters import GroupFilterSet
from paperless.filters import UserFilterSet
from paperless.models import ApplicationConfiguration
@@ -43,6 +44,7 @@ from paperless.serialisers import GroupSerializer
from paperless.serialisers import PaperlessAuthTokenSerializer
from paperless.serialisers import ProfileSerializer
from paperless.serialisers import UserSerializer
from paperless_ai.indexing import vector_store_file_exists


class PaperlessObtainAuthTokenView(ObtainAuthToken):
@@ -358,6 +360,30 @@ class ApplicationConfigurationViewSet(ModelViewSet):
    def create(self, request, *args, **kwargs):
        return Response(status=405)  # Not Allowed

    def perform_update(self, serializer):
        old_instance = ApplicationConfiguration.objects.all().first()
        old_ai_index_enabled = (
            old_instance.ai_enabled and old_instance.llm_embedding_backend
        )

        new_instance: ApplicationConfiguration = serializer.save()
        new_ai_index_enabled = (
            new_instance.ai_enabled and new_instance.llm_embedding_backend
        )

        if (
            not old_ai_index_enabled
            and new_ai_index_enabled
            and not vector_store_file_exists()
        ):
            # AI index was just enabled and vector store file does not exist
            llmindex_index.delay(
                progress_bar_disable=True,
                rebuild=True,
                scheduled=False,
                auto=True,
            )


@extend_schema_view(
    post=extend_schema(
0
src/paperless_ai/__init__.py
Normal file
102
src/paperless_ai/ai_classifier.py
Normal file
@@ -0,0 +1,102 @@
import logging

from django.contrib.auth.models import User

from documents.models import Document
from documents.permissions import get_objects_for_user_owner_aware
from paperless.config import AIConfig
from paperless_ai.client import AIClient
from paperless_ai.indexing import query_similar_documents
from paperless_ai.indexing import truncate_content

logger = logging.getLogger("paperless_ai.rag_classifier")


def build_prompt_without_rag(document: Document) -> str:
    filename = document.filename or ""
    # Guard against None content before slicing
    content = truncate_content((document.content or "")[:4000])

    return f"""
You are a document classification assistant.

Analyze the following document and extract the following information:
- A short descriptive title
- Tags that reflect the content
- Names of people or organizations mentioned
- The type or category of the document
- Suggested folder paths for storing the document
- Up to 3 relevant dates in YYYY-MM-DD format

Filename:
{filename}

Content:
{content}
""".strip()


def build_prompt_with_rag(document: Document, user: User | None = None) -> str:
    base_prompt = build_prompt_without_rag(document)
    context = truncate_content(get_context_for_document(document, user))

    return f"""{base_prompt}

Additional context from similar documents:
{context}
""".strip()


def get_context_for_document(
    doc: Document,
    user: User | None = None,
    max_docs: int = 5,
) -> str:
    visible_documents = (
        get_objects_for_user_owner_aware(
            user,
            "view_document",
            Document,
        )
        if user
        else None
    )
    similar_docs = query_similar_documents(
        document=doc,
        document_ids=[document.pk for document in visible_documents]
        if visible_documents
        else None,
    )[:max_docs]
    context_blocks = []
    for similar in similar_docs:
        text = (similar.content or "")[:1000]
        title = similar.title or similar.filename or "Untitled"
        context_blocks.append(f"TITLE: {title}\n{text}")
    return "\n\n".join(context_blocks)


def parse_ai_response(raw: dict) -> dict:
    return {
        "title": raw.get("title", ""),
        "tags": raw.get("tags", []),
        "correspondents": raw.get("correspondents", []),
        "document_types": raw.get("document_types", []),
        "storage_paths": raw.get("storage_paths", []),
        "dates": raw.get("dates", []),
    }


def get_ai_document_classification(
    document: Document,
    user: User | None = None,
) -> dict:
    ai_config = AIConfig()

    prompt = (
        build_prompt_with_rag(document, user)
        if ai_config.llm_embedding_backend
        else build_prompt_without_rag(document)
    )

    client = AIClient()
    result = client.run_llm_query(prompt)
    return parse_ai_response(result)
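A minimal usage sketch for the classifier above; `user` is assumed to be a request user, and mapping the raw string suggestions back to real objects is handled by paperless_ai.matching, which appears later in this diff:

# Sketch only, not part of the diff: classify one document, then
# fuzzy-match the suggested tag names against existing Tag objects.
from documents.models import Document
from paperless_ai.ai_classifier import get_ai_document_classification
from paperless_ai.matching import match_tags_by_name

doc = Document.objects.first()
suggestions = get_ai_document_classification(doc, user)  # `user` assumed
matched_tags = match_tags_by_name(suggestions["tags"], user)
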
10
src/paperless_ai/base_model.py
Normal file
@@ -0,0 +1,10 @@
from llama_index.core.bridge.pydantic import BaseModel


class DocumentClassifierSchema(BaseModel):
    title: str
    tags: list[str]
    correspondents: list[str]
    document_types: list[str]
    storage_paths: list[str]
    dates: list[str]
105
src/paperless_ai/chat.py
Normal file
@@ -0,0 +1,105 @@
import logging
import sys

from llama_index.core import VectorStoreIndex
from llama_index.core.prompts import PromptTemplate
from llama_index.core.query_engine import RetrieverQueryEngine

from documents.models import Document
from paperless_ai.client import AIClient
from paperless_ai.indexing import load_or_build_index

logger = logging.getLogger("paperless_ai.chat")

MAX_SINGLE_DOC_CONTEXT_CHARS = 15000
SINGLE_DOC_SNIPPET_CHARS = 800

CHAT_PROMPT_TMPL = PromptTemplate(
    template="""Context information is below.
---------------------
{context_str}
---------------------
Given the context information and not prior knowledge, answer the query.
Query: {query_str}
Answer:""",
)


def stream_chat_with_documents(query_str: str, documents: list[Document]):
    client = AIClient()
    index = load_or_build_index()

    doc_ids = [str(doc.pk) for doc in documents]

    # Filter only the node(s) that match the document IDs
    nodes = [
        node
        for node in index.docstore.docs.values()
        if node.metadata.get("document_id") in doc_ids
    ]

    if len(nodes) == 0:
        logger.warning("No nodes found for the given documents.")
        yield "Sorry, I couldn't find any content to answer your question."
        return

    local_index = VectorStoreIndex(nodes=nodes)
    retriever = local_index.as_retriever(
        similarity_top_k=3 if len(documents) == 1 else 5,
    )

    if len(documents) == 1:
        # Just one doc: provide full content
        doc = documents[0]
        # TODO: include document metadata in the context
        content = doc.content or ""
        context_body = content

        if len(content) > MAX_SINGLE_DOC_CONTEXT_CHARS:
            logger.info(
                "Truncating single-document context from %s to %s characters",
                len(content),
                MAX_SINGLE_DOC_CONTEXT_CHARS,
            )
            context_body = content[:MAX_SINGLE_DOC_CONTEXT_CHARS]

        top_nodes = retriever.retrieve(query_str)
        if len(top_nodes) > 0:
            snippets = "\n\n".join(
                f"TITLE: {node.metadata.get('title')}\n{node.text[:SINGLE_DOC_SNIPPET_CHARS]}"
                for node in top_nodes
            )
            context_body = f"{context_body}\n\nTOP MATCHES:\n{snippets}"

        context = f"TITLE: {doc.title or doc.filename}\n{context_body}"
    else:
        top_nodes = retriever.retrieve(query_str)

        if len(top_nodes) == 0:
            logger.warning("Retriever returned no nodes for the given documents.")
            yield "Sorry, I couldn't find any content to answer your question."
            return

        context = "\n\n".join(
            f"TITLE: {node.metadata.get('title')}\n{node.text[:SINGLE_DOC_SNIPPET_CHARS]}"
            for node in top_nodes
        )

    prompt = CHAT_PROMPT_TMPL.partial_format(
        context_str=context,
        query_str=query_str,
    ).format(llm=client.llm)

    query_engine = RetrieverQueryEngine.from_args(
        retriever=retriever,
        llm=client.llm,
        streaming=True,
    )

    logger.debug("Document chat prompt: %s", prompt)

    response_stream = query_engine.query(prompt)

    for chunk in response_stream.response_gen:
        yield chunk
        sys.stdout.flush()
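stream_chat_with_documents is a generator, so callers can forward chunks as they arrive; a minimal sketch of draining it directly (the feature itself streams it through ChatStreamingView):

# Sketch only: collect the streamed answer for two documents.
from documents.models import Document
from paperless_ai.chat import stream_chat_with_documents

docs = list(Document.objects.all()[:2])
answer = "".join(stream_chat_with_documents("What are these documents about?", docs))
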
69
src/paperless_ai/client.py
Normal file
@@ -0,0 +1,69 @@
import logging

from llama_index.core.llms import ChatMessage
from llama_index.core.program.function_program import get_function_tool
from llama_index.llms.ollama import Ollama
from llama_index.llms.openai import OpenAI

from paperless.config import AIConfig
from paperless_ai.base_model import DocumentClassifierSchema

logger = logging.getLogger("paperless_ai.client")


class AIClient:
    """
    A client for interacting with an LLM backend.
    """

    def __init__(self):
        self.settings = AIConfig()
        self.llm = self.get_llm()

    def get_llm(self) -> Ollama | OpenAI:
        if self.settings.llm_backend == "ollama":
            return Ollama(
                model=self.settings.llm_model or "llama3",
                base_url=self.settings.llm_endpoint or "http://localhost:11434",
                request_timeout=120,
            )
        elif self.settings.llm_backend == "openai":
            return OpenAI(
                model=self.settings.llm_model or "gpt-3.5-turbo",
                api_base=self.settings.llm_endpoint or None,
                api_key=self.settings.llm_api_key,
            )
        else:
            raise ValueError(f"Unsupported LLM backend: {self.settings.llm_backend}")

    def run_llm_query(self, prompt: str) -> dict:
        logger.debug(
            "Running LLM query against %s with model %s",
            self.settings.llm_backend,
            self.settings.llm_model,
        )

        user_msg = ChatMessage(role="user", content=prompt)
        tool = get_function_tool(DocumentClassifierSchema)
        result = self.llm.chat_with_tools(
            tools=[tool],
            user_msg=user_msg,
            chat_history=[],
        )
        tool_calls = self.llm.get_tool_calls_from_response(
            result,
            error_on_no_tool_calls=True,
        )
        logger.debug("LLM query result: %s", tool_calls)
        parsed = DocumentClassifierSchema(**tool_calls[0].tool_kwargs)
        return parsed.model_dump()

    def run_chat(self, messages: list[ChatMessage]) -> str:
        logger.debug(
            "Running chat query against %s with model %s",
            self.settings.llm_backend,
            self.settings.llm_model,
        )
        result = self.llm.chat(messages)
        logger.debug("Chat result: %s", result)
        return result
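The structured-output flow above works by converting DocumentClassifierSchema into a function tool and requiring a tool call, so the model's answer always arrives as schema-shaped arguments. A minimal calling sketch, assuming an Ollama backend has been configured through AIConfig:

# Sketch only: one classification query; the result is the schema as a dict.
from paperless_ai.client import AIClient

client = AIClient()
result = client.run_llm_query("Classify this document: ...")
print(result["title"], result["tags"])
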
92
src/paperless_ai/embedding.py
Normal file
@@ -0,0 +1,92 @@
import json
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from pathlib import Path

from django.conf import settings
from llama_index.core.base.embeddings.base import BaseEmbedding
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.embeddings.openai import OpenAIEmbedding

from documents.models import Document
from documents.models import Note
from paperless.config import AIConfig
from paperless.models import LLMEmbeddingBackend


def get_embedding_model() -> BaseEmbedding:
    config = AIConfig()

    match config.llm_embedding_backend:
        case LLMEmbeddingBackend.OPENAI:
            return OpenAIEmbedding(
                model=config.llm_embedding_model or "text-embedding-3-small",
                api_key=config.llm_api_key,
            )
        case LLMEmbeddingBackend.HUGGINGFACE:
            return HuggingFaceEmbedding(
                model_name=config.llm_embedding_model
                or "sentence-transformers/all-MiniLM-L6-v2",
            )
        case _:
            raise ValueError(
                f"Unsupported embedding backend: {config.llm_embedding_backend}",
            )


def get_embedding_dim() -> int:
    """
    Loads embedding dimension from meta.json if available, otherwise infers it
    from a dummy embedding and stores it for future use.
    """
    config = AIConfig()
    model = config.llm_embedding_model or (
        "text-embedding-3-small"
        if config.llm_embedding_backend == "openai"
        else "sentence-transformers/all-MiniLM-L6-v2"
    )

    meta_path: Path = settings.LLM_INDEX_DIR / "meta.json"
    if meta_path.exists():
        with meta_path.open() as f:
            meta = json.load(f)
        if meta.get("embedding_model") != model:
            raise RuntimeError(
                f"Embedding model changed from {meta.get('embedding_model')} to {model}. "
                "You must rebuild the index.",
            )
        return meta["dim"]

    embedding_model = get_embedding_model()
    test_embed = embedding_model.get_text_embedding("test")
    dim = len(test_embed)

    with meta_path.open("w") as f:
        json.dump({"embedding_model": model, "dim": dim}, f)

    return dim


def build_llm_index_text(doc: Document) -> str:
    lines = [
        f"Title: {doc.title}",
        f"Filename: {doc.filename}",
        f"Created: {doc.created}",
        f"Added: {doc.added}",
        f"Modified: {doc.modified}",
        f"Tags: {', '.join(tag.name for tag in doc.tags.all())}",
        f"Document Type: {doc.document_type.name if doc.document_type else ''}",
        f"Correspondent: {doc.correspondent.name if doc.correspondent else ''}",
        f"Storage Path: {doc.storage_path.name if doc.storage_path else ''}",
        f"Archive Serial Number: {doc.archive_serial_number or ''}",
        f"Notes: {','.join([str(c.note) for c in Note.objects.filter(document=doc)])}",
    ]

    for instance in doc.custom_fields.all():
        lines.append(f"Custom Field - {instance.field.name}: {instance}")

    lines.append("\nContent:\n")
    lines.append(doc.content or "")

    return "\n".join(lines)
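For illustration, the meta.json sidecar written by get_embedding_dim holds only the model name and its dimension; with the HuggingFace default model (384 dimensions, matching the fake embedding used in the tests below) it would contain:

# Sketch of the persisted meta.json contents for the HuggingFace default.
meta = {"embedding_model": "sentence-transformers/all-MiniLM-L6-v2", "dim": 384}
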
283
src/paperless_ai/indexing.py
Normal file
@@ -0,0 +1,283 @@
import logging
import shutil
from pathlib import Path

import faiss
import llama_index.core.settings as llama_settings
import tqdm
from django.conf import settings
from llama_index.core import Document as LlamaDocument
from llama_index.core import StorageContext
from llama_index.core import VectorStoreIndex
from llama_index.core import load_index_from_storage
from llama_index.core.indices.prompt_helper import PromptHelper
from llama_index.core.node_parser import SimpleNodeParser
from llama_index.core.prompts import PromptTemplate
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.schema import BaseNode
from llama_index.core.storage.docstore import SimpleDocumentStore
from llama_index.core.storage.index_store import SimpleIndexStore
from llama_index.core.text_splitter import TokenTextSplitter
from llama_index.vector_stores.faiss import FaissVectorStore

from documents.models import Document
from paperless_ai.embedding import build_llm_index_text
from paperless_ai.embedding import get_embedding_dim
from paperless_ai.embedding import get_embedding_model

logger = logging.getLogger("paperless_ai.indexing")


def get_or_create_storage_context(*, rebuild=False):
    """
    Loads or creates the StorageContext (vector store, docstore, index store).
    If rebuild=True, deletes and recreates everything.
    """
    if rebuild:
        shutil.rmtree(settings.LLM_INDEX_DIR, ignore_errors=True)
        settings.LLM_INDEX_DIR.mkdir(parents=True, exist_ok=True)

    if rebuild or not settings.LLM_INDEX_DIR.exists():
        embedding_dim = get_embedding_dim()
        faiss_index = faiss.IndexFlatL2(embedding_dim)
        vector_store = FaissVectorStore(faiss_index=faiss_index)
        docstore = SimpleDocumentStore()
        index_store = SimpleIndexStore()
    else:
        vector_store = FaissVectorStore.from_persist_dir(settings.LLM_INDEX_DIR)
        docstore = SimpleDocumentStore.from_persist_dir(settings.LLM_INDEX_DIR)
        index_store = SimpleIndexStore.from_persist_dir(settings.LLM_INDEX_DIR)

    return StorageContext.from_defaults(
        docstore=docstore,
        index_store=index_store,
        vector_store=vector_store,
        persist_dir=settings.LLM_INDEX_DIR,
    )


def build_document_node(document: Document) -> list[BaseNode]:
    """
    Given a Document, returns parsed Nodes ready for indexing.
    """
    text = build_llm_index_text(document)
    metadata = {
        "document_id": str(document.id),
        "title": document.title,
        "tags": [t.name for t in document.tags.all()],
        "correspondent": document.correspondent.name
        if document.correspondent
        else None,
        "document_type": document.document_type.name
        if document.document_type
        else None,
        "created": document.created.isoformat() if document.created else None,
        "added": document.added.isoformat() if document.added else None,
        "modified": document.modified.isoformat(),
    }
    doc = LlamaDocument(text=text, metadata=metadata)
    parser = SimpleNodeParser()
    return parser.get_nodes_from_documents([doc])


def load_or_build_index(nodes=None):
    """
    Load an existing VectorStoreIndex if present,
    or build a new one using provided nodes if storage is empty.
    """
    embed_model = get_embedding_model()
    llama_settings.Settings.embed_model = embed_model
    storage_context = get_or_create_storage_context()
    try:
        return load_index_from_storage(storage_context=storage_context)
    except ValueError as e:
        logger.warning("Failed to load index from storage: %s", e)
        if not nodes:
            logger.info("No nodes provided for index creation.")
            raise
        return VectorStoreIndex(
            nodes=nodes,
            storage_context=storage_context,
            embed_model=embed_model,
        )


def remove_document_docstore_nodes(document: Document, index: VectorStoreIndex):
    """
    Removes the existing docstore nodes belonging to a given document from the index.
    This is necessary because FAISS IndexFlatL2 is append-only.
    """
    all_node_ids = list(index.docstore.docs.keys())
    existing_nodes = [
        node.node_id
        for node in index.docstore.get_nodes(all_node_ids)
        if node.metadata.get("document_id") == str(document.id)
    ]
    for node_id in existing_nodes:
        # Delete from docstore; FAISS IndexFlatL2 is append-only
        index.docstore.delete_document(node_id)


def vector_store_file_exists():
    """
    Check if the vector store file exists in the LLM index directory.
    """
    return Path(settings.LLM_INDEX_DIR / "default__vector_store.json").exists()


def update_llm_index(*, progress_bar_disable=False, rebuild=False) -> str:
    """
    Rebuild or update the LLM index.
    """
    nodes = []

    documents = Document.objects.all()
    if not documents.exists():
        msg = "No documents found to index."
        logger.warning(msg)
        return msg

    if rebuild or not vector_store_file_exists():
        # remove meta.json to force re-detection of embedding dim
        (settings.LLM_INDEX_DIR / "meta.json").unlink(missing_ok=True)
        # Rebuild index from scratch
        logger.info("Rebuilding LLM index.")
        embed_model = get_embedding_model()
        llama_settings.Settings.embed_model = embed_model
        storage_context = get_or_create_storage_context(rebuild=True)
        for document in tqdm.tqdm(documents, disable=progress_bar_disable):
            document_nodes = build_document_node(document)
            nodes.extend(document_nodes)

        index = VectorStoreIndex(
            nodes=nodes,
            storage_context=storage_context,
            embed_model=embed_model,
            show_progress=not progress_bar_disable,
        )
        msg = "LLM index rebuilt successfully."
    else:
        # Update existing index
        index = load_or_build_index()
        all_node_ids = list(index.docstore.docs.keys())
        existing_nodes = {
            node.metadata.get("document_id"): node
            for node in index.docstore.get_nodes(all_node_ids)
        }

        for document in tqdm.tqdm(documents, disable=progress_bar_disable):
            doc_id = str(document.id)
            document_modified = document.modified.isoformat()

            if doc_id in existing_nodes:
                node = existing_nodes[doc_id]
                node_modified = node.metadata.get("modified")

                if node_modified == document_modified:
                    continue

                # Again, delete from docstore; FAISS IndexFlatL2 is append-only
                index.docstore.delete_document(node.node_id)
                nodes.extend(build_document_node(document))
            else:
                # New document, add it
                nodes.extend(build_document_node(document))

        if nodes:
            msg = "LLM index updated successfully."
            logger.info(
                "Updating %d nodes in LLM index.",
                len(nodes),
            )
            index.insert_nodes(nodes)
        else:
            msg = "No changes detected in LLM index."
            logger.info(msg)

    index.storage_context.persist(persist_dir=settings.LLM_INDEX_DIR)
    return msg


def llm_index_add_or_update_document(document: Document):
    """
    Adds or updates a document in the LLM index.
    If the document already exists, it will be replaced.
    """
    new_nodes = build_document_node(document)

    index = load_or_build_index(nodes=new_nodes)

    remove_document_docstore_nodes(document, index)

    index.insert_nodes(new_nodes)

    index.storage_context.persist(persist_dir=settings.LLM_INDEX_DIR)


def llm_index_remove_document(document: Document):
    """
    Removes a document from the LLM index.
    """
    index = load_or_build_index()

    remove_document_docstore_nodes(document, index)

    index.storage_context.persist(persist_dir=settings.LLM_INDEX_DIR)


def truncate_content(content: str) -> str:
    prompt_helper = PromptHelper(
        context_window=8192,
        num_output=512,
        chunk_overlap_ratio=0.1,
        chunk_size_limit=None,
    )
    splitter = TokenTextSplitter(separator=" ", chunk_size=512, chunk_overlap=50)
    content_chunks = splitter.split_text(content)
    truncated_chunks = prompt_helper.truncate(
        prompt=PromptTemplate(template="{content}"),
        text_chunks=content_chunks,
        padding=5,
    )
    return " ".join(truncated_chunks)


def query_similar_documents(
    document: Document,
    top_k: int = 5,
    document_ids: list[int] | None = None,
) -> list[Document]:
    """
    Runs a similarity query and returns top-k similar Document objects.
    """
    index = load_or_build_index()

    # constrain only the node(s) that match the document IDs, if given;
    # node metadata stores document IDs as strings, so compare as strings
    doc_node_ids = (
        [
            node.node_id
            for node in index.docstore.docs.values()
            if node.metadata.get("document_id") in {str(doc_id) for doc_id in document_ids}
        ]
        if document_ids
        else None
    )

    retriever = VectorIndexRetriever(
        index=index,
        similarity_top_k=top_k,
        doc_ids=doc_node_ids,
    )

    query_text = truncate_content(
        (document.title or "") + "\n" + (document.content or ""),
    )
    results = retriever.retrieve(query_text)

    document_ids = [
        int(node.metadata["document_id"])
        for node in results
        if "document_id" in node.metadata
    ]

    return list(Document.objects.filter(pk__in=document_ids))
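The two maintenance paths above differ in cost: a rebuild wipes LLM_INDEX_DIR and re-embeds every document, while an update re-embeds only documents whose modified timestamp no longer matches the node metadata. A minimal sketch:

# Sketch only: full rebuild vs. incremental update of the vector index.
from paperless_ai import indexing

indexing.update_llm_index(rebuild=True)   # recreate the FAISS store from scratch
indexing.update_llm_index(rebuild=False)  # touch only new or modified documents
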
102
src/paperless_ai/matching.py
Normal file
@@ -0,0 +1,102 @@
import difflib
import logging
import re

from django.contrib.auth.models import User

from documents.models import Correspondent
from documents.models import DocumentType
from documents.models import StoragePath
from documents.models import Tag
from documents.permissions import get_objects_for_user_owner_aware

MATCH_THRESHOLD = 0.8

logger = logging.getLogger("paperless_ai.matching")


def match_tags_by_name(names: list[str], user: User) -> list[Tag]:
    queryset = get_objects_for_user_owner_aware(
        user,
        ["view_tag"],
        Tag,
    )
    return _match_names_to_queryset(names, queryset, "name")


def match_correspondents_by_name(names: list[str], user: User) -> list[Correspondent]:
    queryset = get_objects_for_user_owner_aware(
        user,
        ["view_correspondent"],
        Correspondent,
    )
    return _match_names_to_queryset(names, queryset, "name")


def match_document_types_by_name(names: list[str], user: User) -> list[DocumentType]:
    queryset = get_objects_for_user_owner_aware(
        user,
        ["view_documenttype"],
        DocumentType,
    )
    return _match_names_to_queryset(names, queryset, "name")


def match_storage_paths_by_name(names: list[str], user: User) -> list[StoragePath]:
    queryset = get_objects_for_user_owner_aware(
        user,
        ["view_storagepath"],
        StoragePath,
    )
    return _match_names_to_queryset(names, queryset, "name")


def _normalize(s: str) -> str:
    s = s.lower()
    s = re.sub(r"[^\w\s]", "", s)  # remove punctuation
    s = s.strip()
    return s


def _match_names_to_queryset(names: list[str], queryset, attr: str):
    results = []
    objects = list(queryset)
    object_names = [_normalize(getattr(obj, attr)) for obj in objects]

    for name in names:
        if not name:
            continue
        target = _normalize(name)

        # First try exact match
        if target in object_names:
            index = object_names.index(target)
            matched = objects.pop(index)
            object_names.pop(index)  # keep object list aligned after removal
            results.append(matched)
            continue

        # Fuzzy match fallback
        matches = difflib.get_close_matches(
            target,
            object_names,
            n=1,
            cutoff=MATCH_THRESHOLD,
        )
        if matches:
            index = object_names.index(matches[0])
            matched = objects.pop(index)
            object_names.pop(index)
            results.append(matched)
    return results


def extract_unmatched_names(
    names: list[str],
    matched_objects: list,
    attr="name",
) -> list[str]:
    matched_names = {getattr(obj, attr).lower() for obj in matched_objects}
    return [name for name in names if name.lower() not in matched_names]
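A small illustration of the exact-then-fuzzy matching above, with hypothetical names: "Invoices" clears the 0.8 difflib cutoff against an existing "Invoice" tag, while an unrelated name falls through to extract_unmatched_names.

# Sketch only: `user` is assumed to be a request user with view_tag permission.
from paperless_ai.matching import extract_unmatched_names
from paperless_ai.matching import match_tags_by_name

names = ["Invoices", "Quarterly Budget"]
matched = match_tags_by_name(names, user)
unmatched = extract_unmatched_names(names, matched)
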
0
src/paperless_ai/tests/__init__.py
Normal file
186
src/paperless_ai/tests/test_ai_classifier.py
Normal file
@@ -0,0 +1,186 @@
import json
from unittest.mock import MagicMock
from unittest.mock import patch

import pytest
from django.test import override_settings

from documents.models import Document
from paperless_ai.ai_classifier import build_prompt_with_rag
from paperless_ai.ai_classifier import build_prompt_without_rag
from paperless_ai.ai_classifier import get_ai_document_classification
from paperless_ai.ai_classifier import get_context_for_document


@pytest.fixture
def mock_document():
    doc = MagicMock(spec=Document)
    doc.title = "Test Title"
    doc.filename = "test_file.pdf"
    doc.created = "2023-01-01"
    doc.added = "2023-01-02"
    doc.modified = "2023-01-03"

    tag1 = MagicMock()
    tag1.name = "Tag1"
    tag2 = MagicMock()
    tag2.name = "Tag2"
    doc.tags.all = MagicMock(return_value=[tag1, tag2])

    doc.document_type = MagicMock()
    doc.document_type.name = "Invoice"
    doc.correspondent = MagicMock()
    doc.correspondent.name = "Test Correspondent"
    doc.archive_serial_number = "12345"
    doc.content = "This is the document content."

    cf1 = MagicMock(__str__=lambda x: "Value1")
    cf1.field = MagicMock()
    cf1.field.name = "Field1"
    cf1.value = "Value1"
    cf2 = MagicMock(__str__=lambda x: "Value2")
    cf2.field = MagicMock()
    cf2.field.name = "Field2"
    cf2.value = "Value2"
    doc.custom_fields.all = MagicMock(return_value=[cf1, cf2])

    return doc


@pytest.fixture
def mock_similar_documents():
    doc1 = MagicMock()
    doc1.content = "Content of document 1"
    doc1.title = "Title 1"
    doc1.filename = "file1.txt"

    doc2 = MagicMock()
    doc2.content = "Content of document 2"
    doc2.title = None
    doc2.filename = "file2.txt"

    doc3 = MagicMock()
    doc3.content = None
    doc3.title = None
    doc3.filename = None

    return [doc1, doc2, doc3]


@pytest.mark.django_db
@patch("paperless_ai.client.AIClient.run_llm_query")
@override_settings(
    LLM_BACKEND="ollama",
    LLM_MODEL="some_model",
)
def test_get_ai_document_classification_success(mock_run_llm_query, mock_document):
    mock_run_llm_query.return_value = {
        "title": "Test Title",
        "tags": ["test", "document"],
        "correspondents": ["John Doe"],
        "document_types": ["report"],
        "storage_paths": ["Reports"],
        "dates": ["2023-01-01"],
    }

    result = get_ai_document_classification(mock_document)

    assert result["title"] == "Test Title"
    assert result["tags"] == ["test", "document"]
    assert result["correspondents"] == ["John Doe"]
    assert result["document_types"] == ["report"]
    assert result["storage_paths"] == ["Reports"]
    assert result["dates"] == ["2023-01-01"]


@pytest.mark.django_db
@patch("paperless_ai.client.AIClient.run_llm_query")
def test_get_ai_document_classification_failure(mock_run_llm_query, mock_document):
    mock_run_llm_query.side_effect = Exception("LLM query failed")

    # assert raises an exception
    with pytest.raises(Exception):
        get_ai_document_classification(mock_document)


@pytest.mark.django_db
@patch("paperless_ai.client.AIClient.run_llm_query")
@patch("paperless_ai.ai_classifier.build_prompt_with_rag")
@override_settings(
    LLM_EMBEDDING_BACKEND="huggingface",
    LLM_EMBEDDING_MODEL="some_model",
    LLM_BACKEND="ollama",
    LLM_MODEL="some_model",
)
def test_use_rag_if_configured(
    mock_build_prompt_with_rag,
    mock_run_llm_query,
    mock_document,
):
    mock_build_prompt_with_rag.return_value = "Prompt with RAG"
    mock_run_llm_query.return_value.text = json.dumps({})
    get_ai_document_classification(mock_document)
    mock_build_prompt_with_rag.assert_called_once()


@pytest.mark.django_db
@patch("paperless_ai.client.AIClient.run_llm_query")
@patch("paperless_ai.ai_classifier.build_prompt_without_rag")
@patch("paperless.config.AIConfig")
@override_settings(
    LLM_BACKEND="ollama",
    LLM_MODEL="some_model",
)
def test_use_without_rag_if_not_configured(
    mock_ai_config,
    mock_build_prompt_without_rag,
    mock_run_llm_query,
    mock_document,
):
    mock_ai_config.llm_embedding_backend = None
    mock_build_prompt_without_rag.return_value = "Prompt without RAG"
    mock_run_llm_query.return_value.text = json.dumps({})
    get_ai_document_classification(mock_document)
    mock_build_prompt_without_rag.assert_called_once()


@pytest.mark.django_db
@override_settings(
    LLM_EMBEDDING_BACKEND="huggingface",
    LLM_BACKEND="ollama",
    LLM_MODEL="some_model",
)
def test_prompt_with_without_rag(mock_document):
    with patch(
        "paperless_ai.ai_classifier.get_context_for_document",
        return_value="Context from similar documents",
    ):
        prompt = build_prompt_without_rag(mock_document)
        assert "Additional context from similar documents:" not in prompt

        prompt = build_prompt_with_rag(mock_document)
        assert "Additional context from similar documents:" in prompt


@patch("paperless_ai.ai_classifier.query_similar_documents")
def test_get_context_for_document(
    mock_query_similar_documents,
    mock_document,
    mock_similar_documents,
):
    mock_query_similar_documents.return_value = mock_similar_documents

    result = get_context_for_document(mock_document, max_docs=2)

    expected_result = (
        "TITLE: Title 1\nContent of document 1\n\n"
        "TITLE: file2.txt\nContent of document 2"
    )
    assert result == expected_result
    mock_query_similar_documents.assert_called_once()


def test_get_context_for_document_no_similar_docs(mock_document):
    with patch("paperless_ai.ai_classifier.query_similar_documents", return_value=[]):
        result = get_context_for_document(mock_document)
        assert result == ""
334
src/paperless_ai/tests/test_ai_indexing.py
Normal file
@@ -0,0 +1,334 @@
import json
from unittest.mock import MagicMock
from unittest.mock import patch

import pytest
from django.test import override_settings
from django.utils import timezone
from llama_index.core.base.embeddings.base import BaseEmbedding

from documents.models import Document
from paperless_ai import indexing


@pytest.fixture
def temp_llm_index_dir(tmp_path):
    original_dir = indexing.settings.LLM_INDEX_DIR
    indexing.settings.LLM_INDEX_DIR = tmp_path
    yield tmp_path
    indexing.settings.LLM_INDEX_DIR = original_dir


@pytest.fixture
def real_document(db):
    return Document.objects.create(
        title="Test Document",
        content="This is some test content.",
        added=timezone.now(),
    )


@pytest.fixture
def mock_embed_model():
    fake = FakeEmbedding()
    with (
        patch("paperless_ai.indexing.get_embedding_model") as mock_index,
        patch(
            "paperless_ai.embedding.get_embedding_model",
        ) as mock_embedding,
    ):
        mock_index.return_value = fake
        mock_embedding.return_value = fake
        yield mock_index


class FakeEmbedding(BaseEmbedding):
    # TODO: maybe a better way to do this?
    def _aget_query_embedding(self, query: str) -> list[float]:
        return [0.1] * self.get_query_embedding_dim()

    def _get_query_embedding(self, query: str) -> list[float]:
        return [0.1] * self.get_query_embedding_dim()

    def _get_text_embedding(self, text: str) -> list[float]:
        return [0.1] * self.get_query_embedding_dim()

    def get_query_embedding_dim(self) -> int:
        return 384  # Match your real FAISS config


@pytest.mark.django_db
def test_build_document_node(real_document):
    nodes = indexing.build_document_node(real_document)
    assert len(nodes) > 0
    assert nodes[0].metadata["document_id"] == str(real_document.id)


@pytest.mark.django_db
def test_update_llm_index(
    temp_llm_index_dir,
    real_document,
    mock_embed_model,
):
    with patch("documents.models.Document.objects.all") as mock_all:
        mock_queryset = MagicMock()
        mock_queryset.exists.return_value = True
        mock_queryset.__iter__.return_value = iter([real_document])
        mock_all.return_value = mock_queryset
        indexing.update_llm_index(rebuild=True)

    assert any(temp_llm_index_dir.glob("*.json"))


@pytest.mark.django_db
def test_update_llm_index_removes_meta(
    temp_llm_index_dir,
    real_document,
    mock_embed_model,
):
    # Pre-create a meta.json with incorrect data
    (temp_llm_index_dir / "meta.json").write_text(
        json.dumps({"embedding_model": "old", "dim": 1}),
    )

    with patch("documents.models.Document.objects.all") as mock_all:
        mock_queryset = MagicMock()
        mock_queryset.exists.return_value = True
        mock_queryset.__iter__.return_value = iter([real_document])
        mock_all.return_value = mock_queryset
        indexing.update_llm_index(rebuild=True)

    meta = json.loads((temp_llm_index_dir / "meta.json").read_text())
    from paperless.config import AIConfig

    config = AIConfig()
    expected_model = config.llm_embedding_model or (
        "text-embedding-3-small"
        if config.llm_embedding_backend == "openai"
        else "sentence-transformers/all-MiniLM-L6-v2"
    )
    assert meta == {"embedding_model": expected_model, "dim": 384}


@pytest.mark.django_db
def test_update_llm_index_partial_update(
    temp_llm_index_dir,
    real_document,
    mock_embed_model,
):
    doc2 = Document.objects.create(
        title="Test Document 2",
        content="This is some test content 2.",
        added=timezone.now(),
        checksum="1234567890abcdef",
    )
    # Initial index
    with patch("documents.models.Document.objects.all") as mock_all:
        mock_queryset = MagicMock()
        mock_queryset.exists.return_value = True
        mock_queryset.__iter__.return_value = iter([real_document, doc2])
        mock_all.return_value = mock_queryset

        indexing.update_llm_index(rebuild=True)

    # modify document
    updated_document = real_document
    updated_document.modified = timezone.now()  # simulate modification

    # new doc
    doc3 = Document.objects.create(
        title="Test Document 3",
        content="This is some test content 3.",
        added=timezone.now(),
        checksum="abcdef1234567890",
    )

    with patch("documents.models.Document.objects.all") as mock_all:
        mock_queryset = MagicMock()
        mock_queryset.exists.return_value = True
        mock_queryset.__iter__.return_value = iter([updated_document, doc2, doc3])
        mock_all.return_value = mock_queryset

        # assert it logs "Updating %d nodes in LLM index." for the two changed documents
        with patch("paperless_ai.indexing.logger") as mock_logger:
            indexing.update_llm_index(rebuild=False)
            mock_logger.info.assert_called_once_with(
                "Updating %d nodes in LLM index.",
                2,
            )
        indexing.update_llm_index(rebuild=False)

    assert any(temp_llm_index_dir.glob("*.json"))


def test_get_or_create_storage_context_raises_exception(
    temp_llm_index_dir,
    mock_embed_model,
):
    with pytest.raises(Exception):
        indexing.get_or_create_storage_context(rebuild=False)


@override_settings(
    LLM_EMBEDDING_BACKEND="huggingface",
)
def test_load_or_build_index_builds_when_nodes_given(
    temp_llm_index_dir,
    real_document,
    mock_embed_model,
):
    with (
        patch(
            "paperless_ai.indexing.load_index_from_storage",
            side_effect=ValueError("Index not found"),
        ),
        patch(
            "paperless_ai.indexing.VectorStoreIndex",
            return_value=MagicMock(),
        ) as mock_index_cls,
        patch(
            "paperless_ai.indexing.get_or_create_storage_context",
            return_value=MagicMock(),
        ) as mock_storage,
    ):
        mock_storage.return_value.persist_dir = temp_llm_index_dir
        indexing.load_or_build_index(
            nodes=[indexing.build_document_node(real_document)],
        )
        mock_index_cls.assert_called_once()


def test_load_or_build_index_raises_exception_when_no_nodes(
    temp_llm_index_dir,
    mock_embed_model,
):
    with (
        patch(
            "paperless_ai.indexing.load_index_from_storage",
            side_effect=ValueError("Index not found"),
        ),
        patch(
            "paperless_ai.indexing.get_or_create_storage_context",
            return_value=MagicMock(),
        ),
    ):
        with pytest.raises(Exception):
            indexing.load_or_build_index()


@pytest.mark.django_db
def test_load_or_build_index_succeeds_when_nodes_given(
    temp_llm_index_dir,
    mock_embed_model,
):
    with (
        patch(
            "paperless_ai.indexing.load_index_from_storage",
            side_effect=ValueError("Index not found"),
        ),
        patch(
            "paperless_ai.indexing.VectorStoreIndex",
            return_value=MagicMock(),
        ) as mock_index_cls,
        patch(
            "paperless_ai.indexing.get_or_create_storage_context",
            return_value=MagicMock(),
        ) as mock_storage,
    ):
        mock_storage.return_value.persist_dir = temp_llm_index_dir
        indexing.load_or_build_index(
            nodes=[MagicMock()],
        )
        mock_index_cls.assert_called_once()


@pytest.mark.django_db
def test_add_or_update_document_updates_existing_entry(
    temp_llm_index_dir,
    real_document,
    mock_embed_model,
):
    indexing.update_llm_index(rebuild=True)
    indexing.llm_index_add_or_update_document(real_document)

    assert any(temp_llm_index_dir.glob("*.json"))


@pytest.mark.django_db
def test_remove_document_deletes_node_from_docstore(
    temp_llm_index_dir,
    real_document,
    mock_embed_model,
):
    indexing.update_llm_index(rebuild=True)
    index = indexing.load_or_build_index()
    assert len(index.docstore.docs) == 1

    indexing.llm_index_remove_document(real_document)
    index = indexing.load_or_build_index()
    assert len(index.docstore.docs) == 0


@pytest.mark.django_db
def test_update_llm_index_no_documents(
    temp_llm_index_dir,
    mock_embed_model,
):
    with patch("documents.models.Document.objects.all") as mock_all:
        mock_queryset = MagicMock()
        mock_queryset.exists.return_value = False
        mock_queryset.__iter__.return_value = iter([])
        mock_all.return_value = mock_queryset

        # check log message
        with patch("paperless_ai.indexing.logger") as mock_logger:
            indexing.update_llm_index(rebuild=True)
            mock_logger.warning.assert_called_once_with(
                "No documents found to index.",
            )


@override_settings(
    LLM_EMBEDDING_BACKEND="huggingface",
    LLM_BACKEND="ollama",
)
def test_query_similar_documents(
    temp_llm_index_dir,
    real_document,
):
    with (
        patch("paperless_ai.indexing.get_or_create_storage_context") as mock_storage,
        patch("paperless_ai.indexing.load_or_build_index") as mock_load_or_build_index,
        patch("paperless_ai.indexing.VectorIndexRetriever") as mock_retriever_cls,
        patch("paperless_ai.indexing.Document.objects.filter") as mock_filter,
    ):
        mock_storage.return_value = MagicMock()
        mock_storage.return_value.persist_dir = temp_llm_index_dir

        mock_index = MagicMock()
        mock_load_or_build_index.return_value = mock_index

        mock_retriever = MagicMock()
        mock_retriever_cls.return_value = mock_retriever

        mock_node1 = MagicMock()
        mock_node1.metadata = {"document_id": 1}

        mock_node2 = MagicMock()
        mock_node2.metadata = {"document_id": 2}

        mock_retriever.retrieve.return_value = [mock_node1, mock_node2]

        mock_filtered_docs = [MagicMock(pk=1), MagicMock(pk=2)]
        mock_filter.return_value = mock_filtered_docs

        result = indexing.query_similar_documents(real_document, top_k=3)

        mock_load_or_build_index.assert_called_once()
        mock_retriever_cls.assert_called_once()
        mock_retriever.retrieve.assert_called_once_with(
            "Test Document\nThis is some test content.",
        )
        mock_filter.assert_called_once_with(pk__in=[1, 2])

        assert result == mock_filtered_docs
142
src/paperless_ai/tests/test_chat.py
Normal file
@@ -0,0 +1,142 @@
from unittest.mock import MagicMock
from unittest.mock import patch

import pytest
from llama_index.core import VectorStoreIndex
from llama_index.core.schema import TextNode

from paperless_ai.chat import stream_chat_with_documents


@pytest.fixture(autouse=True)
def patch_embed_model():
    from llama_index.core import settings as llama_settings

    mock_embed_model = MagicMock()
    mock_embed_model._get_text_embedding_batch.return_value = [
        [0.1] * 1536,
    ]  # 1 vector per input
    llama_settings.Settings._embed_model = mock_embed_model
    yield
    llama_settings.Settings._embed_model = None


@pytest.fixture(autouse=True)
def patch_embed_nodes():
    with patch(
        "llama_index.core.indices.vector_store.base.embed_nodes",
    ) as mock_embed_nodes:
        mock_embed_nodes.side_effect = lambda nodes, *_args, **_kwargs: {
            node.node_id: [0.1] * 1536 for node in nodes
        }
        yield


@pytest.fixture
def mock_document():
    doc = MagicMock()
    doc.pk = 1
    doc.title = "Test Document"
    doc.filename = "test_file.pdf"
    doc.content = "This is the document content."
    return doc


def test_stream_chat_with_one_document_full_content(mock_document):
    with (
        patch("paperless_ai.chat.AIClient") as mock_client_cls,
        patch("paperless_ai.chat.load_or_build_index") as mock_load_index,
        patch(
            "paperless_ai.chat.RetrieverQueryEngine.from_args",
        ) as mock_query_engine_cls,
    ):
        mock_client = MagicMock()
        mock_client_cls.return_value = mock_client
        mock_client.llm = MagicMock()

        mock_node = TextNode(
            text="This is node content.",
            metadata={"document_id": str(mock_document.pk), "title": "Test Document"},
        )
        mock_index = MagicMock()
        mock_index.docstore.docs.values.return_value = [mock_node]
        mock_load_index.return_value = mock_index

        mock_response_stream = MagicMock()
        mock_response_stream.response_gen = iter(["chunk1", "chunk2"])
        mock_query_engine = MagicMock()
        mock_query_engine_cls.return_value = mock_query_engine
        mock_query_engine.query.return_value = mock_response_stream

        output = list(stream_chat_with_documents("What is this?", [mock_document]))

        assert output == ["chunk1", "chunk2"]


def test_stream_chat_with_multiple_documents_retrieval(patch_embed_nodes):
    with (
        patch("paperless_ai.chat.AIClient") as mock_client_cls,
        patch("paperless_ai.chat.load_or_build_index") as mock_load_index,
        patch(
            "paperless_ai.chat.RetrieverQueryEngine.from_args",
        ) as mock_query_engine_cls,
        patch.object(VectorStoreIndex, "as_retriever") as mock_as_retriever,
    ):
        # Mock AIClient and LLM
        mock_client = MagicMock()
        mock_client_cls.return_value = mock_client
        mock_client.llm = MagicMock()

        # Create two real TextNodes
        mock_node1 = TextNode(
            text="Content for doc 1.",
            metadata={"document_id": "1", "title": "Document 1"},
        )
        mock_node2 = TextNode(
            text="Content for doc 2.",
            metadata={"document_id": "2", "title": "Document 2"},
        )
        mock_index = MagicMock()
        mock_index.docstore.docs.values.return_value = [mock_node1, mock_node2]
        mock_load_index.return_value = mock_index

        # Patch as_retriever to return a retriever whose retrieve() returns both nodes
        mock_retriever = MagicMock()
        mock_retriever.retrieve.return_value = [mock_node1, mock_node2]
        mock_as_retriever.return_value = mock_retriever

        # Mock response stream
        mock_response_stream = MagicMock()
        mock_response_stream.response_gen = iter(["chunk1", "chunk2"])

        # Mock RetrieverQueryEngine
        mock_query_engine = MagicMock()
        mock_query_engine_cls.return_value = mock_query_engine
        mock_query_engine.query.return_value = mock_response_stream

        # Fake documents
        doc1 = MagicMock(pk=1)
        doc2 = MagicMock(pk=2)

        output = list(stream_chat_with_documents("What's up?", [doc1, doc2]))

        assert output == ["chunk1", "chunk2"]


def test_stream_chat_no_matching_nodes():
    with (
        patch("paperless_ai.chat.AIClient") as mock_client_cls,
        patch("paperless_ai.chat.load_or_build_index") as mock_load_index,
    ):
        mock_client = MagicMock()
        mock_client_cls.return_value = mock_client
        mock_client.llm = MagicMock()

        mock_index = MagicMock()
        # No matching nodes
        mock_index.docstore.docs.values.return_value = []
        mock_load_index.return_value = mock_index

        output = list(stream_chat_with_documents("Any info?", [MagicMock(pk=1)]))

        assert output == ["Sorry, I couldn't find any content to answer your question."]
111
src/paperless_ai/tests/test_client.py
Normal file
@@ -0,0 +1,111 @@
from unittest.mock import MagicMock
from unittest.mock import patch

import pytest
from llama_index.core.llms import ChatMessage
from llama_index.core.llms.llm import ToolSelection

from paperless_ai.client import AIClient


@pytest.fixture
def mock_ai_config():
    with patch("paperless_ai.client.AIConfig") as MockAIConfig:
        mock_config = MagicMock()
        MockAIConfig.return_value = mock_config
        yield mock_config


@pytest.fixture
def mock_ollama_llm():
    with patch("paperless_ai.client.Ollama") as MockOllama:
        yield MockOllama


@pytest.fixture
def mock_openai_llm():
    with patch("paperless_ai.client.OpenAI") as MockOpenAI:
        yield MockOpenAI


def test_get_llm_ollama(mock_ai_config, mock_ollama_llm):
    mock_ai_config.llm_backend = "ollama"
    mock_ai_config.llm_model = "test_model"
    mock_ai_config.llm_endpoint = "http://test-url"

    client = AIClient()

    mock_ollama_llm.assert_called_once_with(
        model="test_model",
        base_url="http://test-url",
        request_timeout=120,
    )
    assert client.llm == mock_ollama_llm.return_value


def test_get_llm_openai(mock_ai_config, mock_openai_llm):
    mock_ai_config.llm_backend = "openai"
    mock_ai_config.llm_model = "test_model"
    mock_ai_config.llm_api_key = "test_api_key"
    mock_ai_config.llm_endpoint = "http://test-url"

    client = AIClient()

    mock_openai_llm.assert_called_once_with(
        model="test_model",
        api_base="http://test-url",
        api_key="test_api_key",
    )
    assert client.llm == mock_openai_llm.return_value


def test_get_llm_unsupported_backend(mock_ai_config):
    mock_ai_config.llm_backend = "unsupported"

    with pytest.raises(ValueError, match="Unsupported LLM backend: unsupported"):
        AIClient()


def test_run_llm_query(mock_ai_config, mock_ollama_llm):
    mock_ai_config.llm_backend = "ollama"
    mock_ai_config.llm_model = "test_model"
    mock_ai_config.llm_endpoint = "http://test-url"

    mock_llm_instance = mock_ollama_llm.return_value

    tool_selection = ToolSelection(
        tool_id="call_test",
        tool_name="DocumentClassifierSchema",
        tool_kwargs={
            "title": "Test Title",
            "tags": ["test", "document"],
            "correspondents": ["John Doe"],
            "document_types": ["report"],
            "storage_paths": ["Reports"],
            "dates": ["2023-01-01"],
        },
    )

    mock_llm_instance.chat_with_tools.return_value = MagicMock()
    mock_llm_instance.get_tool_calls_from_response.return_value = [tool_selection]

    client = AIClient()
    result = client.run_llm_query("test_prompt")

    assert result["title"] == "Test Title"


def test_run_chat(mock_ai_config, mock_ollama_llm):
    mock_ai_config.llm_backend = "ollama"
    mock_ai_config.llm_model = "test_model"
    mock_ai_config.llm_endpoint = "http://test-url"

    mock_llm_instance = mock_ollama_llm.return_value
    mock_llm_instance.chat.return_value = "test_chat_result"

    client = AIClient()
    messages = [ChatMessage(role="user", content="Hello")]
    result = client.run_chat(messages)

    mock_llm_instance.chat.assert_called_once_with(messages)
    assert result == "test_chat_result"
169
src/paperless_ai/tests/test_embedding.py
Normal file
169
src/paperless_ai/tests/test_embedding.py
Normal file
@@ -0,0 +1,169 @@
|
||||
import json
|
||||
from unittest.mock import MagicMock
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
from django.conf import settings
|
||||
|
||||
from documents.models import Document
|
||||
from paperless.models import LLMEmbeddingBackend
|
||||
from paperless_ai.embedding import build_llm_index_text
|
||||
from paperless_ai.embedding import get_embedding_dim
|
||||
from paperless_ai.embedding import get_embedding_model
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_ai_config():
|
||||
with patch("paperless_ai.embedding.AIConfig") as MockAIConfig:
|
||||
yield MockAIConfig
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def temp_llm_index_dir(tmp_path):
|
||||
original_dir = settings.LLM_INDEX_DIR
|
||||
settings.LLM_INDEX_DIR = tmp_path
|
||||
yield tmp_path
|
||||
settings.LLM_INDEX_DIR = original_dir
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_document():
|
||||
doc = MagicMock(spec=Document)
|
||||
doc.title = "Test Title"
|
||||
doc.filename = "test_file.pdf"
|
||||
doc.created = "2023-01-01"
|
||||
doc.added = "2023-01-02"
|
||||
doc.modified = "2023-01-03"
|
||||
|
||||
tag1 = MagicMock()
|
||||
tag1.name = "Tag1"
|
||||
tag2 = MagicMock()
|
||||
tag2.name = "Tag2"
|
||||
doc.tags.all = MagicMock(return_value=[tag1, tag2])
|
||||
|
||||
doc.document_type = MagicMock()
|
||||
doc.document_type.name = "Invoice"
|
||||
doc.correspondent = MagicMock()
|
||||
doc.correspondent.name = "Test Correspondent"
|
||||
doc.archive_serial_number = "12345"
|
||||
doc.content = "This is the document content."
|
||||
|
||||
cf1 = MagicMock(__str__=lambda x: "Value1")
|
||||
cf1.field = MagicMock()
|
||||
cf1.field.name = "Field1"
|
||||
cf1.value = "Value1"
|
||||
cf2 = MagicMock(__str__=lambda x: "Value2")
|
||||
cf2.field = MagicMock()
|
||||
cf2.field.name = "Field2"
|
||||
cf2.value = "Value2"
|
||||
doc.custom_fields.all = MagicMock(return_value=[cf1, cf2])
|
||||
|
||||
return doc
|
||||
|
||||
|
||||
def test_get_embedding_model_openai(mock_ai_config):
    mock_ai_config.return_value.llm_embedding_backend = LLMEmbeddingBackend.OPENAI
    mock_ai_config.return_value.llm_embedding_model = "text-embedding-3-small"
    mock_ai_config.return_value.llm_api_key = "test_api_key"

    with patch("paperless_ai.embedding.OpenAIEmbedding") as MockOpenAIEmbedding:
        model = get_embedding_model()
        MockOpenAIEmbedding.assert_called_once_with(
            model="text-embedding-3-small",
            api_key="test_api_key",
        )
        assert model == MockOpenAIEmbedding.return_value


def test_get_embedding_model_huggingface(mock_ai_config):
    mock_ai_config.return_value.llm_embedding_backend = LLMEmbeddingBackend.HUGGINGFACE
    mock_ai_config.return_value.llm_embedding_model = (
        "sentence-transformers/all-MiniLM-L6-v2"
    )

    with patch(
        "paperless_ai.embedding.HuggingFaceEmbedding",
    ) as MockHuggingFaceEmbedding:
        model = get_embedding_model()
        MockHuggingFaceEmbedding.assert_called_once_with(
            model_name="sentence-transformers/all-MiniLM-L6-v2",
        )
        assert model == MockHuggingFaceEmbedding.return_value


def test_get_embedding_model_invalid_backend(mock_ai_config):
    mock_ai_config.return_value.llm_embedding_backend = "INVALID_BACKEND"

    with pytest.raises(
        ValueError,
        match="Unsupported embedding backend: INVALID_BACKEND",
    ):
        get_embedding_model()
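The three tests above fully specify the dispatch in get_embedding_model: the OpenAI and Hugging Face backends construct the corresponding llama_index embedding class, and anything else raises. A sketch consistent with those assertions; the AIConfig import path is an assumption:

# Hypothetical reconstruction, inferred from the assertions above.
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.embeddings.openai import OpenAIEmbedding

from paperless.config import AIConfig  # assumed location of AIConfig
from paperless.models import LLMEmbeddingBackend


def get_embedding_model():
    config = AIConfig()
    if config.llm_embedding_backend == LLMEmbeddingBackend.OPENAI:
        return OpenAIEmbedding(
            model=config.llm_embedding_model,
            api_key=config.llm_api_key,
        )
    if config.llm_embedding_backend == LLMEmbeddingBackend.HUGGINGFACE:
        return HuggingFaceEmbedding(
            model_name=config.llm_embedding_model,
        )
    raise ValueError(
        f"Unsupported embedding backend: {config.llm_embedding_backend}",
    )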
def test_get_embedding_dim_infers_and_saves(temp_llm_index_dir, mock_ai_config):
    mock_ai_config.return_value.llm_embedding_backend = "openai"
    mock_ai_config.return_value.llm_embedding_model = None

    class DummyEmbedding:
        def get_text_embedding(self, text):
            return [0.0] * 7

    with patch(
        "paperless_ai.embedding.get_embedding_model",
        return_value=DummyEmbedding(),
    ) as mock_get:
        dim = get_embedding_dim()
        mock_get.assert_called_once()

    assert dim == 7
    meta = json.loads((temp_llm_index_dir / "meta.json").read_text())
    assert meta == {"embedding_model": "text-embedding-3-small", "dim": 7}


def test_get_embedding_dim_reads_existing_meta(temp_llm_index_dir, mock_ai_config):
    mock_ai_config.return_value.llm_embedding_backend = "openai"
    mock_ai_config.return_value.llm_embedding_model = None

    (temp_llm_index_dir / "meta.json").write_text(
        json.dumps({"embedding_model": "text-embedding-3-small", "dim": 11}),
    )

    with patch("paperless_ai.embedding.get_embedding_model") as mock_get:
        assert get_embedding_dim() == 11
        mock_get.assert_not_called()


def test_get_embedding_dim_raises_on_model_change(temp_llm_index_dir, mock_ai_config):
    mock_ai_config.return_value.llm_embedding_backend = "openai"
    mock_ai_config.return_value.llm_embedding_model = None

    (temp_llm_index_dir / "meta.json").write_text(
        json.dumps({"embedding_model": "old", "dim": 11}),
    )

    with pytest.raises(
        RuntimeError,
        match="Embedding model changed from old to text-embedding-3-small",
    ):
        get_embedding_dim()
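These three tests describe a small caching contract: the embedding dimension is probed once, persisted next to the model name in meta.json, reused on later runs without touching the model, and treated as a hard error if the configured model no longer matches the persisted one. A sketch consistent with that contract; the default-model lookup table and file handling are assumptions:

# Hypothetical reconstruction of the meta.json contract the tests describe.
import json
from pathlib import Path

from django.conf import settings

# Assumed default when no explicit embedding model is configured.
_DEFAULT_MODELS = {"openai": "text-embedding-3-small"}


def get_embedding_dim() -> int:
    # AIConfig and get_embedding_model are siblings in paperless_ai.embedding
    # (sketched earlier); their exact definitions are assumptions.
    config = AIConfig()
    model = config.llm_embedding_model or _DEFAULT_MODELS[config.llm_embedding_backend]

    meta_path = Path(settings.LLM_INDEX_DIR) / "meta.json"
    if meta_path.exists():
        meta = json.loads(meta_path.read_text())
        if meta["embedding_model"] != model:
            raise RuntimeError(
                f"Embedding model changed from {meta['embedding_model']} to {model}",
            )
        return meta["dim"]

    # Probe the model once to learn the vector size, then persist it.
    dim = len(get_embedding_model().get_text_embedding("test"))
    meta_path.write_text(json.dumps({"embedding_model": model, "dim": dim}))
    return dim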
def test_build_llm_index_text(mock_document):
    with patch("documents.models.Note.objects.filter") as mock_notes_filter:
        mock_notes_filter.return_value = [
            MagicMock(note="Note1"),
            MagicMock(note="Note2"),
        ]

        result = build_llm_index_text(mock_document)

    assert "Title: Test Title" in result
    assert "Filename: test_file.pdf" in result
    assert "Created: 2023-01-01" in result
    assert "Tags: Tag1, Tag2" in result
    assert "Document Type: Invoice" in result
    assert "Correspondent: Test Correspondent" in result
    assert "Notes: Note1,Note2" in result
    assert "Content:\n\nThis is the document content." in result
    assert "Custom Field - Field1: Value1\nCustom Field - Field2: Value2" in result
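The assertions enumerate the exact flat-text layout that build_llm_index_text feeds to the embedding model. A sketch that satisfies every asserted substring; the Note queryset filter keyword and any field ordering beyond the assertions are assumptions:

# Hypothetical sketch matching the substrings asserted above.
from documents.models import Document
from documents.models import Note


def build_llm_index_text(doc: Document) -> str:
    notes = ",".join(n.note for n in Note.objects.filter(document=doc))
    tags = ", ".join(t.name for t in doc.tags.all())
    custom_fields = "\n".join(
        # str(field) yields the field's value, as the mock's __str__ shows.
        f"Custom Field - {field.field.name}: {field}"
        for field in doc.custom_fields.all()
    )
    return (
        f"Title: {doc.title}\n"
        f"Filename: {doc.filename}\n"
        f"Created: {doc.created}\n"
        f"Tags: {tags}\n"
        f"Document Type: {doc.document_type.name}\n"
        f"Correspondent: {doc.correspondent.name}\n"
        f"Notes: {notes}\n"
        f"Content:\n\n{doc.content}\n"
        f"{custom_fields}\n"
    )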
86
src/paperless_ai/tests/test_matching.py
Normal file
@@ -0,0 +1,86 @@
from unittest.mock import patch

from django.test import TestCase

from documents.models import Correspondent
from documents.models import DocumentType
from documents.models import StoragePath
from documents.models import Tag
from paperless_ai.matching import extract_unmatched_names
from paperless_ai.matching import match_correspondents_by_name
from paperless_ai.matching import match_document_types_by_name
from paperless_ai.matching import match_storage_paths_by_name
from paperless_ai.matching import match_tags_by_name


class TestAIMatching(TestCase):
    def setUp(self):
        # Create test data for Tag
        self.tag1 = Tag.objects.create(name="Test Tag 1")
        self.tag2 = Tag.objects.create(name="Test Tag 2")

        # Create test data for Correspondent
        self.correspondent1 = Correspondent.objects.create(name="Test Correspondent 1")
        self.correspondent2 = Correspondent.objects.create(name="Test Correspondent 2")

        # Create test data for DocumentType
        self.document_type1 = DocumentType.objects.create(name="Test Document Type 1")
        self.document_type2 = DocumentType.objects.create(name="Test Document Type 2")

        # Create test data for StoragePath
        self.storage_path1 = StoragePath.objects.create(name="Test Storage Path 1")
        self.storage_path2 = StoragePath.objects.create(name="Test Storage Path 2")
@patch("paperless_ai.matching.get_objects_for_user_owner_aware")
|
||||
def test_match_tags_by_name(self, mock_get_objects):
|
||||
mock_get_objects.return_value = Tag.objects.all()
|
||||
names = ["Test Tag 1", "Nonexistent Tag"]
|
||||
result = match_tags_by_name(names, user=None)
|
||||
self.assertEqual(len(result), 1)
|
||||
self.assertEqual(result[0].name, "Test Tag 1")
|
||||
|
||||
@patch("paperless_ai.matching.get_objects_for_user_owner_aware")
|
||||
def test_match_correspondents_by_name(self, mock_get_objects):
|
||||
mock_get_objects.return_value = Correspondent.objects.all()
|
||||
names = ["Test Correspondent 1", "Nonexistent Correspondent"]
|
||||
result = match_correspondents_by_name(names, user=None)
|
||||
self.assertEqual(len(result), 1)
|
||||
self.assertEqual(result[0].name, "Test Correspondent 1")
|
||||
|
||||
@patch("paperless_ai.matching.get_objects_for_user_owner_aware")
|
||||
def test_match_document_types_by_name(self, mock_get_objects):
|
||||
mock_get_objects.return_value = DocumentType.objects.all()
|
||||
names = ["Test Document Type 1", "Nonexistent Document Type"]
|
||||
result = match_document_types_by_name(names, user=None)
|
||||
self.assertEqual(len(result), 1)
|
||||
self.assertEqual(result[0].name, "Test Document Type 1")
|
||||
|
||||
@patch("paperless_ai.matching.get_objects_for_user_owner_aware")
|
||||
def test_match_storage_paths_by_name(self, mock_get_objects):
|
||||
mock_get_objects.return_value = StoragePath.objects.all()
|
||||
names = ["Test Storage Path 1", "Nonexistent Storage Path"]
|
||||
result = match_storage_paths_by_name(names, user=None)
|
||||
self.assertEqual(len(result), 1)
|
||||
self.assertEqual(result[0].name, "Test Storage Path 1")
|
||||
|
||||
def test_extract_unmatched_names(self):
|
||||
llm_names = ["Test Tag 1", "Nonexistent Tag"]
|
||||
matched_objects = [self.tag1]
|
||||
unmatched_names = extract_unmatched_names(llm_names, matched_objects)
|
||||
self.assertEqual(unmatched_names, ["Nonexistent Tag"])
|
||||
|
||||
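extract_unmatched_names closes the loop: given the raw names the LLM proposed and the objects the matcher resolved, it reports what fell through, so callers can surface those as new-object suggestions. A sketch consistent with the test; exact-name comparison is all the test requires:

# Hypothetical sketch; the test above only requires an exact-name difference.
def extract_unmatched_names(llm_names: list[str], matched_objects: list) -> list[str]:
    matched = {obj.name for obj in matched_objects}
    return [name for name in llm_names if name not in matched]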
@patch("paperless_ai.matching.get_objects_for_user_owner_aware")
|
||||
def test_match_tags_by_name_with_empty_names(self, mock_get_objects):
|
||||
mock_get_objects.return_value = Tag.objects.all()
|
||||
names = [None, "", " "]
|
||||
result = match_tags_by_name(names, user=None)
|
||||
self.assertEqual(result, [])
|
||||
|
||||
@patch("paperless_ai.matching.get_objects_for_user_owner_aware")
|
||||
def test_match_tags_with_fuzzy_matching(self, mock_get_objects):
|
||||
mock_get_objects.return_value = Tag.objects.all()
|
||||
names = ["Test Taag 1", "Teest Tag 2"]
|
||||
result = match_tags_by_name(names, user=None)
|
||||
self.assertEqual(len(result), 2)
|
||||
self.assertEqual(result[0].name, "Test Tag 1")
|
||||
self.assertEqual(result[1].name, "Test Tag 2")
|
||||
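Taken together, the matching tests imply three properties: lookups go through get_objects_for_user_owner_aware so permissions are respected, blank or None names are skipped, and small typos ("Test Taag 1") still resolve. A sketch of such a matcher; the use of rapidfuzz, the 80-point cutoff, and the permission string are all assumptions, not confirmed by this diff:

# Hypothetical sketch of tolerant name matching; library and threshold are assumed.
from rapidfuzz import fuzz

from documents.models import Tag
from documents.permissions import get_objects_for_user_owner_aware  # assumed path


def match_tags_by_name(names, user):
    objects = get_objects_for_user_owner_aware(user, "documents.view_tag", Tag)
    results = []
    for name in names:
        if not name or not name.strip():
            continue  # None, "" and whitespace-only names never match
        scored = [
            (fuzz.ratio(name.lower(), obj.name.lower()), obj) for obj in objects
        ]
        if not scored:
            continue  # no candidate objects visible to this user
        score, best = max(scored, key=lambda pair: pair[0])
        if score >= 80:  # assumed similarity cutoff
            results.append(best)
    return results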