Compare commits

..

1 Commit

Author SHA1 Message Date
dependabot[bot]
7a1bdf3850 docker(deps): Bump astral-sh/uv
Bumps [astral-sh/uv](https://github.com/astral-sh/uv) from 0.10.5-python3.12-trixie-slim to 0.10.7-python3.12-trixie-slim.
- [Release notes](https://github.com/astral-sh/uv/releases)
- [Changelog](https://github.com/astral-sh/uv/blob/main/CHANGELOG.md)
- [Commits](https://github.com/astral-sh/uv/compare/0.10.5...0.10.7)

---
updated-dependencies:
- dependency-name: astral-sh/uv
  dependency-version: 0.10.7-python3.12-trixie-slim
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-02-27 20:18:30 +00:00
6 changed files with 33 additions and 58 deletions

View File

@@ -30,7 +30,7 @@ RUN set -eux \
# Purpose: Installs s6-overlay and rootfs
# Comments:
# - Don't leave anything extra in here either
FROM ghcr.io/astral-sh/uv:0.10.5-python3.12-trixie-slim AS s6-overlay-base
FROM ghcr.io/astral-sh/uv:0.10.7-python3.12-trixie-slim AS s6-overlay-base
WORKDIR /usr/src/s6

View File

@@ -1,25 +1,22 @@
from django.core.management import BaseCommand
from django.db import transaction
from documents.management.commands.base import PaperlessCommand
from documents.management.commands.mixins import ProgressBarMixin
from documents.tasks import index_optimize
from documents.tasks import index_reindex
class Command(PaperlessCommand):
class Command(ProgressBarMixin, BaseCommand):
help = "Manages the document index."
def add_arguments(self, parser):
super().add_arguments(parser)
parser.add_argument("command", choices=["reindex", "optimize"])
self.add_argument_progress_bar_mixin(parser)
def handle(self, *args, **options):
self.handle_progress_bar_mixin(**options)
with transaction.atomic():
if options["command"] == "reindex":
index_reindex(
iter_wrapper=lambda docs: self.track(
docs,
description="Indexing documents...",
),
)
index_reindex(progress_bar_disable=self.no_progress_bar)
elif options["command"] == "optimize":
index_optimize()

View File

@@ -1,22 +1,22 @@
from typing import Any
from django.core.management import BaseCommand
from django.db import transaction
from documents.management.commands.base import PaperlessCommand
from documents.management.commands.mixins import ProgressBarMixin
from documents.tasks import llmindex_index
class Command(PaperlessCommand):
class Command(ProgressBarMixin, BaseCommand):
help = "Manages the LLM-based vector index for Paperless."
def add_arguments(self, parser: Any) -> None:
super().add_arguments(parser)
def add_arguments(self, parser):
parser.add_argument("command", choices=["rebuild", "update"])
self.add_argument_progress_bar_mixin(parser)
def handle(self, *args: Any, **options: Any) -> None:
llmindex_index(
rebuild=options["command"] == "rebuild",
scheduled=False,
iter_wrapper=lambda docs: self.track(
docs,
description="Indexing documents...",
),
)
def handle(self, *args, **options):
self.handle_progress_bar_mixin(**options)
with transaction.atomic():
llmindex_index(
progress_bar_disable=self.no_progress_bar,
rebuild=options["command"] == "rebuild",
scheduled=False,
)

View File

@@ -4,13 +4,11 @@ import logging
import shutil
import uuid
import zipfile
from collections.abc import Callable
from collections.abc import Iterable
from pathlib import Path
from tempfile import TemporaryDirectory
from tempfile import mkstemp
from typing import TypeVar
import tqdm
from celery import Task
from celery import shared_task
from celery import states
@@ -68,19 +66,11 @@ from paperless_ai.indexing import llm_index_add_or_update_document
from paperless_ai.indexing import llm_index_remove_document
from paperless_ai.indexing import update_llm_index
_T = TypeVar("_T")
IterWrapper = Callable[[Iterable[_T]], Iterable[_T]]
if settings.AUDIT_LOG_ENABLED:
from auditlog.models import LogEntry
logger = logging.getLogger("paperless.tasks")
def _identity(iterable: Iterable[_T]) -> Iterable[_T]:
return iterable
@shared_task
def index_optimize() -> None:
ix = index.open_index()
@@ -88,13 +78,13 @@ def index_optimize() -> None:
writer.commit(optimize=True)
def index_reindex(*, iter_wrapper: IterWrapper[Document] = _identity) -> None:
def index_reindex(*, progress_bar_disable=False) -> None:
documents = Document.objects.all()
ix = index.open_index(recreate=True)
with AsyncWriter(ix) as writer:
for document in iter_wrapper(documents):
for document in tqdm.tqdm(documents, disable=progress_bar_disable):
index.update_document(writer, document)
@@ -275,6 +265,7 @@ def bulk_update_documents(document_ids) -> None:
ai_config = AIConfig()
if ai_config.llm_index_enabled:
update_llm_index(
progress_bar_disable=True,
rebuild=False,
)
@@ -603,7 +594,7 @@ def update_document_parent_tags(tag: Tag, new_parent: Tag) -> None:
@shared_task
def llmindex_index(
*,
iter_wrapper: IterWrapper[Document] = _identity,
progress_bar_disable=True,
rebuild=False,
scheduled=True,
auto=False,
@@ -626,7 +617,7 @@ def llmindex_index(
try:
result = update_llm_index(
iter_wrapper=iter_wrapper,
progress_bar_disable=progress_bar_disable,
rebuild=rebuild,
)
task.status = states.SUCCESS

View File

@@ -378,6 +378,7 @@ class ApplicationConfigurationViewSet(ModelViewSet):
):
# AI index was just enabled and vector store file does not exist
llmindex_index.delay(
progress_bar_disable=True,
rebuild=True,
scheduled=False,
auto=True,

View File

@@ -1,13 +1,11 @@
import logging
import shutil
from collections.abc import Callable
from collections.abc import Iterable
from datetime import timedelta
from pathlib import Path
from typing import TypeVar
import faiss
import llama_index.core.settings as llama_settings
import tqdm
from celery import states
from django.conf import settings
from django.utils import timezone
@@ -31,14 +29,6 @@ from paperless_ai.embedding import build_llm_index_text
from paperless_ai.embedding import get_embedding_dim
from paperless_ai.embedding import get_embedding_model
_T = TypeVar("_T")
IterWrapper = Callable[[Iterable[_T]], Iterable[_T]]
def _identity(iterable: Iterable[_T]) -> Iterable[_T]:
return iterable
logger = logging.getLogger("paperless_ai.indexing")
@@ -166,11 +156,7 @@ def vector_store_file_exists():
return Path(settings.LLM_INDEX_DIR / "default__vector_store.json").exists()
def update_llm_index(
*,
iter_wrapper: IterWrapper[Document] = _identity,
rebuild=False,
) -> str:
def update_llm_index(*, progress_bar_disable=False, rebuild=False) -> str:
"""
Rebuild or update the LLM index.
"""
@@ -190,7 +176,7 @@ def update_llm_index(
embed_model = get_embedding_model()
llama_settings.Settings.embed_model = embed_model
storage_context = get_or_create_storage_context(rebuild=True)
for document in iter_wrapper(documents):
for document in tqdm.tqdm(documents, disable=progress_bar_disable):
document_nodes = build_document_node(document)
nodes.extend(document_nodes)
@@ -198,7 +184,7 @@ def update_llm_index(
nodes=nodes,
storage_context=storage_context,
embed_model=embed_model,
show_progress=False,
show_progress=not progress_bar_disable,
)
msg = "LLM index rebuilt successfully."
else:
@@ -210,7 +196,7 @@ def update_llm_index(
for node in index.docstore.get_nodes(all_node_ids)
}
for document in iter_wrapper(documents):
for document in tqdm.tqdm(documents, disable=progress_bar_disable):
doc_id = str(document.id)
document_modified = document.modified.isoformat()