Replaces tqdm progress bars with rich.progress

This commit is contained in:
Trenton H
2026-02-09 15:52:52 -08:00
parent c4ed4e7f36
commit e0b45539a6
11 changed files with 368 additions and 205 deletions

View File

@@ -1,10 +1,14 @@
import logging import logging
import multiprocessing import multiprocessing
import tqdm
from django import db from django import db
from django.conf import settings from django.conf import settings
from django.core.management.base import BaseCommand from django.core.management.base import BaseCommand
from rich.progress import BarColumn
from rich.progress import Progress
from rich.progress import TaskProgressColumn
from rich.progress import TextColumn
from rich.progress import TimeRemainingColumn
from documents.management.commands.mixins import MultiProcessMixin from documents.management.commands.mixins import MultiProcessMixin
from documents.management.commands.mixins import ProgressBarMixin from documents.management.commands.mixins import ProgressBarMixin
@@ -75,20 +79,24 @@ class Command(MultiProcessMixin, ProgressBarMixin, BaseCommand):
try: try:
logging.getLogger().handlers[0].level = logging.ERROR logging.getLogger().handlers[0].level = logging.ERROR
with Progress(
TextColumn("[progress.description]{task.description}"),
BarColumn(),
TaskProgressColumn(),
TimeRemainingColumn(),
disable=self.no_progress_bar,
) as progress:
task = progress.add_task("Archiving documents", total=len(document_ids))
if self.process_count == 1: if self.process_count == 1:
for doc_id in document_ids: for doc_id in document_ids:
update_document_content_maybe_archive_file(doc_id) update_document_content_maybe_archive_file(doc_id)
progress.update(task, advance=1)
else: # pragma: no cover else: # pragma: no cover
with multiprocessing.Pool(self.process_count) as pool: with multiprocessing.Pool(self.process_count) as pool:
list( for _ in pool.imap_unordered(
tqdm.tqdm(
pool.imap_unordered(
update_document_content_maybe_archive_file, update_document_content_maybe_archive_file,
document_ids, document_ids,
), ):
total=len(document_ids), progress.update(task, advance=1)
disable=self.no_progress_bar,
),
)
except KeyboardInterrupt: except KeyboardInterrupt:
self.stdout.write(self.style.NOTICE("Aborting...")) self.stdout.write(self.style.NOTICE("Aborting..."))

View File

@@ -6,7 +6,6 @@ import tempfile
from pathlib import Path from pathlib import Path
from typing import TYPE_CHECKING from typing import TYPE_CHECKING
import tqdm
from allauth.mfa.models import Authenticator from allauth.mfa.models import Authenticator
from allauth.socialaccount.models import SocialAccount from allauth.socialaccount.models import SocialAccount
from allauth.socialaccount.models import SocialApp from allauth.socialaccount.models import SocialApp
@@ -24,6 +23,11 @@ from django.utils import timezone
from filelock import FileLock from filelock import FileLock
from guardian.models import GroupObjectPermission from guardian.models import GroupObjectPermission
from guardian.models import UserObjectPermission from guardian.models import UserObjectPermission
from rich.progress import BarColumn
from rich.progress import Progress
from rich.progress import TaskProgressColumn
from rich.progress import TextColumn
from rich.progress import TimeRemainingColumn
if TYPE_CHECKING: if TYPE_CHECKING:
from django.db.models import QuerySet from django.db.models import QuerySet
@@ -309,11 +313,18 @@ class Command(CryptMixin, BaseCommand):
document_manifest = manifest_dict["documents"] document_manifest = manifest_dict["documents"]
# 3. Export files from each document # 3. Export files from each document
for index, document_dict in tqdm.tqdm( with Progress(
enumerate(document_manifest), TextColumn("[progress.description]{task.description}"),
total=len(document_manifest), BarColumn(),
TaskProgressColumn(),
TimeRemainingColumn(),
disable=self.no_progress_bar, disable=self.no_progress_bar,
): ) as progress:
task = progress.add_task(
"Exporting documents",
total=len(document_manifest),
)
for index, document_dict in enumerate(document_manifest):
document = document_map[document_dict["pk"]] document = document_map[document_dict["pk"]]
# 3.1. generate a unique filename # 3.1. generate a unique filename
@@ -357,6 +368,7 @@ class Command(CryptMixin, BaseCommand):
content, content,
manifest_name, manifest_name,
) )
progress.update(task, advance=1)
# These were exported already # These were exported already
if self.split_manifest: if self.split_manifest:

View File

@@ -3,9 +3,13 @@ import multiprocessing
from typing import Final from typing import Final
import rapidfuzz import rapidfuzz
import tqdm
from django.core.management import BaseCommand from django.core.management import BaseCommand
from django.core.management import CommandError from django.core.management import CommandError
from rich.progress import BarColumn
from rich.progress import Progress
from rich.progress import TaskProgressColumn
from rich.progress import TextColumn
from rich.progress import TimeRemainingColumn
from documents.management.commands.mixins import MultiProcessMixin from documents.management.commands.mixins import MultiProcessMixin
from documents.management.commands.mixins import ProgressBarMixin from documents.management.commands.mixins import ProgressBarMixin
@@ -106,19 +110,25 @@ class Command(MultiProcessMixin, ProgressBarMixin, BaseCommand):
work_pkgs.append(_WorkPackage(first_doc, second_doc)) work_pkgs.append(_WorkPackage(first_doc, second_doc))
# Don't spin up a pool of 1 process # Don't spin up a pool of 1 process
with Progress(
TextColumn("[progress.description]{task.description}"),
BarColumn(),
TaskProgressColumn(),
TimeRemainingColumn(),
disable=self.no_progress_bar,
) as progress:
task = progress.add_task("Fuzzy matching documents", total=len(work_pkgs))
if self.process_count == 1: if self.process_count == 1:
results = [] results = []
for work in tqdm.tqdm(work_pkgs, disable=self.no_progress_bar): for work in work_pkgs:
results.append(_process_and_match(work)) results.append(_process_and_match(work))
progress.update(task, advance=1)
else: # pragma: no cover else: # pragma: no cover
with multiprocessing.Pool(processes=self.process_count) as pool: with multiprocessing.Pool(processes=self.process_count) as pool:
results = list( results = []
tqdm.tqdm( for result in pool.imap_unordered(_process_and_match, work_pkgs):
pool.imap_unordered(_process_and_match, work_pkgs), results.append(result)
total=len(work_pkgs), progress.update(task, advance=1)
disable=self.no_progress_bar,
),
)
# Check results # Check results
messages = [] messages = []

View File

@@ -8,7 +8,6 @@ from pathlib import Path
from zipfile import ZipFile from zipfile import ZipFile
from zipfile import is_zipfile from zipfile import is_zipfile
import tqdm
from django.conf import settings from django.conf import settings
from django.contrib.auth.models import Permission from django.contrib.auth.models import Permission
from django.contrib.auth.models import User from django.contrib.auth.models import User
@@ -23,6 +22,11 @@ from django.db import transaction
from django.db.models.signals import m2m_changed from django.db.models.signals import m2m_changed
from django.db.models.signals import post_save from django.db.models.signals import post_save
from filelock import FileLock from filelock import FileLock
from rich.progress import BarColumn
from rich.progress import Progress
from rich.progress import TaskProgressColumn
from rich.progress import TextColumn
from rich.progress import TimeRemainingColumn
from documents.file_handling import create_source_path_directory from documents.file_handling import create_source_path_directory
from documents.management.commands.mixins import CryptMixin from documents.management.commands.mixins import CryptMixin
@@ -365,7 +369,18 @@ class Command(CryptMixin, BaseCommand):
filter(lambda r: r["model"] == "documents.document", self.manifest), filter(lambda r: r["model"] == "documents.document", self.manifest),
) )
for record in tqdm.tqdm(manifest_documents, disable=self.no_progress_bar): with Progress(
TextColumn("[progress.description]{task.description}"),
BarColumn(),
TaskProgressColumn(),
TimeRemainingColumn(),
disable=self.no_progress_bar,
) as progress:
task = progress.add_task(
"Importing documents",
total=len(manifest_documents),
)
for record in manifest_documents:
document = Document.objects.get(pk=record["pk"]) document = Document.objects.get(pk=record["pk"])
doc_file = record[EXPORTER_FILE_NAME] doc_file = record[EXPORTER_FILE_NAME]
@@ -417,6 +432,7 @@ class Command(CryptMixin, BaseCommand):
copy_file_with_basic_stats(archive_path, document.archive_path) copy_file_with_basic_stats(archive_path, document.archive_path)
document.save() document.save()
progress.update(task, advance=1)
def decrypt_secret_fields(self) -> None: def decrypt_secret_fields(self) -> None:
""" """

View File

@@ -1,8 +1,12 @@
import logging import logging
import tqdm
from django.core.management.base import BaseCommand from django.core.management.base import BaseCommand
from django.db.models.signals import post_save from django.db.models.signals import post_save
from rich.progress import BarColumn
from rich.progress import Progress
from rich.progress import TaskProgressColumn
from rich.progress import TextColumn
from rich.progress import TimeRemainingColumn
from documents.management.commands.mixins import ProgressBarMixin from documents.management.commands.mixins import ProgressBarMixin
from documents.models import Document from documents.models import Document
@@ -18,8 +22,15 @@ class Command(ProgressBarMixin, BaseCommand):
self.handle_progress_bar_mixin(**options) self.handle_progress_bar_mixin(**options)
logging.getLogger().handlers[0].level = logging.ERROR logging.getLogger().handlers[0].level = logging.ERROR
for document in tqdm.tqdm( documents = Document.objects.all()
Document.objects.all(), with Progress(
TextColumn("[progress.description]{task.description}"),
BarColumn(),
TaskProgressColumn(),
TimeRemainingColumn(),
disable=self.no_progress_bar, disable=self.no_progress_bar,
): ) as progress:
task = progress.add_task("Renaming documents", total=documents.count())
for document in documents:
post_save.send(Document, instance=document, created=False) post_save.send(Document, instance=document, created=False)
progress.update(task, advance=1)

View File

@@ -1,7 +1,11 @@
import logging import logging
import tqdm
from django.core.management.base import BaseCommand from django.core.management.base import BaseCommand
from rich.progress import BarColumn
from rich.progress import Progress
from rich.progress import TaskProgressColumn
from rich.progress import TextColumn
from rich.progress import TimeRemainingColumn
from documents.classifier import load_classifier from documents.classifier import load_classifier
from documents.management.commands.mixins import ProgressBarMixin from documents.management.commands.mixins import ProgressBarMixin
@@ -84,7 +88,15 @@ class Command(ProgressBarMixin, BaseCommand):
classifier = load_classifier() classifier = load_classifier()
for document in tqdm.tqdm(documents, disable=self.no_progress_bar): with Progress(
TextColumn("[progress.description]{task.description}"),
BarColumn(),
TaskProgressColumn(),
TimeRemainingColumn(),
disable=self.no_progress_bar,
) as progress:
task = progress.add_task("Retagging documents", total=documents.count())
for document in documents:
if options["correspondent"]: if options["correspondent"]:
set_correspondent( set_correspondent(
sender=None, sender=None,
@@ -134,3 +146,4 @@ class Command(ProgressBarMixin, BaseCommand):
stdout=self.stdout, stdout=self.stdout,
style_func=self.style, style_func=self.style,
) )
progress.update(task, advance=1)

View File

@@ -2,9 +2,13 @@ import logging
import multiprocessing import multiprocessing
import shutil import shutil
import tqdm
from django import db from django import db
from django.core.management.base import BaseCommand from django.core.management.base import BaseCommand
from rich.progress import BarColumn
from rich.progress import Progress
from rich.progress import TaskProgressColumn
from rich.progress import TextColumn
from rich.progress import TimeRemainingColumn
from documents.management.commands.mixins import MultiProcessMixin from documents.management.commands.mixins import MultiProcessMixin
from documents.management.commands.mixins import ProgressBarMixin from documents.management.commands.mixins import ProgressBarMixin
@@ -70,15 +74,19 @@ class Command(MultiProcessMixin, ProgressBarMixin, BaseCommand):
# with postgres. # with postgres.
db.connections.close_all() db.connections.close_all()
with Progress(
TextColumn("[progress.description]{task.description}"),
BarColumn(),
TaskProgressColumn(),
TimeRemainingColumn(),
disable=self.no_progress_bar,
) as progress:
task = progress.add_task("Generating thumbnails", total=len(ids))
if self.process_count == 1: if self.process_count == 1:
for doc_id in ids: for doc_id in ids:
_process_document(doc_id) _process_document(doc_id)
progress.update(task, advance=1)
else: # pragma: no cover else: # pragma: no cover
with multiprocessing.Pool(processes=self.process_count) as pool: with multiprocessing.Pool(processes=self.process_count) as pool:
list( for _ in pool.imap_unordered(_process_document, ids):
tqdm.tqdm( progress.update(task, advance=1)
pool.imap_unordered(_process_document, ids),
total=len(ids),
disable=self.no_progress_bar,
),
)

View File

@@ -1,7 +1,12 @@
from auditlog.models import LogEntry from auditlog.models import LogEntry
from django.core.management.base import BaseCommand from django.core.management.base import BaseCommand
from django.db import transaction from django.db import transaction
from tqdm import tqdm from rich.console import Console
from rich.progress import BarColumn
from rich.progress import Progress
from rich.progress import TaskProgressColumn
from rich.progress import TextColumn
from rich.progress import TimeRemainingColumn
from documents.management.commands.mixins import ProgressBarMixin from documents.management.commands.mixins import ProgressBarMixin
@@ -18,8 +23,22 @@ class Command(BaseCommand, ProgressBarMixin):
def handle(self, **options): def handle(self, **options):
self.handle_progress_bar_mixin(**options) self.handle_progress_bar_mixin(**options)
console = Console()
with transaction.atomic(): with transaction.atomic():
for log_entry in tqdm(LogEntry.objects.all(), disable=self.no_progress_bar): log_entries = LogEntry.objects.all()
with Progress(
TextColumn("[progress.description]{task.description}"),
BarColumn(),
TaskProgressColumn(),
TimeRemainingColumn(),
console=console,
disable=self.no_progress_bar,
) as progress:
task = progress.add_task(
"Pruning audit logs",
total=log_entries.count(),
)
for log_entry in log_entries:
model_class = log_entry.content_type.model_class() model_class = log_entry.content_type.model_class()
# use global_objects for SoftDeleteModel # use global_objects for SoftDeleteModel
objects = ( objects = (
@@ -32,8 +51,9 @@ class Command(BaseCommand, ProgressBarMixin):
and not objects.filter(pk=log_entry.object_id).exists() and not objects.filter(pk=log_entry.object_id).exists()
): ):
log_entry.delete() log_entry.delete()
tqdm.write( console.print(
self.style.NOTICE( self.style.NOTICE(
f"Deleted audit log entry for {model_class.__name__} #{log_entry.object_id}", f"Deleted audit log entry for {model_class.__name__} #{log_entry.object_id}",
), ),
) )
progress.update(task, advance=1)

View File

@@ -8,7 +8,11 @@ from typing import Final
from celery import states from celery import states
from django.conf import settings from django.conf import settings
from django.utils import timezone from django.utils import timezone
from tqdm import tqdm from rich.progress import BarColumn
from rich.progress import Progress
from rich.progress import TaskProgressColumn
from rich.progress import TextColumn
from rich.progress import TimeRemainingColumn
from documents.models import Document from documents.models import Document
from documents.models import PaperlessTask from documents.models import PaperlessTask
@@ -92,7 +96,19 @@ def check_sanity(*, progress=False, scheduled=True) -> SanityCheckMessages:
if logo_file in present_files: if logo_file in present_files:
present_files.remove(logo_file) present_files.remove(logo_file)
for doc in tqdm(Document.global_objects.all(), disable=not progress): documents = Document.global_objects.all()
with Progress(
TextColumn("[progress.description]{task.description}"),
BarColumn(),
TaskProgressColumn(),
TimeRemainingColumn(),
disable=not progress,
) as progress_bar:
task = progress_bar.add_task(
"Checking document sanity",
total=documents.count(),
)
for doc in documents:
# Check sanity of the thumbnail # Check sanity of the thumbnail
thumbnail_path: Final[Path] = Path(doc.thumbnail_path).resolve() thumbnail_path: Final[Path] = Path(doc.thumbnail_path).resolve()
if not thumbnail_path.exists() or not thumbnail_path.is_file(): if not thumbnail_path.exists() or not thumbnail_path.is_file():
@@ -103,7 +119,10 @@ def check_sanity(*, progress=False, scheduled=True) -> SanityCheckMessages:
try: try:
_ = thumbnail_path.read_bytes() _ = thumbnail_path.read_bytes()
except OSError as e: except OSError as e:
messages.error(doc.pk, f"Cannot read thumbnail file of document: {e}") messages.error(
doc.pk,
f"Cannot read thumbnail file of document: {e}",
)
# Check sanity of the original file # Check sanity of the original file
# TODO: extract method # TODO: extract method
@@ -116,7 +135,10 @@ def check_sanity(*, progress=False, scheduled=True) -> SanityCheckMessages:
try: try:
checksum = hashlib.md5(source_path.read_bytes()).hexdigest() checksum = hashlib.md5(source_path.read_bytes()).hexdigest()
except OSError as e: except OSError as e:
messages.error(doc.pk, f"Cannot read original file of document: {e}") messages.error(
doc.pk,
f"Cannot read original file of document: {e}",
)
else: else:
if checksum != doc.checksum: if checksum != doc.checksum:
messages.error( messages.error(
@@ -139,7 +161,10 @@ def check_sanity(*, progress=False, scheduled=True) -> SanityCheckMessages:
elif doc.has_archive_version: elif doc.has_archive_version:
archive_path: Final[Path] = Path(doc.archive_path).resolve() archive_path: Final[Path] = Path(doc.archive_path).resolve()
if not archive_path.exists() or not archive_path.is_file(): if not archive_path.exists() or not archive_path.is_file():
messages.error(doc.pk, "Archived version of document does not exist.") messages.error(
doc.pk,
"Archived version of document does not exist.",
)
else: else:
if archive_path in present_files: if archive_path in present_files:
present_files.remove(archive_path) present_files.remove(archive_path)
@@ -163,6 +188,8 @@ def check_sanity(*, progress=False, scheduled=True) -> SanityCheckMessages:
if not doc.content: if not doc.content:
messages.info(doc.pk, "Document contains no OCR data") messages.info(doc.pk, "Document contains no OCR data")
progress_bar.update(task, advance=1)
for extra_file in present_files: for extra_file in present_files:
messages.warning(None, f"Orphaned file in media dir: {extra_file}") messages.warning(None, f"Orphaned file in media dir: {extra_file}")

View File

@@ -8,7 +8,6 @@ from pathlib import Path
from tempfile import TemporaryDirectory from tempfile import TemporaryDirectory
from tempfile import mkstemp from tempfile import mkstemp
import tqdm
from celery import Task from celery import Task
from celery import shared_task from celery import shared_task
from celery import states from celery import states
@@ -19,6 +18,11 @@ from django.db import transaction
from django.db.models.signals import post_save from django.db.models.signals import post_save
from django.utils import timezone from django.utils import timezone
from filelock import FileLock from filelock import FileLock
from rich.progress import BarColumn
from rich.progress import Progress
from rich.progress import TaskProgressColumn
from rich.progress import TextColumn
from rich.progress import TimeRemainingColumn
from whoosh.writing import AsyncWriter from whoosh.writing import AsyncWriter
from documents import index from documents import index
@@ -83,9 +87,20 @@ def index_reindex(*, progress_bar_disable=False) -> None:
ix = index.open_index(recreate=True) ix = index.open_index(recreate=True)
with AsyncWriter(ix) as writer: with (
for document in tqdm.tqdm(documents, disable=progress_bar_disable): AsyncWriter(ix) as writer,
Progress(
TextColumn("[progress.description]{task.description}"),
BarColumn(),
TaskProgressColumn(),
TimeRemainingColumn(),
disable=progress_bar_disable,
) as progress,
):
task = progress.add_task("Reindexing documents", total=documents.count())
for document in documents:
index.update_document(writer, document) index.update_document(writer, document)
progress.update(task, advance=1)
@shared_task @shared_task

View File

@@ -5,7 +5,6 @@ from pathlib import Path
import faiss import faiss
import llama_index.core.settings as llama_settings import llama_index.core.settings as llama_settings
import tqdm
from celery import states from celery import states
from django.conf import settings from django.conf import settings
from django.utils import timezone from django.utils import timezone
@@ -22,6 +21,11 @@ from llama_index.core.storage.docstore import SimpleDocumentStore
from llama_index.core.storage.index_store import SimpleIndexStore from llama_index.core.storage.index_store import SimpleIndexStore
from llama_index.core.text_splitter import TokenTextSplitter from llama_index.core.text_splitter import TokenTextSplitter
from llama_index.vector_stores.faiss import FaissVectorStore from llama_index.vector_stores.faiss import FaissVectorStore
from rich.progress import BarColumn
from rich.progress import Progress
from rich.progress import TaskProgressColumn
from rich.progress import TextColumn
from rich.progress import TimeRemainingColumn
from documents.models import Document from documents.models import Document
from documents.models import PaperlessTask from documents.models import PaperlessTask
@@ -176,9 +180,18 @@ def update_llm_index(*, progress_bar_disable=False, rebuild=False) -> str:
embed_model = get_embedding_model() embed_model = get_embedding_model()
llama_settings.Settings.embed_model = embed_model llama_settings.Settings.embed_model = embed_model
storage_context = get_or_create_storage_context(rebuild=True) storage_context = get_or_create_storage_context(rebuild=True)
for document in tqdm.tqdm(documents, disable=progress_bar_disable): with Progress(
TextColumn("[progress.description]{task.description}"),
BarColumn(),
TaskProgressColumn(),
TimeRemainingColumn(),
disable=progress_bar_disable,
) as progress:
task = progress.add_task("Building document nodes", total=documents.count())
for document in documents:
document_nodes = build_document_node(document) document_nodes = build_document_node(document)
nodes.extend(document_nodes) nodes.extend(document_nodes)
progress.update(task, advance=1)
index = VectorStoreIndex( index = VectorStoreIndex(
nodes=nodes, nodes=nodes,
@@ -196,7 +209,15 @@ def update_llm_index(*, progress_bar_disable=False, rebuild=False) -> str:
for node in index.docstore.get_nodes(all_node_ids) for node in index.docstore.get_nodes(all_node_ids)
} }
for document in tqdm.tqdm(documents, disable=progress_bar_disable): with Progress(
TextColumn("[progress.description]{task.description}"),
BarColumn(),
TaskProgressColumn(),
TimeRemainingColumn(),
disable=progress_bar_disable,
) as progress:
task = progress.add_task("Updating index nodes", total=documents.count())
for document in documents:
doc_id = str(document.id) doc_id = str(document.id)
document_modified = document.modified.isoformat() document_modified = document.modified.isoformat()
@@ -205,6 +226,7 @@ def update_llm_index(*, progress_bar_disable=False, rebuild=False) -> str:
node_modified = node.metadata.get("modified") node_modified = node.metadata.get("modified")
if node_modified == document_modified: if node_modified == document_modified:
progress.update(task, advance=1)
continue continue
# Again, delete from docstore, FAISS IndexFlatL2 are append-only # Again, delete from docstore, FAISS IndexFlatL2 are append-only
@@ -213,6 +235,7 @@ def update_llm_index(*, progress_bar_disable=False, rebuild=False) -> str:
else: else:
# New document, add it # New document, add it
nodes.extend(build_document_node(document)) nodes.extend(build_document_node(document))
progress.update(task, advance=1)
if nodes: if nodes:
msg = "LLM index updated successfully." msg = "LLM index updated successfully."