Replaces tqdm progress bars with rich.progress in the Django management commands

This commit is contained in:
Trenton H
2026-02-09 15:52:52 -08:00
parent c4ed4e7f36
commit e0b45539a6
11 changed files with 368 additions and 205 deletions

View File

@@ -1,10 +1,14 @@
import logging
import multiprocessing
import tqdm
from django import db
from django.conf import settings
from django.core.management.base import BaseCommand
from rich.progress import BarColumn
from rich.progress import Progress
from rich.progress import TaskProgressColumn
from rich.progress import TextColumn
from rich.progress import TimeRemainingColumn
from documents.management.commands.mixins import MultiProcessMixin
from documents.management.commands.mixins import ProgressBarMixin
@@ -75,20 +79,24 @@ class Command(MultiProcessMixin, ProgressBarMixin, BaseCommand):
try:
logging.getLogger().handlers[0].level = logging.ERROR
if self.process_count == 1:
for doc_id in document_ids:
update_document_content_maybe_archive_file(doc_id)
else: # pragma: no cover
with multiprocessing.Pool(self.process_count) as pool:
list(
tqdm.tqdm(
pool.imap_unordered(
update_document_content_maybe_archive_file,
document_ids,
),
total=len(document_ids),
disable=self.no_progress_bar,
),
)
with Progress(
TextColumn("[progress.description]{task.description}"),
BarColumn(),
TaskProgressColumn(),
TimeRemainingColumn(),
disable=self.no_progress_bar,
) as progress:
task = progress.add_task("Archiving documents", total=len(document_ids))
if self.process_count == 1:
for doc_id in document_ids:
update_document_content_maybe_archive_file(doc_id)
progress.update(task, advance=1)
else: # pragma: no cover
with multiprocessing.Pool(self.process_count) as pool:
for _ in pool.imap_unordered(
update_document_content_maybe_archive_file,
document_ids,
):
progress.update(task, advance=1)
except KeyboardInterrupt:
self.stdout.write(self.style.NOTICE("Aborting..."))

View File

@@ -6,7 +6,6 @@ import tempfile
from pathlib import Path
from typing import TYPE_CHECKING
import tqdm
from allauth.mfa.models import Authenticator
from allauth.socialaccount.models import SocialAccount
from allauth.socialaccount.models import SocialApp
@@ -24,6 +23,11 @@ from django.utils import timezone
from filelock import FileLock
from guardian.models import GroupObjectPermission
from guardian.models import UserObjectPermission
from rich.progress import BarColumn
from rich.progress import Progress
from rich.progress import TaskProgressColumn
from rich.progress import TextColumn
from rich.progress import TimeRemainingColumn
if TYPE_CHECKING:
from django.db.models import QuerySet
@@ -309,12 +313,19 @@ class Command(CryptMixin, BaseCommand):
document_manifest = manifest_dict["documents"]
# 3. Export files from each document
for index, document_dict in tqdm.tqdm(
enumerate(document_manifest),
total=len(document_manifest),
with Progress(
TextColumn("[progress.description]{task.description}"),
BarColumn(),
TaskProgressColumn(),
TimeRemainingColumn(),
disable=self.no_progress_bar,
):
document = document_map[document_dict["pk"]]
) as progress:
task = progress.add_task(
"Exporting documents",
total=len(document_manifest),
)
for index, document_dict in enumerate(document_manifest):
document = document_map[document_dict["pk"]]
# 3.1. generate a unique filename
base_name = self.generate_base_name(document)
@@ -357,6 +368,7 @@ class Command(CryptMixin, BaseCommand):
content,
manifest_name,
)
progress.update(task, advance=1)
# These were exported already
if self.split_manifest:

View File

@@ -3,9 +3,13 @@ import multiprocessing
from typing import Final
import rapidfuzz
import tqdm
from django.core.management import BaseCommand
from django.core.management import CommandError
from rich.progress import BarColumn
from rich.progress import Progress
from rich.progress import TaskProgressColumn
from rich.progress import TextColumn
from rich.progress import TimeRemainingColumn
from documents.management.commands.mixins import MultiProcessMixin
from documents.management.commands.mixins import ProgressBarMixin
@@ -106,19 +110,25 @@ class Command(MultiProcessMixin, ProgressBarMixin, BaseCommand):
work_pkgs.append(_WorkPackage(first_doc, second_doc))
# Don't spin up a pool of 1 process
if self.process_count == 1:
results = []
for work in tqdm.tqdm(work_pkgs, disable=self.no_progress_bar):
results.append(_process_and_match(work))
else: # pragma: no cover
with multiprocessing.Pool(processes=self.process_count) as pool:
results = list(
tqdm.tqdm(
pool.imap_unordered(_process_and_match, work_pkgs),
total=len(work_pkgs),
disable=self.no_progress_bar,
),
)
with Progress(
TextColumn("[progress.description]{task.description}"),
BarColumn(),
TaskProgressColumn(),
TimeRemainingColumn(),
disable=self.no_progress_bar,
) as progress:
task = progress.add_task("Fuzzy matching documents", total=len(work_pkgs))
if self.process_count == 1:
results = []
for work in work_pkgs:
results.append(_process_and_match(work))
progress.update(task, advance=1)
else: # pragma: no cover
with multiprocessing.Pool(processes=self.process_count) as pool:
results = []
for result in pool.imap_unordered(_process_and_match, work_pkgs):
results.append(result)
progress.update(task, advance=1)
# Check results
messages = []

View File

@@ -8,7 +8,6 @@ from pathlib import Path
from zipfile import ZipFile
from zipfile import is_zipfile
import tqdm
from django.conf import settings
from django.contrib.auth.models import Permission
from django.contrib.auth.models import User
@@ -23,6 +22,11 @@ from django.db import transaction
from django.db.models.signals import m2m_changed
from django.db.models.signals import post_save
from filelock import FileLock
from rich.progress import BarColumn
from rich.progress import Progress
from rich.progress import TaskProgressColumn
from rich.progress import TextColumn
from rich.progress import TimeRemainingColumn
from documents.file_handling import create_source_path_directory
from documents.management.commands.mixins import CryptMixin
@@ -365,8 +369,19 @@ class Command(CryptMixin, BaseCommand):
filter(lambda r: r["model"] == "documents.document", self.manifest),
)
for record in tqdm.tqdm(manifest_documents, disable=self.no_progress_bar):
document = Document.objects.get(pk=record["pk"])
with Progress(
TextColumn("[progress.description]{task.description}"),
BarColumn(),
TaskProgressColumn(),
TimeRemainingColumn(),
disable=self.no_progress_bar,
) as progress:
task = progress.add_task(
"Importing documents",
total=len(manifest_documents),
)
for record in manifest_documents:
document = Document.objects.get(pk=record["pk"])
doc_file = record[EXPORTER_FILE_NAME]
document_path = self.source / doc_file
@@ -416,7 +431,8 @@ class Command(CryptMixin, BaseCommand):
# archived files
copy_file_with_basic_stats(archive_path, document.archive_path)
document.save()
document.save()
progress.update(task, advance=1)
def decrypt_secret_fields(self) -> None:
"""

View File

@@ -1,8 +1,12 @@
import logging
import tqdm
from django.core.management.base import BaseCommand
from django.db.models.signals import post_save
from rich.progress import BarColumn
from rich.progress import Progress
from rich.progress import TaskProgressColumn
from rich.progress import TextColumn
from rich.progress import TimeRemainingColumn
from documents.management.commands.mixins import ProgressBarMixin
from documents.models import Document
@@ -18,8 +22,15 @@ class Command(ProgressBarMixin, BaseCommand):
self.handle_progress_bar_mixin(**options)
logging.getLogger().handlers[0].level = logging.ERROR
for document in tqdm.tqdm(
Document.objects.all(),
documents = Document.objects.all()
with Progress(
TextColumn("[progress.description]{task.description}"),
BarColumn(),
TaskProgressColumn(),
TimeRemainingColumn(),
disable=self.no_progress_bar,
):
post_save.send(Document, instance=document, created=False)
) as progress:
task = progress.add_task("Renaming documents", total=documents.count())
for document in documents:
post_save.send(Document, instance=document, created=False)
progress.update(task, advance=1)

View File

@@ -1,7 +1,11 @@
import logging
import tqdm
from django.core.management.base import BaseCommand
from rich.progress import BarColumn
from rich.progress import Progress
from rich.progress import TaskProgressColumn
from rich.progress import TextColumn
from rich.progress import TimeRemainingColumn
from documents.classifier import load_classifier
from documents.management.commands.mixins import ProgressBarMixin
@@ -84,53 +88,62 @@ class Command(ProgressBarMixin, BaseCommand):
classifier = load_classifier()
for document in tqdm.tqdm(documents, disable=self.no_progress_bar):
if options["correspondent"]:
set_correspondent(
sender=None,
document=document,
classifier=classifier,
replace=options["overwrite"],
use_first=options["use_first"],
suggest=options["suggest"],
base_url=options["base_url"],
stdout=self.stdout,
style_func=self.style,
)
with Progress(
TextColumn("[progress.description]{task.description}"),
BarColumn(),
TaskProgressColumn(),
TimeRemainingColumn(),
disable=self.no_progress_bar,
) as progress:
task = progress.add_task("Retagging documents", total=documents.count())
for document in documents:
if options["correspondent"]:
set_correspondent(
sender=None,
document=document,
classifier=classifier,
replace=options["overwrite"],
use_first=options["use_first"],
suggest=options["suggest"],
base_url=options["base_url"],
stdout=self.stdout,
style_func=self.style,
)
if options["document_type"]:
set_document_type(
sender=None,
document=document,
classifier=classifier,
replace=options["overwrite"],
use_first=options["use_first"],
suggest=options["suggest"],
base_url=options["base_url"],
stdout=self.stdout,
style_func=self.style,
)
if options["document_type"]:
set_document_type(
sender=None,
document=document,
classifier=classifier,
replace=options["overwrite"],
use_first=options["use_first"],
suggest=options["suggest"],
base_url=options["base_url"],
stdout=self.stdout,
style_func=self.style,
)
if options["tags"]:
set_tags(
sender=None,
document=document,
classifier=classifier,
replace=options["overwrite"],
suggest=options["suggest"],
base_url=options["base_url"],
stdout=self.stdout,
style_func=self.style,
)
if options["storage_path"]:
set_storage_path(
sender=None,
document=document,
classifier=classifier,
replace=options["overwrite"],
use_first=options["use_first"],
suggest=options["suggest"],
base_url=options["base_url"],
stdout=self.stdout,
style_func=self.style,
)
if options["tags"]:
set_tags(
sender=None,
document=document,
classifier=classifier,
replace=options["overwrite"],
suggest=options["suggest"],
base_url=options["base_url"],
stdout=self.stdout,
style_func=self.style,
)
if options["storage_path"]:
set_storage_path(
sender=None,
document=document,
classifier=classifier,
replace=options["overwrite"],
use_first=options["use_first"],
suggest=options["suggest"],
base_url=options["base_url"],
stdout=self.stdout,
style_func=self.style,
)
progress.update(task, advance=1)

View File

@@ -2,9 +2,13 @@ import logging
import multiprocessing
import shutil
import tqdm
from django import db
from django.core.management.base import BaseCommand
from rich.progress import BarColumn
from rich.progress import Progress
from rich.progress import TaskProgressColumn
from rich.progress import TextColumn
from rich.progress import TimeRemainingColumn
from documents.management.commands.mixins import MultiProcessMixin
from documents.management.commands.mixins import ProgressBarMixin
@@ -70,15 +74,19 @@ class Command(MultiProcessMixin, ProgressBarMixin, BaseCommand):
# with postgres.
db.connections.close_all()
if self.process_count == 1:
for doc_id in ids:
_process_document(doc_id)
else: # pragma: no cover
with multiprocessing.Pool(processes=self.process_count) as pool:
list(
tqdm.tqdm(
pool.imap_unordered(_process_document, ids),
total=len(ids),
disable=self.no_progress_bar,
),
)
with Progress(
TextColumn("[progress.description]{task.description}"),
BarColumn(),
TaskProgressColumn(),
TimeRemainingColumn(),
disable=self.no_progress_bar,
) as progress:
task = progress.add_task("Generating thumbnails", total=len(ids))
if self.process_count == 1:
for doc_id in ids:
_process_document(doc_id)
progress.update(task, advance=1)
else: # pragma: no cover
with multiprocessing.Pool(processes=self.process_count) as pool:
for _ in pool.imap_unordered(_process_document, ids):
progress.update(task, advance=1)

View File

@@ -1,7 +1,12 @@
from auditlog.models import LogEntry
from django.core.management.base import BaseCommand
from django.db import transaction
from tqdm import tqdm
from rich.console import Console
from rich.progress import BarColumn
from rich.progress import Progress
from rich.progress import TaskProgressColumn
from rich.progress import TextColumn
from rich.progress import TimeRemainingColumn
from documents.management.commands.mixins import ProgressBarMixin
@@ -18,22 +23,37 @@ class Command(BaseCommand, ProgressBarMixin):
def handle(self, **options):
self.handle_progress_bar_mixin(**options)
console = Console()
with transaction.atomic():
for log_entry in tqdm(LogEntry.objects.all(), disable=self.no_progress_bar):
model_class = log_entry.content_type.model_class()
# use global_objects for SoftDeleteModel
objects = (
model_class.global_objects
if hasattr(model_class, "global_objects")
else model_class.objects
log_entries = LogEntry.objects.all()
with Progress(
TextColumn("[progress.description]{task.description}"),
BarColumn(),
TaskProgressColumn(),
TimeRemainingColumn(),
console=console,
disable=self.no_progress_bar,
) as progress:
task = progress.add_task(
"Pruning audit logs",
total=log_entries.count(),
)
if (
log_entry.object_id
and not objects.filter(pk=log_entry.object_id).exists()
):
log_entry.delete()
tqdm.write(
self.style.NOTICE(
f"Deleted audit log entry for {model_class.__name__} #{log_entry.object_id}",
),
for log_entry in log_entries:
model_class = log_entry.content_type.model_class()
# use global_objects for SoftDeleteModel
objects = (
model_class.global_objects
if hasattr(model_class, "global_objects")
else model_class.objects
)
if (
log_entry.object_id
and not objects.filter(pk=log_entry.object_id).exists()
):
log_entry.delete()
console.print(
self.style.NOTICE(
f"Deleted audit log entry for {model_class.__name__} #{log_entry.object_id}",
),
)
progress.update(task, advance=1)