mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Testing out a switch to rich to remove tqdm
This commit is contained in:
parent
c122c60d3f
commit
3656c36965
@ -1,10 +1,10 @@
|
||||
import logging
|
||||
import multiprocessing
|
||||
|
||||
import tqdm
|
||||
from django import db
|
||||
from django.conf import settings
|
||||
from django.core.management.base import BaseCommand
|
||||
from rich.progress import track
|
||||
|
||||
from documents.management.commands.mixins import MultiProcessMixin
|
||||
from documents.management.commands.mixins import ProgressBarMixin
|
||||
@ -81,7 +81,7 @@ class Command(MultiProcessMixin, ProgressBarMixin, BaseCommand):
|
||||
else: # pragma: no cover
|
||||
with multiprocessing.Pool(self.process_count) as pool:
|
||||
list(
|
||||
tqdm.tqdm(
|
||||
track(
|
||||
pool.imap_unordered(
|
||||
update_document_content_maybe_archive_file,
|
||||
document_ids,
|
||||
|
@ -7,7 +7,6 @@ import time
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import tqdm
|
||||
from allauth.mfa.models import Authenticator
|
||||
from allauth.socialaccount.models import SocialAccount
|
||||
from allauth.socialaccount.models import SocialApp
|
||||
@ -25,6 +24,11 @@ from django.utils import timezone
|
||||
from filelock import FileLock
|
||||
from guardian.models import GroupObjectPermission
|
||||
from guardian.models import UserObjectPermission
|
||||
from rich.progress import BarColumn
|
||||
from rich.progress import MofNCompleteColumn
|
||||
from rich.progress import Progress
|
||||
from rich.progress import TaskProgressColumn
|
||||
from rich.progress import TextColumn
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from django.db.models import QuerySet
|
||||
@ -229,8 +233,17 @@ class Command(CryptMixin, BaseCommand):
|
||||
|
||||
try:
|
||||
# Prevent any ongoing changes in the documents
|
||||
with FileLock(settings.MEDIA_LOCK):
|
||||
self.dump()
|
||||
with (
|
||||
FileLock(settings.MEDIA_LOCK),
|
||||
Progress(
|
||||
TextColumn("[progress.description]{task.description}"),
|
||||
BarColumn(),
|
||||
TaskProgressColumn(),
|
||||
MofNCompleteColumn(),
|
||||
disable=self.no_progress_bar,
|
||||
) as progress,
|
||||
):
|
||||
self.dump(progress)
|
||||
|
||||
# We've written everything to the temporary directory in this case,
|
||||
# now make an archive in the original target, with all files stored
|
||||
@ -249,7 +262,7 @@ class Command(CryptMixin, BaseCommand):
|
||||
if self.zip_export and temp_dir is not None:
|
||||
temp_dir.cleanup()
|
||||
|
||||
def dump(self):
|
||||
def dump(self, progress: Progress):
|
||||
# 1. Take a snapshot of what files exist in the current export folder
|
||||
for x in self.target.glob("**/*"):
|
||||
if x.is_file():
|
||||
@ -297,11 +310,17 @@ class Command(CryptMixin, BaseCommand):
|
||||
with transaction.atomic():
|
||||
manifest_dict = {}
|
||||
|
||||
serialize_task = progress.add_task(
|
||||
"Serializing database",
|
||||
total=len(manifest_key_to_object_query),
|
||||
)
|
||||
|
||||
# Build an overall manifest
|
||||
for key, object_query in manifest_key_to_object_query.items():
|
||||
manifest_dict[key] = json.loads(
|
||||
serializers.serialize("json", object_query),
|
||||
)
|
||||
progress.advance(serialize_task)
|
||||
|
||||
self.encrypt_secret_fields(manifest_dict)
|
||||
|
||||
@ -313,12 +332,10 @@ class Command(CryptMixin, BaseCommand):
|
||||
}
|
||||
document_manifest = manifest_dict["documents"]
|
||||
|
||||
copy_task = progress.add_task("Copying files", total=len(document_manifest))
|
||||
|
||||
# 3. Export files from each document
|
||||
for index, document_dict in tqdm.tqdm(
|
||||
enumerate(document_manifest),
|
||||
total=len(document_manifest),
|
||||
disable=self.no_progress_bar,
|
||||
):
|
||||
for index, document_dict in enumerate(document_manifest):
|
||||
# 3.1. store files unencrypted
|
||||
document_dict["fields"]["storage_type"] = Document.STORAGE_TYPE_UNENCRYPTED
|
||||
|
||||
@ -365,6 +382,7 @@ class Command(CryptMixin, BaseCommand):
|
||||
content,
|
||||
manifest_name,
|
||||
)
|
||||
progress.advance(copy_task)
|
||||
|
||||
# These were exported already
|
||||
if self.split_manifest:
|
||||
|
@ -3,9 +3,9 @@ import multiprocessing
|
||||
from typing import Final
|
||||
|
||||
import rapidfuzz
|
||||
import tqdm
|
||||
from django.core.management import BaseCommand
|
||||
from django.core.management import CommandError
|
||||
from rich.progress import track
|
||||
|
||||
from documents.management.commands.mixins import MultiProcessMixin
|
||||
from documents.management.commands.mixins import ProgressBarMixin
|
||||
@ -105,12 +105,12 @@ class Command(MultiProcessMixin, ProgressBarMixin, BaseCommand):
|
||||
# Don't spin up a pool of 1 process
|
||||
if self.process_count == 1:
|
||||
results = []
|
||||
for work in tqdm.tqdm(work_pkgs, disable=self.no_progress_bar):
|
||||
for work in track(work_pkgs, disable=self.no_progress_bar):
|
||||
results.append(_process_and_match(work))
|
||||
else: # pragma: no cover
|
||||
with multiprocessing.Pool(processes=self.process_count) as pool:
|
||||
results = list(
|
||||
tqdm.tqdm(
|
||||
track(
|
||||
pool.imap_unordered(_process_and_match, work_pkgs),
|
||||
total=len(work_pkgs),
|
||||
disable=self.no_progress_bar,
|
||||
|
@ -5,7 +5,6 @@ from collections.abc import Generator
|
||||
from contextlib import contextmanager
|
||||
from pathlib import Path
|
||||
|
||||
import tqdm
|
||||
from django.conf import settings
|
||||
from django.contrib.auth.models import Permission
|
||||
from django.contrib.auth.models import User
|
||||
@ -20,6 +19,7 @@ from django.db import transaction
|
||||
from django.db.models.signals import m2m_changed
|
||||
from django.db.models.signals import post_save
|
||||
from filelock import FileLock
|
||||
from rich.progress import Progress
|
||||
|
||||
from documents.file_handling import create_source_path_directory
|
||||
from documents.management.commands.mixins import CryptMixin
|
||||
@ -138,7 +138,7 @@ class Command(CryptMixin, BaseCommand):
|
||||
pre_check_maybe_not_empty()
|
||||
pre_check_manifest_exists()
|
||||
|
||||
def load_manifest_files(self) -> None:
|
||||
def load_manifest_files(self, progress: Progress) -> None:
|
||||
"""
|
||||
Loads manifest data from the various JSON files for parsing and loading the database
|
||||
"""
|
||||
@ -148,10 +148,15 @@ class Command(CryptMixin, BaseCommand):
|
||||
self.manifest = json.load(infile)
|
||||
self.manifest_paths.append(main_manifest_path)
|
||||
|
||||
split_manifest_task = progress.add_task("Parsing split manifests")
|
||||
|
||||
for file in Path(self.source).glob("**/*-manifest.json"):
|
||||
progress.update(split_manifest_task, visible=True)
|
||||
with file.open() as infile:
|
||||
self.manifest += json.load(infile)
|
||||
self.manifest_paths.append(file)
|
||||
progress.advance(split_manifest_task)
|
||||
progress.update(split_manifest_task, total=1, completed=1)
|
||||
|
||||
def load_metadata(self) -> None:
|
||||
"""
|
||||
@ -191,7 +196,7 @@ class Command(CryptMixin, BaseCommand):
|
||||
),
|
||||
)
|
||||
|
||||
def load_data_to_database(self) -> None:
|
||||
def load_data_to_database(self, progress: Progress) -> None:
|
||||
"""
|
||||
As the name implies, loads data from the JSON file(s) into the database
|
||||
"""
|
||||
@ -201,7 +206,7 @@ class Command(CryptMixin, BaseCommand):
|
||||
ContentType.objects.all().delete()
|
||||
Permission.objects.all().delete()
|
||||
for manifest_path in self.manifest_paths:
|
||||
call_command("loaddata", manifest_path)
|
||||
call_command("loaddata", "-v", "0", manifest_path)
|
||||
except (FieldDoesNotExist, DeserializationError, IntegrityError) as e:
|
||||
self.stdout.write(self.style.ERROR("Database import failed"))
|
||||
if (
|
||||
@ -234,55 +239,56 @@ class Command(CryptMixin, BaseCommand):
|
||||
self.manifest_paths = []
|
||||
self.manifest = []
|
||||
|
||||
self.pre_check()
|
||||
with Progress(disable=self.no_progress_bar) as progress:
|
||||
self.pre_check()
|
||||
|
||||
self.load_metadata()
|
||||
self.load_metadata()
|
||||
|
||||
self.load_manifest_files()
|
||||
self.load_manifest_files(progress)
|
||||
|
||||
self.check_manifest_validity()
|
||||
self.check_manifest_validity(progress)
|
||||
|
||||
self.decrypt_secret_fields()
|
||||
self.decrypt_secret_fields()
|
||||
|
||||
# see /src/documents/signals/handlers.py
|
||||
with (
|
||||
disable_signal(
|
||||
post_save,
|
||||
receiver=update_filename_and_move_files,
|
||||
sender=Document,
|
||||
),
|
||||
disable_signal(
|
||||
m2m_changed,
|
||||
receiver=update_filename_and_move_files,
|
||||
sender=Document.tags.through,
|
||||
),
|
||||
disable_signal(
|
||||
post_save,
|
||||
receiver=update_filename_and_move_files,
|
||||
sender=CustomFieldInstance,
|
||||
),
|
||||
disable_signal(
|
||||
post_save,
|
||||
receiver=check_paths_and_prune_custom_fields,
|
||||
sender=CustomField,
|
||||
),
|
||||
):
|
||||
if settings.AUDIT_LOG_ENABLED:
|
||||
auditlog.unregister(Document)
|
||||
auditlog.unregister(Correspondent)
|
||||
auditlog.unregister(Tag)
|
||||
auditlog.unregister(DocumentType)
|
||||
auditlog.unregister(Note)
|
||||
auditlog.unregister(CustomField)
|
||||
auditlog.unregister(CustomFieldInstance)
|
||||
# see /src/documents/signals/handlers.py
|
||||
with (
|
||||
disable_signal(
|
||||
post_save,
|
||||
receiver=update_filename_and_move_files,
|
||||
sender=Document,
|
||||
),
|
||||
disable_signal(
|
||||
m2m_changed,
|
||||
receiver=update_filename_and_move_files,
|
||||
sender=Document.tags.through,
|
||||
),
|
||||
disable_signal(
|
||||
post_save,
|
||||
receiver=update_filename_and_move_files,
|
||||
sender=CustomFieldInstance,
|
||||
),
|
||||
disable_signal(
|
||||
post_save,
|
||||
receiver=check_paths_and_prune_custom_fields,
|
||||
sender=CustomField,
|
||||
),
|
||||
):
|
||||
if settings.AUDIT_LOG_ENABLED:
|
||||
auditlog.unregister(Document)
|
||||
auditlog.unregister(Correspondent)
|
||||
auditlog.unregister(Tag)
|
||||
auditlog.unregister(DocumentType)
|
||||
auditlog.unregister(Note)
|
||||
auditlog.unregister(CustomField)
|
||||
auditlog.unregister(CustomFieldInstance)
|
||||
|
||||
# Fill up the database with whatever is in the manifest
|
||||
self.load_data_to_database()
|
||||
# Fill up the database with whatever is in the manifest
|
||||
self.load_data_to_database(progress)
|
||||
|
||||
if not self.data_only:
|
||||
self._import_files_from_manifest()
|
||||
else:
|
||||
self.stdout.write(self.style.NOTICE("Data only import completed"))
|
||||
if not self.data_only:
|
||||
self._import_files_from_manifest(progress)
|
||||
else:
|
||||
self.stdout.write(self.style.NOTICE("Data only import completed"))
|
||||
|
||||
self.stdout.write("Updating search index...")
|
||||
call_command(
|
||||
@ -291,7 +297,7 @@ class Command(CryptMixin, BaseCommand):
|
||||
no_progress_bar=self.no_progress_bar,
|
||||
)
|
||||
|
||||
def check_manifest_validity(self) -> None:
|
||||
def check_manifest_validity(self, progress: Progress) -> None:
|
||||
"""
|
||||
Attempts to verify the manifest is valid. Namely checking the files
|
||||
referred to exist and the files can be read from
|
||||
@ -335,45 +341,56 @@ class Command(CryptMixin, BaseCommand):
|
||||
f"Failed to read from archive file {doc_archive_path}",
|
||||
) from e
|
||||
|
||||
self.stdout.write("Checking the manifest")
|
||||
manifest_valid_task = progress.add_task(
|
||||
"Checking validity",
|
||||
total=None,
|
||||
visible=not self.data_only,
|
||||
)
|
||||
|
||||
# self.stdout.write("Checking the manifest")
|
||||
for record in self.manifest:
|
||||
# Only check if the document files exist if this is not data only
|
||||
# We don't care about documents for a data only import
|
||||
if not self.data_only and record["model"] == "documents.document":
|
||||
check_document_validity(record)
|
||||
progress.advance(manifest_valid_task)
|
||||
progress.update(manifest_valid_task, total=1, completed=1)
|
||||
|
||||
def _import_files_from_manifest(self) -> None:
|
||||
def _import_files_from_manifest(self, progress: Progress) -> None:
|
||||
settings.ORIGINALS_DIR.mkdir(parents=True, exist_ok=True)
|
||||
settings.THUMBNAIL_DIR.mkdir(parents=True, exist_ok=True)
|
||||
settings.ARCHIVE_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
self.stdout.write("Copy files into paperless...")
|
||||
# self.stdout.write("Copy files into paperless...")
|
||||
|
||||
manifest_documents = list(
|
||||
filter(lambda r: r["model"] == "documents.document", self.manifest),
|
||||
)
|
||||
copy_file_task = progress.add_task(
|
||||
"Copying files",
|
||||
total=len(manifest_documents),
|
||||
)
|
||||
with FileLock(settings.MEDIA_LOCK):
|
||||
for record in manifest_documents:
|
||||
document = Document.objects.get(pk=record["pk"])
|
||||
|
||||
for record in tqdm.tqdm(manifest_documents, disable=self.no_progress_bar):
|
||||
document = Document.objects.get(pk=record["pk"])
|
||||
doc_file = record[EXPORTER_FILE_NAME]
|
||||
document_path = self.source / doc_file
|
||||
|
||||
doc_file = record[EXPORTER_FILE_NAME]
|
||||
document_path = self.source / doc_file
|
||||
if EXPORTER_THUMBNAIL_NAME in record:
|
||||
thumb_file = record[EXPORTER_THUMBNAIL_NAME]
|
||||
thumbnail_path = (self.source / thumb_file).resolve()
|
||||
else:
|
||||
thumbnail_path = None
|
||||
|
||||
if EXPORTER_THUMBNAIL_NAME in record:
|
||||
thumb_file = record[EXPORTER_THUMBNAIL_NAME]
|
||||
thumbnail_path = (self.source / thumb_file).resolve()
|
||||
else:
|
||||
thumbnail_path = None
|
||||
if EXPORTER_ARCHIVE_NAME in record:
|
||||
archive_file = record[EXPORTER_ARCHIVE_NAME]
|
||||
archive_path = self.source / archive_file
|
||||
else:
|
||||
archive_path = None
|
||||
|
||||
if EXPORTER_ARCHIVE_NAME in record:
|
||||
archive_file = record[EXPORTER_ARCHIVE_NAME]
|
||||
archive_path = self.source / archive_file
|
||||
else:
|
||||
archive_path = None
|
||||
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
|
||||
|
||||
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
|
||||
|
||||
with FileLock(settings.MEDIA_LOCK):
|
||||
if Path(document.source_path).is_file():
|
||||
raise FileExistsError(document.source_path)
|
||||
|
||||
@ -406,7 +423,8 @@ class Command(CryptMixin, BaseCommand):
|
||||
# archived files
|
||||
copy_file_with_basic_stats(archive_path, document.archive_path)
|
||||
|
||||
document.save()
|
||||
document.save()
|
||||
progress.advance(copy_file_task)
|
||||
|
||||
def decrypt_secret_fields(self) -> None:
|
||||
"""
|
||||
|
@ -1,8 +1,8 @@
|
||||
import logging
|
||||
|
||||
import tqdm
|
||||
from django.core.management.base import BaseCommand
|
||||
from django.db.models.signals import post_save
|
||||
from rich.progress import track
|
||||
|
||||
from documents.management.commands.mixins import ProgressBarMixin
|
||||
from documents.models import Document
|
||||
@ -17,9 +17,10 @@ class Command(ProgressBarMixin, BaseCommand):
|
||||
def handle(self, *args, **options):
|
||||
self.handle_progress_bar_mixin(**options)
|
||||
logging.getLogger().handlers[0].level = logging.ERROR
|
||||
|
||||
for document in tqdm.tqdm(
|
||||
Document.objects.all(),
|
||||
qs = Document.objects.all()
|
||||
for document in track(
|
||||
qs,
|
||||
total=qs.count(),
|
||||
disable=self.no_progress_bar,
|
||||
):
|
||||
post_save.send(Document, instance=document, created=False)
|
||||
|
@ -1,7 +1,7 @@
|
||||
import logging
|
||||
|
||||
import tqdm
|
||||
from django.core.management.base import BaseCommand
|
||||
from rich.progress import track
|
||||
|
||||
from documents.classifier import load_classifier
|
||||
from documents.management.commands.mixins import ProgressBarMixin
|
||||
@ -84,7 +84,11 @@ class Command(ProgressBarMixin, BaseCommand):
|
||||
|
||||
classifier = load_classifier()
|
||||
|
||||
for document in tqdm.tqdm(documents, disable=self.no_progress_bar):
|
||||
for document in track(
|
||||
documents,
|
||||
total=documents.count(),
|
||||
disable=self.no_progress_bar,
|
||||
):
|
||||
if options["correspondent"]:
|
||||
set_correspondent(
|
||||
sender=None,
|
||||
|
@ -2,9 +2,9 @@ import logging
|
||||
import multiprocessing
|
||||
import shutil
|
||||
|
||||
import tqdm
|
||||
from django import db
|
||||
from django.core.management.base import BaseCommand
|
||||
from rich.progress import track
|
||||
|
||||
from documents.management.commands.mixins import MultiProcessMixin
|
||||
from documents.management.commands.mixins import ProgressBarMixin
|
||||
@ -76,7 +76,7 @@ class Command(MultiProcessMixin, ProgressBarMixin, BaseCommand):
|
||||
else: # pragma: no cover
|
||||
with multiprocessing.Pool(processes=self.process_count) as pool:
|
||||
list(
|
||||
tqdm.tqdm(
|
||||
track(
|
||||
pool.imap_unordered(_process_document, ids),
|
||||
total=len(ids),
|
||||
disable=self.no_progress_bar,
|
||||
|
@ -1,7 +1,7 @@
|
||||
from auditlog.models import LogEntry
|
||||
from django.core.management.base import BaseCommand
|
||||
from django.db import transaction
|
||||
from tqdm import tqdm
|
||||
from rich.progress import track
|
||||
|
||||
from documents.management.commands.mixins import ProgressBarMixin
|
||||
|
||||
@ -19,7 +19,10 @@ class Command(BaseCommand, ProgressBarMixin):
|
||||
def handle(self, **options):
|
||||
self.handle_progress_bar_mixin(**options)
|
||||
with transaction.atomic():
|
||||
for log_entry in tqdm(LogEntry.objects.all(), disable=self.no_progress_bar):
|
||||
for log_entry in track(
|
||||
LogEntry.objects.all(),
|
||||
disable=self.no_progress_bar,
|
||||
):
|
||||
model_class = log_entry.content_type.model_class()
|
||||
# use global_objects for SoftDeleteModel
|
||||
objects = (
|
||||
@ -32,7 +35,7 @@ class Command(BaseCommand, ProgressBarMixin):
|
||||
and not objects.filter(pk=log_entry.object_id).exists()
|
||||
):
|
||||
log_entry.delete()
|
||||
tqdm.write(
|
||||
self.stdout.write(
|
||||
self.style.NOTICE(
|
||||
f"Deleted audit log entry for {model_class.__name__} #{log_entry.object_id}",
|
||||
),
|
||||
|
@ -5,7 +5,7 @@ from pathlib import Path
|
||||
from typing import Final
|
||||
|
||||
from django.conf import settings
|
||||
from tqdm import tqdm
|
||||
from rich.progress import track
|
||||
|
||||
from documents.models import Document
|
||||
|
||||
@ -68,7 +68,9 @@ def check_sanity(*, progress=False) -> SanityCheckMessages:
|
||||
if lockfile in present_files:
|
||||
present_files.remove(lockfile)
|
||||
|
||||
for doc in tqdm(Document.global_objects.all(), disable=not progress):
|
||||
qs = Document.global_objects.all()
|
||||
|
||||
for doc in track(qs, total=qs.count(), disable=not progress):
|
||||
# Check sanity of the thumbnail
|
||||
thumbnail_path: Final[Path] = Path(doc.thumbnail_path).resolve()
|
||||
if not thumbnail_path.exists() or not thumbnail_path.is_file():
|
||||
|
@ -6,7 +6,6 @@ from datetime import timedelta
|
||||
from pathlib import Path
|
||||
from tempfile import TemporaryDirectory
|
||||
|
||||
import tqdm
|
||||
from celery import Task
|
||||
from celery import shared_task
|
||||
from django.conf import settings
|
||||
@ -16,6 +15,7 @@ from django.db import transaction
|
||||
from django.db.models.signals import post_save
|
||||
from django.utils import timezone
|
||||
from filelock import FileLock
|
||||
from rich.progress import track
|
||||
from whoosh.writing import AsyncWriter
|
||||
|
||||
from documents import index
|
||||
@ -69,7 +69,12 @@ def index_reindex(*, progress_bar_disable=False):
|
||||
ix = index.open_index(recreate=True)
|
||||
|
||||
with AsyncWriter(ix) as writer:
|
||||
for document in tqdm.tqdm(documents, disable=progress_bar_disable):
|
||||
for document in track(
|
||||
documents,
|
||||
total=documents.count(),
|
||||
description="Indexing...",
|
||||
disable=progress_bar_disable,
|
||||
):
|
||||
index.update_document(writer, document)
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user