Chore: Cleanup command arguments and standardize process count handling (#4541)

Cleans up some command help text and adds more control over process count for command with a Pool
This commit is contained in:
Trenton H
2023-11-09 11:46:37 -08:00
committed by GitHub
parent 577b49df9d
commit e8527ba723
17 changed files with 229 additions and 198 deletions

View File

@@ -7,21 +7,20 @@ from django import db
from django.conf import settings
from django.core.management.base import BaseCommand
from documents.management.commands.mixins import MultiProcessMixin
from documents.management.commands.mixins import ProgressBarMixin
from documents.models import Document
from documents.tasks import update_document_archive_file
logger = logging.getLogger("paperless.management.archiver")
class Command(BaseCommand):
help = """
Using the current classification model, assigns correspondents, tags
and document types to all documents, effectively allowing you to
back-tag all previously indexed documents with metadata created (or
modified) after their initial import.
""".replace(
" ",
"",
class Command(MultiProcessMixin, ProgressBarMixin, BaseCommand):
help = (
"Using the current classification model, assigns correspondents, tags "
"and document types to all documents, effectively allowing you to "
"back-tag all previously indexed documents with metadata created (or "
"modified) after their initial import."
)
def add_arguments(self, parser):
@@ -30,8 +29,10 @@ class Command(BaseCommand):
"--overwrite",
default=False,
action="store_true",
help="Recreates the archived document for documents that already "
"have an archived version.",
help=(
"Recreates the archived document for documents that already "
"have an archived version."
),
)
parser.add_argument(
"-d",
@@ -39,17 +40,18 @@ class Command(BaseCommand):
default=None,
type=int,
required=False,
help="Specify the ID of a document, and this command will only "
"run on this specific document.",
)
parser.add_argument(
"--no-progress-bar",
default=False,
action="store_true",
help="If set, the progress bar will not be shown",
help=(
"Specify the ID of a document, and this command will only "
"run on this specific document."
),
)
self.add_argument_progress_bar_mixin(parser)
self.add_argument_processes_mixin(parser)
def handle(self, *args, **options):
self.handle_processes_mixin(**options)
self.handle_progress_bar_mixin(**options)
os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
overwrite = options["overwrite"]
@@ -67,19 +69,27 @@ class Command(BaseCommand):
)
# Note to future self: this prevents django from reusing database
# conncetions between processes, which is bad and does not work
# connections between processes, which is bad and does not work
# with postgres.
db.connections.close_all()
try:
logging.getLogger().handlers[0].level = logging.ERROR
with multiprocessing.Pool(processes=settings.TASK_WORKERS) as pool:
list(
tqdm.tqdm(
pool.imap_unordered(update_document_archive_file, document_ids),
total=len(document_ids),
disable=options["no_progress_bar"],
),
)
if self.process_count == 1:
for doc_id in document_ids:
update_document_archive_file(doc_id)
else: # pragma: no cover
with multiprocessing.Pool(self.process_count) as pool:
list(
tqdm.tqdm(
pool.imap_unordered(
update_document_archive_file,
document_ids,
),
total=len(document_ids),
disable=self.no_progress_bar,
),
)
except KeyboardInterrupt:
self.stdout.write(self.style.NOTICE("Aborting..."))