mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-09-12 21:35:40 -05:00
Chore: Cleanup command arguments and standardize process count handling (#4541)
Cleans up some command help text and adds more control over process count for command with a Pool
This commit is contained in:
@@ -7,6 +7,8 @@ import tqdm
|
||||
from django.core.management import BaseCommand
|
||||
from django.core.management import CommandError
|
||||
|
||||
from documents.management.commands.mixins import MultiProcessMixin
|
||||
from documents.management.commands.mixins import ProgressBarMixin
|
||||
from documents.models import Document
|
||||
|
||||
|
||||
@@ -41,7 +43,7 @@ def _process_and_match(work: _WorkPackage) -> _WorkResult:
|
||||
return _WorkResult(work.first_doc.pk, work.second_doc.pk, match)
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
class Command(MultiProcessMixin, ProgressBarMixin, BaseCommand):
|
||||
help = "Searches for documents where the content almost matches"
|
||||
|
||||
def add_arguments(self, parser):
|
||||
@@ -51,23 +53,16 @@ class Command(BaseCommand):
|
||||
type=float,
|
||||
help="Ratio to consider documents a match",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--processes",
|
||||
default=4,
|
||||
type=int,
|
||||
help="Number of processes to distribute work amongst",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--no-progress-bar",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="If set, the progress bar will not be shown",
|
||||
)
|
||||
self.add_argument_progress_bar_mixin(parser)
|
||||
self.add_argument_processes_mixin(parser)
|
||||
|
||||
def handle(self, *args, **options):
|
||||
RATIO_MIN: Final[float] = 0.0
|
||||
RATIO_MAX: Final[float] = 100.0
|
||||
|
||||
self.handle_processes_mixin(**options)
|
||||
self.handle_progress_bar_mixin(**options)
|
||||
|
||||
opt_ratio = options["ratio"]
|
||||
checked_pairs: set[tuple[int, int]] = set()
|
||||
work_pkgs: list[_WorkPackage] = []
|
||||
@@ -76,9 +71,6 @@ class Command(BaseCommand):
|
||||
if opt_ratio < RATIO_MIN or opt_ratio > RATIO_MAX:
|
||||
raise CommandError("The ratio must be between 0 and 100")
|
||||
|
||||
if options["processes"] < 1:
|
||||
raise CommandError("There must be at least 1 process")
|
||||
|
||||
all_docs = Document.objects.all().order_by("id")
|
||||
|
||||
# Build work packages for processing
|
||||
@@ -101,17 +93,17 @@ class Command(BaseCommand):
|
||||
work_pkgs.append(_WorkPackage(first_doc, second_doc))
|
||||
|
||||
# Don't spin up a pool of 1 process
|
||||
if options["processes"] == 1:
|
||||
if self.process_count == 1:
|
||||
results = []
|
||||
for work in tqdm.tqdm(work_pkgs, disable=options["no_progress_bar"]):
|
||||
for work in tqdm.tqdm(work_pkgs, disable=self.no_progress_bar):
|
||||
results.append(_process_and_match(work))
|
||||
else:
|
||||
with multiprocessing.Pool(processes=options["processes"]) as pool:
|
||||
else: # pragma: no cover
|
||||
with multiprocessing.Pool(processes=self.process_count) as pool:
|
||||
results = list(
|
||||
tqdm.tqdm(
|
||||
pool.imap_unordered(_process_and_match, work_pkgs),
|
||||
total=len(work_pkgs),
|
||||
disable=options["no_progress_bar"],
|
||||
disable=self.no_progress_bar,
|
||||
),
|
||||
)
|
||||
|
||||
|
Reference in New Issue
Block a user