mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-11 10:00:48 -05:00
Add progress bar to document_fuzzy_match
This commit is contained in:
parent
e2ae919a84
commit
ce8bf90663
@ -1,6 +1,7 @@
|
|||||||
from typing import Final
|
from typing import Final
|
||||||
|
|
||||||
import rapidfuzz
|
import rapidfuzz
|
||||||
|
import tqdm
|
||||||
from django.core.management import BaseCommand
|
from django.core.management import BaseCommand
|
||||||
from django.core.management import CommandError
|
from django.core.management import CommandError
|
||||||
|
|
||||||
@ -17,12 +18,19 @@ class Command(BaseCommand):
|
|||||||
type=float,
|
type=float,
|
||||||
help="Ratio to consider documents a match",
|
help="Ratio to consider documents a match",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--no-progress-bar",
|
||||||
|
default=False,
|
||||||
|
action="store_true",
|
||||||
|
help="If set, the progress bar will not be shown",
|
||||||
|
)
|
||||||
|
|
||||||
def handle(self, *args, **options):
|
def handle(self, *args, **options):
|
||||||
RATIO_MIN: Final[float] = 0.0
|
RATIO_MIN: Final[float] = 0.0
|
||||||
RATIO_MAX: Final[float] = 100.0
|
RATIO_MAX: Final[float] = 100.0
|
||||||
|
|
||||||
opt_ratio = options["ratio"]
|
opt_ratio = options["ratio"]
|
||||||
|
progress_bar_disable = options["no_progress_bar"]
|
||||||
match_pairs = set()
|
match_pairs = set()
|
||||||
|
|
||||||
# Ratio is a float from 0.0 to 100.0
|
# Ratio is a float from 0.0 to 100.0
|
||||||
@ -31,7 +39,9 @@ class Command(BaseCommand):
|
|||||||
|
|
||||||
all_docs = Document.objects.all().order_by("id")
|
all_docs = Document.objects.all().order_by("id")
|
||||||
|
|
||||||
for first_doc in all_docs:
|
messages = []
|
||||||
|
|
||||||
|
for first_doc in tqdm.tqdm(all_docs, disable=progress_bar_disable):
|
||||||
for second_doc in all_docs:
|
for second_doc in all_docs:
|
||||||
if first_doc.pk == second_doc.pk:
|
if first_doc.pk == second_doc.pk:
|
||||||
continue
|
continue
|
||||||
@ -55,9 +65,17 @@ class Command(BaseCommand):
|
|||||||
match_pairs.add((first_doc.pk, second_doc.pk))
|
match_pairs.add((first_doc.pk, second_doc.pk))
|
||||||
match_pairs.add((second_doc.pk, first_doc.pk))
|
match_pairs.add((second_doc.pk, first_doc.pk))
|
||||||
|
|
||||||
self.stdout.write(
|
messages.append(
|
||||||
self.style.NOTICE(
|
self.style.NOTICE(
|
||||||
f"Document {first_doc.pk} fuzzy match"
|
f"Document {first_doc.pk} fuzzy match"
|
||||||
f" to {second_doc.pk} (confidence {match:.3f})",
|
f" to {second_doc.pk} (confidence {match:.3f})",
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if len(messages) == 0:
|
||||||
|
messages.append(
|
||||||
|
self.style.NOTICE("No matches found"),
|
||||||
|
)
|
||||||
|
self.stdout.writelines(
|
||||||
|
messages,
|
||||||
|
)
|
||||||
|
@ -45,7 +45,7 @@ class TestFuzzyMatchCommand(TestCase):
|
|||||||
filename="other_test.pdf",
|
filename="other_test.pdf",
|
||||||
)
|
)
|
||||||
stdout, _ = self.call_command()
|
stdout, _ = self.call_command()
|
||||||
self.assertEqual(stdout, "")
|
self.assertEqual(stdout, "No matches found\n")
|
||||||
|
|
||||||
def test_with_matches(self):
|
def test_with_matches(self):
|
||||||
# Content similarity is 86.667
|
# Content similarity is 86.667
|
||||||
|
Loading…
x
Reference in New Issue
Block a user