From ce8bf90663b22c8e0d66ad6a9daac9cac46ffd7b Mon Sep 17 00:00:00 2001
From: shamoon <4887959+shamoon@users.noreply.github.com>
Date: Sun, 10 Sep 2023 21:34:40 -0700
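Subject: [PATCH] Add progress bar to document_fuzzy_match

Wrap the outer document loop in a tqdm progress bar and add a
--no-progress-bar flag to disable it. Match messages are now collected
and written after the loop completes instead of as each pair is found,
and "No matches found" is printed when there are no matches.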
---
 .../commands/document_fuzzy_match.py          | 22 +++++++++++++++++--
 src/documents/tests/test_management_fuzzy.py  |  2 +-
 2 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/src/documents/management/commands/document_fuzzy_match.py b/src/documents/management/commands/document_fuzzy_match.py
index f33e2d07c..17ddf4351 100644
--- a/src/documents/management/commands/document_fuzzy_match.py
+++ b/src/documents/management/commands/document_fuzzy_match.py
@@ -1,6 +1,7 @@
 from typing import Final
 
 import rapidfuzz
+import tqdm
 from django.core.management import BaseCommand
 from django.core.management import CommandError
 
@@ -17,12 +18,19 @@ class Command(BaseCommand):
             type=float,
             help="Ratio to consider documents a match",
         )
+        parser.add_argument(
+            "--no-progress-bar",
+            default=False,
+            action="store_true",
+            help="If set, the progress bar will not be shown",
+        )
 
     def handle(self, *args, **options):
         RATIO_MIN: Final[float] = 0.0
         RATIO_MAX: Final[float] = 100.0
 
         opt_ratio = options["ratio"]
+        progress_bar_disable = options["no_progress_bar"]
         match_pairs = set()
 
         # Ratio is a float from 0.0 to 100.0
@@ -31,7 +39,9 @@ class Command(BaseCommand):
 
         all_docs = Document.objects.all().order_by("id")
 
-        for first_doc in all_docs:
+        messages = []
+
+        for first_doc in tqdm.tqdm(all_docs, disable=progress_bar_disable):
             for second_doc in all_docs:
                 if first_doc.pk == second_doc.pk:
                     continue
@@ -55,9 +65,17 @@ class Command(BaseCommand):
                         match_pairs.add((first_doc.pk, second_doc.pk))
                         match_pairs.add((second_doc.pk, first_doc.pk))
 
-                    self.stdout.write(
+                    messages.append(
                         self.style.NOTICE(
                             f"Document {first_doc.pk} fuzzy match"
                             f" to {second_doc.pk} (confidence {match:.3f})",
                         ),
                     )
+
+        if len(messages) == 0:
+            messages.append(
+                self.style.NOTICE("No matches found"),
+            )
+        self.stdout.writelines(
+            messages,
+        )
diff --git a/src/documents/tests/test_management_fuzzy.py b/src/documents/tests/test_management_fuzzy.py
index 71b04b506..3c64696e7 100644
--- a/src/documents/tests/test_management_fuzzy.py
+++ b/src/documents/tests/test_management_fuzzy.py
@@ -45,7 +45,7 @@ class TestFuzzyMatchCommand(TestCase):
             filename="other_test.pdf",
         )
         stdout, _ = self.call_command()
-        self.assertEqual(stdout, "")
+        self.assertEqual(stdout, "No matches found\n")
 
     def test_with_matches(self):
         # Content similarity is 86.667