From 6119c215e7d8782f4a8a4a1a889462ddd2297b63 Mon Sep 17 00:00:00 2001 From: shamoon <4887959+shamoon@users.noreply.github.com> Date: Mon, 22 Sep 2025 23:30:24 -0700 Subject: [PATCH] Fix: skip fuzzy matching for empty document content (#10914) --- .../commands/document_fuzzy_match.py | 3 +++ src/documents/tests/test_management_fuzzy.py | 26 +++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/src/documents/management/commands/document_fuzzy_match.py b/src/documents/management/commands/document_fuzzy_match.py index 5eebeb172..4ecdf6d01 100644 --- a/src/documents/management/commands/document_fuzzy_match.py +++ b/src/documents/management/commands/document_fuzzy_match.py @@ -92,6 +92,9 @@ class Command(MultiProcessMixin, ProgressBarMixin, BaseCommand): # doc to doc is obviously not useful if first_doc.pk == second_doc.pk: continue + # Skip empty documents (e.g. password-protected) + if first_doc.content.strip() == "" or second_doc.content.strip() == "": + continue # Skip matching which have already been matched together # doc 1 to doc 2 is the same as doc 2 to doc 1 doc_1_to_doc_2 = (first_doc.pk, second_doc.pk) diff --git a/src/documents/tests/test_management_fuzzy.py b/src/documents/tests/test_management_fuzzy.py index 2d7d3735a..453a86082 100644 --- a/src/documents/tests/test_management_fuzzy.py +++ b/src/documents/tests/test_management_fuzzy.py @@ -206,3 +206,29 @@ class TestFuzzyMatchCommand(TestCase): self.assertEqual(Document.objects.count(), 2) self.assertIsNotNone(Document.objects.get(pk=1)) self.assertIsNotNone(Document.objects.get(pk=2)) + + def test_empty_content(self): + """ + GIVEN: + - 2 documents exist, content is empty (pw-protected) + WHEN: + - Command is called + THEN: + - No matches are found + """ + Document.objects.create( + checksum="BEEFCAFE", + title="A", + content="", + mime_type="application/pdf", + filename="test.pdf", + ) + Document.objects.create( + checksum="DEADBEAF", + title="A", + content="", + mime_type="application/pdf", + filename="other_test.pdf", + ) + stdout, _ = self.call_command() + self.assertIn("No matches found", stdout)