diff --git a/src/documents/management/commands/document_fuzzy_match.py b/src/documents/management/commands/document_fuzzy_match.py
index 5eebeb172..4ecdf6d01 100644
--- a/src/documents/management/commands/document_fuzzy_match.py
+++ b/src/documents/management/commands/document_fuzzy_match.py
@@ -92,6 +92,9 @@ class Command(MultiProcessMixin, ProgressBarMixin, BaseCommand):
                 # doc to doc is obviously not useful
                 if first_doc.pk == second_doc.pk:
                     continue
+                # Skip documents with no text content (e.g. password-protected)
+                if not first_doc.content.strip() or not second_doc.content.strip():
+                    continue
                 # Skip matching which have already been matched together
                 # doc 1 to doc 2 is the same as doc 2 to doc 1
                 doc_1_to_doc_2 = (first_doc.pk, second_doc.pk)
diff --git a/src/documents/tests/test_management_fuzzy.py b/src/documents/tests/test_management_fuzzy.py
index 2d7d3735a..453a86082 100644
--- a/src/documents/tests/test_management_fuzzy.py
+++ b/src/documents/tests/test_management_fuzzy.py
@@ -206,3 +206,29 @@ class TestFuzzyMatchCommand(TestCase):
         self.assertEqual(Document.objects.count(), 2)
         self.assertIsNotNone(Document.objects.get(pk=1))
         self.assertIsNotNone(Document.objects.get(pk=2))
+
+    def test_empty_content(self):
+        """
+        GIVEN:
+            - 2 documents exist, both with empty content (e.g. password-protected)
+        WHEN:
+            - Command is called
+        THEN:
+            - No matches are found
+        """
+        Document.objects.create(
+            checksum="BEEFCAFE",
+            title="A",
+            content="",
+            mime_type="application/pdf",
+            filename="test.pdf",
+        )
+        Document.objects.create(
+            checksum="DEADBEAF",
+            title="A",
+            content="",
+            mime_type="application/pdf",
+            filename="other_test.pdf",
+        )
+        stdout, _ = self.call_command()
+        self.assertIn("No matches found", stdout)