Compare commits

..

1 Commits

Author SHA1 Message Date
shamoon
6119c215e7 Fix: skip fuzzy matching for empty document content (#10914) 2025-09-22 23:30:24 -07:00
3 changed files with 30 additions and 1 deletions

View File

@@ -32,7 +32,7 @@ RUN set -eux \
# Purpose: Installs s6-overlay and rootfs # Purpose: Installs s6-overlay and rootfs
# Comments: # Comments:
# - Don't leave anything extra in here either # - Don't leave anything extra in here either
FROM ghcr.io/astral-sh/uv:0.8.19-python3.12-bookworm-slim AS s6-overlay-base FROM ghcr.io/astral-sh/uv:0.8.17-python3.12-bookworm-slim AS s6-overlay-base
WORKDIR /usr/src/s6 WORKDIR /usr/src/s6

View File

@@ -92,6 +92,9 @@ class Command(MultiProcessMixin, ProgressBarMixin, BaseCommand):
# doc to doc is obviously not useful # doc to doc is obviously not useful
if first_doc.pk == second_doc.pk: if first_doc.pk == second_doc.pk:
continue continue
# Skip empty documents (e.g. password-protected)
if first_doc.content.strip() == "" or second_doc.content.strip() == "":
continue
# Skip matching which have already been matched together # Skip matching which have already been matched together
# doc 1 to doc 2 is the same as doc 2 to doc 1 # doc 1 to doc 2 is the same as doc 2 to doc 1
doc_1_to_doc_2 = (first_doc.pk, second_doc.pk) doc_1_to_doc_2 = (first_doc.pk, second_doc.pk)

View File

@@ -206,3 +206,29 @@ class TestFuzzyMatchCommand(TestCase):
self.assertEqual(Document.objects.count(), 2) self.assertEqual(Document.objects.count(), 2)
self.assertIsNotNone(Document.objects.get(pk=1)) self.assertIsNotNone(Document.objects.get(pk=1))
self.assertIsNotNone(Document.objects.get(pk=2)) self.assertIsNotNone(Document.objects.get(pk=2))
def test_empty_content(self):
"""
GIVEN:
- 2 documents exist, content is empty (pw-protected)
WHEN:
- Command is called
THEN:
- No matches are found
"""
Document.objects.create(
checksum="BEEFCAFE",
title="A",
content="",
mime_type="application/pdf",
filename="test.pdf",
)
Document.objects.create(
checksum="DEADBEAF",
title="A",
content="",
mime_type="application/pdf",
filename="other_test.pdf",
)
stdout, _ = self.call_command()
self.assertIn("No matches found", stdout)