Compare commits

..

4 Commits

Author SHA1 Message Date
shamoon
d70cadaf05 Try this should_run thing 2025-09-23 08:27:54 -07:00
shamoon
5e6c6f63c2 Try a concurrency block 2025-09-23 08:09:17 -07:00
shamoon
6635fb11a1 Chore: remove conditional from pre-commit job in CI 2025-09-23 07:47:11 -07:00
shamoon
6119c215e7 Fix: skip fuzzy matching for empty document content (#10914) 2025-09-22 23:30:24 -07:00
5 changed files with 79 additions and 9 deletions

View File

@@ -17,11 +17,52 @@ env:
DEFAULT_PYTHON_VERSION: "3.11"
NLTK_DATA: "/usr/share/nltk_data"
jobs:
detect-duplicate:
name: Detect Duplicate Run
runs-on: ubuntu-24.04
outputs:
should_run: ${{ steps.check.outputs.should_run }}
steps:
- name: Check if workflow should run
id: check
uses: actions/github-script@v7
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |
if (context.eventName !== 'push') {
core.info('Not a push event; running workflow.');
core.setOutput('should_run', 'true');
return;
}
const ref = context.ref || '';
if (!ref.startsWith('refs/heads/')) {
core.info('Push is not to a branch; running workflow.');
core.setOutput('should_run', 'true');
return;
}
const branch = ref.substring('refs/heads/'.length);
const { owner, repo } = context.repo;
const prs = await github.paginate(github.rest.pulls.list, {
owner,
repo,
state: 'open',
head: `${owner}:${branch}`,
per_page: 100,
});
if (prs.length === 0) {
core.info(`No open PR found for ${branch}; running workflow.`);
core.setOutput('should_run', 'true');
} else {
core.info(`Found ${prs.length} open PR(s) for ${branch}; skipping duplicate push run.`);
core.setOutput('should_run', 'false');
}
pre-commit:
# We want to run on external PRs, but not on our own internal PRs as they'll be run
# by the push to the branch. Without this if check, checks are duplicated since
# internal PRs match both the push and pull_request events.
if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository
needs:
- detect-duplicate
if: needs.detect-duplicate.outputs.should_run == 'true'
name: Linting Checks
runs-on: ubuntu-24.04
steps:

View File

@@ -33,7 +33,7 @@ dependencies = [
"django-cors-headers~=4.8.0",
"django-extensions~=4.1",
"django-filter~=25.1",
"django-guardian~=3.2.0",
"django-guardian~=3.1.2",
"django-multiselectfield~=1.0.1",
"django-soft-delete~=1.0.18",
"django-treenode>=0.23.2",

View File

@@ -92,6 +92,9 @@ class Command(MultiProcessMixin, ProgressBarMixin, BaseCommand):
# doc to doc is obviously not useful
if first_doc.pk == second_doc.pk:
continue
# Skip empty documents (e.g. password-protected)
if first_doc.content.strip() == "" or second_doc.content.strip() == "":
continue
# Skip matching which have already been matched together
# doc 1 to doc 2 is the same as doc 2 to doc 1
doc_1_to_doc_2 = (first_doc.pk, second_doc.pk)

View File

@@ -206,3 +206,29 @@ class TestFuzzyMatchCommand(TestCase):
self.assertEqual(Document.objects.count(), 2)
self.assertIsNotNone(Document.objects.get(pk=1))
self.assertIsNotNone(Document.objects.get(pk=2))
def test_empty_content(self):
"""
GIVEN:
- 2 documents exist, content is empty (pw-protected)
WHEN:
- Command is called
THEN:
- No matches are found
"""
Document.objects.create(
checksum="BEEFCAFE",
title="A",
content="",
mime_type="application/pdf",
filename="test.pdf",
)
Document.objects.create(
checksum="DEADBEAF",
title="A",
content="",
mime_type="application/pdf",
filename="other_test.pdf",
)
stdout, _ = self.call_command()
self.assertIn("No matches found", stdout)

8
uv.lock generated
View File

@@ -782,15 +782,15 @@ wheels = [
[[package]]
name = "django-guardian"
version = "3.2.0"
version = "3.1.3"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "django", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "typing-extensions", marker = "(python_full_version < '3.13' and sys_platform == 'darwin') or (python_full_version < '3.13' and sys_platform == 'linux')" },
]
sdist = { url = "https://files.pythonhosted.org/packages/e2/f9/bcff6a931298b9eb55e1550b55ab964fab747f594ba6d2d81cbe19736c5f/django_guardian-3.2.0.tar.gz", hash = "sha256:9e18ecd2e211b665972690c2d03d27bce0ea4932b5efac24a4bb9d526950a69e", size = 99940, upload-time = "2025-09-16T10:35:53.609Z" }
sdist = { url = "https://files.pythonhosted.org/packages/81/d3/436a44c7688fce1a978224c349ba66c95bf9103d548596b7a2694fd58c03/django_guardian-3.1.3.tar.gz", hash = "sha256:12b5e66c18c97088b0adfa033ab14be68c321c170fd3ec438898271f00a71699", size = 93571, upload-time = "2025-09-10T08:36:23.928Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/2f/23/63a7d868373a73d25c4a5c2dd3cce3aaeb22fbee82560d42b6e93ba01403/django_guardian-3.2.0-py3-none-any.whl", hash = "sha256:0768565a057988a93fc4a1d93649c4a794abfd7473a8408a079cfbf83c559d77", size = 134674, upload-time = "2025-09-16T10:35:51.69Z" },
{ url = "https://files.pythonhosted.org/packages/83/fc/6fd7b8bc7c52cbbfd1714673cfd28ff0b3fae32265c52d492ec0dee22cb8/django_guardian-3.1.3-py3-none-any.whl", hash = "sha256:90e28b40eea65c326a3a961908cc300f9e1cd69b74e88d38317a9befa167b71c", size = 127687, upload-time = "2025-09-10T08:36:22.533Z" },
]
[[package]]
@@ -2185,7 +2185,7 @@ requires-dist = [
{ name = "django-cors-headers", specifier = "~=4.8.0" },
{ name = "django-extensions", specifier = "~=4.1" },
{ name = "django-filter", specifier = "~=25.1" },
{ name = "django-guardian", specifier = "~=3.2.0" },
{ name = "django-guardian", specifier = "~=3.1.2" },
{ name = "django-multiselectfield", specifier = "~=1.0.1" },
{ name = "django-soft-delete", specifier = "~=1.0.18" },
{ name = "django-treenode", specifier = ">=0.23.2" },