Fix: Enforce classifier training ordering to prevent extra training (#8822)

This commit is contained in:
Trenton H 2025-01-19 12:52:03 -08:00 committed by GitHub
parent e1dde85c59
commit fd425aa618
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -170,6 +170,7 @@ class DocumentClassifier:
)
.select_related("document_type", "correspondent", "storage_path")
.prefetch_related("tags")
+.order_by("pk")
)
# No documents exist to train against
@@ -199,11 +200,10 @@ class DocumentClassifier:
hasher.update(y.to_bytes(4, "little", signed=True))
labels_correspondent.append(y)
-tags: list[int] = sorted(
-tag.pk
-for tag in doc.tags.filter(
-matching_algorithm=MatchingModel.MATCH_AUTO,
-)
+tags: list[int] = list(
+doc.tags.filter(matching_algorithm=MatchingModel.MATCH_AUTO)
+.order_by("pk")
+.values_list("pk", flat=True),
)
for tag in tags:
hasher.update(tag.to_bytes(4, "little", signed=True))