Fix: Enforce classifier training ordering to prevent extra training (#8822)

This commit is contained in:
Trenton H 2025-01-19 12:52:03 -08:00 committed by GitHub
parent e1dde85c59
commit fd425aa618
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -170,6 +170,7 @@ class DocumentClassifier:
) )
.select_related("document_type", "correspondent", "storage_path") .select_related("document_type", "correspondent", "storage_path")
.prefetch_related("tags") .prefetch_related("tags")
.order_by("pk")
) )
# No documents exist to train against # No documents exist to train against
@@ -199,11 +200,10 @@ class DocumentClassifier:
hasher.update(y.to_bytes(4, "little", signed=True)) hasher.update(y.to_bytes(4, "little", signed=True))
labels_correspondent.append(y) labels_correspondent.append(y)
tags: list[int] = sorted( tags: list[int] = list(
tag.pk doc.tags.filter(matching_algorithm=MatchingModel.MATCH_AUTO)
for tag in doc.tags.filter( .order_by("pk")
matching_algorithm=MatchingModel.MATCH_AUTO, .values_list("pk", flat=True),
)
) )
for tag in tags: for tag in tags:
hasher.update(tag.to_bytes(4, "little", signed=True)) hasher.update(tag.to_bytes(4, "little", signed=True))