From fd425aa618eeff830d79c2f67af35933ed266cf0 Mon Sep 17 00:00:00 2001 From: Trenton H <797416+stumpylog@users.noreply.github.com> Date: Sun, 19 Jan 2025 12:52:03 -0800 Subject: [PATCH] Fix: Enforce classifier training ordering to prevent extra training (#8822) --- src/documents/classifier.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/documents/classifier.py b/src/documents/classifier.py index 4c36dc5e0..b3e71711c 100644 --- a/src/documents/classifier.py +++ b/src/documents/classifier.py @@ -170,6 +170,7 @@ class DocumentClassifier: ) .select_related("document_type", "correspondent", "storage_path") .prefetch_related("tags") + .order_by("pk") ) # No documents exit to train against @@ -199,11 +200,10 @@ class DocumentClassifier: hasher.update(y.to_bytes(4, "little", signed=True)) labels_correspondent.append(y) - tags: list[int] = sorted( - tag.pk - for tag in doc.tags.filter( - matching_algorithm=MatchingModel.MATCH_AUTO, - ) + tags: list[int] = list( + doc.tags.filter(matching_algorithm=MatchingModel.MATCH_AUTO) + .order_by("pk") + .values_list("pk", flat=True), ) for tag in tags: hasher.update(tag.to_bytes(4, "little", signed=True))