From 3314c5982859609eea1635bfdb8545b7df1a7c07 Mon Sep 17 00:00:00 2001 From: shamoon <4887959+shamoon@users.noreply.github.com> Date: Thu, 6 Feb 2025 10:54:31 -0800 Subject: [PATCH] Tweak: more accurate classifier last trained time (#9004) --- src/documents/classifier.py | 16 ++++++++++++++++ src/documents/views.py | 17 ++++++----------- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/src/documents/classifier.py b/src/documents/classifier.py index 72bf1f16c..5bc8be2c6 100644 --- a/src/documents/classifier.py +++ b/src/documents/classifier.py @@ -1,6 +1,7 @@ import logging import pickle import re +import time import warnings from collections.abc import Iterator from hashlib import sha256 @@ -141,6 +142,19 @@ class DocumentClassifier: ): raise IncompatibleClassifierVersionError("sklearn version update") + def set_last_checked(self) -> None: + # save a timestamp of the last time we checked for retraining to a file + with Path(settings.MODEL_FILE.with_suffix(".last_checked")).open("w") as f: + f.write(str(time.time())) + + def get_last_checked(self) -> float | None: + # load the timestamp of the last time we checked for retraining + try: + with Path(settings.MODEL_FILE.with_suffix(".last_checked")).open("r") as f: + return float(f.read()) + except FileNotFoundError: # pragma: no cover + return None + def save(self) -> None: target_file: Path = settings.MODEL_FILE target_file_temp: Path = target_file.with_suffix(".pickle.part") @@ -161,6 +175,7 @@ class DocumentClassifier: pickle.dump(self.storage_path_classifier, f) target_file_temp.rename(target_file) + self.set_last_checked() def train(self) -> bool: # Get non-inbox documents @@ -229,6 +244,7 @@ class DocumentClassifier: and self.last_doc_change_time >= latest_doc_change ) and self.last_auto_type_hash == hasher.digest(): logger.info("No updates since last training") + self.set_last_checked() # Set the classifier information into the cache # Caching for 50 minutes, so slightly less than the normal retrain time cache.set( diff --git a/src/documents/views.py b/src/documents/views.py index f98932a6f..24578179a 100644 --- a/src/documents/views.py +++ b/src/documents/views.py @@ -15,7 +15,6 @@ from urllib.parse import quote from urllib.parse import urlparse import pathvalidate -from django.apps import apps from django.conf import settings from django.contrib.auth.models import Group from django.contrib.auth.models import User @@ -2174,18 +2173,14 @@ class SystemStatusView(PassUserMixin): classifier_status = "WARNING" raise FileNotFoundError(classifier_error) classifier_status = "OK" - task_result_model = apps.get_model("django_celery_results", "taskresult") - result = ( - task_result_model.objects.filter( - task_name="documents.tasks.train_classifier", - status="SUCCESS", + classifier_last_trained = ( + make_aware( + datetime.fromtimestamp(classifier.get_last_checked()), ) - .order_by( - "-date_done", - ) - .first() + if settings.MODEL_FILE.exists() + and classifier.get_last_checked() is not None + else None ) - classifier_last_trained = result.date_done if result else None except Exception as e: if classifier_status is None: classifier_status = "ERROR"