Revert "Tweak: more accurate classifier last trained time (#9004)"

This reverts commit 3314c5982859609eea1635bfdb8545b7df1a7c07.
This commit is contained in:
shamoon 2025-02-13 17:54:08 -08:00
parent f897447a65
commit f31df22ab6
2 changed files with 11 additions and 22 deletions

View File

@@ -1,7 +1,6 @@
import logging import logging
import pickle import pickle
import re import re
import time
import warnings import warnings
from collections.abc import Iterator from collections.abc import Iterator
from hashlib import sha256 from hashlib import sha256
@@ -142,19 +141,6 @@ class DocumentClassifier:
): ):
raise IncompatibleClassifierVersionError("sklearn version update") raise IncompatibleClassifierVersionError("sklearn version update")
def set_last_checked(self) -> None:
    """Record the current time as the last retraining check.

    Writes ``time.time()`` as text to a ``.last_checked`` file that shares
    its base name with ``settings.MODEL_FILE``.
    """
    marker_file = Path(settings.MODEL_FILE.with_suffix(".last_checked"))
    marker_file.write_text(str(time.time()))
def get_last_checked(self) -> float | None:
    """Return the stored timestamp of the last retraining check.

    Reads the ``.last_checked`` file that shares its base name with
    ``settings.MODEL_FILE``.

    Returns:
        The stored value parsed as a float, or ``None`` when the file does
        not exist or its content cannot be parsed as a float.
    """
    marker_file = Path(settings.MODEL_FILE.with_suffix(".last_checked"))
    try:
        return float(marker_file.read_text())
    except FileNotFoundError:  # pragma: no cover
        return None
    except ValueError:
        # The file exists but is empty or holds non-numeric text (for
        # example, it was read while being rewritten). Treat that the same
        # as "no timestamp recorded" rather than propagating an error.
        return None
def save(self) -> None: def save(self) -> None:
target_file: Path = settings.MODEL_FILE target_file: Path = settings.MODEL_FILE
target_file_temp: Path = target_file.with_suffix(".pickle.part") target_file_temp: Path = target_file.with_suffix(".pickle.part")
@@ -175,7 +161,6 @@ class DocumentClassifier:
pickle.dump(self.storage_path_classifier, f) pickle.dump(self.storage_path_classifier, f)
target_file_temp.rename(target_file) target_file_temp.rename(target_file)
self.set_last_checked()
def train(self) -> bool: def train(self) -> bool:
# Get non-inbox documents # Get non-inbox documents
@@ -244,7 +229,6 @@ class DocumentClassifier:
and self.last_doc_change_time >= latest_doc_change and self.last_doc_change_time >= latest_doc_change
) and self.last_auto_type_hash == hasher.digest(): ) and self.last_auto_type_hash == hasher.digest():
logger.info("No updates since last training") logger.info("No updates since last training")
self.set_last_checked()
# Set the classifier information into the cache # Set the classifier information into the cache
# Caching for 50 minutes, so slightly less than the normal retrain time # Caching for 50 minutes, so slightly less than the normal retrain time
cache.set( cache.set(

View File

@@ -15,6 +15,7 @@ from urllib.parse import quote
from urllib.parse import urlparse from urllib.parse import urlparse
import pathvalidate import pathvalidate
from django.apps import apps
from django.conf import settings from django.conf import settings
from django.contrib.auth.models import Group from django.contrib.auth.models import Group
from django.contrib.auth.models import User from django.contrib.auth.models import User
@@ -2680,14 +2681,18 @@ class SystemStatusView(PassUserMixin):
classifier_status = "WARNING" classifier_status = "WARNING"
raise FileNotFoundError(classifier_error) raise FileNotFoundError(classifier_error)
classifier_status = "OK" classifier_status = "OK"
classifier_last_trained = ( task_result_model = apps.get_model("django_celery_results", "taskresult")
make_aware( result = (
datetime.fromtimestamp(classifier.get_last_checked()), task_result_model.objects.filter(
task_name="documents.tasks.train_classifier",
status="SUCCESS",
) )
if settings.MODEL_FILE.exists() .order_by(
and classifier.get_last_checked() is not None "-date_done",
else None )
.first()
) )
classifier_last_trained = result.date_done if result else None
except Exception as e: except Exception as e:
if classifier_status is None: if classifier_status is None:
classifier_status = "ERROR" classifier_status = "ERROR"