Revert "Tweak: more accurate classifier last trained time (#9004)"

This reverts commit 3314c5982859609eea1635bfdb8545b7df1a7c07.
This commit is contained in:
shamoon 2025-02-13 17:54:08 -08:00
parent f897447a65
commit f31df22ab6
2 changed files with 11 additions and 22 deletions

View File

@ -1,7 +1,6 @@
import logging
import pickle
import re
import time
import warnings
from collections.abc import Iterator
from hashlib import sha256
@ -142,19 +141,6 @@ class DocumentClassifier:
):
raise IncompatibleClassifierVersionError("sklearn version update")
def set_last_checked(self) -> None:
    """Record the current time as the last retraining check.

    The value of ``time.time()`` is written, as text, to a file that has
    the same path as ``settings.MODEL_FILE`` but with the suffix
    ``.last_checked``. Any existing content of that file is overwritten.
    """
    marker_path = Path(settings.MODEL_FILE.with_suffix(".last_checked"))
    marker_path.write_text(str(time.time()))
def get_last_checked(self) -> float | None:
    """Return the timestamp of the last retraining check, if one is stored.

    The timestamp is read from the file that has the same path as
    ``settings.MODEL_FILE`` but with the suffix ``.last_checked``.

    Returns:
        The stored timestamp as a float, or ``None`` when the file does
        not exist or its content cannot be parsed as a float.
    """
    marker_path = Path(settings.MODEL_FILE.with_suffix(".last_checked"))
    try:
        return float(marker_path.read_text())
    except FileNotFoundError:  # pragma: no cover
        return None
    except ValueError:
        # An empty or partially written marker file is treated the same
        # as a missing one, so callers never see a parse error.
        logger.warning(
            "Ignoring unreadable last-checked timestamp in %s",
            marker_path,
        )
        return None
def save(self) -> None:
target_file: Path = settings.MODEL_FILE
target_file_temp: Path = target_file.with_suffix(".pickle.part")
@ -175,7 +161,6 @@ class DocumentClassifier:
pickle.dump(self.storage_path_classifier, f)
target_file_temp.rename(target_file)
self.set_last_checked()
def train(self) -> bool:
# Get non-inbox documents
@ -244,7 +229,6 @@ class DocumentClassifier:
and self.last_doc_change_time >= latest_doc_change
) and self.last_auto_type_hash == hasher.digest():
logger.info("No updates since last training")
self.set_last_checked()
# Set the classifier information into the cache
# Caching for 50 minutes, so slightly less than the normal retrain time
cache.set(

View File

@ -15,6 +15,7 @@ from urllib.parse import quote
from urllib.parse import urlparse
import pathvalidate
from django.apps import apps
from django.conf import settings
from django.contrib.auth.models import Group
from django.contrib.auth.models import User
@ -2680,14 +2681,18 @@ class SystemStatusView(PassUserMixin):
classifier_status = "WARNING"
raise FileNotFoundError(classifier_error)
classifier_status = "OK"
classifier_last_trained = (
make_aware(
datetime.fromtimestamp(classifier.get_last_checked()),
task_result_model = apps.get_model("django_celery_results", "taskresult")
result = (
task_result_model.objects.filter(
task_name="documents.tasks.train_classifier",
status="SUCCESS",
)
if settings.MODEL_FILE.exists()
and classifier.get_last_checked() is not None
else None
.order_by(
"-date_done",
)
.first()
)
classifier_last_trained = result.date_done if result else None
except Exception as e:
if classifier_status is None:
classifier_status = "ERROR"