Just use the model file's mtime and keep it fresh on every training run

This commit is contained in:
shamoon 2025-02-02 23:56:14 -08:00
parent a8d46196d1
commit b735a0841c
No known key found for this signature in database
2 changed files with 6 additions and 22 deletions

View File

@ -1,6 +1,8 @@
import logging
import os
import pickle
import re
import time
import warnings
from collections.abc import Iterator
from hashlib import sha256
@ -229,6 +231,9 @@ class DocumentClassifier:
and self.last_doc_change_time >= latest_doc_change
) and self.last_auto_type_hash == hasher.digest():
logger.info("No updates since last training")
# Nothing was retrained, but touch the model file anyway so its mtime still records when training last ran
new_mtime = time.time()
os.utime(settings.MODEL_FILE, (new_mtime, new_mtime))
# Set the classifier information into the cache
# Caching for 50 minutes, so slightly less than the normal retrain time
cache.set(

View File

@ -15,7 +15,6 @@ from urllib.parse import quote
from urllib.parse import urlparse
import pathvalidate
from django.apps import apps
from django.conf import settings
from django.contrib.auth.models import Group
from django.contrib.auth.models import User
@ -2174,33 +2173,13 @@ class SystemStatusView(PassUserMixin):
classifier_status = "WARNING"
raise FileNotFoundError(classifier_error)
classifier_status = "OK"
task_result_model = apps.get_model("django_celery_results", "taskresult")
result = (
task_result_model.objects.filter(
task_name="documents.tasks.train_classifier",
status="SUCCESS",
)
.order_by(
"-date_done",
)
.first()
)
classifier_last_auto_trained = result.date_done if result else None
classifier_last_modified = (
classifier_last_trained = (
make_aware(
datetime.fromtimestamp(settings.MODEL_FILE.stat().st_mtime),
)
if settings.MODEL_FILE.exists()
else None
)
classifier_last_trained = (
max(
classifier_last_auto_trained,
classifier_last_modified,
)
if classifier_last_auto_trained and classifier_last_modified
else classifier_last_auto_trained or classifier_last_modified
)
except Exception as e:
if classifier_status is None:
classifier_status = "ERROR"