mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Tweak: more accurate classifier last trained time (#9004)
This commit is contained in:
parent
2103a499eb
commit
3314c59828
@ -1,6 +1,7 @@
|
||||
import logging
|
||||
import pickle
|
||||
import re
|
||||
import time
|
||||
import warnings
|
||||
from collections.abc import Iterator
|
||||
from hashlib import sha256
|
||||
@ -141,6 +142,19 @@ class DocumentClassifier:
|
||||
):
|
||||
raise IncompatibleClassifierVersionError("sklearn version update")
|
||||
|
||||
def set_last_checked(self) -> None:
|
||||
# save a timestamp of the last time we checked for retraining to a file
|
||||
with Path(settings.MODEL_FILE.with_suffix(".last_checked")).open("w") as f:
|
||||
f.write(str(time.time()))
|
||||
|
||||
def get_last_checked(self) -> float | None:
|
||||
# load the timestamp of the last time we checked for retraining
|
||||
try:
|
||||
with Path(settings.MODEL_FILE.with_suffix(".last_checked")).open("r") as f:
|
||||
return float(f.read())
|
||||
except FileNotFoundError: # pragma: no cover
|
||||
return None
|
||||
|
||||
def save(self) -> None:
|
||||
target_file: Path = settings.MODEL_FILE
|
||||
target_file_temp: Path = target_file.with_suffix(".pickle.part")
|
||||
@ -161,6 +175,7 @@ class DocumentClassifier:
|
||||
pickle.dump(self.storage_path_classifier, f)
|
||||
|
||||
target_file_temp.rename(target_file)
|
||||
self.set_last_checked()
|
||||
|
||||
def train(self) -> bool:
|
||||
# Get non-inbox documents
|
||||
@ -229,6 +244,7 @@ class DocumentClassifier:
|
||||
and self.last_doc_change_time >= latest_doc_change
|
||||
) and self.last_auto_type_hash == hasher.digest():
|
||||
logger.info("No updates since last training")
|
||||
self.set_last_checked()
|
||||
# Set the classifier information into the cache
|
||||
# Caching for 50 minutes, so slightly less than the normal retrain time
|
||||
cache.set(
|
||||
|
@ -15,7 +15,6 @@ from urllib.parse import quote
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import pathvalidate
|
||||
from django.apps import apps
|
||||
from django.conf import settings
|
||||
from django.contrib.auth.models import Group
|
||||
from django.contrib.auth.models import User
|
||||
@ -2174,18 +2173,14 @@ class SystemStatusView(PassUserMixin):
|
||||
classifier_status = "WARNING"
|
||||
raise FileNotFoundError(classifier_error)
|
||||
classifier_status = "OK"
|
||||
task_result_model = apps.get_model("django_celery_results", "taskresult")
|
||||
result = (
|
||||
task_result_model.objects.filter(
|
||||
task_name="documents.tasks.train_classifier",
|
||||
status="SUCCESS",
|
||||
classifier_last_trained = (
|
||||
make_aware(
|
||||
datetime.fromtimestamp(classifier.get_last_checked()),
|
||||
)
|
||||
.order_by(
|
||||
"-date_done",
|
||||
)
|
||||
.first()
|
||||
if settings.MODEL_FILE.exists()
|
||||
and classifier.get_last_checked() is not None
|
||||
else None
|
||||
)
|
||||
classifier_last_trained = result.date_done if result else None
|
||||
except Exception as e:
|
||||
if classifier_status is None:
|
||||
classifier_status = "ERROR"
|
||||
|
Loading…
x
Reference in New Issue
Block a user