Update system status to use classifier paperlesstask

This commit is contained in:
shamoon 2025-02-13 18:46:42 -08:00
parent f31df22ab6
commit 673839265d
2 changed files with 23 additions and 56 deletions

View File

@@ -117,14 +117,14 @@ def train_classifier(*, scheduled=True):
f"Saving updated classifier model to {settings.MODEL_FILE}...", f"Saving updated classifier model to {settings.MODEL_FILE}...",
) )
classifier.save() classifier.save()
task.status = states.SUCCESS
task.result = "Training completed successfully" task.result = "Training completed successfully"
else: else:
logger.debug("Training data unchanged.") logger.debug("Training data unchanged.")
task.status = states.SUCCESS
task.result = "Training data unchanged" task.result = "Training data unchanged"
task.save(update_fields=["status", "result"]) task.status = states.SUCCESS
task.date_done = timezone.now()
task.save(update_fields=["status", "result", "date_done"])
except Exception as e: except Exception as e:
logger.warning("Classifier error: " + str(e)) logger.warning("Classifier error: " + str(e))

View File

@@ -15,7 +15,7 @@ from urllib.parse import quote
from urllib.parse import urlparse from urllib.parse import urlparse
import pathvalidate import pathvalidate
from django.apps import apps from celery import states
from django.conf import settings from django.conf import settings
from django.contrib.auth.models import Group from django.contrib.auth.models import Group
from django.contrib.auth.models import User from django.contrib.auth.models import User
@@ -2651,59 +2651,26 @@ class SystemStatusView(PassUserMixin):
) )
index_last_modified = None index_last_modified = None
last_trained_task = (
PaperlessTask.objects.filter(
task_name__icontains="train_classifier",
)
.order_by("-date_done")
.first()
)
classifier_status = (
"OK"
if last_trained_task is not None
and last_trained_task.status == states.SUCCESS
else "ERROR"
)
classifier_error = None classifier_error = None
classifier_status = None if last_trained_task.status == states.FAILURE:
try: classifier_error = last_trained_task.result
classifier = load_classifier(raise_exception=True) classifier_last_trained = (
if classifier is None: last_trained_task.date_done if last_trained_task else None
# Make sure classifier should exist )
docs_queryset = Document.objects.exclude(
tags__is_inbox_tag=True,
)
if (
docs_queryset.count() > 0
and (
Tag.objects.filter(matching_algorithm=Tag.MATCH_AUTO).exists()
or DocumentType.objects.filter(
matching_algorithm=Tag.MATCH_AUTO,
).exists()
or Correspondent.objects.filter(
matching_algorithm=Tag.MATCH_AUTO,
).exists()
or StoragePath.objects.filter(
matching_algorithm=Tag.MATCH_AUTO,
).exists()
)
and not settings.MODEL_FILE.exists()
):
# if classifier file doesn't exist just classify as a warning
classifier_error = "Classifier file does not exist (yet). Re-training may be pending."
classifier_status = "WARNING"
raise FileNotFoundError(classifier_error)
classifier_status = "OK"
task_result_model = apps.get_model("django_celery_results", "taskresult")
result = (
task_result_model.objects.filter(
task_name="documents.tasks.train_classifier",
status="SUCCESS",
)
.order_by(
"-date_done",
)
.first()
)
classifier_last_trained = result.date_done if result else None
except Exception as e:
if classifier_status is None:
classifier_status = "ERROR"
classifier_last_trained = None
if classifier_error is None:
classifier_error = (
"Unable to load classifier, check logs for more detail."
)
logger.exception(
f"System status detected a possible problem while loading the classifier: {e}",
)
return Response( return Response(
{ {