Update system status to use the classifier's PaperlessTask

This commit is contained in:
shamoon 2025-02-13 18:46:42 -08:00
parent f31df22ab6
commit 673839265d
2 changed files with 23 additions and 56 deletions
src/documents

@ -117,14 +117,14 @@ def train_classifier(*, scheduled=True):
f"Saving updated classifier model to {settings.MODEL_FILE}...", f"Saving updated classifier model to {settings.MODEL_FILE}...",
) )
classifier.save() classifier.save()
task.status = states.SUCCESS
task.result = "Training completed successfully" task.result = "Training completed successfully"
else: else:
logger.debug("Training data unchanged.") logger.debug("Training data unchanged.")
task.status = states.SUCCESS
task.result = "Training data unchanged" task.result = "Training data unchanged"
task.save(update_fields=["status", "result"]) task.status = states.SUCCESS
task.date_done = timezone.now()
task.save(update_fields=["status", "result", "date_done"])
except Exception as e: except Exception as e:
logger.warning("Classifier error: " + str(e)) logger.warning("Classifier error: " + str(e))

@ -15,7 +15,7 @@ from urllib.parse import quote
from urllib.parse import urlparse from urllib.parse import urlparse
import pathvalidate import pathvalidate
from django.apps import apps from celery import states
from django.conf import settings from django.conf import settings
from django.contrib.auth.models import Group from django.contrib.auth.models import Group
from django.contrib.auth.models import User from django.contrib.auth.models import User
@ -2651,58 +2651,25 @@ class SystemStatusView(PassUserMixin):
) )
index_last_modified = None index_last_modified = None
classifier_error = None last_trained_task = (
classifier_status = None PaperlessTask.objects.filter(
try: task_name__icontains="train_classifier",
classifier = load_classifier(raise_exception=True)
if classifier is None:
# Make sure classifier should exist
docs_queryset = Document.objects.exclude(
tags__is_inbox_tag=True,
)
if (
docs_queryset.count() > 0
and (
Tag.objects.filter(matching_algorithm=Tag.MATCH_AUTO).exists()
or DocumentType.objects.filter(
matching_algorithm=Tag.MATCH_AUTO,
).exists()
or Correspondent.objects.filter(
matching_algorithm=Tag.MATCH_AUTO,
).exists()
or StoragePath.objects.filter(
matching_algorithm=Tag.MATCH_AUTO,
).exists()
)
and not settings.MODEL_FILE.exists()
):
# if classifier file doesn't exist just classify as a warning
classifier_error = "Classifier file does not exist (yet). Re-training may be pending."
classifier_status = "WARNING"
raise FileNotFoundError(classifier_error)
classifier_status = "OK"
task_result_model = apps.get_model("django_celery_results", "taskresult")
result = (
task_result_model.objects.filter(
task_name="documents.tasks.train_classifier",
status="SUCCESS",
)
.order_by(
"-date_done",
) )
.order_by("-date_done")
.first() .first()
) )
classifier_last_trained = result.date_done if result else None
except Exception as e: classifier_status = (
if classifier_status is None: "OK"
classifier_status = "ERROR" if last_trained_task is not None
classifier_last_trained = None and last_trained_task.status == states.SUCCESS
if classifier_error is None: else "ERROR"
classifier_error = (
"Unable to load classifier, check logs for more detail."
) )
logger.exception( classifier_error = None
f"System status detected a possible problem while loading the classifier: {e}", if last_trained_task.status == states.FAILURE:
classifier_error = last_trained_task.result
classifier_last_trained = (
last_trained_task.date_done if last_trained_task else None
) )
return Response( return Response(