Enhancement: system status report sanity check, simpler classifier check, styling updates (#9106)

This commit is contained in:
shamoon
2025-02-26 14:12:20 -08:00
committed by GitHub
parent ec34197b59
commit 2d52226732
30 changed files with 1117 additions and 479 deletions

View File

@@ -14,6 +14,7 @@ from urllib.parse import urlparse
import httpx
import pathvalidate
from celery import states
from django.conf import settings
from django.contrib.auth.models import Group
from django.contrib.auth.models import User
@@ -103,6 +104,7 @@ from documents.filters import DocumentsOrderingFilter
from documents.filters import DocumentTypeFilterSet
from documents.filters import ObjectOwnedOrGrantedPermissionsFilter
from documents.filters import ObjectOwnedPermissionsFilter
from documents.filters import PaperlessTaskFilterSet
from documents.filters import ShareLinkFilterSet
from documents.filters import StoragePathFilterSet
from documents.filters import TagFilterSet
@@ -144,6 +146,7 @@ from documents.serialisers import DocumentListSerializer
from documents.serialisers import DocumentSerializer
from documents.serialisers import DocumentTypeSerializer
from documents.serialisers import PostDocumentSerializer
from documents.serialisers import RunTaskViewSerializer
from documents.serialisers import SavedViewSerializer
from documents.serialisers import SearchResultSerializer
from documents.serialisers import ShareLinkSerializer
@@ -160,6 +163,9 @@ from documents.serialisers import WorkflowTriggerSerializer
from documents.signals import document_updated
from documents.tasks import consume_file
from documents.tasks import empty_trash
from documents.tasks import index_optimize
from documents.tasks import sanity_check
from documents.tasks import train_classifier
from documents.templating.filepath import validate_filepath_template_and_render
from paperless import version
from paperless.celery import app as celery_app
@@ -2276,16 +2282,27 @@ class RemoteVersionView(GenericAPIView):
class TasksViewSet(ReadOnlyModelViewSet):
permission_classes = (IsAuthenticated, PaperlessObjectPermissions)
serializer_class = TasksViewSerializer
filter_backends = (ObjectOwnedOrGrantedPermissionsFilter,)
filter_backends = (
DjangoFilterBackend,
OrderingFilter,
ObjectOwnedOrGrantedPermissionsFilter,
)
filterset_class = PaperlessTaskFilterSet
TASK_AND_ARGS_BY_NAME = {
PaperlessTask.TaskName.INDEX_OPTIMIZE: (index_optimize, {}),
PaperlessTask.TaskName.TRAIN_CLASSIFIER: (
train_classifier,
{"scheduled": False},
),
PaperlessTask.TaskName.CHECK_SANITY: (
sanity_check,
{"scheduled": False, "raise_on_error": False},
),
}
def get_queryset(self):
queryset = (
PaperlessTask.objects.filter(
acknowledged=False,
)
.order_by("date_created")
.reverse()
)
queryset = PaperlessTask.objects.all().order_by("-date_created")
task_id = self.request.query_params.get("task_id")
if task_id is not None:
queryset = PaperlessTask.objects.filter(task_id=task_id)
@@ -2308,6 +2325,25 @@ class TasksViewSet(ReadOnlyModelViewSet):
except Exception:
return HttpResponseBadRequest()
@action(methods=["post"], detail=False)
def run(self, request):
serializer = RunTaskViewSerializer(data=request.data)
serializer.is_valid(raise_exception=True)
task_name = serializer.validated_data.get("task_name")
if not request.user.is_superuser:
return HttpResponseForbidden("Insufficient permissions")
try:
task_func, task_args = self.TASK_AND_ARGS_BY_NAME[task_name]
result = task_func(**task_args)
return Response({"result": result})
except Exception as e:
logger.warning(f"An error occurred running task: {e!s}")
return HttpResponseServerError(
"Error running task, check logs for more detail.",
)
class ShareLinkViewSet(ModelViewSet, PassUserMixin):
model = ShareLink
@@ -2614,6 +2650,14 @@ class CustomFieldViewSet(ModelViewSet):
"last_trained": serializers.DateTimeField(),
},
),
"sanity_check": inline_serializer(
name="SanityCheck",
fields={
"status": serializers.CharField(),
"error": serializers.CharField(),
"last_run": serializers.DateTimeField(),
},
),
},
),
},
@@ -2674,13 +2718,20 @@ class SystemStatusView(PassUserMixin):
)
redis_error = "Error connecting to redis, check logs for more detail."
celery_error = None
celery_url = None
try:
celery_ping = celery_app.control.inspect().ping()
first_worker_ping = celery_ping[next(iter(celery_ping.keys()))]
celery_url = next(iter(celery_ping.keys()))
first_worker_ping = celery_ping[celery_url]
if first_worker_ping["ok"] == "pong":
celery_active = "OK"
except Exception:
except Exception as e:
celery_active = "ERROR"
logger.exception(
f"System status detected a possible problem while connecting to celery: {e}",
)
celery_error = "Error connecting to celery, check logs for more detail."
index_error = None
try:
@@ -2697,55 +2748,43 @@ class SystemStatusView(PassUserMixin):
)
index_last_modified = None
last_trained_task = (
PaperlessTask.objects.filter(
task_name=PaperlessTask.TaskName.TRAIN_CLASSIFIER,
)
.order_by("-date_done")
.first()
)
classifier_status = "OK"
classifier_error = None
classifier_status = None
try:
classifier = load_classifier(raise_exception=True)
if classifier is None:
# Make sure classifier should exist
docs_queryset = Document.objects.exclude(
tags__is_inbox_tag=True,
)
if (
docs_queryset.count() > 0
and (
Tag.objects.filter(matching_algorithm=Tag.MATCH_AUTO).exists()
or DocumentType.objects.filter(
matching_algorithm=Tag.MATCH_AUTO,
).exists()
or Correspondent.objects.filter(
matching_algorithm=Tag.MATCH_AUTO,
).exists()
or StoragePath.objects.filter(
matching_algorithm=Tag.MATCH_AUTO,
).exists()
)
and not settings.MODEL_FILE.exists()
):
# if classifier file doesn't exist just classify as a warning
classifier_error = "Classifier file does not exist (yet). Re-training may be pending."
classifier_status = "WARNING"
raise FileNotFoundError(classifier_error)
classifier_status = "OK"
classifier_last_trained = (
make_aware(
datetime.fromtimestamp(classifier.get_last_checked()),
)
if settings.MODEL_FILE.exists()
and classifier.get_last_checked() is not None
else None
)
except Exception as e:
if classifier_status is None:
classifier_status = "ERROR"
classifier_last_trained = None
if classifier_error is None:
classifier_error = (
"Unable to load classifier, check logs for more detail."
)
logger.exception(
f"System status detected a possible problem while loading the classifier: {e}",
if last_trained_task is None:
classifier_status = "WARNING"
classifier_error = "No classifier training tasks found"
elif last_trained_task and last_trained_task.status == states.FAILURE:
classifier_status = "ERROR"
classifier_error = last_trained_task.result
classifier_last_trained = (
last_trained_task.date_done if last_trained_task else None
)
last_sanity_check = (
PaperlessTask.objects.filter(
task_name=PaperlessTask.TaskName.CHECK_SANITY,
)
.order_by("-date_done")
.first()
)
sanity_check_status = "OK"
sanity_check_error = None
if last_sanity_check is None:
sanity_check_status = "WARNING"
sanity_check_error = "No sanity check tasks found"
elif last_sanity_check and last_sanity_check.status == states.FAILURE:
sanity_check_status = "ERROR"
sanity_check_error = last_sanity_check.result
sanity_check_last_run = (
last_sanity_check.date_done if last_sanity_check else None
)
return Response(
{
@@ -2773,12 +2812,17 @@ class SystemStatusView(PassUserMixin):
"redis_status": redis_status,
"redis_error": redis_error,
"celery_status": celery_active,
"celery_url": celery_url,
"celery_error": celery_error,
"index_status": index_status,
"index_last_modified": index_last_modified,
"index_error": index_error,
"classifier_status": classifier_status,
"classifier_last_trained": classifier_last_trained,
"classifier_error": classifier_error,
"sanity_check_status": sanity_check_status,
"sanity_check_last_run": sanity_check_last_run,
"sanity_check_error": sanity_check_error,
},
},
)