mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-07-28 18:24:38 -05:00
Enhancement: system status report sanity check, simpler classifier check, styling updates (#9106)
This commit is contained in:
@@ -9,6 +9,7 @@ from tempfile import TemporaryDirectory
|
||||
import tqdm
|
||||
from celery import Task
|
||||
from celery import shared_task
|
||||
from celery import states
|
||||
from django.conf import settings
|
||||
from django.contrib.contenttypes.models import ContentType
|
||||
from django.db import models
|
||||
@@ -35,6 +36,7 @@ from documents.models import Correspondent
|
||||
from documents.models import CustomFieldInstance
|
||||
from documents.models import Document
|
||||
from documents.models import DocumentType
|
||||
from documents.models import PaperlessTask
|
||||
from documents.models import StoragePath
|
||||
from documents.models import Tag
|
||||
from documents.models import Workflow
|
||||
@@ -74,19 +76,34 @@ def index_reindex(*, progress_bar_disable=False):
|
||||
|
||||
|
||||
@shared_task
|
||||
def train_classifier():
|
||||
def train_classifier(*, scheduled=True):
|
||||
task = PaperlessTask.objects.create(
|
||||
type=PaperlessTask.TaskType.SCHEDULED_TASK
|
||||
if scheduled
|
||||
else PaperlessTask.TaskType.MANUAL_TASK,
|
||||
task_id=uuid.uuid4(),
|
||||
task_name=PaperlessTask.TaskName.TRAIN_CLASSIFIER,
|
||||
status=states.STARTED,
|
||||
date_created=timezone.now(),
|
||||
date_started=timezone.now(),
|
||||
)
|
||||
if (
|
||||
not Tag.objects.filter(matching_algorithm=Tag.MATCH_AUTO).exists()
|
||||
and not DocumentType.objects.filter(matching_algorithm=Tag.MATCH_AUTO).exists()
|
||||
and not Correspondent.objects.filter(matching_algorithm=Tag.MATCH_AUTO).exists()
|
||||
and not StoragePath.objects.filter(matching_algorithm=Tag.MATCH_AUTO).exists()
|
||||
):
|
||||
logger.info("No automatic matching items, not training")
|
||||
result = "No automatic matching items, not training"
|
||||
logger.info(result)
|
||||
# Special case, items were once auto and trained, so remove the model
|
||||
# and prevent its use again
|
||||
if settings.MODEL_FILE.exists():
|
||||
logger.info(f"Removing {settings.MODEL_FILE} so it won't be used")
|
||||
settings.MODEL_FILE.unlink()
|
||||
task.status = states.SUCCESS
|
||||
task.result = result
|
||||
task.date_done = timezone.now()
|
||||
task.save()
|
||||
return
|
||||
|
||||
classifier = load_classifier()
|
||||
@@ -100,11 +117,19 @@ def train_classifier():
|
||||
f"Saving updated classifier model to {settings.MODEL_FILE}...",
|
||||
)
|
||||
classifier.save()
|
||||
task.result = "Training completed successfully"
|
||||
else:
|
||||
logger.debug("Training data unchanged.")
|
||||
task.result = "Training data unchanged"
|
||||
|
||||
task.status = states.SUCCESS
|
||||
task.date_done = timezone.now()
|
||||
task.save(update_fields=["status", "result", "date_done"])
|
||||
|
||||
except Exception as e:
|
||||
logger.warning("Classifier error: " + str(e))
|
||||
task.status = states.FAILURE
|
||||
task.result = str(e)
|
||||
|
||||
|
||||
@shared_task(bind=True)
|
||||
@@ -176,13 +201,16 @@ def consume_file(
|
||||
|
||||
|
||||
@shared_task
|
||||
def sanity_check():
|
||||
messages = sanity_checker.check_sanity()
|
||||
def sanity_check(*, scheduled=True, raise_on_error=True):
|
||||
messages = sanity_checker.check_sanity(scheduled=scheduled)
|
||||
|
||||
messages.log_messages()
|
||||
|
||||
if messages.has_error:
|
||||
raise SanityCheckFailedException("Sanity check failed with errors. See log.")
|
||||
message = "Sanity check exited with errors. See log."
|
||||
if raise_on_error:
|
||||
raise SanityCheckFailedException(message)
|
||||
return message
|
||||
elif messages.has_warning:
|
||||
return "Sanity check exited with warnings. See log."
|
||||
elif len(messages) > 0:
|
||||
|
Reference in New Issue
Block a user