Makes the sanity check messages better for users

This commit is contained in:
Trenton Holmes 2022-05-30 17:03:33 -07:00
parent a4927477fb
commit 0a34a4a7ad
4 changed files with 88 additions and 108 deletions

View File

@ -1,6 +1,7 @@
import hashlib import hashlib
import logging import logging
import os import os
from collections import defaultdict
from django.conf import settings from django.conf import settings
from documents.models import Document from documents.models import Document
@ -9,16 +10,20 @@ from tqdm import tqdm
class SanityCheckMessages: class SanityCheckMessages:
def __init__(self): def __init__(self):
self._messages = [] self._messages = defaultdict(list)
self.has_error = False
self.has_warning = False
def error(self, message): def error(self, doc_pk, message):
self._messages.append({"level": logging.ERROR, "message": message}) self._messages[doc_pk].append({"level": logging.ERROR, "message": message})
self.has_error = True
def warning(self, message): def warning(self, doc_pk, message):
self._messages.append({"level": logging.WARNING, "message": message}) self._messages[doc_pk].append({"level": logging.WARNING, "message": message})
self.has_warning = True
def info(self, message): def info(self, doc_pk, message):
self._messages.append({"level": logging.INFO, "message": message}) self._messages[doc_pk].append({"level": logging.INFO, "message": message})
def log_messages(self): def log_messages(self):
logger = logging.getLogger("paperless.sanity_checker") logger = logging.getLogger("paperless.sanity_checker")
@ -26,7 +31,15 @@ class SanityCheckMessages:
if len(self._messages) == 0: if len(self._messages) == 0:
logger.info("Sanity checker detected no issues.") logger.info("Sanity checker detected no issues.")
else: else:
for msg in self._messages:
# Query once
all_docs = Document.objects.all()
for doc_pk in self._messages:
if doc_pk is not None:
doc = all_docs.get(pk=doc_pk)
logger.info(f"Document: {doc.pk}, title: {doc.title}")
for msg in self._messages[doc_pk]:
logger.log(msg["level"], msg["message"]) logger.log(msg["level"], msg["message"])
def __len__(self): def __len__(self):
@ -35,18 +48,12 @@ class SanityCheckMessages:
def __getitem__(self, item): def __getitem__(self, item):
return self._messages[item] return self._messages[item]
def has_error(self):
return any([msg["level"] == logging.ERROR for msg in self._messages])
def has_warning(self):
return any([msg["level"] == logging.WARNING for msg in self._messages])
class SanityCheckFailedException(Exception): class SanityCheckFailedException(Exception):
pass pass
def check_sanity(progress=False): def check_sanity(progress=False) -> SanityCheckMessages:
messages = SanityCheckMessages() messages = SanityCheckMessages()
present_files = [] present_files = []
@ -61,7 +68,7 @@ def check_sanity(progress=False):
for doc in tqdm(Document.objects.all(), disable=not progress): for doc in tqdm(Document.objects.all(), disable=not progress):
# Check sanity of the thumbnail # Check sanity of the thumbnail
if not os.path.isfile(doc.thumbnail_path): if not os.path.isfile(doc.thumbnail_path):
messages.error(f"Thumbnail of document {doc.pk} does not exist.") messages.error(doc.pk, "Thumbnail of document does not exist.")
else: else:
if os.path.normpath(doc.thumbnail_path) in present_files: if os.path.normpath(doc.thumbnail_path) in present_files:
present_files.remove(os.path.normpath(doc.thumbnail_path)) present_files.remove(os.path.normpath(doc.thumbnail_path))
@ -69,12 +76,12 @@ def check_sanity(progress=False):
with doc.thumbnail_file as f: with doc.thumbnail_file as f:
f.read() f.read()
except OSError as e: except OSError as e:
messages.error(f"Cannot read thumbnail file of document {doc.pk}: {e}") messages.error(doc.pk, f"Cannot read thumbnail file of document: {e}")
# Check sanity of the original file # Check sanity of the original file
# TODO: extract method # TODO: extract method
if not os.path.isfile(doc.source_path): if not os.path.isfile(doc.source_path):
messages.error(f"Original of document {doc.pk} does not exist.") messages.error(doc.pk, "Original of document does not exist.")
else: else:
if os.path.normpath(doc.source_path) in present_files: if os.path.normpath(doc.source_path) in present_files:
present_files.remove(os.path.normpath(doc.source_path)) present_files.remove(os.path.normpath(doc.source_path))
@ -82,28 +89,29 @@ def check_sanity(progress=False):
with doc.source_file as f: with doc.source_file as f:
checksum = hashlib.md5(f.read()).hexdigest() checksum = hashlib.md5(f.read()).hexdigest()
except OSError as e: except OSError as e:
messages.error(f"Cannot read original file of document {doc.pk}: {e}") messages.error(doc.pk, f"Cannot read original file of document: {e}")
else: else:
if not checksum == doc.checksum: if not checksum == doc.checksum:
messages.error( messages.error(
f"Checksum mismatch of document {doc.pk}. " doc.pk,
"Checksum mismatch. "
f"Stored: {doc.checksum}, actual: {checksum}.", f"Stored: {doc.checksum}, actual: {checksum}.",
) )
# Check sanity of the archive file. # Check sanity of the archive file.
if doc.archive_checksum and not doc.archive_filename: if doc.archive_checksum and not doc.archive_filename:
messages.error( messages.error(
f"Document {doc.pk} has an archive file checksum, but no " doc.pk,
f"archive filename.", "Document has an archive file checksum, but no archive filename.",
) )
elif not doc.archive_checksum and doc.archive_filename: elif not doc.archive_checksum and doc.archive_filename:
messages.error( messages.error(
f"Document {doc.pk} has an archive file, but its checksum is " doc.pk,
f"missing.", "Document has an archive file, but its checksum is missing.",
) )
elif doc.has_archive_version: elif doc.has_archive_version:
if not os.path.isfile(doc.archive_path): if not os.path.isfile(doc.archive_path):
messages.error(f"Archived version of document {doc.pk} does not exist.") messages.error(doc.pk, "Archived version of document does not exist.")
else: else:
if os.path.normpath(doc.archive_path) in present_files: if os.path.normpath(doc.archive_path) in present_files:
present_files.remove(os.path.normpath(doc.archive_path)) present_files.remove(os.path.normpath(doc.archive_path))
@ -112,22 +120,23 @@ def check_sanity(progress=False):
checksum = hashlib.md5(f.read()).hexdigest() checksum = hashlib.md5(f.read()).hexdigest()
except OSError as e: except OSError as e:
messages.error( messages.error(
f"Cannot read archive file of document {doc.pk}: {e}", doc.pk,
f"Cannot read archive file of document : {e}",
) )
else: else:
if not checksum == doc.archive_checksum: if not checksum == doc.archive_checksum:
messages.error( messages.error(
f"Checksum mismatch of archived document " doc.pk,
f"{doc.pk}. " "Checksum mismatch of archived document. "
f"Stored: {doc.archive_checksum}, " f"Stored: {doc.archive_checksum}, "
f"actual: {checksum}.", f"actual: {checksum}.",
) )
# other document checks # other document checks
if not doc.content: if not doc.content:
messages.info(f"Document {doc.pk} has no content.") messages.info(doc.pk, "Document has no content.")
for extra_file in present_files: for extra_file in present_files:
messages.warning(f"Orphaned file in media dir: {extra_file}") messages.warning(None, f"Orphaned file in media dir: {extra_file}")
return messages return messages

View File

@ -338,9 +338,9 @@ def sanity_check():
messages.log_messages() messages.log_messages()
if messages.has_error(): if messages.has_error:
raise SanityCheckFailedException("Sanity check failed with errors. See log.") raise SanityCheckFailedException("Sanity check failed with errors. See log.")
elif messages.has_warning(): elif messages.has_warning:
return "Sanity check exited with warnings. See log." return "Sanity check exited with warnings. See log."
elif len(messages) > 0: elif len(messages) > 0:
return "Sanity check exited with infos. See log." return "Sanity check exited with infos. See log."

View File

@ -238,5 +238,5 @@ class TestSanityChecker(DirectoriesMixin, TestCase):
with self.assertLogs() as capture: with self.assertLogs() as capture:
call_command("document_sanity_checker") call_command("document_sanity_checker")
self.assertEqual(len(capture.output), 1) self.assertEqual(len(capture.output), 2)
self.assertIn("Checksum mismatch of document", capture.output[0]) self.assertIn("Checksum mismatch. Stored: abc, actual:", capture.output[1])

View File

@ -8,62 +8,9 @@ from django.conf import settings
from django.test import TestCase from django.test import TestCase
from documents.models import Document from documents.models import Document
from documents.sanity_checker import check_sanity from documents.sanity_checker import check_sanity
from documents.sanity_checker import SanityCheckMessages
from documents.tests.utils import DirectoriesMixin from documents.tests.utils import DirectoriesMixin
class TestSanityCheckMessages(TestCase):
def test_no_messages(self):
messages = SanityCheckMessages()
self.assertEqual(len(messages), 0)
self.assertFalse(messages.has_error())
self.assertFalse(messages.has_warning())
with self.assertLogs() as capture:
messages.log_messages()
self.assertEqual(len(capture.output), 1)
self.assertEqual(capture.records[0].levelno, logging.INFO)
self.assertEqual(
capture.records[0].message,
"Sanity checker detected no issues.",
)
def test_info(self):
messages = SanityCheckMessages()
messages.info("Something might be wrong")
self.assertEqual(len(messages), 1)
self.assertFalse(messages.has_error())
self.assertFalse(messages.has_warning())
with self.assertLogs() as capture:
messages.log_messages()
self.assertEqual(len(capture.output), 1)
self.assertEqual(capture.records[0].levelno, logging.INFO)
self.assertEqual(capture.records[0].message, "Something might be wrong")
def test_warning(self):
messages = SanityCheckMessages()
messages.warning("Something is wrong")
self.assertEqual(len(messages), 1)
self.assertFalse(messages.has_error())
self.assertTrue(messages.has_warning())
with self.assertLogs() as capture:
messages.log_messages()
self.assertEqual(len(capture.output), 1)
self.assertEqual(capture.records[0].levelno, logging.WARNING)
self.assertEqual(capture.records[0].message, "Something is wrong")
def test_error(self):
messages = SanityCheckMessages()
messages.error("Something is seriously wrong")
self.assertEqual(len(messages), 1)
self.assertTrue(messages.has_error())
self.assertFalse(messages.has_warning())
with self.assertLogs() as capture:
messages.log_messages()
self.assertEqual(len(capture.output), 1)
self.assertEqual(capture.records[0].levelno, logging.ERROR)
self.assertEqual(capture.records[0].message, "Something is seriously wrong")
class TestSanityCheck(DirectoriesMixin, TestCase): class TestSanityCheck(DirectoriesMixin, TestCase):
def make_test_data(self): def make_test_data(self):
@ -111,10 +58,30 @@ class TestSanityCheck(DirectoriesMixin, TestCase):
archive_filename="0000001.pdf", archive_filename="0000001.pdf",
) )
def assertSanityError(self, messageRegex): def assertSanityError(self, doc: Document, messageRegex):
messages = check_sanity() messages = check_sanity()
self.assertTrue(messages.has_error()) self.assertTrue(messages.has_error)
self.assertRegex(messages[0]["message"], messageRegex) with self.assertLogs() as capture:
messages.log_messages()
self.assertRegex(
capture.records[0].message,
f"Document: {doc.pk}, title: {doc.title}",
)
self.assertRegex(capture.records[1].message, messageRegex)
def test_no_issues(self):
self.make_test_data()
messages = check_sanity()
self.assertFalse(messages.has_error)
self.assertFalse(messages.has_warning)
with self.assertLogs() as capture:
messages.log_messages()
self.assertEqual(len(capture.output), 1)
self.assertEqual(capture.records[0].levelno, logging.INFO)
self.assertEqual(
capture.records[0].message,
"Sanity checker detected no issues.",
)
def test_no_docs(self): def test_no_docs(self):
self.assertEqual(len(check_sanity()), 0) self.assertEqual(len(check_sanity()), 0)
@ -126,75 +93,79 @@ class TestSanityCheck(DirectoriesMixin, TestCase):
def test_no_thumbnail(self): def test_no_thumbnail(self):
doc = self.make_test_data() doc = self.make_test_data()
os.remove(doc.thumbnail_path) os.remove(doc.thumbnail_path)
self.assertSanityError("Thumbnail of document .* does not exist") self.assertSanityError(doc, "Thumbnail of document does not exist")
def test_thumbnail_no_access(self): def test_thumbnail_no_access(self):
doc = self.make_test_data() doc = self.make_test_data()
os.chmod(doc.thumbnail_path, 0o000) os.chmod(doc.thumbnail_path, 0o000)
self.assertSanityError("Cannot read thumbnail file of document") self.assertSanityError(doc, "Cannot read thumbnail file of document")
os.chmod(doc.thumbnail_path, 0o777) os.chmod(doc.thumbnail_path, 0o777)
def test_no_original(self): def test_no_original(self):
doc = self.make_test_data() doc = self.make_test_data()
os.remove(doc.source_path) os.remove(doc.source_path)
self.assertSanityError("Original of document .* does not exist.") self.assertSanityError(doc, "Original of document does not exist.")
def test_original_no_access(self): def test_original_no_access(self):
doc = self.make_test_data() doc = self.make_test_data()
os.chmod(doc.source_path, 0o000) os.chmod(doc.source_path, 0o000)
self.assertSanityError("Cannot read original file of document") self.assertSanityError(doc, "Cannot read original file of document")
os.chmod(doc.source_path, 0o777) os.chmod(doc.source_path, 0o777)
def test_original_checksum_mismatch(self): def test_original_checksum_mismatch(self):
doc = self.make_test_data() doc = self.make_test_data()
doc.checksum = "WOW" doc.checksum = "WOW"
doc.save() doc.save()
self.assertSanityError("Checksum mismatch of document") self.assertSanityError(doc, "Checksum mismatch. Stored: WOW, actual: ")
def test_no_archive(self): def test_no_archive(self):
doc = self.make_test_data() doc = self.make_test_data()
os.remove(doc.archive_path) os.remove(doc.archive_path)
self.assertSanityError("Archived version of document .* does not exist.") self.assertSanityError(doc, "Archived version of document does not exist.")
def test_archive_no_access(self): def test_archive_no_access(self):
doc = self.make_test_data() doc = self.make_test_data()
os.chmod(doc.archive_path, 0o000) os.chmod(doc.archive_path, 0o000)
self.assertSanityError("Cannot read archive file of document") self.assertSanityError(doc, "Cannot read archive file of document")
os.chmod(doc.archive_path, 0o777) os.chmod(doc.archive_path, 0o777)
def test_archive_checksum_mismatch(self): def test_archive_checksum_mismatch(self):
doc = self.make_test_data() doc = self.make_test_data()
doc.archive_checksum = "WOW" doc.archive_checksum = "WOW"
doc.save() doc.save()
self.assertSanityError("Checksum mismatch of archived document") self.assertSanityError(doc, "Checksum mismatch of archived document")
def test_empty_content(self): def test_empty_content(self):
doc = self.make_test_data() doc = self.make_test_data()
doc.content = "" doc.content = ""
doc.save() doc.save()
messages = check_sanity() messages = check_sanity()
self.assertFalse(messages.has_error()) self.assertFalse(messages.has_error)
self.assertFalse(messages.has_warning()) self.assertFalse(messages.has_warning)
self.assertEqual(len(messages), 1) self.assertEqual(len(messages), 1)
self.assertRegex(messages[0]["message"], "Document .* has no content.") self.assertRegex(messages[doc.pk][0]["message"], "Document has no content.")
def test_orphaned_file(self): def test_orphaned_file(self):
doc = self.make_test_data() doc = self.make_test_data()
Path(self.dirs.originals_dir, "orphaned").touch() Path(self.dirs.originals_dir, "orphaned").touch()
messages = check_sanity() messages = check_sanity()
self.assertFalse(messages.has_error()) self.assertTrue(messages.has_warning)
self.assertTrue(messages.has_warning()) self.assertRegex(
self.assertEqual(len(messages), 1) messages._messages[None][0]["message"],
self.assertRegex(messages[0]["message"], "Orphaned file in media dir") "Orphaned file in media dir",
)
def test_archive_filename_no_checksum(self): def test_archive_filename_no_checksum(self):
doc = self.make_test_data() doc = self.make_test_data()
doc.archive_checksum = None doc.archive_checksum = None
doc.save() doc.save()
self.assertSanityError("has an archive file, but its checksum is missing.") self.assertSanityError(doc, "has an archive file, but its checksum is missing.")
def test_archive_checksum_no_filename(self): def test_archive_checksum_no_filename(self):
doc = self.make_test_data() doc = self.make_test_data()
doc.archive_filename = None doc.archive_filename = None
doc.save() doc.save()
self.assertSanityError("has an archive file checksum, but no archive filename.") self.assertSanityError(
doc,
"has an archive file checksum, but no archive filename.",
)