import logging
import shutil
from pathlib import Path

import filelock
from django.conf import settings
from django.test import TestCase

from documents.models import Document
from documents.sanity_checker import check_sanity
from documents.tests.utils import DirectoriesMixin


class TestSanityCheck(DirectoriesMixin, TestCase):
    """Tests for ``check_sanity`` using a single sample document.

    Each test starts from the fixture built by ``make_test_data`` (or from an
    empty database), breaks exactly one thing, and checks what the sanity
    checker reports.
    """

    def make_test_data(self) -> Document:
        """Copy the sample files into the test directories and create the document.

        Returns:
            The created ``Document`` (pk=1) whose original, archive and
            thumbnail filenames match the copied sample files.
        """
        samples = Path(__file__).parent / "samples" / "documents"

        with filelock.FileLock(settings.MEDIA_LOCK):
            # just make sure that the lockfile is present.
            shutil.copy(
                samples / "originals" / "0000001.pdf",
                Path(self.dirs.originals_dir) / "0000001.pdf",
            )
            shutil.copy(
                samples / "archive" / "0000001.pdf",
                Path(self.dirs.archive_dir) / "0000001.pdf",
            )
            shutil.copy(
                samples / "thumbnails" / "0000001.webp",
                Path(self.dirs.thumbnail_dir) / "0000001.webp",
            )

        return Document.objects.create(
            title="test",
            checksum="42995833e01aea9b3edee44bbfdd7ce1",
            archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b",
            content="test",
            pk=1,
            filename="0000001.pdf",
            mime_type="application/pdf",
            archive_filename="0000001.pdf",
        )

    def assertSanityError(self, doc: Document, messageRegex) -> None:
        """Run the sanity check and assert it logs an error for ``doc``.

        Args:
            doc: The document the reported issue must be attributed to.
            messageRegex: Regular expression that must match the second
                logged record (the first one is the per-document header).
        """
        messages = check_sanity()
        self.assertTrue(messages.has_error)
        with self.assertLogs() as capture:
            messages.log_messages()
            # Fail with a readable assertion instead of an IndexError when
            # fewer records than expected were logged.
            self.assertGreaterEqual(len(capture.records), 2)
            self.assertEqual(
                capture.records[0].message,
                f"Detected following issue(s) with document #{doc.pk}, titled {doc.title}",
            )
            self.assertRegex(capture.records[1].message, messageRegex)

    def _assert_unreadable_error(self, doc: Document, path, messageRegex) -> None:
        """Strip all permissions from ``path`` and assert the resulting error.

        The permissions are restored in a ``finally`` clause so that a failing
        assertion does not leave an inaccessible file behind.

        NOTE(review): mode 0o000 does not stop a privileged user from reading
        the file, so these checks presumably assume a non-root test run.
        """
        target = Path(path)
        target.chmod(0o000)
        try:
            self.assertSanityError(doc, messageRegex)
        finally:
            target.chmod(0o777)

    def test_no_issues(self):
        """An intact document logs a single INFO record reporting no issues."""
        self.make_test_data()
        messages = check_sanity()
        self.assertFalse(messages.has_error)
        self.assertFalse(messages.has_warning)
        with self.assertLogs() as capture:
            messages.log_messages()
            self.assertEqual(len(capture.output), 1)
            self.assertEqual(capture.records[0].levelno, logging.INFO)
            self.assertEqual(
                capture.records[0].message,
                "Sanity checker detected no issues.",
            )

    def test_no_docs(self):
        """Without any documents the check yields no messages."""
        self.assertEqual(len(check_sanity()), 0)

    def test_success(self):
        """An intact document yields no messages."""
        self.make_test_data()
        self.assertEqual(len(check_sanity()), 0)

    def test_no_thumbnail(self):
        """A deleted thumbnail is reported as an error."""
        doc = self.make_test_data()
        Path(doc.thumbnail_path).unlink()
        self.assertSanityError(doc, "Thumbnail of document does not exist")

    def test_thumbnail_no_access(self):
        """A thumbnail without any permissions is reported as an error."""
        doc = self.make_test_data()
        self._assert_unreadable_error(
            doc,
            doc.thumbnail_path,
            "Cannot read thumbnail file of document",
        )

    def test_no_original(self):
        """A deleted original file is reported as an error."""
        doc = self.make_test_data()
        Path(doc.source_path).unlink()
        self.assertSanityError(doc, "Original of document does not exist.")

    def test_original_no_access(self):
        """An original file without any permissions is reported as an error."""
        doc = self.make_test_data()
        self._assert_unreadable_error(
            doc,
            doc.source_path,
            "Cannot read original file of document",
        )

    def test_original_checksum_mismatch(self):
        """A stored checksum that differs from the original file is an error."""
        doc = self.make_test_data()
        doc.checksum = "WOW"
        doc.save()
        self.assertSanityError(doc, "Checksum mismatch. Stored: WOW, actual: ")

    def test_no_archive(self):
        """A deleted archive file is reported as an error."""
        doc = self.make_test_data()
        Path(doc.archive_path).unlink()
        self.assertSanityError(doc, "Archived version of document does not exist.")

    def test_archive_no_access(self):
        """An archive file without any permissions is reported as an error."""
        doc = self.make_test_data()
        self._assert_unreadable_error(
            doc,
            doc.archive_path,
            "Cannot read archive file of document",
        )

    def test_archive_checksum_mismatch(self):
        """A stored archive checksum that differs from the file is an error."""
        doc = self.make_test_data()
        doc.archive_checksum = "WOW"
        doc.save()
        self.assertSanityError(doc, "Checksum mismatch of archived document")

    def test_empty_content(self):
        """Empty content produces one message that is neither error nor warning."""
        doc = self.make_test_data()
        doc.content = ""
        doc.save()
        messages = check_sanity()
        self.assertFalse(messages.has_error)
        self.assertFalse(messages.has_warning)
        self.assertEqual(len(messages), 1)
        self.assertRegex(
            messages[doc.pk][0]["message"],
            "Document contains no OCR data",
        )

    def test_orphaned_file(self):
        """A stray file in the originals directory triggers a warning."""
        self.make_test_data()
        Path(self.dirs.originals_dir, "orphaned").touch()
        messages = check_sanity()
        self.assertTrue(messages.has_warning)
        # The orphan message is looked up under the ``None`` key rather than a
        # document pk.
        self.assertRegex(
            messages._messages[None][0]["message"],
            "Orphaned file in media dir",
        )

    def test_archive_filename_no_checksum(self):
        """An archive filename without an archive checksum is an error."""
        doc = self.make_test_data()
        doc.archive_checksum = None
        doc.save()
        self.assertSanityError(doc, "has an archive file, but its checksum is missing.")

    def test_archive_checksum_no_filename(self):
        """An archive checksum without an archive filename is an error."""
        doc = self.make_test_data()
        doc.archive_filename = None
        doc.save()
        self.assertSanityError(
            doc,
            "has an archive file checksum, but no archive filename.",
        )