From a4f60c48ea855ee2ceafd9ade5d7f0b32ca98238 Mon Sep 17 00:00:00 2001
From: jonaswinkler
Date: Wed, 2 Dec 2020 01:18:11 +0100
Subject: [PATCH] testing and fixing the sanity checker

Only check the archived version of a document when an archive checksum
is stored, and report an error when that archive file is missing.
Report the stored archive checksum, rather than the checksum of the
original, when the archive checksum does not match.
Add tests for check_sanity().

---
Note: the blob ids on the "index" lines below were not regenerated after
review edits to this patch.

 src/documents/sanity_checker.py          |  33 +++++---
 src/documents/tests/test_sanity_check.py | 106 +++++++++++++++++++++++
 2 files changed, 127 insertions(+), 12 deletions(-)
 create mode 100644 src/documents/tests/test_sanity_check.py

diff --git a/src/documents/sanity_checker.py b/src/documents/sanity_checker.py
index 6be7400a3..e3c4b1aec 100644
--- a/src/documents/sanity_checker.py
+++ b/src/documents/sanity_checker.py
@@ -47,7 +47,7 @@ def check_sanity():
             present_files.append(os.path.normpath(os.path.join(root, f)))
 
     for doc in Document.objects.all():
-        # Check thumbnail
+        # Check sanity of the thumbnail
         if not os.path.isfile(doc.thumbnail_path):
             messages.append(SanityError(
                 f"Thumbnail of document {doc.pk} does not exist."))
@@ -61,7 +61,8 @@ def check_sanity():
                     f"Cannot read thumbnail file of document {doc.pk}: {e}"
                 ))
 
-        # Check document
+        # Check sanity of the original file
+        # TODO: extract method
         if not os.path.isfile(doc.source_path):
             messages.append(SanityError(
                 f"Original of document {doc.pk} does not exist."))
@@ -80,22 +81,30 @@ def check_sanity():
                         f"Stored: {doc.checksum}, actual: {checksum}."
                     ))
 
-        if os.path.isfile(doc.archive_path):
-            present_files.remove(os.path.normpath(doc.archive_path))
-            try:
-                with doc.archive_file as f:
-                    checksum = hashlib.md5(f.read()).hexdigest()
-            except OSError as e:
+        # Check sanity of the archive file.
+        if doc.archive_checksum:
+            if not os.path.isfile(doc.archive_path):
                 messages.append(SanityError(
-                    f"Cannot read archive file of document {doc.pk}: {e}"
+                    f"Archived version of document {doc.pk} does not exist."
                 ))
             else:
-                if not checksum == doc.archive_checksum:
+                present_files.remove(os.path.normpath(doc.archive_path))
+                try:
+                    with doc.archive_file as f:
+                        checksum = hashlib.md5(f.read()).hexdigest()
+                except OSError as e:
                     messages.append(SanityError(
-                        f"Checksum mismatch of archive {doc.pk}. "
-                        f"Stored: {doc.checksum}, actual: {checksum}."
+                        f"Cannot read archive file of document {doc.pk}: {e}"
                     ))
+                else:
+                    if not checksum == doc.archive_checksum:
+                        messages.append(SanityError(
+                            f"Checksum mismatch of archive {doc.pk}. "
+                            f"Stored: {doc.archive_checksum}, "
+                            f"actual: {checksum}."
+                        ))
 
+        # other document checks
         if not doc.content:
             messages.append(SanityWarning(
                 f"Document {doc.pk} has no content."
diff --git a/src/documents/tests/test_sanity_check.py b/src/documents/tests/test_sanity_check.py
new file mode 100644
index 000000000..995be7d6d
--- /dev/null
+++ b/src/documents/tests/test_sanity_check.py
@@ -0,0 +1,106 @@
+import os
+import shutil
+from pathlib import Path
+
+from django.test import TestCase
+
+from documents.models import Document
+from documents.sanity_checker import check_sanity
+from documents.tests.utils import DirectoriesMixin
+
+
+class TestSanityCheck(DirectoriesMixin, TestCase):
+    """Run check_sanity() against one sample document."""
+
+    def make_test_data(self):
+        """Copy the sample files into place and create document 1."""
+        samples = os.path.join(
+            os.path.dirname(__file__), "samples", "documents")
+        for subdir, target_dir, name in [
+            ("originals", self.dirs.originals_dir, "0000001.pdf"),
+            ("archive", self.dirs.archive_dir, "0000001.pdf"),
+            ("thumbnails", self.dirs.thumbnail_dir, "0000001.png"),
+        ]:
+            shutil.copy(
+                os.path.join(samples, subdir, name),
+                os.path.join(target_dir, name))
+
+        return Document.objects.create(
+            title="test",
+            checksum="42995833e01aea9b3edee44bbfdd7ce1",
+            archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b",
+            content="test",
+            pk=1,
+            filename="0000001.pdf")
+
+    def test_no_docs(self):
+        self.assertEqual(len(check_sanity()), 0)
+
+    def test_success(self):
+        self.make_test_data()
+        self.assertEqual(len(check_sanity()), 0)
+
+    def test_no_thumbnail(self):
+        doc = self.make_test_data()
+        os.remove(doc.thumbnail_path)
+        self.assertEqual(len(check_sanity()), 1)
+
+    def test_thumbnail_no_access(self):
+        doc = self.make_test_data()
+        os.chmod(doc.thumbnail_path, 0o000)
+        try:
+            self.assertEqual(len(check_sanity()), 1)
+        finally:
+            # Restore access even when the assertion fails.
+            os.chmod(doc.thumbnail_path, 0o777)
+
+    def test_no_original(self):
+        doc = self.make_test_data()
+        os.remove(doc.source_path)
+        self.assertEqual(len(check_sanity()), 1)
+
+    def test_original_no_access(self):
+        doc = self.make_test_data()
+        os.chmod(doc.source_path, 0o000)
+        try:
+            self.assertEqual(len(check_sanity()), 1)
+        finally:
+            # Restore access even when the assertion fails.
+            os.chmod(doc.source_path, 0o777)
+
+    def test_original_checksum_mismatch(self):
+        doc = self.make_test_data()
+        doc.checksum = "WOW"
+        doc.save()
+        self.assertEqual(len(check_sanity()), 1)
+
+    def test_no_archive(self):
+        doc = self.make_test_data()
+        os.remove(doc.archive_path)
+        self.assertEqual(len(check_sanity()), 1)
+
+    def test_archive_no_access(self):
+        doc = self.make_test_data()
+        os.chmod(doc.archive_path, 0o000)
+        try:
+            self.assertEqual(len(check_sanity()), 1)
+        finally:
+            # Restore access even when the assertion fails.
+            os.chmod(doc.archive_path, 0o777)
+
+    def test_archive_checksum_mismatch(self):
+        doc = self.make_test_data()
+        doc.archive_checksum = "WOW"
+        doc.save()
+        self.assertEqual(len(check_sanity()), 1)
+
+    def test_empty_content(self):
+        doc = self.make_test_data()
+        doc.content = ""
+        doc.save()
+        self.assertEqual(len(check_sanity()), 1)
+
+    def test_orphaned_file(self):
+        self.make_test_data()
+        Path(self.dirs.originals_dir, "orphaned").touch()
+        self.assertEqual(len(check_sanity()), 1)