mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-10-30 03:56:23 -05:00 
			
		
		
		
	testing and fixing the sanity checker
This commit is contained in:
		| @@ -47,7 +47,7 @@ def check_sanity(): | ||||
|             present_files.append(os.path.normpath(os.path.join(root, f))) | ||||
|  | ||||
|     for doc in Document.objects.all(): | ||||
|         # Check thumbnail | ||||
|         # Check sanity of the thumbnail | ||||
|         if not os.path.isfile(doc.thumbnail_path): | ||||
|             messages.append(SanityError( | ||||
|                 f"Thumbnail of document {doc.pk} does not exist.")) | ||||
| @@ -61,7 +61,8 @@ def check_sanity(): | ||||
|                     f"Cannot read thumbnail file of document {doc.pk}: {e}" | ||||
|                 )) | ||||
|  | ||||
|         # Check document | ||||
|         # Check sanity of the original file | ||||
|         # TODO: extract method | ||||
|         if not os.path.isfile(doc.source_path): | ||||
|             messages.append(SanityError( | ||||
|                 f"Original of document {doc.pk} does not exist.")) | ||||
| @@ -80,22 +81,29 @@ def check_sanity(): | ||||
|                         f"Stored: {doc.checksum}, actual: {checksum}." | ||||
|                     )) | ||||
|  | ||||
|         if os.path.isfile(doc.archive_path): | ||||
|             present_files.remove(os.path.normpath(doc.archive_path)) | ||||
|             try: | ||||
|                 with doc.archive_file as f: | ||||
|                     checksum = hashlib.md5(f.read()).hexdigest() | ||||
|             except OSError as e: | ||||
|         # Check sanity of the archive file. | ||||
|         if doc.archive_checksum: | ||||
|             if not os.path.isfile(doc.archive_path): | ||||
|                 messages.append(SanityError( | ||||
|                     f"Cannot read archive file of document {doc.pk}: {e}" | ||||
|                     f"Archived version of document {doc.pk} does not exist." | ||||
|                 )) | ||||
|             else: | ||||
|                 if not checksum == doc.archive_checksum: | ||||
|                 present_files.remove(os.path.normpath(doc.archive_path)) | ||||
|                 try: | ||||
|                     with doc.archive_file as f: | ||||
|                         checksum = hashlib.md5(f.read()).hexdigest() | ||||
|                 except OSError as e: | ||||
|                     messages.append(SanityError( | ||||
|                         f"Checksum mismatch of archive {doc.pk}. " | ||||
|                         f"Stored: {doc.checksum}, actual: {checksum}." | ||||
|                         f"Cannot read archive file of document {doc.pk}: {e}" | ||||
|                     )) | ||||
|                 else: | ||||
|                     if not checksum == doc.archive_checksum: | ||||
|                         messages.append(SanityError( | ||||
|                             f"Checksum mismatch of archive {doc.pk}. " | ||||
|                             f"Stored: {doc.checksum}, actual: {checksum}." | ||||
|                         )) | ||||
|  | ||||
|         # other document checks | ||||
|         if not doc.content: | ||||
|             messages.append(SanityWarning( | ||||
|                 f"Document {doc.pk} has no content." | ||||
|   | ||||
							
								
								
									
										83
									
								
								src/documents/tests/test_sanity_check.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										83
									
								
								src/documents/tests/test_sanity_check.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,83 @@ | ||||
| import os | ||||
| import shutil | ||||
| from pathlib import Path | ||||
|  | ||||
| from django.test import TestCase | ||||
|  | ||||
| from documents.models import Document | ||||
| from documents.sanity_checker import check_sanity | ||||
| from documents.tests.utils import DirectoriesMixin | ||||
|  | ||||
|  | ||||
class TestSanityCheck(DirectoriesMixin, TestCase):
    """Tests for ``check_sanity`` using a single sample document.

    Every test creates the sample document, breaks exactly one thing (or
    nothing), and asserts on the number of messages ``check_sanity`` returns.
    """

    def make_test_data(self):
        """Copy the sample files into the test directories and create the document.

        Returns:
            The created ``Document`` (pk=1). Its stored checksums are expected
            to match the bundled sample files; ``test_success`` relies on this.
        """
        sample_root = os.path.join(
            os.path.dirname(__file__), "samples", "documents")

        # (sample sub-directory, file name, destination directory)
        sample_files = (
            ("originals", "0000001.pdf", self.dirs.originals_dir),
            ("archive", "0000001.pdf", self.dirs.archive_dir),
            ("thumbnails", "0000001.png", self.dirs.thumbnail_dir),
        )
        for subdir, name, target_dir in sample_files:
            shutil.copy(
                os.path.join(sample_root, subdir, name),
                os.path.join(target_dir, name),
            )

        return Document.objects.create(
            title="test",
            checksum="42995833e01aea9b3edee44bbfdd7ce1",
            archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b",
            content="test",
            pk=1,
            filename="0000001.pdf",
        )

    def _assert_one_message_when_unreadable(self, path):
        """Strip all permissions from *path* and expect exactly one message.

        The permissions are restored in a ``finally`` clause so that a failing
        assertion does not leave an unreadable file behind.
        NOTE(review): presumably this cannot fail as expected when the tests
        run as a user that bypasses file permissions (e.g. root) - confirm.
        """
        os.chmod(path, 0o000)
        try:
            self.assertEqual(len(check_sanity()), 1)
        finally:
            os.chmod(path, 0o777)

    def test_no_docs(self):
        """An empty database yields no messages."""
        self.assertEqual(len(check_sanity()), 0)

    def test_success(self):
        """An intact document yields no messages."""
        self.make_test_data()
        self.assertEqual(len(check_sanity()), 0)

    def test_no_thumbnail(self):
        """A missing thumbnail file yields one message."""
        doc = self.make_test_data()
        os.remove(doc.thumbnail_path)
        self.assertEqual(len(check_sanity()), 1)

    def test_thumbnail_no_access(self):
        """An unreadable thumbnail file yields one message."""
        doc = self.make_test_data()
        self._assert_one_message_when_unreadable(doc.thumbnail_path)

    def test_no_original(self):
        """A missing original file yields one message."""
        doc = self.make_test_data()
        os.remove(doc.source_path)
        self.assertEqual(len(check_sanity()), 1)

    def test_original_no_access(self):
        """An unreadable original file yields one message."""
        doc = self.make_test_data()
        self._assert_one_message_when_unreadable(doc.source_path)

    def test_original_checksum_mismatch(self):
        """A stored checksum that differs from the original file yields one message."""
        doc = self.make_test_data()
        doc.checksum = "WOW"
        doc.save()
        self.assertEqual(len(check_sanity()), 1)

    def test_no_archive(self):
        """A missing archive file yields one message."""
        doc = self.make_test_data()
        os.remove(doc.archive_path)
        self.assertEqual(len(check_sanity()), 1)

    def test_archive_no_access(self):
        """An unreadable archive file yields one message."""
        doc = self.make_test_data()
        self._assert_one_message_when_unreadable(doc.archive_path)

    def test_archive_checksum_mismatch(self):
        """A stored archive checksum that differs from the file yields one message."""
        doc = self.make_test_data()
        doc.archive_checksum = "WOW"
        doc.save()
        self.assertEqual(len(check_sanity()), 1)

    def test_empty_content(self):
        """A document without content yields one message."""
        doc = self.make_test_data()
        doc.content = ""
        doc.save()
        self.assertEqual(len(check_sanity()), 1)

    def test_orphaned_file(self):
        """A file in the originals directory that belongs to no document yields one message."""
        self.make_test_data()
        Path(self.dirs.originals_dir, "orphaned").touch()
        self.assertEqual(len(check_sanity()), 1)
		Reference in New Issue
	
	Block a user
	 jonaswinkler
					jonaswinkler