Mirror of https://github.com/paperless-ngx/paperless-ngx.git, synced 2025-10-30 03:56:23 -05:00.
			
		
		
		
	more testing of the migration
This commit is contained in:
		| @@ -160,7 +160,12 @@ def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False): | ||||
| ############################################################################### | ||||
|  | ||||
|  | ||||
| def create_archive_version(doc, retry_count=4): | ||||
def parse_wrapper(parser, path, mime_type, file_name):
    """Delegate to ``parser.parse``.

    Exists purely as an indirection seam so the tests can patch this
    function instead of invoking a real document parser.
    """
    parser.parse(path, mime_type, file_name)
|  | ||||
|  | ||||
| def create_archive_version(doc, retry_count=3): | ||||
|     from documents.parsers import get_parser_class_for_mime_type, \ | ||||
|         DocumentParser, \ | ||||
|         ParseError | ||||
| @@ -172,8 +177,8 @@ def create_archive_version(doc, retry_count=4): | ||||
|     for try_num in range(retry_count): | ||||
|         parser: DocumentParser = parser_class(None, None) | ||||
|         try: | ||||
|             parser.parse(source_path(doc), doc.mime_type, | ||||
|                          os.path.basename(doc.filename)) | ||||
|             parse_wrapper(parser, source_path(doc), doc.mime_type, | ||||
|                           os.path.basename(doc.filename)) | ||||
|             doc.content = parser.get_text() | ||||
|  | ||||
|             if parser.get_archive_path() and os.path.isfile( | ||||
| @@ -225,25 +230,28 @@ def move_old_to_new_locations(apps, schema_editor): | ||||
|     for doc in Document.objects.filter(archive_checksum__isnull=False): | ||||
|         old_path = archive_path_old(doc) | ||||
|  | ||||
|         if not os.path.isfile(old_path): | ||||
|             raise ValueError( | ||||
|                 f"Archived document ID:{doc.id} does not exist at: " | ||||
|                 f"{old_path}") | ||||
|  | ||||
|         if old_path in old_archive_path_to_id: | ||||
|             affected_document_ids.add(doc.id) | ||||
|             affected_document_ids.add(old_archive_path_to_id[old_path]) | ||||
|         else: | ||||
|             old_archive_path_to_id[old_path] = doc.id | ||||
|  | ||||
|     # check that we can regenerate these archive versions | ||||
|     # check that archive files of all unaffected documents are in place | ||||
|     for doc in Document.objects.filter(archive_checksum__isnull=False): | ||||
|         old_path = archive_path_old(doc) | ||||
|         if doc.id not in affected_document_ids and not os.path.isfile(old_path): | ||||
|             raise ValueError( | ||||
|                 f"Archived document ID:{doc.id} does not exist at: " | ||||
|                 f"{old_path}") | ||||
|  | ||||
|     # check that we can regenerate affected archive versions | ||||
|     for doc_id in affected_document_ids: | ||||
|         from documents.parsers import get_parser_class_for_mime_type | ||||
|  | ||||
|         doc = Document.objects.get(id=doc_id) | ||||
|         parser_class = get_parser_class_for_mime_type(doc.mime_type) | ||||
|         if not parser_class: | ||||
|             raise Exception( | ||||
|             raise ValueError( | ||||
|                 f"Document ID:{doc.id} has an invalid archived document, " | ||||
|                 f"but no parsers are available. Cannot migrate.") | ||||
|  | ||||
| @@ -253,6 +261,9 @@ def move_old_to_new_locations(apps, schema_editor): | ||||
|             old_path = archive_path_old(doc) | ||||
|             # remove affected archive versions | ||||
|             if os.path.isfile(old_path): | ||||
|                 logger.debug( | ||||
|                     f"Removing {old_path}" | ||||
|                 ) | ||||
|                 os.unlink(old_path) | ||||
|         else: | ||||
|             # Set archive path for unaffected files | ||||
| @@ -267,8 +278,6 @@ def move_old_to_new_locations(apps, schema_editor): | ||||
|         create_archive_version(doc) | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
| def move_new_to_old_locations(apps, schema_editor): | ||||
|     Document = apps.get_model("documents", "Document") | ||||
|  | ||||
|   | ||||
| @@ -2,10 +2,12 @@ import hashlib | ||||
| import os | ||||
| import shutil | ||||
| from pathlib import Path | ||||
| from unittest import mock | ||||
|  | ||||
| from django.conf import settings | ||||
| from django.test import override_settings | ||||
|  | ||||
| from documents.parsers import ParseError | ||||
| from documents.tests.utils import DirectoriesMixin, TestMigrations | ||||
|  | ||||
|  | ||||
| @@ -169,6 +171,11 @@ class TestMigrateArchiveFilesWithFilenameFormat(TestMigrateArchiveFiles): | ||||
|         self.assertEqual(Document.objects.get(id=self.clash4.id).archive_filename, "clash.png.pdf") | ||||
|  | ||||
|  | ||||
def fake_parse_wrapper(parser, path, mime_type, file_name):
    """Stand-in for ``parse_wrapper``: simulates a parser run that
    produced text but no archive version."""
    parser.text = "the text"
    parser.archive_path = None
|  | ||||
|  | ||||
| @override_settings(PAPERLESS_FILENAME_FORMAT="") | ||||
| class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations): | ||||
|  | ||||
| @@ -185,6 +192,73 @@ class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations): | ||||
|  | ||||
|         self.assertRaisesMessage(ValueError, "does not exist at: ", self.performMigration) | ||||
|  | ||||
|     def test_parser_missing(self): | ||||
|         Document = self.apps.get_model("documents", "Document") | ||||
|  | ||||
|         doc1 = make_test_document(Document, "document", "invalid/typesss768", simple_png, "document.png", simple_pdf) | ||||
|         doc2 = make_test_document(Document, "document", "invalid/typesss768", simple_jpg, "document.jpg", simple_pdf) | ||||
|  | ||||
|         self.assertRaisesMessage(ValueError, "no parsers are available", self.performMigration) | ||||
|  | ||||
|     @mock.patch("documents.migrations.1012_fix_archive_files.parse_wrapper") | ||||
|     def test_parser_error(self, m): | ||||
|         m.side_effect = ParseError() | ||||
|         Document = self.apps.get_model("documents", "Document") | ||||
|  | ||||
|         doc1 = make_test_document(Document, "document", "image/png", simple_png, "document.png", simple_pdf) | ||||
|         doc2 = make_test_document(Document, "document", "application/pdf", simple_jpg, "document.jpg", simple_pdf) | ||||
|  | ||||
|         self.assertIsNotNone(doc1.archive_checksum) | ||||
|         self.assertIsNotNone(doc2.archive_checksum) | ||||
|  | ||||
|         with self.assertLogs() as capture: | ||||
|             self.performMigration() | ||||
|  | ||||
|         self.assertEqual(m.call_count, 6) | ||||
|  | ||||
|         self.assertEqual( | ||||
|             len(list(filter(lambda log: "Parse error, will try again in 5 seconds" in log, capture.output))), | ||||
|             4) | ||||
|  | ||||
|         self.assertEqual( | ||||
|             len(list(filter(lambda log: "Unable to regenerate archive document for ID:" in log, capture.output))), | ||||
|             2) | ||||
|  | ||||
|         Document = self.apps.get_model("documents", "Document") | ||||
|  | ||||
|         doc1 = Document.objects.get(id=doc1.id) | ||||
|         doc2 = Document.objects.get(id=doc2.id) | ||||
|  | ||||
|         self.assertIsNone(doc1.archive_checksum) | ||||
|         self.assertIsNone(doc2.archive_checksum) | ||||
|         self.assertIsNone(doc1.archive_filename) | ||||
|         self.assertIsNone(doc2.archive_filename) | ||||
|  | ||||
|     @mock.patch("documents.migrations.1012_fix_archive_files.parse_wrapper") | ||||
|     def test_parser_no_archive(self, m): | ||||
|         m.side_effect = fake_parse_wrapper | ||||
|  | ||||
|         Document = self.apps.get_model("documents", "Document") | ||||
|  | ||||
|         doc1 = make_test_document(Document, "document", "image/png", simple_png, "document.png", simple_pdf) | ||||
|         doc2 = make_test_document(Document, "document", "application/pdf", simple_jpg, "document.jpg", simple_pdf) | ||||
|  | ||||
|         with self.assertLogs() as capture: | ||||
|             self.performMigration() | ||||
|  | ||||
|         self.assertEqual( | ||||
|             len(list(filter(lambda log: "Parser did not return an archive document for document" in log, capture.output))), | ||||
|             2) | ||||
|  | ||||
|         Document = self.apps.get_model("documents", "Document") | ||||
|  | ||||
|         doc1 = Document.objects.get(id=doc1.id) | ||||
|         doc2 = Document.objects.get(id=doc2.id) | ||||
|  | ||||
|         self.assertIsNone(doc1.archive_checksum) | ||||
|         self.assertIsNone(doc2.archive_checksum) | ||||
|         self.assertIsNone(doc1.archive_filename) | ||||
|         self.assertIsNone(doc2.archive_filename) | ||||
|  | ||||
| @override_settings(PAPERLESS_FILENAME_FORMAT="") | ||||
| class TestMigrateArchiveFilesBackwards(DirectoriesMixin, TestMigrations): | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 jonaswinkler
					jonaswinkler