mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-10-30 03:56:23 -05:00 
			
		
		
		
	more testing of the migration
This commit is contained in:
		| @@ -160,7 +160,12 @@ def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False): | |||||||
| ############################################################################### | ############################################################################### | ||||||
|  |  | ||||||
|  |  | ||||||
| def create_archive_version(doc, retry_count=4): | def parse_wrapper(parser, path, mime_type, file_name): | ||||||
|  |     # Thin wrapper around parser.parse() so that tests can mock out the parsing step. | ||||||
|  |     parser.parse(path, mime_type, file_name) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | def create_archive_version(doc, retry_count=3): | ||||||
|     from documents.parsers import get_parser_class_for_mime_type, \ |     from documents.parsers import get_parser_class_for_mime_type, \ | ||||||
|         DocumentParser, \ |         DocumentParser, \ | ||||||
|         ParseError |         ParseError | ||||||
| @@ -172,8 +177,8 @@ def create_archive_version(doc, retry_count=4): | |||||||
|     for try_num in range(retry_count): |     for try_num in range(retry_count): | ||||||
|         parser: DocumentParser = parser_class(None, None) |         parser: DocumentParser = parser_class(None, None) | ||||||
|         try: |         try: | ||||||
|             parser.parse(source_path(doc), doc.mime_type, |             parse_wrapper(parser, source_path(doc), doc.mime_type, | ||||||
|                          os.path.basename(doc.filename)) |                           os.path.basename(doc.filename)) | ||||||
|             doc.content = parser.get_text() |             doc.content = parser.get_text() | ||||||
|  |  | ||||||
|             if parser.get_archive_path() and os.path.isfile( |             if parser.get_archive_path() and os.path.isfile( | ||||||
| @@ -225,25 +230,28 @@ def move_old_to_new_locations(apps, schema_editor): | |||||||
|     for doc in Document.objects.filter(archive_checksum__isnull=False): |     for doc in Document.objects.filter(archive_checksum__isnull=False): | ||||||
|         old_path = archive_path_old(doc) |         old_path = archive_path_old(doc) | ||||||
|  |  | ||||||
|         if not os.path.isfile(old_path): |  | ||||||
|             raise ValueError( |  | ||||||
|                 f"Archived document ID:{doc.id} does not exist at: " |  | ||||||
|                 f"{old_path}") |  | ||||||
|  |  | ||||||
|         if old_path in old_archive_path_to_id: |         if old_path in old_archive_path_to_id: | ||||||
|             affected_document_ids.add(doc.id) |             affected_document_ids.add(doc.id) | ||||||
|             affected_document_ids.add(old_archive_path_to_id[old_path]) |             affected_document_ids.add(old_archive_path_to_id[old_path]) | ||||||
|         else: |         else: | ||||||
|             old_archive_path_to_id[old_path] = doc.id |             old_archive_path_to_id[old_path] = doc.id | ||||||
|  |  | ||||||
|     # check that we can regenerate these archive versions |     # check that archive files of all unaffected documents are in place | ||||||
|  |     for doc in Document.objects.filter(archive_checksum__isnull=False): | ||||||
|  |         old_path = archive_path_old(doc) | ||||||
|  |         if doc.id not in affected_document_ids and not os.path.isfile(old_path): | ||||||
|  |             raise ValueError( | ||||||
|  |                 f"Archived document ID:{doc.id} does not exist at: " | ||||||
|  |                 f"{old_path}") | ||||||
|  |  | ||||||
|  |     # check that we can regenerate affected archive versions | ||||||
|     for doc_id in affected_document_ids: |     for doc_id in affected_document_ids: | ||||||
|         from documents.parsers import get_parser_class_for_mime_type |         from documents.parsers import get_parser_class_for_mime_type | ||||||
|  |  | ||||||
|         doc = Document.objects.get(id=doc_id) |         doc = Document.objects.get(id=doc_id) | ||||||
|         parser_class = get_parser_class_for_mime_type(doc.mime_type) |         parser_class = get_parser_class_for_mime_type(doc.mime_type) | ||||||
|         if not parser_class: |         if not parser_class: | ||||||
|             raise Exception( |             raise ValueError( | ||||||
|                 f"Document ID:{doc.id} has an invalid archived document, " |                 f"Document ID:{doc.id} has an invalid archived document, " | ||||||
|                 f"but no parsers are available. Cannot migrate.") |                 f"but no parsers are available. Cannot migrate.") | ||||||
|  |  | ||||||
| @@ -253,6 +261,9 @@ def move_old_to_new_locations(apps, schema_editor): | |||||||
|             old_path = archive_path_old(doc) |             old_path = archive_path_old(doc) | ||||||
|             # remove affected archive versions |             # remove affected archive versions | ||||||
|             if os.path.isfile(old_path): |             if os.path.isfile(old_path): | ||||||
|  |                 logger.debug( | ||||||
|  |                     f"Removing {old_path}" | ||||||
|  |                 ) | ||||||
|                 os.unlink(old_path) |                 os.unlink(old_path) | ||||||
|         else: |         else: | ||||||
|             # Set archive path for unaffected files |             # Set archive path for unaffected files | ||||||
| @@ -267,8 +278,6 @@ def move_old_to_new_locations(apps, schema_editor): | |||||||
|         create_archive_version(doc) |         create_archive_version(doc) | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| def move_new_to_old_locations(apps, schema_editor): | def move_new_to_old_locations(apps, schema_editor): | ||||||
|     Document = apps.get_model("documents", "Document") |     Document = apps.get_model("documents", "Document") | ||||||
|  |  | ||||||
|   | |||||||
| @@ -2,10 +2,12 @@ import hashlib | |||||||
| import os | import os | ||||||
| import shutil | import shutil | ||||||
| from pathlib import Path | from pathlib import Path | ||||||
|  | from unittest import mock | ||||||
|  |  | ||||||
| from django.conf import settings | from django.conf import settings | ||||||
| from django.test import override_settings | from django.test import override_settings | ||||||
|  |  | ||||||
|  | from documents.parsers import ParseError | ||||||
| from documents.tests.utils import DirectoriesMixin, TestMigrations | from documents.tests.utils import DirectoriesMixin, TestMigrations | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -169,6 +171,11 @@ class TestMigrateArchiveFilesWithFilenameFormat(TestMigrateArchiveFiles): | |||||||
|         self.assertEqual(Document.objects.get(id=self.clash4.id).archive_filename, "clash.png.pdf") |         self.assertEqual(Document.objects.get(id=self.clash4.id).archive_filename, "clash.png.pdf") | ||||||
|  |  | ||||||
|  |  | ||||||
|  | def fake_parse_wrapper(parser, path, mime_type, file_name): | ||||||
|  |     parser.archive_path = None | ||||||
|  |     parser.text = "the text" | ||||||
|  |  | ||||||
|  |  | ||||||
| @override_settings(PAPERLESS_FILENAME_FORMAT="") | @override_settings(PAPERLESS_FILENAME_FORMAT="") | ||||||
| class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations): | class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations): | ||||||
|  |  | ||||||
| @@ -185,6 +192,73 @@ class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations): | |||||||
|  |  | ||||||
|         self.assertRaisesMessage(ValueError, "does not exist at: ", self.performMigration) |         self.assertRaisesMessage(ValueError, "does not exist at: ", self.performMigration) | ||||||
|  |  | ||||||
|  |     def test_parser_missing(self): | ||||||
|  |         Document = self.apps.get_model("documents", "Document") | ||||||
|  |  | ||||||
|  |         doc1 = make_test_document(Document, "document", "invalid/typesss768", simple_png, "document.png", simple_pdf) | ||||||
|  |         doc2 = make_test_document(Document, "document", "invalid/typesss768", simple_jpg, "document.jpg", simple_pdf) | ||||||
|  |  | ||||||
|  |         self.assertRaisesMessage(ValueError, "no parsers are available", self.performMigration) | ||||||
|  |  | ||||||
|  |     @mock.patch("documents.migrations.1012_fix_archive_files.parse_wrapper") | ||||||
|  |     def test_parser_error(self, m): | ||||||
|  |         m.side_effect = ParseError() | ||||||
|  |         Document = self.apps.get_model("documents", "Document") | ||||||
|  |  | ||||||
|  |         doc1 = make_test_document(Document, "document", "image/png", simple_png, "document.png", simple_pdf) | ||||||
|  |         doc2 = make_test_document(Document, "document", "application/pdf", simple_jpg, "document.jpg", simple_pdf) | ||||||
|  |  | ||||||
|  |         self.assertIsNotNone(doc1.archive_checksum) | ||||||
|  |         self.assertIsNotNone(doc2.archive_checksum) | ||||||
|  |  | ||||||
|  |         with self.assertLogs() as capture: | ||||||
|  |             self.performMigration() | ||||||
|  |  | ||||||
|  |         self.assertEqual(m.call_count, 6) | ||||||
|  |  | ||||||
|  |         self.assertEqual( | ||||||
|  |             len(list(filter(lambda log: "Parse error, will try again in 5 seconds" in log, capture.output))), | ||||||
|  |             4) | ||||||
|  |  | ||||||
|  |         self.assertEqual( | ||||||
|  |             len(list(filter(lambda log: "Unable to regenerate archive document for ID:" in log, capture.output))), | ||||||
|  |             2) | ||||||
|  |  | ||||||
|  |         Document = self.apps.get_model("documents", "Document") | ||||||
|  |  | ||||||
|  |         doc1 = Document.objects.get(id=doc1.id) | ||||||
|  |         doc2 = Document.objects.get(id=doc2.id) | ||||||
|  |  | ||||||
|  |         self.assertIsNone(doc1.archive_checksum) | ||||||
|  |         self.assertIsNone(doc2.archive_checksum) | ||||||
|  |         self.assertIsNone(doc1.archive_filename) | ||||||
|  |         self.assertIsNone(doc2.archive_filename) | ||||||
|  |  | ||||||
|  |     @mock.patch("documents.migrations.1012_fix_archive_files.parse_wrapper") | ||||||
|  |     def test_parser_no_archive(self, m): | ||||||
|  |         m.side_effect = fake_parse_wrapper | ||||||
|  |  | ||||||
|  |         Document = self.apps.get_model("documents", "Document") | ||||||
|  |  | ||||||
|  |         doc1 = make_test_document(Document, "document", "image/png", simple_png, "document.png", simple_pdf) | ||||||
|  |         doc2 = make_test_document(Document, "document", "application/pdf", simple_jpg, "document.jpg", simple_pdf) | ||||||
|  |  | ||||||
|  |         with self.assertLogs() as capture: | ||||||
|  |             self.performMigration() | ||||||
|  |  | ||||||
|  |         self.assertEqual( | ||||||
|  |             len(list(filter(lambda log: "Parser did not return an archive document for document" in log, capture.output))), | ||||||
|  |             2) | ||||||
|  |  | ||||||
|  |         Document = self.apps.get_model("documents", "Document") | ||||||
|  |  | ||||||
|  |         doc1 = Document.objects.get(id=doc1.id) | ||||||
|  |         doc2 = Document.objects.get(id=doc2.id) | ||||||
|  |  | ||||||
|  |         self.assertIsNone(doc1.archive_checksum) | ||||||
|  |         self.assertIsNone(doc2.archive_checksum) | ||||||
|  |         self.assertIsNone(doc1.archive_filename) | ||||||
|  |         self.assertIsNone(doc2.archive_filename) | ||||||
|  |  | ||||||
| @override_settings(PAPERLESS_FILENAME_FORMAT="") | @override_settings(PAPERLESS_FILENAME_FORMAT="") | ||||||
| class TestMigrateArchiveFilesBackwards(DirectoriesMixin, TestMigrations): | class TestMigrateArchiveFilesBackwards(DirectoriesMixin, TestMigrations): | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 jonaswinkler
					jonaswinkler