mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-11-03 03:16:10 -06:00 
			
		
		
		
	more testing of the migration
This commit is contained in:
		@@ -160,7 +160,12 @@ def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
 | 
			
		||||
###############################################################################
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def create_archive_version(doc, retry_count=4):
 | 
			
		||||
def parse_wrapper(parser, path, mime_type, file_name):
 | 
			
		||||
    # this is here so that I can mock this out for testing.
 | 
			
		||||
    parser.parse(path, mime_type, file_name)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def create_archive_version(doc, retry_count=3):
 | 
			
		||||
    from documents.parsers import get_parser_class_for_mime_type, \
 | 
			
		||||
        DocumentParser, \
 | 
			
		||||
        ParseError
 | 
			
		||||
@@ -172,7 +177,7 @@ def create_archive_version(doc, retry_count=4):
 | 
			
		||||
    for try_num in range(retry_count):
 | 
			
		||||
        parser: DocumentParser = parser_class(None, None)
 | 
			
		||||
        try:
 | 
			
		||||
            parser.parse(source_path(doc), doc.mime_type,
 | 
			
		||||
            parse_wrapper(parser, source_path(doc), doc.mime_type,
 | 
			
		||||
                          os.path.basename(doc.filename))
 | 
			
		||||
            doc.content = parser.get_text()
 | 
			
		||||
 | 
			
		||||
@@ -225,25 +230,28 @@ def move_old_to_new_locations(apps, schema_editor):
 | 
			
		||||
    for doc in Document.objects.filter(archive_checksum__isnull=False):
 | 
			
		||||
        old_path = archive_path_old(doc)
 | 
			
		||||
 | 
			
		||||
        if not os.path.isfile(old_path):
 | 
			
		||||
            raise ValueError(
 | 
			
		||||
                f"Archived document ID:{doc.id} does not exist at: "
 | 
			
		||||
                f"{old_path}")
 | 
			
		||||
 | 
			
		||||
        if old_path in old_archive_path_to_id:
 | 
			
		||||
            affected_document_ids.add(doc.id)
 | 
			
		||||
            affected_document_ids.add(old_archive_path_to_id[old_path])
 | 
			
		||||
        else:
 | 
			
		||||
            old_archive_path_to_id[old_path] = doc.id
 | 
			
		||||
 | 
			
		||||
    # check that we can regenerate these archive versions
 | 
			
		||||
    # check that archive files of all unaffected documents are in place
 | 
			
		||||
    for doc in Document.objects.filter(archive_checksum__isnull=False):
 | 
			
		||||
        old_path = archive_path_old(doc)
 | 
			
		||||
        if doc.id not in affected_document_ids and not os.path.isfile(old_path):
 | 
			
		||||
            raise ValueError(
 | 
			
		||||
                f"Archived document ID:{doc.id} does not exist at: "
 | 
			
		||||
                f"{old_path}")
 | 
			
		||||
 | 
			
		||||
    # check that we can regenerate affected archive versions
 | 
			
		||||
    for doc_id in affected_document_ids:
 | 
			
		||||
        from documents.parsers import get_parser_class_for_mime_type
 | 
			
		||||
 | 
			
		||||
        doc = Document.objects.get(id=doc_id)
 | 
			
		||||
        parser_class = get_parser_class_for_mime_type(doc.mime_type)
 | 
			
		||||
        if not parser_class:
 | 
			
		||||
            raise Exception(
 | 
			
		||||
            raise ValueError(
 | 
			
		||||
                f"Document ID:{doc.id} has an invalid archived document, "
 | 
			
		||||
                f"but no parsers are available. Cannot migrate.")
 | 
			
		||||
 | 
			
		||||
@@ -253,6 +261,9 @@ def move_old_to_new_locations(apps, schema_editor):
 | 
			
		||||
            old_path = archive_path_old(doc)
 | 
			
		||||
            # remove affected archive versions
 | 
			
		||||
            if os.path.isfile(old_path):
 | 
			
		||||
                logger.debug(
 | 
			
		||||
                    f"Removing {old_path}"
 | 
			
		||||
                )
 | 
			
		||||
                os.unlink(old_path)
 | 
			
		||||
        else:
 | 
			
		||||
            # Set archive path for unaffected files
 | 
			
		||||
@@ -267,8 +278,6 @@ def move_old_to_new_locations(apps, schema_editor):
 | 
			
		||||
        create_archive_version(doc)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def move_new_to_old_locations(apps, schema_editor):
 | 
			
		||||
    Document = apps.get_model("documents", "Document")
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -2,10 +2,12 @@ import hashlib
 | 
			
		||||
import os
 | 
			
		||||
import shutil
 | 
			
		||||
from pathlib import Path
 | 
			
		||||
from unittest import mock
 | 
			
		||||
 | 
			
		||||
from django.conf import settings
 | 
			
		||||
from django.test import override_settings
 | 
			
		||||
 | 
			
		||||
from documents.parsers import ParseError
 | 
			
		||||
from documents.tests.utils import DirectoriesMixin, TestMigrations
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -169,6 +171,11 @@ class TestMigrateArchiveFilesWithFilenameFormat(TestMigrateArchiveFiles):
 | 
			
		||||
        self.assertEqual(Document.objects.get(id=self.clash4.id).archive_filename, "clash.png.pdf")
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def fake_parse_wrapper(parser, path, mime_type, file_name):
 | 
			
		||||
    parser.archive_path = None
 | 
			
		||||
    parser.text = "the text"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@override_settings(PAPERLESS_FILENAME_FORMAT="")
 | 
			
		||||
class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations):
 | 
			
		||||
 | 
			
		||||
@@ -185,6 +192,73 @@ class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations):
 | 
			
		||||
 | 
			
		||||
        self.assertRaisesMessage(ValueError, "does not exist at: ", self.performMigration)
 | 
			
		||||
 | 
			
		||||
    def test_parser_missing(self):
 | 
			
		||||
        Document = self.apps.get_model("documents", "Document")
 | 
			
		||||
 | 
			
		||||
        doc1 = make_test_document(Document, "document", "invalid/typesss768", simple_png, "document.png", simple_pdf)
 | 
			
		||||
        doc2 = make_test_document(Document, "document", "invalid/typesss768", simple_jpg, "document.jpg", simple_pdf)
 | 
			
		||||
 | 
			
		||||
        self.assertRaisesMessage(ValueError, "no parsers are available", self.performMigration)
 | 
			
		||||
 | 
			
		||||
    @mock.patch("documents.migrations.1012_fix_archive_files.parse_wrapper")
 | 
			
		||||
    def test_parser_error(self, m):
 | 
			
		||||
        m.side_effect = ParseError()
 | 
			
		||||
        Document = self.apps.get_model("documents", "Document")
 | 
			
		||||
 | 
			
		||||
        doc1 = make_test_document(Document, "document", "image/png", simple_png, "document.png", simple_pdf)
 | 
			
		||||
        doc2 = make_test_document(Document, "document", "application/pdf", simple_jpg, "document.jpg", simple_pdf)
 | 
			
		||||
 | 
			
		||||
        self.assertIsNotNone(doc1.archive_checksum)
 | 
			
		||||
        self.assertIsNotNone(doc2.archive_checksum)
 | 
			
		||||
 | 
			
		||||
        with self.assertLogs() as capture:
 | 
			
		||||
            self.performMigration()
 | 
			
		||||
 | 
			
		||||
        self.assertEqual(m.call_count, 6)
 | 
			
		||||
 | 
			
		||||
        self.assertEqual(
 | 
			
		||||
            len(list(filter(lambda log: "Parse error, will try again in 5 seconds" in log, capture.output))),
 | 
			
		||||
            4)
 | 
			
		||||
 | 
			
		||||
        self.assertEqual(
 | 
			
		||||
            len(list(filter(lambda log: "Unable to regenerate archive document for ID:" in log, capture.output))),
 | 
			
		||||
            2)
 | 
			
		||||
 | 
			
		||||
        Document = self.apps.get_model("documents", "Document")
 | 
			
		||||
 | 
			
		||||
        doc1 = Document.objects.get(id=doc1.id)
 | 
			
		||||
        doc2 = Document.objects.get(id=doc2.id)
 | 
			
		||||
 | 
			
		||||
        self.assertIsNone(doc1.archive_checksum)
 | 
			
		||||
        self.assertIsNone(doc2.archive_checksum)
 | 
			
		||||
        self.assertIsNone(doc1.archive_filename)
 | 
			
		||||
        self.assertIsNone(doc2.archive_filename)
 | 
			
		||||
 | 
			
		||||
    @mock.patch("documents.migrations.1012_fix_archive_files.parse_wrapper")
 | 
			
		||||
    def test_parser_no_archive(self, m):
 | 
			
		||||
        m.side_effect = fake_parse_wrapper
 | 
			
		||||
 | 
			
		||||
        Document = self.apps.get_model("documents", "Document")
 | 
			
		||||
 | 
			
		||||
        doc1 = make_test_document(Document, "document", "image/png", simple_png, "document.png", simple_pdf)
 | 
			
		||||
        doc2 = make_test_document(Document, "document", "application/pdf", simple_jpg, "document.jpg", simple_pdf)
 | 
			
		||||
 | 
			
		||||
        with self.assertLogs() as capture:
 | 
			
		||||
            self.performMigration()
 | 
			
		||||
 | 
			
		||||
        self.assertEqual(
 | 
			
		||||
            len(list(filter(lambda log: "Parser did not return an archive document for document" in log, capture.output))),
 | 
			
		||||
            2)
 | 
			
		||||
 | 
			
		||||
        Document = self.apps.get_model("documents", "Document")
 | 
			
		||||
 | 
			
		||||
        doc1 = Document.objects.get(id=doc1.id)
 | 
			
		||||
        doc2 = Document.objects.get(id=doc2.id)
 | 
			
		||||
 | 
			
		||||
        self.assertIsNone(doc1.archive_checksum)
 | 
			
		||||
        self.assertIsNone(doc2.archive_checksum)
 | 
			
		||||
        self.assertIsNone(doc1.archive_filename)
 | 
			
		||||
        self.assertIsNone(doc2.archive_filename)
 | 
			
		||||
 | 
			
		||||
@override_settings(PAPERLESS_FILENAME_FORMAT="")
 | 
			
		||||
class TestMigrateArchiveFilesBackwards(DirectoriesMixin, TestMigrations):
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user