mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
more testing of the migration
This commit is contained in:
parent
6c8f010f7a
commit
04519ee623
@ -160,7 +160,12 @@ def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
|
||||
###############################################################################
|
||||
|
||||
|
||||
def create_archive_version(doc, retry_count=4):
|
||||
def parse_wrapper(parser, path, mime_type, file_name):
|
||||
# this is here so that I can mock this out for testing.
|
||||
parser.parse(path, mime_type, file_name)
|
||||
|
||||
|
||||
def create_archive_version(doc, retry_count=3):
|
||||
from documents.parsers import get_parser_class_for_mime_type, \
|
||||
DocumentParser, \
|
||||
ParseError
|
||||
@ -172,8 +177,8 @@ def create_archive_version(doc, retry_count=4):
|
||||
for try_num in range(retry_count):
|
||||
parser: DocumentParser = parser_class(None, None)
|
||||
try:
|
||||
parser.parse(source_path(doc), doc.mime_type,
|
||||
os.path.basename(doc.filename))
|
||||
parse_wrapper(parser, source_path(doc), doc.mime_type,
|
||||
os.path.basename(doc.filename))
|
||||
doc.content = parser.get_text()
|
||||
|
||||
if parser.get_archive_path() and os.path.isfile(
|
||||
@ -225,25 +230,28 @@ def move_old_to_new_locations(apps, schema_editor):
|
||||
for doc in Document.objects.filter(archive_checksum__isnull=False):
|
||||
old_path = archive_path_old(doc)
|
||||
|
||||
if not os.path.isfile(old_path):
|
||||
raise ValueError(
|
||||
f"Archived document ID:{doc.id} does not exist at: "
|
||||
f"{old_path}")
|
||||
|
||||
if old_path in old_archive_path_to_id:
|
||||
affected_document_ids.add(doc.id)
|
||||
affected_document_ids.add(old_archive_path_to_id[old_path])
|
||||
else:
|
||||
old_archive_path_to_id[old_path] = doc.id
|
||||
|
||||
# check that we can regenerate these archive versions
|
||||
# check that archive files of all unaffected documents are in place
|
||||
for doc in Document.objects.filter(archive_checksum__isnull=False):
|
||||
old_path = archive_path_old(doc)
|
||||
if doc.id not in affected_document_ids and not os.path.isfile(old_path):
|
||||
raise ValueError(
|
||||
f"Archived document ID:{doc.id} does not exist at: "
|
||||
f"{old_path}")
|
||||
|
||||
# check that we can regenerate affected archive versions
|
||||
for doc_id in affected_document_ids:
|
||||
from documents.parsers import get_parser_class_for_mime_type
|
||||
|
||||
doc = Document.objects.get(id=doc_id)
|
||||
parser_class = get_parser_class_for_mime_type(doc.mime_type)
|
||||
if not parser_class:
|
||||
raise Exception(
|
||||
raise ValueError(
|
||||
f"Document ID:{doc.id} has an invalid archived document, "
|
||||
f"but no parsers are available. Cannot migrate.")
|
||||
|
||||
@ -253,6 +261,9 @@ def move_old_to_new_locations(apps, schema_editor):
|
||||
old_path = archive_path_old(doc)
|
||||
# remove affected archive versions
|
||||
if os.path.isfile(old_path):
|
||||
logger.debug(
|
||||
f"Removing {old_path}"
|
||||
)
|
||||
os.unlink(old_path)
|
||||
else:
|
||||
# Set archive path for unaffected files
|
||||
@ -267,8 +278,6 @@ def move_old_to_new_locations(apps, schema_editor):
|
||||
create_archive_version(doc)
|
||||
|
||||
|
||||
|
||||
|
||||
def move_new_to_old_locations(apps, schema_editor):
|
||||
Document = apps.get_model("documents", "Document")
|
||||
|
||||
|
@ -2,10 +2,12 @@ import hashlib
|
||||
import os
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from unittest import mock
|
||||
|
||||
from django.conf import settings
|
||||
from django.test import override_settings
|
||||
|
||||
from documents.parsers import ParseError
|
||||
from documents.tests.utils import DirectoriesMixin, TestMigrations
|
||||
|
||||
|
||||
@ -169,6 +171,11 @@ class TestMigrateArchiveFilesWithFilenameFormat(TestMigrateArchiveFiles):
|
||||
self.assertEqual(Document.objects.get(id=self.clash4.id).archive_filename, "clash.png.pdf")
|
||||
|
||||
|
||||
def fake_parse_wrapper(parser, path, mime_type, file_name):
|
||||
parser.archive_path = None
|
||||
parser.text = "the text"
|
||||
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="")
|
||||
class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations):
|
||||
|
||||
@ -185,6 +192,73 @@ class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations):
|
||||
|
||||
self.assertRaisesMessage(ValueError, "does not exist at: ", self.performMigration)
|
||||
|
||||
def test_parser_missing(self):
|
||||
Document = self.apps.get_model("documents", "Document")
|
||||
|
||||
doc1 = make_test_document(Document, "document", "invalid/typesss768", simple_png, "document.png", simple_pdf)
|
||||
doc2 = make_test_document(Document, "document", "invalid/typesss768", simple_jpg, "document.jpg", simple_pdf)
|
||||
|
||||
self.assertRaisesMessage(ValueError, "no parsers are available", self.performMigration)
|
||||
|
||||
@mock.patch("documents.migrations.1012_fix_archive_files.parse_wrapper")
|
||||
def test_parser_error(self, m):
|
||||
m.side_effect = ParseError()
|
||||
Document = self.apps.get_model("documents", "Document")
|
||||
|
||||
doc1 = make_test_document(Document, "document", "image/png", simple_png, "document.png", simple_pdf)
|
||||
doc2 = make_test_document(Document, "document", "application/pdf", simple_jpg, "document.jpg", simple_pdf)
|
||||
|
||||
self.assertIsNotNone(doc1.archive_checksum)
|
||||
self.assertIsNotNone(doc2.archive_checksum)
|
||||
|
||||
with self.assertLogs() as capture:
|
||||
self.performMigration()
|
||||
|
||||
self.assertEqual(m.call_count, 6)
|
||||
|
||||
self.assertEqual(
|
||||
len(list(filter(lambda log: "Parse error, will try again in 5 seconds" in log, capture.output))),
|
||||
4)
|
||||
|
||||
self.assertEqual(
|
||||
len(list(filter(lambda log: "Unable to regenerate archive document for ID:" in log, capture.output))),
|
||||
2)
|
||||
|
||||
Document = self.apps.get_model("documents", "Document")
|
||||
|
||||
doc1 = Document.objects.get(id=doc1.id)
|
||||
doc2 = Document.objects.get(id=doc2.id)
|
||||
|
||||
self.assertIsNone(doc1.archive_checksum)
|
||||
self.assertIsNone(doc2.archive_checksum)
|
||||
self.assertIsNone(doc1.archive_filename)
|
||||
self.assertIsNone(doc2.archive_filename)
|
||||
|
||||
@mock.patch("documents.migrations.1012_fix_archive_files.parse_wrapper")
|
||||
def test_parser_no_archive(self, m):
|
||||
m.side_effect = fake_parse_wrapper
|
||||
|
||||
Document = self.apps.get_model("documents", "Document")
|
||||
|
||||
doc1 = make_test_document(Document, "document", "image/png", simple_png, "document.png", simple_pdf)
|
||||
doc2 = make_test_document(Document, "document", "application/pdf", simple_jpg, "document.jpg", simple_pdf)
|
||||
|
||||
with self.assertLogs() as capture:
|
||||
self.performMigration()
|
||||
|
||||
self.assertEqual(
|
||||
len(list(filter(lambda log: "Parser did not return an archive document for document" in log, capture.output))),
|
||||
2)
|
||||
|
||||
Document = self.apps.get_model("documents", "Document")
|
||||
|
||||
doc1 = Document.objects.get(id=doc1.id)
|
||||
doc2 = Document.objects.get(id=doc2.id)
|
||||
|
||||
self.assertIsNone(doc1.archive_checksum)
|
||||
self.assertIsNone(doc2.archive_checksum)
|
||||
self.assertIsNone(doc1.archive_filename)
|
||||
self.assertIsNone(doc2.archive_filename)
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="")
|
||||
class TestMigrateArchiveFilesBackwards(DirectoriesMixin, TestMigrations):
|
||||
|
Loading…
x
Reference in New Issue
Block a user