add migration to fix null characters in document contents

This commit is contained in:
jonaswinkler 2021-04-04 20:41:08 +02:00
parent cd85d4e86a
commit 1322aaf4da
2 changed files with 44 additions and 0 deletions

View File

@ -0,0 +1,29 @@
# Generated by Django 3.1.7 on 2021-04-04 18:28
import logging
from django.db import migrations
logger = logging.getLogger("paperless.migrations")


def remove_null_characters(apps, schema_editor):
    """Replace NUL characters in stored document contents with spaces.

    Callback for ``migrations.RunPython``. Every ``Document`` whose
    ``content`` contains a NUL character is rewritten with each NUL
    replaced by a single space; all other documents are left untouched.

    Args:
        apps: App registry handed in by the migration framework; used to
            look up the ``Document`` model of the ``documents`` app.
        schema_editor: Unused; present because the RunPython callback
            signature requires it.
    """
    Document = apps.get_model('documents', 'Document')

    # NOTE: the queryset is deliberately not streamed with .iterator().
    # Django's documentation cautions against writing to the table being
    # iterated over that way on SQLite, and this loop saves rows as it goes.
    for doc in Document.objects.all():
        content = doc.content
        # Nothing to do for empty content, or for content that is already
        # clean. The falsy check also keeps a NULL value (should one ever
        # be stored) from raising TypeError on the membership test.
        if not content or '\0' not in content:
            continue

        logger.info("Removing null characters from document %s...", doc)
        doc.content = content.replace('\0', ' ')
        # Only the content column changed, so only that column is written.
        doc.save(update_fields=['content'])
class Migration(migrations.Migration):
    """Data migration that runs ``remove_null_characters`` on upgrade.

    The reverse direction is a no-op.
    """

    dependencies = [
        ("documents", "1014_auto_20210228_1614"),
    ]

    operations = [
        migrations.RunPython(
            code=remove_null_characters,
            reverse_code=migrations.RunPython.noop,
        ),
    ]

View File

@ -0,0 +1,15 @@
from documents.tests.utils import DirectoriesMixin, TestMigrations
class TestMigrateNullCharacters(DirectoriesMixin, TestMigrations):
    """Check that migrating to 1015 strips NUL characters from content."""

    migrate_from = '1014_auto_20210228_1614'
    migrate_to = '1015_remove_null_characters'

    def setUpBeforeMigration(self, apps):
        """Create the fixture document with an embedded NUL character."""
        Document = apps.get_model("documents", "Document")
        self.doc = Document.objects.create(content="aaa\0bbb")

    def testNullCharactersRemoved(self):
        """The NUL is replaced by a space and the surrounding text is kept."""
        Document = self.apps.get_model('documents', 'Document')
        migrated = Document.objects.get(id=self.doc.id)
        self.assertNotIn("\0", migrated.content)
        # Pin the exact result so a migration that wipes or truncates the
        # content cannot pass merely because no NUL is left.
        self.assertEqual(migrated.content, "aaa bbb")