From 1322aaf4da1fd06adee3c491d7bfd577abf6c1cd Mon Sep 17 00:00:00 2001
From: jonaswinkler <17569239+jonaswinkler@users.noreply.github.com>
Date: Sun, 4 Apr 2021 20:41:08 +0200
Subject: [PATCH] add migration to fix null characters in document contents

---
 .../migrations/1015_remove_null_characters.py | 37 +++++++++++++++++++
 .../test_migration_remove_null_characters.py  | 19 ++++++++++
 2 files changed, 56 insertions(+)
 create mode 100644 src/documents/migrations/1015_remove_null_characters.py
 create mode 100644 src/documents/tests/test_migration_remove_null_characters.py

diff --git a/src/documents/migrations/1015_remove_null_characters.py b/src/documents/migrations/1015_remove_null_characters.py
new file mode 100644
index 000000000..2f7ee99b6
--- /dev/null
+++ b/src/documents/migrations/1015_remove_null_characters.py
@@ -0,0 +1,37 @@
+# Generated by Django 3.1.7 on 2021-04-04 18:28
+import logging
+
+from django.db import migrations
+
+
+logger = logging.getLogger("paperless.migrations")
+
+
+def remove_null_characters(apps, schema_editor):
+    """
+    Replace null characters in the content of all documents with spaces.
+
+    Only documents whose content contains a null character are saved.
+    """
+    Document = apps.get_model('documents', 'Document')
+
+    for doc in Document.objects.all():
+        content: str = doc.content
+        # Skip empty/missing content instead of failing on the "in" check.
+        if content and '\0' in content:
+            logger.info("Removing null characters from document %s...", doc)
+            doc.content = content.replace('\0', ' ')
+            doc.save()
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('documents', '1014_auto_20210228_1614'),
+    ]
+
+    operations = [
+        # The reverse operation is a no-op: replaced characters are not
+        # restored when the migration is unapplied.
+        migrations.RunPython(remove_null_characters, migrations.RunPython.noop)
+    ]
diff --git a/src/documents/tests/test_migration_remove_null_characters.py b/src/documents/tests/test_migration_remove_null_characters.py
new file mode 100644
index 000000000..ba6f18539
--- /dev/null
+++ b/src/documents/tests/test_migration_remove_null_characters.py
@@ -0,0 +1,19 @@
+from documents.tests.utils import DirectoriesMixin, TestMigrations
+
+
+class TestMigrateNullCharacters(DirectoriesMixin, TestMigrations):
+    """Check that migration 1015 strips null characters from content."""
+
+    migrate_from = '1014_auto_20210228_1614'
+    migrate_to = '1015_remove_null_characters'
+
+    def setUpBeforeMigration(self, apps):
+        Document = apps.get_model("documents", "Document")
+        self.doc = Document.objects.create(content="aaa\0bbb")
+
+    def testNullCharactersRemoved(self):
+        Document = self.apps.get_model('documents', 'Document')
+        content = Document.objects.get(id=self.doc.id).content
+        self.assertNotIn("\0", content)
+        # The null character is replaced by a space; the rest is untouched.
+        self.assertEqual(content, "aaa bbb")