Fix: handle uuid fields created under mariadb and Django 4 (#8034)

This commit is contained in:
shamoon 2024-10-28 06:54:16 -07:00 committed by GitHub
parent 335c6c3820
commit 28fdb170bf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 128 additions and 7 deletions

View File

@ -14,7 +14,8 @@ for command in decrypt_documents \
document_thumbnails \ document_thumbnails \
document_sanity_checker \ document_sanity_checker \
document_fuzzy_match \ document_fuzzy_match \
manage_superuser; manage_superuser \
convert_mariadb_uuid;
do do
echo "installing $command..." echo "installing $command..."
sed "s/management_command/$command/g" management_script.sh > /usr/local/bin/$command sed "s/management_command/$command/g" management_script.sh > /usr/local/bin/$command

View File

@ -353,6 +353,20 @@ ways from the original. As the logs indicate, if you encounter this error you ca
`PAPERLESS_OCR_USER_ARGS: '{"continue_on_soft_render_error": true}'` to try to 'force' `PAPERLESS_OCR_USER_ARGS: '{"continue_on_soft_render_error": true}'` to try to 'force'
processing documents with this issue. processing documents with this issue.
## Logs show "possible incompatible database column" when deleting documents {#convert-uuid-field}
You may see errors when deleting documents like:
```
Data too long for column 'transaction_id' at row 1
```
This error can occur in installations which have upgraded from a version of Paperless-ngx that used Django 4 (Paperless-ngx versions prior to v2.13.0) with a MariaDB/MySQL database. Due to the backwards-incompatible change in Django 5, the column "documents_document.transaction_id" will need to be re-created, which can be done with a one-time run of the following management command:
```shell-session
$ python3 manage.py convert_mariadb_uuid
```
## Platform-Specific Deployment Troubleshooting ## Platform-Specific Deployment Troubleshooting
A user-maintained wiki page is available to help troubleshoot issues that may arise when trying to deploy Paperless-ngx on specific platforms, for example SELinux. Please see [the wiki](https://github.com/paperless-ngx/paperless-ngx/wiki/Platform%E2%80%90Specific-Troubleshooting). A user-maintained wiki page is available to help troubleshoot issues that may arise when trying to deploy Paperless-ngx on specific platforms, for example SELinux. Please see [the wiki](https://github.com/paperless-ngx/paperless-ngx/wiki/Platform%E2%80%90Specific-Troubleshooting).

View File

@ -159,13 +159,20 @@ def modify_custom_fields(doc_ids: list[int], add_custom_fields, remove_custom_fi
@shared_task
def delete(doc_ids: list[int]):
    """
    Delete the documents with the given ids and remove them from the index.

    Any exception raised while deleting or while updating the index is
    caught and logged as an error instead of being propagated. When the
    error text contains "Data too long for column", an additional warning
    pointing to the troubleshooting docs is logged first.

    Args:
        doc_ids: Primary keys of the documents to delete.

    Returns:
        The string "OK", also when an error was caught and logged.
    """
    try:
        Document.objects.filter(id__in=doc_ids).delete()

        # Imported here rather than at module level, as in the original code.
        from documents import index

        with index.open_index_writer() as writer:
            # ``doc_id`` rather than ``id`` so the builtin is not shadowed.
            for doc_id in doc_ids:
                index.remove_document_by_id(writer, doc_id)
    except Exception as e:
        if "Data too long for column" in str(e):
            logger.warning(
                "Detected a possible incompatible database column. See https://docs.paperless-ngx.com/troubleshooting/#convert-uuid-field",
            )
        logger.error(f"Error deleting documents: {e!s}")

    return "OK"

View File

@ -0,0 +1,36 @@
from django.core.management.base import BaseCommand
from django.db import connection
from django.db import models
from documents.models import Document
class Command(BaseCommand):
    # This code is taken almost entirely from https://github.com/wagtail/wagtail/pull/11912 with all credit to the original author.
    help = "Converts UUID columns from char type to the native UUID type used in MariaDB 10.7+ and Django 5.0+."

    def convert_field(self, model, field_name, null=False):
        """
        Alter ``field_name`` on ``model`` from a 36-character CharField to a
        UUIDField using the connection's schema editor.

        Nothing is done when the field is defined on a different model than
        ``model`` or when ``model`` is unmanaged. On completion a success
        message is written to the command's stdout.

        Args:
            model: The model class owning the column.
            field_name: Name of the field to convert.
            null: Passed as ``null`` to both the old and new field definitions.
        """
        meta = model._meta

        if meta.get_field(field_name).model != model:  # pragma: no cover
            # Field is inherited from a parent model
            return
        if not meta.managed:  # pragma: no cover
            # The migration framework skips unmanaged models, so we should too
            return

        def named(field):
            # Give the standalone field the name attributes for ``field_name``.
            field.set_attributes_from_name(field_name)
            return field

        char_field = named(models.CharField(null=null, max_length=36))
        uuid_field = named(models.UUIDField(null=null))

        with connection.schema_editor() as editor:
            editor.alter_field(model, char_field, uuid_field)

        message = f"Successfully converted {meta.label} {field_name} field to UUID type."
        self.stdout.write(self.style.SUCCESS(message))

    def handle(self, **options):
        """Convert ``Document.transaction_id`` (nullable) to a UUID column."""
        self.convert_field(Document, "transaction_id", null=True)

View File

@ -15,6 +15,7 @@ from django.conf import settings
from django.contrib.auth.models import Permission from django.contrib.auth.models import Permission
from django.contrib.auth.models import User from django.contrib.auth.models import User
from django.core.cache import cache from django.core.cache import cache
from django.db import DataError
from django.test import override_settings from django.test import override_settings
from django.utils import timezone from django.utils import timezone
from guardian.shortcuts import assign_perm from guardian.shortcuts import assign_perm
@ -2605,6 +2606,35 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
self.assertEqual(resp.status_code, status.HTTP_200_OK) self.assertEqual(resp.status_code, status.HTTP_200_OK)
self.assertEqual(doc1.tags.count(), 2) self.assertEqual(doc1.tags.count(), 2)
@mock.patch("django_softdelete.models.SoftDeleteModel.delete")
def test_warn_on_delete_with_old_uuid_field(self, mocked_delete):
    """
    GIVEN:
        - Existing document in a (mocked) MariaDB database with an old UUID field
    WHEN:
        - API request to delete document is made which raises "Data too long for column" error
    THEN:
        - Warning is logged alerting the user of the issue (and link to the fix)
    """
    document = Document.objects.create(
        title="test",
        mime_type="application/pdf",
        content="this is a document 1",
        checksum="1",
    )

    # Make the mocked delete fail with the error text the view checks for.
    column_error = DataError(
        "Data too long for column 'transaction_id' at row 1",
    )
    mocked_delete.side_effect = column_error

    delete_url = f"/api/documents/{document.pk}/"
    with self.assertLogs(level="WARNING") as captured:
        self.client.delete(delete_url)
        first_record = captured.output[0]
        self.assertIn(
            "Detected a possible incompatible database column",
            first_record,
        )
class TestDocumentApiV2(DirectoriesMixin, APITestCase): class TestDocumentApiV2(DirectoriesMixin, APITestCase):
def setUp(self): def setUp(self):

View File

@ -327,6 +327,15 @@ class TestBulkEdit(DirectoriesMixin, TestCase):
) )
self.assertEqual(groups_with_perms.count(), 2) self.assertEqual(groups_with_perms.count(), 2)
@mock.patch("documents.models.Document.delete")
def test_delete_documents_old_uuid_field(self, m):
    """
    GIVEN:
        - Document deletion (mocked) raises a "Data too long for column" error
    WHEN:
        - Bulk edit delete is run for existing documents
    THEN:
        - A warning about a possible incompatible database column is logged
    """
    m.side_effect = Exception("Data too long for column 'transaction_id' at row 1")
    doc_ids = [self.doc1.id, self.doc2.id, self.doc3.id]

    # Call delete once, inside the log capture; a second call outside it
    # would add nothing because nothing is asserted on it.
    with self.assertLogs(level="WARNING") as cm:
        bulk_edit.delete(doc_ids)
        self.assertIn("possible incompatible database column", cm.output[0])
class TestPDFActions(DirectoriesMixin, TestCase): class TestPDFActions(DirectoriesMixin, TestCase):
def setUp(self): def setUp(self):

View File

@ -3,6 +3,7 @@ import hashlib
import os import os
import shutil import shutil
import tempfile import tempfile
from io import StringIO
from pathlib import Path from pathlib import Path
from unittest import mock from unittest import mock
@ -238,3 +239,16 @@ class TestSanityChecker(DirectoriesMixin, TestCase):
self.assertEqual(len(capture.output), 2) self.assertEqual(len(capture.output), 2)
self.assertIn("Checksum mismatch. Stored: abc, actual:", capture.output[1]) self.assertIn("Checksum mismatch. Stored: abc, actual:", capture.output[1])
class TestConvertMariaDBUUID(TestCase):
    """Tests for the ``convert_mariadb_uuid`` management command."""

    @mock.patch("django.db.connection.schema_editor")
    def test_convert(self, m):
        """
        GIVEN:
            - The connection's schema editor is mocked
        WHEN:
            - The convert_mariadb_uuid command is run
        THEN:
            - A schema editor is opened once, alter_field is called once on
              it, and a success message is written to stdout
        """
        # The command calls alter_field on the object yielded by
        # ``with connection.schema_editor() as ...``, which for a MagicMock is
        # the return value of ``__enter__`` on the mock's return value.
        schema_editor = m.return_value.__enter__.return_value

        stdout = StringIO()
        call_command("convert_mariadb_uuid", stdout=stdout)

        m.assert_called_once()
        schema_editor.alter_field.assert_called_once()
        self.assertIn("Successfully converted", stdout.getvalue())

View File

@ -406,7 +406,17 @@ class DocumentViewSet(
from documents import index from documents import index
index.remove_document_from_index(self.get_object()) index.remove_document_from_index(self.get_object())
return super().destroy(request, *args, **kwargs) try:
return super().destroy(request, *args, **kwargs)
except Exception as e:
if "Data too long for column" in str(e):
logger.warning(
"Detected a possible incompatible database column. See https://docs.paperless-ngx.com/troubleshooting/#convert-uuid-field",
)
logger.error(f"Error deleting document: {e!s}")
return HttpResponseBadRequest(
"Error deleting document, check logs for more detail.",
)
@staticmethod @staticmethod
def original_requested(request): def original_requested(request):