Fix: handle uuid fields created under mariadb and Django 4 ()

This commit is contained in:
shamoon 2024-10-28 06:54:16 -07:00 committed by GitHub
parent 335c6c3820
commit 28fdb170bf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 128 additions and 7 deletions

@ -14,7 +14,8 @@ for command in decrypt_documents \
document_thumbnails \
document_sanity_checker \
document_fuzzy_match \
manage_superuser;
manage_superuser \
convert_mariadb_uuid;
do
echo "installing $command..."
sed "s/management_command/$command/g" management_script.sh > /usr/local/bin/$command

@ -353,6 +353,20 @@ ways from the original. As the logs indicate, if you encounter this error you ca
`PAPERLESS_OCR_USER_ARGS: '{"continue_on_soft_render_error": true}'` to try to 'force'
processing documents with this issue.
## Logs show "possible incompatible database column" when deleting documents {#convert-uuid-field}
You may see errors when deleting documents like:
```
Data too long for column 'transaction_id' at row 1
```
This error can occur in installations which have upgraded from a version of Paperless-ngx that used Django 4 (Paperless-ngx versions prior to v2.13.0) with a MariaDB/MySQL database. Due to a backwards-incompatible change in Django 5, the column "documents_document.transaction_id" will need to be re-created, which can be done with a one-time run of the following management command:
```shell-session
$ python3 manage.py convert_mariadb_uuid
```
## Platform-Specific Deployment Troubleshooting
A user-maintained wiki page is available to help troubleshoot issues that may arise when trying to deploy Paperless-ngx on specific platforms, for example SELinux. Please see [the wiki](https://github.com/paperless-ngx/paperless-ngx/wiki/Platform%E2%80%90Specific-Troubleshooting).

@ -159,13 +159,20 @@ def modify_custom_fields(doc_ids: list[int], add_custom_fields, remove_custom_fi
@shared_task
def delete(doc_ids: list[int]):
Document.objects.filter(id__in=doc_ids).delete()
try:
Document.objects.filter(id__in=doc_ids).delete()
from documents import index
from documents import index
with index.open_index_writer() as writer:
for id in doc_ids:
index.remove_document_by_id(writer, id)
with index.open_index_writer() as writer:
for id in doc_ids:
index.remove_document_by_id(writer, id)
except Exception as e:
if "Data too long for column" in str(e):
logger.warning(
"Detected a possible incompatible database column. See https://docs.paperless-ngx.com/troubleshooting/#convert-uuid-field",
)
logger.error(f"Error deleting documents: {e!s}")
return "OK"

@ -0,0 +1,36 @@
from django.core.management.base import BaseCommand
from django.db import connection
from django.db import models
from documents.models import Document
class Command(BaseCommand):
    """Management command that re-types a char(36) UUID column as a UUID field."""

    # This code is taken almost entirely from https://github.com/wagtail/wagtail/pull/11912 with all credit to the original author.
    help = "Converts UUID columns from char type to the native UUID type used in MariaDB 10.7+ and Django 5.0+."

    def convert_field(self, model, field_name, null=False):
        """Alter one column of ``model`` from ``CharField(36)`` to ``UUIDField``.

        Args:
            model: Model class whose column should be converted.
            field_name: Name of the field to convert.
            null: Nullability used for both the old and the new field
                definition.

        Returns early, without touching the schema, when the field is
        declared on a different model or when ``model`` is unmanaged.
        Otherwise a success message is written to ``self.stdout`` once the
        schema editor has altered the field.
        """
        opts = model._meta

        declaring_model = opts.get_field(field_name).model
        if declaring_model != model:  # pragma: no cover
            # Field is inherited from a parent model
            return

        if not opts.managed:  # pragma: no cover
            # The migration framework skips unmanaged models, so we should too
            return

        # Describe the column as it is now and as it should become; both
        # definitions need the field name attached before they can be used.
        char_field = models.CharField(null=null, max_length=36)
        uuid_field = models.UUIDField(null=null)
        for field in (char_field, uuid_field):
            field.set_attributes_from_name(field_name)

        with connection.schema_editor() as editor:
            editor.alter_field(model, char_field, uuid_field)

        message = f"Successfully converted {opts.label} {field_name} field to UUID type."
        self.stdout.write(self.style.SUCCESS(message))

    def handle(self, **options):
        """Convert the nullable ``transaction_id`` field of ``Document``."""
        self.convert_field(Document, "transaction_id", null=True)

@ -15,6 +15,7 @@ from django.conf import settings
from django.contrib.auth.models import Permission
from django.contrib.auth.models import User
from django.core.cache import cache
from django.db import DataError
from django.test import override_settings
from django.utils import timezone
from guardian.shortcuts import assign_perm
@ -2605,6 +2606,35 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
self.assertEqual(resp.status_code, status.HTTP_200_OK)
self.assertEqual(doc1.tags.count(), 2)
@mock.patch("django_softdelete.models.SoftDeleteModel.delete")
def test_warn_on_delete_with_old_uuid_field(self, mocked_delete):
    """
    GIVEN:
        - Existing document whose delete is mocked to fail like a MariaDB
          database with an old UUID field
    WHEN:
        - API request to delete the document raises "Data too long for column"
    THEN:
        - The first captured log line warns about the possibly incompatible
          database column
    """
    document = Document.objects.create(
        title="test",
        mime_type="application/pdf",
        content="this is a document 1",
        checksum="1",
    )

    # Simulate the database rejecting the value written during deletion
    column_error = DataError(
        "Data too long for column 'transaction_id' at row 1",
    )
    mocked_delete.side_effect = column_error

    endpoint = f"/api/documents/{document.pk}/"
    with self.assertLogs(level="WARNING") as captured:
        self.client.delete(endpoint)
        first_line = captured.output[0]
        self.assertIn(
            "Detected a possible incompatible database column",
            first_line,
        )
class TestDocumentApiV2(DirectoriesMixin, APITestCase):
def setUp(self):

@ -327,6 +327,15 @@ class TestBulkEdit(DirectoriesMixin, TestCase):
)
self.assertEqual(groups_with_perms.count(), 2)
@mock.patch("documents.models.Document.delete")
def test_delete_documents_old_uuid_field(self, m):
    """
    GIVEN:
        - Document.delete mocked to raise "Data too long for column"
    WHEN:
        - bulk_edit.delete is run for three documents
    THEN:
        - The first captured log line mentions the possibly incompatible
          database column
    """
    m.side_effect = Exception("Data too long for column 'transaction_id' at row 1")
    doc_ids = [doc.id for doc in (self.doc1, self.doc2, self.doc3)]

    # This call runs outside the log capture.
    # NOTE(review): it looks redundant with the captured call below; confirm
    # before removing.
    bulk_edit.delete(doc_ids)

    with self.assertLogs(level="WARNING") as captured:
        bulk_edit.delete(doc_ids)
        first_line = captured.output[0]
        self.assertIn("possible incompatible database column", first_line)
class TestPDFActions(DirectoriesMixin, TestCase):
def setUp(self):

@ -3,6 +3,7 @@ import hashlib
import os
import shutil
import tempfile
from io import StringIO
from pathlib import Path
from unittest import mock
@ -238,3 +239,16 @@ class TestSanityChecker(DirectoriesMixin, TestCase):
self.assertEqual(len(capture.output), 2)
self.assertIn("Checksum mismatch. Stored: abc, actual:", capture.output[1])
class TestConvertMariaDBUUID(TestCase):
    """Tests for the ``convert_mariadb_uuid`` management command."""

    @mock.patch("django.db.connection.schema_editor")
    def test_convert(self, m):
        """
        GIVEN:
            - A mocked database schema editor
        WHEN:
            - The convert_mariadb_uuid command is called
        THEN:
            - Exactly one schema editor is opened
            - alter_field is issued once on that editor
            - A success message is written to stdout
        """
        # The patched callable is entered as a context manager, so the object
        # that actually receives alter_field() is whatever __enter__ returns,
        # not the patched callable itself.
        editor = m.return_value.__enter__.return_value
        editor.alter_field.return_value = None

        stdout = StringIO()
        call_command("convert_mariadb_uuid", stdout=stdout)

        m.assert_called_once()
        editor.alter_field.assert_called_once()
        self.assertIn("Successfully converted", stdout.getvalue())

@ -406,7 +406,17 @@ class DocumentViewSet(
from documents import index
index.remove_document_from_index(self.get_object())
return super().destroy(request, *args, **kwargs)
try:
return super().destroy(request, *args, **kwargs)
except Exception as e:
if "Data too long for column" in str(e):
logger.warning(
"Detected a possible incompatible database column. See https://docs.paperless-ngx.com/troubleshooting/#convert-uuid-field",
)
logger.error(f"Error deleting document: {e!s}")
return HttpResponseBadRequest(
"Error deleting document, check logs for more detail.",
)
@staticmethod
def original_requested(request):