diff --git a/docker/install_management_commands.sh b/docker/install_management_commands.sh index 38604af9d..fa59d59a9 100755 --- a/docker/install_management_commands.sh +++ b/docker/install_management_commands.sh @@ -14,7 +14,8 @@ for command in decrypt_documents \ document_thumbnails \ document_sanity_checker \ document_fuzzy_match \ - manage_superuser; + manage_superuser \ + convert_mariadb_uuid; do echo "installing $command..." sed "s/management_command/$command/g" management_script.sh > /usr/local/bin/$command diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md index 6418b82ba..b18ded26d 100644 --- a/docs/troubleshooting.md +++ b/docs/troubleshooting.md @@ -353,6 +353,20 @@ ways from the original. As the logs indicate, if you encounter this error you ca `PAPERLESS_OCR_USER_ARGS: '{"continue_on_soft_render_error": true}'` to try to 'force' processing documents with this issue. +## Logs show "possible incompatible database column" when deleting documents {#convert-uuid-field} + +You may see errors when deleting documents like: + +``` +Data too long for column 'transaction_id' at row 1 +``` + +This error can occur in installations which have upgraded from a version of Paperless-ngx that used Django 4 (Paperless-ngx versions prior to v2.13.0) with a MariaDB/MySQL database. Due to a backwards-incompatible change in Django 5, the column "documents_document.transaction_id" will need to be re-created, which can be done with a one-time run of the following management command: + +```shell-session +$ python3 manage.py convert_mariadb_uuid +``` + ## Platform-Specific Deployment Troubleshooting A user-maintained wiki page is available to help troubleshoot issues that may arise when trying to deploy Paperless-ngx on specific platforms, for example SELinux. Please see [the wiki](https://github.com/paperless-ngx/paperless-ngx/wiki/Platform%E2%80%90Specific-Troubleshooting). 
diff --git a/src/documents/bulk_edit.py b/src/documents/bulk_edit.py index 1aba8f9ec..2e3e5f591 100644 --- a/src/documents/bulk_edit.py +++ b/src/documents/bulk_edit.py @@ -159,13 +159,20 @@ def modify_custom_fields(doc_ids: list[int], add_custom_fields, remove_custom_fi @shared_task def delete(doc_ids: list[int]): - Document.objects.filter(id__in=doc_ids).delete() + try: + Document.objects.filter(id__in=doc_ids).delete() - from documents import index + from documents import index - with index.open_index_writer() as writer: - for id in doc_ids: - index.remove_document_by_id(writer, id) + with index.open_index_writer() as writer: + for id in doc_ids: + index.remove_document_by_id(writer, id) + except Exception as e: + if "Data too long for column" in str(e): + logger.warning( + "Detected a possible incompatible database column. See https://docs.paperless-ngx.com/troubleshooting/#convert-uuid-field", + ) + logger.error(f"Error deleting documents: {e!s}") return "OK" diff --git a/src/documents/management/commands/convert_mariadb_uuid.py b/src/documents/management/commands/convert_mariadb_uuid.py new file mode 100644 index 000000000..4000e67cb --- /dev/null +++ b/src/documents/management/commands/convert_mariadb_uuid.py @@ -0,0 +1,36 @@ +from django.core.management.base import BaseCommand +from django.db import connection +from django.db import models + +from documents.models import Document + + +class Command(BaseCommand): + # This code is taken almost entirely from https://github.com/wagtail/wagtail/pull/11912 with all credit to the original author. + help = "Converts UUID columns from char type to the native UUID type used in MariaDB 10.7+ and Django 5.0+." 
+ + def convert_field(self, model, field_name, null=False): + if model._meta.get_field(field_name).model != model: # pragma: no cover + # Field is inherited from a parent model + return + + if not model._meta.managed: # pragma: no cover + # The migration framework skips unmanaged models, so we should too + return + + old_field = models.CharField(null=null, max_length=36) + old_field.set_attributes_from_name(field_name) + + new_field = models.UUIDField(null=null) + new_field.set_attributes_from_name(field_name) + + with connection.schema_editor() as schema_editor: + schema_editor.alter_field(model, old_field, new_field) + self.stdout.write( + self.style.SUCCESS( + f"Successfully converted {model._meta.label} {field_name} field to UUID type.", + ), + ) + + def handle(self, **options): + self.convert_field(Document, "transaction_id", null=True) diff --git a/src/documents/tests/test_api_documents.py b/src/documents/tests/test_api_documents.py index b1cd43932..2e2b02f0d 100644 --- a/src/documents/tests/test_api_documents.py +++ b/src/documents/tests/test_api_documents.py @@ -15,6 +15,7 @@ from django.conf import settings from django.contrib.auth.models import Permission from django.contrib.auth.models import User from django.core.cache import cache +from django.db import DataError from django.test import override_settings from django.utils import timezone from guardian.shortcuts import assign_perm @@ -2605,6 +2606,35 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase): self.assertEqual(resp.status_code, status.HTTP_200_OK) self.assertEqual(doc1.tags.count(), 2) + @mock.patch("django_softdelete.models.SoftDeleteModel.delete") + def test_warn_on_delete_with_old_uuid_field(self, mocked_delete): + """ + GIVEN: + - Existing document in a (mocked) MariaDB database with an old UUID field + WHEN: + - API request to delete document is made which raises "Data too long for column" error + THEN: + - Warning is logged alerting the user of the issue 
(and link to the fix) + """ + + doc = Document.objects.create( + title="test", + mime_type="application/pdf", + content="this is a document 1", + checksum="1", + ) + + mocked_delete.side_effect = DataError( + "Data too long for column 'transaction_id' at row 1", + ) + + with self.assertLogs(level="WARNING") as cm: + self.client.delete(f"/api/documents/{doc.pk}/") + self.assertIn( + "Detected a possible incompatible database column", + cm.output[0], + ) + class TestDocumentApiV2(DirectoriesMixin, APITestCase): def setUp(self): diff --git a/src/documents/tests/test_bulk_edit.py b/src/documents/tests/test_bulk_edit.py index d80116a80..c6e846a77 100644 --- a/src/documents/tests/test_bulk_edit.py +++ b/src/documents/tests/test_bulk_edit.py @@ -327,6 +327,15 @@ class TestBulkEdit(DirectoriesMixin, TestCase): ) self.assertEqual(groups_with_perms.count(), 2) + @mock.patch("documents.models.Document.delete") + def test_delete_documents_old_uuid_field(self, m): + m.side_effect = Exception("Data too long for column 'transaction_id' at row 1") + doc_ids = [self.doc1.id, self.doc2.id, self.doc3.id] + bulk_edit.delete(doc_ids) + with self.assertLogs(level="WARNING") as cm: + bulk_edit.delete(doc_ids) + self.assertIn("possible incompatible database column", cm.output[0]) + class TestPDFActions(DirectoriesMixin, TestCase): def setUp(self): diff --git a/src/documents/tests/test_management.py b/src/documents/tests/test_management.py index d1efe27d4..76a0a2c74 100644 --- a/src/documents/tests/test_management.py +++ b/src/documents/tests/test_management.py @@ -3,6 +3,7 @@ import hashlib import os import shutil import tempfile +from io import StringIO from pathlib import Path from unittest import mock @@ -238,3 +239,16 @@ class TestSanityChecker(DirectoriesMixin, TestCase): self.assertEqual(len(capture.output), 2) self.assertIn("Checksum mismatch. 
Stored: abc, actual:", capture.output[1]) + + +class TestConvertMariaDBUUID(TestCase): + @mock.patch("django.db.connection.schema_editor") + def test_convert(self, m): + m.alter_field.return_value = None + + stdout = StringIO() + call_command("convert_mariadb_uuid", stdout=stdout) + + m.assert_called_once() + + self.assertIn("Successfully converted", stdout.getvalue()) diff --git a/src/documents/views.py b/src/documents/views.py index 919f9d2dd..10b2d0cbd 100644 --- a/src/documents/views.py +++ b/src/documents/views.py @@ -406,7 +406,17 @@ class DocumentViewSet( from documents import index index.remove_document_from_index(self.get_object()) - return super().destroy(request, *args, **kwargs) + try: + return super().destroy(request, *args, **kwargs) + except Exception as e: + if "Data too long for column" in str(e): + logger.warning( + "Detected a possible incompatible database column. See https://docs.paperless-ngx.com/troubleshooting/#convert-uuid-field", + ) + logger.error(f"Error deleting document: {e!s}") + return HttpResponseBadRequest( + "Error deleting document, check logs for more detail.", + ) @staticmethod def original_requested(request):