From 8d7e179a6d37bdec71fcf9f4802d87c8c227e604 Mon Sep 17 00:00:00 2001 From: shamoon <4887959+shamoon@users.noreply.github.com> Date: Wed, 7 May 2025 11:29:28 -0700 Subject: [PATCH] Safer batched migration --- .../migrations/1066_alter_document_created.py | 25 +++++++++++++------ 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/src/documents/migrations/1066_alter_document_created.py b/src/documents/migrations/1066_alter_document_created.py index 48f900d4a..cab4212ba 100644 --- a/src/documents/migrations/1066_alter_document_created.py +++ b/src/documents/migrations/1066_alter_document_created.py @@ -2,9 +2,7 @@ import datetime -import os -import pytz from django.db import migrations from django.db import models from django.db.models.functions import TruncDate @@ -12,12 +10,23 @@ from django.db.models.functions import TruncDate def migrate_date(apps, schema_editor): Document = apps.get_model("documents", "Document") - Document.objects.update( - created_date=TruncDate( - "created", - tzinfo=pytz.timezone(os.getenv("PAPERLESS_TIME_ZONE", "UTC")), - ), - ) + queryset = Document.objects.annotate( + truncated_created=TruncDate("created"), + ).values("id", "truncated_created") + + # Batch to avoid loading all objects into memory at once, + # which would be problematic for large datasets. + batch_size = 500 + updates = [] + for item in queryset.iterator(chunk_size=batch_size): + updates.append( + Document(id=item["id"], created_date=item["truncated_created"]), + ) + if len(updates) >= batch_size: + Document.objects.bulk_update(updates, ["created_date"]) + updates.clear() + if updates: + Document.objects.bulk_update(updates, ["created_date"]) class Migration(migrations.Migration):