Mirror of https://github.com/paperless-ngx/paperless-ngx.git, synced 2025-05-15 12:29:29 -05:00
Safer batched migration
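This data migration previously filled Document.created_date for every row in a single UPDATE, truncating `created` with a TruncDate expression bound to the pytz timezone from PAPERLESS_TIME_ZONE. It now annotates the truncated date onto a values() queryset and writes it back with bulk_update() in batches of 500, walking the queryset with iterator(chunk_size=...) so all documents are never held in memory at once.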
This commit is contained in:
parent e47c08bde3
commit 8d7e179a6d
@@ -2,9 +2,7 @@
 import datetime
-import os
 
-import pytz
 from django.db import migrations
 from django.db import models
 from django.db.models.functions import TruncDate
 
 
@@ -12,12 +10,23 @@ from django.db.models.functions import TruncDate
 
 def migrate_date(apps, schema_editor):
     Document = apps.get_model("documents", "Document")
-    Document.objects.update(
-        created_date=TruncDate(
-            "created",
-            tzinfo=pytz.timezone(os.getenv("PAPERLESS_TIME_ZONE", "UTC")),
-        ),
-    )
+    queryset = Document.objects.annotate(
+        truncated_created=TruncDate("created"),
+    ).values("id", "truncated_created")
+
+    # Batch to avoid loading all objects into memory at once,
+    # which would be problematic for large datasets.
+    batch_size = 500
+    updates = []
+    for item in queryset.iterator(chunk_size=batch_size):
+        updates.append(
+            Document(id=item["id"], created_date=item["truncated_created"]),
+        )
+        if len(updates) >= batch_size:
+            Document.objects.bulk_update(updates, ["created_date"])
+            updates.clear()
+    if updates:
+        Document.objects.bulk_update(updates, ["created_date"])
 
 
 class Migration(migrations.Migration):
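The batch-and-flush pattern in the new migrate_date generalizes to other bulk rewrites. Below is a minimal sketch of the same technique as a reusable helper; the name batched_bulk_update and its signature are illustrative, not part of this commit, and it assumes `rows` yields (pk, value) pairs for a Django model whose primary key field is `id`, as in the migration above.

# Sketch only: generalizes the batching pattern from migrate_date above.
# batched_bulk_update is a hypothetical helper, not part of this commit.
def batched_bulk_update(model, rows, field, batch_size=500):
    """Write (pk, value) pairs to `field` in fixed-size batches.

    Accumulates unsaved model instances and flushes them with
    bulk_update(), so at most `batch_size` objects sit in memory
    and each UPDATE statement touches a bounded number of rows.
    """
    updates = []
    for pk, value in rows:
        updates.append(model(id=pk, **{field: value}))
        if len(updates) >= batch_size:
            model.objects.bulk_update(updates, [field])
            updates.clear()
    if updates:  # flush the final, partially filled batch
        model.objects.bulk_update(updates, [field])

In the migration above, `rows` corresponds to the (id, truncated_created) pairs produced by queryset.iterator(chunk_size=batch_size), and `field` is "created_date".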