Safer batched migration

This commit is contained in:
shamoon 2025-05-07 11:29:28 -07:00
parent e47c08bde3
commit 8d7e179a6d

View File

@@ -2,9 +2,7 @@
import datetime import datetime
import os
import pytz
from django.db import migrations from django.db import migrations
from django.db import models from django.db import models
from django.db.models.functions import TruncDate from django.db.models.functions import TruncDate
@@ -12,12 +10,23 @@ from django.db.models.functions import TruncDate
def migrate_date(apps, schema_editor): def migrate_date(apps, schema_editor):
Document = apps.get_model("documents", "Document") Document = apps.get_model("documents", "Document")
Document.objects.update( queryset = Document.objects.annotate(
created_date=TruncDate( truncated_created=TruncDate("created"),
"created", ).values("id", "truncated_created")
tzinfo=pytz.timezone(os.getenv("PAPERLESS_TIME_ZONE", "UTC")),
), # Batch to avoid loading all objects into memory at once,
# which would be problematic for large datasets.
batch_size = 500
updates = []
for item in queryset.iterator(chunk_size=batch_size):
updates.append(
Document(id=item["id"], created_date=item["truncated_created"]),
) )
if len(updates) >= batch_size:
Document.objects.bulk_update(updates, ["created_date"])
updates.clear()
if updates:
Document.objects.bulk_update(updates, ["created_date"])
class Migration(migrations.Migration): class Migration(migrations.Migration):