Mirror of https://github.com/paperless-ngx/paperless-ngx.git, synced 2025-05-15 12:29:29 -05:00
Safer batched migration
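This data migration previously filled Document.created_date for every row in a single UPDATE, truncating `created` with a TruncDate expression bound to the pytz timezone from PAPERLESS_TIME_ZONE. It now annotates the truncated date onto a values() queryset and writes it back with bulk_update() in batches of 500, walking the queryset with iterator(chunk_size=...) so all documents are never held in memory at once.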
This commit is contained in:
parent e47c08bde3
commit 8d7e179a6d
@@ -2,9 +2,7 @@
 import datetime
-import os
 
-import pytz
 from django.db import migrations
 from django.db import models
 from django.db.models.functions import TruncDate
 
 
@@ -12,12 +10,23 @@ from django.db.models.functions import TruncDate
 
 def migrate_date(apps, schema_editor):
     Document = apps.get_model("documents", "Document")
-    Document.objects.update(
-        created_date=TruncDate(
-            "created",
-            tzinfo=pytz.timezone(os.getenv("PAPERLESS_TIME_ZONE", "UTC")),
-        ),
-    )
+    queryset = Document.objects.annotate(
+        truncated_created=TruncDate("created"),
+    ).values("id", "truncated_created")
+
+    # Batch to avoid loading all objects into memory at once,
+    # which would be problematic for large datasets.
+    batch_size = 500
+    updates = []
+    for item in queryset.iterator(chunk_size=batch_size):
+        updates.append(
+            Document(id=item["id"], created_date=item["truncated_created"]),
+        )
+        if len(updates) >= batch_size:
+            Document.objects.bulk_update(updates, ["created_date"])
+            updates.clear()
+    if updates:
+        Document.objects.bulk_update(updates, ["created_date"])
 
 
 class Migration(migrations.Migration):
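The batch-and-flush pattern in the new migrate_date generalizes to other bulk rewrites. Below is a minimal sketch of the same technique as a reusable helper; the name batched_bulk_update and its signature are illustrative, not part of this commit, and it assumes `rows` yields (pk, value) pairs for a Django model whose primary key field is `id`, as in the migration above.

# Sketch only: generalizes the batching pattern from migrate_date above.
# batched_bulk_update is a hypothetical helper, not part of this commit.
def batched_bulk_update(model, rows, field, batch_size=500):
    """Write (pk, value) pairs to `field` in fixed-size batches.

    Accumulates unsaved model instances and flushes them with
    bulk_update(), so at most `batch_size` objects sit in memory
    and each UPDATE statement touches a bounded number of rows.
    """
    updates = []
    for pk, value in rows:
        updates.append(model(id=pk, **{field: value}))
        if len(updates) >= batch_size:
            model.objects.bulk_update(updates, [field])
            updates.clear()
    if updates:  # flush the final, partially filled batch
        model.objects.bulk_update(updates, [field])

In the migration above, `rows` corresponds to the (id, truncated_created) pairs produced by queryset.iterator(chunk_size=batch_size), and `field` is "created_date".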