diff --git a/concordia/tasks.py b/concordia/tasks.py
index b0bc2b58e..2f86370d0 100644
--- a/concordia/tasks.py
+++ b/concordia/tasks.py
@@ -353,6 +353,50 @@ def calculate_difficulty_values(asset_qs=None):
     return updated_count
 
 
+@celery_app.task
+def populate_storage_image_values(asset_qs=None):
+    """
+    For Assets which existed before the storage_image ImageField was added,
+    build the relative S3 storage key for each asset and save it as the
+    storage_image value
+    """
+
+    if asset_qs is None:
+        # Only fetch assets which do not have a storage_image value yet:
+        asset_qs = (
+            Asset.objects.filter(storage_image__isnull=True)
+            .order_by("id")
+            .select_related("item__project__campaign")[:25000]
+        )
+
+    updated_count = 0
+
+    # We'll process assets in chunks using an iterator to avoid holding objects
+    # in memory which will never be used again. We build the relative S3 key
+    # for each existing asset and pass each chunk to bulk_update() so it is
+    # saved in a single query.
+    for asset_chunk in chunked(asset_qs.iterator(), 2000):
+        for asset in asset_chunk:
+            asset.storage_image = "/".join(
+                [
+                    asset.item.project.campaign.slug,
+                    asset.item.project.slug,
+                    asset.item.item_id,
+                    asset.media_url,
+                ]
+            )
+
+        # We save only the new storage_image value, both for performance and
+        # to avoid any possibility of a race condition causing stale data in
+        # other fields to be saved:
+        Asset.objects.bulk_update(asset_chunk, ["storage_image"])
+        updated_count += len(asset_chunk)
+
+    logger.debug("Storage image updated count: %s", updated_count)
+
+    return updated_count
+
+
 @celery_app.task
 def populate_asset_years():
     """
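
Note: the queryset slice caps each run at 25,000 assets, so the backfill has to
be repeated until the task reports zero updates. A minimal sketch of driving it
from a Django shell (hypothetical usage, not part of this change; a Celery task
is still a plain Python callable, so a direct call runs it synchronously):

    from concordia.tasks import populate_storage_image_values

    # Hypothetical driver loop: each call fixes up to 25,000 assets and
    # returns how many it updated, so stop once nothing is left to update.
    while True:
        updated = populate_storage_image_values()
        if updated == 0:
            break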
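For reference, chunked() is assumed to come from more_itertools (imported
elsewhere in tasks.py, outside this hunk); a minimal equivalent showing the
batching behaviour the task relies on:

    from itertools import islice

    def chunked(iterable, n):
        """Yield successive lists of at most n items from iterable."""
        # Minimal stand-in for more_itertools.chunked (an assumption, for
        # illustration only): pull n items at a time until exhausted.
        it = iter(iterable)
        while chunk := list(islice(it, n)):
            yield chunk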