1593 populate existing asset storage image (#1604)
* black and flake8 updates

* add ordering on qs

* black changes
jkueloc committed Feb 23, 2022
1 parent 87d9a09 commit 3db47dc
Showing 1 changed file with 44 additions and 0 deletions.
44 changes: 44 additions & 0 deletions concordia/tasks.py
@@ -353,6 +353,50 @@ def calculate_difficulty_values(asset_qs=None):
    return updated_count


# Modeled on calculate_difficulty_values() above, adapted to backfill storage_image
@celery_app.task
def populate_storage_image_values(asset_qs=None):
    """
    For Assets that existed prior to implementing the storage_image ImageField,
    build the relative S3 storage key for each asset and update its
    storage_image value
    """

    # Only fetch assets with no storage_image value; callers may also pass in
    # a queryset of their own
    if asset_qs is None:
        asset_qs = (
            Asset.objects.filter(storage_image__isnull=True)
            .order_by("id")
            # .prefetch_related("item__project__campaign")[:20000]
            .select_related("item__project__campaign")[:25000]
        )

    updated_count = 0

    # We'll process assets in chunks, using an iterator so that objects which
    # will never be used again are not kept in memory. We build the S3 relative
    # key for each existing asset and pass the chunk to bulk_update() to be
    # saved in a single query.
    for asset_chunk in chunked(asset_qs.iterator(), 2000):
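        # chunked() is assumed here to be more_itertools.chunked or an
        # equivalent helper, yielding successive lists of up to 2,000 assets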

        for asset in asset_chunk:
            asset.storage_image = "/".join(
                [
                    asset.item.project.campaign.slug,
                    asset.item.project.slug,
                    asset.item.item_id,
                    asset.media_url,
                ]
            )

        # We only save the new storage_image value, both for performance and to
        # avoid any possibility of race conditions causing stale data to be
        # saved:
        Asset.objects.bulk_update(asset_chunk, ["storage_image"])
        updated_count += len(asset_chunk)

    logger.debug("Storage image updated count: %s", updated_count)

    return updated_count
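
A backfill task like this would typically be kicked off once. As a minimal
sketch of how it could be invoked, assuming a standard Celery worker setup
(.delay() is Celery's stock async-dispatch call; invoking the task function
directly runs it synchronously in-process):

    from concordia.tasks import populate_storage_image_values

    # Queue the backfill for a Celery worker to pick up:
    populate_storage_image_values.delay()

    # Or, for a one-off run, execute synchronously in the current process:
    populate_storage_image_values()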


@celery_app.task
def populate_asset_years():
"""
