1593 populate existing asset storage image (#1604)
* black and flake8 updates

* add ordering on qs

* black changes
jkueloc committed Feb 23, 2022
1 parent 87d9a09 commit 3db47dc
Showing 1 changed file with 44 additions and 0 deletions.
44 changes: 44 additions & 0 deletions concordia/tasks.py
@@ -353,6 +353,50 @@ def calculate_difficulty_values(asset_qs=None):
    return updated_count


# Modeled on calculate_difficulty_values() above, adapted to backfill storage_image
@celery_app.task
def populate_storage_image_values(asset_qs=None):
    """
    For Assets that existed prior to implementing the storage_image ImageField,
    build the relative S3 storage key for each asset and update its
    storage_image value
    """

    # Only fetch assets with no storage_image value; callers may also pass in
    # a queryset of their own
    if asset_qs is None:
        asset_qs = (
            Asset.objects.filter(storage_image__isnull=True)
            .order_by("id")
            # .prefetch_related("item__project__campaign")[:20000]
            .select_related("item__project__campaign")[:25000]
        )

    updated_count = 0

    # We'll process assets in chunks, using an iterator so that objects which
    # will never be used again are not kept in memory. We build the S3 relative
    # key for each existing asset and pass the chunk to bulk_update() to be
    # saved in a single query.
    for asset_chunk in chunked(asset_qs.iterator(), 2000):
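        # chunked() is assumed here to be more_itertools.chunked or an
        # equivalent helper, yielding successive lists of up to 2,000 assets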

        for asset in asset_chunk:
            asset.storage_image = "/".join(
                [
                    asset.item.project.campaign.slug,
                    asset.item.project.slug,
                    asset.item.item_id,
                    asset.media_url,
                ]
            )

        # We only save the new storage_image value, both for performance and to
        # avoid any possibility of race conditions causing stale data to be
        # saved:
        Asset.objects.bulk_update(asset_chunk, ["storage_image"])
        updated_count += len(asset_chunk)

    logger.debug("Storage image updated count: %s", updated_count)

    return updated_count
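
A backfill task like this would typically be kicked off once. As a minimal
sketch of how it could be invoked, assuming a standard Celery worker setup
(.delay() is Celery's stock async-dispatch call; invoking the task function
directly runs it synchronously in-process):

    from concordia.tasks import populate_storage_image_values

    # Queue the backfill for a Celery worker to pick up:
    populate_storage_image_values.delay()

    # Or, for a one-off run, execute synchronously in the current process:
    populate_storage_image_values()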


@celery_app.task
def populate_asset_years():
"""
