Skip to content

Commit

Permalink
update code
Browse files Browse the repository at this point in the history
Signed-off-by: yuwenzho <yuwen.zhou@intel.com>
  • Loading branch information
yuwenzho committed Aug 29, 2024
1 parent 45f0cc9 commit 941963f
Showing 1 changed file with 2 additions and 4 deletions.
6 changes: 2 additions & 4 deletions vllm/hpu/cache_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,7 @@ def reshape_and_cache(key,
# lots of padding, or are doing warmup.
# This loop is a workaround for this issue. Please remove it
# once key_cache.index_put_((indices, offsets), key) works.
num_kv_cache_passes = int(
math.ceil(num_slots_requested / num_slots_available))
num_kv_cache_passes = math.ceil(num_slots_requested / num_slots_available)
for i in range(num_kv_cache_passes):
start_idx = i * num_slots_available
end_idx = (i + 1) * num_slots_available
Expand All @@ -58,8 +57,7 @@ def prepare_to_cache(cache, slot_mapping):
# lots of padding, or are doing warmup.
# This loop is a workaround for this issue. Please remove it
# once key_cache.index_put_((indices, offsets), key) works.
num_kv_cache_passes = int(
math.ceil(num_slots_requested / num_slots_available))
num_kv_cache_passes = math.ceil(num_slots_requested / num_slots_available)

return num_kv_cache_passes, num_slots_available, indices, offsets

Expand Down

0 comments on commit 941963f

Please sign in to comment.