Remove redundant int() around math.ceil() in vllm/hpu/cache_ops.py

Signed-off-by: yuwenzho <yuwen.zhou@intel.com>
HabanaAI · Aug 29, 2024 · 941963f · 941963f
1 parent 45f0cc9
commit 941963f
Showing 1 changed file with 2 additions and 4 deletions.
diff --git a/vllm/hpu/cache_ops.py b/vllm/hpu/cache_ops.py
@@ -30,8 +30,7 @@ def reshape_and_cache(key,
     # lots of padding, or are doing warmup.
     # This loop is a workaround for this issue. Please remove it
     # once key_cache.index_put_((indices, offsets), key) works.
-    num_kv_cache_passes = int(
-        math.ceil(num_slots_requested / num_slots_available))
+    num_kv_cache_passes = math.ceil(num_slots_requested / num_slots_available)
     for i in range(num_kv_cache_passes):
         start_idx = i * num_slots_available
         end_idx = (i + 1) * num_slots_available
@@ -58,8 +57,7 @@ def prepare_to_cache(cache, slot_mapping):
     # lots of padding, or are doing warmup.
     # This loop is a workaround for this issue. Please remove it
     # once key_cache.index_put_((indices, offsets), key) works.
-    num_kv_cache_passes = int(
-        math.ceil(num_slots_requested / num_slots_available))
+    num_kv_cache_passes = math.ceil(num_slots_requested / num_slots_available)
 
     return num_kv_cache_passes, num_slots_available, indices, offsets