Remove unnecessary reshape_and_cache function and the math import from vllm/hpu/cache_ops.py

HabanaAI · Sep 17, 2024 · 690b867 · 690b867
1 parent a5528ab
commit 690b867
Showing 1 changed file with 0 additions and 35 deletions.
diff --git a/vllm/hpu/cache_ops.py b/vllm/hpu/cache_ops.py
@@ -5,45 +5,10 @@
 # LICENSE file in the root directory of this source tree.
 ###############################################################################
 
-import math
-
 import habana_frameworks.torch as htorch
 import torch
 
 
-def reshape_and_cache(key,
-                      value,
-                      key_cache,
-                      value_cache,
-                      slot_mapping,
-                      dtype,
-                      is_prompt=False):
-    num_blocks = key_cache.size(0)
-    block_size = key_cache.size(1)
-    slot_mapping = slot_mapping.flatten()
-    indices = torch.div(slot_mapping, block_size, rounding_mode="floor")
-    offsets = torch.fmod(slot_mapping, block_size)
-    num_slots_requested = slot_mapping.size(0)
-    num_slots_available = num_blocks * block_size
-    # NOTE(kzawora): HPU PT bridge crashes with
-    # RuntimeError: Invalid inputs for scatter_nd_onnx
-    # on index_put when num_slots_requested > num_slots_available.
-    # This case might occur when we have little kv cache blocks and
-    # lots of padding, or are doing warmup.
-    # This loop is a workaround for this issue. Please remove it
-    # once key_cache.index_put_(indices, offsets), key) works.
-    num_kv_cache_passes = math.ceil(num_slots_requested / num_slots_available)
-    for i in range(num_kv_cache_passes):
-        start_idx = i * num_slots_available
-        end_idx = (i + 1) * num_slots_available
-        key_cache.index_put_(
-            (indices[start_idx:end_idx], offsets[start_idx:end_idx]),
-            key[start_idx:end_idx])
-        value_cache.index_put_(
-            (indices[start_idx:end_idx], offsets[start_idx:end_idx]),
-            value[start_idx:end_idx])
-
-
 def insert_or_update_cache(input, cache, block_indices, block_offsets):
     if block_offsets is None:
         cache.index_copy_(0, block_indices, input)