From 690b8679fea7b138807fde2d6f0c3af374557abe Mon Sep 17 00:00:00 2001
From: Konrad Zawora
Date: Tue, 17 Sep 2024 13:04:34 +0300
Subject: [PATCH] remove unnecessary function

---
 vllm/hpu/cache_ops.py | 35 -----------------------------------
 1 file changed, 35 deletions(-)

diff --git a/vllm/hpu/cache_ops.py b/vllm/hpu/cache_ops.py
index f9910a9e84f35..091aa7d3d679d 100644
--- a/vllm/hpu/cache_ops.py
+++ b/vllm/hpu/cache_ops.py
@@ -5,45 +5,10 @@
 # LICENSE file in the root directory of this source tree.
 ###############################################################################
 
-import math
-
 import habana_frameworks.torch as htorch
 import torch
 
 
-def reshape_and_cache(key,
-                      value,
-                      key_cache,
-                      value_cache,
-                      slot_mapping,
-                      dtype,
-                      is_prompt=False):
-    num_blocks = key_cache.size(0)
-    block_size = key_cache.size(1)
-    slot_mapping = slot_mapping.flatten()
-    indices = torch.div(slot_mapping, block_size, rounding_mode="floor")
-    offsets = torch.fmod(slot_mapping, block_size)
-    num_slots_requested = slot_mapping.size(0)
-    num_slots_available = num_blocks * block_size
-    # NOTE(kzawora): HPU PT bridge crashes with
-    # RuntimeError: Invalid inputs for scatter_nd_onnx
-    # on index_put when num_slots_requested > num_slots_available.
-    # This case might occur when we have little kv cache blocks and
-    # lots of padding, or are doing warmup.
-    # This loop is a workaround for this issue. Please remove it
-    # once key_cache.index_put_(indices, offsets), key) works.
-    num_kv_cache_passes = math.ceil(num_slots_requested / num_slots_available)
-    for i in range(num_kv_cache_passes):
-        start_idx = i * num_slots_available
-        end_idx = (i + 1) * num_slots_available
-        key_cache.index_put_(
-            (indices[start_idx:end_idx], offsets[start_idx:end_idx]),
-            key[start_idx:end_idx])
-        value_cache.index_put_(
-            (indices[start_idx:end_idx], offsets[start_idx:end_idx]),
-            value[start_idx:end_idx])
-
-
 def insert_or_update_cache(input, cache, block_indices, block_offsets):
     if block_offsets is None:
         cache.index_copy_(0, block_indices, input)
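
For reference (not part of the patch): a minimal caller-side sketch of what replaces the deleted helper, assuming callers now derive block indices and offsets from the flat slot mapping themselves, using the same floor-divide/modulo arithmetic the removed reshape_and_cache performed, before calling the retained insert_or_update_cache. The wrapper name cache_keys_and_values is hypothetical.

import torch

from vllm.hpu.cache_ops import insert_or_update_cache


def cache_keys_and_values(key, value, key_cache, value_cache, slot_mapping):
    # Hypothetical wrapper: split each flat slot id into a (block, offset)
    # pair, mirroring the arithmetic the removed reshape_and_cache used.
    block_size = key_cache.size(1)          # slots per cache block
    slot_mapping = slot_mapping.flatten()   # one flat slot id per token
    block_indices = torch.div(slot_mapping, block_size, rounding_mode="floor")
    block_offsets = torch.fmod(slot_mapping, block_size)
    # Scatter the new key/value vectors into their cache slots.
    insert_or_update_cache(key, key_cache, block_indices, block_offsets)
    insert_or_update_cache(value, value_cache, block_indices, block_offsets)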