Skip to content

Commit

Permalink
remove unnecessary function
Browse files Browse the repository at this point in the history
  • Loading branch information
kzawora-intel committed Sep 17, 2024
1 parent a5528ab commit 690b867
Showing 1 changed file with 0 additions and 35 deletions.
35 changes: 0 additions & 35 deletions vllm/hpu/cache_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,45 +5,10 @@
# LICENSE file in the root directory of this source tree.
###############################################################################

import math

import habana_frameworks.torch as htorch
import torch


def reshape_and_cache(key,
                      value,
                      key_cache,
                      value_cache,
                      slot_mapping,
                      dtype,
                      is_prompt=False):
    """Write ``key`` and ``value`` rows into the caches, in place.

    Each entry of the flattened ``slot_mapping`` is split into a block index
    (``slot // block_size``) and an offset inside that block
    (``slot % block_size``), where ``block_size`` is ``key_cache.size(1)``.
    Row ``i`` of ``key`` / ``value`` is stored at
    ``cache[block_index[i], offset[i]]``.

    Args:
        key: Source rows for ``key_cache``; indexed along dim 0 by slot.
        value: Source rows for ``value_cache``; indexed along dim 0 by slot.
        key_cache: Destination tensor; dim 0 is the block, dim 1 the
            position inside the block. Modified in place.
        value_cache: Destination tensor, indexed with the same
            (block, offset) pairs as ``key_cache``. Modified in place.
        slot_mapping: Integer tensor of flat slot ids; flattened before use.
        dtype: Accepted for interface compatibility; not used here.
        is_prompt: Accepted for interface compatibility; not used here.

    Returns:
        None. Both caches are updated in place.
    """
    slots_per_block = key_cache.size(1)
    capacity = key_cache.size(0) * slots_per_block

    flat_slots = slot_mapping.flatten()
    requested = flat_slots.size(0)

    block_ids = torch.div(flat_slots, slots_per_block, rounding_mode="floor")
    in_block_pos = torch.fmod(flat_slots, slots_per_block)

    # NOTE(kzawora): the HPU PT bridge crashes with
    #   RuntimeError: Invalid inputs for scatter_nd_onnx
    # on index_put when more slots are requested than the cache holds.
    # That can happen with few KV cache blocks and a lot of padding, or
    # during warmup. As a workaround, the write is split into passes of at
    # most `capacity` slots each. Remove the loop once a single
    # key_cache.index_put_((indices, offsets), key) call works.
    pass_count = math.ceil(requested / capacity)
    for pass_no in range(pass_count):
        # The final window may run past the end; slicing clips it.
        window = slice(pass_no * capacity, (pass_no + 1) * capacity)
        target = (block_ids[window], in_block_pos[window])
        key_cache.index_put_(target, key[window])
        value_cache.index_put_(target, value[window])


def insert_or_update_cache(input, cache, block_indices, block_offsets):
if block_offsets is None:
cache.index_copy_(0, block_indices, input)
Expand Down

0 comments on commit 690b867

Please sign in to comment.