From 690b8679fea7b138807fde2d6f0c3af374557abe Mon Sep 17 00:00:00 2001
From: Konrad Zawora
Date: Tue, 17 Sep 2024 13:04:34 +0300
Subject: [PATCH] remove unnecessary function

---
 vllm/hpu/cache_ops.py | 35 -----------------------------------
 1 file changed, 35 deletions(-)

diff --git a/vllm/hpu/cache_ops.py b/vllm/hpu/cache_ops.py
index f9910a9e84f35..091aa7d3d679d 100644
--- a/vllm/hpu/cache_ops.py
+++ b/vllm/hpu/cache_ops.py
@@ -5,45 +5,10 @@
 # LICENSE file in the root directory of this source tree.
 ###############################################################################
 
-import math
-
 import habana_frameworks.torch as htorch
 import torch
 
 
-def reshape_and_cache(key,
-                      value,
-                      key_cache,
-                      value_cache,
-                      slot_mapping,
-                      dtype,
-                      is_prompt=False):
-    num_blocks = key_cache.size(0)
-    block_size = key_cache.size(1)
-    slot_mapping = slot_mapping.flatten()
-    indices = torch.div(slot_mapping, block_size, rounding_mode="floor")
-    offsets = torch.fmod(slot_mapping, block_size)
-    num_slots_requested = slot_mapping.size(0)
-    num_slots_available = num_blocks * block_size
-    # NOTE(kzawora): HPU PT bridge crashes with
-    # RuntimeError: Invalid inputs for scatter_nd_onnx
-    # on index_put when num_slots_requested > num_slots_available.
-    # This case might occur when we have little kv cache blocks and
-    # lots of padding, or are doing warmup.
-    # This loop is a workaround for this issue. Please remove it
-    # once key_cache.index_put_(indices, offsets), key) works.
-    num_kv_cache_passes = math.ceil(num_slots_requested / num_slots_available)
-    for i in range(num_kv_cache_passes):
-        start_idx = i * num_slots_available
-        end_idx = (i + 1) * num_slots_available
-        key_cache.index_put_(
-            (indices[start_idx:end_idx], offsets[start_idx:end_idx]),
-            key[start_idx:end_idx])
-        value_cache.index_put_(
-            (indices[start_idx:end_idx], offsets[start_idx:end_idx]),
-            value[start_idx:end_idx])
-
-
 def insert_or_update_cache(input, cache, block_indices, block_offsets):
     if block_offsets is None:
         cache.index_copy_(0, block_indices, input)
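
For reference (not part of the patch): a minimal caller-side sketch of what replaces the deleted helper, assuming callers now derive block indices and offsets from the flat slot mapping themselves, using the same floor-divide/modulo arithmetic the removed reshape_and_cache performed, before calling the retained insert_or_update_cache. The wrapper name cache_keys_and_values is hypothetical.

import torch

from vllm.hpu.cache_ops import insert_or_update_cache


def cache_keys_and_values(key, value, key_cache, value_cache, slot_mapping):
    # Hypothetical wrapper: split each flat slot id into a (block, offset)
    # pair, mirroring the arithmetic the removed reshape_and_cache used.
    block_size = key_cache.size(1)          # slots per cache block
    slot_mapping = slot_mapping.flatten()   # one flat slot id per token
    block_indices = torch.div(slot_mapping, block_size, rounding_mode="floor")
    block_offsets = torch.fmod(slot_mapping, block_size)
    # Scatter the new key/value vectors into their cache slots.
    insert_or_update_cache(key, key_cache, block_indices, block_offsets)
    insert_or_update_cache(value, value_cache, block_indices, block_offsets)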