diff --git a/vllm/worker/cache_engine.py b/vllm/worker/cache_engine.py index 11ad8a4b14f1e..a163656e8e903 100644 --- a/vllm/worker/cache_engine.py +++ b/vllm/worker/cache_engine.py @@ -79,18 +79,11 @@ def _allocate_kv_cache( # null block in CpuGpuBlockAllocator requires at least that # block to be zeroed-out. # We zero-out everything for simplicity. - if pin_memory: - if current_platform.is_hpu(): - kv_cache.append( - torch.zeros(kv_cache_shape, - dtype=self.dtype, - pin_memory=pin_memory, - device=device)) - else: - kv_cache.append( - torch.zeros(kv_cache_shape, - dtype=dtype, - device=device).pin_memory(device="hpu")) + kv_cache.append( + torch.zeros(kv_cache_shape, + dtype=self.dtype, + pin_memory=pin_memory, + device=device)) return kv_cache def swap_in(self, src_to_dst: torch.Tensor) -> None: