Skip to content

Commit

Permalink
Revert the change in cache_engine, as it is not used on HPU
Browse files Browse the repository at this point in the history
  • Loading branch information
ccrhx4 committed Oct 30, 2024
1 parent 566f1c0 commit 85cf06f
Showing 1 changed file with 5 additions and 12 deletions.
17 changes: 5 additions & 12 deletions vllm/worker/cache_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,18 +79,11 @@ def _allocate_kv_cache(
# null block in CpuGpuBlockAllocator requires at least that
# block to be zeroed-out.
# We zero-out everything for simplicity.
if pin_memory:
if current_platform.is_hpu():
kv_cache.append(
torch.zeros(kv_cache_shape,
dtype=self.dtype,
pin_memory=pin_memory,
device=device))
else:
kv_cache.append(
torch.zeros(kv_cache_shape,
dtype=dtype,
device=device).pin_memory(device="hpu"))
kv_cache.append(
torch.zeros(kv_cache_shape,
dtype=self.dtype,
pin_memory=pin_memory,
device=device))
return kv_cache

def swap_in(self, src_to_dst: torch.Tensor) -> None:
Expand Down

0 comments on commit 85cf06f

Please sign in to comment.