From c6900ccffebf8f63582b4f98d9f96f8e6fa5a270 Mon Sep 17 00:00:00 2001
From: Konrad Zawora <kzawora@habana.ai>
Date: Tue, 24 Sep 2024 14:49:36 +0300
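Subject: [PATCH] Fix INC FP8 inference after rebase

Do not move CPU-resident parameters to the target device in
device_loading_context() when the target device type is 'hpu'.

Remove the HpuPlatform.get_device_capability() override, which
unconditionally raised RuntimeError, along with the Optional and
DeviceCapability imports it used.
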
---
 vllm/model_executor/model_loader/loader.py | 2 +-
 vllm/platforms/hpu.py                      | 9 +--------
 2 files changed, 2 insertions(+), 9 deletions(-)

diff --git a/vllm/model_executor/model_loader/loader.py b/vllm/model_executor/model_loader/loader.py
index b03e6aca48c0e..b3274b6d95115 100644
--- a/vllm/model_executor/model_loader/loader.py
+++ b/vllm/model_executor/model_loader/loader.py
@@ -59,7 +59,7 @@ def device_loading_context(module: torch.nn.Module,
 
     # Store original device states and move parameters to GPU if they're on CPU
     for name, p in module.named_parameters():
-        if p.device.type == "cpu":
+        if p.device.type == "cpu" and target_device.type != 'hpu':
             original_device_states[name] = p.device
             p.data = p.data.to(target_device)
         # Parameters already on target device are not touched
diff --git a/vllm/platforms/hpu.py b/vllm/platforms/hpu.py
index feddce69ac5b4..170cfff94f90d 100644
--- a/vllm/platforms/hpu.py
+++ b/vllm/platforms/hpu.py
@@ -1,18 +1,11 @@
-from typing import Optional
-
 import torch
 
-from .interface import DeviceCapability, Platform, PlatformEnum
+from .interface import Platform, PlatformEnum
 
 
 class HpuPlatform(Platform):
     _enum = PlatformEnum.HPU
 
-    @staticmethod
-    def get_device_capability(
-            device_id: int = 0) -> Optional[DeviceCapability]:
-        raise RuntimeError("HPU does not have device capability.")
-
     @staticmethod
     def inference_mode():
         return torch.no_grad()