From c6900ccffebf8f63582b4f98d9f96f8e6fa5a270 Mon Sep 17 00:00:00 2001
From: Konrad Zawora
Date: Tue, 24 Sep 2024 14:49:36 +0300
Subject: [PATCH] Fix INC FP8 inference after rebase

---
 vllm/model_executor/model_loader/loader.py | 2 +-
 vllm/platforms/hpu.py                      | 9 +--------
 2 files changed, 2 insertions(+), 9 deletions(-)

diff --git a/vllm/model_executor/model_loader/loader.py b/vllm/model_executor/model_loader/loader.py
index b03e6aca48c0e..b3274b6d95115 100644
--- a/vllm/model_executor/model_loader/loader.py
+++ b/vllm/model_executor/model_loader/loader.py
@@ -59,7 +59,7 @@ def device_loading_context(module: torch.nn.Module,
 
     # Store original device states and move parameters to GPU if they're on CPU
     for name, p in module.named_parameters():
-        if p.device.type == "cpu":
+        if p.device.type == "cpu" and target_device.type != 'hpu':
             original_device_states[name] = p.device
             p.data = p.data.to(target_device)
         # Parameters already on target device are not touched
diff --git a/vllm/platforms/hpu.py b/vllm/platforms/hpu.py
index feddce69ac5b4..170cfff94f90d 100644
--- a/vllm/platforms/hpu.py
+++ b/vllm/platforms/hpu.py
@@ -1,18 +1,11 @@
-from typing import Optional
-
 import torch
 
-from .interface import DeviceCapability, Platform, PlatformEnum
+from .interface import Platform, PlatformEnum
 
 
 class HpuPlatform(Platform):
     _enum = PlatformEnum.HPU
 
-    @staticmethod
-    def get_device_capability(
-            device_id: int = 0) -> Optional[DeviceCapability]:
-        raise RuntimeError("HPU does not have device capability.")
-
     @staticmethod
     def inference_mode():
         return torch.no_grad()