diff --git a/vllm/hpu/cache_ops.py b/vllm/hpu/cache_ops.py index facc01751dc5..9042924f68b3 100644 --- a/vllm/hpu/cache_ops.py +++ b/vllm/hpu/cache_ops.py @@ -5,9 +5,11 @@ # LICENSE file in the root directory of this source tree. ############################################################################### +import math + import habana_frameworks.torch as htorch import torch -import math + def reshape_and_cache(key, value, diff --git a/vllm/model_executor/models/gpt_bigcode.py b/vllm/model_executor/models/gpt_bigcode.py index b06ea89aeded..5d4387dbb9f4 100644 --- a/vllm/model_executor/models/gpt_bigcode.py +++ b/vllm/model_executor/models/gpt_bigcode.py @@ -46,6 +46,7 @@ is_hpu = current_platform.is_hpu() + class GPTBigCodeAttention(nn.Module): def __init__( diff --git a/vllm/model_executor/models/llama.py b/vllm/model_executor/models/llama.py index 9bb3456d3f0f..d809da29e5e6 100644 --- a/vllm/model_executor/models/llama.py +++ b/vllm/model_executor/models/llama.py @@ -57,6 +57,7 @@ is_hpu = current_platform.is_hpu() + class LlamaMLP(nn.Module): def __init__(