
Commit c90e153

Merge remote-tracking branch 'origin/habana_main' into private/kzawora/pruned_habana_main

kzawora-intel committed Sep 24, 2024
2 parents 20c87dd + 9be37a3 commit c90e153
Showing 4 changed files with 9 additions and 22 deletions.
requirements-hpu.txt (2 changes: 1 addition & 1 deletion)

@@ -6,4 +6,4 @@ ray == 2.32.0
 triton
 pandas
 tabulate
-vllm-hpu-extension @ git+https://github.com/HabanaAI/vllm-hpu-extension.git@0e05e25
+vllm-hpu-extension @ git+https://github.com/HabanaAI/vllm-hpu-extension.git@0a7adab
vllm/core/block/cpu_gpu_block_allocator.py (5 changes: 3 additions & 2 deletions)

@@ -4,7 +4,8 @@
                                         DeviceAwareBlockAllocator)
 from vllm.core.block.naive_block import NaiveBlock, NaiveBlockAllocator
 from vllm.core.block.prefix_caching_block import PrefixCachingBlockAllocator
-from vllm.utils import Device, is_hpu
+from vllm.platforms import current_platform
+from vllm.utils import Device


 class CpuGpuBlockAllocator(DeviceAwareBlockAllocator):

@@ -53,7 +54,7 @@ def create(
             before CPU block IDs.
         """
         # For HPU, block id 0 is used only for padding
-        reserved_blocks = 1 if is_hpu() else 0
+        reserved_blocks = 1 if current_platform.is_hpu() else 0
         block_ids = list(
             range(reserved_blocks, num_gpu_blocks + num_cpu_blocks))
         num_gpu_blocks -= reserved_blocks
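The hunk above only swaps the platform check, but the surrounding logic is what the comment describes: on HPU, block id 0 is held back for padding, so usable block ids start at 1 and one block comes out of the GPU share. The same reservation is applied to the free-block list in vllm/core/block_manager_v1.py below. A minimal standalone sketch of that arithmetic (not vLLM code; the function name and sample counts are illustrative):

def assign_block_ids(num_gpu_blocks: int, num_cpu_blocks: int, on_hpu: bool):
    # Mirrors the logic shown in CpuGpuBlockAllocator.create():
    # reserve block id 0 on HPU only, then hand out the remaining ids.
    reserved_blocks = 1 if on_hpu else 0
    block_ids = list(range(reserved_blocks, num_gpu_blocks + num_cpu_blocks))
    num_gpu_blocks -= reserved_blocks  # the reserved block is taken from the GPU share
    return block_ids, num_gpu_blocks

print(assign_block_ids(10, 6, on_hpu=True))   # ids 1..15, GPU share drops to 9
print(assign_block_ids(10, 6, on_hpu=False))  # ids 0..15, GPU share stays 10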
vllm/core/block_manager_v1.py (5 changes: 3 additions & 2 deletions)

@@ -13,8 +13,9 @@
 from vllm.core.evictor_v1 import EvictionPolicy, Evictor, make_evictor
 from vllm.core.interfaces import AllocStatus, BlockSpaceManager
 from vllm.logger import init_logger
+from vllm.platforms import current_platform
 from vllm.sequence import Sequence, SequenceGroup, SequenceStatus
-from vllm.utils import Device, is_hpu
+from vllm.utils import Device

 logger = init_logger(__name__)


@@ -185,7 +186,7 @@ def __init__(
         # Initialize the free blocks.
         self.free_blocks: List[PhysicalTokenBlock] = []
         # For HPU, block id 0 is used only for padding
-        reserved_blocks = 1 if is_hpu() else 0
+        reserved_blocks = 1 if current_platform.is_hpu() else 0
         for i in range(reserved_blocks, num_blocks):
             block = PhysicalTokenBlock(device=device,
                                        block_number=i,
vllm/utils.py (19 changes: 2 additions & 17 deletions)

@@ -337,21 +337,6 @@ def is_neuron() -> bool:
     return transformers_neuronx is not None


-@lru_cache(maxsize=None)
-def is_hpu() -> bool:
-    from importlib import util
-    return util.find_spec('habana_frameworks') is not None
-
-
-@lru_cache(maxsize=None)
-def _is_built_for_hpu() -> bool:
-    from importlib.metadata import PackageNotFoundError, version
-    try:
-        return "gaudi" in version("vllm")
-    except PackageNotFoundError:
-        return False
-
-
 @lru_cache(maxsize=None)
 def is_xpu() -> bool:
     from importlib.metadata import PackageNotFoundError, version

@@ -755,7 +740,7 @@ def print_warning_once(msg: str) -> None:


 def get_device() -> str:
-    if is_hpu():
+    if current_platform.is_hpu():
         return "hpu"
     return "cuda"

@@ -775,7 +760,7 @@ def is_pin_memory_available() -> bool:
     elif is_neuron():
         print_warning_once("Pin memory is not supported on Neuron.")
         return False
-    elif is_hpu():
+    elif current_platform.is_hpu():
         print_warning_once("Pin memory is not supported on HPU.")
         return False
     elif is_cpu() or is_openvino():
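Taken together, this commit deletes the module-level is_hpu() and _is_built_for_hpu() helpers from vllm/utils.py and routes HPU detection through vllm.platforms.current_platform instead. A rough standalone sketch of the resulting call-site pattern (detect_hpu is a hypothetical stand-in for current_platform.is_hpu(); its body is the same check the deleted helper used):

from functools import lru_cache
from importlib import util


@lru_cache(maxsize=None)
def detect_hpu() -> bool:
    # Same check the removed vllm.utils.is_hpu() performed: HPU support is
    # assumed present when the habana_frameworks package can be imported.
    return util.find_spec('habana_frameworks') is not None


def get_device() -> str:
    # After this commit the real code asks current_platform.is_hpu();
    # detect_hpu() stands in for it here to keep the sketch self-contained.
    return "hpu" if detect_hpu() else "cuda"


print(get_device())  # "hpu" on a Gaudi host with habana_frameworks installed, else "cuda"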
