Remove vllm.utils.is_hpu() (#331)
vllm.utils.is_hpu() has been redundant for some time and has always been
problematic, particularly in torch.compile mode. We are now fully
switching to current_platform.is_hpu().
kzawora-intel committed Sep 24, 2024
1 parent 4eb9809 commit 9be37a3
Showing 4 changed files with 9 additions and 27 deletions.
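
Before the per-file diff, a minimal sketch of the migration pattern this commit applies. The reserved_blocks call site is taken from the diff below; the rest is illustrative, not verbatim source:

# Before this commit (helper removed from vllm/utils.py):
from vllm.utils import is_hpu

reserved_blocks = 1 if is_hpu() else 0

# After this commit (platform-aware check):
from vllm.platforms import current_platform

reserved_blocks = 1 if current_platform.is_hpu() else 0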
2 changes: 1 addition & 1 deletion requirements-hpu.txt
@@ -6,4 +6,4 @@ ray == 2.32.0
 triton
 pandas
 tabulate
-vllm-hpu-extension @ git+https://github.com/HabanaAI/vllm-hpu-extension.git@0e05e25
+vllm-hpu-extension @ git+https://github.com/HabanaAI/vllm-hpu-extension.git@0a7adab
5 changes: 3 additions & 2 deletions vllm/core/block/cpu_gpu_block_allocator.py
@@ -4,7 +4,8 @@
                                         DeviceAwareBlockAllocator)
 from vllm.core.block.naive_block import NaiveBlock, NaiveBlockAllocator
 from vllm.core.block.prefix_caching_block import PrefixCachingBlockAllocator
-from vllm.utils import Device, is_hpu
+from vllm.platforms import current_platform
+from vllm.utils import Device
 
 
 class CpuGpuBlockAllocator(DeviceAwareBlockAllocator):
@@ -53,7 +54,7 @@ def create(
         before CPU block IDs.
         """
         # For HPU, block id 0 is used only for padding
-        reserved_blocks = 1 if is_hpu() else 0
+        reserved_blocks = 1 if current_platform.is_hpu() else 0
         block_ids = list(
             range(reserved_blocks, num_gpu_blocks + num_cpu_blocks))
         num_gpu_blocks -= reserved_blocks
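
As a sanity check on the two changed lines above, a toy rerun with hypothetical block counts (the counts are invented for illustration; only the logic comes from the diff):

# Hypothetical counts, for illustration only.
num_gpu_blocks, num_cpu_blocks = 4, 2

reserved_blocks = 1  # assuming current_platform.is_hpu() is True
block_ids = list(range(reserved_blocks, num_gpu_blocks + num_cpu_blocks))
num_gpu_blocks -= reserved_blocks

print(block_ids)       # [1, 2, 3, 4, 5] -- block id 0 is never handed out
print(num_gpu_blocks)  # 3 -- one block is set aside for HPU padding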
5 changes: 3 additions & 2 deletions vllm/core/block_manager_v1.py
@@ -13,8 +13,9 @@
 from vllm.core.evictor_v1 import EvictionPolicy, Evictor, make_evictor
 from vllm.core.interfaces import AllocStatus, BlockSpaceManager
 from vllm.logger import init_logger
+from vllm.platforms import current_platform
 from vllm.sequence import Sequence, SequenceGroup, SequenceStatus
-from vllm.utils import Device, is_hpu
+from vllm.utils import Device
 
 logger = init_logger(__name__)
 
@@ -185,7 +186,7 @@ def __init__(
         # Initialize the free blocks.
         self.free_blocks: List[PhysicalTokenBlock] = []
         # For HPU, block id 0 is used only for padding
-        reserved_blocks = 1 if is_hpu() else 0
+        reserved_blocks = 1 if current_platform.is_hpu() else 0
         for i in range(reserved_blocks, num_blocks):
             block = PhysicalTokenBlock(device=device,
                                        block_number=i,
24 changes: 2 additions & 22 deletions vllm/utils.py
@@ -337,11 +337,6 @@ def is_neuron() -> bool:
     return transformers_neuronx is not None
 
 
-@lru_cache(maxsize=None)
-def is_hpu() -> bool:
-    return _is_habana_frameworks_installed() or _is_built_for_hpu()
-
-
 @lru_cache(maxsize=None)
 def is_fake_hpu() -> bool:
     return os.environ.get('VLLM_USE_FAKE_HPU', '0') != '0'
@@ -359,21 +354,6 @@ def hpu_backend_string():
     return backend_string
 
 
-@lru_cache(maxsize=None)
-def _is_habana_frameworks_installed() -> bool:
-    from importlib import util
-    return util.find_spec('habana_frameworks') is not None
-
-
-@lru_cache(maxsize=None)
-def _is_built_for_hpu() -> bool:
-    from importlib.metadata import PackageNotFoundError, version
-    try:
-        return "gaudi" in version("vllm")
-    except PackageNotFoundError:
-        return False
-
-
 @lru_cache(maxsize=None)
 def is_xpu() -> bool:
     from importlib.metadata import PackageNotFoundError, version
@@ -777,7 +757,7 @@ def print_warning_once(msg: str) -> None:
 
 
 def get_device() -> str:
-    if is_hpu():
+    if current_platform.is_hpu():
         return "hpu"
     return "cuda"
 
@@ -797,7 +777,7 @@ def is_pin_memory_available() -> bool:
     elif is_neuron():
         print_warning_once("Pin memory is not supported on Neuron.")
         return False
-    elif is_hpu():
+    elif current_platform.is_hpu():
         print_warning_once("Pin memory is not supported on HPU.")
         return False
     elif is_cpu() or is_openvino():