remove leftovers from habana_main
kzawora-intel committed Oct 16, 2024
1 parent d2ce468 · commit b6428cd
Showing 2 changed files with 5 additions and 7 deletions.
vllm/engine/multiprocessing/engine.py (4 additions, 6 deletions)
@@ -23,7 +23,6 @@
 # yapf: enable
 from vllm.envs import VLLM_RPC_TIMEOUT
 from vllm.executor.gpu_executor import GPUExecutor
-from vllm.executor.hpu_executor import HPUExecutor
 from vllm.logger import init_logger
 from vllm.outputs import RequestOutput
 from vllm.usage.usage_lib import UsageContext
@@ -33,6 +32,7 @@

 logger = init_logger(__name__)

+POLLING_TIMEOUT_MS = 10000
 HEALTHY_RESPONSE = (pickle.dumps(VLLM_RPC_SUCCESS_STR), )


@@ -209,7 +209,7 @@ def run_engine_loop(self):
         self._alive()
         if not self.engine.has_unfinished_requests():
             # Poll until there is work to do.
-            while self.input_socket.poll(timeout=VLLM_RPC_TIMEOUT) == 0:
+            while self.input_socket.poll(timeout=POLLING_TIMEOUT_MS) == 0:
                 self._alive()
                 self.engine.do_log_stats()
                 logger.debug("Waiting for new requests in engine loop.")
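
Note: this hunk decouples the engine's idle-poll interval from the VLLM_RPC_TIMEOUT environment value by introducing the fixed POLLING_TIMEOUT_MS constant. A minimal sketch of the same pattern using plain pyzmq; the socket type, address, and print statement are illustrative, not vLLM's:

import zmq

POLLING_TIMEOUT_MS = 10000  # fixed idle-poll interval, in milliseconds

ctx = zmq.Context()
socket = ctx.socket(zmq.PULL)
socket.bind("tcp://127.0.0.1:5555")

# poll() returns 0 when no message arrives within the timeout window,
# so the loop wakes up periodically to do upkeep while staying idle.
while socket.poll(timeout=POLLING_TIMEOUT_MS) == 0:
    print("Waiting for new requests...")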
@@ -368,15 +368,13 @@ def _alive(self):
         self._last_alive_time = time.time()

     def start_profile(self) -> None:
-        if type(self.engine.model_executor) is GPUExecutor or \
-           type(self.engine.model_executor) is HPUExecutor:
+        if type(self.engine.model_executor) is GPUExecutor:
             self.engine.model_executor.start_profile()
         else:
             self.engine.model_executor._run_workers("start_profile")

     def stop_profile(self) -> None:
-        if type(self.engine.model_executor) is GPUExecutor or \
-           type(self.engine.model_executor) is HPUExecutor:
+        if type(self.engine.model_executor) is GPUExecutor:
             self.engine.model_executor.stop_profile()
         else:
             self.engine.model_executor._run_workers("stop_profile")
Second changed file (1 addition, 1 deletion)
@@ -121,7 +121,7 @@ def apply_fp8_linear(
         qinput, x_scale = ops.scaled_fp8_quant(
             input,
             input_scale,
-            batch_dim_padding=17,
+            num_token_padding=17,
             use_per_token_if_dynamic=use_per_token_if_dynamic)

         per_tensor_weights = (weight_scale.numel() == 1)
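
Note: this hunk only renames the keyword argument batch_dim_padding to num_token_padding to match the updated ops.scaled_fp8_quant signature; the value 17 is unchanged. A hedged sketch of what padding the token dimension means, assuming a 2-D activation tensor of shape [num_tokens, hidden] (the actual quantization and returned scale are elided):

import torch

def pad_token_dim(x: torch.Tensor, num_token_padding: int) -> torch.Tensor:
    # Grow the token (first) dimension to at least num_token_padding
    # rows; the extra rows are zeros and can be sliced off after the GEMM.
    pad = max(num_token_padding - x.shape[0], 0)
    return torch.nn.functional.pad(x, (0, 0, 0, pad))

x = torch.randn(5, 64)
print(pad_token_dim(x, 17).shape)  # torch.Size([17, 64])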
