Commit

Add ignore type in mypy checks #2
jkaniecki committed Nov 14, 2024
1 parent 891f311 · commit 2b17eea
Showing 1 changed file with 29 additions and 19 deletions.
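
The diff below removes the mypy `# type: ignore` suppressions from the `make_tensor_with_pad(...)` and `torch.tensor(...)` calls and attaches them to the later `.to(self.device, ...)` calls instead, wrapping those calls onto two lines, presumably to stay within the line-length limit. mypy ties a `# type: ignore` comment to the physical line on which the error is reported, so for a wrapped call the comment has to sit on the first line of the statement. A minimal, hypothetical sketch of that behaviour (not code from this file; the function and variable names are made up):

    from typing import List

    import torch


    def prepare(raw_tokens: List[int]) -> torch.Tensor:
        tokens = raw_tokens
        # mypy infers `tokens` as List[int], so reassigning it to a Tensor is
        # reported on this line; the suppression therefore lives here.
        tokens = torch.tensor(tokens, dtype=torch.long)  # type: ignore
        # After the ignored reassignment mypy still treats `tokens` as
        # List[int], so the `.to(...)` attribute access is flagged on the
        # first physical line of the wrapped call, and the comment goes there.
        tokens = tokens.to(  # type: ignore
            'cpu', non_blocking=True)
        return tokens
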
48 changes: 29 additions & 19 deletions vllm/worker/hpu_model_runner.py
@@ -963,19 +963,19 @@ def _prepare_prompt(
         else:
             prefix_block_list_tensor = None

-        input_tokens = make_tensor_with_pad(input_tokens,  # type: ignore
+        input_tokens = make_tensor_with_pad(input_tokens,
                                             max_len=max_prompt_len,
                                             pad=0,
                                             dtype=torch.long,
                                             device='cpu')

-        input_positions = make_tensor_with_pad(input_positions,  # type: ignore
+        input_positions = make_tensor_with_pad(input_positions,
                                                max_len=max_prompt_len,
                                                pad=0,
                                                dtype=torch.long,
                                                device='cpu')

-        slot_mapping = make_tensor_with_pad(slot_mapping,  # type: ignore
+        slot_mapping = make_tensor_with_pad(slot_mapping,
                                             max_len=max_prompt_len,
                                             pad=_PAD_SLOT_ID,
                                             dtype=torch.long,
@@ -992,10 +992,14 @@ def _prepare_prompt(
         if prefix_block_list_tensor:
             prefix_block_list_tensor = prefix_block_list_tensor.to(
                 self.device, non_blocking=True)
-        input_tokens = input_tokens.to(self.device, non_blocking=True)
-        input_positions = input_positions.to(self.device, non_blocking=True)
-        slot_mapping = slot_mapping.to(self.device, non_blocking=True)
-        seq_lens_tensor = seq_lens_tensor.to(self.device, non_blocking=True)
+        input_tokens = input_tokens.to(  # type: ignore
+            self.device, non_blocking=True)
+        input_positions = input_positions.to(  # type: ignore
+            self.device, non_blocking=True)
+        slot_mapping = slot_mapping.to(  # type: ignore
+            self.device, non_blocking=True)
+        seq_lens_tensor = seq_lens_tensor.to(
+            self.device, non_blocking=True)
         context_lens_tensor = context_lens_tensor.to(
             self.device, non_blocking=True)

@@ -1102,14 +1106,14 @@ def _prepare_decode(
             block_tables.append(block_table)

         if output is None:
-            input_tokens = torch.tensor(input_tokens,  # type: ignore
+            input_tokens = torch.tensor(input_tokens,
                                         dtype=torch.long,
                                         device='cpu')
         else:
             real_batch_size = len(seq_group_metadata_list)
             input_tokens = output[:real_batch_size]

-        input_positions = torch.tensor(input_positions,  # type: ignore
+        input_positions = torch.tensor(input_positions,
                                        dtype=torch.long,
                                        device='cpu')

@@ -1153,25 +1157,31 @@ def _prepare_decode(
         block_groups = padding_fn(block_groups, -1)
         block_usage = padding_fn(block_usage, 1)

-        block_list = torch.tensor(block_list,  # type: ignore
+        block_list = torch.tensor(block_list,
                                   dtype=torch.int,
                                   device='cpu')
-        block_groups = torch.tensor(block_groups,  # type: ignore
+        block_groups = torch.tensor(block_groups,
                                     dtype=torch.int,
                                     device='cpu')
-        block_usage = torch.tensor(block_usage,  # type: ignore
+        block_usage = torch.tensor(block_usage,
                                    dtype=self.model_config.dtype,
                                    device='cpu')
-        slot_mapping = torch.tensor(slot_mapping,  # type: ignore
+        slot_mapping = torch.tensor(slot_mapping,
                                     dtype=torch.long,
                                     device='cpu')

-        input_tokens = input_tokens.to(self.device, non_blocking=True)
-        input_positions = input_positions.to(self.device, non_blocking=True)
-        block_list = block_list.to(self.device, non_blocking=True)
-        block_groups = block_groups.to(self.device, non_blocking=True)
-        block_usage = block_usage.to(self.device, non_blocking=True)
-        slot_mapping = slot_mapping.to(self.device, non_blocking=True)
+        input_tokens = input_tokens.to(  # type: ignore
+            self.device, non_blocking=True)
+        input_positions = input_positions.to(  # type: ignore
+            self.device, non_blocking=True)
+        block_list = block_list.to(  # type: ignore
+            self.device, non_blocking=True)
+        block_groups = block_groups.to(  # type: ignore
+            self.device, non_blocking=True)
+        block_usage = block_usage.to(  # type: ignore
+            self.device, non_blocking=True)
+        slot_mapping = slot_mapping.to(  # type: ignore
+            self.device, non_blocking=True)

         attn_metadata = self.attn_backend.make_metadata(
             is_prompt=False,
