Fix Qwen2 OOM on HPU by calling mark_step() after the embedding step and after each layer

HabanaAI · Aug 30, 2024 · 58777a3
1 parent 17cd625
commit 58777a3
Showing 1 changed file with 9 additions and 0 deletions.
diff --git a/vllm/model_executor/models/qwen2.py b/vllm/model_executor/models/qwen2.py
@@ -45,6 +45,7 @@
     ParallelLMHead, VocabParallelEmbedding)
 from vllm.model_executor.model_loader.weight_utils import (
     default_weight_loader, maybe_remap_kv_scale_name)
+from vllm.platforms import current_platform
 from vllm.model_executor.sampling_metadata import SamplingMetadata
 from vllm.sequence import IntermediateTensors, SamplerOutput
 
@@ -259,6 +260,11 @@ def forward(
             hidden_states = inputs_embeds
         else:
             hidden_states = self.embed_tokens(input_ids)
+
+        if current_platform.is_hpu():
+            import habana_frameworks.torch as htorch
+            htorch.core.mark_step()
+
         residual = None
         for i in range(len(self.layers)):
             layer = self.layers[i]
@@ -269,6 +275,9 @@ def forward(
                 attn_metadata,
                 residual,
             )
+            if current_platform.is_hpu():
+                htorch.core.mark_step()
+
         hidden_states, _ = self.norm(hidden_states, residual)
         return hidden_states