Skip to content

Commit

Permalink
Remove HabanaModelRunner destructor (D'TOR) for Mixtral run
Browse files Browse the repository at this point in the history
  • Loading branch information
Tiefen-boop committed Sep 2, 2024
1 parent ca14579 commit 0dc3495
Showing 1 changed file with 1 addition and 3 deletions.
4 changes: 1 addition & 3 deletions vllm/worker/habana_model_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

import habana_frameworks.torch as htorch
import torch
from neural_compressor.torch.quantization import finalize_calibration

from vllm.attention import AttentionMetadata, get_attn_backend
from vllm.config import (CacheConfig, DeviceConfig, LoadConfig, LoRAConfig,
Expand Down Expand Up @@ -1557,7 +1558,6 @@ def prepare_model_input(
virtual_engine=virtual_engine)

def finish_measurements(self):
    """Finalize calibration on the wrapped model.

    Hands ``self.model.model`` to neural_compressor's
    ``finalize_calibration``. That function is imported once at module
    level, so no function-local import is repeated here.
    """
    finalize_calibration(self.model.model)

@torch.inference_mode()
Expand Down Expand Up @@ -1680,8 +1680,6 @@ def shutdown_inc(self):
if (model_config := getattr(self, "model_config", None)) and \
getattr(model_config, "quantization", None) == 'inc':
print('inc shutdown start')
from neural_compressor.torch.quantization import (
finalize_calibration)
finalize_calibration(self.model.model)
print('inc shutdown')

Expand Down

0 comments on commit 0dc3495

Please sign in to comment.