diff --git a/aphrodite/common/utils.py b/aphrodite/common/utils.py
index 0495d9c8b..8745c9365 100644
--- a/aphrodite/common/utils.py
+++ b/aphrodite/common/utils.py
@@ -12,12 +12,13 @@
+import math
 import threading
 import uuid
 import warnings
 from asyncio import FIRST_COMPLETED, ensure_future
 from functools import lru_cache, partial, wraps
 from platform import uname
 from typing import (Any, AsyncGenerator, Awaitable, Callable, Dict, Generic,
-                    Hashable, List, Literal, Optional, OrderedDict, Set, Tuple,
-                    Type, TypeVar, Union, overload)
+                    Hashable, Iterable, List, Literal, Optional, OrderedDict,
+                    Set, Tuple, Type, TypeVar, Union, overload)
 from uuid import uuid4
 
 import numpy as np
@@ -1117,3 +1118,30 @@ def progress_bar(iterable, desc="Processing"):
             progress.update(task, advance=1)
     else:
         yield from iterable
+
+
+def tensor_progress_bar(iterable: Iterable[Tuple[str, torch.Tensor]],
+                        final_bytes: int, desc="Processing"):
+    """Stream (name, tensor) pairs, advancing a byte-weighted progress bar."""
+    show_progress = get_tensor_model_parallel_rank() == 0
+    # Scale totals to a power-of-1024 unit (B, KiB, MiB, ...) so the MofN
+    # column stays readable; guard against math.log2(0) on empty inputs.
+    units = 1024 ** (int(math.log2(final_bytes)) // 10) if final_bytes else 1
+
+    if show_progress:
+        with Progress(
+                SpinnerColumn(),
+                TextColumn("[progress.description]{task.description}"),
+                BarColumn(),
+                MofNCompleteColumn(),
+                TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
+                TimeElapsedColumn(),
+        ) as progress:
+            task = progress.add_task(f"[cyan]{desc}",
+                                     total=final_bytes / units)
+            for item in iterable:
+                steps = item[1].element_size() * item[1].nelement() / units
+                yield item
+                progress.update(task, advance=steps)
+    else:
+        yield from iterable
diff --git a/aphrodite/modeling/model_loader/loader.py b/aphrodite/modeling/model_loader/loader.py
index 0d0d3ace1..abb45ee11 100644
--- a/aphrodite/modeling/model_loader/loader.py
+++ b/aphrodite/modeling/model_loader/loader.py
@@ -24,7 +24,7 @@
                         DeviceConfig, LoadConfig, LoadFormat, LoRAConfig,
                         ModelConfig, MultiModalConfig, ParallelConfig,
                         SchedulerConfig)
-from aphrodite.common.utils import is_pin_memory_available
+from aphrodite.common.utils import is_pin_memory_available, tensor_progress_bar
 from aphrodite.modeling.model_loader.tensorizer import (
     TensorizerConfig, is_aphrodite_tensorized, load_with_tensorizer,
     serialize_aphrodite_model, tensorizer_weights_iterator)
@@ -303,10 +303,12 @@ def _prepare_weights(self, model_name_or_path: str,
     def _get_weights_iterator(
         self, model_name_or_path: str, revision: Optional[str],
         fall_back_to_pt: bool
-    ) -> Generator[Tuple[str, torch.Tensor], None, None]:
+    ) -> Tuple[Generator[Tuple[str, torch.Tensor], None, None], int]:
         """Get an iterator for the model weights based on the load format."""
         hf_folder, hf_weights_files, use_safetensors = self._prepare_weights(
             model_name_or_path, revision, fall_back_to_pt)
+        est_weight_bytes = sum(os.path.getsize(f)
+                               for f in hf_weights_files)
         if self.load_config.load_format == LoadFormat.NPCACHE:
             # Currently np_cache only support *.bin checkpoints
             assert use_safetensors is False
@@ -329,7 +331,7 @@ def _xla_weights_iterator(iterator: Generator):
                     xm.mark_step()
 
             weights_iterator = _xla_weights_iterator(weights_iterator)
-        return weights_iterator
+        return weights_iterator, est_weight_bytes
 
     def load_model(self, *, model_config: ModelConfig,
                    device_config: DeviceConfig,
@@ -343,13 +345,15 @@ def load_model(self, *, model_config: ModelConfig,
                 model = _initialize_model(model_config, self.load_config,
                                           lora_config, cache_config,
                                           scheduler_config)
-            model.load_weights(
-                self._get_weights_iterator(model_config.model,
+
+            weights, wgt_bytes = self._get_weights_iterator(model_config.model,
                                            model_config.revision,
                                            fall_back_to_pt=getattr(
                                                model,
"fall_back_to_pt_during_load", - True)), ) + True)) + model.load_weights(tensor_progress_bar(weights, wgt_bytes, + "Loading modules...")) for _, module in model.named_modules(): quant_method = getattr(module, "quant_method", None) diff --git a/aphrodite/modeling/models/arctic.py b/aphrodite/modeling/models/arctic.py index e05c755b0..5c5ddc008 100644 --- a/aphrodite/modeling/models/arctic.py +++ b/aphrodite/modeling/models/arctic.py @@ -8,7 +8,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import (get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size, tensor_model_parallel_all_reduce) @@ -491,9 +490,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): "It will take ~10 minutes loading from the 16-bit weights. " "Alternatively, use the prequantized 8-bit weights of arctic " "and set load-format to `sharded_state` will accelerate loading.") - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: for (param_name, weight_name, shard_id) in stacked_params_mapping: if weight_name not in name: continue diff --git a/aphrodite/modeling/models/baichuan.py b/aphrodite/modeling/models/baichuan.py index a8cded9ba..33d8cb4d8 100644 --- a/aphrodite/modeling/models/baichuan.py +++ b/aphrodite/modeling/models/baichuan.py @@ -28,7 +28,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig, LoRAConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import (get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size) from aphrodite.modeling.layers.activation import SiluAndMul @@ -368,9 +367,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): ("gate_up_proj", "up_proj", 1), ] params_dict = dict(self.named_parameters()) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "rotary_emb.inv_freq" in name: continue if name == "lm_head.weight": diff --git a/aphrodite/modeling/models/bart.py b/aphrodite/modeling/models/bart.py index e7334a210..f60e9bf60 100644 --- a/aphrodite/modeling/models/bart.py +++ b/aphrodite/modeling/models/bart.py @@ -930,12 +930,10 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): model_params_dict = dict(self.model.named_parameters()) top_params_dict = dict(self.named_parameters()) - weights_tuple_list = list(weights) - shared_embedding_weight = None shared_embedding_shard_id = None - for name, loaded_weight in weights_tuple_list: + for name, loaded_weight in weights: name = self._rename_key(name) name, shard_id = self._rename_stacked_param(name) diff --git a/aphrodite/modeling/models/blip2.py b/aphrodite/modeling/models/blip2.py index 02e5c6349..e861c5ff9 100644 --- a/aphrodite/modeling/models/blip2.py +++ b/aphrodite/modeling/models/blip2.py @@ -10,7 +10,6 @@ from aphrodite.common.config import CacheConfig, MultiModalConfig from aphrodite.common.sequence import (IntermediateTensors, SamplerOutput, SequenceData) -from aphrodite.common.utils import progress_bar from aphrodite.inputs import INPUT_REGISTRY, InputContext, LLMInputs from 
aphrodite.modeling.layers.activation import get_act_fn from aphrodite.modeling.layers.logits_processor import LogitsProcessor @@ -682,10 +681,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): ("gate_up_proj", "up_proj", 1), ] params_dict = dict(self.named_parameters()) - - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "lm_head.weight" in name: continue if "rotary_emb.inv_freq" in name: diff --git a/aphrodite/modeling/models/bloom.py b/aphrodite/modeling/models/bloom.py index c4f157d27..349503443 100644 --- a/aphrodite/modeling/models/bloom.py +++ b/aphrodite/modeling/models/bloom.py @@ -26,7 +26,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import (get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size) from aphrodite.modeling.layers.activation import get_act_fn @@ -311,9 +310,7 @@ def sample( def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): params_dict = dict(self.named_parameters(remove_duplicate=False)) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if name == "lm_head.weight": continue if not name.startswith("transformer."): diff --git a/aphrodite/modeling/models/chameleon.py b/aphrodite/modeling/models/chameleon.py index 5e219ba14..b1759e5fb 100644 --- a/aphrodite/modeling/models/chameleon.py +++ b/aphrodite/modeling/models/chameleon.py @@ -12,7 +12,7 @@ from aphrodite.common.config import CacheConfig, MultiModalConfig from aphrodite.common.sequence import (IntermediateTensors, SamplerOutput, SequenceData) -from aphrodite.common.utils import print_warning_once, progress_bar +from aphrodite.common.utils import print_warning_once from aphrodite.distributed import get_tensor_model_parallel_world_size from aphrodite.inputs import INPUT_REGISTRY, InputContext, LLMInputs from aphrodite.modeling.layers.activation import SiluAndMul @@ -1005,9 +1005,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): (".gate_up_proj", ".up_proj", 1), ] params_dict = dict(self.named_parameters()) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "rotary_emb.inv_freq" in name: continue diff --git a/aphrodite/modeling/models/chatglm.py b/aphrodite/modeling/models/chatglm.py index 637904863..a88fd1aca 100644 --- a/aphrodite/modeling/models/chatglm.py +++ b/aphrodite/modeling/models/chatglm.py @@ -11,7 +11,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig, LoRAConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import get_tensor_model_parallel_world_size from aphrodite.modeling.layers.activation import SiluAndMul from aphrodite.modeling.layers.layernorm import RMSNorm @@ -389,9 +388,7 @@ def sample( def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): params_dict = dict(self.named_parameters(remove_duplicate=False)) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading 
modules..."): + for name, loaded_weight in weights: if "rotary_pos_emb.inv_freq" in name: continue if "word_embeddings" in name: diff --git a/aphrodite/modeling/models/commandr.py b/aphrodite/modeling/models/commandr.py index 28b101138..121ccdc1b 100644 --- a/aphrodite/modeling/models/commandr.py +++ b/aphrodite/modeling/models/commandr.py @@ -30,7 +30,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig, LoRAConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import get_tensor_model_parallel_world_size from aphrodite.modeling.layers.activation import SiluAndMul from aphrodite.modeling.layers.linear import (MergedColumnParallelLinear, @@ -381,9 +380,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): ] params_dict = dict(self.named_parameters()) loaded_params = set() - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: for param_name, shard_name, shard_id in stacked_params_mapping: if shard_name not in name: continue diff --git a/aphrodite/modeling/models/dbrx.py b/aphrodite/modeling/models/dbrx.py index 49d80b03a..b4f4e62c3 100644 --- a/aphrodite/modeling/models/dbrx.py +++ b/aphrodite/modeling/models/dbrx.py @@ -7,7 +7,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import (get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size, tensor_model_parallel_all_reduce) @@ -411,9 +410,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): f"experts.mlp.{weight_name}", ) for weight_name in ["w1", "v1", "w2"]] params_dict = dict(self.named_parameters(remove_duplicate=False)) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: for param_name, weight_name in expert_params_mapping: if weight_name not in name: continue diff --git a/aphrodite/modeling/models/decilm.py b/aphrodite/modeling/models/decilm.py index a6034838e..cc4b19f52 100644 --- a/aphrodite/modeling/models/decilm.py +++ b/aphrodite/modeling/models/decilm.py @@ -29,7 +29,6 @@ from transformers import LlamaConfig from aphrodite.common.config import CacheConfig, LoRAConfig -from aphrodite.common.utils import progress_bar from aphrodite.modeling.model_loader.weight_utils import default_weight_loader from aphrodite.modeling.models.llama import LlamaForCausalLM from aphrodite.quantization.base_config import QuantizationConfig @@ -77,9 +76,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): ("gate_up_proj", "up_proj", 1), ] params_dict = dict(self.named_parameters()) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "rotary_emb.inv_freq" in name: continue diff --git a/aphrodite/modeling/models/deepseek.py b/aphrodite/modeling/models/deepseek.py index 2163df10b..1793caeeb 100644 --- a/aphrodite/modeling/models/deepseek.py +++ b/aphrodite/modeling/models/deepseek.py @@ -30,7 +30,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig 
from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import (get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size, tensor_model_parallel_all_reduce) @@ -423,9 +422,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): ] params_dict = dict(self.named_parameters()) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "rotary_emb.inv_freq" in name: continue for (param_name, weight_name, shard_id) in stacked_params_mapping: diff --git a/aphrodite/modeling/models/deepseek_v2.py b/aphrodite/modeling/models/deepseek_v2.py index 93ad9232e..f0994d01f 100644 --- a/aphrodite/modeling/models/deepseek_v2.py +++ b/aphrodite/modeling/models/deepseek_v2.py @@ -31,7 +31,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import (get_tensor_model_parallel_world_size, tensor_model_parallel_all_reduce) from aphrodite.modeling.layers.activation import SiluAndMul @@ -489,9 +488,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): num_experts=self.config.n_routed_experts) params_dict = dict(self.named_parameters()) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "rotary_emb.inv_freq" in name: continue for (param_name, weight_name, shard_id) in stacked_params_mapping: diff --git a/aphrodite/modeling/models/exaone.py b/aphrodite/modeling/models/exaone.py index 53da2f518..b70e0444c 100644 --- a/aphrodite/modeling/models/exaone.py +++ b/aphrodite/modeling/models/exaone.py @@ -31,7 +31,7 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig, LoRAConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import is_hip, progress_bar +from aphrodite.common.utils import is_hip from aphrodite.distributed import (get_pp_group, get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size) @@ -534,9 +534,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): (".gate_up_proj", ".c_fc_1", 1), ] params_dict = dict(self.named_parameters()) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "rotary_emb.inv_freq" in name: continue if ("rotary_emb.cos_cached" in name diff --git a/aphrodite/modeling/models/falcon.py b/aphrodite/modeling/models/falcon.py index a902ec73e..3e58ece88 100644 --- a/aphrodite/modeling/models/falcon.py +++ b/aphrodite/modeling/models/falcon.py @@ -29,7 +29,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import (get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size, tensor_model_parallel_all_reduce) @@ -422,9 +421,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): total_num_kv_heads = total_num_heads num_query_heads_per_kv_head = total_num_heads // 
total_num_kv_heads params_dict = dict(self.named_parameters(remove_duplicate=False)) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if name == "lm_head.weight": # Falcon uses tied embeddings. continue diff --git a/aphrodite/modeling/models/fuyu.py b/aphrodite/modeling/models/fuyu.py index 4437e4ed7..ea9fac2be 100644 --- a/aphrodite/modeling/models/fuyu.py +++ b/aphrodite/modeling/models/fuyu.py @@ -28,7 +28,6 @@ from aphrodite.common.config import CacheConfig, MultiModalConfig from aphrodite.common.sequence import (IntermediateTensors, SamplerOutput, SequenceData) -from aphrodite.common.utils import progress_bar from aphrodite.inputs import INPUT_REGISTRY, InputContext, LLMInputs from aphrodite.modeling.layers.linear import ColumnParallelLinear from aphrodite.modeling.model_loader.weight_utils import default_weight_loader @@ -310,9 +309,7 @@ def sample( def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): params_dict = dict(self.named_parameters(remove_duplicate=False)) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "rotary_emb.inv_freq" in name: continue if ("rotary_emb.cos_cached" in name diff --git a/aphrodite/modeling/models/gemma.py b/aphrodite/modeling/models/gemma.py index 2270eaee1..719a7d015 100644 --- a/aphrodite/modeling/models/gemma.py +++ b/aphrodite/modeling/models/gemma.py @@ -25,7 +25,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig, LoRAConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import get_tensor_model_parallel_world_size from aphrodite.modeling.layers.activation import GeluAndMul from aphrodite.modeling.layers.layernorm import GemmaRMSNorm @@ -378,9 +377,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): ] params_dict = dict(self.named_parameters()) loaded_params: Set[str] = set() - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: for (param_name, shard_name, shard_id) in stacked_params_mapping: if shard_name not in name: continue diff --git a/aphrodite/modeling/models/gemma2.py b/aphrodite/modeling/models/gemma2.py index 52cf22fb7..0e8e78e42 100644 --- a/aphrodite/modeling/models/gemma2.py +++ b/aphrodite/modeling/models/gemma2.py @@ -25,7 +25,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig, LoRAConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import get_tensor_model_parallel_world_size from aphrodite.modeling.layers.activation import GeluAndMul from aphrodite.modeling.layers.layernorm import GemmaRMSNorm @@ -370,9 +369,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): ] params_dict = dict(self.named_parameters()) loaded_params: Set[str] = set() - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: for (param_name, shard_name, shard_id) in stacked_params_mapping: if shard_name not in name: continue diff --git a/aphrodite/modeling/models/gpt2.py 
b/aphrodite/modeling/models/gpt2.py index 195f46590..a7150d2d4 100644 --- a/aphrodite/modeling/models/gpt2.py +++ b/aphrodite/modeling/models/gpt2.py @@ -26,7 +26,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import get_tensor_model_parallel_world_size from aphrodite.modeling.layers.activation import get_act_fn from aphrodite.modeling.layers.linear import (ColumnParallelLinear, @@ -253,9 +252,7 @@ def sample( def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): params_dict = dict(self.named_parameters(remove_duplicate=False)) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "lm_head.weight" in name: # GPT-2 ties the weights of the embedding layer and the final # linear layer. diff --git a/aphrodite/modeling/models/gpt_bigcode.py b/aphrodite/modeling/models/gpt_bigcode.py index 2d50abb5c..d666b3813 100644 --- a/aphrodite/modeling/models/gpt_bigcode.py +++ b/aphrodite/modeling/models/gpt_bigcode.py @@ -27,7 +27,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import get_tensor_model_parallel_world_size from aphrodite.modeling.layers.activation import get_act_fn from aphrodite.modeling.layers.linear import (ColumnParallelLinear, @@ -272,9 +271,7 @@ def sample( def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): params_dict = dict(self.named_parameters(remove_duplicate=False)) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "lm_head.weight" in name: continue if ".attn.bias" in name: diff --git a/aphrodite/modeling/models/gpt_j.py b/aphrodite/modeling/models/gpt_j.py index abb04a459..e7bfd3b4e 100644 --- a/aphrodite/modeling/models/gpt_j.py +++ b/aphrodite/modeling/models/gpt_j.py @@ -25,7 +25,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import get_tensor_model_parallel_world_size from aphrodite.modeling.layers.activation import get_act_fn from aphrodite.modeling.layers.linear import (ColumnParallelLinear, @@ -273,9 +272,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): ("gate_up_proj", "up_proj", 1), ] params_dict = dict(self.named_parameters()) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "attn.bias" in name or "attn.masked_bias" in name: continue for (param_name, weight_name, shard_id) in stacked_params_mapping: diff --git a/aphrodite/modeling/models/gpt_neox.py b/aphrodite/modeling/models/gpt_neox.py index 3bbc6cfb1..47c9f6494 100644 --- a/aphrodite/modeling/models/gpt_neox.py +++ b/aphrodite/modeling/models/gpt_neox.py @@ -25,7 +25,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig from 
aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import get_tensor_model_parallel_world_size from aphrodite.modeling.layers.activation import get_act_fn from aphrodite.modeling.layers.linear import (ColumnParallelLinear, @@ -277,9 +276,7 @@ def sample( def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): params_dict = dict(self.named_parameters()) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if ("attention.bias" in name or "attention.masked_bias" in name or "rotary_emb.inv_freq" in name): continue diff --git a/aphrodite/modeling/models/intern_vit.py b/aphrodite/modeling/models/intern_vit.py index ed82dea43..4b7b0b85d 100644 --- a/aphrodite/modeling/models/intern_vit.py +++ b/aphrodite/modeling/models/intern_vit.py @@ -11,7 +11,6 @@ import torch.nn.functional as F from transformers import PretrainedConfig -from aphrodite.common.utils import progress_bar from aphrodite.modeling.layers.activation import get_act_fn from aphrodite.modeling.layers.layernorm import RMSNorm from aphrodite.modeling.layers.linear import (ColumnParallelLinear, @@ -273,9 +272,7 @@ def forward( def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): params_dict = dict(self.named_parameters()) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: param = params_dict[name] weight_loader = getattr(param, "weight_loader", default_weight_loader) diff --git a/aphrodite/modeling/models/internlm2.py b/aphrodite/modeling/models/internlm2.py index d6a631f4f..53045b14f 100644 --- a/aphrodite/modeling/models/internlm2.py +++ b/aphrodite/modeling/models/internlm2.py @@ -8,7 +8,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import get_tensor_model_parallel_world_size from aphrodite.modeling.layers.activation import SiluAndMul from aphrodite.modeling.layers.layernorm import RMSNorm @@ -303,9 +302,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): ("gate_up_proj", "w3", 1), ] params_dict = dict(self.named_parameters()) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "rotary_emb.inv_freq" in name: continue for (param_name, weight_name, shard_id) in stacked_params_mapping: diff --git a/aphrodite/modeling/models/jais.py b/aphrodite/modeling/models/jais.py index c8639f5e0..a948de859 100644 --- a/aphrodite/modeling/models/jais.py +++ b/aphrodite/modeling/models/jais.py @@ -28,7 +28,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import (get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size) from aphrodite.modeling.layers.linear import (ColumnParallelLinear, @@ -314,9 +313,7 @@ def sample( def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): params_dict = dict(self.named_parameters(remove_duplicate=False)) - weights_list = 
list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "lm_head.weight" in name: # GPT-2 ties the weights of the embedding layer and the final # linear layer. diff --git a/aphrodite/modeling/models/jamba.py b/aphrodite/modeling/models/jamba.py index fa6e7ef91..2e0167e5c 100644 --- a/aphrodite/modeling/models/jamba.py +++ b/aphrodite/modeling/models/jamba.py @@ -12,7 +12,6 @@ from aphrodite.attention.layer import Attention from aphrodite.common.config import CacheConfig, LoRAConfig, SchedulerConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar # yapf: disable from aphrodite.distributed import (get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size) @@ -701,9 +700,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): num_experts=self.config.num_experts) params_dict = dict(self.named_parameters()) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "rotary_emb.inv_freq" in name: continue diff --git a/aphrodite/modeling/models/llama.py b/aphrodite/modeling/models/llama.py index f38780b7a..abbb13f0b 100644 --- a/aphrodite/modeling/models/llama.py +++ b/aphrodite/modeling/models/llama.py @@ -30,7 +30,7 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig, LoRAConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import is_hip, progress_bar +from aphrodite.common.utils import is_hip from aphrodite.distributed import (get_current_tp_rank_partition_size, get_pp_group, get_tensor_model_parallel_rank, @@ -487,9 +487,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): (".gate_up_proj", ".up_proj", 1), ] params_dict = dict(self.named_parameters()) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: name, loaded_weight = self.maybe_remap_mistral(name, loaded_weight) if "rotary_emb.inv_freq" in name: continue diff --git a/aphrodite/modeling/models/llama_embedding.py b/aphrodite/modeling/models/llama_embedding.py index 8132a2b28..7878f4603 100644 --- a/aphrodite/modeling/models/llama_embedding.py +++ b/aphrodite/modeling/models/llama_embedding.py @@ -5,7 +5,6 @@ from aphrodite.attention import AttentionMetadata from aphrodite.common.sequence import PoolerOutput -from aphrodite.common.utils import progress_bar from aphrodite.modeling.layers.pooler import Pooler, PoolingType from aphrodite.modeling.model_loader.weight_utils import default_weight_loader from aphrodite.modeling.models.llama import LlamaModel @@ -59,9 +58,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): ("gate_up_proj", "up_proj", 1), ] params_dict = dict(self.model.named_parameters()) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "rotary_emb.inv_freq" in name: continue if ("rotary_emb.cos_cached" in name diff --git a/aphrodite/modeling/models/medusa.py b/aphrodite/modeling/models/medusa.py index b9ab9e0b7..bd59de0f7 100644 --- a/aphrodite/modeling/models/medusa.py +++ b/aphrodite/modeling/models/medusa.py @@ -4,7 +4,6 @@ import torch.nn as nn from aphrodite.common.sequence 
import SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.modeling.layers.logits_processor import LogitsProcessor from aphrodite.modeling.layers.vocab_parallel_embedding import ( DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead) @@ -138,9 +137,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): weights_map = {} - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: name = name.replace("medusa_heads.", "") if name == "token_map": diff --git a/aphrodite/modeling/models/minicpm.py b/aphrodite/modeling/models/minicpm.py index b88453cfa..69c23d889 100644 --- a/aphrodite/modeling/models/minicpm.py +++ b/aphrodite/modeling/models/minicpm.py @@ -32,7 +32,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig, LoRAConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import (get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size, tensor_model_parallel_all_reduce) @@ -507,9 +506,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): for weight_name in ["w1", "w2", "w3"] ] params_dict = dict(self.named_parameters()) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "rotary_emb.inv_freq" in name: continue if ("rotary_emb.cos_cached" in name diff --git a/aphrodite/modeling/models/minicpmv.py b/aphrodite/modeling/models/minicpmv.py index 058473425..c628fbf60 100644 --- a/aphrodite/modeling/models/minicpmv.py +++ b/aphrodite/modeling/models/minicpmv.py @@ -40,7 +40,6 @@ from aphrodite.common.config import CacheConfig, MultiModalConfig from aphrodite.common.sequence import (IntermediateTensors, SamplerOutput, SequenceData) -from aphrodite.common.utils import progress_bar from aphrodite.inputs import INPUT_REGISTRY, InputContext, LLMInputs from aphrodite.modeling.layers.linear import ReplicatedLinear from aphrodite.modeling.layers.logits_processor import LogitsProcessor @@ -660,9 +659,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): ("gate_up_proj", "up_proj", 1), ] params_dict = dict(self.named_parameters()) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: for key_to_modify, new_key in _KEYS_TO_MODIFY_MAPPING.items(): if key_to_modify in name: name = name.replace(key_to_modify, new_key) diff --git a/aphrodite/modeling/models/mixtral.py b/aphrodite/modeling/models/mixtral.py index 80ea5eeed..73f6c8f85 100644 --- a/aphrodite/modeling/models/mixtral.py +++ b/aphrodite/modeling/models/mixtral.py @@ -30,7 +30,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig, LoRAConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import (get_pp_group, get_tensor_model_parallel_world_size) from aphrodite.modeling.layers.fused_moe import FusedMoE @@ -423,9 +422,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): num_experts=self.config.num_local_experts) params_dict = dict(self.named_parameters()) - weights_list = list(weights) - for name, loaded_weight in 
progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "rotary_emb.inv_freq" in name: continue diff --git a/aphrodite/modeling/models/mixtral_quant.py b/aphrodite/modeling/models/mixtral_quant.py index eac873257..5144001e4 100644 --- a/aphrodite/modeling/models/mixtral_quant.py +++ b/aphrodite/modeling/models/mixtral_quant.py @@ -32,7 +32,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import (get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size, tensor_model_parallel_all_reduce) @@ -388,9 +387,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): ] params_dict = dict(self.named_parameters()) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "rotary_emb.inv_freq" in name: continue for (param_name, weight_name, shard_id) in stacked_params_mapping: diff --git a/aphrodite/modeling/models/mlp_speculator.py b/aphrodite/modeling/models/mlp_speculator.py index 802c629f9..102684c10 100644 --- a/aphrodite/modeling/models/mlp_speculator.py +++ b/aphrodite/modeling/models/mlp_speculator.py @@ -5,7 +5,6 @@ import torch.nn as nn from aphrodite.common.sequence import SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.modeling import SamplingMetadata from aphrodite.modeling.layers.logits_processor import LogitsProcessor from aphrodite.modeling.layers.sampler import Sampler @@ -182,9 +181,7 @@ def generate_proposals( def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): params_dict = dict(self.named_parameters()) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: param = params_dict.get(name.replace("speculator.", "")) if param is not None: weight_loader = getattr(param, "weight_loader", diff --git a/aphrodite/modeling/models/mpt.py b/aphrodite/modeling/models/mpt.py index 9396cf4bf..75a823580 100644 --- a/aphrodite/modeling/models/mpt.py +++ b/aphrodite/modeling/models/mpt.py @@ -9,7 +9,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import (get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size) from aphrodite.modeling.layers.activation import get_act_fn @@ -298,9 +297,7 @@ def sample( def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): params_dict = dict(self.named_parameters(remove_duplicate=False)) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: # Skip loading extra bias for GPTQ models. 
if name.endswith(".bias") and name not in params_dict: continue diff --git a/aphrodite/modeling/models/nemotron.py b/aphrodite/modeling/models/nemotron.py index 3fdb2b6bb..620757cfe 100644 --- a/aphrodite/modeling/models/nemotron.py +++ b/aphrodite/modeling/models/nemotron.py @@ -31,7 +31,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig, LoRAConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import (get_pp_group, get_tensor_model_parallel_world_size) from aphrodite.modeling.layers.activation import get_act_fn @@ -494,9 +493,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): (".qkv_proj", ".v_proj", "v"), ] params_dict = dict(self.named_parameters()) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "rotary_emb.inv_freq" in name: continue if ("rotary_emb.cos_cached" in name diff --git a/aphrodite/modeling/models/olmo.py b/aphrodite/modeling/models/olmo.py index f54cdab1e..2fe48e4cc 100644 --- a/aphrodite/modeling/models/olmo.py +++ b/aphrodite/modeling/models/olmo.py @@ -30,7 +30,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import get_tensor_model_parallel_world_size from aphrodite.modeling.layers.activation import SiluAndMul from aphrodite.modeling.layers.linear import (MergedColumnParallelLinear, @@ -338,9 +337,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): ("gate_up_proj", "up_proj", 1), ] params_dict = dict(self.named_parameters(remove_duplicate=False)) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "rotary_emb.inv_freq" in name: continue if ("rotary_emb.cos_cached" in name diff --git a/aphrodite/modeling/models/olmoe.py b/aphrodite/modeling/models/olmoe.py index 118718ef3..008a6bcfb 100644 --- a/aphrodite/modeling/models/olmoe.py +++ b/aphrodite/modeling/models/olmoe.py @@ -19,7 +19,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import get_tensor_model_parallel_world_size from aphrodite.modeling.layers.fused_moe import FusedMoE from aphrodite.modeling.layers.layernorm import RMSNorm @@ -342,11 +341,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): num_experts=self.config.num_experts) params_dict = dict(self.named_parameters()) - weights_list = list(weights) - for name, loaded_weight in progress_bar( - weights_list, - desc="Loading modules..." 
- ): + for name, loaded_weight in weights: if "rotary_emb.inv_freq" in name: continue for (param_name, weight_name, shard_id) in stacked_params_mapping: diff --git a/aphrodite/modeling/models/opt.py b/aphrodite/modeling/models/opt.py index 51ad61a50..4ed6b0bd6 100644 --- a/aphrodite/modeling/models/opt.py +++ b/aphrodite/modeling/models/opt.py @@ -26,7 +26,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import get_tensor_model_parallel_world_size from aphrodite.modeling.layers.activation import get_act_fn from aphrodite.modeling.layers.linear import (ColumnParallelLinear, @@ -348,9 +347,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): ("qkv_proj", "v_proj", "v"), ] params_dict = dict(self.named_parameters(remove_duplicate=False)) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "lm_head.weight" in name: continue if name.startswith("decoder."): diff --git a/aphrodite/modeling/models/orion.py b/aphrodite/modeling/models/orion.py index ecdfc02fb..725bb4bdc 100644 --- a/aphrodite/modeling/models/orion.py +++ b/aphrodite/modeling/models/orion.py @@ -13,7 +13,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import get_tensor_model_parallel_world_size from aphrodite.modeling.layers.activation import SiluAndMul from aphrodite.modeling.layers.linear import (MergedColumnParallelLinear, @@ -304,9 +303,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): ("gate_up_proj", "up_proj", 1), ] params_dict = dict(self.named_parameters()) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "rotary_emb.inv_freq" in name: continue if ("rotary_emb.cos_cached" in name diff --git a/aphrodite/modeling/models/paligemma.py b/aphrodite/modeling/models/paligemma.py index 43ab56df2..23a8b688a 100644 --- a/aphrodite/modeling/models/paligemma.py +++ b/aphrodite/modeling/models/paligemma.py @@ -9,7 +9,6 @@ from aphrodite.attention import AttentionMetadata from aphrodite.common.config import CacheConfig, MultiModalConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.inputs import INPUT_REGISTRY, InputContext, LLMInputs from aphrodite.modeling.layers.logits_processor import LogitsProcessor from aphrodite.modeling.layers.sampler import Sampler @@ -292,9 +291,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): ] params_dict = dict(self.named_parameters()) loaded_params = set() - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: for key_to_modify, new_key in _KEYS_TO_MODIFY_MAPPING.items(): if key_to_modify in name: name = name.replace(key_to_modify, new_key) diff --git a/aphrodite/modeling/models/persimmon.py b/aphrodite/modeling/models/persimmon.py index 3566f13c7..2c1a31389 100644 --- a/aphrodite/modeling/models/persimmon.py +++ 
b/aphrodite/modeling/models/persimmon.py @@ -31,7 +31,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import get_tensor_model_parallel_world_size from aphrodite.modeling.layers.linear import (ColumnParallelLinear, QKVParallelLinear, @@ -305,9 +304,7 @@ def sample( def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): params_dict = dict(self.named_parameters(remove_duplicate=False)) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "rotary_emb.inv_freq" in name: continue if ("rotary_emb.cos_cached" in name diff --git a/aphrodite/modeling/models/phi.py b/aphrodite/modeling/models/phi.py index 67648e3d2..c3c5eb8fb 100644 --- a/aphrodite/modeling/models/phi.py +++ b/aphrodite/modeling/models/phi.py @@ -44,7 +44,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig, LoRAConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import get_tensor_model_parallel_world_size from aphrodite.modeling.layers.activation import get_act_fn from aphrodite.modeling.layers.linear import (ColumnParallelLinear, @@ -308,10 +307,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): ("qkv_proj", "v_proj", "v") ] params_dict = dict(self.named_parameters()) - - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "rotary_emb.inv_freq" in name: continue diff --git a/aphrodite/modeling/models/phi3_small.py b/aphrodite/modeling/models/phi3_small.py index 3074544d1..2e5dce1ec 100644 --- a/aphrodite/modeling/models/phi3_small.py +++ b/aphrodite/modeling/models/phi3_small.py @@ -8,7 +8,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig, LoRAConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import (get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size) from aphrodite.modeling.layers.linear import (MergedColumnParallelLinear, @@ -440,9 +439,7 @@ def sample( def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): params_dict = dict(self.named_parameters()) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "rotary_emb.inv_freq" in name: continue if name.endswith(".bias") and name not in params_dict: diff --git a/aphrodite/modeling/models/phi3v.py b/aphrodite/modeling/models/phi3v.py index 444d4d36a..c41e1e488 100644 --- a/aphrodite/modeling/models/phi3v.py +++ b/aphrodite/modeling/models/phi3v.py @@ -29,7 +29,6 @@ from aphrodite.attention import AttentionMetadata from aphrodite.common.config import CacheConfig, ModelConfig, MultiModalConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.inputs import INPUT_REGISTRY, InputContext, LLMInputs from aphrodite.modeling.layers.logits_processor import LogitsProcessor from 
aphrodite.modeling.layers.sampler import Sampler @@ -613,9 +612,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): (".gate_up_proj", ".up_proj", 1), ] params_dict = dict(self.named_parameters()) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "rotary_emb.inv_freq" in name: continue # post_layernorm is not needed in CLIPVisionModel diff --git a/aphrodite/modeling/models/qwen.py b/aphrodite/modeling/models/qwen.py index 26e1e2ddc..30250d015 100644 --- a/aphrodite/modeling/models/qwen.py +++ b/aphrodite/modeling/models/qwen.py @@ -13,7 +13,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import get_tensor_model_parallel_world_size from aphrodite.modeling.layers.activation import SiluAndMul from aphrodite.modeling.layers.layernorm import RMSNorm @@ -276,9 +275,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): ("gate_up_proj", "w1", 1), ] params_dict = dict(self.named_parameters()) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "rotary_emb.inv_freq" in name: continue for (param_name, weight_name, shard_id) in stacked_params_mapping: diff --git a/aphrodite/modeling/models/qwen2.py b/aphrodite/modeling/models/qwen2.py index ee706c4ad..45408fadb 100644 --- a/aphrodite/modeling/models/qwen2.py +++ b/aphrodite/modeling/models/qwen2.py @@ -31,7 +31,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig, LoRAConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import (get_current_tp_rank_partition_size, get_pp_group, get_tensor_model_parallel_rank, @@ -398,9 +397,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): ("gate_up_proj", "up_proj", 1), ] params_dict = dict(self.named_parameters(remove_duplicate=False)) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "rotary_emb.inv_freq" in name: continue if self.config.tie_word_embeddings and "lm_head.weight" in name: diff --git a/aphrodite/modeling/models/qwen2_moe.py b/aphrodite/modeling/models/qwen2_moe.py index 14e9f811e..2d9886f18 100644 --- a/aphrodite/modeling/models/qwen2_moe.py +++ b/aphrodite/modeling/models/qwen2_moe.py @@ -32,7 +32,7 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import print_warning_once, progress_bar +from aphrodite.common.utils import print_warning_once from aphrodite.distributed import (get_pp_group, get_tensor_model_parallel_world_size, tensor_model_parallel_all_reduce) @@ -449,9 +449,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): num_experts=self.config.num_experts) params_dict = dict(self.named_parameters()) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if 
"rotary_emb.inv_freq" in name: continue for (param_name, weight_name, shard_id) in stacked_params_mapping: diff --git a/aphrodite/modeling/models/siglip.py b/aphrodite/modeling/models/siglip.py index 64ab9e2f7..2d0ff5c0b 100644 --- a/aphrodite/modeling/models/siglip.py +++ b/aphrodite/modeling/models/siglip.py @@ -14,7 +14,6 @@ from aphrodite.common.config import ModelConfig from aphrodite.common.sequence import SequenceData -from aphrodite.common.utils import progress_bar from aphrodite.distributed import get_tensor_model_parallel_world_size from aphrodite.inputs import LLMInputs from aphrodite.modeling.layers.activation import get_act_fn @@ -644,9 +643,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): params_dict = dict(self.named_parameters()) layer_count = len(self.vision_model.encoder.layers) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: # omit layers when num_hidden_layers_override is set if "vision_model.encoder.layers." in name: layer_idx = int(name.split(".")[3]) diff --git a/aphrodite/modeling/models/solar.py b/aphrodite/modeling/models/solar.py index 9258b624f..4c35a4f40 100644 --- a/aphrodite/modeling/models/solar.py +++ b/aphrodite/modeling/models/solar.py @@ -30,7 +30,7 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig, LoRAConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import is_hip, progress_bar +from aphrodite.common.utils import is_hip from aphrodite.distributed import (get_pp_group, get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size) @@ -481,9 +481,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): (".gate_up_proj", ".up_proj", 1), ] params_dict = dict(self.named_parameters()) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "rotary_emb.inv_freq" in name: continue if ("rotary_emb.cos_cached" in name diff --git a/aphrodite/modeling/models/stablelm.py b/aphrodite/modeling/models/stablelm.py index e87151dda..35b444657 100644 --- a/aphrodite/modeling/models/stablelm.py +++ b/aphrodite/modeling/models/stablelm.py @@ -28,7 +28,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import get_tensor_model_parallel_world_size from aphrodite.modeling.layers.activation import SiluAndMul from aphrodite.modeling.layers.linear import (MergedColumnParallelLinear, @@ -285,9 +284,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): ("gate_up_proj", "up_proj", 1), ] params_dict = dict(self.named_parameters()) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "rotary_emb.inv_freq" in name: continue if ("rotary_emb.cos_cached" in name diff --git a/aphrodite/modeling/models/starcoder2.py b/aphrodite/modeling/models/starcoder2.py index 50df320e0..98acb3483 100644 --- a/aphrodite/modeling/models/starcoder2.py +++ b/aphrodite/modeling/models/starcoder2.py @@ -27,7 +27,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config 
import CacheConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import get_tensor_model_parallel_world_size from aphrodite.modeling.layers.activation import get_act_fn from aphrodite.modeling.layers.linear import (ColumnParallelLinear, @@ -294,9 +293,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): ] params_dict = dict(self.named_parameters(remove_duplicate=False)) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "rotary_emb.inv_freq" in name: continue diff --git a/aphrodite/modeling/models/xverse.py b/aphrodite/modeling/models/xverse.py index 34a5274b8..bc0d53554 100644 --- a/aphrodite/modeling/models/xverse.py +++ b/aphrodite/modeling/models/xverse.py @@ -29,7 +29,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig, LoRAConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import get_tensor_model_parallel_world_size from aphrodite.modeling.layers.activation import SiluAndMul from aphrodite.modeling.layers.layernorm import RMSNorm @@ -351,9 +350,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): ("gate_up_proj", "up_proj", 1), ] params_dict = dict(self.named_parameters()) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if ("rotary_emb.inv_freq" in name or "rotary_emb.cos_cached" in name or "rotary_emb.sin_cached" in name):
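
The per-model edits above are mechanical, but the motivation deserves a note:
the old pattern called list(weights) solely so progress_bar could know the
iterable's length, which held every checkpoint tensor in host memory at once.
Because loader.py now wraps the stream exactly once, each load_weights keeps
the iteration lazy:

    # Before (repeated in every model): materializes all tensors up front
    # just to give the bar a length.
    weights_list = list(weights)
    for name, loaded_weight in progress_bar(weights_list,
                                            desc="Loading modules..."):
        ...

    # After: models iterate the stream directly; the loader reports
    # progress by bytes as tensors flow through tensor_progress_bar.
    for name, loaded_weight in weights:
        ...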
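A minimal usage sketch of the new tensor_progress_bar helper (toy module
names and shapes, not from this patch; it assumes the tensor-parallel group
is initialized, since only rank 0 renders the bar). The bar advances by each
tensor's byte size, so a large shard moves it proportionally further than a
small bias vector:

    import torch

    from aphrodite.common.utils import tensor_progress_bar

    def toy_weights():
        # Stand-in for the lazily produced (name, tensor) stream that the
        # safetensors/bin weight iterators yield during loading.
        yield "model.embed_tokens.weight", torch.empty(1024, 512)
        yield "model.layers.0.mlp.up_proj.weight", torch.empty(2048, 512)

    # Exact byte total for the toy stream; the real loader instead sums
    # os.path.getsize() over the checkpoint files as a cheap estimate.
    total = sum(t.element_size() * t.nelement() for _, t in toy_weights())

    for name, tensor in tensor_progress_bar(toy_weights(), total,
                                            "Loading modules..."):
        pass  # each tensor is consumed as it streams through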
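One API consequence worth flagging: _get_weights_iterator now returns an
(iterator, estimated_bytes) pair instead of a bare iterator, so any subclass
or out-of-tree caller of the default loader must unpack two values. A
condensed sketch of the new call-site shape (the patch itself passes
fall_back_to_pt via getattr rather than a literal):

    weights, wgt_bytes = self._get_weights_iterator(
        model_config.model, model_config.revision, fall_back_to_pt=True)
    model.load_weights(
        tensor_progress_bar(weights, wgt_bytes, "Loading modules..."))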