diff --git a/aphrodite/common/utils.py b/aphrodite/common/utils.py
index 0495d9c8b..8745c9365 100644
--- a/aphrodite/common/utils.py
+++ b/aphrodite/common/utils.py
@@ -12,12 +12,13 @@
+import math
 import threading
 import uuid
 import warnings
 from asyncio import FIRST_COMPLETED, ensure_future
 from functools import lru_cache, partial, wraps
 from platform import uname
 from typing import (Any, AsyncGenerator, Awaitable, Callable, Dict, Generic,
-                    Hashable, List, Literal, Optional, OrderedDict, Set, Tuple,
-                    Type, TypeVar, Union, overload)
+                    Hashable, Iterable, List, Literal, Optional, OrderedDict,
+                    Set, Tuple, Type, TypeVar, Union, overload)
 from uuid import uuid4
 
 import numpy as np
@@ -1117,3 +1118,30 @@ def progress_bar(iterable, desc="Processing"):
             progress.update(task, advance=1)
     else:
         yield from iterable
+
+
+def tensor_progress_bar(iterable: Iterable[Tuple[str, torch.Tensor]],
+                        final_bytes: int, desc="Processing"):
+    """Stream (name, tensor) pairs, advancing a byte-weighted progress bar."""
+    show_progress = get_tensor_model_parallel_rank() == 0
+    # Scale totals to a power-of-1024 unit (B, KiB, MiB, ...) so the MofN
+    # column stays readable; guard against math.log2(0) on empty inputs.
+    units = 1024 ** (int(math.log2(final_bytes)) // 10) if final_bytes else 1
+
+    if show_progress:
+        with Progress(
+                SpinnerColumn(),
+                TextColumn("[progress.description]{task.description}"),
+                BarColumn(),
+                MofNCompleteColumn(),
+                TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
+                TimeElapsedColumn(),
+        ) as progress:
+            task = progress.add_task(f"[cyan]{desc}",
+                                     total=final_bytes / units)
+            for item in iterable:
+                steps = item[1].element_size() * item[1].nelement() / units
+                yield item
+                progress.update(task, advance=steps)
+    else:
+        yield from iterable
diff --git a/aphrodite/modeling/model_loader/loader.py b/aphrodite/modeling/model_loader/loader.py
index 0d0d3ace1..abb45ee11 100644
--- a/aphrodite/modeling/model_loader/loader.py
+++ b/aphrodite/modeling/model_loader/loader.py
@@ -24,7 +24,7 @@
                         DeviceConfig, LoadConfig, LoadFormat, LoRAConfig,
                         ModelConfig, MultiModalConfig, ParallelConfig,
                         SchedulerConfig)
-from aphrodite.common.utils import is_pin_memory_available
+from aphrodite.common.utils import is_pin_memory_available, tensor_progress_bar
 from aphrodite.modeling.model_loader.tensorizer import (
     TensorizerConfig, is_aphrodite_tensorized, load_with_tensorizer,
     serialize_aphrodite_model, tensorizer_weights_iterator)
@@ -303,10 +303,12 @@ def _prepare_weights(self, model_name_or_path: str,
     def _get_weights_iterator(
         self, model_name_or_path: str, revision: Optional[str],
         fall_back_to_pt: bool
-    ) -> Generator[Tuple[str, torch.Tensor], None, None]:
+    ) -> Tuple[Generator[Tuple[str, torch.Tensor], None, None], int]:
         """Get an iterator for the model weights based on the load format."""
         hf_folder, hf_weights_files, use_safetensors = self._prepare_weights(
             model_name_or_path, revision, fall_back_to_pt)
+        est_weight_bytes = sum(os.path.getsize(f)
+                               for f in hf_weights_files)
         if self.load_config.load_format == LoadFormat.NPCACHE:
             # Currently np_cache only support *.bin checkpoints
             assert use_safetensors is False
@@ -329,7 +331,7 @@ def _xla_weights_iterator(iterator: Generator):
                     xm.mark_step()
 
             weights_iterator = _xla_weights_iterator(weights_iterator)
-        return weights_iterator
+        return weights_iterator, est_weight_bytes
 
     def load_model(self, *, model_config: ModelConfig,
                    device_config: DeviceConfig,
@@ -343,13 +345,15 @@ def load_model(self, *, model_config: ModelConfig,
                 model = _initialize_model(model_config, self.load_config,
                                           lora_config, cache_config,
                                           scheduler_config)
-            model.load_weights(
-                self._get_weights_iterator(model_config.model,
+
+            weights, wgt_bytes = self._get_weights_iterator(model_config.model,
                                            model_config.revision,
                                            fall_back_to_pt=getattr(
                                                model,
"fall_back_to_pt_during_load", - True)), ) + True)) + model.load_weights(tensor_progress_bar(weights, wgt_bytes, + "Loading modules...")) for _, module in model.named_modules(): quant_method = getattr(module, "quant_method", None) diff --git a/aphrodite/modeling/models/arctic.py b/aphrodite/modeling/models/arctic.py index e05c755b0..5c5ddc008 100644 --- a/aphrodite/modeling/models/arctic.py +++ b/aphrodite/modeling/models/arctic.py @@ -8,7 +8,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import (get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size, tensor_model_parallel_all_reduce) @@ -491,9 +490,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): "It will take ~10 minutes loading from the 16-bit weights. " "Alternatively, use the prequantized 8-bit weights of arctic " "and set load-format to `sharded_state` will accelerate loading.") - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: for (param_name, weight_name, shard_id) in stacked_params_mapping: if weight_name not in name: continue diff --git a/aphrodite/modeling/models/baichuan.py b/aphrodite/modeling/models/baichuan.py index a8cded9ba..33d8cb4d8 100644 --- a/aphrodite/modeling/models/baichuan.py +++ b/aphrodite/modeling/models/baichuan.py @@ -28,7 +28,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig, LoRAConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import (get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size) from aphrodite.modeling.layers.activation import SiluAndMul @@ -368,9 +367,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): ("gate_up_proj", "up_proj", 1), ] params_dict = dict(self.named_parameters()) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "rotary_emb.inv_freq" in name: continue if name == "lm_head.weight": diff --git a/aphrodite/modeling/models/bart.py b/aphrodite/modeling/models/bart.py index e7334a210..f60e9bf60 100644 --- a/aphrodite/modeling/models/bart.py +++ b/aphrodite/modeling/models/bart.py @@ -930,12 +930,10 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): model_params_dict = dict(self.model.named_parameters()) top_params_dict = dict(self.named_parameters()) - weights_tuple_list = list(weights) - shared_embedding_weight = None shared_embedding_shard_id = None - for name, loaded_weight in weights_tuple_list: + for name, loaded_weight in weights: name = self._rename_key(name) name, shard_id = self._rename_stacked_param(name) diff --git a/aphrodite/modeling/models/blip2.py b/aphrodite/modeling/models/blip2.py index 02e5c6349..e861c5ff9 100644 --- a/aphrodite/modeling/models/blip2.py +++ b/aphrodite/modeling/models/blip2.py @@ -10,7 +10,6 @@ from aphrodite.common.config import CacheConfig, MultiModalConfig from aphrodite.common.sequence import (IntermediateTensors, SamplerOutput, SequenceData) -from aphrodite.common.utils import progress_bar from aphrodite.inputs import INPUT_REGISTRY, InputContext, LLMInputs from 
aphrodite.modeling.layers.activation import get_act_fn from aphrodite.modeling.layers.logits_processor import LogitsProcessor @@ -682,10 +681,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): ("gate_up_proj", "up_proj", 1), ] params_dict = dict(self.named_parameters()) - - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "lm_head.weight" in name: continue if "rotary_emb.inv_freq" in name: diff --git a/aphrodite/modeling/models/bloom.py b/aphrodite/modeling/models/bloom.py index c4f157d27..349503443 100644 --- a/aphrodite/modeling/models/bloom.py +++ b/aphrodite/modeling/models/bloom.py @@ -26,7 +26,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import (get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size) from aphrodite.modeling.layers.activation import get_act_fn @@ -311,9 +310,7 @@ def sample( def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): params_dict = dict(self.named_parameters(remove_duplicate=False)) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if name == "lm_head.weight": continue if not name.startswith("transformer."): diff --git a/aphrodite/modeling/models/chameleon.py b/aphrodite/modeling/models/chameleon.py index 5e219ba14..b1759e5fb 100644 --- a/aphrodite/modeling/models/chameleon.py +++ b/aphrodite/modeling/models/chameleon.py @@ -12,7 +12,7 @@ from aphrodite.common.config import CacheConfig, MultiModalConfig from aphrodite.common.sequence import (IntermediateTensors, SamplerOutput, SequenceData) -from aphrodite.common.utils import print_warning_once, progress_bar +from aphrodite.common.utils import print_warning_once from aphrodite.distributed import get_tensor_model_parallel_world_size from aphrodite.inputs import INPUT_REGISTRY, InputContext, LLMInputs from aphrodite.modeling.layers.activation import SiluAndMul @@ -1005,9 +1005,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): (".gate_up_proj", ".up_proj", 1), ] params_dict = dict(self.named_parameters()) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "rotary_emb.inv_freq" in name: continue diff --git a/aphrodite/modeling/models/chatglm.py b/aphrodite/modeling/models/chatglm.py index 637904863..a88fd1aca 100644 --- a/aphrodite/modeling/models/chatglm.py +++ b/aphrodite/modeling/models/chatglm.py @@ -11,7 +11,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig, LoRAConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import get_tensor_model_parallel_world_size from aphrodite.modeling.layers.activation import SiluAndMul from aphrodite.modeling.layers.layernorm import RMSNorm @@ -389,9 +388,7 @@ def sample( def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): params_dict = dict(self.named_parameters(remove_duplicate=False)) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading 
modules..."): + for name, loaded_weight in weights: if "rotary_pos_emb.inv_freq" in name: continue if "word_embeddings" in name: diff --git a/aphrodite/modeling/models/commandr.py b/aphrodite/modeling/models/commandr.py index 28b101138..121ccdc1b 100644 --- a/aphrodite/modeling/models/commandr.py +++ b/aphrodite/modeling/models/commandr.py @@ -30,7 +30,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig, LoRAConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import get_tensor_model_parallel_world_size from aphrodite.modeling.layers.activation import SiluAndMul from aphrodite.modeling.layers.linear import (MergedColumnParallelLinear, @@ -381,9 +380,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): ] params_dict = dict(self.named_parameters()) loaded_params = set() - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: for param_name, shard_name, shard_id in stacked_params_mapping: if shard_name not in name: continue diff --git a/aphrodite/modeling/models/dbrx.py b/aphrodite/modeling/models/dbrx.py index 49d80b03a..b4f4e62c3 100644 --- a/aphrodite/modeling/models/dbrx.py +++ b/aphrodite/modeling/models/dbrx.py @@ -7,7 +7,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import (get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size, tensor_model_parallel_all_reduce) @@ -411,9 +410,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): f"experts.mlp.{weight_name}", ) for weight_name in ["w1", "v1", "w2"]] params_dict = dict(self.named_parameters(remove_duplicate=False)) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: for param_name, weight_name in expert_params_mapping: if weight_name not in name: continue diff --git a/aphrodite/modeling/models/decilm.py b/aphrodite/modeling/models/decilm.py index a6034838e..cc4b19f52 100644 --- a/aphrodite/modeling/models/decilm.py +++ b/aphrodite/modeling/models/decilm.py @@ -29,7 +29,6 @@ from transformers import LlamaConfig from aphrodite.common.config import CacheConfig, LoRAConfig -from aphrodite.common.utils import progress_bar from aphrodite.modeling.model_loader.weight_utils import default_weight_loader from aphrodite.modeling.models.llama import LlamaForCausalLM from aphrodite.quantization.base_config import QuantizationConfig @@ -77,9 +76,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): ("gate_up_proj", "up_proj", 1), ] params_dict = dict(self.named_parameters()) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "rotary_emb.inv_freq" in name: continue diff --git a/aphrodite/modeling/models/deepseek.py b/aphrodite/modeling/models/deepseek.py index 2163df10b..1793caeeb 100644 --- a/aphrodite/modeling/models/deepseek.py +++ b/aphrodite/modeling/models/deepseek.py @@ -30,7 +30,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig 
from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import (get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size, tensor_model_parallel_all_reduce) @@ -423,9 +422,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): ] params_dict = dict(self.named_parameters()) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "rotary_emb.inv_freq" in name: continue for (param_name, weight_name, shard_id) in stacked_params_mapping: diff --git a/aphrodite/modeling/models/deepseek_v2.py b/aphrodite/modeling/models/deepseek_v2.py index 93ad9232e..f0994d01f 100644 --- a/aphrodite/modeling/models/deepseek_v2.py +++ b/aphrodite/modeling/models/deepseek_v2.py @@ -31,7 +31,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import (get_tensor_model_parallel_world_size, tensor_model_parallel_all_reduce) from aphrodite.modeling.layers.activation import SiluAndMul @@ -489,9 +488,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): num_experts=self.config.n_routed_experts) params_dict = dict(self.named_parameters()) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "rotary_emb.inv_freq" in name: continue for (param_name, weight_name, shard_id) in stacked_params_mapping: diff --git a/aphrodite/modeling/models/exaone.py b/aphrodite/modeling/models/exaone.py index 53da2f518..b70e0444c 100644 --- a/aphrodite/modeling/models/exaone.py +++ b/aphrodite/modeling/models/exaone.py @@ -31,7 +31,7 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig, LoRAConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import is_hip, progress_bar +from aphrodite.common.utils import is_hip from aphrodite.distributed import (get_pp_group, get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size) @@ -534,9 +534,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): (".gate_up_proj", ".c_fc_1", 1), ] params_dict = dict(self.named_parameters()) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "rotary_emb.inv_freq" in name: continue if ("rotary_emb.cos_cached" in name diff --git a/aphrodite/modeling/models/falcon.py b/aphrodite/modeling/models/falcon.py index a902ec73e..3e58ece88 100644 --- a/aphrodite/modeling/models/falcon.py +++ b/aphrodite/modeling/models/falcon.py @@ -29,7 +29,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import (get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size, tensor_model_parallel_all_reduce) @@ -422,9 +421,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): total_num_kv_heads = total_num_heads num_query_heads_per_kv_head = total_num_heads // 
total_num_kv_heads params_dict = dict(self.named_parameters(remove_duplicate=False)) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if name == "lm_head.weight": # Falcon uses tied embeddings. continue diff --git a/aphrodite/modeling/models/fuyu.py b/aphrodite/modeling/models/fuyu.py index 4437e4ed7..ea9fac2be 100644 --- a/aphrodite/modeling/models/fuyu.py +++ b/aphrodite/modeling/models/fuyu.py @@ -28,7 +28,6 @@ from aphrodite.common.config import CacheConfig, MultiModalConfig from aphrodite.common.sequence import (IntermediateTensors, SamplerOutput, SequenceData) -from aphrodite.common.utils import progress_bar from aphrodite.inputs import INPUT_REGISTRY, InputContext, LLMInputs from aphrodite.modeling.layers.linear import ColumnParallelLinear from aphrodite.modeling.model_loader.weight_utils import default_weight_loader @@ -310,9 +309,7 @@ def sample( def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): params_dict = dict(self.named_parameters(remove_duplicate=False)) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "rotary_emb.inv_freq" in name: continue if ("rotary_emb.cos_cached" in name diff --git a/aphrodite/modeling/models/gemma.py b/aphrodite/modeling/models/gemma.py index 2270eaee1..719a7d015 100644 --- a/aphrodite/modeling/models/gemma.py +++ b/aphrodite/modeling/models/gemma.py @@ -25,7 +25,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig, LoRAConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import get_tensor_model_parallel_world_size from aphrodite.modeling.layers.activation import GeluAndMul from aphrodite.modeling.layers.layernorm import GemmaRMSNorm @@ -378,9 +377,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): ] params_dict = dict(self.named_parameters()) loaded_params: Set[str] = set() - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: for (param_name, shard_name, shard_id) in stacked_params_mapping: if shard_name not in name: continue diff --git a/aphrodite/modeling/models/gemma2.py b/aphrodite/modeling/models/gemma2.py index 52cf22fb7..0e8e78e42 100644 --- a/aphrodite/modeling/models/gemma2.py +++ b/aphrodite/modeling/models/gemma2.py @@ -25,7 +25,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig, LoRAConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import get_tensor_model_parallel_world_size from aphrodite.modeling.layers.activation import GeluAndMul from aphrodite.modeling.layers.layernorm import GemmaRMSNorm @@ -370,9 +369,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): ] params_dict = dict(self.named_parameters()) loaded_params: Set[str] = set() - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: for (param_name, shard_name, shard_id) in stacked_params_mapping: if shard_name not in name: continue diff --git a/aphrodite/modeling/models/gpt2.py 
b/aphrodite/modeling/models/gpt2.py index 195f46590..a7150d2d4 100644 --- a/aphrodite/modeling/models/gpt2.py +++ b/aphrodite/modeling/models/gpt2.py @@ -26,7 +26,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import get_tensor_model_parallel_world_size from aphrodite.modeling.layers.activation import get_act_fn from aphrodite.modeling.layers.linear import (ColumnParallelLinear, @@ -253,9 +252,7 @@ def sample( def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): params_dict = dict(self.named_parameters(remove_duplicate=False)) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "lm_head.weight" in name: # GPT-2 ties the weights of the embedding layer and the final # linear layer. diff --git a/aphrodite/modeling/models/gpt_bigcode.py b/aphrodite/modeling/models/gpt_bigcode.py index 2d50abb5c..d666b3813 100644 --- a/aphrodite/modeling/models/gpt_bigcode.py +++ b/aphrodite/modeling/models/gpt_bigcode.py @@ -27,7 +27,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import get_tensor_model_parallel_world_size from aphrodite.modeling.layers.activation import get_act_fn from aphrodite.modeling.layers.linear import (ColumnParallelLinear, @@ -272,9 +271,7 @@ def sample( def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): params_dict = dict(self.named_parameters(remove_duplicate=False)) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "lm_head.weight" in name: continue if ".attn.bias" in name: diff --git a/aphrodite/modeling/models/gpt_j.py b/aphrodite/modeling/models/gpt_j.py index abb04a459..e7bfd3b4e 100644 --- a/aphrodite/modeling/models/gpt_j.py +++ b/aphrodite/modeling/models/gpt_j.py @@ -25,7 +25,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import get_tensor_model_parallel_world_size from aphrodite.modeling.layers.activation import get_act_fn from aphrodite.modeling.layers.linear import (ColumnParallelLinear, @@ -273,9 +272,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): ("gate_up_proj", "up_proj", 1), ] params_dict = dict(self.named_parameters()) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "attn.bias" in name or "attn.masked_bias" in name: continue for (param_name, weight_name, shard_id) in stacked_params_mapping: diff --git a/aphrodite/modeling/models/gpt_neox.py b/aphrodite/modeling/models/gpt_neox.py index 3bbc6cfb1..47c9f6494 100644 --- a/aphrodite/modeling/models/gpt_neox.py +++ b/aphrodite/modeling/models/gpt_neox.py @@ -25,7 +25,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig from 
aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import get_tensor_model_parallel_world_size from aphrodite.modeling.layers.activation import get_act_fn from aphrodite.modeling.layers.linear import (ColumnParallelLinear, @@ -277,9 +276,7 @@ def sample( def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): params_dict = dict(self.named_parameters()) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if ("attention.bias" in name or "attention.masked_bias" in name or "rotary_emb.inv_freq" in name): continue diff --git a/aphrodite/modeling/models/intern_vit.py b/aphrodite/modeling/models/intern_vit.py index ed82dea43..4b7b0b85d 100644 --- a/aphrodite/modeling/models/intern_vit.py +++ b/aphrodite/modeling/models/intern_vit.py @@ -11,7 +11,6 @@ import torch.nn.functional as F from transformers import PretrainedConfig -from aphrodite.common.utils import progress_bar from aphrodite.modeling.layers.activation import get_act_fn from aphrodite.modeling.layers.layernorm import RMSNorm from aphrodite.modeling.layers.linear import (ColumnParallelLinear, @@ -273,9 +272,7 @@ def forward( def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): params_dict = dict(self.named_parameters()) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: param = params_dict[name] weight_loader = getattr(param, "weight_loader", default_weight_loader) diff --git a/aphrodite/modeling/models/internlm2.py b/aphrodite/modeling/models/internlm2.py index d6a631f4f..53045b14f 100644 --- a/aphrodite/modeling/models/internlm2.py +++ b/aphrodite/modeling/models/internlm2.py @@ -8,7 +8,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import get_tensor_model_parallel_world_size from aphrodite.modeling.layers.activation import SiluAndMul from aphrodite.modeling.layers.layernorm import RMSNorm @@ -303,9 +302,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): ("gate_up_proj", "w3", 1), ] params_dict = dict(self.named_parameters()) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "rotary_emb.inv_freq" in name: continue for (param_name, weight_name, shard_id) in stacked_params_mapping: diff --git a/aphrodite/modeling/models/jais.py b/aphrodite/modeling/models/jais.py index c8639f5e0..a948de859 100644 --- a/aphrodite/modeling/models/jais.py +++ b/aphrodite/modeling/models/jais.py @@ -28,7 +28,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import (get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size) from aphrodite.modeling.layers.linear import (ColumnParallelLinear, @@ -314,9 +313,7 @@ def sample( def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): params_dict = dict(self.named_parameters(remove_duplicate=False)) - weights_list = 
list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "lm_head.weight" in name: # GPT-2 ties the weights of the embedding layer and the final # linear layer. diff --git a/aphrodite/modeling/models/jamba.py b/aphrodite/modeling/models/jamba.py index fa6e7ef91..2e0167e5c 100644 --- a/aphrodite/modeling/models/jamba.py +++ b/aphrodite/modeling/models/jamba.py @@ -12,7 +12,6 @@ from aphrodite.attention.layer import Attention from aphrodite.common.config import CacheConfig, LoRAConfig, SchedulerConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar # yapf: disable from aphrodite.distributed import (get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size) @@ -701,9 +700,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): num_experts=self.config.num_experts) params_dict = dict(self.named_parameters()) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "rotary_emb.inv_freq" in name: continue diff --git a/aphrodite/modeling/models/llama.py b/aphrodite/modeling/models/llama.py index f38780b7a..abbb13f0b 100644 --- a/aphrodite/modeling/models/llama.py +++ b/aphrodite/modeling/models/llama.py @@ -30,7 +30,7 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig, LoRAConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import is_hip, progress_bar +from aphrodite.common.utils import is_hip from aphrodite.distributed import (get_current_tp_rank_partition_size, get_pp_group, get_tensor_model_parallel_rank, @@ -487,9 +487,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): (".gate_up_proj", ".up_proj", 1), ] params_dict = dict(self.named_parameters()) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: name, loaded_weight = self.maybe_remap_mistral(name, loaded_weight) if "rotary_emb.inv_freq" in name: continue diff --git a/aphrodite/modeling/models/llama_embedding.py b/aphrodite/modeling/models/llama_embedding.py index 8132a2b28..7878f4603 100644 --- a/aphrodite/modeling/models/llama_embedding.py +++ b/aphrodite/modeling/models/llama_embedding.py @@ -5,7 +5,6 @@ from aphrodite.attention import AttentionMetadata from aphrodite.common.sequence import PoolerOutput -from aphrodite.common.utils import progress_bar from aphrodite.modeling.layers.pooler import Pooler, PoolingType from aphrodite.modeling.model_loader.weight_utils import default_weight_loader from aphrodite.modeling.models.llama import LlamaModel @@ -59,9 +58,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): ("gate_up_proj", "up_proj", 1), ] params_dict = dict(self.model.named_parameters()) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "rotary_emb.inv_freq" in name: continue if ("rotary_emb.cos_cached" in name diff --git a/aphrodite/modeling/models/medusa.py b/aphrodite/modeling/models/medusa.py index b9ab9e0b7..bd59de0f7 100644 --- a/aphrodite/modeling/models/medusa.py +++ b/aphrodite/modeling/models/medusa.py @@ -4,7 +4,6 @@ import torch.nn as nn from aphrodite.common.sequence 
import SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.modeling.layers.logits_processor import LogitsProcessor from aphrodite.modeling.layers.vocab_parallel_embedding import ( DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead) @@ -138,9 +137,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): weights_map = {} - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: name = name.replace("medusa_heads.", "") if name == "token_map": diff --git a/aphrodite/modeling/models/minicpm.py b/aphrodite/modeling/models/minicpm.py index b88453cfa..69c23d889 100644 --- a/aphrodite/modeling/models/minicpm.py +++ b/aphrodite/modeling/models/minicpm.py @@ -32,7 +32,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig, LoRAConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import (get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size, tensor_model_parallel_all_reduce) @@ -507,9 +506,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): for weight_name in ["w1", "w2", "w3"] ] params_dict = dict(self.named_parameters()) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "rotary_emb.inv_freq" in name: continue if ("rotary_emb.cos_cached" in name diff --git a/aphrodite/modeling/models/minicpmv.py b/aphrodite/modeling/models/minicpmv.py index 058473425..c628fbf60 100644 --- a/aphrodite/modeling/models/minicpmv.py +++ b/aphrodite/modeling/models/minicpmv.py @@ -40,7 +40,6 @@ from aphrodite.common.config import CacheConfig, MultiModalConfig from aphrodite.common.sequence import (IntermediateTensors, SamplerOutput, SequenceData) -from aphrodite.common.utils import progress_bar from aphrodite.inputs import INPUT_REGISTRY, InputContext, LLMInputs from aphrodite.modeling.layers.linear import ReplicatedLinear from aphrodite.modeling.layers.logits_processor import LogitsProcessor @@ -660,9 +659,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): ("gate_up_proj", "up_proj", 1), ] params_dict = dict(self.named_parameters()) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: for key_to_modify, new_key in _KEYS_TO_MODIFY_MAPPING.items(): if key_to_modify in name: name = name.replace(key_to_modify, new_key) diff --git a/aphrodite/modeling/models/mixtral.py b/aphrodite/modeling/models/mixtral.py index 80ea5eeed..73f6c8f85 100644 --- a/aphrodite/modeling/models/mixtral.py +++ b/aphrodite/modeling/models/mixtral.py @@ -30,7 +30,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig, LoRAConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import (get_pp_group, get_tensor_model_parallel_world_size) from aphrodite.modeling.layers.fused_moe import FusedMoE @@ -423,9 +422,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): num_experts=self.config.num_local_experts) params_dict = dict(self.named_parameters()) - weights_list = list(weights) - for name, loaded_weight in 
progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "rotary_emb.inv_freq" in name: continue diff --git a/aphrodite/modeling/models/mixtral_quant.py b/aphrodite/modeling/models/mixtral_quant.py index eac873257..5144001e4 100644 --- a/aphrodite/modeling/models/mixtral_quant.py +++ b/aphrodite/modeling/models/mixtral_quant.py @@ -32,7 +32,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import (get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size, tensor_model_parallel_all_reduce) @@ -388,9 +387,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): ] params_dict = dict(self.named_parameters()) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "rotary_emb.inv_freq" in name: continue for (param_name, weight_name, shard_id) in stacked_params_mapping: diff --git a/aphrodite/modeling/models/mlp_speculator.py b/aphrodite/modeling/models/mlp_speculator.py index 802c629f9..102684c10 100644 --- a/aphrodite/modeling/models/mlp_speculator.py +++ b/aphrodite/modeling/models/mlp_speculator.py @@ -5,7 +5,6 @@ import torch.nn as nn from aphrodite.common.sequence import SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.modeling import SamplingMetadata from aphrodite.modeling.layers.logits_processor import LogitsProcessor from aphrodite.modeling.layers.sampler import Sampler @@ -182,9 +181,7 @@ def generate_proposals( def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): params_dict = dict(self.named_parameters()) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: param = params_dict.get(name.replace("speculator.", "")) if param is not None: weight_loader = getattr(param, "weight_loader", diff --git a/aphrodite/modeling/models/mpt.py b/aphrodite/modeling/models/mpt.py index 9396cf4bf..75a823580 100644 --- a/aphrodite/modeling/models/mpt.py +++ b/aphrodite/modeling/models/mpt.py @@ -9,7 +9,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import (get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size) from aphrodite.modeling.layers.activation import get_act_fn @@ -298,9 +297,7 @@ def sample( def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): params_dict = dict(self.named_parameters(remove_duplicate=False)) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: # Skip loading extra bias for GPTQ models. 
if name.endswith(".bias") and name not in params_dict: continue diff --git a/aphrodite/modeling/models/nemotron.py b/aphrodite/modeling/models/nemotron.py index 3fdb2b6bb..620757cfe 100644 --- a/aphrodite/modeling/models/nemotron.py +++ b/aphrodite/modeling/models/nemotron.py @@ -31,7 +31,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig, LoRAConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import (get_pp_group, get_tensor_model_parallel_world_size) from aphrodite.modeling.layers.activation import get_act_fn @@ -494,9 +493,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): (".qkv_proj", ".v_proj", "v"), ] params_dict = dict(self.named_parameters()) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "rotary_emb.inv_freq" in name: continue if ("rotary_emb.cos_cached" in name diff --git a/aphrodite/modeling/models/olmo.py b/aphrodite/modeling/models/olmo.py index f54cdab1e..2fe48e4cc 100644 --- a/aphrodite/modeling/models/olmo.py +++ b/aphrodite/modeling/models/olmo.py @@ -30,7 +30,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import get_tensor_model_parallel_world_size from aphrodite.modeling.layers.activation import SiluAndMul from aphrodite.modeling.layers.linear import (MergedColumnParallelLinear, @@ -338,9 +337,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): ("gate_up_proj", "up_proj", 1), ] params_dict = dict(self.named_parameters(remove_duplicate=False)) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "rotary_emb.inv_freq" in name: continue if ("rotary_emb.cos_cached" in name diff --git a/aphrodite/modeling/models/olmoe.py b/aphrodite/modeling/models/olmoe.py index 118718ef3..008a6bcfb 100644 --- a/aphrodite/modeling/models/olmoe.py +++ b/aphrodite/modeling/models/olmoe.py @@ -19,7 +19,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import get_tensor_model_parallel_world_size from aphrodite.modeling.layers.fused_moe import FusedMoE from aphrodite.modeling.layers.layernorm import RMSNorm @@ -342,11 +341,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): num_experts=self.config.num_experts) params_dict = dict(self.named_parameters()) - weights_list = list(weights) - for name, loaded_weight in progress_bar( - weights_list, - desc="Loading modules..." 
- ): + for name, loaded_weight in weights: if "rotary_emb.inv_freq" in name: continue for (param_name, weight_name, shard_id) in stacked_params_mapping: diff --git a/aphrodite/modeling/models/opt.py b/aphrodite/modeling/models/opt.py index 51ad61a50..4ed6b0bd6 100644 --- a/aphrodite/modeling/models/opt.py +++ b/aphrodite/modeling/models/opt.py @@ -26,7 +26,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import get_tensor_model_parallel_world_size from aphrodite.modeling.layers.activation import get_act_fn from aphrodite.modeling.layers.linear import (ColumnParallelLinear, @@ -348,9 +347,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): ("qkv_proj", "v_proj", "v"), ] params_dict = dict(self.named_parameters(remove_duplicate=False)) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "lm_head.weight" in name: continue if name.startswith("decoder."): diff --git a/aphrodite/modeling/models/orion.py b/aphrodite/modeling/models/orion.py index ecdfc02fb..725bb4bdc 100644 --- a/aphrodite/modeling/models/orion.py +++ b/aphrodite/modeling/models/orion.py @@ -13,7 +13,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import get_tensor_model_parallel_world_size from aphrodite.modeling.layers.activation import SiluAndMul from aphrodite.modeling.layers.linear import (MergedColumnParallelLinear, @@ -304,9 +303,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): ("gate_up_proj", "up_proj", 1), ] params_dict = dict(self.named_parameters()) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "rotary_emb.inv_freq" in name: continue if ("rotary_emb.cos_cached" in name diff --git a/aphrodite/modeling/models/paligemma.py b/aphrodite/modeling/models/paligemma.py index 43ab56df2..23a8b688a 100644 --- a/aphrodite/modeling/models/paligemma.py +++ b/aphrodite/modeling/models/paligemma.py @@ -9,7 +9,6 @@ from aphrodite.attention import AttentionMetadata from aphrodite.common.config import CacheConfig, MultiModalConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.inputs import INPUT_REGISTRY, InputContext, LLMInputs from aphrodite.modeling.layers.logits_processor import LogitsProcessor from aphrodite.modeling.layers.sampler import Sampler @@ -292,9 +291,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): ] params_dict = dict(self.named_parameters()) loaded_params = set() - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: for key_to_modify, new_key in _KEYS_TO_MODIFY_MAPPING.items(): if key_to_modify in name: name = name.replace(key_to_modify, new_key) diff --git a/aphrodite/modeling/models/persimmon.py b/aphrodite/modeling/models/persimmon.py index 3566f13c7..2c1a31389 100644 --- a/aphrodite/modeling/models/persimmon.py +++ 
b/aphrodite/modeling/models/persimmon.py @@ -31,7 +31,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import get_tensor_model_parallel_world_size from aphrodite.modeling.layers.linear import (ColumnParallelLinear, QKVParallelLinear, @@ -305,9 +304,7 @@ def sample( def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): params_dict = dict(self.named_parameters(remove_duplicate=False)) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "rotary_emb.inv_freq" in name: continue if ("rotary_emb.cos_cached" in name diff --git a/aphrodite/modeling/models/phi.py b/aphrodite/modeling/models/phi.py index 67648e3d2..c3c5eb8fb 100644 --- a/aphrodite/modeling/models/phi.py +++ b/aphrodite/modeling/models/phi.py @@ -44,7 +44,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig, LoRAConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import get_tensor_model_parallel_world_size from aphrodite.modeling.layers.activation import get_act_fn from aphrodite.modeling.layers.linear import (ColumnParallelLinear, @@ -308,10 +307,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): ("qkv_proj", "v_proj", "v") ] params_dict = dict(self.named_parameters()) - - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "rotary_emb.inv_freq" in name: continue diff --git a/aphrodite/modeling/models/phi3_small.py b/aphrodite/modeling/models/phi3_small.py index 3074544d1..2e5dce1ec 100644 --- a/aphrodite/modeling/models/phi3_small.py +++ b/aphrodite/modeling/models/phi3_small.py @@ -8,7 +8,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig, LoRAConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import (get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size) from aphrodite.modeling.layers.linear import (MergedColumnParallelLinear, @@ -440,9 +439,7 @@ def sample( def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): params_dict = dict(self.named_parameters()) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "rotary_emb.inv_freq" in name: continue if name.endswith(".bias") and name not in params_dict: diff --git a/aphrodite/modeling/models/phi3v.py b/aphrodite/modeling/models/phi3v.py index 444d4d36a..c41e1e488 100644 --- a/aphrodite/modeling/models/phi3v.py +++ b/aphrodite/modeling/models/phi3v.py @@ -29,7 +29,6 @@ from aphrodite.attention import AttentionMetadata from aphrodite.common.config import CacheConfig, ModelConfig, MultiModalConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.inputs import INPUT_REGISTRY, InputContext, LLMInputs from aphrodite.modeling.layers.logits_processor import LogitsProcessor from 
aphrodite.modeling.layers.sampler import Sampler @@ -613,9 +612,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): (".gate_up_proj", ".up_proj", 1), ] params_dict = dict(self.named_parameters()) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "rotary_emb.inv_freq" in name: continue # post_layernorm is not needed in CLIPVisionModel diff --git a/aphrodite/modeling/models/qwen.py b/aphrodite/modeling/models/qwen.py index 26e1e2ddc..30250d015 100644 --- a/aphrodite/modeling/models/qwen.py +++ b/aphrodite/modeling/models/qwen.py @@ -13,7 +13,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import get_tensor_model_parallel_world_size from aphrodite.modeling.layers.activation import SiluAndMul from aphrodite.modeling.layers.layernorm import RMSNorm @@ -276,9 +275,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): ("gate_up_proj", "w1", 1), ] params_dict = dict(self.named_parameters()) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "rotary_emb.inv_freq" in name: continue for (param_name, weight_name, shard_id) in stacked_params_mapping: diff --git a/aphrodite/modeling/models/qwen2.py b/aphrodite/modeling/models/qwen2.py index ee706c4ad..45408fadb 100644 --- a/aphrodite/modeling/models/qwen2.py +++ b/aphrodite/modeling/models/qwen2.py @@ -31,7 +31,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig, LoRAConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import (get_current_tp_rank_partition_size, get_pp_group, get_tensor_model_parallel_rank, @@ -398,9 +397,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): ("gate_up_proj", "up_proj", 1), ] params_dict = dict(self.named_parameters(remove_duplicate=False)) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "rotary_emb.inv_freq" in name: continue if self.config.tie_word_embeddings and "lm_head.weight" in name: diff --git a/aphrodite/modeling/models/qwen2_moe.py b/aphrodite/modeling/models/qwen2_moe.py index 14e9f811e..2d9886f18 100644 --- a/aphrodite/modeling/models/qwen2_moe.py +++ b/aphrodite/modeling/models/qwen2_moe.py @@ -32,7 +32,7 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import print_warning_once, progress_bar +from aphrodite.common.utils import print_warning_once from aphrodite.distributed import (get_pp_group, get_tensor_model_parallel_world_size, tensor_model_parallel_all_reduce) @@ -449,9 +449,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): num_experts=self.config.num_experts) params_dict = dict(self.named_parameters()) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if 
"rotary_emb.inv_freq" in name: continue for (param_name, weight_name, shard_id) in stacked_params_mapping: diff --git a/aphrodite/modeling/models/siglip.py b/aphrodite/modeling/models/siglip.py index 64ab9e2f7..2d0ff5c0b 100644 --- a/aphrodite/modeling/models/siglip.py +++ b/aphrodite/modeling/models/siglip.py @@ -14,7 +14,6 @@ from aphrodite.common.config import ModelConfig from aphrodite.common.sequence import SequenceData -from aphrodite.common.utils import progress_bar from aphrodite.distributed import get_tensor_model_parallel_world_size from aphrodite.inputs import LLMInputs from aphrodite.modeling.layers.activation import get_act_fn @@ -644,9 +643,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): params_dict = dict(self.named_parameters()) layer_count = len(self.vision_model.encoder.layers) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: # omit layers when num_hidden_layers_override is set if "vision_model.encoder.layers." in name: layer_idx = int(name.split(".")[3]) diff --git a/aphrodite/modeling/models/solar.py b/aphrodite/modeling/models/solar.py index 9258b624f..4c35a4f40 100644 --- a/aphrodite/modeling/models/solar.py +++ b/aphrodite/modeling/models/solar.py @@ -30,7 +30,7 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig, LoRAConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import is_hip, progress_bar +from aphrodite.common.utils import is_hip from aphrodite.distributed import (get_pp_group, get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size) @@ -481,9 +481,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): (".gate_up_proj", ".up_proj", 1), ] params_dict = dict(self.named_parameters()) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "rotary_emb.inv_freq" in name: continue if ("rotary_emb.cos_cached" in name diff --git a/aphrodite/modeling/models/stablelm.py b/aphrodite/modeling/models/stablelm.py index e87151dda..35b444657 100644 --- a/aphrodite/modeling/models/stablelm.py +++ b/aphrodite/modeling/models/stablelm.py @@ -28,7 +28,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import get_tensor_model_parallel_world_size from aphrodite.modeling.layers.activation import SiluAndMul from aphrodite.modeling.layers.linear import (MergedColumnParallelLinear, @@ -285,9 +284,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): ("gate_up_proj", "up_proj", 1), ] params_dict = dict(self.named_parameters()) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "rotary_emb.inv_freq" in name: continue if ("rotary_emb.cos_cached" in name diff --git a/aphrodite/modeling/models/starcoder2.py b/aphrodite/modeling/models/starcoder2.py index 50df320e0..98acb3483 100644 --- a/aphrodite/modeling/models/starcoder2.py +++ b/aphrodite/modeling/models/starcoder2.py @@ -27,7 +27,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config 
import CacheConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import get_tensor_model_parallel_world_size from aphrodite.modeling.layers.activation import get_act_fn from aphrodite.modeling.layers.linear import (ColumnParallelLinear, @@ -294,9 +293,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): ] params_dict = dict(self.named_parameters(remove_duplicate=False)) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if "rotary_emb.inv_freq" in name: continue diff --git a/aphrodite/modeling/models/xverse.py b/aphrodite/modeling/models/xverse.py index 34a5274b8..bc0d53554 100644 --- a/aphrodite/modeling/models/xverse.py +++ b/aphrodite/modeling/models/xverse.py @@ -29,7 +29,6 @@ from aphrodite.attention import Attention, AttentionMetadata from aphrodite.common.config import CacheConfig, LoRAConfig from aphrodite.common.sequence import IntermediateTensors, SamplerOutput -from aphrodite.common.utils import progress_bar from aphrodite.distributed import get_tensor_model_parallel_world_size from aphrodite.modeling.layers.activation import SiluAndMul from aphrodite.modeling.layers.layernorm import RMSNorm @@ -351,9 +350,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): ("gate_up_proj", "up_proj", 1), ] params_dict = dict(self.named_parameters()) - weights_list = list(weights) - for name, loaded_weight in progress_bar(weights_list, - desc="Loading modules..."): + for name, loaded_weight in weights: if ("rotary_emb.inv_freq" in name or "rotary_emb.cos_cached" in name or "rotary_emb.sin_cached" in name):
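
The per-model edits above are mechanical, but the motivation deserves a note:
the old pattern called list(weights) solely so progress_bar could know the
iterable's length, which held every checkpoint tensor in host memory at once.
Because loader.py now wraps the stream exactly once, each load_weights keeps
the iteration lazy:

    # Before (repeated in every model): materializes all tensors up front
    # just to give the bar a length.
    weights_list = list(weights)
    for name, loaded_weight in progress_bar(weights_list,
                                            desc="Loading modules..."):
        ...

    # After: models iterate the stream directly; the loader reports
    # progress by bytes as tensors flow through tensor_progress_bar.
    for name, loaded_weight in weights:
        ...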
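A minimal usage sketch of the new tensor_progress_bar helper (toy module
names and shapes, not from this patch; it assumes the tensor-parallel group
is initialized, since only rank 0 renders the bar). The bar advances by each
tensor's byte size, so a large shard moves it proportionally further than a
small bias vector:

    import torch

    from aphrodite.common.utils import tensor_progress_bar

    def toy_weights():
        # Stand-in for the lazily produced (name, tensor) stream that the
        # safetensors/bin weight iterators yield during loading.
        yield "model.embed_tokens.weight", torch.empty(1024, 512)
        yield "model.layers.0.mlp.up_proj.weight", torch.empty(2048, 512)

    # Exact byte total for the toy stream; the real loader instead sums
    # os.path.getsize() over the checkpoint files as a cheap estimate.
    total = sum(t.element_size() * t.nelement() for _, t in toy_weights())

    for name, tensor in tensor_progress_bar(toy_weights(), total,
                                            "Loading modules..."):
        pass  # each tensor is consumed as it streams through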
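One API consequence worth flagging: _get_weights_iterator now returns an
(iterator, estimated_bytes) pair instead of a bare iterator, so any subclass
or out-of-tree caller of the default loader must unpack two values. A
condensed sketch of the new call-site shape (the patch itself passes
fall_back_to_pt via getattr rather than a literal):

    weights, wgt_bytes = self._get_weights_iterator(
        model_config.model, model_config.revision, fall_back_to_pt=True)
    model.load_weights(
        tensor_progress_bar(weights, wgt_bytes, "Loading modules..."))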