há 4 meses atrás · 9576096b9d
--- a/aphrodite/modeling/models/arctic.py
+++ b/aphrodite/modeling/models/arctic.py
@@ -491,9 +491,7 @@ class ArcticForCausalLM(nn.Module):
 
				             "It will take ~10 minutes loading from the 16-bit weights. "
			
 
				             "Alternatively, use the prequantized 8-bit weights of arctic "
			
 
				             "and set load-format to `sharded_state` will accelerate loading.")
			
 
				-        weights_list = list(weights)
			
 
				-        for name, loaded_weight in progress_bar(weights_list,
			
 
				-                                                desc="Loading modules..."):
			
 
				+        for name, loaded_weight in weights:
			
 
				             for (param_name, weight_name, shard_id) in stacked_params_mapping:
			
 
				                 if weight_name not in name:
			
 
				                     continue
			
--- a/aphrodite/modeling/models/baichuan.py
+++ b/aphrodite/modeling/models/baichuan.py
@@ -368,9 +368,7 @@ class BaiChuanBaseForCausalLM(nn.Module, SupportsLoRA):
 
				             ("gate_up_proj", "up_proj", 1),
			
 
				         ]
			
 
				         params_dict = dict(self.named_parameters())
			
 
				-        weights_list = list(weights)
			
 
				-        for name, loaded_weight in progress_bar(weights_list,
			
 
				-                                                desc="Loading modules..."):
			
 
				+        for name, loaded_weight in weights:
			
 
				             if "rotary_emb.inv_freq" in name:
			
 
				                 continue
			
 
				             if name == "lm_head.weight":
			
--- a/aphrodite/modeling/models/bart.py
+++ b/aphrodite/modeling/models/bart.py
@@ -930,12 +930,10 @@ class BartForConditionalGeneration(nn.Module):
 
				         model_params_dict = dict(self.model.named_parameters())
			
 
				         top_params_dict = dict(self.named_parameters())
			
 
				 
			
 
				-        weights_tuple_list = list(weights)
			
 
				-
			
 
				         shared_embedding_weight = None
			
 
				         shared_embedding_shard_id = None
			
 
				 
			
 
				-        for name, loaded_weight in weights_tuple_list:
			
 
				+        for name, loaded_weight in weights:
			
 
				 
			
 
				             name = self._rename_key(name)
			
 
				             name, shard_id = self._rename_stacked_param(name)
			
--- a/aphrodite/modeling/models/blip2.py
+++ b/aphrodite/modeling/models/blip2.py
@@ -682,10 +682,7 @@ class Blip2ForConditionalGeneration(nn.Module, SupportsMultiModal):
 
				             ("gate_up_proj", "up_proj", 1),
			
 
				         ]
			
 
				         params_dict = dict(self.named_parameters())
			
 
				-
			
 
				-        weights_list = list(weights)
			
 
				-        for name, loaded_weight in progress_bar(weights_list,
			
 
				-                                                desc="Loading modules..."):
			
 
				+        for name, loaded_weight in weights:
			
 
				             if "lm_head.weight" in name:
			
 
				                 continue
			
 
				             if "rotary_emb.inv_freq" in name:
			
--- a/aphrodite/modeling/models/bloom.py
+++ b/aphrodite/modeling/models/bloom.py
@@ -311,9 +311,7 @@ class BloomForCausalLM(nn.Module):
 
				 
			
 
				     def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
			
 
				         params_dict = dict(self.named_parameters(remove_duplicate=False))
			
 
				-        weights_list = list(weights)
			
 
				-        for name, loaded_weight in progress_bar(weights_list,
			
 
				-                                                desc="Loading modules..."):
			
 
				+        for name, loaded_weight in weights:
			
 
				             if name == "lm_head.weight":
			
 
				                 continue
			
 
				             if not name.startswith("transformer."):
			
--- a/aphrodite/modeling/models/chameleon.py
+++ b/aphrodite/modeling/models/chameleon.py
@@ -1005,9 +1005,7 @@ class ChameleonForConditionalGeneration(nn.Module, SupportsMultiModal):
 
				             (".gate_up_proj", ".up_proj", 1),
			
 
				         ]
			
 
				         params_dict = dict(self.named_parameters())
			
 
				-        weights_list = list(weights)
			
 
				-        for name, loaded_weight in progress_bar(weights_list,
			
 
				-                                                desc="Loading modules..."):
			
 
				+        for name, loaded_weight in weights:
			
 
				             if "rotary_emb.inv_freq" in name:
			
 
				                 continue
			
 
				 
			
--- a/aphrodite/modeling/models/chatglm.py
+++ b/aphrodite/modeling/models/chatglm.py
@@ -389,9 +389,7 @@ class ChatGLMForCausalLM(nn.Module, SupportsLoRA):
 
				 
			
 
				     def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
			
 
				         params_dict = dict(self.named_parameters(remove_duplicate=False))
			
 
				-        weights_list = list(weights)
			
 
				-        for name, loaded_weight in progress_bar(weights_list,
			
 
				-                                                desc="Loading modules..."):
			
 
				+        for name, loaded_weight in weights:
			
 
				             if "rotary_pos_emb.inv_freq" in name:
			
 
				                 continue
			
 
				             if "word_embeddings" in name:
			
--- a/aphrodite/modeling/models/commandr.py
+++ b/aphrodite/modeling/models/commandr.py
@@ -381,9 +381,7 @@ class CohereForCausalLM(nn.Module):
 
				         ]
			
 
				         params_dict = dict(self.named_parameters())
			
 
				         loaded_params = set()
			
 
				-        weights_list = list(weights)
			
 
				-        for name, loaded_weight in progress_bar(weights_list,
			
 
				-                                                desc="Loading modules..."):
			
 
				+        for name, loaded_weight in weights:
			
 
				             for param_name, shard_name, shard_id in stacked_params_mapping:
			
 
				                 if shard_name not in name:
			
 
				                     continue
			
--- a/aphrodite/modeling/models/dbrx.py
+++ b/aphrodite/modeling/models/dbrx.py
@@ -411,9 +411,7 @@ class DbrxForCausalLM(nn.Module):
 
				             f"experts.mlp.{weight_name}",
			
 
				         ) for weight_name in ["w1", "v1", "w2"]]
			
 
				         params_dict = dict(self.named_parameters(remove_duplicate=False))
			
 
				-        weights_list = list(weights)
			
 
				-        for name, loaded_weight in progress_bar(weights_list,
			
 
				-                                                desc="Loading modules..."):
			
 
				+        for name, loaded_weight in weights:
			
 
				             for param_name, weight_name in expert_params_mapping:
			
 
				                 if weight_name not in name:
			
 
				                     continue
			
--- a/aphrodite/modeling/models/decilm.py
+++ b/aphrodite/modeling/models/decilm.py
@@ -77,9 +77,7 @@ class DeciLMForCausalLM(LlamaForCausalLM):
 
				             ("gate_up_proj", "up_proj", 1),
			
 
				         ]
			
 
				         params_dict = dict(self.named_parameters())
			
 
				-        weights_list = list(weights)
			
 
				-        for name, loaded_weight in progress_bar(weights_list,
			
 
				-                                                desc="Loading modules..."):
			
 
				+        for name, loaded_weight in weights:
			
 
				             if "rotary_emb.inv_freq" in name:
			
 
				                 continue
			
 
				 
			
--- a/aphrodite/modeling/models/deepseek.py
+++ b/aphrodite/modeling/models/deepseek.py
@@ -423,9 +423,7 @@ class DeepseekForCausalLM(nn.Module):
 
				         ]
			
 
				 
			
 
				         params_dict = dict(self.named_parameters())
			
 
				-        weights_list = list(weights)
			
 
				-        for name, loaded_weight in progress_bar(weights_list,
			
 
				-                                                desc="Loading modules..."):
			
 
				+        for name, loaded_weight in weights:
			
 
				             if "rotary_emb.inv_freq" in name:
			
 
				                 continue
			
 
				             for (param_name, weight_name, shard_id) in stacked_params_mapping:
			
--- a/aphrodite/modeling/models/deepseek_v2.py
+++ b/aphrodite/modeling/models/deepseek_v2.py
@@ -489,9 +489,7 @@ class DeepseekV2ForCausalLM(nn.Module):
 
				             num_experts=self.config.n_routed_experts)
			
 
				 
			
 
				         params_dict = dict(self.named_parameters())
			
 
				-        weights_list = list(weights)
			
 
				-        for name, loaded_weight in progress_bar(weights_list,
			
 
				-                                                desc="Loading modules..."):
			
 
				+        for name, loaded_weight in weights:
			
 
				             if "rotary_emb.inv_freq" in name:
			
 
				                 continue
			
 
				             for (param_name, weight_name, shard_id) in stacked_params_mapping:
			
--- a/aphrodite/modeling/models/exaone.py
+++ b/aphrodite/modeling/models/exaone.py
@@ -534,9 +534,7 @@ class ExaoneForCausalLM(nn.Module, SupportsLoRA):
 
				             (".gate_up_proj", ".c_fc_1", 1),
			
 
				         ]
			
 
				         params_dict = dict(self.named_parameters())
			
 
				-        weights_list = list(weights)
			
 
				-        for name, loaded_weight in progress_bar(weights_list,
			
 
				-                                                desc="Loading modules..."):
			
 
				+        for name, loaded_weight in weights:
			
 
				             if "rotary_emb.inv_freq" in name:
			
 
				                 continue
			
 
				             if ("rotary_emb.cos_cached" in name
			
--- a/aphrodite/modeling/models/falcon.py
+++ b/aphrodite/modeling/models/falcon.py
@@ -422,9 +422,7 @@ class FalconForCausalLM(nn.Module):
 
				             total_num_kv_heads = total_num_heads
			
 
				         num_query_heads_per_kv_head = total_num_heads // total_num_kv_heads
			
 
				         params_dict = dict(self.named_parameters(remove_duplicate=False))
			
 
				-        weights_list = list(weights)
			
 
				-        for name, loaded_weight in progress_bar(weights_list,
			
 
				-                                                desc="Loading modules..."):
			
 
				+        for name, loaded_weight in weights:
			
 
				             if name == "lm_head.weight":
			
 
				                 # Falcon uses tied embeddings.
			
 
				                 continue
			
--- a/aphrodite/modeling/models/fuyu.py
+++ b/aphrodite/modeling/models/fuyu.py
@@ -310,9 +310,7 @@ class FuyuForCausalLM(nn.Module, SupportsMultiModal):
 
				 
			
 
				     def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
			
 
				         params_dict = dict(self.named_parameters(remove_duplicate=False))
			
 
				-        weights_list = list(weights)
			
 
				-        for name, loaded_weight in progress_bar(weights_list,
			
 
				-                                                desc="Loading modules..."):
			
 
				+        for name, loaded_weight in weights:
			
 
				             if "rotary_emb.inv_freq" in name:
			
 
				                 continue
			
 
				             if ("rotary_emb.cos_cached" in name
			
--- a/aphrodite/modeling/models/gemma.py
+++ b/aphrodite/modeling/models/gemma.py
@@ -378,9 +378,7 @@ class GemmaForCausalLM(nn.Module, SupportsLoRA):
 
				         ]
			
 
				         params_dict = dict(self.named_parameters())
			
 
				         loaded_params: Set[str] = set()
			
 
				-        weights_list = list(weights)
			
 
				-        for name, loaded_weight in progress_bar(weights_list,
			
 
				-                                                desc="Loading modules..."):
			
 
				+        for name, loaded_weight in weights:
			
 
				             for (param_name, shard_name, shard_id) in stacked_params_mapping:
			
 
				                 if shard_name not in name:
			
 
				                     continue
			
--- a/aphrodite/modeling/models/gemma2.py
+++ b/aphrodite/modeling/models/gemma2.py
@@ -370,9 +370,7 @@ class Gemma2ForCausalLM(nn.Module, SupportsLoRA):
 
				         ]
			
 
				         params_dict = dict(self.named_parameters())
			
 
				         loaded_params: Set[str] = set()
			
 
				-        weights_list = list(weights)
			
 
				-        for name, loaded_weight in progress_bar(weights_list,
			
 
				-                                                desc="Loading modules..."):
			
 
				+        for name, loaded_weight in weights:
			
 
				             for (param_name, shard_name, shard_id) in stacked_params_mapping:
			
 
				                 if shard_name not in name:
			
 
				                     continue
			
--- a/aphrodite/modeling/models/gpt2.py
+++ b/aphrodite/modeling/models/gpt2.py
@@ -253,9 +253,7 @@ class GPT2LMHeadModel(nn.Module):
 
				 
			
 
				     def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
			
 
				         params_dict = dict(self.named_parameters(remove_duplicate=False))
			
 
				-        weights_list = list(weights)
			
 
				-        for name, loaded_weight in progress_bar(weights_list,
			
 
				-                                                desc="Loading modules..."):
			
 
				+        for name, loaded_weight in weights:
			
 
				             if "lm_head.weight" in name:
			
 
				                 # GPT-2 ties the weights of the embedding layer and the final
			
 
				                 # linear layer.
			
--- a/aphrodite/modeling/models/gpt_bigcode.py
+++ b/aphrodite/modeling/models/gpt_bigcode.py
@@ -272,9 +272,7 @@ class GPTBigCodeForCausalLM(nn.Module):
 
				 
			
 
				     def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
			
 
				         params_dict = dict(self.named_parameters(remove_duplicate=False))
			
 
				-        weights_list = list(weights)
			
 
				-        for name, loaded_weight in progress_bar(weights_list,
			
 
				-                                                desc="Loading modules..."):
			
 
				+        for name, loaded_weight in weights:
			
 
				             if "lm_head.weight" in name:
			
 
				                 continue
			
 
				             if ".attn.bias" in name:
			
--- a/aphrodite/modeling/models/gpt_j.py
+++ b/aphrodite/modeling/models/gpt_j.py
@@ -273,9 +273,7 @@ class GPTJForCausalLM(nn.Module):
 
				             ("gate_up_proj", "up_proj", 1),
			
 
				         ]
			
 
				         params_dict = dict(self.named_parameters())
			
 
				-        weights_list = list(weights)
			
 
				-        for name, loaded_weight in progress_bar(weights_list,
			
 
				-                                                desc="Loading modules..."):
			
 
				+        for name, loaded_weight in weights:
			
 
				             if "attn.bias" in name or "attn.masked_bias" in name:
			
 
				                 continue
			
 
				             for (param_name, weight_name, shard_id) in stacked_params_mapping:
			
--- a/aphrodite/modeling/models/gpt_neox.py
+++ b/aphrodite/modeling/models/gpt_neox.py
@@ -277,9 +277,7 @@ class GPTNeoXForCausalLM(nn.Module):
 
				 
			
 
				     def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
			
 
				         params_dict = dict(self.named_parameters())
			
 
				-        weights_list = list(weights)
			
 
				-        for name, loaded_weight in progress_bar(weights_list,
			
 
				-                                                desc="Loading modules..."):
			
 
				+        for name, loaded_weight in weights:
			
 
				             if ("attention.bias" in name or "attention.masked_bias" in name
			
 
				                     or "rotary_emb.inv_freq" in name):
			
 
				                 continue
			
--- a/aphrodite/modeling/models/intern_vit.py
+++ b/aphrodite/modeling/models/intern_vit.py
@@ -273,9 +273,7 @@ class InternVisionModel(nn.Module):
 
				 
			
 
				     def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
			
 
				         params_dict = dict(self.named_parameters())
			
 
				-        weights_list = list(weights)
			
 
				-        for name, loaded_weight in progress_bar(weights_list,
			
 
				-                                                desc="Loading modules..."):
			
 
				+        for name, loaded_weight in weights:
			
 
				             param = params_dict[name]
			
 
				             weight_loader = getattr(param, "weight_loader",
			
 
				                                     default_weight_loader)
			
--- a/aphrodite/modeling/models/internlm2.py
+++ b/aphrodite/modeling/models/internlm2.py
@@ -303,9 +303,7 @@ class InternLM2ForCausalLM(nn.Module):
 
				             ("gate_up_proj", "w3", 1),
			
 
				         ]
			
 
				         params_dict = dict(self.named_parameters())
			
 
				-        weights_list = list(weights)
			
 
				-        for name, loaded_weight in progress_bar(weights_list,
			
 
				-                                                desc="Loading modules..."):
			
 
				+        for name, loaded_weight in weights:
			
 
				             if "rotary_emb.inv_freq" in name:
			
 
				                 continue
			
 
				             for (param_name, weight_name, shard_id) in stacked_params_mapping:
			
--- a/aphrodite/modeling/models/jais.py
+++ b/aphrodite/modeling/models/jais.py
@@ -314,9 +314,7 @@ class JAISLMHeadModel(nn.Module):
 
				 
			
 
				     def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
			
 
				         params_dict = dict(self.named_parameters(remove_duplicate=False))
			
 
				-        weights_list = list(weights)
			
 
				-        for name, loaded_weight in progress_bar(weights_list,
			
 
				-                                                desc="Loading modules..."):
			
 
				+        for name, loaded_weight in weights:
			
 
				             if "lm_head.weight" in name:
			
 
				                 # GPT-2 ties the weights of the embedding layer and the final
			
 
				                 # linear layer.
			
--- a/aphrodite/modeling/models/jamba.py
+++ b/aphrodite/modeling/models/jamba.py
@@ -701,9 +701,7 @@ class JambaForCausalLM(nn.Module, HasInnerState):
 
				             num_experts=self.config.num_experts)
			
 
				 
			
 
				         params_dict = dict(self.named_parameters())
			
 
				-        weights_list = list(weights)
			
 
				-        for name, loaded_weight in progress_bar(weights_list,
			
 
				-                                                desc="Loading modules..."):
			
 
				+        for name, loaded_weight in weights:
			
 
				             if "rotary_emb.inv_freq" in name:
			
 
				                 continue
			
 
				 
			
--- a/aphrodite/modeling/models/llama_embedding.py
+++ b/aphrodite/modeling/models/llama_embedding.py
@@ -59,9 +59,7 @@ class LlamaEmbeddingModel(nn.Module):
 
				             ("gate_up_proj", "up_proj", 1),
			
 
				         ]
			
 
				         params_dict = dict(self.model.named_parameters())
			
 
				-        weights_list = list(weights)
			
 
				-        for name, loaded_weight in progress_bar(weights_list,
			
 
				-                                                desc="Loading modules..."):
			
 
				+        for name, loaded_weight in weights:
			
 
				             if "rotary_emb.inv_freq" in name:
			
 
				                 continue
			
 
				             if ("rotary_emb.cos_cached" in name
			
--- a/aphrodite/modeling/models/medusa.py
+++ b/aphrodite/modeling/models/medusa.py
@@ -138,9 +138,7 @@ class Medusa(nn.Module):
 
				 
			
 
				         weights_map = {}
			
 
				 
			
 
				-        weights_list = list(weights)
			
 
				-        for name, loaded_weight in progress_bar(weights_list,
			
 
				-                                                desc="Loading modules..."):
			
 
				+        for name, loaded_weight in weights:
			
 
				             name = name.replace("medusa_heads.", "")
			
 
				 
			
 
				             if name == "token_map":
			
--- a/aphrodite/modeling/models/minicpm.py
+++ b/aphrodite/modeling/models/minicpm.py
@@ -507,9 +507,7 @@ class MiniCPMForCausalLM(nn.Module, SupportsLoRA):
 
				             for weight_name in ["w1", "w2", "w3"]
			
 
				         ]
			
 
				         params_dict = dict(self.named_parameters())
			
 
				-        weights_list = list(weights)
			
 
				-        for name, loaded_weight in progress_bar(weights_list,
			
 
				-                                                desc="Loading modules..."):
			
 
				+        for name, loaded_weight in weights:
			
 
				             if "rotary_emb.inv_freq" in name:
			
 
				                 continue
			
 
				             if ("rotary_emb.cos_cached" in name
			
--- a/aphrodite/modeling/models/minicpmv.py
+++ b/aphrodite/modeling/models/minicpmv.py
@@ -660,9 +660,7 @@ class MiniCPMVBaseModel(nn.Module, SupportsMultiModal):
 
				             ("gate_up_proj", "up_proj", 1),
			
 
				         ]
			
 
				         params_dict = dict(self.named_parameters())
			
 
				-        weights_list = list(weights)
			
 
				-        for name, loaded_weight in progress_bar(weights_list,
			
 
				-                                                desc="Loading modules..."):
			
 
				+        for name, loaded_weight in weights:
			
 
				             for key_to_modify, new_key in _KEYS_TO_MODIFY_MAPPING.items():
			
 
				                 if key_to_modify in name:
			
 
				                     name = name.replace(key_to_modify, new_key)
			
--- a/aphrodite/modeling/models/mixtral.py
+++ b/aphrodite/modeling/models/mixtral.py
@@ -423,9 +423,7 @@ class MixtralForCausalLM(nn.Module, SupportsLoRA):
 
				             num_experts=self.config.num_local_experts)
			
 
				 
			
 
				         params_dict = dict(self.named_parameters())
			
 
				-        weights_list = list(weights)
			
 
				-        for name, loaded_weight in progress_bar(weights_list,
			
 
				-                                                desc="Loading modules..."):
			
 
				+        for name, loaded_weight in weights:
			
 
				             if "rotary_emb.inv_freq" in name:
			
 
				                 continue
			
 
				 
			
--- a/aphrodite/modeling/models/mixtral_quant.py
+++ b/aphrodite/modeling/models/mixtral_quant.py
@@ -388,9 +388,7 @@ class MixtralForCausalLM(nn.Module):
 
				         ]
			
 
				 
			
 
				         params_dict = dict(self.named_parameters())
			
 
				-        weights_list = list(weights)
			
 
				-        for name, loaded_weight in progress_bar(weights_list,
			
 
				-                                                desc="Loading modules..."):
			
 
				+        for name, loaded_weight in weights:
			
 
				             if "rotary_emb.inv_freq" in name:
			
 
				                 continue
			
 
				             for (param_name, weight_name, shard_id) in stacked_params_mapping:
			
--- a/aphrodite/modeling/models/mlp_speculator.py
+++ b/aphrodite/modeling/models/mlp_speculator.py
@@ -182,9 +182,7 @@ class MLPSpeculator(nn.Module):
 
				 
			
 
				     def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
			
 
				         params_dict = dict(self.named_parameters())
			
 
				-        weights_list = list(weights)
			
 
				-        for name, loaded_weight in progress_bar(weights_list,
			
 
				-                                                desc="Loading modules..."):
			
 
				+        for name, loaded_weight in weights:
			
 
				             param = params_dict.get(name.replace("speculator.", ""))
			
 
				             if param is not None:
			
 
				                 weight_loader = getattr(param, "weight_loader",
			
--- a/aphrodite/modeling/models/mpt.py
+++ b/aphrodite/modeling/models/mpt.py
@@ -298,9 +298,7 @@ class MPTForCausalLM(nn.Module):
 
				 
			
 
				     def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
			
 
				         params_dict = dict(self.named_parameters(remove_duplicate=False))
			
 
				-        weights_list = list(weights)
			
 
				-        for name, loaded_weight in progress_bar(weights_list,
			
 
				-                                                desc="Loading modules..."):
			
 
				+        for name, loaded_weight in weights:
			
 
				             # Skip loading extra bias for GPTQ models.
			
 
				             if name.endswith(".bias") and name not in params_dict:
			
 
				                 continue
			
--- a/aphrodite/modeling/models/nemotron.py
+++ b/aphrodite/modeling/models/nemotron.py
@@ -494,9 +494,7 @@ class NemotronForCausalLM(nn.Module, SupportsLoRA):
 
				             (".qkv_proj", ".v_proj", "v"),
			
 
				         ]
			
 
				         params_dict = dict(self.named_parameters())
			
 
				-        weights_list = list(weights)
			
 
				-        for name, loaded_weight in progress_bar(weights_list,
			
 
				-                                                desc="Loading modules..."):
			
 
				+        for name, loaded_weight in weights:
			
 
				             if "rotary_emb.inv_freq" in name:
			
 
				                 continue
			
 
				             if ("rotary_emb.cos_cached" in name
			
--- a/aphrodite/modeling/models/olmo.py
+++ b/aphrodite/modeling/models/olmo.py
@@ -338,9 +338,7 @@ class OlmoForCausalLM(nn.Module):
 
				             ("gate_up_proj", "up_proj", 1),
			
 
				         ]
			
 
				         params_dict = dict(self.named_parameters(remove_duplicate=False))
			
 
				-        weights_list = list(weights)
			
 
				-        for name, loaded_weight in progress_bar(weights_list,
			
 
				-                                                desc="Loading modules..."):
			
 
				+        for name, loaded_weight in weights:
			
 
				             if "rotary_emb.inv_freq" in name:
			
 
				                 continue
			
 
				             if ("rotary_emb.cos_cached" in name
			
--- a/aphrodite/modeling/models/olmoe.py
+++ b/aphrodite/modeling/models/olmoe.py
@@ -342,11 +342,7 @@ class OlmoeForCausalLM(nn.Module):
 
				             num_experts=self.config.num_experts)
			
 
				 
			
 
				         params_dict = dict(self.named_parameters())
			
 
				-        weights_list = list(weights)
			
 
				-        for name, loaded_weight in progress_bar(
			
 
				-            weights_list,
			
 
				-            desc="Loading modules..."
			
 
				-        ):
			
 
				+        for name, loaded_weight in weights:
			
 
				             if "rotary_emb.inv_freq" in name:
			
 
				                 continue
			
 
				             for (param_name, weight_name, shard_id) in stacked_params_mapping:
			
--- a/aphrodite/modeling/models/opt.py
+++ b/aphrodite/modeling/models/opt.py
@@ -348,9 +348,7 @@ class OPTForCausalLM(nn.Module):
 
				             ("qkv_proj", "v_proj", "v"),
			
 
				         ]
			
 
				         params_dict = dict(self.named_parameters(remove_duplicate=False))
			
 
				-        weights_list = list(weights)
			
 
				-        for name, loaded_weight in progress_bar(weights_list,
			
 
				-                                                desc="Loading modules..."):
			
 
				+        for name, loaded_weight in weights:
			
 
				             if "lm_head.weight" in name:
			
 
				                 continue
			
 
				             if name.startswith("decoder."):
			
--- a/aphrodite/modeling/models/orion.py
+++ b/aphrodite/modeling/models/orion.py
@@ -304,9 +304,7 @@ class OrionForCausalLM(nn.Module):
 
				             ("gate_up_proj", "up_proj", 1),
			
 
				         ]
			
 
				         params_dict = dict(self.named_parameters())
			
 
				-        weights_list = list(weights)
			
 
				-        for name, loaded_weight in progress_bar(weights_list,
			
 
				-                                                desc="Loading modules..."):
			
 
				+        for name, loaded_weight in weights:
			
 
				             if "rotary_emb.inv_freq" in name:
			
 
				                 continue
			
 
				             if ("rotary_emb.cos_cached" in name
			
--- a/aphrodite/modeling/models/paligemma.py
+++ b/aphrodite/modeling/models/paligemma.py
@@ -292,9 +292,7 @@ class PaliGemmaForConditionalGeneration(nn.Module, SupportsMultiModal):
 
				         ]
			
 
				         params_dict = dict(self.named_parameters())
			
 
				         loaded_params = set()
			
 
				-        weights_list = list(weights)
			
 
				-        for name, loaded_weight in progress_bar(weights_list,
			
 
				-                                                desc="Loading modules..."):
			
 
				+        for name, loaded_weight in weights:
			
 
				             for key_to_modify, new_key in _KEYS_TO_MODIFY_MAPPING.items():
			
 
				                 if key_to_modify in name:
			
 
				                     name = name.replace(key_to_modify, new_key)
			
--- a/aphrodite/modeling/models/persimmon.py
+++ b/aphrodite/modeling/models/persimmon.py
@@ -305,9 +305,7 @@ class PersimmonForCausalLM(nn.Module):
 
				 
			
 
				     def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
			
 
				         params_dict = dict(self.named_parameters(remove_duplicate=False))
			
 
				-        weights_list = list(weights)
			
 
				-        for name, loaded_weight in progress_bar(weights_list,
			
 
				-                                                desc="Loading modules..."):
			
 
				+        for name, loaded_weight in weights:
			
 
				             if "rotary_emb.inv_freq" in name:
			
 
				                 continue
			
 
				             if ("rotary_emb.cos_cached" in name
			
--- a/aphrodite/modeling/models/phi.py
+++ b/aphrodite/modeling/models/phi.py
@@ -308,10 +308,7 @@ class PhiForCausalLM(nn.Module, SupportsLoRA):
 
				             ("qkv_proj", "v_proj", "v")
			
 
				         ]
			
 
				         params_dict = dict(self.named_parameters())
			
 
				-
			
 
				-        weights_list = list(weights)
			
 
				-        for name, loaded_weight in progress_bar(weights_list,
			
 
				-                                                desc="Loading modules..."):
			
 
				+        for name, loaded_weight in weights:
			
 
				             if "rotary_emb.inv_freq" in name:
			
 
				                 continue
			
 
				 
			
--- a/aphrodite/modeling/models/phi3_small.py
+++ b/aphrodite/modeling/models/phi3_small.py
@@ -440,9 +440,7 @@ class Phi3SmallForCausalLM(nn.Module):
 
				     def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
			
 
				 
			
 
				         params_dict = dict(self.named_parameters())
			
 
				-        weights_list = list(weights)
			
 
				-        for name, loaded_weight in progress_bar(weights_list,
			
 
				-                                                desc="Loading modules..."):
			
 
				+        for name, loaded_weight in weights:
			
 
				             if "rotary_emb.inv_freq" in name:
			
 
				                 continue
			
 
				             if name.endswith(".bias") and name not in params_dict:
			
--- a/aphrodite/modeling/models/phi3v.py
+++ b/aphrodite/modeling/models/phi3v.py
@@ -613,9 +613,7 @@ class Phi3VForCausalLM(nn.Module, SupportsMultiModal):
 
				             (".gate_up_proj", ".up_proj", 1),
			
 
				         ]
			
 
				         params_dict = dict(self.named_parameters())
			
 
				-        weights_list = list(weights)
			
 
				-        for name, loaded_weight in progress_bar(weights_list,
			
 
				-                                                desc="Loading modules..."):
			
 
				+        for name, loaded_weight in weights:
			
 
				             if "rotary_emb.inv_freq" in name:
			
 
				                 continue
			
 
				             # post_layernorm is not needed in CLIPVisionModel
			
--- a/aphrodite/modeling/models/qwen.py
+++ b/aphrodite/modeling/models/qwen.py
@@ -276,9 +276,7 @@ class QWenLMHeadModel(nn.Module):
 
				             ("gate_up_proj", "w1", 1),
			
 
				         ]
			
 
				         params_dict = dict(self.named_parameters())
			
 
				-        weights_list = list(weights)
			
 
				-        for name, loaded_weight in progress_bar(weights_list,
			
 
				-                                                desc="Loading modules..."):
			
 
				+        for name, loaded_weight in weights:
			
 
				             if "rotary_emb.inv_freq" in name:
			
 
				                 continue
			
 
				             for (param_name, weight_name, shard_id) in stacked_params_mapping:
			
--- a/aphrodite/modeling/models/qwen2_moe.py
+++ b/aphrodite/modeling/models/qwen2_moe.py
@@ -449,9 +449,7 @@ class Qwen2MoeForCausalLM(nn.Module):
 
				             num_experts=self.config.num_experts)
			
 
				 
			
 
				         params_dict = dict(self.named_parameters())
			
 
				-        weights_list = list(weights)
			
 
				-        for name, loaded_weight in progress_bar(weights_list,
			
 
				-                                                desc="Loading modules..."):
			
 
				+        for name, loaded_weight in weights:
			
 
				             if "rotary_emb.inv_freq" in name:
			
 
				                 continue
			
 
				             for (param_name, weight_name, shard_id) in stacked_params_mapping:
			
--- a/aphrodite/modeling/models/siglip.py
+++ b/aphrodite/modeling/models/siglip.py
@@ -644,9 +644,7 @@ class SiglipVisionModel(nn.Module):
 
				         params_dict = dict(self.named_parameters())
			
 
				         layer_count = len(self.vision_model.encoder.layers)
			
 
				 
			
 
				-        weights_list = list(weights)
			
 
				-        for name, loaded_weight in progress_bar(weights_list,
			
 
				-                                                desc="Loading modules..."):
			
 
				+        for name, loaded_weight in weights:
			
 
				             # omit layers when num_hidden_layers_override is set
			
 
				             if "vision_model.encoder.layers." in name:
			
 
				                 layer_idx = int(name.split(".")[3])
			
--- a/aphrodite/modeling/models/solar.py
+++ b/aphrodite/modeling/models/solar.py
@@ -481,9 +481,7 @@ class SolarForCausalLM(nn.Module, SupportsLoRA):
 
				             (".gate_up_proj", ".up_proj", 1),
			
 
				         ]
			
 
				         params_dict = dict(self.named_parameters())
			
 
				-        weights_list = list(weights)
			
 
				-        for name, loaded_weight in progress_bar(weights_list,
			
 
				-                                                desc="Loading modules..."):
			
 
				+        for name, loaded_weight in weights:
			
 
				             if "rotary_emb.inv_freq" in name:
			
 
				                 continue
			
 
				             if ("rotary_emb.cos_cached" in name
			
--- a/aphrodite/modeling/models/stablelm.py
+++ b/aphrodite/modeling/models/stablelm.py
@@ -285,9 +285,7 @@ class StablelmForCausalLM(nn.Module):
 
				             ("gate_up_proj", "up_proj", 1),
			
 
				         ]
			
 
				         params_dict = dict(self.named_parameters())
			
 
				-        weights_list = list(weights)
			
 
				-        for name, loaded_weight in progress_bar(weights_list,
			
 
				-                                                desc="Loading modules..."):
			
 
				+        for name, loaded_weight in weights:
			
 
				             if "rotary_emb.inv_freq" in name:
			
 
				                 continue
			
 
				             if ("rotary_emb.cos_cached" in name
			
--- a/aphrodite/modeling/models/starcoder2.py
+++ b/aphrodite/modeling/models/starcoder2.py
@@ -294,9 +294,7 @@ class Starcoder2ForCausalLM(nn.Module):
 
				         ]
			
 
				 
			
 
				         params_dict = dict(self.named_parameters(remove_duplicate=False))
			
 
				-        weights_list = list(weights)
			
 
				-        for name, loaded_weight in progress_bar(weights_list,
			
 
				-                                                desc="Loading modules..."):
			
 
				+        for name, loaded_weight in weights:
			
 
				             if "rotary_emb.inv_freq" in name:
			
 
				                 continue
			
 
				 
			
--- a/aphrodite/modeling/models/xverse.py
+++ b/aphrodite/modeling/models/xverse.py
@@ -351,9 +351,7 @@ class XverseForCausalLM(nn.Module, SupportsLoRA):
 
				             ("gate_up_proj", "up_proj", 1),
			
 
				         ]
			
 
				         params_dict = dict(self.named_parameters())
			
 
				-        weights_list = list(weights)
			
 
				-        for name, loaded_weight in progress_bar(weights_list,
			
 
				-                                                desc="Loading modules..."):
			
 
				+        for name, loaded_weight in weights:
			
 
				             if ("rotary_emb.inv_freq" in name
			
 
				                     or "rotary_emb.cos_cached" in name
			
 
				                     or "rotary_emb.sin_cached" in name):