Remove unnecessary optim in convert_HF (#71)
francoishernandez authored Jul 12, 2024
1 parent 2009415 commit d3f05fe
Showing 7 changed files with 0 additions and 13 deletions.
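All 13 deletions follow the same pattern: every converter stops pinning optim="fusedadam" in its training config, and six of the seven also stop pinning self_attn_type="scaled-dot" in the model config, presumably deferring to the library defaults instead. A minimal sketch of the pattern, using plain dicts for illustration (the keyword names are taken from the hunks below; the enclosing config class names are not shown in the diff and are not assumed here):

    # Illustrative only: dict() stands in for the config objects the converters build.
    training_kwargs = dict(
        accum_count=[32],
        accum_steps=[0],
        valid_batch_size=256,
        # optim="fusedadam",  # dropped by this commit in all seven converters
    )
    model_kwargs = dict(
        layer_norm="rms",
        pos_ffn_activation_fn="silu",
        # self_attn_type="scaled-dot",  # dropped in six of the seven converters
    )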
1 change: 0 additions & 1 deletion eole/bin/convert/convert_HF.py
@@ -1029,7 +1029,6 @@ def get_weight(checkpoint, tensor_name):
quant_type=quant_type,
w_bit=w_bit,
group_size=group_size,
- optim="fusedadam",
),
)
config_dict = recursive_model_fields_set(config)
2 changes: 0 additions & 2 deletions eole/bin/convert/convert_T5.py
@@ -420,7 +420,6 @@ def run(cls, args):
),
layer_norm="rms",
pos_ffn_activation_fn="gated-gelu",
self_attn_type="scaled-dot",
relative_positions_buckets=params["relative_attention_num_buckets"],
parallel_residual=False,
add_qkvbias=False,
@@ -437,7 +436,6 @@ def run(cls, args):
accum_count=[32],
accum_steps=[0],
valid_batch_size=256,
- optim="fusedadam",
),
)

2 changes: 0 additions & 2 deletions eole/bin/convert/convert_falcon.py
@@ -381,7 +381,6 @@ def run(cls, args):
# tgt_word_vec_size=tgt_word_vec_size,
model_type="text",
pos_ffn_activation_fn="gelu",
- self_attn_type="scaled-dot", # not sure if scaled-dot-flash is fine
num_kv=num_kv,
parallel_residual=True,
shared_layer_norm=shared_layer,
@@ -397,7 +396,6 @@ def run(cls, args):
accum_count=[32],
accum_steps=[0],
valid_batch_size=256,
- optim="fusedadam",
),
)

2 changes: 0 additions & 2 deletions eole/bin/convert/convert_llama.py
@@ -374,7 +374,6 @@ def run(cls, args):
layer_norm="rms",
norm_eps=norm_eps,
pos_ffn_activation_fn="silu",
- self_attn_type="scaled-dot",
rotary_interleave=True,
rotary_theta=10000,
rotary_dim=0,
@@ -393,7 +392,6 @@ def run(cls, args):
accum_count=[32],
accum_steps=[0],
valid_batch_size=256,
- optim="fusedadam",
),
)

2 changes: 0 additions & 2 deletions eole/bin/convert/convert_mpt.py
@@ -223,7 +223,6 @@ def run(cls, args):
# tgt_word_vec_size=tgt_word_vec_size,
layer_norm="standard",
pos_ffn_activation_fn="gelu",
- self_attn_type="scaled-dot",
parallel_residual=False,
add_qkvbias=False,
add_ffnbias=False,
@@ -237,7 +236,6 @@ def run(cls, args):
accum_count=[32],
accum_steps=[0],
valid_batch_size=256,
- optim="fusedadam",
),
)

2 changes: 0 additions & 2 deletions eole/bin/convert/convert_redpajama.py
@@ -281,7 +281,6 @@ def run(cls, args):
# tgt_word_vec_size=tgt_word_vec_size,
layer_norm="standard",
pos_ffn_activation_fn="gelu",
- self_attn_type="scaled-dot",
parallel_residual=False,
add_qkvbias=True,
add_ffnbias=True,
@@ -295,7 +294,6 @@ def run(cls, args):
accum_count=[32],
accum_steps=[0],
valid_batch_size=256,
- optim="fusedadam",
),
)

2 changes: 0 additions & 2 deletions eole/bin/convert/convert_xgen.py
@@ -239,7 +239,6 @@ def run(cls, args):
# tgt_word_vec_size=tgt_word_vec_size,
layer_norm="rms",
pos_ffn_activation_fn="silu",
- self_attn_type="scaled-dot",
parallel_residual=False,
add_qkvbias=False,
add_ffnbias=False,
@@ -253,7 +252,6 @@ def run(cls, args):
accum_count=[32],
accum_steps=[0],
valid_batch_size=256,
- optim="fusedadam",
),
)

