
Commit

Merge branch 'main' into newptl_fix_validation_in_spellmapper
bene-ges committed Nov 27, 2023
2 parents 388bb50 + 79bc929 commit b97d87e
Showing 4 changed files with 39 additions and 17 deletions.
2 changes: 2 additions & 0 deletions docs/source/asr/data/scores/nl/fastconformer_nl.csv
@@ -0,0 +1,2 @@
Model Name,Language,MCV Test-Set v12.0 (nl),MLS Test (nl)
stt_nl_fastconformer_hybrid_large_pc,nl,9.2 %,12.1 %
2 changes: 2 additions & 0 deletions docs/source/asr/data/scores_pc/nl/fastconformer_nl.csv
@@ -0,0 +1,2 @@
Model Name,Language,MCV Test-Set v12.0 (nl),MLS Test (nl)
stt_nl_fastconformer_hybrid_large_pc,nl,32.1 %,25.1 %
24 changes: 21 additions & 3 deletions docs/source/asr/scores.rst
@@ -278,6 +278,16 @@ KAB

--------------------

NL
^^

.. csv-table::
:header-rows: 1
:align: left
:file: data/scores/nl/fastconformer_nl.csv

--------------------

PL
^^

@@ -350,7 +360,6 @@ ZH
--------------------



Scores with Punctuation and Capitalization
------------------------------------------

@@ -414,6 +423,16 @@ IT with P&C

--------------------

NL with P&C
^^^^^^^^^^^

.. csv-table::
:header-rows: 1
:align: left
:file: data/scores_pc/nl/fastconformer_nl.csv

--------------------

PL with P&C
^^^^^^^^^^^

@@ -432,5 +451,4 @@ UA with P&C
:align: left
:file: data/scores_pc/ua/fastconformer_ua.csv

--------------------

--------------------
28 changes: 14 additions & 14 deletions scripts/nlp_language_modeling/convert_nemo_llama_to_hf.py
@@ -91,6 +91,7 @@ def convert(input_nemo_file, output_hf_file, precision=None, cpu_only=False) ->
map_location = torch.device('cpu')
model_config = MegatronGPTModel.restore_from(input_nemo_file, trainer=dummy_trainer, return_config=True)
model_config.use_cpu_initialization = True
+ model_config.tensor_model_parallel_size = 1
else:
map_location, model_config = None, None
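
The hunk above now also collapses tensor parallelism to a single rank when cpu_only is set, presumably so a checkpoint trained with TP > 1 can be restored as one full-size model in a single CPU process. A minimal sketch of how the patched config is then fed back into the restore call, assuming the rest of the script follows the pattern visible in this diff (dummy_trainer and input_nemo_file are defined earlier in the script and not shown here):

```python
# Sketch of the CPU-only restore path; the line added in this commit is marked.
# override_config_path and map_location are existing arguments of NeMo's restore_from.
import torch
from nemo.collections.nlp.models.language_modeling.megatron_gpt_model import MegatronGPTModel

map_location = torch.device('cpu')
model_config = MegatronGPTModel.restore_from(input_nemo_file, trainer=dummy_trainer, return_config=True)
model_config.use_cpu_initialization = True
model_config.tensor_model_parallel_size = 1  # added in this commit: no sharded TP state on a single CPU process

model = MegatronGPTModel.restore_from(
    input_nemo_file,
    trainer=dummy_trainer,
    override_config_path=model_config,  # hand the patched config back to restore_from
    map_location=map_location,
)
```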

@@ -113,7 +114,6 @@ def convert(input_nemo_file, output_hf_file, precision=None, cpu_only=False) ->

param_to_weights = lambda param: param.to(dtype)
checkpoint = OrderedDict()
- checkpoint['state_dict'] = OrderedDict()

hidden_size = model.cfg.hidden_size
head_num = model.cfg.num_attention_heads
@@ -128,7 +128,7 @@ def convert(input_nemo_file, output_hf_file, precision=None, cpu_only=False) ->
# Embedding
embed_weight = model.state_dict()[f'model.embedding.word_embeddings.weight']
embed_weights_base_name = f'model.embed_tokens.weight'
- checkpoint['state_dict'][embed_weights_base_name] = param_to_weights(embed_weight)
+ checkpoint[embed_weights_base_name] = param_to_weights(embed_weight)

for l in range(int(num_layers)):
print(f"converting layer {l}")
@@ -158,14 +158,14 @@ def convert(input_nemo_file, output_hf_file, precision=None, cpu_only=False) ->
k_weights_base_name = f'model.layers.{l}.self_attn.k_proj.weight'
v_weights_base_name = f'model.layers.{l}.self_attn.v_proj.weight'

- checkpoint['state_dict'][q_weights_base_name] = param_to_weights(qkv_weights[q_slice].reshape(-1, hidden_size))
- checkpoint['state_dict'][k_weights_base_name] = param_to_weights(qkv_weights[k_slice].reshape(-1, hidden_size))
- checkpoint['state_dict'][v_weights_base_name] = param_to_weights(qkv_weights[v_slice].reshape(-1, hidden_size))
+ checkpoint[q_weights_base_name] = param_to_weights(qkv_weights[q_slice].reshape(-1, hidden_size))
+ checkpoint[k_weights_base_name] = param_to_weights(qkv_weights[k_slice].reshape(-1, hidden_size))
+ checkpoint[v_weights_base_name] = param_to_weights(qkv_weights[v_slice].reshape(-1, hidden_size))

# attention dense
o_weight = model.state_dict()[f'model.decoder.layers.{l}.self_attention.linear_proj.weight']
o_weight_base_name = f'model.layers.{l}.self_attn.o_proj.weight'
- checkpoint['state_dict'][o_weight_base_name] = param_to_weights(o_weight)
+ checkpoint[o_weight_base_name] = param_to_weights(o_weight)

# mlp
mlp_weights = model.state_dict()[f'model.decoder.layers.{l}.mlp.linear_fc1.weight']
@@ -175,31 +175,31 @@ def convert(input_nemo_file, output_hf_file, precision=None, cpu_only=False) ->
mlp_down_proj_base_name = f'model.layers.{l}.mlp.gate_proj.weight'
mlp_gate_proj_base_name = f'model.layers.{l}.mlp.up_proj.weight'

- checkpoint['state_dict'][mlp_down_proj_base_name] = param_to_weights(mlp_down_proj_weight)
- checkpoint['state_dict'][mlp_gate_proj_base_name] = param_to_weights(mlp_gate_proj_weight)
+ checkpoint[mlp_down_proj_base_name] = param_to_weights(mlp_down_proj_weight)
+ checkpoint[mlp_gate_proj_base_name] = param_to_weights(mlp_gate_proj_weight)

mlp_up_proj_weight = model.state_dict()[f'model.decoder.layers.{l}.mlp.linear_fc2.weight']
mlp_up_proj_base_name = f'model.layers.{l}.mlp.down_proj.weight'
- checkpoint['state_dict'][mlp_up_proj_base_name] = param_to_weights(mlp_up_proj_weight)
+ checkpoint[mlp_up_proj_base_name] = param_to_weights(mlp_up_proj_weight)

# layernorm
input_ln_weight = model.state_dict()[f'model.decoder.layers.{l}.self_attention.linear_qkv.layer_norm_weight']
input_ln_base_name = f'model.layers.{l}.input_layernorm.weight'
- checkpoint['state_dict'][input_ln_base_name] = param_to_weights(input_ln_weight)
+ checkpoint[input_ln_base_name] = param_to_weights(input_ln_weight)

post_attn_ln_weight = model.state_dict()[f'model.decoder.layers.{l}.mlp.linear_fc1.layer_norm_weight']
post_attn_ln_base_name = f'model.layers.{l}.post_attention_layernorm.weight'
- checkpoint['state_dict'][post_attn_ln_base_name] = param_to_weights(post_attn_ln_weight)
+ checkpoint[post_attn_ln_base_name] = param_to_weights(post_attn_ln_weight)

print(f"done layer {l}")

final_ln_weight = model.state_dict()[f'model.decoder.final_layernorm.weight']
final_ln_base_name = f'model.norm.weight'
- checkpoint['state_dict'][final_ln_base_name] = param_to_weights(final_ln_weight)
+ checkpoint[final_ln_base_name] = param_to_weights(final_ln_weight)

output_layer_weight = model.state_dict()[f'model.output_layer.weight']
output_layer_base_name = f'lm_head.weight'
- checkpoint['state_dict'][output_layer_base_name] = param_to_weights(output_layer_weight)
+ checkpoint[output_layer_base_name] = param_to_weights(output_layer_weight)

os.makedirs(os.path.dirname(output_hf_file), exist_ok=True)
torch.save(checkpoint, output_hf_file)
@@ -210,7 +210,7 @@ def replace_hf_weights(weights_file, input_hf_path, output_hf_path):
model = AutoModelForCausalLM.from_pretrained(input_hf_path, local_files_only=True)
nemo_exported = torch.load(weights_file)

- model.load_state_dict(nemo_exported['state_dict'])
+ model.load_state_dict(nemo_exported)
model.save_pretrained(output_hf_path)
logging.info(f"Full HF model saved to {output_hf_path}")
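
Net effect of the checkpoint changes in this file: the exported weights file is now a flat mapping from Hugging Face parameter names to tensors instead of a dict nested under a 'state_dict' key, so replace_hf_weights can pass it straight to load_state_dict. A minimal sketch of consuming the exported file under that assumption (all paths are hypothetical placeholders):

```python
# Hypothetical usage of the file written by convert(); only the new flat layout matters here.
import torch
from transformers import AutoModelForCausalLM

weights_file = "llama_weights_from_nemo.pt"   # hypothetical output path of convert()
base_hf_path = "path/to/llama-hf-checkpoint"  # hypothetical HF model directory

exported = torch.load(weights_file, map_location="cpu")

# After this commit the HF parameter names are top-level keys ...
assert "model.embed_tokens.weight" in exported
# ... whereas before they sat one level down: exported["state_dict"]["model.embed_tokens.weight"]

model = AutoModelForCausalLM.from_pretrained(base_hf_path, local_files_only=True)
model.load_state_dict(exported)             # mirrors the updated replace_hf_weights() above
model.save_pretrained("path/to/output_hf")  # hypothetical destination
```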

