From 521cfb4c9359c4033f34776da4b73fabf3e94240 Mon Sep 17 00:00:00 2001
From: zhehuaichen <139396994+zhehuaichen@users.noreply.github.com>
Date: Wed, 22 Nov 2023 20:35:17 -0500
Subject: [PATCH 1/2] add Dutch P&C FC model info (#7892)

* add Dutch P&C FC model info

Signed-off-by: zhehuaichen

* update order of the results

Signed-off-by: zhehuaichen

---------

Signed-off-by: zhehuaichen
---
 .../asr/data/scores/nl/fastconformer_nl.csv  |  2 ++
 .../data/scores_pc/nl/fastconformer_nl.csv   |  2 ++
 docs/source/asr/scores.rst                   | 24 ++++++++++++++++---
 3 files changed, 25 insertions(+), 3 deletions(-)
 create mode 100644 docs/source/asr/data/scores/nl/fastconformer_nl.csv
 create mode 100644 docs/source/asr/data/scores_pc/nl/fastconformer_nl.csv

diff --git a/docs/source/asr/data/scores/nl/fastconformer_nl.csv b/docs/source/asr/data/scores/nl/fastconformer_nl.csv
new file mode 100644
index 000000000000..a0f2d61b0a9b
--- /dev/null
+++ b/docs/source/asr/data/scores/nl/fastconformer_nl.csv
@@ -0,0 +1,2 @@
+Model Name,Language,MCV Test-Set v12.0 (nl),MLS Test (nl)
+stt_nl_fastconformer_hybrid_large_pc,nl,9.2 %,12.1 %
\ No newline at end of file
diff --git a/docs/source/asr/data/scores_pc/nl/fastconformer_nl.csv b/docs/source/asr/data/scores_pc/nl/fastconformer_nl.csv
new file mode 100644
index 000000000000..8faf0d52f9ff
--- /dev/null
+++ b/docs/source/asr/data/scores_pc/nl/fastconformer_nl.csv
@@ -0,0 +1,2 @@
+Model Name,Language,MCV Test-Set v12.0 (nl),MLS Test (nl)
+stt_nl_fastconformer_hybrid_large_pc,nl,32.1 %,25.1 %
\ No newline at end of file
diff --git a/docs/source/asr/scores.rst b/docs/source/asr/scores.rst
index d008a26700ec..31ea31292847 100644
--- a/docs/source/asr/scores.rst
+++ b/docs/source/asr/scores.rst
@@ -278,6 +278,16 @@ KAB
 
 --------------------
 
+NL
+^^
+
+.. csv-table::
+   :header-rows: 1
+   :align: left
+   :file: data/scores/nl/fastconformer_nl.csv
+
+--------------------
+
 PL
 ^^
 
@@ -350,7 +360,6 @@ ZH
 
 --------------------
 
-
 Scores with Punctuation and Capitalization
 ------------------------------------------
 
@@ -414,6 +423,16 @@ IT with P&C
 
 --------------------
 
+NL with P&C
+^^^^^^^^^^^
+
+.. csv-table::
+   :header-rows: 1
+   :align: left
+   :file: data/scores_pc/nl/fastconformer_nl.csv
+
+--------------------
+
 PL with P&C
 ^^^^^^^^^^^
 
@@ -432,5 +451,4 @@ UA with P&C
    :align: left
    :file: data/scores_pc/ua/fastconformer_ua.csv
 
---------------------
-
+--------------------
\ No newline at end of file

From 79bc92957500f3c9d3976c25ee1cb830df2334ec Mon Sep 17 00:00:00 2001
From: Zhilin Wang
Date: Sat, 25 Nov 2023 12:27:15 -0800
Subject: [PATCH 2/2] fix issues with convert_nemo_llama_to_hf.py (#7922)

---
 .../convert_nemo_llama_to_hf.py              | 28 +++++++++----------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/scripts/nlp_language_modeling/convert_nemo_llama_to_hf.py b/scripts/nlp_language_modeling/convert_nemo_llama_to_hf.py
index 334a70a29c80..cb1cfb3b5b79 100644
--- a/scripts/nlp_language_modeling/convert_nemo_llama_to_hf.py
+++ b/scripts/nlp_language_modeling/convert_nemo_llama_to_hf.py
@@ -91,6 +91,7 @@ def convert(input_nemo_file, output_hf_file, precision=None, cpu_only=False) ->
         map_location = torch.device('cpu')
         model_config = MegatronGPTModel.restore_from(input_nemo_file, trainer=dummy_trainer, return_config=True)
         model_config.use_cpu_initialization = True
+        model_config.tensor_model_parallel_size = 1
     else:
         map_location, model_config = None, None
 
@@ -113,7 +114,6 @@ def convert(input_nemo_file, output_hf_file, precision=None, cpu_only=False) ->
     param_to_weights = lambda param: param.to(dtype)
 
     checkpoint = OrderedDict()
-    checkpoint['state_dict'] = OrderedDict()
 
     hidden_size = model.cfg.hidden_size
     head_num = model.cfg.num_attention_heads
@@ -128,7 +128,7 @@ def convert(input_nemo_file, output_hf_file, precision=None, cpu_only=False) ->
     # Embedding
     embed_weight = model.state_dict()[f'model.embedding.word_embeddings.weight']
     embed_weights_base_name = f'model.embed_tokens.weight'
-    checkpoint['state_dict'][embed_weights_base_name] = param_to_weights(embed_weight)
+    checkpoint[embed_weights_base_name] = param_to_weights(embed_weight)
 
     for l in range(int(num_layers)):
         print(f"converting layer {l}")
@@ -158,14 +158,14 @@ def convert(input_nemo_file, output_hf_file, precision=None, cpu_only=False) ->
         k_weights_base_name = f'model.layers.{l}.self_attn.k_proj.weight'
         v_weights_base_name = f'model.layers.{l}.self_attn.v_proj.weight'
 
-        checkpoint['state_dict'][q_weights_base_name] = param_to_weights(qkv_weights[q_slice].reshape(-1, hidden_size))
-        checkpoint['state_dict'][k_weights_base_name] = param_to_weights(qkv_weights[k_slice].reshape(-1, hidden_size))
-        checkpoint['state_dict'][v_weights_base_name] = param_to_weights(qkv_weights[v_slice].reshape(-1, hidden_size))
+        checkpoint[q_weights_base_name] = param_to_weights(qkv_weights[q_slice].reshape(-1, hidden_size))
+        checkpoint[k_weights_base_name] = param_to_weights(qkv_weights[k_slice].reshape(-1, hidden_size))
+        checkpoint[v_weights_base_name] = param_to_weights(qkv_weights[v_slice].reshape(-1, hidden_size))
 
         # attention dense
         o_weight = model.state_dict()[f'model.decoder.layers.{l}.self_attention.linear_proj.weight']
         o_weight_base_name = f'model.layers.{l}.self_attn.o_proj.weight'
-        checkpoint['state_dict'][o_weight_base_name] = param_to_weights(o_weight)
+        checkpoint[o_weight_base_name] = param_to_weights(o_weight)
 
         # mlp
         mlp_weights = model.state_dict()[f'model.decoder.layers.{l}.mlp.linear_fc1.weight']
@@ -175,31 +175,31 @@ def convert(input_nemo_file, output_hf_file, precision=None, cpu_only=False) ->
         mlp_down_proj_base_name = f'model.layers.{l}.mlp.gate_proj.weight'
         mlp_gate_proj_base_name = f'model.layers.{l}.mlp.up_proj.weight'
 
-        checkpoint['state_dict'][mlp_down_proj_base_name] = param_to_weights(mlp_down_proj_weight)
-        checkpoint['state_dict'][mlp_gate_proj_base_name] = param_to_weights(mlp_gate_proj_weight)
+        checkpoint[mlp_down_proj_base_name] = param_to_weights(mlp_down_proj_weight)
+        checkpoint[mlp_gate_proj_base_name] = param_to_weights(mlp_gate_proj_weight)
 
         mlp_up_proj_weight = model.state_dict()[f'model.decoder.layers.{l}.mlp.linear_fc2.weight']
         mlp_up_proj_base_name = f'model.layers.{l}.mlp.down_proj.weight'
-        checkpoint['state_dict'][mlp_up_proj_base_name] = param_to_weights(mlp_up_proj_weight)
+        checkpoint[mlp_up_proj_base_name] = param_to_weights(mlp_up_proj_weight)
 
         # layernorm
         input_ln_weight = model.state_dict()[f'model.decoder.layers.{l}.self_attention.linear_qkv.layer_norm_weight']
         input_ln_base_name = f'model.layers.{l}.input_layernorm.weight'
-        checkpoint['state_dict'][input_ln_base_name] = param_to_weights(input_ln_weight)
+        checkpoint[input_ln_base_name] = param_to_weights(input_ln_weight)
 
         post_attn_ln_weight = model.state_dict()[f'model.decoder.layers.{l}.mlp.linear_fc1.layer_norm_weight']
         post_attn_ln_base_name = f'model.layers.{l}.post_attention_layernorm.weight'
-        checkpoint['state_dict'][post_attn_ln_base_name] = param_to_weights(post_attn_ln_weight)
+        checkpoint[post_attn_ln_base_name] = param_to_weights(post_attn_ln_weight)
 
         print(f"done layer {l}")
 
     final_ln_weight = model.state_dict()[f'model.decoder.final_layernorm.weight']
     final_ln_base_name = f'model.norm.weight'
-    checkpoint['state_dict'][final_ln_base_name] = param_to_weights(final_ln_weight)
+    checkpoint[final_ln_base_name] = param_to_weights(final_ln_weight)
 
     output_layer_weight = model.state_dict()[f'model.output_layer.weight']
     output_layer_base_name = f'lm_head.weight'
-    checkpoint['state_dict'][output_layer_base_name] = param_to_weights(output_layer_weight)
+    checkpoint[output_layer_base_name] = param_to_weights(output_layer_weight)
 
     os.makedirs(os.path.dirname(output_hf_file), exist_ok=True)
     torch.save(checkpoint, output_hf_file)
@@ -210,7 +210,7 @@ def replace_hf_weights(weights_file, input_hf_path, output_hf_path):
     model = AutoModelForCausalLM.from_pretrained(input_hf_path, local_files_only=True)
     nemo_exported = torch.load(weights_file)
 
-    model.load_state_dict(nemo_exported['state_dict'])
+    model.load_state_dict(nemo_exported)
     model.save_pretrained(output_hf_path)
     logging.info(f"Full HF model saved to {output_hf_path}")
 
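
Note on the practical effect of the second patch: convert() now writes the HF-named tensors at the top level of the saved file instead of nesting them under a 'state_dict' key, and replace_hf_weights() passes the loaded object straight to load_state_dict(). The snippet below is an illustrative sketch, not part of either patch; the file paths are placeholders, and the strict=False check is an added assumption. It simply mirrors what replace_hf_weights() does, as a way to sanity-check an exported weights file against a matching base Llama checkpoint.

    # Hypothetical sanity check for a weights file produced by the patched convert().
    # Assumes the base HF checkpoint has the same architecture (hidden size, layer
    # count, head count) as the NeMo model that was converted.
    import torch
    from transformers import AutoModelForCausalLM

    weights_file = "/tmp/llama_hf_weights.pt"        # placeholder: the output_hf_file passed to convert()
    base_hf_path = "/path/to/base_llama_checkpoint"  # placeholder: the input_hf_path used by replace_hf_weights()

    # After this patch the file holds a flat mapping of HF parameter names to tensors,
    # e.g. 'model.embed_tokens.weight', 'model.layers.0.self_attn.q_proj.weight', ...
    exported = torch.load(weights_file, map_location="cpu")
    print(list(exported.keys())[:5])

    model = AutoModelForCausalLM.from_pretrained(base_hf_path, local_files_only=True)
    missing, unexpected = model.load_state_dict(exported, strict=False)
    print(f"missing keys: {missing}")
    print(f"unexpected keys: {unexpected}")

If both key lists print empty, the exported file lines up with the base checkpoint and save_pretrained() on the loaded model reproduces what replace_hf_weights() writes out.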