diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py index 96f40b99bdd0..9aadb6853190 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py @@ -1282,7 +1282,7 @@ def _reset_sequence_parallelism_args(self): for module in self.get_gpt_module_list(): for mod in module.modules(): if hasattr(mod, "sequence_parallel"): - mod.sequence_parallel = self.last_sequence_parallel + mod.sequence_parallel = False def _restore_sequence_parallelism_args(self): """ Restores the sequence parallelism parameters using the values saved by