diff --git a/nemo/collections/nlp/models/language_modeling/megatron_base_model.py b/nemo/collections/nlp/models/language_modeling/megatron_base_model.py index d7130788f9d8..8637ea08ac7b 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_base_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_base_model.py @@ -475,6 +475,7 @@ def build_transformer_config(self) -> TransformerConfig: 'recompute_num_layers': recompute_num_layers, 'distribute_saved_activations': False, # not currently used in NeMo 'fp8': None, + 'deallocate_pipeline_outputs': True } # populate the transformer config dict