diff --git a/Jenkinsfile b/Jenkinsfile
index 83223d5c8669..fdd311ba4a59 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -61,7 +61,7 @@ pipeline {
       steps {
         sh 'git clone https://github.com/NVIDIA/Megatron-LM.git && \
             cd Megatron-LM && \
-            git checkout cd2537d444792b487b1ab5a6fa685e09c9957409 && \
+            git checkout e6d7e09845590d0a36bc7f29eb28db974fb8da4e && \
             pip install -e .'
       }
     }
diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py
index fd1382e668cf..96f40b99bdd0 100644
--- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py
+++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py
@@ -468,6 +468,8 @@ def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only):
             no_sync_func=no_sync_func,
             grad_sync_func=grad_sync_func,
             param_sync_func=param_sync_func,
+            overlap_p2p_comm=self.cfg.get('overlap_p2p_comm', False),
+            batch_p2p_comm=self.cfg.get('batch_p2p_comm', True),
         )
 
         # only the last stages of the pipeline return losses