diff --git a/nemo/collections/nlp/models/language_modeling/megatron/gpt_full_te_layer_autocast_spec.py b/nemo/collections/nlp/models/language_modeling/megatron/gpt_full_te_layer_autocast_spec.py
index 39ae89b50431..d94d218ef5d0 100644
--- a/nemo/collections/nlp/models/language_modeling/megatron/gpt_full_te_layer_autocast_spec.py
+++ b/nemo/collections/nlp/models/language_modeling/megatron/gpt_full_te_layer_autocast_spec.py
@@ -241,9 +241,7 @@ def __init__(self, config, layer_number=1, hidden_dropout=None):
         super().__init__(**transformer_layer_args)
 
         if self.config.enable_cuda_graph and self.training:
-            assert (
-                not config.cpu_offloading and config.recompute_granularity is None
-            ), "Cudagraphs not supported"
+            assert not config.cpu_offloading and config.recompute_granularity is None, "Cudagraphs not supported"
             self.add_module('cudagraph_manager', CudaGraphManager())
 
     # Called by MCore's TransformerBlock.forward