Commit 45adc1d
[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
pre-commit-ci[bot] authored and jiemingz committed Mar 22, 2024
1 parent cc34fcd commit 45adc1d
Showing 2 changed files with 6 additions and 8 deletions.
13 changes: 6 additions & 7 deletions nemo/collections/nlp/parts/nlp_overrides.py
@@ -337,8 +337,9 @@ def save_checkpoint(
             hasattr(self.lightning_module, 'sharded_state_dict')
             and self.lightning_module.sharded_state_dict() is not None
         ):
-            assert len(checkpoint['optimizer_states']) == 1, \
-                "Currently only support checkpointing 1 distributed optimizer per time!"
+            assert (
+                len(checkpoint['optimizer_states']) == 1
+            ), "Currently only support checkpointing 1 distributed optimizer per time!"
             # converts the optimizer states to their sharded equivalents
             sharded_optim_state = self.optimizer_sharded_state_dict(
                 unsharded_optim_state=checkpoint['optimizer_states'][0]
@@ -439,15 +440,13 @@ def load_checkpoint(self, checkpoint_path: Union[str, Path]) -> Dict[str, Any]:

             # after dist_checkpointing.load, sharded tensors will be replaced with tensors
             checkpoint['state_dict'] = sharded_state_dict
-            checkpoint['optimizer_states'] = [self.optimizer_sharded_state_dict()]
+            checkpoint['optimizer_states'] = [self.optimizer_sharded_state_dict()]
             strategy = dist_checkpointing.strategies.tensorstore.TensorStoreLoadShardedStrategy(
                 load_directly_on_device=True
             )
             checkpoint = dist_checkpointing.load(
-                sharded_state_dict=checkpoint,
-                checkpoint_dir=checkpoint_path,
-                sharded_strategy=strategy
-            )
+                sharded_state_dict=checkpoint, checkpoint_dir=checkpoint_path, sharded_strategy=strategy
+            )

             return checkpoint

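For reference, here is a minimal, self-contained sketch of the load path that this hunk reformats: it calls megatron.core's dist_checkpointing.load with the TensorStore sharded-load strategy, exactly as in the diff above. The wrapper function name is hypothetical and not part of NeMo; only the dist_checkpointing calls shown in the diff are assumed to exist.

# Minimal sketch (not the NeMo implementation) of loading a distributed
# checkpoint with the TensorStore strategy used in the diff above.
# `load_sharded_checkpoint` is a hypothetical helper, not a NeMo API.
from pathlib import Path
from typing import Any, Dict, Union

from megatron.core import dist_checkpointing
from megatron.core.dist_checkpointing.strategies.tensorstore import TensorStoreLoadShardedStrategy


def load_sharded_checkpoint(
    sharded_state_dict: Dict[str, Any], checkpoint_path: Union[str, Path]
) -> Dict[str, Any]:
    # Load tensor shards directly onto the target device instead of staging them in host memory.
    strategy = TensorStoreLoadShardedStrategy(load_directly_on_device=True)
    # ShardedTensor placeholders in sharded_state_dict are replaced with the loaded tensors.
    return dist_checkpointing.load(
        sharded_state_dict=sharded_state_dict,
        checkpoint_dir=str(checkpoint_path),
        sharded_strategy=strategy,
    )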
1 change: 0 additions & 1 deletion nemo/core/optim/distributed_adam.py
@@ -549,7 +549,6 @@ def _check_params_shard_dtypes(self, params_buckets: Dict[int, DistributedFusedA
         # Handle any remaining dtype conversions
         super()._check_params_shard_dtypes(params_buckets)

-
     def sharded_state_dict(self, model_sharded_state_dict, optimizer_state_dict=None):
         if optimizer_state_dict is None:
             optimizer_state_dict = self.state_dict()
