Commit 45adc1d
[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
pre-commit-ci[bot] authored and jiemingz committed Mar 22, 2024
1 parent cc34fcd commit 45adc1d
Showing 2 changed files with 6 additions and 8 deletions.
13 changes: 6 additions & 7 deletions nemo/collections/nlp/parts/nlp_overrides.py
@@ -337,8 +337,9 @@ def save_checkpoint(
             hasattr(self.lightning_module, 'sharded_state_dict')
             and self.lightning_module.sharded_state_dict() is not None
         ):
-            assert len(checkpoint['optimizer_states']) == 1, \
-                "Currently only support checkpointing 1 distributed optimizer per time!"
+            assert (
+                len(checkpoint['optimizer_states']) == 1
+            ), "Currently only support checkpointing 1 distributed optimizer per time!"
             # converts the optimizer states to their sharded equivalents
             sharded_optim_state = self.optimizer_sharded_state_dict(
                 unsharded_optim_state=checkpoint['optimizer_states'][0]
@@ -439,15 +440,13 @@ def load_checkpoint(self, checkpoint_path: Union[str, Path]) -> Dict[str, Any]:

             # after dist_checkpointing.load, sharded tensors will be replaced with tensors
             checkpoint['state_dict'] = sharded_state_dict
-            checkpoint['optimizer_states'] = [self.optimizer_sharded_state_dict()]
+            checkpoint['optimizer_states'] = [self.optimizer_sharded_state_dict()]
             strategy = dist_checkpointing.strategies.tensorstore.TensorStoreLoadShardedStrategy(
                 load_directly_on_device=True
             )
             checkpoint = dist_checkpointing.load(
-                sharded_state_dict=checkpoint,
-                checkpoint_dir=checkpoint_path,
-                sharded_strategy=strategy
-            )
+                sharded_state_dict=checkpoint, checkpoint_dir=checkpoint_path, sharded_strategy=strategy
+            )

             return checkpoint

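For reference, here is a minimal, self-contained sketch of the load path that this hunk reformats: it calls megatron.core's dist_checkpointing.load with the TensorStore sharded-load strategy, exactly as in the diff above. The wrapper function name is hypothetical and not part of NeMo; only the dist_checkpointing calls shown in the diff are assumed to exist.

# Minimal sketch (not the NeMo implementation) of loading a distributed
# checkpoint with the TensorStore strategy used in the diff above.
# `load_sharded_checkpoint` is a hypothetical helper, not a NeMo API.
from pathlib import Path
from typing import Any, Dict, Union

from megatron.core import dist_checkpointing
from megatron.core.dist_checkpointing.strategies.tensorstore import TensorStoreLoadShardedStrategy


def load_sharded_checkpoint(
    sharded_state_dict: Dict[str, Any], checkpoint_path: Union[str, Path]
) -> Dict[str, Any]:
    # Load tensor shards directly onto the target device instead of staging them in host memory.
    strategy = TensorStoreLoadShardedStrategy(load_directly_on_device=True)
    # ShardedTensor placeholders in sharded_state_dict are replaced with the loaded tensors.
    return dist_checkpointing.load(
        sharded_state_dict=sharded_state_dict,
        checkpoint_dir=str(checkpoint_path),
        sharded_strategy=strategy,
    )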
1 change: 0 additions & 1 deletion nemo/core/optim/distributed_adam.py
@@ -549,7 +549,6 @@ def _check_params_shard_dtypes(self, params_buckets: Dict[int, DistributedFusedA
         # Handle any remaining dtype conversions
         super()._check_params_shard_dtypes(params_buckets)

-
     def sharded_state_dict(self, model_sharded_state_dict, optimizer_state_dict=None):
         if optimizer_state_dict is None:
             optimizer_state_dict = self.state_dict()
