
Commit 3816eb2

More fixes to how FP16_Optimizer loads state from a checkpoint

Parent: a05738e

2 files changed: 3 additions & 3 deletions


deepspeed/runtime/fp16/fused_optimizer.py

Lines changed: 2 additions & 2 deletions
@@ -457,7 +457,7 @@ def load_state_dict(self, state_dict, load_optimizer_states=True):
             optimizer.load_state_dict(checkpoint['optimizer'])
         """
 
-        if state_dict is None:
+        if state_dict is None or "dynamic_loss_scale" not in state_dict:
             state_dict = self.state_dict()
             self.refresh_fp32_params()
             return
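With this guard, a checkpoint that carries no FP16 optimizer state (detected by the absent "dynamic_loss_scale" key) now takes the same fallback path as state_dict=None: the fp32 master parameters are refreshed from the model's fp16 parameters instead of the load failing on the missing state. A minimal sketch of that behavior, using a simplified stand-in class (TinyFP16Optimizer and its fields are illustrative, not the real FP16_Optimizer):

import torch

class TinyFP16Optimizer:
    """Illustrative stand-in; models only the fallback path in the hunk above."""

    def __init__(self, fp16_params):
        self.fp16_groups_flat = list(fp16_params)
        # fp32 master copies of the fp16 weights
        self.fp32_groups_flat = [p.detach().clone().float() for p in fp16_params]

    def refresh_fp32_params(self):
        # Rebuild the fp32 masters from the current fp16 weights.
        for fp32, fp16 in zip(self.fp32_groups_flat, self.fp16_groups_flat):
            fp32.data.copy_(fp16.data)

    def load_state_dict(self, state_dict, load_optimizer_states=True):
        # The commit's guard: no state_dict, or one without FP16 optimizer
        # state, means there is nothing to restore; refresh and return.
        if state_dict is None or "dynamic_loss_scale" not in state_dict:
            self.refresh_fp32_params()
            return
        # ... normal restore path elided ...

# A weights-only checkpoint (no "dynamic_loss_scale") no longer aborts loading.
opt = TinyFP16Optimizer([torch.ones(4, dtype=torch.float16)])
opt.load_state_dict({"module": {}})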
@@ -492,7 +492,7 @@ def load_state_dict(self, state_dict, load_optimizer_states=True):
                     self.fp32_groups_flat, state_dict["fp32_groups_flat"]
             ):
                 current.data.copy_(saved.data)
-        except RuntimeError as error:
+        except (RuntimeError, KeyError) as error:
             print(error)
             print(
                 "Error in loading fp32 model parameters!\nRefreshing fp32 model params from the model's fp16 params instead. This may incur some precision loss."

setup.py

Lines changed: 1 addition & 1 deletion
@@ -167,7 +167,7 @@ def op_enabled(op_name):
 
 start_time = time.time()
 
-setup(name='deepspeed',
+setup(name='deepspeed-igor',
       version=version_str,
       description='DeepSpeed library',
       long_description=readme_text,
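Renaming the distribution in setup(name=...) changes only the name that pip and PyPI see; the import path comes from the packages= argument, which this hunk leaves untouched. Assuming the fork still packages the deepspeed package itself (not shown in the diff), the result looks like this sketch:

import deepspeed  # import path unchanged (assumes packages=['deepspeed', ...])
from importlib.metadata import distribution

# The installed distribution's metadata reflects setup(name='deepspeed-igor').
print(distribution("deepspeed-igor").metadata["Name"])  # -> deepspeed-igor

One packaging caveat (general pip behavior, not stated in the commit): because both distributions install the same deepspeed import package, deepspeed-igor should replace, not coexist with, an upstream deepspeed install.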
