未验证 提交 80f94c10 编写于 作者: 江户川闰土's avatar 江户川闰土 提交者: GitHub

fix #2240: wrong time unit in flops_profiler (#2241)

Co-authored-by: Olatunji Ruwase <olruwase@microsoft.com>
Co-authored-by: Jeff Rasley <jerasley@microsoft.com>
上级 2a644488
...@@ -431,7 +431,7 @@ class TopKGate(Module): ...@@ -431,7 +431,7 @@ class TopKGate(Module):
if self.wall_clock_breakdown: if self.wall_clock_breakdown:
self.timers('TopKGate').stop() self.timers('TopKGate').stop()
self.gate_time = self.timers('TopKGate').elapsed(reset=False) * 1000 self.gate_time = self.timers('TopKGate').elapsed(reset=False)
return gate_output return gate_output
...@@ -535,7 +535,7 @@ class MOELayer(Base): ...@@ -535,7 +535,7 @@ class MOELayer(Base):
if self.wall_clock_breakdown: if self.wall_clock_breakdown:
self.timers('falltoall').stop() self.timers('falltoall').stop()
self.time_falltoall = self.timers('falltoall').elapsed(reset=False) * 1000 self.time_falltoall = self.timers('falltoall').elapsed(reset=False)
# Re-shape after all-to-all: ecm -> gecm # Re-shape after all-to-all: ecm -> gecm
dispatched_input = dispatched_input.reshape(self.ep_size, dispatched_input = dispatched_input.reshape(self.ep_size,
...@@ -552,7 +552,7 @@ class MOELayer(Base): ...@@ -552,7 +552,7 @@ class MOELayer(Base):
if self.wall_clock_breakdown: if self.wall_clock_breakdown:
self.timers('salltoall').stop() self.timers('salltoall').stop()
self.time_salltoall = self.timers('salltoall').elapsed(reset=False) * 1000 self.time_salltoall = self.timers('salltoall').elapsed(reset=False)
# Re-shape back: gecm -> ecm # Re-shape back: gecm -> ecm
expert_output = expert_output.reshape(self.ep_size * self.num_local_experts, expert_output = expert_output.reshape(self.ep_size * self.num_local_experts,
...@@ -576,6 +576,6 @@ class MOELayer(Base): ...@@ -576,6 +576,6 @@ class MOELayer(Base):
if self.wall_clock_breakdown: if self.wall_clock_breakdown:
self.timers('moe').stop() self.timers('moe').stop()
self.time_moe = self.timers('moe').elapsed(reset=False) * 1000 self.time_moe = self.timers('moe').elapsed(reset=False)
return a return a
...@@ -2058,8 +2058,7 @@ class DeepSpeedEngine(Module): ...@@ -2058,8 +2058,7 @@ class DeepSpeedEngine(Module):
self._write_monitor() self._write_monitor()
if self.has_moe_layers: if self.has_moe_layers:
fwd_time = self.timers(FORWARD_GLOBAL_TIMER).elapsed( fwd_time = self.timers(FORWARD_GLOBAL_TIMER).elapsed(reset=False)
reset=False) * 1000
self.print_forward_breakdown(fwd_time=fwd_time) self.print_forward_breakdown(fwd_time=fwd_time)
self.timers.log(self.engine_timers.global_timers) self.timers.log(self.engine_timers.global_timers)
...@@ -2103,29 +2102,27 @@ class DeepSpeedEngine(Module): ...@@ -2103,29 +2102,27 @@ class DeepSpeedEngine(Module):
self.summary_events = [ self.summary_events = [
( (
f"Train/Samples/elapsed_time_ms_forward", f"Train/Samples/elapsed_time_ms_forward",
self.timers(FORWARD_GLOBAL_TIMER).elapsed(reset=False) * 1000.0, self.timers(FORWARD_GLOBAL_TIMER).elapsed(reset=False),
self.global_samples, self.global_samples,
), ),
( (
f"Train/Samples/elapsed_time_ms_backward", f"Train/Samples/elapsed_time_ms_backward",
self.timers(BACKWARD_GLOBAL_TIMER).elapsed(reset=False) * 1000.0, self.timers(BACKWARD_GLOBAL_TIMER).elapsed(reset=False),
self.global_samples, self.global_samples,
), ),
( (
f"Train/Samples/elapsed_time_ms_backward_inner", f"Train/Samples/elapsed_time_ms_backward_inner",
self.timers(BACKWARD_INNER_GLOBAL_TIMER).elapsed(reset=False) * self.timers(BACKWARD_INNER_GLOBAL_TIMER).elapsed(reset=False),
1000.0,
self.global_samples, self.global_samples,
), ),
( (
f"Train/Samples/elapsed_time_ms_backward_allreduce", f"Train/Samples/elapsed_time_ms_backward_allreduce",
self.timers(BACKWARD_REDUCE_GLOBAL_TIMER).elapsed(reset=False) * self.timers(BACKWARD_REDUCE_GLOBAL_TIMER).elapsed(reset=False),
1000.0,
self.global_samples, self.global_samples,
), ),
( (
f"Train/Samples/elapsed_time_ms_step", f"Train/Samples/elapsed_time_ms_step",
self.timers(STEP_GLOBAL_TIMER).elapsed(reset=False) * 1000.0, self.timers(STEP_GLOBAL_TIMER).elapsed(reset=False),
self.global_samples, self.global_samples,
), ),
] ]
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册