未验证 提交 80f94c10 编写于 作者: 江户川闰土's avatar 江户川闰土 提交者: GitHub

fix #2240: wrong time unit in flops_profiler (#2241)

Co-authored-by: NOlatunji Ruwase <olruwase@microsoft.com>
Co-authored-by: NJeff Rasley <jerasley@microsoft.com>
上级 2a644488
......@@ -431,7 +431,7 @@ class TopKGate(Module):
if self.wall_clock_breakdown:
self.timers('TopKGate').stop()
self.gate_time = self.timers('TopKGate').elapsed(reset=False) * 1000
self.gate_time = self.timers('TopKGate').elapsed(reset=False)
return gate_output
......@@ -535,7 +535,7 @@ class MOELayer(Base):
if self.wall_clock_breakdown:
self.timers('falltoall').stop()
self.time_falltoall = self.timers('falltoall').elapsed(reset=False) * 1000
self.time_falltoall = self.timers('falltoall').elapsed(reset=False)
# Re-shape after all-to-all: ecm -> gecm
dispatched_input = dispatched_input.reshape(self.ep_size,
......@@ -552,7 +552,7 @@ class MOELayer(Base):
if self.wall_clock_breakdown:
self.timers('salltoall').stop()
self.time_salltoall = self.timers('salltoall').elapsed(reset=False) * 1000
self.time_salltoall = self.timers('salltoall').elapsed(reset=False)
# Re-shape back: gecm -> ecm
expert_output = expert_output.reshape(self.ep_size * self.num_local_experts,
......@@ -576,6 +576,6 @@ class MOELayer(Base):
if self.wall_clock_breakdown:
self.timers('moe').stop()
self.time_moe = self.timers('moe').elapsed(reset=False) * 1000
self.time_moe = self.timers('moe').elapsed(reset=False)
return a
......@@ -2058,8 +2058,7 @@ class DeepSpeedEngine(Module):
self._write_monitor()
if self.has_moe_layers:
fwd_time = self.timers(FORWARD_GLOBAL_TIMER).elapsed(
reset=False) * 1000
fwd_time = self.timers(FORWARD_GLOBAL_TIMER).elapsed(reset=False)
self.print_forward_breakdown(fwd_time=fwd_time)
self.timers.log(self.engine_timers.global_timers)
......@@ -2103,29 +2102,27 @@ class DeepSpeedEngine(Module):
self.summary_events = [
(
f"Train/Samples/elapsed_time_ms_forward",
self.timers(FORWARD_GLOBAL_TIMER).elapsed(reset=False) * 1000.0,
self.timers(FORWARD_GLOBAL_TIMER).elapsed(reset=False),
self.global_samples,
),
(
f"Train/Samples/elapsed_time_ms_backward",
self.timers(BACKWARD_GLOBAL_TIMER).elapsed(reset=False) * 1000.0,
self.timers(BACKWARD_GLOBAL_TIMER).elapsed(reset=False),
self.global_samples,
),
(
f"Train/Samples/elapsed_time_ms_backward_inner",
self.timers(BACKWARD_INNER_GLOBAL_TIMER).elapsed(reset=False) *
1000.0,
self.timers(BACKWARD_INNER_GLOBAL_TIMER).elapsed(reset=False),
self.global_samples,
),
(
f"Train/Samples/elapsed_time_ms_backward_allreduce",
self.timers(BACKWARD_REDUCE_GLOBAL_TIMER).elapsed(reset=False) *
1000.0,
self.timers(BACKWARD_REDUCE_GLOBAL_TIMER).elapsed(reset=False),
self.global_samples,
),
(
f"Train/Samples/elapsed_time_ms_step",
self.timers(STEP_GLOBAL_TIMER).elapsed(reset=False) * 1000.0,
self.timers(STEP_GLOBAL_TIMER).elapsed(reset=False),
self.global_samples,
),
]
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册