未验证 提交 c520d476 编写于 作者: Y YiSheng5 提交者: GitHub

[profiling][mics] Fix some issues for log_summary(). (#3899)

* Add the missing print_rank_0 helper, which was not defined in this Python script

* Enable log_summary for the MiCS feature, which is based on ZeRO stage 3
上级 d24629f4
......@@ -68,6 +68,9 @@ def get_tensor_position(func):
# most colls
if 'tensor' in sig_params:
arg = 'tensor'
# all_reduce_coalesced coll
elif 'tensors' in sig_params:
arg = 'tensors'
# reduce scatter coll
elif 'input_list' in sig_params:
arg = 'input_list'
......@@ -87,6 +90,8 @@ def get_tensor_kwarg(func, kwargs):
if 'tensor' in func_args:
arg = func_args['tensor']
elif 'tensors' in func_args:
arg = func_args['tensors']
elif 'input_list' in func_args:
arg = func_args['input_list']
elif 'input_tensor_list' in func_args:
......
......@@ -12,6 +12,12 @@ def get_caller_func(frame=3):
return sys._getframe(frame).f_code.co_name
def print_rank_0(message):
    """Print ``message`` only on the process whose rank is 0.

    The rank is obtained from ``deepspeed.comm.get_rank()``. On every other
    rank the call does nothing. Always returns ``None``.

    Args:
        message: The object to hand to ``print`` on rank 0.
    """
    # The import is kept inside the function, as in the original code.
    # NOTE(review): presumably this avoids a circular import at module load
    # time -- confirm before hoisting it to the top of the file.
    import deepspeed.comm as dist

    # Guard clause: every non-zero rank stays silent.
    if dist.get_rank() != 0:
        return
    print(message)
# Helper function to pretty-print message sizes
def convert_size(size_bytes):
if size_bytes == 0:
......@@ -38,7 +44,7 @@ def calc_bw_log(comm_op, size, duration):
size *= n
tput = (size / duration)
busbw = (size / duration) * ((n - 1) / n)
elif comm_op == "all_reduce":
elif comm_op == "all_reduce" or comm_op == "all_reduce_coalesced":
tput = (size * 2 / duration)
busbw = (size / duration) * (2 * (n - 1) / n)
elif comm_op == "send" or comm_op == "recv" or comm_op == "isend" or comm_op == "irecv" or comm_op == "broadcast" or comm_op == "reduce" or comm_op == "gather" or comm_op == "scatter" or comm_op == "barrier":
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册