Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
c4abebaf
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
c4abebaf
编写于
6月 11, 2020
作者:
G
gukecai
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add sync between hcom
上级
0e4fab23
变更
2
显示空白变更内容
内联
并排
Showing
2 changed file
with
72 addition
and
0 deletion
+72
-0
mindspore/ccsrc/device/ascend/ascend_stream_assign.cc
mindspore/ccsrc/device/ascend/ascend_stream_assign.cc
+71
-0
mindspore/ccsrc/device/ascend/ascend_stream_assign.h
mindspore/ccsrc/device/ascend/ascend_stream_assign.h
+1
-0
未找到文件。
mindspore/ccsrc/device/ascend/ascend_stream_assign.cc
浏览文件 @
c4abebaf
...
...
@@ -291,6 +291,74 @@ void AscendStreamAssign::FindAllReduceParallel(const shared_ptr<session::KernelG
}
}
// Inserts Send/Recv kernels between hcom kernels that are assigned to different streams.
//
// The execution order of `graph_ptr` is scanned for kernels accepted by IsHcom(). Every hcom whose
// stream id differs from the stream id of the previous hcom is a "sync point". For N sync points
// (N >= 2) the new execution order looks like:
//
//   hcom_0 Send(e) ... Recv(e) hcom_1 Send(e+1) ... Recv(e+N-2) hcom_{N-1}
//
// so every Send created here is paired with exactly one Recv that uses the same event id, and
// total_event_num_ grows by N-1. All other kernels keep their relative order. If fewer than two
// sync points exist (fewer than two hcoms, or all hcoms on one stream) the graph is left untouched.
//
// NOTE(review): as in the previous implementation, the Send is placed right after the FIRST hcom
// of a same-stream run, not after the last one — confirm this is the intended ordering guarantee.
//
// @param graph_ptr  kernel graph whose execution order is rewritten; must not be null.
void AscendStreamAssign::InsertSendRecvForDiffHcom(const shared_ptr<mindspore::session::KernelGraph> &graph_ptr) {
  MS_LOG(INFO) << "start";
  MS_EXCEPTION_IF_NULL(graph_ptr);
  auto cnode_ptr_list = graph_ptr->execution_order();

  // Positions (in execution order) of the hcom kernels at which the hcom stream id changes.
  // The first hcom is always a sync point; an explicit flag is used instead of a sentinel stream
  // id so that no real stream id value can be mistaken for "no previous hcom".
  std::vector<size_t> sync_index;
  size_t hcom_num = 0;
  bool has_pre_hcom = false;
  uint32_t pre_hcom_stream_id = UINT32_MAX;
  for (size_t i = 0; i < cnode_ptr_list.size(); i++) {
    auto cur_cnode = cnode_ptr_list[i];
    if (!IsHcom(cur_cnode)) {
      continue;
    }
    hcom_num++;
    auto cur_hcom_stream_id = AnfAlgo::GetStreamId(cur_cnode);
    if (!has_pre_hcom || cur_hcom_stream_id != pre_hcom_stream_id) {
      sync_index.emplace_back(i);
    }
    has_pre_hcom = true;
    pre_hcom_stream_id = cur_hcom_stream_id;
  }

  if (hcom_num < 2) {
    MS_LOG(INFO) << "fusion hcom size is less than 2, no need insert event between them";
    return;
  }
  if (sync_index.size() < 2) {
    // Every hcom runs on the same stream: emitting a Send here would never be matched by a Recv
    // and its event id would not be counted in total_event_num_.
    MS_LOG(INFO) << "all hcom are on the same stream, no need insert event between them";
    return;
  }

  vector<CNodePtr> orders;
  // One Send and one Recv are added per pair of neighbouring sync points.
  orders.reserve(cnode_ptr_list.size() + 2 * (sync_index.size() - 1));

  uint32_t cur_event_id = total_event_num_;
  size_t next_sync = 0;  // index into sync_index of the next sync point to be reached
  for (size_t i = 0; i < cnode_ptr_list.size(); i++) {
    auto cur_cnode = cnode_ptr_list[i];
    if (next_sync >= sync_index.size() || i != sync_index[next_sync]) {
      // Not a sync point (non-hcom kernel, or hcom on the same stream as the previous hcom).
      orders.emplace_back(cur_cnode);
      continue;
    }

    auto cur_hcom_stream_id = AnfAlgo::GetStreamId(cur_cnode);
    const bool is_first_sync = (next_sync == 0);
    const bool is_last_sync = (next_sync + 1 == sync_index.size());

    if (!is_first_sync) {
      // Pair with the Send emitted after the previous sync point, then move to a fresh event id.
      auto recv = CreateRecvApplyKernel(graph_ptr, cur_event_id, cur_hcom_stream_id);
      orders.emplace_back(recv);
      cur_event_id++;
    }
    orders.emplace_back(cur_cnode);
    if (!is_last_sync) {
      // A later sync point is guaranteed to exist, so this Send always gets its Recv.
      auto send = CreateSendApplyKernel(graph_ptr, cur_event_id, cur_hcom_stream_id);
      orders.emplace_back(send);
    }
    next_sync++;
  }

  graph_ptr->set_execution_order(orders);
  total_event_num_ = cur_event_id;
  MS_LOG(INFO) << "after indsert between allreduce, total event nums[" << total_event_num_ << "]";
  MS_LOG(INFO) << "end";
}
void
AscendStreamAssign
::
InsertSendRecvForHcomParallel
(
const
shared_ptr
<
mindspore
::
session
::
KernelGraph
>
&
graph_ptr
)
{
MS_LOG
(
INFO
)
<<
"start"
;
MS_EXCEPTION_IF_NULL
(
graph_ptr
);
...
...
@@ -324,6 +392,9 @@ void AscendStreamAssign::InsertSendRecvForHcomParallel(const shared_ptr<mindspor
graph_ptr
->
set_execution_order
(
cnodes
);
total_event_num_
=
cur_event_id
;
MS_LOG
(
INFO
)
<<
"after insert send/recv for hcom parallel, total event nums["
<<
total_event_num_
<<
"]"
;
// Insert Send/Recv between Hcom(such as:AllReduce1 Send1 Common Recv1 AllReduce2)
InsertSendRecvForDiffHcom
(
graph_ptr
);
MS_LOG
(
INFO
)
<<
"end"
;
}
...
...
mindspore/ccsrc/device/ascend/ascend_stream_assign.h
浏览文件 @
c4abebaf
...
...
@@ -97,6 +97,7 @@ class AscendStreamAssign {
void
GetParallelStream
(
uint32_t
cur_stream_id
,
uint32_t
stream_acitve_id
,
std
::
vector
<
uint32_t
>
*
parallel_streams
);
void
InsertSendRecvForIndependent
(
const
std
::
shared_ptr
<
session
::
KernelGraph
>
&
graph_ptr
);
void
InsertSendRecvForHcomParallel
(
const
std
::
shared_ptr
<
session
::
KernelGraph
>
&
graph_ptr
);
void
InsertSendRecvForDiffHcom
(
const
shared_ptr
<
mindspore
::
session
::
KernelGraph
>
&
graph_ptr
);
void
GetNeedActiveStreams
(
const
std
::
shared_ptr
<
session
::
KernelGraph
>
&
graph_ptr
);
void
ReorderIndependentOrders
(
const
std
::
shared_ptr
<
session
::
KernelGraph
>
&
graph_ptr
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录