magicwindyyd / mindspore (forked from MindSpore / mindspore)
Commit f15cb6b7
Authored June 16, 2020 by yujianfeng

Add sort by index for each group of AllReduce

Parent: 6089d58d

Showing 5 changed files with 95 additions and 11 deletions (+95 -11)
mindspore/ccsrc/pre_activate/ascend/ir_fission/topk_split.cc            +26  -8
mindspore/ccsrc/pre_activate/pass/communication_op_fusion.cc             +7  -0
mindspore/ccsrc/utils/utils.h                                            +1  -0
tests/ut/cpp/pre_activate/pass/allreduce_fusion_test.cc                 +50  -3
tests/ut/cpp/python_input/gtest_input/pre_activate/ir_fusion_test.py    +11  -0
mindspore/ccsrc/pre_activate/ascend/ir_fission/topk_split.cc

@@ -91,6 +91,30 @@ kernel::KernelBuildInfoPtr CreateKernelBuildInfo() {
   builder.SetOutputsDeviceType({kNumberTypeFloat16, kNumberTypeInt32});
   return builder.Build();
 }
+
+bool CheckInputNamesSize(const CNodePtr &cnode) {
+  auto input_names_vec = AnfAlgo::GetNodeAttr<std::vector<std::string>>(cnode, kAttrInputNames);
+  if (input_names_vec.size() < kTopkIndexK + 1) {
+    MS_LOG(INFO) << "The input k of topk has been converted to attr";
+    return false;
+  }
+  return true;
+}
+
+bool CheckOutputShape(const AnfNodePtr &node) {
+  auto shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
+  if (shape.empty()) {
+    MS_LOG(INFO) << "The output shape of topk to split must not be empty";
+    return false;
+  }
+  auto last_dim = shape[shape.size() - 1];
+  const size_t kMaxFloat16 = 65500;
+  if (last_dim > kMaxFloat16) {
+    MS_LOG(INFO) << "The last dim is more than " << kMaxFloat16 << ", switch to aicpu ops.";
+    return false;
+  }
+  return true;
+}
 }  // namespace
 
 const BaseRef TopKSplit::DefinePattern() const {

@@ -107,16 +131,10 @@ const AnfNodePtr TopKSplit::Process(const FuncGraphPtr &func_graph, const AnfNod
   // set value node as topk's input
   auto cnode = node->cast<CNodePtr>();
   MS_EXCEPTION_IF_NULL(cnode);
-  auto input_names_vec = AnfAlgo::GetNodeAttr<std::vector<std::string>>(cnode, kAttrInputNames);
-  if (input_names_vec.size() < kTopkIndexK + 1) {
-    MS_LOG(INFO) << "The input k of topk has been converted to attr";
+  if (!CheckInputNamesSize(cnode)) {
     return nullptr;
   }
-  auto shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
-  auto last_dim = shape[shape.size() - 1];
-  const size_t kMaxFloat16 = 65500;
-  if (last_dim > kMaxFloat16) {
-    MS_LOG(INFO) << "The last dim is more than 65500, switch to aicpu ops.";
+  if (!CheckOutputShape(cnode)) {
     return nullptr;
   }
   // Copy a new node to check supported.
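The topk_split.cc change is a pure refactor: the inline validity checks in TopKSplit::Process are lifted into the CheckInputNamesSize and CheckOutputShape helpers, so the pass body becomes two guard clauses (and gains an empty-shape check along the way). Below is a minimal standalone sketch of that guard-clause pattern; the FakeNode struct, the kTopkIndexK value of 1, and std::cout logging are stand-ins for MindSpore's CNodePtr, attribute lookup, and MS_LOG, not the real pass.

#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

// Hypothetical stand-in for the node data the real pass reads through AnfAlgo.
struct FakeNode {
  std::vector<std::string> input_names;
  std::vector<size_t> output_shape;
};

constexpr size_t kTopkIndexK = 1;      // assumed position of the "k" input
constexpr size_t kMaxFloat16 = 65500;  // same limit as in the diff above

// Each check logs why it rejects the node and returns false,
// mirroring CheckInputNamesSize / CheckOutputShape.
bool CheckInputNamesSize(const FakeNode &node) {
  if (node.input_names.size() < kTopkIndexK + 1) {
    std::cout << "The input k of topk has been converted to attr\n";
    return false;
  }
  return true;
}

bool CheckOutputShape(const FakeNode &node) {
  if (node.output_shape.empty()) {
    std::cout << "The output shape of topk to split must not be empty\n";
    return false;
  }
  if (node.output_shape.back() > kMaxFloat16) {
    std::cout << "The last dim is more than " << kMaxFloat16 << ", switch to aicpu ops.\n";
    return false;
  }
  return true;
}

// The caller reduces to two guard clauses, as in TopKSplit::Process.
const FakeNode *Process(const FakeNode &node) {
  if (!CheckInputNamesSize(node)) {
    return nullptr;
  }
  if (!CheckOutputShape(node)) {
    return nullptr;
  }
  return &node;  // the real pass would go on to split the node here
}

int main() {
  FakeNode ok{{"x", "k"}, {32, 1000}};
  FakeNode too_wide{{"x", "k"}, {32, 70000}};
  std::cout << (Process(ok) != nullptr) << " " << (Process(too_wide) != nullptr) << "\n";
  return 0;
}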
mindspore/ccsrc/pre_activate/pass/communication_op_fusion.cc

@@ -253,6 +253,13 @@ bool CommunicationOpFusion::Run(const FuncGraphPtr &func_graph) {
     if (it.second.communication_op_nodes.size() <= 1) {
       continue;
     }
+    auto first_node = it.second.communication_op_nodes[0];
+    if (AnfAlgo::HasNodeAttr(kAttrIndex, first_node) && AnfAlgo::GetNodeAttr<int>(first_node, kAttrIndex) > 0) {
+      std::stable_sort(it.second.communication_op_nodes.begin(), it.second.communication_op_nodes.end(),
+                       [](const CNodePtr &a, const CNodePtr &b) {
+                         return AnfAlgo::GetNodeAttr<int>(a, kAttrIndex) < AnfAlgo::GetNodeAttr<int>(b, kAttrIndex);
+                       });
+    }
     size_t segment_num = 0;
     std::vector<size_t> segment_index;
     if (GetSplitSegments(it.second, &segment_num, &segment_index, it.first)) {
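This hunk is the core of the commit: before CommunicationOpFusion::Run splits a group of communication ops into fusion segments, it reorders the group by the integer kAttrIndex attribute whenever the group's first node carries a positive index, and std::stable_sort (rather than std::sort) keeps the incoming topological order for nodes whose indices compare equal. The following is a self-contained sketch of that ordering step only, using a hypothetical Op struct with an optional index in place of CNodePtr and the AnfAlgo attribute calls.

#include <algorithm>
#include <iostream>
#include <optional>
#include <string>
#include <vector>

// Hypothetical stand-in for an AllReduce CNode carrying an optional
// "index" attribute (kAttrIndex in the real pass).
struct Op {
  std::string name;
  std::optional<int> index;
};

// Mirrors the new block in CommunicationOpFusion::Run: only sort when the
// first op of the group actually has a positive index attribute.
void SortGroupByIndex(std::vector<Op> *group) {
  if (group->empty() || !group->front().index.has_value() || *group->front().index <= 0) {
    return;  // keep the original (topological) order
  }
  std::stable_sort(group->begin(), group->end(),
                   [](const Op &a, const Op &b) { return a.index.value_or(0) < b.index.value_or(0); });
}

int main() {
  // Ops arrive in graph order; their indices request a different fusion order.
  std::vector<Op> group{{"allreduce_c", 3}, {"allreduce_a", 1}, {"allreduce_b", 2}};
  SortGroupByIndex(&group);
  for (const auto &op : group) {
    std::cout << op.name << " ";  // prints: allreduce_a allreduce_b allreduce_c
  }
  std::cout << "\n";
  return 0;
}

Under this sketch's assumption that every op in the group has an index once the first one does, the result matches the pass's ordering; the real code reads the attribute on every comparison through AnfAlgo::GetNodeAttr<int>.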
mindspore/ccsrc/utils/utils.h

@@ -209,6 +209,7 @@ constexpr auto kAttrRecordEvent = "record_event";
 constexpr auto kAttrWaitEvent = "wait_event";
 constexpr auto kAttrRecordEventStream = "record_event_stream";
 constexpr auto kAttrWaitEventStream = "wait_event_stream";
+constexpr auto kAttrIndex = "index";
 
 // attr value
 constexpr auto kValueTargetSwitch = "target_switch";
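The single new constant above is the attribute key that ties the rest of the change together: the test tags each AllReduce with AnfAlgo::SetNodeAttr(kAttrIndex, MakeValue(SizeToInt(i)), node), and the fusion pass reads it back via AnfAlgo::HasNodeAttr(kAttrIndex, first_node) and AnfAlgo::GetNodeAttr<int>(node, kAttrIndex), as shown in the hunks above and below.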
tests/ut/cpp/pre_activate/pass/allreduce_fusion_test.cc

@@ -58,7 +58,7 @@ TEST_F(TestHWAllReduceFusion, test_fusion_all) {
   builder.SetProcessor(kernel::Processor::AICORE);
   builder.SetKernelType(KernelType::AUTO_DIFF_KERNEL);
   auto node_list = TopoSort(func_graph->get_return());
   for (auto &node : node_list) {
     if (node == nullptr) {
       continue;
     }

@@ -99,7 +99,7 @@ TEST_F(TestHWAllReduceFusion, test_fusion_group) {
   builder.SetProcessor(kernel::Processor::AICORE);
   builder.SetKernelType(KernelType::AUTO_DIFF_KERNEL);
   auto node_list = TopoSort(func_graph->get_return());
   for (auto &node : node_list) {
     if (node == nullptr) {
       continue;
     }

@@ -141,7 +141,7 @@ TEST_F(TestHWAllReduceFusion, test_fusion_op) {
   builder.SetKernelType(KernelType::AUTO_DIFF_KERNEL);
   auto node_list = TopoSort(func_graph->get_return());
   int count = 0;
   for (auto &node : node_list) {
     if (node == nullptr) {
       continue;
     }

@@ -171,5 +171,52 @@ TEST_F(TestHWAllReduceFusion, test_fusion_op) {
   EXPECT_NE(g_after, nullptr);
   EXPECT_TRUE(CheckEqualGraph(new_graph, g_after));
 }
+
+TEST_F(TestHWAllReduceFusion, test_fusion_sorted) {
+  getPyFun_.SetDoResolve(true);
+  FuncGraphPtr g = getPyFun_.CallAndParseRet("test_all_reduce_fusion_all", "before");
+  EXPECT_NE(g, nullptr);
+  std::vector<int> shp_x{1, 64, 112, 112};
+  auto x_abstract = std::make_shared<abstract::AbstractTensor>(kFloat32, shp_x);
+  AbstractBasePtrList args_spec_list{x_abstract, x_abstract, x_abstract, x_abstract, x_abstract};
+  auto func_graph = GetKernelGraph(g, args_spec_list);
+  EXPECT_NE(func_graph, nullptr);
+  auto ret = func_graph->get_return();
+  auto make_tuple = ret->input(1);
+  auto make_tuple1 = make_tuple->cast<CNodePtr>()->input(1)->cast<CNodePtr>();
+  for (size_t i = 1; i < make_tuple1->inputs().size(); ++i) {
+    AnfAlgo::SetNodeAttr(kAttrIndex, MakeValue(SizeToInt(i)), make_tuple1->input(i));
+  }
+  // set kernel build info
+  kernel::KernelBuildInfo::KernelBuildInfoBuilder builder;
+  builder.SetInputsFormat({"NC1HWC0"});
+  builder.SetOutputsFormat({"NC1HWC0"});
+  builder.SetInputsDeviceType({kFloat32->type_id()});
+  builder.SetOutputsDeviceType({kFloat32->type_id()});
+  builder.SetFusionType(kernel::FusionType::ELEMWISE);
+  builder.SetProcessor(kernel::Processor::AICORE);
+  builder.SetKernelType(KernelType::AUTO_DIFF_KERNEL);
+  auto node_list = TopoSort(func_graph->get_return());
+  for (auto &node : node_list) {
+    if (node == nullptr) {
+      continue;
+    }
+    if ((node->isa<CNode>() && AnfAlgo::GetCNodeName(node) == kAllReduceOpName) || node->isa<Parameter>()) {
+      node->set_kernel_info(std::make_shared<device::KernelInfo>());
+      AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), node.get());
+    }
+  }
+  // do all reduce fusion
+  auto optimizer = std::make_shared<opt::GraphOptimizer>();
+  auto pm = std::make_shared<opt::PassManager>();
+  pm->AddPass(std::make_shared<opt::AllReduceFusion>());
+  optimizer->AddPassManager(pm);
+  FuncGraphPtr new_graph = optimizer->Optimize(func_graph);
+  EXPECT_NE(new_graph, nullptr);
+  // check result
+  FuncGraphPtr g_after = getPyFun_.CallAndParseRet("test_all_reduce_fusion_all", "after1");
+  EXPECT_NE(g_after, nullptr);
+  EXPECT_TRUE(CheckEqualGraph(new_graph, g_after));
+}
 }  // namespace opt
 }  // namespace mindspore
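The new test_fusion_sorted case reuses the existing test_all_reduce_fusion_all "before" graph, tags the AllReduce nodes feeding the output tuple with ascending kAttrIndex values, runs the opt::AllReduceFusion pass, and compares the result against the new "after1" reference graph added to ir_fusion_test.py below.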
tests/ut/cpp/python_input/gtest_input/pre_activate/ir_fusion_test.py

@@ -140,6 +140,17 @@ def test_all_reduce_fusion_all(tag):
         res = make_tuple(y1, y2, y3, y4, y5)
         return make_tuple(res)
 
+    @fns
+    def after1(x1, x2, x3, x4, x5):
+        ar = allreduce(x1, x2, x3, x4, x5)
+        y1 = tuple_getitem(ar, 0)
+        y2 = tuple_getitem(ar, 1)
+        y3 = tuple_getitem(ar, 2)
+        y4 = tuple_getitem(ar, 3)
+        y5 = tuple_getitem(ar, 4)
+        res = make_tuple(y1, y2, y3, y4, y5)
+        return make_tuple(res)
+
     return fns[tag]