Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
c4a5bfb7
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
c4a5bfb7
编写于
5月 14, 2020
作者:
E
etone-chan
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add matmul eltwise buffer fusion pass
上级
a2a3b1c6
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
124 addition
and
17 deletion
+124
-17
mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.cc
.../ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.cc
+27
-11
mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.h
...e/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.h
+2
-0
mindspore/ops/_op_impl/tbe/relu.py
mindspore/ops/_op_impl/tbe/relu.py
+1
-0
tests/ut/cpp/pre_activate/ascend/buffer_fusion/buffer_fusion_test.cc
...p/pre_activate/ascend/buffer_fusion/buffer_fusion_test.cc
+72
-6
tests/ut/cpp/python_input/gtest_input/pre_activate/buffer_fusion_test.py
...thon_input/gtest_input/pre_activate/buffer_fusion_test.py
+22
-0
未找到文件。
mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.cc
浏览文件 @
c4a5bfb7
...
...
@@ -580,27 +580,43 @@ void BufferFusion::MatchDepthwiseConvRelu(const CNodePtr &cnode, const session::
}
}
void
BufferFusion
::
MatchMatmulEltwise
(
const
CNodePtr
&
cnode
,
const
AnfNodePtr
&
relu_input
,
const
session
::
KernelGraph
&
kernel_graph
,
FusedNodeRecord
*
candidate_fusion
)
{
MS_EXCEPTION_IF_NULL
(
cnode
);
MS_EXCEPTION_IF_NULL
(
candidate_fusion
);
auto
manager
=
kernel_graph
.
manager
();
MS_EXCEPTION_IF_NULL
(
manager
);
std
::
vector
<
int
>
output_used_num
{
SizeToInt
(
manager
->
node_users
()[
relu_input
].
size
())};
AnfAlgo
::
SetNodeAttr
(
kAttrOutputUsedNum
,
MakeValue
(
output_used_num
),
relu_input
);
std
::
unordered_set
<
AnfNodePtr
>
record
{
cnode
,
relu_input
};
candidate_fusion
->
push_back
(
record
);
SetRecordFusionId
(
record
);
}
void
BufferFusion
::
MatchOpNamePattern
(
const
session
::
KernelGraph
&
kernel_graph
,
FusedNodeRecord
*
candidate_fusion
)
{
MS_EXCEPTION_IF_NULL
(
candidate_fusion
);
std
::
vector
<
AnfNodePtr
>
node_list
=
TopoSort
(
kernel_graph
.
get_return
());
for
(
auto
&
node
:
node_list
)
{
if
(
!
AnfAlgo
::
IsRealCNodeKernel
(
node
)
||
fusion_id_allocator
.
HasFusionIdAttr
(
node
))
{
if
(
!
AnfAlgo
::
IsRealCNodeKernel
(
node
)
||
fusion_id_allocator
.
HasFusionIdAttr
(
node
)
||
AnfAlgo
::
CheckPrimitiveType
(
node
,
prim
::
kPrimReturn
))
{
continue
;
}
auto
cnode
=
node
->
cast
<
CNodePtr
>
();
MS_EXCEPTION_IF_NULL
(
cnode
);
if
(
AnfAlgo
::
GetCNodeName
(
cnode
)
==
kBNTrainingReduceOpName
)
{
MatchConvBnreduce
(
cnode
,
kernel_graph
,
candidate_fusion
);
}
else
if
(
AnfAlgo
::
GetCNodeName
(
cnode
)
==
kReluV2OpName
||
AnfAlgo
::
GetCNodeName
(
cnode
)
==
prim
::
kPrimRelu
->
name
())
{
auto
relu_input
=
cnode
->
input
(
1
);
if
(
relu_input
->
isa
<
CNode
>
()
&&
AnfAlgo
::
GetCNodeName
(
relu_input
)
==
prim
::
kPrimTensorAdd
->
name
())
{
MatchBnupdateAddRelu
(
cnode
,
relu_input
,
kernel_graph
,
candidate_fusion
);
}
else
if
(
relu_input
->
isa
<
CNode
>
()
&&
AnfAlgo
::
GetCNodeName
(
relu_input
)
==
prim
::
kPrimTupleGetItem
->
name
())
{
MatchBnupdateRelu
(
cnode
,
relu_input
,
kernel_graph
,
candidate_fusion
);
}
else
if
(
relu_input
->
isa
<
CNode
>
()
&&
AnfAlgo
::
GetCNodeName
(
relu_input
)
==
prim
::
kPrimDepthwiseConv2dNative
->
name
())
{
MatchDepthwiseConvRelu
(
cnode
,
kernel_graph
,
candidate_fusion
,
true
);
}
else
if
(
AnfAlgo
::
GetKernelType
(
cnode
)
==
KernelType
::
TBE_KERNEL
&&
AnfAlgo
::
GetFusionType
(
cnode
)
==
kernel
::
FusionType
::
ELEMWISE
)
{
auto
eltwise_input
=
cnode
->
input
(
1
);
if
(
eltwise_input
->
isa
<
CNode
>
()
&&
AnfAlgo
::
CheckPrimitiveType
(
eltwise_input
,
prim
::
kPrimMatMul
))
{
MatchMatmulEltwise
(
cnode
,
eltwise_input
,
kernel_graph
,
candidate_fusion
);
}
if
(
AnfAlgo
::
GetCNodeName
(
cnode
)
==
kReluV2OpName
||
AnfAlgo
::
CheckPrimitiveType
(
cnode
,
prim
::
kPrimRelu
))
{
if
(
eltwise_input
->
isa
<
CNode
>
()
&&
AnfAlgo
::
CheckPrimitiveType
(
eltwise_input
,
prim
::
kPrimTensorAdd
))
{
MatchBnupdateAddRelu
(
cnode
,
eltwise_input
,
kernel_graph
,
candidate_fusion
);
}
else
if
(
eltwise_input
->
isa
<
CNode
>
()
&&
AnfAlgo
::
CheckPrimitiveType
(
eltwise_input
,
prim
::
kPrimTupleGetItem
))
{
MatchBnupdateRelu
(
cnode
,
eltwise_input
,
kernel_graph
,
candidate_fusion
);
}
}
}
else
if
(
AnfAlgo
::
GetCNodeName
(
cnode
)
==
prim
::
kPrimDepthwiseConv2dNative
->
name
())
{
MatchDepthwiseConvRelu
(
cnode
,
kernel_graph
,
candidate_fusion
,
false
);
...
...
mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.h
浏览文件 @
c4a5bfb7
...
...
@@ -53,6 +53,8 @@ class BufferFusion : public Pass {
const
session
::
KernelGraph
&
kernel_graph
,
FusedNodeRecord
*
candidate_fusion
);
void
MatchDepthwiseConvRelu
(
const
CNodePtr
&
cnode
,
const
session
::
KernelGraph
&
kernel_graph
,
FusedNodeRecord
*
candidate_fusion
,
bool
is_order
);
void
MatchMatmulEltwise
(
const
CNodePtr
&
cnode
,
const
AnfNodePtr
&
relu_input
,
const
session
::
KernelGraph
&
kernel_graph
,
FusedNodeRecord
*
candidate_fusion
);
void
MatchOpNamePattern
(
const
session
::
KernelGraph
&
kernel_graph
,
FusedNodeRecord
*
candidate_fusion
);
void
MatchFusionTypePattern
(
const
session
::
KernelGraph
&
kernel_graph
,
FusedNodeRecord
*
candidate_fusion
);
...
...
mindspore/ops/_op_impl/tbe/relu.py
浏览文件 @
c4a5bfb7
...
...
@@ -33,6 +33,7 @@ relu_op_info = TBERegOp("ReLU") \
.
dtype_format
(
DataType
.
F16_5HD
,
DataType
.
F16_5HD
)
\
.
dtype_format
(
DataType
.
F32_Default
,
DataType
.
F32_Default
)
\
.
dtype_format
(
DataType
.
F32_5HD
,
DataType
.
F32_5HD
)
\
.
dtype_format
(
DataType
.
F16_FracNZ
,
DataType
.
F16_FracNZ
)
\
.
get_op_info
()
...
...
tests/ut/cpp/pre_activate/ascend/buffer_fusion/buffer_fusion_test
_py
.cc
→
tests/ut/cpp/pre_activate/ascend/buffer_fusion/buffer_fusion_test.cc
浏览文件 @
c4a5bfb7
...
...
@@ -26,15 +26,15 @@
namespace
mindspore
{
namespace
opt
{
using
KernelBuildInfoBuilder
=
kernel
::
KernelBuildInfo
::
KernelBuildInfoBuilder
;
class
TestHWBufferFusion
Py
:
public
BackendCommon
{
class
TestHWBufferFusion
:
public
BackendCommon
{
public:
TestHWBufferFusion
Py
()
:
get_py_fun_
(
"gtest_input.pre_activate.buffer_fusion_test"
,
true
)
{}
~
TestHWBufferFusion
Py
()
override
=
default
;
TestHWBufferFusion
()
:
get_py_fun_
(
"gtest_input.pre_activate.buffer_fusion_test"
,
true
)
{}
~
TestHWBufferFusion
()
override
=
default
;
UT
::
PyFuncGraphFetcher
get_py_fun_
;
};
TEST_F
(
TestHWBufferFusion
Py
,
test_tbe_eltwise_fusion_1
)
{
TEST_F
(
TestHWBufferFusion
,
test_tbe_eltwise_fusion_1
)
{
FuncGraphPtr
g
=
get_py_fun_
.
CallAndParseRet
(
"test_tbe_eltwise_fusion_1"
,
"before"
);
std
::
vector
<
int
>
shp
{
2
,
32
,
224
,
224
};
auto
x_abstract
=
std
::
make_shared
<
abstract
::
AbstractTensor
>
(
kFloat32
,
shp
);
...
...
@@ -90,7 +90,7 @@ TEST_F(TestHWBufferFusionPy, test_tbe_eltwise_fusion_1) {
EXPECT_TRUE
(
CheckEqualGraph
(
g_after
,
new_graph
));
}
TEST_F
(
TestHWBufferFusion
Py
,
test_tbe_eltwise_fusion_2
)
{
TEST_F
(
TestHWBufferFusion
,
test_tbe_eltwise_fusion_2
)
{
FuncGraphPtr
g
=
get_py_fun_
.
CallAndParseRet
(
"test_tbe_eltwise_fusion_2"
,
"before"
);
std
::
vector
<
int
>
shp
{
32
,
10
};
std
::
vector
<
int
>
shp_bias
{
10
};
...
...
@@ -179,7 +179,7 @@ TEST_F(TestHWBufferFusionPy, test_tbe_eltwise_fusion_2) {
EXPECT_TRUE
(
CheckEqualGraph
(
g_after
,
new_graph
));
}
TEST_F
(
TestHWBufferFusion
Py
,
test_tbe_reduce_eltwise_fusion
)
{
TEST_F
(
TestHWBufferFusion
,
test_tbe_reduce_eltwise_fusion
)
{
FuncGraphPtr
g
=
get_py_fun_
.
CallAndParseRet
(
"test_tbe_reduce_eltwise_fusion"
,
"before"
);
std
::
vector
<
int
>
shp
{
32
,
10
};
auto
x_abstract
=
std
::
make_shared
<
abstract
::
AbstractTensor
>
(
kFloat32
,
shp
);
...
...
@@ -265,5 +265,71 @@ TEST_F(TestHWBufferFusionPy, test_tbe_reduce_eltwise_fusion) {
FuncGraphPtr
g_after
=
get_py_fun_
.
CallAndParseRet
(
"test_tbe_reduce_eltwise_fusion"
,
"after"
);
EXPECT_TRUE
(
CheckEqualGraph
(
g_after
,
new_graph
));
}
TEST_F
(
TestHWBufferFusion
,
test_tbe_matmul_eltwise_fusion
)
{
FuncGraphPtr
g
=
get_py_fun_
.
CallAndParseRet
(
"test_tbe_matmul_eltwise_fusion"
,
"before"
);
std
::
vector
<
int
>
x_shp
{
2048
,
768
};
std
::
vector
<
int
>
y_shp
{
768
,
768
};
auto
x_abstract
=
std
::
make_shared
<
abstract
::
AbstractTensor
>
(
kFloat32
,
x_shp
);
auto
y_abstract
=
std
::
make_shared
<
abstract
::
AbstractTensor
>
(
kFloat32
,
y_shp
);
AbstractBasePtrList
args_spec_list
{
x_abstract
,
y_abstract
};
auto
kg
=
GetKernelGraph
(
g
,
args_spec_list
);
auto
ret
=
kg
->
get_return
();
EXPECT_NE
(
ret
->
input
(
1
),
nullptr
);
auto
tuple
=
ret
->
input
(
1
);
EXPECT_NE
(
tuple
,
nullptr
);
auto
cast
=
tuple
->
cast
<
CNodePtr
>
()
->
input
(
1
);
EXPECT_NE
(
cast
,
nullptr
);
auto
relu
=
cast
->
cast
<
CNodePtr
>
()
->
input
(
1
);
EXPECT_NE
(
relu
,
nullptr
);
auto
matmul
=
relu
->
cast
<
CNodePtr
>
()
->
input
(
1
);
KernelBuildInfoBuilder
builder
;
builder
.
SetInputsFormat
({
"NC1HWC0"
});
builder
.
SetOutputsFormat
({
"NC1HWC0"
});
builder
.
SetInputsDeviceType
({
kFloat32
->
type_id
()});
builder
.
SetOutputsDeviceType
({
kFloat32
->
type_id
()});
builder
.
SetKernelType
(
KernelType
::
TBE_KERNEL
);
builder
.
SetFusionType
(
kernel
::
FusionType
::
ELEMWISE
);
builder
.
SetProcessor
(
kernel
::
Processor
::
AICORE
);
builder
.
SetKernelType
(
KernelType
::
TBE_KERNEL
);
relu
->
set_kernel_info
(
std
::
make_shared
<
device
::
KernelInfo
>
());
AnfAlgo
::
SetSelectKernelBuildInfo
(
builder
.
Build
(),
relu
.
get
());
KernelBuildInfoBuilder
builder2
;
builder2
.
SetInputsFormat
({
"NC1HWC0"
,
"NC1HWC0"
});
builder2
.
SetOutputsFormat
({
"NC1HWC0"
});
builder2
.
SetInputsDeviceType
({
kFloat32
->
type_id
(),
kFloat32
->
type_id
()});
builder2
.
SetOutputsDeviceType
({
kFloat32
->
type_id
()});
builder2
.
SetKernelType
(
KernelType
::
TBE_KERNEL
);
builder2
.
SetFusionType
(
kernel
::
FusionType
::
OPAQUE
);
builder2
.
SetProcessor
(
kernel
::
Processor
::
AICORE
);
builder2
.
SetKernelType
(
KernelType
::
TBE_KERNEL
);
matmul
->
set_kernel_info
(
std
::
make_shared
<
device
::
KernelInfo
>
());
AnfAlgo
::
SetSelectKernelBuildInfo
(
builder2
.
Build
(),
matmul
.
get
());
KernelBuildInfoBuilder
builder1
;
builder1
.
SetInputsFormat
({
"NC1HWC0"
});
builder1
.
SetOutputsFormat
({
"NC1HWC0"
});
builder1
.
SetInputsDeviceType
({
kFloat32
->
type_id
()});
builder1
.
SetOutputsDeviceType
({
kFloat16
->
type_id
()});
builder1
.
SetKernelType
(
KernelType
::
TBE_KERNEL
);
builder1
.
SetFusionType
(
kernel
::
FusionType
::
OPAQUE
);
builder1
.
SetProcessor
(
kernel
::
Processor
::
AICORE
);
builder1
.
SetKernelType
(
KernelType
::
TBE_KERNEL
);
cast
->
set_kernel_info
(
std
::
make_shared
<
device
::
KernelInfo
>
());
AnfAlgo
::
SetSelectKernelBuildInfo
(
builder1
.
Build
(),
cast
.
get
());
auto
optimizer
=
std
::
make_shared
<
opt
::
GraphOptimizer
>
();
auto
pm
=
std
::
make_shared
<
opt
::
PassManager
>
();
auto
buffer_fusion_pass
=
std
::
make_shared
<
opt
::
BufferFusion
>
();
pm
->
AddPass
(
buffer_fusion_pass
);
optimizer
->
AddPassManager
(
pm
);
FuncGraphPtr
new_graph
=
optimizer
->
Optimize
(
kg
);
FuncGraphPtr
g_after
=
get_py_fun_
.
CallAndParseRet
(
"test_tbe_matmul_eltwise_fusion"
,
"after"
);
EXPECT_TRUE
(
CheckEqualGraph
(
g_after
,
new_graph
));
}
}
// namespace opt
}
// namespace mindspore
tests/ut/cpp/python_input/gtest_input/pre_activate/buffer_fusion_test.py
浏览文件 @
c4a5bfb7
...
...
@@ -24,10 +24,12 @@ Reduce = P.ReduceOp()
Biasadd
=
P
.
BiasAdd
()
Biasaddgrad
=
G
.
BiasAddGrad
()
Cast
=
P
.
Cast
()
MatMul
=
P
.
MatMul
()
Fusion_relu_relu
=
Primitive
(
'FusionOp_ReLU_ReLU'
)
Fusion_biasadd
=
Primitive
(
'FusionOp_ReLU_ReLU_ReLU_BiasAdd_ReLU_ReLU_ReLU'
)
Fusion_biasaddgrad
=
Primitive
(
'FusionOp_ReLU_ReLU_ReLU_BiasAddGrad_ReLU_ReLU_ReLU'
)
Fusion_matmul_relu
=
Primitive
(
'FusionOp_MatMul_ReLU'
)
Add
=
P
.
TensorAdd
()
Sub
=
P
.
Sub
()
...
...
@@ -133,3 +135,23 @@ def test_conv_singlein_fusion(tag):
return
tuple
return
fns
[
tag
]
def
test_tbe_matmul_eltwise_fusion
(
tag
):
fns
=
FnDict
()
@
fns
def
before
(
x
,
y
):
matmul
=
MatMul
(
x
,
y
)
relu
=
Relu
(
matmul
)
res
=
Cast
(
relu
,
mstype
.
float16
)
return
res
@
fns
def
after
(
x
,
y
):
fusion
=
Fusion_matmul_relu
(
x
,
y
)
res
=
Cast
(
fusion
)
tuple
=
make_tuple
(
res
)
return
tuple
return
fns
[
tag
]
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录