Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
正统之独孤求败
mindspore
提交
e87ac652
M
mindspore
项目概览
正统之独孤求败
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
e87ac652
编写于
6月 04, 2020
作者:
Y
yujianfeng
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add batch norm fusion pattern for mix precision
上级
e7b7abc5
变更
5
显示空白变更内容
内联
并排
Showing
5 changed files
with
80 additions
and
9 deletions
+80
-9
mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc
.../ccsrc/pre_activate/ascend/ascend_backend_optimization.cc
+2
-1
mindspore/ccsrc/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion.cc
.../pre_activate/ascend/ir_fusion/fused_batch_norm_fusion.cc
+24
-1
mindspore/ccsrc/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion.h
...c/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion.h
+12
-3
tests/ut/cpp/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion_test.cc
...activate/ascend/ir_fusion/fused_batch_norm_fusion_test.cc
+26
-3
tests/ut/cpp/python_input/gtest_input/pre_activate/fused_batch_norm_fusion_test.py
.../gtest_input/pre_activate/fused_batch_norm_fusion_test.py
+16
-1
未找到文件。
mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc
浏览文件 @
e87ac652
...
...
@@ -239,7 +239,8 @@ void AscendBackendIRFusionOptimization(const std::shared_ptr<session::KernelGrap
}
else
{
ir_fusion_pm
->
AddPass
(
std
::
make_shared
<
BatchNormGradSplit
>
());
ir_fusion_pm
->
AddPass
(
std
::
make_shared
<
FusedBatchNormFusion
>
());
ir_fusion_pm
->
AddPass
(
std
::
make_shared
<
FusedBatchNormMixPrecisionFusion
>
());
ir_fusion_pm
->
AddPass
(
std
::
make_shared
<
FusedBatchNormMixPrecisionFusion0
>
());
ir_fusion_pm
->
AddPass
(
std
::
make_shared
<
FusedBatchNormMixPrecisionFusion1
>
());
}
ir_fusion_pm
->
AddPass
(
std
::
make_shared
<
AddMemcpyAsync
>
());
ir_fusion_pm
->
AddPass
(
std
::
make_shared
<
InsertPadForNMSWithMask
>
());
...
...
mindspore/ccsrc/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion.cc
浏览文件 @
e87ac652
...
...
@@ -291,7 +291,7 @@ const AnfNodePtr FusedBatchNormFusion::Process(const FuncGraphPtr &func_graph, c
return
bn_training_update_outputs
[
0
];
}
const
BaseRef
FusedBatchNormMixPrecisionFusion
::
DefinePattern
()
const
{
const
BaseRef
FusedBatchNormMixPrecisionFusion
0
::
DefinePattern
()
const
{
std
::
shared_ptr
<
Var
>
Xs
=
std
::
make_shared
<
SeqVar
>
();
VarPtr
index0
=
std
::
make_shared
<
CondVar
>
(
IsC
);
VarPtr
index1
=
std
::
make_shared
<
CondVar
>
(
IsC
);
...
...
@@ -313,5 +313,28 @@ const BaseRef FusedBatchNormMixPrecisionFusion::DefinePattern() const {
VectorRef
depend0
=
VectorRef
({
prim
::
kPrimDepend
,
tuple_getitem0
,
assign_sub0
});
return
VectorRef
({
prim
::
kPrimDepend
,
depend0
,
assign_sub1
});
}
// Builds the graph pattern matched by FusedBatchNormMixPrecisionFusion1.
//
// Shape of the pattern (innermost first):
//   bn       = batch_norm_var_(data_input0_var_, data_input1_var_, data_input2_var_, <extra inputs>)
//   update_k = AssignSub(variable_k, Mul(Cast(Sub(Cast(variable_k), TupleGetItem(bn, idx_{k+1}))), constant_k))
//              for k = 0, 1
//   result   = Depend(Depend(TupleGetItem(bn, idx0), update_0), update_1)
//
// The *_var_ operands are members declared outside this block; the tuple indices are
// fresh pattern variables rather than fixed constants.
const BaseRef FusedBatchNormMixPrecisionFusion1::DefinePattern() const {
  // Pattern variables local to this pattern. The SeqVar stands in for the trailing
  // batch-norm inputs; each CondVar is constrained by the IsC predicate.
  // NOTE(review): IsC is defined elsewhere; presumably it accepts constant nodes - confirm.
  std::shared_ptr<Var> extra_inputs = std::make_shared<SeqVar>();
  VarPtr output_idx0 = std::make_shared<CondVar>(IsC);
  VarPtr output_idx1 = std::make_shared<CondVar>(IsC);
  VarPtr output_idx2 = std::make_shared<CondVar>(IsC);

  // The batch-norm call and the output that the final result is taken from.
  VectorRef bn_node({batch_norm_var_, data_input0_var_, data_input1_var_, data_input2_var_, extra_inputs});
  VectorRef bn_out0({prim::kPrimTupleGetItem, bn_node, output_idx0});

  // Update chain for variable_input0_var_.
  VectorRef bn_out1({prim::kPrimTupleGetItem, bn_node, output_idx1});
  VectorRef variable0_cast({prim::kPrimCast, variable_input0_var_});
  VectorRef diff0({prim::kPrimSub, variable0_cast, bn_out1});
  VectorRef diff0_cast({prim::kPrimCast, diff0});
  VectorRef scaled0({prim::kPrimMul, diff0_cast, constant_input0_var_});
  VectorRef update0({prim::kPrimAssignSub, variable_input0_var_, scaled0});

  // Update chain for variable_input1_var_; same structure as the chain above.
  VectorRef bn_out2({prim::kPrimTupleGetItem, bn_node, output_idx2});
  VectorRef variable1_cast({prim::kPrimCast, variable_input1_var_});
  VectorRef diff1({prim::kPrimSub, variable1_cast, bn_out2});
  VectorRef diff1_cast({prim::kPrimCast, diff1});
  VectorRef scaled1({prim::kPrimMul, diff1_cast, constant_input1_var_});
  VectorRef update1({prim::kPrimAssignSub, variable_input1_var_, scaled1});

  // Both updates are attached to the batch-norm output through nested Depend nodes.
  VectorRef inner_depend({prim::kPrimDepend, bn_out0, update0});
  return VectorRef({prim::kPrimDepend, inner_depend, update1});
}
}
// namespace opt
}
// namespace mindspore
mindspore/ccsrc/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion.h
浏览文件 @
e87ac652
...
...
@@ -61,12 +61,21 @@ class FusedBatchNormFusion : public PatternProcessPass {
VarPtr
batch_norm_var_
;
};
class
FusedBatchNormMixPrecisionFusion
:
public
FusedBatchNormFusion
{
class
FusedBatchNormMixPrecisionFusion
0
:
public
FusedBatchNormFusion
{
public:
explicit
FusedBatchNormMixPrecisionFusion
(
bool
multigraph
=
true
)
explicit
FusedBatchNormMixPrecisionFusion
0
(
bool
multigraph
=
true
)
:
FusedBatchNormFusion
(
"fused_batch_norm_mix_precision_fusion"
,
multigraph
)
{}
~
FusedBatchNormMixPrecisionFusion
()
override
=
default
;
~
FusedBatchNormMixPrecisionFusion0
()
override
=
default
;
const
BaseRef
DefinePattern
()
const
override
;
};
// Variant of FusedBatchNormFusion that supplies its own match pattern through
// DefinePattern(); everything else is inherited from the base class.
//
// NOTE(review): the string handed to the base constructor is identical to the one
// used by FusedBatchNormMixPrecisionFusion0. If it is used as the pass name
// (presumably it is - confirm), the two passes cannot be told apart by name.
class FusedBatchNormMixPrecisionFusion1 : public FusedBatchNormFusion {
 public:
  // multigraph is forwarded unchanged to the base class; it defaults to true.
  explicit FusedBatchNormMixPrecisionFusion1(bool multigraph = true)
      : FusedBatchNormFusion("fused_batch_norm_mix_precision_fusion", multigraph) {}

  ~FusedBatchNormMixPrecisionFusion1() override = default;

  // Returns the pattern this pass matches; defined out of line.
  const BaseRef DefinePattern() const override;
};
}
// namespace opt
...
...
tests/ut/cpp/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion_test.cc
浏览文件 @
e87ac652
...
...
@@ -51,8 +51,8 @@ TEST_F(TestHWFusedBatchNormFusion, test_fused_batch_norm_fusion) {
EXPECT_TRUE
(
CheckEqualGraph
(
g_after
,
new_graph
));
}
TEST_F
(
TestHWFusedBatchNormFusion
,
test_fused_batch_norm_mix_precision_fusion
)
{
FuncGraphPtr
g
=
get_py_fun_
.
CallAndParseRet
(
"test_fused_batch_norm_fusion"
,
"before_mix_precision"
);
TEST_F
(
TestHWFusedBatchNormFusion
,
test_fused_batch_norm_mix_precision_fusion
0
)
{
FuncGraphPtr
g
=
get_py_fun_
.
CallAndParseRet
(
"test_fused_batch_norm_fusion"
,
"before_mix_precision
0
"
);
EXPECT_NE
(
g
,
nullptr
);
std
::
vector
<
int
>
shp_x
{
32
,
64
,
112
,
112
};
auto
x_abstract
=
std
::
make_shared
<
abstract
::
AbstractTensor
>
(
kFloat32
,
shp_x
);
...
...
@@ -66,7 +66,30 @@ TEST_F(TestHWFusedBatchNormFusion, test_fused_batch_norm_mix_precision_fusion) {
auto
optimizer
=
std
::
make_shared
<
opt
::
GraphOptimizer
>
();
auto
pm
=
std
::
make_shared
<
opt
::
PassManager
>
();
pm
->
AddPass
(
std
::
make_shared
<
opt
::
FusedBatchNormMixPrecisionFusion
>
());
pm
->
AddPass
(
std
::
make_shared
<
opt
::
FusedBatchNormMixPrecisionFusion0
>
());
optimizer
->
AddPassManager
(
pm
);
FuncGraphPtr
new_graph
=
optimizer
->
Optimize
(
kg
);
FuncGraphPtr
g_after
=
get_py_fun_
.
CallAndParseRet
(
"test_fused_batch_norm_fusion"
,
"after"
);
EXPECT_TRUE
(
CheckEqualGraph
(
g_after
,
new_graph
));
}
TEST_F
(
TestHWFusedBatchNormFusion
,
test_fused_batch_norm_mix_precision_fusion1
)
{
FuncGraphPtr
g
=
get_py_fun_
.
CallAndParseRet
(
"test_fused_batch_norm_fusion"
,
"before_mix_precision1"
);
EXPECT_NE
(
g
,
nullptr
);
std
::
vector
<
int
>
shp_x
{
32
,
64
,
112
,
112
};
auto
x_abstract
=
std
::
make_shared
<
abstract
::
AbstractTensor
>
(
kFloat32
,
shp_x
);
std
::
vector
<
int
>
shp_y
{
64
};
auto
y_abstract
=
std
::
make_shared
<
abstract
::
AbstractTensor
>
(
kFloat32
,
shp_y
);
AbstractBasePtrList
args_spec_list
{
x_abstract
};
for
(
size_t
i
=
0
;
i
<
6
;
++
i
)
{
args_spec_list
.
push_back
(
y_abstract
);
}
auto
kg
=
GetKernelGraph
(
g
,
args_spec_list
);
auto
optimizer
=
std
::
make_shared
<
opt
::
GraphOptimizer
>
();
auto
pm
=
std
::
make_shared
<
opt
::
PassManager
>
();
pm
->
AddPass
(
std
::
make_shared
<
opt
::
FusedBatchNormMixPrecisionFusion1
>
());
optimizer
->
AddPassManager
(
pm
);
FuncGraphPtr
new_graph
=
optimizer
->
Optimize
(
kg
);
...
...
tests/ut/cpp/python_input/gtest_input/pre_activate/fused_batch_norm_fusion_test.py
浏览文件 @
e87ac652
...
...
@@ -61,7 +61,7 @@ def test_fused_batch_norm_fusion(tag):
return
output
@
fns
def
before_mix_precision
(
input0
,
input1
,
input2
,
input3
,
input4
,
var0
,
var1
):
def
before_mix_precision
0
(
input0
,
input1
,
input2
,
input3
,
input4
,
var0
,
var1
):
batch_norm
=
BatchNorm
(
input0
,
input1
,
input2
,
input3
,
input4
)
sub0
=
Sub
(
Cast
(
var0
,
mstype
.
float32
),
tuple_getitem
(
batch_norm
,
1
))
sub1
=
Sub
(
Cast
(
var1
,
mstype
.
float32
),
tuple_getitem
(
batch_norm
,
2
))
...
...
@@ -75,6 +75,21 @@ def test_fused_batch_norm_fusion(tag):
output
=
tuple_getitem
(
outputs
,
0
)
return
output
@fns
def before_mix_precision1(input0, input1, input2, input3, input4, var0, var1):
    # Input graph for the second mix-precision batch-norm fusion pattern:
    # each variable is cast before the Sub, and the Sub result is cast again
    # before being multiplied by its constant.
    bn = BatchNorm(input0, input1, input2, input3, input4)

    # Difference between each cast variable and batch-norm outputs 1 and 2.
    delta0 = Sub(Cast(var0, mstype.float32), tuple_getitem(bn, 1))
    delta1 = Sub(Cast(var1, mstype.float32), tuple_getitem(bn, 2))

    # Scale the (re-cast) differences by the constants from the enclosing scope.
    scaled0 = Mul(Cast(delta0, mstype.float32), constant0)
    scaled1 = Mul(Cast(delta1, mstype.float32), constant1)

    # In-place updates of both variables.
    update0 = AssignSub(var0, scaled0)
    update1 = AssignSub(var1, scaled1)

    # Batch-norm output 0 is made to depend on both updates.
    guarded = depend(depend(tuple_getitem(bn, 0), update0), update1)

    outputs = make_tuple(guarded, tuple_getitem(bn, 3), tuple_getitem(bn, 4))
    return tuple_getitem(outputs, 0)
@
fns
def
after
(
input0
,
input1
,
input2
,
input3
,
input4
,
var0
,
var1
):
bn_training_reduce
=
BNTrainingReduce
(
input0
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录