Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
cd6ed0e3
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
cd6ed0e3
编写于
4月 28, 2020
作者:
J
jjfeing
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
support buffer fusion
上级
bf1d0031
变更
15
隐藏空白更改
内联
并排
Showing
15 changed file
with
386 addition
and
158 deletion
+386
-158
mindspore/_extends/parallel_compile/tbe_compiler/common.py
mindspore/_extends/parallel_compile/tbe_compiler/common.py
+6
-4
mindspore/ccsrc/kernel/kernel_fusion.cc
mindspore/ccsrc/kernel/kernel_fusion.cc
+2
-1
mindspore/ccsrc/kernel/tbe/tbe_adapter.cc
mindspore/ccsrc/kernel/tbe/tbe_adapter.cc
+46
-0
mindspore/ccsrc/kernel/tbe/tbe_adapter.h
mindspore/ccsrc/kernel/tbe/tbe_adapter.h
+5
-8
mindspore/ccsrc/kernel/tbe/tbe_kernel_build.cc
mindspore/ccsrc/kernel/tbe/tbe_kernel_build.cc
+108
-49
mindspore/ccsrc/kernel/tbe/tbe_kernel_build.h
mindspore/ccsrc/kernel/tbe/tbe_kernel_build.h
+11
-4
mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc
.../ccsrc/pre_activate/ascend/ascend_backend_optimization.cc
+2
-0
mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.cc
.../ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.cc
+86
-90
mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.h
...e/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.h
+0
-1
mindspore/ccsrc/pre_activate/ascend/ir_fusion/refresh_parameter_format.cc
...pre_activate/ascend/ir_fusion/refresh_parameter_format.cc
+71
-0
mindspore/ccsrc/pre_activate/ascend/ir_fusion/refresh_parameter_format.h
.../pre_activate/ascend/ir_fusion/refresh_parameter_format.h
+40
-0
mindspore/ccsrc/session/anf_runtime_algorithm.cc
mindspore/ccsrc/session/anf_runtime_algorithm.cc
+3
-1
mindspore/ccsrc/utils/utils.h
mindspore/ccsrc/utils/utils.h
+4
-0
mindspore/ops/_op_impl/tbe/reduce_mean.py
mindspore/ops/_op_impl/tbe/reduce_mean.py
+1
-0
mindspore/ops/operations/nn_ops.py
mindspore/ops/operations/nn_ops.py
+1
-0
未找到文件。
mindspore/_extends/parallel_compile/tbe_compiler/common.py
浏览文件 @
cd6ed0e3
...
...
@@ -122,10 +122,12 @@ def get_args(op_info, arg_type):
elif
arg_type
==
'attrs'
:
for
item
in
op_info
[
arg_type
]:
if
'value'
not
in
item
:
raise
ValueError
(
"Json string Errors, attr key:value not found."
)
if
item
[
"name"
]
!=
"isRef"
:
args
.
append
(
item
[
'value'
])
if
item
[
"valid"
]:
if
'value'
not
in
item
:
raise
ValueError
(
"Json string Errors, attr key:value not found."
)
if
item
[
"name"
]
!=
"isRef"
:
args
.
append
(
item
[
'value'
])
return
args
...
...
mindspore/ccsrc/kernel/kernel_fusion.cc
浏览文件 @
cd6ed0e3
...
...
@@ -108,7 +108,8 @@ std::map<int32_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo>
}
if
((
task_result
!=
nullptr
)
&&
(
strcmp
(
task_result
,
"Success"
)
!=
0
))
{
MS_LOG
(
DEBUG
)
<<
"fuison op build failed, err log: "
<<
task_result
<<
" change to single op build."
;
MS_LOG
(
INFO
)
<<
"Fusion warning: Fuison op build failed, err log: "
<<
task_result
<<
" change to single op build."
;
build_failed_num
++
;
}
auto
kernel_mod_item
=
build_manger
->
TaskFinishProcess
(
task_id
,
false
);
...
...
mindspore/ccsrc/kernel/tbe/tbe_adapter.cc
浏览文件 @
cd6ed0e3
...
...
@@ -153,6 +153,52 @@ void TbeAdapter::InputOrderPass(const std::string &op_name, std::vector<std::vec
}
}
void
TbeAdapter
::
FusionInputOrderPass
(
const
std
::
string
&
op_name
,
const
std
::
vector
<
nlohmann
::
json
>
&
inputs_list
,
std
::
vector
<
nlohmann
::
json
>
*
inputs_json
)
{
MS_EXCEPTION_IF_NULL
(
inputs_json
);
if
(
input_order_adjusted_ops
.
find
(
op_name
)
==
input_order_adjusted_ops
.
end
())
{
(
void
)
std
::
copy
(
inputs_list
.
begin
(),
inputs_list
.
end
(),
std
::
back_inserter
((
*
inputs_json
)));
}
else
{
if
(
op_name
==
"MinimumGrad"
||
op_name
==
"MaximumGrad"
)
{
inputs_json
->
emplace_back
(
inputs_list
[
2
]);
inputs_json
->
emplace_back
(
inputs_list
[
0
]);
inputs_json
->
emplace_back
(
inputs_list
[
1
]);
for
(
size_t
i
=
3
;
i
<
inputs_list
.
size
();
++
i
)
{
inputs_json
->
emplace_back
(
inputs_list
[
i
]);
}
}
else
{
inputs_json
->
emplace_back
(
inputs_list
[
1
]);
inputs_json
->
emplace_back
(
inputs_list
[
0
]);
for
(
size_t
i
=
2
;
i
<
inputs_list
.
size
();
++
i
)
{
inputs_json
->
emplace_back
(
inputs_list
[
i
]);
}
}
}
}
void
TbeAdapter
::
FusionDataOrderPass
(
const
std
::
string
&
op_name
,
const
std
::
vector
<
AnfNodePtr
>
&
data_layer
,
std
::
vector
<
AnfNodePtr
>
*
reorder_data_layer
)
{
MS_EXCEPTION_IF_NULL
(
reorder_data_layer
);
if
(
input_order_adjusted_ops
.
find
(
op_name
)
==
input_order_adjusted_ops
.
end
())
{
(
void
)
std
::
copy
(
data_layer
.
begin
(),
data_layer
.
end
(),
std
::
back_inserter
((
*
reorder_data_layer
)));
}
else
{
if
(
op_name
==
"MinimumGrad"
||
op_name
==
"MaximumGrad"
)
{
reorder_data_layer
->
emplace_back
(
data_layer
[
2
]);
reorder_data_layer
->
emplace_back
(
data_layer
[
0
]);
reorder_data_layer
->
emplace_back
(
data_layer
[
1
]);
for
(
size_t
i
=
3
;
i
<
data_layer
.
size
();
++
i
)
{
reorder_data_layer
->
emplace_back
(
data_layer
[
i
]);
}
}
else
{
reorder_data_layer
->
emplace_back
(
data_layer
[
1
]);
reorder_data_layer
->
emplace_back
(
data_layer
[
0
]);
for
(
size_t
i
=
2
;
i
<
data_layer
.
size
();
++
i
)
{
reorder_data_layer
->
emplace_back
(
data_layer
[
i
]);
}
}
}
}
std
::
map
<
std
::
string
,
FAttrsPass
>
TbeAdapter
::
build_json_attr_pass_map_
=
{
{
"MaximumGrad"
,
TbeAdapter
::
MaximumGradAttrJsonPass
},
{
"MinimumGrad"
,
TbeAdapter
::
MinimumGradAttrJsonPass
},
...
...
mindspore/ccsrc/kernel/tbe/tbe_adapter.h
浏览文件 @
cd6ed0e3
...
...
@@ -44,15 +44,12 @@ class TbeAdapter {
static
void
GenTopKV2IndicesTensorInfo
(
const
std
::
shared_ptr
<
AnfNode
>
&
anf_node
,
size_t
real_input_index
,
std
::
vector
<
nlohmann
::
json
>
*
input_list
,
kCreaterType
creater_type
);
static
void
FusionInputOrderPass
(
const
std
::
string
&
op_name
,
const
std
::
vector
<
nlohmann
::
json
>
&
inputs_list
,
std
::
vector
<
nlohmann
::
json
>
*
inputs_json
);
static
void
FusionDataOrderPass
(
const
std
::
string
&
op_name
,
const
std
::
vector
<
AnfNodePtr
>
&
data_layer
,
std
::
vector
<
AnfNodePtr
>
*
reorder_data_layer
);
private:
static
void
Conv2DAttrJsonPass
(
const
AnfNodePtr
&
anf_node
,
const
std
::
vector
<
std
::
shared_ptr
<
OpAttr
>>
&
op_info_attrs
,
nlohmann
::
json
*
attrs_json
);
static
void
Conv2DBackpropFilterAttrJsonPass
(
const
AnfNodePtr
&
anf_node
,
const
std
::
vector
<
std
::
shared_ptr
<
OpAttr
>>
&
op_info_attrs
,
nlohmann
::
json
*
attrs_json
);
static
void
Conv2DBackpropInputAttrJsonPass
(
const
AnfNodePtr
&
anf_node
,
const
std
::
vector
<
std
::
shared_ptr
<
OpAttr
>>
&
op_info_attrs
,
nlohmann
::
json
*
attrs_json
);
static
void
MaximumGradAttrJsonPass
(
const
AnfNodePtr
&
anf_node
,
const
std
::
vector
<
std
::
shared_ptr
<
OpAttr
>>
&
op_info_attrs
,
nlohmann
::
json
*
attrs_json
);
...
...
mindspore/ccsrc/kernel/tbe/tbe_kernel_build.cc
浏览文件 @
cd6ed0e3
...
...
@@ -375,20 +375,26 @@ bool TbeKernelJsonCreator::GenTbeAttrJson(const std::shared_ptr<AnfNode> &anf_no
MS_EXCEPTION_IF_NULL
(
primitive
);
for
(
const
auto
&
attr_ptr
:
attrs_ptr
)
{
std
::
string
attr_name
=
attr_ptr
->
name
();
nlohmann
::
json
attr_obj
;
attr_obj
[
"name"
]
=
attr_name
;
if
(
primitive
->
GetAttr
(
attr_name
)
!=
nullptr
)
{
nlohmann
::
json
attr_obj
;
auto
value
=
primitive
->
GetAttr
(
attr_name
);
std
::
string
type
=
attr_ptr
->
type
();
ParseAttrValue
(
type
,
value
,
&
attr_obj
);
attr_obj
[
"name"
]
=
attr_name
;
attr_obj
[
"valid"
]
=
true
;
(
*
attrs_json
).
push_back
(
attr_obj
);
}
else
{
if
(
attr_ptr
->
param_type
()
==
"required"
&&
creater_type_
==
SINGLE_BUILD
&&
op_info
->
impl_path
()
!=
""
)
{
MS_LOG
(
EXCEPTION
)
<<
"op name: "
<<
op_info
->
op_name
()
<<
" attr: "
<<
attr_name
<<
" is required, but not set."
;
if
(
op_info
->
impl_path
().
empty
())
{
attr_obj
[
"valid"
]
=
false
;
}
else
{
if
(
attr_ptr
->
param_type
()
==
"required"
&&
creater_type_
==
SINGLE_BUILD
)
{
MS_LOG
(
EXCEPTION
)
<<
"op name: "
<<
op_info
->
op_name
()
<<
" attr: "
<<
attr_name
<<
" is required, but not set."
;
}
else
{
attr_obj
[
"valid"
]
=
false
;
}
}
}
(
*
attrs_json
).
push_back
(
attr_obj
);
}
return
true
;
}
...
...
@@ -484,7 +490,8 @@ bool TbeKernelBuild::GenFusionScopeJson(const vector<mindspore::AnfNodePtr> &inp
MS_EXCEPTION_IF_NULL
(
fusion_kernel
);
// get input layer info
std
::
vector
<
std
::
vector
<
mindspore
::
AnfNodePtr
>>
input_layers
;
if
(
!
GetInputLayers
(
input_nodes
,
compute_nodes
,
&
input_layers
))
{
std
::
map
<
const
AnfNodePtr
,
FusionDataType
>
spec_data_input
;
if
(
!
GetInputLayers
(
input_nodes
,
compute_nodes
,
&
input_layers
,
&
spec_data_input
))
{
return
false
;
}
// gen fusion scopre_op jsom
...
...
@@ -505,8 +512,8 @@ bool TbeKernelBuild::GenFusionScopeJson(const vector<mindspore::AnfNodePtr> &inp
for
(
const
auto
&
layer
:
input_layers
)
{
for
(
const
auto
&
data_input
:
layer
)
{
nlohmann
::
json
data_str
;
if
(
!
GenFusionDataInputJson
(
data_input
,
&
data_str
,
&
index
))
{
MS_LOG
(
DEBUG
)
<<
"GenFusionDataInputJ
son faild."
;
if
(
!
GenFusionDataInputJson
(
data_input
,
spec_data_input
,
&
data_str
,
&
index
))
{
MS_LOG
(
INFO
)
<<
"Fusion error: gen fusion datainput j
son faild."
;
return
false
;
}
data_list
.
push_back
(
data_str
);
...
...
@@ -519,7 +526,7 @@ bool TbeKernelBuild::GenFusionScopeJson(const vector<mindspore::AnfNodePtr> &inp
}
void
TbeKernelBuild
::
GenDescJson
(
const
std
::
shared_ptr
<
mindspore
::
AnfNode
>
&
anf_node
,
size_t
node_out_idx
,
size_t
desc_output_idx
,
nlohmann
::
json
*
output_desc
)
{
size_t
desc_output_idx
,
nlohmann
::
json
*
output_desc
,
FusionDataType
fusion_data_type
)
{
std
::
string
output_desc_name
=
anf_node
->
fullname_with_scope
();
if
(
node_out_idx
>
0
)
{
output_desc_name
=
output_desc_name
+
"_"
+
std
::
to_string
(
node_out_idx
);
...
...
@@ -539,58 +546,109 @@ void TbeKernelBuild::GenDescJson(const std::shared_ptr<mindspore::AnfNode> &anf_
(
*
output_desc
)[
"shape"
]
=
shape
;
auto
format
=
AnfAlgo
::
GetOutputFormat
(
anf_node
,
node_out_idx
);
if
(
format
==
kOpFormat_DEFAULT
)
{
if
(
ori_shape
.
size
()
==
4
)
{
format
=
kOpFormat_NCHW
;
}
else
{
format
=
kOpFormat_ND
;
}
format
=
ori_shape
.
size
()
==
4
?
kOpFormat_NCHW
:
kOpFormat_ND
;
}
(
*
output_desc
)[
"format"
]
=
format
;
(
*
output_desc
)[
"ori_format"
]
=
kOpFormat_NCHW
;
(
*
output_desc
)[
"output_index"
]
=
desc_output_idx
;
if
(
fusion_data_type
==
kFusionAddN
&&
format
==
kOpFormat_NC1HWC0
)
{
std
::
vector
<
size_t
>
spec_shape
=
{};
spec_shape
.
emplace_back
(
shape
[
0
]);
spec_shape
.
emplace_back
(
shape
[
1
]);
spec_shape
.
emplace_back
(
shape
[
2
]
*
shape
[
3
]);
spec_shape
.
emplace_back
(
shape
[
4
]);
(
*
output_desc
)[
"shape"
]
=
spec_shape
;
}
else
if
(
fusion_data_type
==
kFusionReLUGradV2
&&
(
*
output_desc
)[
"data_type"
]
==
"uint8"
)
{
std
::
vector
<
size_t
>
spec_shape
=
{};
spec_shape
.
emplace_back
(
shape
[
0
]);
spec_shape
.
emplace_back
(
shape
[
1
]);
spec_shape
.
emplace_back
(
shape
[
2
]
*
shape
[
3
]);
spec_shape
.
emplace_back
(
16
);
(
*
output_desc
)[
"shape"
]
=
spec_shape
;
(
*
output_desc
)[
"data_type"
]
=
"bool"
;
}
}
void
TbeKernelBuild
::
GenReusedOutputDesc
(
const
shared_ptr
<
mindspore
::
AnfNode
>
&
anf_node
,
size_t
index
,
size_t
output_index
,
nlohmann
::
json
*
output_desc
)
{
std
::
string
output_desc_name
=
anf_node
->
fullname_with_scope
()
+
"_"
+
std
::
to_string
(
index
);
(
*
output_desc
)[
"name"
]
=
NormalizeFullScopeName
(
output_desc_name
);
(
*
output_desc
)[
"data_type"
]
=
tbe
::
TypeIdToString
(
kNumberTypeFloat32
);
(
*
output_desc
)[
"output_index"
]
=
output_index
;
std
::
vector
<
size_t
>
shape
;
(
*
output_desc
)[
"shape"
]
=
shape
;
}
bool
TbeKernelBuild
::
GetInputLayers
(
const
vector
<
mindspore
::
AnfNodePtr
>
&
input_nodes
,
const
vector
<
mindspore
::
AnfNodePtr
>
&
compute_nodes
,
std
::
vector
<
std
::
vector
<
mindspore
::
AnfNodePtr
>>
*
input_layers
)
{
bool
TbeKernelBuild
::
GetSpecInputLayers
(
const
std
::
string
&
op_name
,
const
std
::
vector
<
mindspore
::
AnfNodePtr
>
&
reorder_layer
,
std
::
map
<
const
AnfNodePtr
,
FusionDataType
>
*
spec_data_input
)
{
if
((
op_name
==
kReluGradV2OpName
||
op_name
==
kAddNOpName
)
&&
reorder_layer
.
empty
())
{
MS_LOG
(
INFO
)
<<
"Fusion error: node("
<<
op_name
<<
" )'s input is null. "
;
return
false
;
}
MS_LOG
(
INFO
)
<<
"Fusion info: op_name: "
<<
op_name
<<
"input layer size: "
<<
reorder_layer
.
size
();
if
(
op_name
==
kReluGradV2OpName
)
{
(
*
spec_data_input
)[
reorder_layer
[
0
]]
=
kFusionReLUGradV2
;
}
else
if
(
op_name
==
kAddNOpName
)
{
for
(
const
auto
&
it
:
reorder_layer
)
{
(
*
spec_data_input
)[
it
]
=
kFusionAddN
;
}
}
return
true
;
}
bool
TbeKernelBuild
::
GetInputLayers
(
const
std
::
vector
<
mindspore
::
AnfNodePtr
>
&
input_nodes
,
const
std
::
vector
<
mindspore
::
AnfNodePtr
>
&
compute_nodes
,
std
::
vector
<
std
::
vector
<
mindspore
::
AnfNodePtr
>>
*
input_layers
,
std
::
map
<
const
AnfNodePtr
,
FusionDataType
>
*
spec_data_input
)
{
auto
result
=
std
::
find_if
(
compute_nodes
.
begin
(),
compute_nodes
.
end
(),
[](
const
auto
&
it
)
{
auto
op_name
=
AnfAlgo
::
GetCNodeName
(
it
);
return
op_name
==
kConv2DBackpropInputOpName
;
});
bool
need_spec
=
(
result
!=
compute_nodes
.
end
());
size_t
input_size
=
0
;
for
(
const
auto
&
compute_node
:
compute_nodes
)
{
std
::
vector
<
mindspore
::
AnfNodePtr
>
layer
;
std
::
vector
<
mindspore
::
AnfNodePtr
>
layer
=
{};
std
::
vector
<
mindspore
::
AnfNodePtr
>
reorder_layer
=
{};
MS_EXCEPTION_IF_NULL
(
compute_node
);
auto
op_name
=
AnfAlgo
::
GetCNodeName
(
compute_node
);
auto
ccompute_node
=
compute_node
->
cast
<
CNodePtr
>
();
if
(
ccompute_node
==
nullptr
)
{
MS_LOG
(
DEBUG
)
<<
"
fusion compute node must be cnode"
;
MS_LOG
(
INFO
)
<<
"Fusion error:
fusion compute node must be cnode"
;
return
false
;
}
MS_LOG
(
INFO
)
<<
"Fusion info: compute name: "
<<
compute_node
->
fullname_with_scope
();
for
(
size_t
i
=
1
;
i
<
ccompute_node
->
inputs
().
size
();
++
i
)
{
auto
input
=
ccompute_node
->
input
(
i
);
auto
find_iter
=
std
::
find
(
input_nodes
.
begin
(),
input_nodes
.
end
(),
input
);
if
(
find_iter
!=
input_nodes
.
end
())
{
MS_LOG
(
INFO
)
<<
"Fusion info: add compute node's ["
<<
i
<<
"] input: "
<<
input
->
fullname_with_scope
();
layer
.
emplace_back
((
*
find_iter
));
}
else
{
MS_LOG
(
INFO
)
<<
"Fusion warnig: this input ["
<<
i
<<
"] may be pre compute("
<<
input
->
fullname_with_scope
()
<<
") node's output."
;
}
}
TbeAdapter
::
FusionDataOrderPass
(
op_name
,
layer
,
&
reorder_layer
);
if
(
need_spec
)
{
MS_LOG
(
INFO
)
<<
"Fusion info: match conv2d backprop input + ... patten."
;
if
(
!
GetSpecInputLayers
(
op_name
,
reorder_layer
,
spec_data_input
))
{
return
false
;
}
}
input_size
+=
layer
.
size
();
input_layers
->
emplace_back
(
layer
);
input_size
+=
reorder_
layer
.
size
();
input_layers
->
emplace_back
(
reorder_
layer
);
}
if
(
input_nodes
.
size
()
!=
input_size
)
{
MS_LOG
(
DEBUG
)
<<
"fusion scope error, layer input:"
<<
input_size
<<
", input_node:"
<<
input_nodes
.
size
();
MS_LOG
(
INFO
)
<<
"Fusion error: fusion scope error, layer input:"
<<
input_size
<<
", input_node:"
<<
input_nodes
.
size
();
return
false
;
}
return
true
;
}
bool
TbeKernelBuild
::
GenFusionDataInputJson
(
const
shared_ptr
<
mindspore
::
AnfNode
>
&
data_input
,
nlohmann
::
json
*
data_str
,
size_t
*
index
)
{
bool
TbeKernelBuild
::
GenFusionDataInputJson
(
const
std
::
shared_ptr
<
mindspore
::
AnfNode
>
&
data_input
,
const
std
::
map
<
const
AnfNodePtr
,
FusionDataType
>
&
spec_data_input
,
nlohmann
::
json
*
data_str
,
size_t
*
index
)
{
MS_EXCEPTION_IF_NULL
(
data_str
);
MS_EXCEPTION_IF_NULL
(
index
);
std
::
vector
<
nlohmann
::
json
>
output_desc_list
;
...
...
@@ -604,13 +662,17 @@ bool TbeKernelBuild::GenFusionDataInputJson(const shared_ptr<mindspore::AnfNode>
output_desc_list
.
push_back
(
output_desc
);
(
*
index
)
++
;
}
else
{
FusionDataType
fusion_data_type
=
kFusionNormal
;
if
(
spec_data_input
.
find
(
data_input
)
!=
spec_data_input
.
end
())
{
fusion_data_type
=
spec_data_input
.
at
(
data_input
);
}
auto
kernel_idx
=
AnfAlgo
::
VisitKernel
(
data_input
,
0
);
auto
real_node
=
kernel_idx
.
first
;
size_t
real_idx
=
kernel_idx
.
second
;
MS_LOG
(
INFO
)
<<
"real name "
<<
real_node
->
fullname_with_scope
()
<<
" index:"
<<
real_idx
;
// "output_desc"
nlohmann
::
json
output_desc
;
GenDescJson
(
real_node
,
real_idx
,
real_idx
,
&
output_desc
);
GenDescJson
(
real_node
,
real_idx
,
real_idx
,
&
output_desc
,
fusion_data_type
);
output_desc_list
.
push_back
(
output_desc
);
(
*
data_str
)[
"name"
]
=
NormalizeFullScopeName
(
real_node
->
fullname_with_scope
());
}
...
...
@@ -632,11 +694,12 @@ bool TbeKernelBuild::IsDynamicInput(const mindspore::CNodePtr &cnode) {
auto
real_input_size
=
cnode
->
inputs
().
size
()
-
1
;
auto
dyn_input_size
=
dyn_input_sizes
.
size
();
if
(
dyn_input_size
!=
1
)
{
MS_LOG
(
DEBUG
)
<<
"
fusion build not support dyn_input_sizes > 1"
;
MS_LOG
(
INFO
)
<<
"Fusion error:
fusion build not support dyn_input_sizes > 1"
;
return
ret
;
}
if
(
IntToSize
(
dyn_input_sizes
[
0
])
!=
real_input_size
)
{
MS_LOG
(
DEBUG
)
<<
" dyn_input_size"
<<
dyn_input_sizes
[
0
]
<<
"not equal real_input_size"
<<
real_input_size
;
MS_LOG
(
INFO
)
<<
"Fusion error: dyn_input_size"
<<
dyn_input_sizes
[
0
]
<<
"not equal real_input_size"
<<
real_input_size
;
return
ret
;
}
ret
=
true
;
...
...
@@ -663,6 +726,7 @@ bool TbeKernelBuild::GenFusionComputeInputJson(const mindspore::CNodePtr &cnode,
std
::
vector
<
nlohmann
::
json
>
*
input_desc_list
,
size_t
*
index
)
{
MS_EXCEPTION_IF_NULL
(
cnode
);
MS_EXCEPTION_IF_NULL
(
input_desc_list
);
std
::
vector
<
nlohmann
::
json
>
input_desc_list_tmp
=
{};
bool
is_dynamic_input
=
IsDynamicInput
(
cnode
);
for
(
size_t
i
=
1
;
i
<
cnode
->
inputs
().
size
();
++
i
)
{
auto
input
=
cnode
->
input
(
i
);
...
...
@@ -676,7 +740,7 @@ bool TbeKernelBuild::GenFusionComputeInputJson(const mindspore::CNodePtr &cnode,
MS_LOG
(
INFO
)
<<
"node has dynamic input."
;
input_desc
[
"dyn_index"
]
=
(
i
-
1
);
}
(
*
input_desc_list
)
.
emplace_back
(
input_desc
);
input_desc_list_tmp
.
emplace_back
(
input_desc
);
}
size_t
optional_num
=
GetOptionalInput
(
cnode
,
is_dynamic_input
);
if
(
optional_num
>
0
)
{
...
...
@@ -686,35 +750,24 @@ bool TbeKernelBuild::GenFusionComputeInputJson(const mindspore::CNodePtr &cnode,
optional_input_desc
[
"name"
]
=
std
::
string
(
kOptional
)
+
std
::
to_string
(
*
index
);
(
*
index
)
++
;
(
*
layer_iter
)
->
emplace_back
(
nullptr
);
(
*
input_desc_list
)
.
emplace_back
(
optional_input_desc
);
input_desc_list_tmp
.
emplace_back
(
optional_input_desc
);
}
}
auto
op_name
=
AnfAlgo
::
GetCNodeName
(
cnode
);
TbeAdapter
::
FusionInputOrderPass
(
op_name
,
input_desc_list_tmp
,
input_desc_list
);
return
true
;
}
std
::
vector
<
size_t
>
TbeKernelBuild
::
GetDescOutputIndex
(
const
std
::
vector
<
int
>
&
output_used_nums
)
{
std
::
vector
<
size_t
>
desc_output_index
=
{};
bool
find_reused
=
false
;
size_t
reused_num
=
0
;
for
(
size_t
idx
=
0
;
idx
<
output_used_nums
.
size
();
++
idx
)
{
auto
output_use_num_item
=
output_used_nums
[
idx
];
MS_LOG
(
INFO
)
<<
"output used num["
<<
idx
<<
"] = "
<<
output_use_num_item
;
if
(
output_use_num_item
==
1
||
output_use_num_item
==
0
)
{
desc_output_index
.
emplace_back
(
idx
);
if
(
output_use_num_item
>
1
)
{
desc_output_index
.
emplace_back
(
idx
);
}
else
{
if
(
!
find_reused
)
{
desc_output_index
.
emplace_back
(
idx
);
}
else
{
desc_output_index
.
emplace_back
(
desc_output_index
[
idx
-
1
]);
}
reused_num
+=
(
output_use_num_item
-
1
);
find_reused
=
true
;
}
}
auto
pad_value
=
output_used_nums
.
size
()
==
1
?
0
:
desc_output_index
[
desc_output_index
.
size
()
-
1
]
+
1
;
for
(
size_t
i
=
0
;
i
<
reused_num
;
++
i
)
{
desc_output_index
.
emplace_back
(
pad_value
);
}
return
desc_output_index
;
}
...
...
@@ -811,6 +864,7 @@ bool TbeKernelBuild::GetIOSize(const nlohmann::json &fusion_op_list, const vecto
}
auto
ret
=
GetIOSizeImpl
(
data_output
);
input_size_list
->
push_back
(
ret
);
MS_LOG
(
INFO
)
<<
"Fusion info: scope input name: "
<<
op
[
"name"
]
<<
", size: "
<<
ret
;
}
}
}
...
...
@@ -819,26 +873,31 @@ bool TbeKernelBuild::GetIOSize(const nlohmann::json &fusion_op_list, const vecto
auto
kernel_idx
=
AnfAlgo
::
VisitKernel
(
output_node
,
0
);
auto
real_node
=
kernel_idx
.
first
;
size_t
real_idx
=
kernel_idx
.
second
;
auto
normal_name
=
NormalizeFullScopeName
(
real_node
->
fullname_with_scope
());
MS_LOG
(
INFO
)
<<
"Fusion info: real node name: "
<<
normal_name
<<
", real output index: "
<<
real_idx
;
for
(
const
auto
&
op
:
fusion_op_list
)
{
auto
normal_name
=
NormalizeFullScopeName
(
real_node
->
fullname_with_scope
());
if
(
op
[
"name"
]
==
normal_name
)
{
auto
op_output_desces
=
op
[
"output_desc"
];
if
(
output_node
!=
real_node
)
{
// tuple_get item
MS_LOG
(
DEBUG
)
<<
"output is a tuple getitem node"
;
MS_LOG
(
INFO
)
<<
"output is a tuple getitem node"
;
auto
output_desc
=
op_output_desces
[
real_idx
];
if
(
output_desc
[
"shape"
].
empty
())
{
continue
;
MS_LOG
(
INFO
)
<<
"Fusion error: output_desc's shape is empty. real_index "
<<
real_idx
;
return
false
;
}
auto
ret
=
GetIOSizeImpl
(
output_desc
);
output_size_list
->
push_back
(
ret
);
MS_LOG
(
INFO
)
<<
"Fusion info: scope output index: "
<<
real_idx
<<
", size: "
<<
ret
;
}
else
{
for
(
const
auto
&
output_desc
:
op_output_desces
)
{
if
(
output_desc
[
"shape"
].
empty
())
{
MS_LOG
(
INFO
)
<<
"Fusion info: output_desc's shape is empty, may be this node output"
;
continue
;
}
auto
ret
=
GetIOSizeImpl
(
output_desc
);
output_size_list
->
push_back
(
ret
);
MS_LOG
(
INFO
)
<<
"Fusion info: scope output size: "
<<
ret
;
}
}
}
...
...
mindspore/ccsrc/kernel/tbe/tbe_kernel_build.h
浏览文件 @
cd6ed0e3
...
...
@@ -35,6 +35,8 @@ namespace kernel {
// kernel operate type used for generate json
class
TbeKernelBuild
{
enum
FusionDataType
{
kFusionNormal
=
0
,
kFusionAddN
,
kFusionReLUGradV2
};
public:
static
bool
GetIOSize
(
const
nlohmann
::
json
&
kernel_json
,
std
::
vector
<
size_t
>
*
input_size_list
,
std
::
vector
<
size_t
>
*
output_size_list
);
...
...
@@ -48,8 +50,9 @@ class TbeKernelBuild {
private:
TbeKernelBuild
()
=
default
;
~
TbeKernelBuild
()
=
default
;
static
bool
GenFusionDataInputJson
(
const
std
::
shared_ptr
<
mindspore
::
AnfNode
>
&
data_input
,
nlohmann
::
json
*
data_str
,
size_t
*
index
);
static
bool
GenFusionDataInputJson
(
const
std
::
shared_ptr
<
mindspore
::
AnfNode
>
&
data_input
,
const
std
::
map
<
const
AnfNodePtr
,
FusionDataType
>
&
spec_data_input
,
nlohmann
::
json
*
data_str
,
size_t
*
index
);
static
bool
GenFusionComputeJson
(
const
mindspore
::
AnfNodePtr
&
compute_node
,
std
::
vector
<
std
::
vector
<
mindspore
::
AnfNodePtr
>>::
iterator
*
layer_iter
,
nlohmann
::
json
*
compute_op_str
,
std
::
string
*
fusion_kernel_name
,
size_t
*
index
);
...
...
@@ -60,13 +63,17 @@ class TbeKernelBuild {
static
bool
GenFusionComputeOutputJson
(
const
mindspore
::
CNodePtr
&
cnode
,
std
::
vector
<
nlohmann
::
json
>
*
output_desc_list
);
static
void
GenDescJson
(
const
std
::
shared_ptr
<
mindspore
::
AnfNode
>
&
anf_node
,
size_t
node_out_idx
,
size_t
desc_output_idx
,
nlohmann
::
json
*
output_desc
);
size_t
desc_output_idx
,
nlohmann
::
json
*
output_desc
,
FusionDataType
fusion_data_type
=
kFusionNormal
);
static
void
GenReusedOutputDesc
(
const
std
::
shared_ptr
<
mindspore
::
AnfNode
>
&
anf_node
,
size_t
index
,
size_t
output_index
,
nlohmann
::
json
*
output_desc
);
static
size_t
GetIOSizeImpl
(
const
nlohmann
::
json
&
desc
);
static
bool
GetSpecInputLayers
(
const
std
::
string
&
op_name
,
const
std
::
vector
<
mindspore
::
AnfNodePtr
>
&
reorder_layer
,
std
::
map
<
const
AnfNodePtr
,
FusionDataType
>
*
spec_data_input
);
static
bool
GetInputLayers
(
const
std
::
vector
<
mindspore
::
AnfNodePtr
>
&
input_nodes
,
const
std
::
vector
<
mindspore
::
AnfNodePtr
>
&
compute_nodes
,
std
::
vector
<
std
::
vector
<
mindspore
::
AnfNodePtr
>>
*
input_layers
);
std
::
vector
<
std
::
vector
<
mindspore
::
AnfNodePtr
>>
*
input_layers
,
std
::
map
<
const
AnfNodePtr
,
FusionDataType
>
*
spec_data_input
);
static
bool
IsDynamicInput
(
const
CNodePtr
&
cnode
);
static
size_t
GetOptionalInput
(
const
CNodePtr
&
cnode
,
bool
is_dynamic_input
);
};
...
...
mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc
浏览文件 @
cd6ed0e3
...
...
@@ -38,6 +38,7 @@
#include "pre_activate/ascend/ir_fusion/adam_apply_one_fusion.h"
#include "pre_activate/ascend/ir_fusion/adam_apply_one_with_decay_rule.h"
#include "pre_activate/ascend/ir_fusion/parameter_and_transop_fusion.h"
#include "pre_activate/ascend/ir_fusion/refresh_parameter_format.h"
#include "pre_activate/ascend/ir_fusion/transpose_transdata_fusion.h"
#include "pre_activate/ascend/ir_fusion/transdata_split.h"
#include "pre_activate/ascend/ir_fission/topk_split.h"
...
...
@@ -265,6 +266,7 @@ void AscendBackendOptimization(const std::shared_ptr<session::KernelGraph> &kern
other_pm
->
AddPass
(
std
::
make_shared
<
AllReduceFusion
>
());
other_pm
->
AddPass
(
std
::
make_shared
<
AllGatherFusion
>
());
other_pm
->
AddPass
(
std
::
make_shared
<
ParameterTransOpFusion
>
());
other_pm
->
AddPass
(
std
::
make_shared
<
RefreshParameterFormat
>
());
other_pm
->
AddPass
(
std
::
make_shared
<
BufferFusion
>
());
other_pm
->
AddPass
(
std
::
make_shared
<
GetitemTuple
>
());
other_pm
->
AddPass
(
std
::
make_shared
<
CommonSubexpressionElimination
>
());
...
...
mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.cc
浏览文件 @
cd6ed0e3
...
...
@@ -24,6 +24,7 @@
#include <memory>
#include <string>
#include <algorithm>
#include <iterator>
#include "kernel/kernel_fusion.h"
#include "debug/anf_ir_dump.h"
...
...
@@ -261,23 +262,24 @@ CNodePtr CreateFusionOp(const std::vector<AnfNodePtr> &inputs_list, const std::v
return
buffer_fusion_kernel
;
}
kernel
::
KernelBuildInfoPtr
CreateFusionOpKernelInfo
(
const
std
::
vector
<
AnfNodePtr
>
&
inputs_list_in
,
const
std
::
vector
<
AnfNodePtr
>
&
inputs_list
,
kernel
::
KernelBuildInfoPtr
CreateFusionOpKernelInfo
(
const
std
::
vector
<
AnfNodePtr
>
&
inputs_list
,
const
std
::
vector
<
AnfNodePtr
>
&
outputs_list
)
{
MS_LOG
(
DEBUG
)
<<
"Start Create Kernel Info"
;
kernel
::
KernelBuildInfo
::
KernelBuildInfoBuilder
builder
;
// inputs format and data type
std
::
vector
<
std
::
string
>
inputs_format
;
std
::
vector
<
TypeId
>
inputs_data_type
;
for
(
auto
node
:
inputs_list_in
)
{
auto
cnode
=
node
->
cast
<
CNodePtr
>
();
MS_EXCEPTION_IF_NULL
(
cnode
);
auto
&
inputs
=
cnode
->
inputs
();
for
(
size_t
input_index
=
1
;
input_index
<
inputs
.
size
();
++
input_index
)
{
if
(
std
::
find
(
inputs_list
.
begin
(),
inputs_list
.
end
(),
inputs
[
input_index
])
!=
inputs_list
.
end
())
{
inputs_format
.
push_back
(
AnfAlgo
::
GetInputFormat
(
node
,
input_index
-
1
));
inputs_data_type
.
push_back
(
AnfAlgo
::
GetInputDeviceDataType
(
node
,
input_index
-
1
));
}
for
(
const
auto
&
input
:
inputs_list
)
{
if
(
input
->
isa
<
CNode
>
()
&&
AnfAlgo
::
GetCNodeName
(
input
)
==
prim
::
kPrimTupleGetItem
->
name
())
{
auto
tuple_getitem
=
input
->
cast
<
CNodePtr
>
();
MS_EXCEPTION_IF_NULL
(
tuple_getitem
);
inputs_format
.
push_back
(
AnfAlgo
::
GetOutputFormat
(
tuple_getitem
->
input
(
1
),
IntToSize
(
GetValue
<
int
>
(
GetValueNode
(
tuple_getitem
->
input
(
2
))))));
inputs_data_type
.
push_back
(
AnfAlgo
::
GetOutputDeviceDataType
(
tuple_getitem
->
input
(
1
),
IntToSize
(
GetValue
<
int
>
(
GetValueNode
(
tuple_getitem
->
input
(
2
))))));
}
else
{
inputs_format
.
push_back
(
AnfAlgo
::
GetOutputFormat
(
input
,
0
));
inputs_data_type
.
push_back
(
AnfAlgo
::
GetOutputDeviceDataType
(
input
,
0
));
}
}
// outputs format and data type
...
...
@@ -360,62 +362,6 @@ void ReplaceOldNode(std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusi
}
}
void
GetInputList
(
const
CNodePtr
&
node
,
const
int32_t
cur_fusion_id
,
std
::
vector
<
AnfNodePtr
>
*
inputs_list
)
{
MS_EXCEPTION_IF_NULL
(
node
);
MS_EXCEPTION_IF_NULL
(
inputs_list
);
auto
&
inputs
=
node
->
inputs
();
for
(
size_t
input_index
=
1
;
input_index
<
inputs
.
size
();
++
input_index
)
{
auto
input
=
inputs
[
input_index
];
if
(
AnfAlgo
::
IsRealCNodeKernel
(
input
))
{
if
(
AnfAlgo
::
HasNodeAttr
(
kOpAttrFusionId
,
input
))
{
auto
fusion_id
=
AnfAlgo
::
GetNodeAttr
<
int32_t
>
(
input
,
kOpAttrFusionId
);
if
(
fusion_id
!=
cur_fusion_id
)
{
inputs_list
->
push_back
(
input
);
}
}
else
{
inputs_list
->
push_back
(
input
);
}
}
else
if
(
input
->
isa
<
CNode
>
())
{
for
(
auto
&
input_in
:
input
->
cast
<
CNodePtr
>
()
->
inputs
())
{
if
(
AnfAlgo
::
IsRealCNodeKernel
(
input_in
))
{
if
(
AnfAlgo
::
HasNodeAttr
(
kOpAttrFusionId
,
input_in
))
{
auto
fusion_id
=
AnfAlgo
::
GetNodeAttr
<
int32_t
>
(
input_in
,
kOpAttrFusionId
);
if
(
fusion_id
!=
cur_fusion_id
)
{
inputs_list
->
push_back
(
input
);
}
}
else
{
inputs_list
->
push_back
(
input
);
}
}
}
}
else
{
inputs_list
->
push_back
(
input
);
}
}
}
void
CheckCurrentNodeIsInput
(
const
CNodePtr
&
node
,
const
int32_t
&
cur_fusion_id
,
std
::
unordered_map
<
int32_t
,
BufferFusionInfo_t
>
*
buffer_fusion_infos
)
{
MS_EXCEPTION_IF_NULL
(
buffer_fusion_infos
);
if
((
*
buffer_fusion_infos
).
find
(
cur_fusion_id
)
==
(
*
buffer_fusion_infos
).
end
())
{
BufferFusionInfo_t
buffer_fusion_info
;
(
*
buffer_fusion_infos
)[
cur_fusion_id
]
=
buffer_fusion_info
;
}
std
::
vector
<
AnfNodePtr
>
inputs_list
;
GetInputList
(
node
,
cur_fusion_id
,
&
inputs_list
);
if
(
!
inputs_list
.
empty
())
{
if
(
!
(
*
buffer_fusion_infos
)[
cur_fusion_id
].
inputs_list
.
empty
())
{
(
void
)(
*
buffer_fusion_infos
)[
cur_fusion_id
].
inputs_list
.
insert
(
(
*
buffer_fusion_infos
)[
cur_fusion_id
].
inputs_list
.
end
(),
inputs_list
.
begin
(),
inputs_list
.
end
());
(
void
)(
*
buffer_fusion_infos
)[
cur_fusion_id
].
inputs_list_in
.
insert
(
(
*
buffer_fusion_infos
)[
cur_fusion_id
].
inputs_list_in
.
end
(),
node
);
}
else
{
(
*
buffer_fusion_infos
)[
cur_fusion_id
].
inputs_list
=
inputs_list
;
(
*
buffer_fusion_infos
)[
cur_fusion_id
].
inputs_list_in
.
push_back
(
node
);
}
}
}
void
GetFusionScopeComputeNodeList
(
session
::
KernelGraph
*
kernel_graph
,
std
::
unordered_map
<
int32_t
,
BufferFusionInfo_t
>
*
buffer_fusion_infos
)
{
MS_EXCEPTION_IF_NULL
(
buffer_fusion_infos
);
...
...
@@ -429,6 +375,45 @@ void GetFusionScopeComputeNodeList(session::KernelGraph *kernel_graph,
}
}
void
GetFusionScopeInputNodeList
(
session
::
KernelGraph
*
kernel_graph
,
std
::
unordered_map
<
int32_t
,
BufferFusionInfo_t
>
*
buffer_fusion_infos
)
{
MS_EXCEPTION_IF_NULL
(
kernel_graph
);
MS_EXCEPTION_IF_NULL
(
buffer_fusion_infos
);
auto
manager
=
kernel_graph
->
manager
();
MS_EXCEPTION_IF_NULL
(
manager
);
for
(
auto
&
buffer_fusion_info
:
*
buffer_fusion_infos
)
{
auto
fusion_id
=
buffer_fusion_info
.
first
;
auto
fusion_info
=
buffer_fusion_info
.
second
;
for
(
const
auto
&
node
:
fusion_info
.
anf_nodes
)
{
auto
cnode
=
node
->
cast
<
CNodePtr
>
();
for
(
size_t
idx
=
1
;
idx
<
cnode
->
inputs
().
size
();
++
idx
)
{
auto
real_input
=
AnfAlgo
::
VisitKernel
(
cnode
->
input
(
idx
),
0
);
if
(
std
::
find
(
fusion_info
.
anf_nodes
.
begin
(),
fusion_info
.
anf_nodes
.
end
(),
real_input
.
first
)
==
fusion_info
.
anf_nodes
.
end
())
{
if
(
std
::
find
((
*
buffer_fusion_infos
)[
fusion_id
].
inputs_list
.
begin
(),
(
*
buffer_fusion_infos
)[
fusion_id
].
inputs_list
.
end
(),
cnode
->
input
(
idx
))
==
(
*
buffer_fusion_infos
)[
fusion_id
].
inputs_list
.
end
())
{
(
*
buffer_fusion_infos
)[
fusion_id
].
inputs_list
.
push_back
(
cnode
->
input
(
idx
));
}
}
}
}
}
}
bool
TupleGetitemNodeCompare
(
const
AnfNodePtr
&
node1
,
const
AnfNodePtr
&
node2
)
{
MS_EXCEPTION_IF_NULL
(
node1
);
MS_EXCEPTION_IF_NULL
(
node2
);
auto
getitem1
=
node1
->
cast
<
CNodePtr
>
();
auto
getitem2
=
node2
->
cast
<
CNodePtr
>
();
MS_EXCEPTION_IF_NULL
(
getitem1
);
MS_EXCEPTION_IF_NULL
(
getitem2
);
auto
output_idx1
=
GetValue
<
int
>
(
GetValueNode
(
getitem1
->
input
(
2
)));
auto
output_idx2
=
GetValue
<
int
>
(
GetValueNode
(
getitem2
->
input
(
2
)));
return
output_idx1
<
output_idx2
;
}
void
GetFusionScopeOutputNodeList
(
session
::
KernelGraph
*
kernel_graph
,
std
::
unordered_map
<
int32_t
,
BufferFusionInfo_t
>
*
buffer_fusion_infos
)
{
MS_EXCEPTION_IF_NULL
(
kernel_graph
);
...
...
@@ -454,14 +439,7 @@ void GetFusionScopeOutputNodeList(session::KernelGraph *kernel_graph,
std
::
transform
(
manager
->
node_users
()[
node
].
begin
(),
manager
->
node_users
()[
node
].
end
(),
std
::
back_inserter
(
tuple_getitem_nodes
),
[](
const
std
::
pair
<
AnfNodePtr
,
int
>
&
use_node
)
{
return
use_node
.
first
;
});
std
::
sort
(
tuple_getitem_nodes
.
begin
(),
tuple_getitem_nodes
.
end
(),
[](
const
AnfNodePtr
&
node1
,
const
AnfNodePtr
&
node2
)
{
auto
getitem1
=
node1
->
cast
<
CNodePtr
>
();
auto
getitem2
=
node2
->
cast
<
CNodePtr
>
();
auto
output_idx1
=
GetValue
<
int
>
(
GetValueNode
(
getitem1
->
input
(
2
)));
auto
output_idx2
=
GetValue
<
int
>
(
GetValueNode
(
getitem2
->
input
(
2
)));
return
output_idx1
<
output_idx2
;
});
std
::
sort
(
tuple_getitem_nodes
.
begin
(),
tuple_getitem_nodes
.
end
(),
TupleGetitemNodeCompare
);
for
(
auto
getitem
:
tuple_getitem_nodes
)
{
auto
getitem_ptr
=
getitem
->
cast
<
CNodePtr
>
();
auto
input2
=
getitem_ptr
->
input
(
2
);
...
...
@@ -484,6 +462,36 @@ void GetFusionScopeOutputNodeList(session::KernelGraph *kernel_graph,
}
}
void
SetFusionOpRefInfos
(
session
::
KernelGraph
*
kernel_graph
,
const
std
::
vector
<
AnfNodePtr
>
&
outputs_list
,
const
AnfNodePtr
&
fusion_kernel
)
{
MS_EXCEPTION_IF_NULL
(
kernel_graph
);
auto
manager
=
kernel_graph
->
manager
();
MS_EXCEPTION_IF_NULL
(
manager
);
for
(
size_t
idx
=
0
;
idx
<
outputs_list
.
size
();
++
idx
)
{
auto
output
=
outputs_list
[
idx
];
if
(
output
->
isa
<
CNode
>
()
&&
AnfAlgo
::
GetCNodeName
(
output
)
==
prim
::
kPrimTupleGetItem
->
name
())
{
auto
real_output
=
AnfAlgo
::
VisitKernel
(
output
,
0
);
auto
output_cnode
=
output
->
cast
<
CNodePtr
>
();
MS_EXCEPTION_IF_NULL
(
output_cnode
);
auto
input2
=
output_cnode
->
input
(
2
);
auto
output_idx
=
GetValue
<
int
>
(
GetValueNode
(
input2
));
session
::
AnfWithOutIndex
out_pair
(
real_output
.
first
,
output_idx
);
if
(
kernel_graph
->
IsInRefOutputMap
(
out_pair
))
{
auto
origin_pair
=
kernel_graph
->
GetRefCorrespondOutput
(
out_pair
);
session
::
AnfWithOutIndex
fusion_final_pair
(
fusion_kernel
,
idx
);
kernel_graph
->
AddRefCorrespondPairs
(
fusion_final_pair
,
origin_pair
);
}
}
else
{
session
::
AnfWithOutIndex
out_pair
(
output
,
0
);
if
(
kernel_graph
->
IsInRefOutputMap
(
out_pair
))
{
auto
origin_pair
=
kernel_graph
->
GetRefCorrespondOutput
(
out_pair
);
session
::
AnfWithOutIndex
fusion_final_pair
(
fusion_kernel
,
idx
);
kernel_graph
->
AddRefCorrespondPairs
(
fusion_final_pair
,
origin_pair
);
}
}
}
}
void
MatchConvBnreduce
(
const
CNodePtr
&
cnode
,
const
session
::
KernelGraph
&
kernel_graph
,
std
::
unordered_set
<
AnfNodePtr
>
*
fused_set
,
FusedNodeRecord
*
candidate_fusion
)
{
MS_EXCEPTION_IF_NULL
(
cnode
);
...
...
@@ -634,24 +642,12 @@ void MatchFusionTypePattern(const session::KernelGraph &kernel_graph, std::unord
void
BufferFusion
::
GetBufferFusionInfo
(
session
::
KernelGraph
*
kernel_graph
,
std
::
unordered_map
<
int32_t
,
BufferFusionInfo_t
>
*
buffer_fusion_infos
)
const
{
MS_EXCEPTION_IF_NULL
(
buffer_fusion_infos
);
std
::
vector
<
AnfNodePtr
>
node_list
=
TopoSort
(
kernel_graph
->
get_return
());
for
(
auto
&
node
:
node_list
)
{
if
(
!
AnfAlgo
::
IsRealCNodeKernel
(
node
))
{
continue
;
}
auto
cnode
=
node
->
cast
<
CNodePtr
>
();
MS_EXCEPTION_IF_NULL
(
cnode
);
if
(
AnfAlgo
::
HasNodeAttr
(
kOpAttrFusionId
,
cnode
))
{
auto
cur_fusion_id
=
AnfAlgo
::
GetNodeAttr
<
int32_t
>
(
cnode
,
kOpAttrFusionId
);
CheckCurrentNodeIsInput
(
cnode
,
cur_fusion_id
,
buffer_fusion_infos
);
}
}
GetFusionScopeComputeNodeList
(
kernel_graph
,
buffer_fusion_infos
);
GetFusionScopeInputNodeList
(
kernel_graph
,
buffer_fusion_infos
);
GetFusionScopeOutputNodeList
(
kernel_graph
,
buffer_fusion_infos
);
for
(
auto
&
buffer_fusion_info
:
*
buffer_fusion_infos
)
{
buffer_fusion_info
.
second
.
kernel_build_info
=
CreateFusionOpKernelInfo
(
buffer_fusion_info
.
second
.
inputs_list_in
,
buffer_fusion_info
.
second
.
inputs_list
,
buffer_fusion_info
.
second
.
outputs_list
);
CreateFusionOpKernelInfo
(
buffer_fusion_info
.
second
.
inputs_list
,
buffer_fusion_info
.
second
.
outputs_list
);
}
}
...
...
@@ -743,7 +739,7 @@ bool BufferFusion::ReplaceFusionOp(std::unordered_map<int32_t, BufferFusionInfo_
}
AnfAlgo
::
SetOutputInferTypeAndShape
(
types
,
shapes
,
buffer_fusion
.
get
());
AnfAlgo
::
SetKernelMod
(
kernel_ptr
,
buffer_fusion
.
get
());
// replace node
SetFusionOpRefInfos
(
kernel_graph
,
buffer_fusion_info
.
outputs_list
,
buffer_fusion
);
ReplaceOldNode
(
buffer_fusion_infos
,
fusion_id
,
buffer_fusion
,
kernel_graph
);
return
true
;
}
...
...
mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.h
浏览文件 @
cd6ed0e3
...
...
@@ -30,7 +30,6 @@ namespace opt {
struct
BufferFusionInfo_t
{
std
::
vector
<
AnfNodePtr
>
anf_nodes
;
std
::
vector
<
AnfNodePtr
>
inputs_list
;
std
::
vector
<
AnfNodePtr
>
inputs_list_in
;
std
::
vector
<
AnfNodePtr
>
outputs_list
;
kernel
::
KernelBuildInfoPtr
kernel_build_info
;
};
...
...
mindspore/ccsrc/pre_activate/ascend/ir_fusion/refresh_parameter_format.cc
0 → 100644
浏览文件 @
cd6ed0e3
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "pre_activate/ascend/ir_fusion/refresh_parameter_format.h"
#include "session/anf_runtime_algorithm.h"
#include "utils/utils.h"
#include "operator/ops.h"
#include "device/kernel_info.h"
#include "pre_activate/common/helper.h"
#include "pre_activate/common/optimizer.h"
#include "pre_activate/ascend/ascend_helper.h"
namespace
mindspore
{
namespace
opt
{
void
DoRefresh
(
const
CNodePtr
&
cnode
)
{
if
(
cnode
==
nullptr
)
{
MS_LOG
(
EXCEPTION
)
<<
"node is nullptr"
;
}
for
(
size_t
input_index
=
0
;
input_index
<
AnfAlgo
::
GetInputTensorNum
(
cnode
);
input_index
++
)
{
auto
input_kernel_node
=
AnfAlgo
::
GetInputNode
(
cnode
,
input_index
);
if
(
input_kernel_node
->
isa
<
Parameter
>
())
{
std
::
shared_ptr
<
kernel
::
KernelBuildInfo
::
KernelBuildInfoBuilder
>
builder
=
std
::
make_shared
<
kernel
::
KernelBuildInfo
::
KernelBuildInfoBuilder
>
();
auto
cnode_input_format
=
AnfAlgo
::
GetInputFormat
(
cnode
,
input_index
);
auto
kernel_node_format
=
AnfAlgo
::
GetOutputFormat
(
input_kernel_node
,
0
);
auto
dtype
=
AnfAlgo
::
GetOutputDeviceDataType
(
input_kernel_node
,
0
);
if
(
kernel_node_format
!=
cnode_input_format
)
{
builder
->
SetOutputsFormat
({
cnode_input_format
});
builder
->
SetOutputsDeviceType
({
dtype
});
AnfAlgo
::
SetSelectKernelBuildInfo
(
builder
->
Build
(),
input_kernel_node
.
get
());
}
}
}
}
bool
RefreshParameterFormat
::
Run
(
const
FuncGraphPtr
&
func_graph
)
{
if
(
func_graph
==
nullptr
)
{
MS_LOG
(
ERROR
)
<<
"func_graph is nullptr."
;
return
false
;
}
std
::
vector
<
AnfNodePtr
>
node_list
=
TopoSort
(
func_graph
->
get_return
());
for
(
auto
node
:
node_list
)
{
if
(
node
==
nullptr
||
!
node
->
isa
<
CNode
>
())
{
continue
;
}
auto
cnode
=
node
->
cast
<
CNodePtr
>
();
if
(
cnode
==
nullptr
)
{
continue
;
}
auto
node_name
=
AnfAlgo
::
GetCNodeName
(
cnode
);
if
(
node_name
==
kBNTrainingUpdateOpName
)
{
DoRefresh
(
cnode
);
}
}
return
true
;
}
}
// namespace opt
}
// namespace mindspore
mindspore/ccsrc/pre_activate/ascend/ir_fusion/refresh_parameter_format.h
0 → 100644
浏览文件 @
cd6ed0e3
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_REFRESH_PARAMETER_FORMAT_H_
#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_REFRESH_PARAMETER_FORMAT_H_
#include <vector>
#include <memory>
#include <utility>
#include "ir/anf.h"
#include "pre_activate/common/pass.h"
namespace
mindspore
{
namespace
opt
{
class
RefreshParameterFormat
:
public
Pass
{
public:
explicit
RefreshParameterFormat
(
size_t
groups
=
1
)
:
Pass
(
"refresh_parameter_format"
),
groups_
(
groups
)
{}
~
RefreshParameterFormat
()
override
=
default
;
bool
Run
(
const
FuncGraphPtr
&
graph
)
override
;
private:
size_t
groups_
=
1
;
};
}
// namespace opt
}
// namespace mindspore
#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_REFRESH_PARAMETER_FORMAT_H_
mindspore/ccsrc/session/anf_runtime_algorithm.cc
浏览文件 @
cd6ed0e3
...
...
@@ -825,6 +825,8 @@ size_t AnfRuntimeAlgorithm::GetRealInputIndex(const mindspore::AnfNodePtr &anf_n
static
std
::
map
<
std
::
string
,
std
::
map
<
size_t
,
size_t
>>
spec_node_list
=
{
{
prim
::
kPrimConv2DBackpropInput
->
name
(),
{{
0
,
1
},
{
1
,
0
}}},
{
prim
::
kPrimConv2DBackpropFilter
->
name
(),
{{
0
,
1
},
{
1
,
0
}}},
{
kFusionOpConv2DBackpropInputReluGradV2Name
,
{{
0
,
1
},
{
1
,
0
}}},
{
kFusionOpConv2DBackpropInputAddNReluGradV2Name
,
{{
0
,
1
},
{
1
,
0
}}},
{
prim
::
kPrimLogSoftmaxGrad
->
name
(),
{{
0
,
1
},
{
1
,
0
}}},
{
prim
::
kPrimLayerNormGrad
->
name
(),
{{
0
,
1
},
{
1
,
0
},
{
2
,
2
},
{
3
,
3
},
{
4
,
4
}}},
{
prim
::
kPrimLayerNormBetaGammaBackprop
->
name
(),
{{
0
,
1
},
{
1
,
0
},
{
2
,
2
},
{
3
,
3
}}},
...
...
@@ -835,7 +837,7 @@ size_t AnfRuntimeAlgorithm::GetRealInputIndex(const mindspore::AnfNodePtr &anf_n
auto
node_name
=
AnfAlgo
::
GetCNodeName
(
anf_node
);
if
(
AnfAlgo
::
GetKernelType
(
anf_node
)
==
TBE_KERNEL
)
{
auto
find
=
spec_node_list
.
find
(
node_name
);
if
(
find
!=
spec_node_list
.
end
())
{
if
(
find
!=
spec_node_list
.
end
()
&&
cur_index
<
find
->
second
.
size
()
)
{
ret
=
find
->
second
[
cur_index
];
MS_LOG
(
INFO
)
<<
"Real input index change to"
<<
ret
<<
", node name:"
<<
node_name
;
}
...
...
mindspore/ccsrc/utils/utils.h
浏览文件 @
cd6ed0e3
...
...
@@ -122,6 +122,10 @@ constexpr auto kSendOpName = "Send";
constexpr
auto
kRecvOpName
=
"Recv"
;
constexpr
auto
kReluV2OpName
=
"ReLUV2"
;
constexpr
auto
kReluGradV2OpName
=
"ReluGradV2"
;
constexpr
auto
kAddNOpName
=
"AddN"
;
constexpr
auto
kConv2DBackpropInputOpName
=
"Conv2DBackpropInput"
;
constexpr
auto
kFusionOpConv2DBackpropInputReluGradV2Name
=
"FusionOp_Conv2DBackpropInput_ReluGradV2"
;
constexpr
auto
kFusionOpConv2DBackpropInputAddNReluGradV2Name
=
"FusionOp_Conv2DBackpropInput_AddN_ReluGradV2"
;
// attr key name
constexpr
auto
kAttrInputNames
=
"input_names"
;
...
...
mindspore/ops/_op_impl/tbe/reduce_mean.py
浏览文件 @
cd6ed0e3
...
...
@@ -31,6 +31,7 @@ reduce_mean_op_info = TBERegOp("ReduceMean") \
.
dtype_format
(
DataType
.
U8_Default
,
DataType
.
U8_Default
)
\
.
dtype_format
(
DataType
.
F16_Default
,
DataType
.
F16_Default
)
\
.
dtype_format
(
DataType
.
F32_Default
,
DataType
.
F32_Default
)
\
.
dtype_format
(
DataType
.
F16_5HD
,
DataType
.
F16_5HD
)
\
.
get_op_info
()
...
...
mindspore/ops/operations/nn_ops.py
浏览文件 @
cd6ed0e3
...
...
@@ -654,6 +654,7 @@ class Conv2D(PrimitiveWithInfer):
self
.
add_prim_attr
(
'data_format'
,
"NCHW"
)
self
.
out_channel
=
validator
.
check_integer
(
'out_channel'
,
out_channel
,
0
,
Rel
.
GT
,
self
.
name
)
self
.
group
=
validator
.
check_integer
(
'group'
,
group
,
0
,
Rel
.
GT
,
self
.
name
)
self
.
add_prim_attr
(
'offset_a'
,
0
)
def
infer_shape
(
self
,
x_shape
,
w_shape
):
validator
.
check_integer
(
"weight rank"
,
len
(
w_shape
),
4
,
Rel
.
EQ
,
self
.
name
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录