magicwindyyd / mindspore (forked from MindSpore / mindspore)
Commit ca7154a5
Authored on Aug 05, 2020 by zhoufeng

graph compile performance optimization

Signed-off-by: zhoufeng <zhoufeng54@huawei.com>
Parent: 5c7712ca
Showing 24 changed files, with 162 additions and 143 deletions (+162 -143).
mindspore/ccsrc/backend/kernel_compiler/kernel_build_info.cc  +4 -4
mindspore/ccsrc/backend/kernel_compiler/kernel_build_info.h  +4 -4
mindspore/ccsrc/backend/kernel_compiler/kernel_fusion.h  +4 -0
mindspore/ccsrc/backend/kernel_compiler/oplib/opinfo.h  +6 -6
mindspore/ccsrc/backend/kernel_compiler/oplib/oplib.cc  +2 -4
mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_select.cc  +16 -18
mindspore/ccsrc/backend/optimizer/ascend/ascend_helper.cc  +29 -32
mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/ub_pattern_fusion.cc  +27 -32
mindspore/ccsrc/backend/optimizer/ascend/format_type/check_consistency.cc  +5 -3
mindspore/ccsrc/backend/optimizer/ascend/format_type/insert_cast.cc  +4 -3
mindspore/ccsrc/backend/optimizer/ascend/ir_fission/addn_fission.cc  +2 -2
mindspore/ccsrc/backend/optimizer/ascend/ir_fission/batch_norm_bert_fission.cc  +1 -1
mindspore/ccsrc/backend/optimizer/ascend/ir_fission/batch_norm_grad_infer_fission.cc  +1 -1
mindspore/ccsrc/backend/optimizer/ascend/ir_fission/batch_norm_grad_split.cc  +3 -2
mindspore/ccsrc/backend/optimizer/ascend/ir_fission/concat_fission.cc  +3 -3
mindspore/ccsrc/backend/optimizer/ascend/ir_fission/lars_v2_fission.cc  +9 -10
mindspore/ccsrc/backend/optimizer/ascend/ir_fission/layer_norm_grad_split.cc  +1 -0
mindspore/ccsrc/backend/optimizer/ascend/ir_fission/pack_fission.cc  +2 -2
mindspore/ccsrc/backend/optimizer/ascend/ir_fission/split_fission.cc  +5 -12
mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/fused_batch_norm_fusion.cc  +1 -1
mindspore/ccsrc/backend/optimizer/common/helper.cc  +2 -2
mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc  +1 -1
mindspore/ccsrc/runtime/device/ascend/kernel_select_ascend.cc  +1 -0
mindspore/ccsrc/utils/utils.h  +29 -0
mindspore/ccsrc/backend/kernel_compiler/kernel_build_info.cc
@@ -52,13 +52,13 @@ TypeId KernelBuildInfo::GetOutputDeviceType(size_t output_index) const {
   return outputs_device_type_[output_index];
 }
-std::vector<std::string> KernelBuildInfo::GetAllInputFormats() const { return inputs_format_; }
+const std::vector<std::string> &KernelBuildInfo::GetAllInputFormats() const { return inputs_format_; }
-std::vector<std::string> KernelBuildInfo::GetAllOutputFormats() const { return outputs_format_; }
+const std::vector<std::string> &KernelBuildInfo::GetAllOutputFormats() const { return outputs_format_; }
-std::vector<TypeId> KernelBuildInfo::GetAllInputDeviceTypes() const { return inputs_device_type_; }
+const std::vector<TypeId> &KernelBuildInfo::GetAllInputDeviceTypes() const { return inputs_device_type_; }
-std::vector<TypeId> KernelBuildInfo::GetAllOutputDeviceTypes() const { return outputs_device_type_; }
+const std::vector<TypeId> &KernelBuildInfo::GetAllOutputDeviceTypes() const { return outputs_device_type_; }
 size_t KernelBuildInfo::GetInputNum() const { return inputs_format_.size(); }
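This getter change repeats across the commit: functions that returned a std::vector by value now return a const reference, so read-only callers stop paying for a full copy on every call. A minimal standalone sketch of the difference, using an illustrative class rather than the real KernelBuildInfo:

#include <string>
#include <vector>

class BuildInfo {
 public:
  // Before: every call copies the whole vector.
  std::vector<std::string> FormatsByValue() const { return formats_; }
  // After: hands out a reference to the existing storage; valid while
  // the BuildInfo object is alive.
  const std::vector<std::string> &FormatsByRef() const { return formats_; }

 private:
  std::vector<std::string> formats_{"DefaultFormat", "NC1HWC0"};
};

int main() {
  BuildInfo info;
  const auto &formats = info.FormatsByRef();  // binds directly, no copy
  return formats.size() == 2 ? 0 : 1;
}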
mindspore/ccsrc/backend/kernel_compiler/kernel_build_info.h
@@ -63,13 +63,13 @@ class KernelBuildInfo {
   std::vector<Axis> GetOutputReshapeType(size_t input_index) const;
-  std::vector<std::string> GetAllInputFormats() const;
+  const std::vector<std::string> &GetAllInputFormats() const;
-  std::vector<std::string> GetAllOutputFormats() const;
+  const std::vector<std::string> &GetAllOutputFormats() const;
-  std::vector<TypeId> GetAllInputDeviceTypes() const;
+  const std::vector<TypeId> &GetAllInputDeviceTypes() const;
-  std::vector<TypeId> GetAllOutputDeviceTypes() const;
+  const std::vector<TypeId> &GetAllOutputDeviceTypes() const;
   std::vector<std::vector<Axis>> GetAllOutputReshapeType() const;
mindspore/ccsrc/backend/kernel_compiler/kernel_fusion.h
@@ -25,6 +25,10 @@ namespace kernel {
  * @brief fuse op and return a callable mod
  */
 struct FusionScopeInfo {
+  FusionScopeInfo() {}
+  FusionScopeInfo(int32_t id, const std::vector<AnfNodePtr> &in, const std::vector<AnfNodePtr> &comp,
+                  const std::vector<AnfNodePtr> &out)
+      : scope_id(id), input_nodes(in), compute_nodes(comp), output_nodes(out) {}
   int32_t scope_id;
   std::vector<AnfNodePtr> input_nodes;
   std::vector<AnfNodePtr> compute_nodes;
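These constructors pay off later in the commit: FuseBufferFusionPattern in ub_pattern_fusion.cc builds each FusionScopeInfo in a single expression inside std::transform instead of default-constructing one and assigning four fields. A reduced sketch, with a string standing in for AnfNodePtr:

#include <cstdint>
#include <string>
#include <vector>

using NodePtr = std::string;  // stand-in for AnfNodePtr

struct ScopeInfo {
  ScopeInfo() {}
  ScopeInfo(int32_t id, const std::vector<NodePtr> &in, const std::vector<NodePtr> &out)
      : scope_id(id), input_nodes(in), output_nodes(out) {}
  int32_t scope_id = 0;
  std::vector<NodePtr> input_nodes;
  std::vector<NodePtr> output_nodes;
};

int main() {
  std::vector<ScopeInfo> scopes;
  // Constructed in place from raw arguments; no default-construct-then-assign.
  scopes.emplace_back(1, std::vector<NodePtr>{"input"}, std::vector<NodePtr>{"output"});
  return scopes.front().scope_id == 1 ? 0 : 1;
}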
mindspore/ccsrc/backend/kernel_compiler/oplib/opinfo.h
@@ -59,13 +59,13 @@ class OpIOInfo {
   ~OpIOInfo() = default;
   int index() const { return index_; }
-  std::string name() const { return name_; }
+  const std::string &name() const { return name_; }
   bool need_compile() const { return need_compile_; }
-  std::string param_type() const { return param_type_; }
-  std::string reshape_type() const { return reshape_type_; }
-  std::string shape() const { return shape_; }
-  std::vector<std::string> dtypes() const { return dtypes_; }
-  std::vector<std::string> formats() const { return formats_; }
+  const std::string &param_type() const { return param_type_; }
+  const std::string &reshape_type() const { return reshape_type_; }
+  const std::string &shape() const { return shape_; }
+  const std::vector<std::string> &dtypes() const { return dtypes_; }
+  const std::vector<std::string> &formats() const { return formats_; }
   void set_index(const int index) { index_ = index; }
   void set_name(const std::string &name) { name_ = name; }
mindspore/ccsrc/backend/kernel_compiler/oplib/oplib.cc
@@ -336,13 +336,11 @@ std::shared_ptr<OpInfo> OpLib::FindOp(const std::string &op_name, OpImplyType im
         << ", current op num: " << op_info_.size();
     return nullptr;
   }
+  std::string target_processor = is_gpu ? kCUDA : kAiCore;
   for (const auto &op_info : op_info_) {
     MS_EXCEPTION_IF_NULL(op_info);
     if (op_info->op_name() == op_name && op_info->imply_type() == imply_type) {
-      auto akg_processor_match = [&]() {
-        return is_gpu ? op_info->processor() == kCUDA : op_info->processor() == kAiCore;
-      };
-      if (imply_type != kAKG || akg_processor_match()) {
+      if (imply_type != kAKG || op_info->processor() == target_processor) {
         return op_info;
       }
     }
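The FindOp change above hoists a loop-invariant value: the old code re-evaluated a lambda per candidate op, while the new code computes target_processor once before the loop. The same idea in a minimal, self-contained form (names are illustrative):

#include <string>
#include <vector>

std::string FindProcessor(bool is_gpu, const std::vector<std::string> &candidates) {
  // Hoisted: computed once instead of being re-derived on every iteration.
  const std::string target = is_gpu ? "cuda" : "aicore";
  for (const auto &candidate : candidates) {
    if (candidate == target) {
      return candidate;
    }
  }
  return "";
}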
mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_select.cc
@@ -82,7 +82,6 @@ void TbeKernelSelect::TbeMetadataInfoEx() {
   }
   // check support
   FilterInVaildKernelInfo();
-  MS_LOG(INFO) << "End get kernel build info size: " << kernel_info_list_->size() << ", after tbe select.";
 }

 void TbeKernelSelect::GetCommonPatternKernelInfo(const OpInfo &op_info) {
@@ -221,38 +220,37 @@ void TbeKernelSelect::FilterInVaildKernelInfo() {
     MS_LOG(INFO) << "Warning: get kernel build info failed.";
     return;
   }
-  auto kernel_build_info_iter = kernel_info_list_->begin();
-  while (kernel_build_info_iter != kernel_info_list_->end()) {
-    if (!FilterInVaildShape(kernel_build_info_iter)) {
-      MS_LOG(INFO) << "Filter invaild shape, filter item info: " << (*kernel_build_info_iter)->ToString();
-      kernel_build_info_iter = kernel_info_list_->erase(kernel_build_info_iter);
+  std::vector<std::shared_ptr<KernelBuildInfo>> new_kernel_info_list;
+  for (auto iter = kernel_info_list_->begin(); iter != kernel_info_list_->end(); ++iter) {
+    if (!FilterInVaildShape(iter)) {
+      MS_LOG(INFO) << "Filter invaild shape, filter item info: " << (*iter)->ToString();
+      continue;
     }
-    if (!TbeCheckSupported(kernel_build_info_iter)) {
-      MS_LOG(INFO) << "Check support shape, filter item info: " << (*kernel_build_info_iter)->ToString();
-      kernel_build_info_iter = kernel_info_list_->erase(kernel_build_info_iter);
+    if (!TbeCheckSupported(iter)) {
+      MS_LOG(INFO) << "Check support shape, filter item info: " << (*iter)->ToString();
+      continue;
     }
-    kernel_build_info_iter++;
+    new_kernel_info_list.emplace_back(*iter);
   }
+  (*kernel_info_list_) = new_kernel_info_list;
 }

 bool TbeKernelSelect::FilterInVaildShape(
   const mindspore::kernel::TbeKernelSelect::KernelBuildInfoIter &kernel_build_info_iter) {
   MS_EXCEPTION_IF_NULL((*kernel_build_info_iter));
-  auto kernel_build_info_inputs_format = (*kernel_build_info_iter)->GetAllInputFormats();
+  const auto &kernel_build_info_inputs_format = (*kernel_build_info_iter)->GetAllInputFormats();
   for (size_t i = 0; i < kernel_build_info_inputs_format.size(); ++i) {
     auto shape = AnfAlgo::GetPrevNodeOutputInferShape(cnode_ptr_, i);
-    auto format = kernel_build_info_inputs_format.at(i);
+    const auto &format = kernel_build_info_inputs_format[i];
     if (!IsShapeMatchFormat(shape, format)) {
       MS_LOG(INFO) << "The " << i << "th input check failed.";
       return false;
     }
   }
-  auto kernel_build_info_outputs_format = (*kernel_build_info_iter)->GetAllOutputFormats();
+  const auto &kernel_build_info_outputs_format = (*kernel_build_info_iter)->GetAllOutputFormats();
   for (size_t j = 0; j < kernel_build_info_outputs_format.size(); ++j) {
     auto shape = AnfAlgo::GetOutputInferShape(cnode_ptr_, j);
-    auto format = kernel_build_info_outputs_format.at(j);
+    const auto &format = kernel_build_info_outputs_format[j];
     if (!IsShapeMatchFormat(shape, format)) {
       MS_LOG(INFO) << "The " << j << "th input check failed.";
       return false;
@@ -344,12 +342,12 @@ bool TbeKernelSelect::GenBuilderItem(bool is_input, size_t kernel_build_info_ind
   size_t io_info_num = ios_info.size();
   for (; io_info_index < io_info_num && real_io_tensor_index < real_io_tensor_num; io_info_index++) {
     std::shared_ptr<OpIOInfo> io_info_item = ios_info[io_info_index];
-    auto kernel_build_info_dtype = io_info_item->dtypes().at(kernel_build_info_index);
+    const auto &kernel_build_info_dtype = io_info_item->dtypes()[kernel_build_info_index];
     std::string kernel_build_info_format;
     if (!io_info_item->formats().empty()) {
-      kernel_build_info_format = io_info_item->formats().at(kernel_build_info_index);
+      kernel_build_info_format = io_info_item->formats()[kernel_build_info_index];
     }
-    std::string io_param_type = io_info_item->param_type();
+    const std::string &io_param_type = io_info_item->param_type();
     std::vector<Axis> reshape_type;
     StringToAxisVector(io_info_item->reshape_type(), &reshape_type);
     if (io_param_type == kParamTypeDynamic) {
@@ -367,6 +365,7 @@ bool TbeKernelSelect::GenBuilderItem(bool is_input, size_t kernel_build_info_ind
       }
       dynamic_input_index++;
       real_io_tensor_index += dynamic_input_size;
     } else {
+      if (ios_info.size() != 1) {
        MS_LOG(EXCEPTION) << "if output is dynamic, so output must has one output.";
@@ -388,7 +387,6 @@ bool TbeKernelSelect::GenBuilderItem(bool is_input, size_t kernel_build_info_ind
       MS_LOG(EXCEPTION) << "op info's param type is not match: " << io_param_type;
     }
   }
-
   if (io_info_index != io_info_num) {
     MS_LOG(INFO) << "Warning: io_info_index(" << io_info_index << ") != io_info_num(" << io_info_num
                  << "), this node may has optional input/output.";
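The FilterInVaildKernelInfo rewrite above replaces erase-while-iterating, where each erase shifts every element behind it, with one pass that copies survivors into a fresh list and assigns it back. The same pattern reduced to plain ints:

#include <vector>

std::vector<int> KeepPositive(const std::vector<int> &values) {
  std::vector<int> kept;
  kept.reserve(values.size());  // single allocation up front
  for (const auto &value : values) {
    if (value <= 0) {
      continue;  // filtered out without an erase (and without element shifts)
    }
    kept.emplace_back(value);
  }
  return kept;
}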
mindspore/ccsrc/backend/optimizer/ascend/ascend_helper.cc
@@ -51,11 +51,11 @@ AnfNodePtr CreateReshapeNode(const FuncGraphPtr &func_graph, const AnfNodePtr &i
 AnfNodePtr AddTransOpNodeToGraph(const FuncGraphPtr &func_graph, const AnfNodePtr &node,
                                  const KernelSelectPtr &kernel_select, size_t insert_index, bool is_insert_input) {
   AnfNodePtr trans_node = nullptr;
-  AnfNodePtr input_node = node;
+  AnfNodePtr input_node = nullptr;
   CNodePtr trans_data = nullptr;
   std::string input_format = is_insert_input ? kOpFormat_DEFAULT : AnfAlgo::GetOutputFormat(node, 0);
   std::string dst_format = is_insert_input ? AnfAlgo::GetInputFormat(node, 0) : kOpFormat_DEFAULT;
-  std::vector<kernel::Axis> padding_axis = AnfAlgo::GetOutputReshapeType(node, 0);
+  std::vector<kernel::Axis> padding_axis;
   MS_EXCEPTION_IF_NULL(node);
   // if insert transdata for input we need to change the input
   if (is_insert_input) {
@@ -66,12 +66,17 @@ AnfNodePtr AddTransOpNodeToGraph(const FuncGraphPtr &func_graph, const AnfNodePt
     dst_format = AnfAlgo::GetInputFormat(cnode, insert_index);
     input_node = AnfAlgo::GetInputNode(cnode, insert_index);
+    padding_axis = AnfAlgo::GetInputReshapeType(node, insert_index);
+  } else {
+    input_node = node;
+    padding_axis = AnfAlgo::GetOutputReshapeType(node, 0);
   }
+  auto input_node_out_shape = AnfAlgo::GetOutputInferShape(input_node, 0);
   bool need_padding = false;
   if (is_insert_input) {
-    need_padding = (trans::IsNeedPadding(dst_format, AnfAlgo::GetOutputInferShape(input_node, 0).size()));
+    need_padding = (trans::IsNeedPadding(dst_format, input_node_out_shape.size()));
   } else {
-    need_padding = (trans::IsNeedPadding(input_format, AnfAlgo::GetOutputInferShape(input_node, 0).size()));
+    need_padding = (trans::IsNeedPadding(input_format, input_node_out_shape.size()));
   }
   if (!need_padding) {
     // don't need padding insert transdata only
@@ -80,8 +85,7 @@ AnfNodePtr AddTransOpNodeToGraph(const FuncGraphPtr &func_graph, const AnfNodePt
   } else if (is_insert_input) {
     // if need padding & is input need insert a transdata
     // reshape[padding shape] -> transdata[padding shape] -> node
-    auto padding_shape =
-      trans::PaddingShapeTo4d(AnfAlgo::GetOutputInferShape(input_node, 0), AnfAlgo::GetInputReshapeType(node, 0));
+    auto padding_shape = trans::PaddingShapeTo4d(input_node_out_shape, AnfAlgo::GetInputReshapeType(node, 0));
     auto reshape_node = CreateReshapeNode(func_graph, input_node, kernel_select, padding_shape);
     trans_data = NewTransOpNode(func_graph, reshape_node, kernel_select, need_padding, prim::KPrimTransData->name());
     trans_node = trans_data;
@@ -89,8 +93,7 @@ AnfNodePtr AddTransOpNodeToGraph(const FuncGraphPtr &func_graph, const AnfNodePt
     // if need padding & is output need insert a transdata
     // node -> transdata[padding shape] -> reshape[ori_shape]
     trans_data = NewTransOpNode(func_graph, input_node, kernel_select, need_padding, prim::KPrimTransData->name());
-    auto reshape_node =
-      CreateReshapeNode(func_graph, trans_data, kernel_select, AnfAlgo::GetOutputInferShape(input_node, 0));
+    auto reshape_node = CreateReshapeNode(func_graph, trans_data, kernel_select, input_node_out_shape);
     trans_node = reshape_node;
   }
   // refresh the transdata's format to ori format & dst format
@@ -140,10 +143,10 @@ AnfNodePtr InsertTransOpForMultipleOutput(const FuncGraphPtr &func_graph, const
                                           const KernelSelectPtr &kernel_select) {
   MS_EXCEPTION_IF_NULL(func_graph);
   MS_EXCEPTION_IF_NULL(node);
-  std::vector<AnfNodePtr> make_tuple_inputs;
-  make_tuple_inputs.push_back(NewValueNode(prim::kPrimMakeTuple));
+  std::vector<AnfNodePtr> make_tuple_inputs = {NewValueNode(prim::kPrimMakeTuple)};
   auto kernel_graph = func_graph->cast<KernelGraphPtr>();
-  for (size_t output_idx = 0; output_idx < AnfAlgo::GetOutputTensorNum(node); ++output_idx) {
+  size_t out_num = AnfAlgo::GetOutputTensorNum(node);
+  for (size_t output_idx = 0; output_idx < out_num; ++output_idx) {
     std::string output_format = AnfAlgo::GetOutputFormat(node, output_idx);
     if (output_format == kOpFormat_NC1KHKWHWC0) {
       MS_LOG(EXCEPTION) << "Got the special format" << output_format << " when insert the transdata node "
@@ -151,12 +154,12 @@ AnfNodePtr InsertTransOpForMultipleOutput(const FuncGraphPtr &func_graph, const
     }
     auto tuple_getitem = CreatTupleGetItemNode(func_graph, node, output_idx);
     std::vector<size_t> origin_shape = AnfAlgo::GetOutputInferShape(node, output_idx);
-    if (kCommonFormatSet.find(output_format) == kCommonFormatSet.end() && origin_shape.size() > 1) {
+    if (origin_shape.size() > 1 && kCommonFormatSet.find(output_format) == kCommonFormatSet.end()) {
       auto trans_op = AddTransOpNodeToGraph(func_graph, tuple_getitem, kernel_select, 0, false);
       if (kernel_graph != nullptr && kernel_graph->IsInternalOutput(node, output_idx)) {
         kernel_graph->ReplaceInternalOutput(node, trans_op, output_idx, 0);
       }
-      make_tuple_inputs.emplace_back(trans_op);
+      make_tuple_inputs.push_back(trans_op);
     } else {
       // No need insert trans op.
       make_tuple_inputs.push_back(tuple_getitem);
@@ -188,15 +191,11 @@ CNodePtr NewTransOpNode(const FuncGraphPtr &func_graph, const AnfNodePtr &input,
                         const bool need_padding, const std::string &op_name) {
   MS_EXCEPTION_IF_NULL(func_graph);
   MS_EXCEPTION_IF_NULL(input);
-  std::vector<AnfNodePtr> trans_inputs;
-  auto prim = std::make_shared<Primitive>(op_name);
-  trans_inputs.push_back(NewValueNode(prim));
-  trans_inputs.push_back(input);
-  CNodePtr trans_node = func_graph->NewCNode(trans_inputs);
+  CNodePtr trans_node = func_graph->NewCNode({NewValueNode(std::make_shared<Primitive>(op_name)), input});
   MS_EXCEPTION_IF_NULL(trans_node);
-  auto padding_axis = AnfAlgo::GetOutputReshapeType(input, 0);
   if (need_padding) {
     // if need padding we should set the transdata node's shape to the padding shape
+    auto padding_axis = AnfAlgo::GetOutputReshapeType(input, 0);
     AnfAlgo::SetOutputInferTypeAndShape({AnfAlgo::GetOutputInferDataType(input, 0)},
                                         {trans::PaddingShapeTo4d(AnfAlgo::GetOutputInferShape(input, 0), padding_axis)},
                                         trans_node.get());
@@ -224,11 +223,7 @@ AnfNodePtr AddCastOpNodeToGraph(const FuncGraphPtr &func_graph, const AnfNodePtr
   MS_EXCEPTION_IF_NULL(func_graph);
   std::string input_format = format;
   std::string output_format = format;
-  std::vector<AnfNodePtr> new_cast_inputs;
-  auto prim = std::make_shared<Primitive>(prim::kPrimCast->name());
-  new_cast_inputs.push_back(NewValueNode(prim));
-  new_cast_inputs.push_back(input);
-  CNodePtr cast = func_graph->NewCNode(new_cast_inputs);
+  CNodePtr cast = func_graph->NewCNode({NewValueNode(std::make_shared<Primitive>(prim::kPrimCast->name())), input});
   MS_EXCEPTION_IF_NULL(cast);
   // set kernel build info
   kernel::KernelBuildInfo::KernelBuildInfoBuilder builder;
@@ -280,7 +275,8 @@ AnfNodePtr InsertTransOpForInput(const FuncGraphPtr &func_graph, const AnfNodePt
   auto cnode = node->cast<CNodePtr>();
   MS_EXCEPTION_IF_NULL(cnode);
   std::vector<AnfNodePtr> new_inputs = {AnfAlgo::GetCNodePrimitiveNode(cnode)};
-  for (size_t input_index = 0; input_index < AnfAlgo::GetInputTensorNum(cnode); ++input_index) {
+  size_t in_num = AnfAlgo::GetInputTensorNum(cnode);
+  for (size_t input_index = 0; input_index < in_num; ++input_index) {
     AnfNodePtr input_node = GetTransInputNodePtr(func_graph, cnode, input_index, kernel_select);
     MS_EXCEPTION_IF_NULL(input_node);
     new_inputs.push_back(input_node);
@@ -301,8 +297,10 @@ CNodePtr InsertCastForInput(const FuncGraphPtr &func_graph, const CNodePtr &cnod
   MS_EXCEPTION_IF_NULL(cnode);
   std::vector<AnfNodePtr> new_inputs = {AnfAlgo::GetCNodePrimitiveNode(cnode)};
-  for (size_t input_index = 0; input_index < AnfAlgo::GetInputTensorNum(cnode); ++input_index) {
-    const auto infer_type = AnfAlgo::GetPrevNodeOutputInferDataType(cnode, input_index);
+  size_t in_num = AnfAlgo::GetInputTensorNum(cnode);
+  for (size_t input_index = 0; input_index < in_num; ++input_index) {
+    auto prev_node = AnfAlgo::GetPrevNodeOutput(cnode, input_index);
+    const auto infer_type = AnfAlgo::GetOutputInferDataType(prev_node.first, prev_node.second);
     TypeId origin_type(kTypeUnknown);
     auto cur_input = AnfAlgo::GetInputNode(cnode, input_index);
     auto kernel_with_index = AnfAlgo::VisitKernel(cur_input, 0);
@@ -311,20 +309,19 @@ CNodePtr InsertCastForInput(const FuncGraphPtr &func_graph, const CNodePtr &cnod
       // weight
       origin_type = AnfAlgo::GetPrevNodeOutputPrecision(cnode, input_index);
       if (origin_type == kTypeUnknown) {
-        origin_type = AnfAlgo::GetPrevNodeOutputDeviceDataType(cnode, input_index);
+        origin_type = AnfAlgo::GetOutputDeviceDataType(prev_node.first, prev_node.second);
       }
     } else {
       // feature map
-      origin_type = AnfAlgo::GetPrevNodeOutputInferDataType(cnode, input_index);
+      origin_type = AnfAlgo::GetOutputInferDataType(prev_node.first, prev_node.second);
     }
     const std::string dev_fmt = AnfAlgo::GetInputFormat(cnode, input_index);
-    const std::vector<size_t> origin_shape = AnfAlgo::GetPrevNodeOutputInferShape(cnode, input_index);
-    const TypeId device_type = AnfAlgo::GetInputDeviceDataType(cnode, input_index);
+    const std::vector<size_t> origin_shape = AnfAlgo::GetOutputInferShape(prev_node.first, prev_node.second);
     // In graph kernel, we check parameter,
     // the eliminate pass will not eliminate this case, so we just do not insert the noused cast.
     if (func_graph->has_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL) && IsValueNode<tensor::Tensor>(cur_input)) {
       new_inputs.push_back(cur_input);
-    } else if (origin_type != device_type) {
+    } else if (TypeId device_type = AnfAlgo::GetInputDeviceDataType(cnode, input_index);
+               origin_type != device_type) {
       auto cast = AddCastOpNodeToGraph(func_graph, cur_input, dev_fmt, origin_type, device_type, origin_shape, infer_type);
       MS_EXCEPTION_IF_NULL(cast);
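Two micro-optimizations recur in this file: a queried value such as GetOutputInferShape(input_node, 0) is stored in a local (input_node_out_shape) and reused, and loop bounds like GetOutputTensorNum(node) are hoisted out of the loop condition so they run once. A trivial sketch of the hoisting, assuming only that computing the bound is not free:

#include <cstddef>
#include <vector>

int SumAll(const std::vector<int> &inputs) {
  int sum = 0;
  // Before: `i < inputs.size()` re-evaluates the bound on each iteration.
  // Trivial for vector::size(), but the AnfAlgo helpers above walk node
  // structures, so evaluating them once is a real saving.
  size_t in_num = inputs.size();
  for (size_t i = 0; i < in_num; ++i) {
    sum += inputs[i];
  }
  return sum;
}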
mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/ub_pattern_fusion.cc
@@ -120,8 +120,8 @@ kernel::KernelBuildInfoPtr CreateFusionOpKernelInfo(const std::vector<AnfNodePtr
   std::vector<TypeId> inputs_data_type;
   for (const auto &input : inputs_list) {
     auto real_input = AnfAlgo::VisitKernel(input, 0);
-    inputs_format.push_back(AnfAlgo::GetOutputFormat(real_input.first, real_input.second));
-    inputs_data_type.push_back(AnfAlgo::GetOutputDeviceDataType(real_input.first, real_input.second));
+    inputs_format.emplace_back(AnfAlgo::GetOutputFormat(real_input.first, real_input.second));
+    inputs_data_type.emplace_back(AnfAlgo::GetOutputDeviceDataType(real_input.first, real_input.second));
   }
   // outputs format and data type
   std::vector<std::string> outputs_format;
@@ -130,13 +130,13 @@ kernel::KernelBuildInfoPtr CreateFusionOpKernelInfo(const std::vector<AnfNodePtr
     if (AnfAlgo::GetCNodeName(output) == prim::kPrimTupleGetItem->name()) {
       auto tuple_getitem = output->cast<CNodePtr>();
       MS_EXCEPTION_IF_NULL(tuple_getitem);
-      outputs_format.push_back(AnfAlgo::GetOutputFormat(
+      outputs_format.emplace_back(AnfAlgo::GetOutputFormat(
         tuple_getitem->input(1), IntToSize(GetValue<int>(GetValueNode(tuple_getitem->input(2))))));
-      outputs_data_type.push_back(AnfAlgo::GetOutputDeviceDataType(
+      outputs_data_type.emplace_back(AnfAlgo::GetOutputDeviceDataType(
        tuple_getitem->input(1), IntToSize(GetValue<int>(GetValueNode(tuple_getitem->input(2))))));
     } else {
-      outputs_format.push_back(AnfAlgo::GetOutputFormat(output, 0));
-      outputs_data_type.push_back(AnfAlgo::GetOutputDeviceDataType(output, 0));
+      outputs_format.emplace_back(AnfAlgo::GetOutputFormat(output, 0));
+      outputs_data_type.emplace_back(AnfAlgo::GetOutputDeviceDataType(output, 0));
     }
   }
   builder.SetInputsFormat(inputs_format);
@@ -229,7 +229,7 @@ void GetFusionScopeInputNodeList(const session::KernelGraph &kernel_graph,
   for (auto &buffer_fusion_info : *buffer_fusion_infos) {
     auto fusion_id = buffer_fusion_info.first;
-    auto fusion_info = buffer_fusion_info.second;
+    const auto &fusion_info = buffer_fusion_info.second;
     for (const auto &node : fusion_info.anf_nodes) {
       auto cnode = node->cast<CNodePtr>();
       MS_EXCEPTION_IF_NULL(cnode);
@@ -237,10 +237,10 @@ void GetFusionScopeInputNodeList(const session::KernelGraph &kernel_graph,
         auto real_input = AnfAlgo::VisitKernel(cnode->input(idx), 0);
         if (std::find(fusion_info.anf_nodes.begin(), fusion_info.anf_nodes.end(), real_input.first) ==
             fusion_info.anf_nodes.end()) {
-          if (std::find((*buffer_fusion_infos)[fusion_id].inputs_list.begin(),
-                        (*buffer_fusion_infos)[fusion_id].inputs_list.end(),
-                        cnode->input(idx)) == (*buffer_fusion_infos)[fusion_id].inputs_list.end()) {
-            (*buffer_fusion_infos)[fusion_id].inputs_list.push_back(cnode->input(idx));
+          if (auto in = cnode->input(idx);
+              std::find((*buffer_fusion_infos)[fusion_id].inputs_list.begin(),
+                        (*buffer_fusion_infos)[fusion_id].inputs_list.end(),
+                        in) == (*buffer_fusion_infos)[fusion_id].inputs_list.end()) {
+            (*buffer_fusion_infos)[fusion_id].inputs_list.push_back(in);
           }
         }
@@ -277,7 +277,7 @@ void GetFusionScopeOutputNodeList(session::KernelGraph *kernel_graph,
   for (auto &buffer_fusion_info : *buffer_fusion_infos) {
     auto fusion_id = buffer_fusion_info.first;
-    auto fusion_info = buffer_fusion_info.second;
+    const auto &fusion_info = buffer_fusion_info.second;
     for (const auto &node : fusion_info.anf_nodes) {
       if (AnfAlgo::GetOutputTensorNum(node) == 1) {
         for (auto use_node : manager->node_users()[node]) {
@@ -294,7 +294,7 @@ void GetFusionScopeOutputNodeList(session::KernelGraph *kernel_graph,
                        std::back_inserter(tuple_getitem_nodes),
                        [](const std::pair<AnfNodePtr, int> &use_node) { return use_node.first; });
         std::sort(tuple_getitem_nodes.begin(), tuple_getitem_nodes.end(), TupleGetitemNodeCompare);
-        for (auto getitem : tuple_getitem_nodes) {
+        for (auto &getitem : tuple_getitem_nodes) {
           MS_EXCEPTION_IF_NULL(getitem);
           auto getitem_ptr = getitem->cast<CNodePtr>();
           auto input2 = getitem_ptr->input(2);
@@ -304,7 +304,7 @@ void GetFusionScopeOutputNodeList(session::KernelGraph *kernel_graph,
             (*buffer_fusion_infos)[fusion_id].outputs_list.push_back(stub_node);
           }
           prev_idx = output_idx + 1;
-          for (auto item_use_node : manager->node_users()[getitem]) {
+          for (auto &item_use_node : manager->node_users()[getitem]) {
            if (std::find(fusion_info.anf_nodes.begin(), fusion_info.anf_nodes.end(), item_use_node.first) ==
                fusion_info.anf_nodes.end()) {
              (*buffer_fusion_infos)[fusion_id].outputs_list.push_back(getitem);
@@ -365,31 +365,25 @@ bool UbPatternFusion::FuseBufferFusionPattern(session::KernelGraph *kernel_graph
   MS_EXCEPTION_IF_NULL(kernel_graph);
   bool change = false;
   std::unordered_map<int32_t, BufferFusionInfo_t> buffer_fusion_infos;
-  buffer_fusion_infos.clear();
   GetBufferFusionInfo(kernel_graph, &buffer_fusion_infos);
   std::vector<mindspore::kernel::FusionScopeInfo> fusion_scope_infos;
-  for (auto &buffer_fusion_info : buffer_fusion_infos) {
-    mindspore::kernel::FusionScopeInfo fusion_scope_info;
-    fusion_scope_info.scope_id = buffer_fusion_info.first;
-    fusion_scope_info.input_nodes = buffer_fusion_info.second.inputs_list;
-    fusion_scope_info.compute_nodes = buffer_fusion_info.second.anf_nodes;
-    fusion_scope_info.output_nodes = buffer_fusion_info.second.outputs_list;
-    fusion_scope_infos.push_back(fusion_scope_info);
-#ifdef DEBUG
-    DumpFusionScopeInfo(fusion_scope_info);
-#endif
-  }
+  std::transform(
+    buffer_fusion_infos.begin(), buffer_fusion_infos.end(), std::back_inserter(fusion_scope_infos),
+    [](const std::pair<int32_t, BufferFusionInfo_t> &buffer_fusion_info) -> mindspore::kernel::FusionScopeInfo {
+      return mindspore::kernel::FusionScopeInfo(buffer_fusion_info.first, buffer_fusion_info.second.inputs_list,
+                                                buffer_fusion_info.second.anf_nodes,
+                                                buffer_fusion_info.second.outputs_list);
+    });
   auto kernel_mods = mindspore::kernel::KernelFusion(fusion_scope_infos);
-  std::vector<int32_t> fusion_ids;
+  std::set<int32_t> fusion_ids;
   for (auto &buffer_fusion_info : buffer_fusion_infos) {
     MS_LOG(DEBUG) << "anf node size: " << buffer_fusion_info.second.anf_nodes.size()
                   << ", inputs_list size: " << buffer_fusion_info.second.inputs_list.size()
                   << ", outputs list size: " << buffer_fusion_info.second.outputs_list.size();
-    fusion_ids.push_back(buffer_fusion_info.first);
+    fusion_ids.insert(buffer_fusion_info.first);
   }
   // Replace fusion op from return to head
-  std::sort(fusion_ids.begin(), fusion_ids.end());
   for (auto &fusion_id : fusion_ids) {
     // Get kernel mod when supporting tbe
     if (kernel_mods.find(fusion_id) == kernel_mods.end() || kernel_mods[fusion_id] == nullptr) {
@@ -414,9 +408,10 @@ bool UbPatternFusion::ReplaceFusionOp(std::unordered_map<int32_t, BufferFusionIn
   std::vector<TypeId> types;
   std::vector<std::vector<size_t>> shapes;
   for (const auto &out_node : buffer_fusion_info.outputs_list) {
-    for (size_t idx = 0; idx < AnfAlgo::GetOutputTensorNum(out_node); ++idx) {
-      types.push_back(AnfAlgo::GetOutputInferDataType(out_node, idx));
-      shapes.push_back(AnfAlgo::GetOutputInferShape(out_node, idx));
+    size_t out_num = AnfAlgo::GetOutputTensorNum(out_node);
+    for (size_t idx = 0; idx < out_num; ++idx) {
+      types.emplace_back(AnfAlgo::GetOutputInferDataType(out_node, idx));
+      shapes.emplace_back(AnfAlgo::GetOutputInferShape(out_node, idx));
     }
   }
   if (types.empty() || shapes.empty()) {
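Alongside the emplace_back and const-reference changes, FuseBufferFusionPattern above swaps a std::vector<int32_t> plus a later std::sort for a std::set, which keeps the fusion ids ordered and de-duplicated as they are inserted. A tiny sketch:

#include <cstdint>
#include <set>

int main() {
  std::set<int32_t> fusion_ids;
  for (int32_t id : {7, 3, 7, 1}) {
    fusion_ids.insert(id);  // kept sorted; the duplicate 7 is dropped
  }
  // Iteration now visits 1, 3, 7 in order, with no separate sort pass.
  return fusion_ids.size() == 3 ? 0 : 1;
}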
mindspore/ccsrc/backend/optimizer/ascend/format_type/check_consistency.cc
@@ -30,12 +30,13 @@ namespace {
 bool CheckFormatForConsistency(const CNodePtr &node, const size_t input_index) {
   MS_EXCEPTION_IF_NULL(node);
   // get prior node's device output format
-  string pre_output_format = AnfAlgo::GetPrevNodeOutputFormat(node, input_index);
+  auto prev_node = AnfAlgo::GetPrevNodeOutput(node, input_index);
+  string pre_output_format = AnfAlgo::GetOutputFormat(prev_node.first, prev_node.second);
   string selected_input_format = AnfAlgo::GetInputFormat(node, input_index);
   if (pre_output_format == selected_input_format) {
     return true;
   }
-  auto input_origin_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, input_index);
+  auto input_origin_shape = AnfAlgo::GetOutputInferShape(prev_node.first, prev_node.second);
   if (pre_output_format == kOpFormat_DEFAULT || selected_input_format == kOpFormat_DEFAULT) {
     string checking_format = (pre_output_format == kOpFormat_DEFAULT) ? selected_input_format : pre_output_format;
     // when input shape size is 1D, default format and NC1HWC0 are compatible
@@ -87,7 +88,8 @@ const AnfNodePtr CheckConsistency::Process(const FuncGraphPtr &, const AnfNodePt
   for (auto &t : todos) {
     CNodePtr cnode = t->cast<CNodePtr>();
-    for (size_t i = 0; i < AnfAlgo::GetInputTensorNum(cnode); i++) {
+    size_t in_num = AnfAlgo::GetInputTensorNum(cnode);
+    for (size_t i = 0; i < in_num; ++i) {
       if (!CheckFormatForConsistency(cnode, i) || !CheckDataTypeForConsistency(cnode, i)) {
         MS_LOG(EXCEPTION) << "Found inconsistent format or data type! Op: " << AnfAlgo::GetCNodeName(cnode) << "["
                           << cnode->DebugString() << "]";
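CheckFormatForConsistency above now resolves the producer once with AnfAlgo::GetPrevNodeOutput and passes the resulting (node, index) pair to GetOutputFormat and GetOutputInferShape, rather than letting each GetPrevNodeOutput* helper repeat that resolution internally. A schematic reduction; every type and helper here is a hypothetical stand-in, not the real AnfAlgo API:

#include <cstddef>
#include <string>
#include <utility>

struct Node {
  std::string format;
  std::string shape;
};
using KernelWithIndex = std::pair<const Node *, size_t>;

// Imagine a graph walk inside this lookup; that walk is what we avoid repeating.
KernelWithIndex GetPrevNodeOutput(const Node &node) { return {&node, 0}; }

const std::string &GetOutputFormat(const KernelWithIndex &kv) { return kv.first->format; }
const std::string &GetOutputShape(const KernelWithIndex &kv) { return kv.first->shape; }

bool CheckFormat(const Node &producer, const std::string &selected_format) {
  auto prev_node = GetPrevNodeOutput(producer);  // resolved once
  return GetOutputFormat(prev_node) == selected_format || GetOutputShape(prev_node).empty();
}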
mindspore/ccsrc/backend/optimizer/ascend/format_type/insert_cast.cc
@@ -39,9 +39,10 @@ AnfNodePtr InsertCastForMultipleOutput(const FuncGraphPtr &func_graph, const CNo
   MS_EXCEPTION_IF_NULL(cnode);
   std::vector<AnfNodePtr> make_tuple_inputs;
   AbstractBasePtrList abstract_list;
-  make_tuple_inputs.push_back(NewValueNode(prim::kPrimMakeTuple));
+  make_tuple_inputs.emplace_back(NewValueNode(prim::kPrimMakeTuple));
   auto kernel_graph = func_graph->cast<KernelGraphPtr>();
-  for (size_t output_idx = 0; output_idx < AnfAlgo::GetOutputTensorNum(cnode); ++output_idx) {
+  size_t out_num = AnfAlgo::GetOutputTensorNum(cnode);
+  for (size_t output_idx = 0; output_idx < out_num; ++output_idx) {
     AnfNodePtr replace_node = nullptr;
     const auto origin_shape = AnfAlgo::GetOutputInferShape(cnode, output_idx);
     const auto infer_type = AnfAlgo::GetOutputInferDataType(cnode, output_idx);
@@ -74,7 +75,7 @@ AnfNodePtr InsertCastForMultipleOutput(const FuncGraphPtr &func_graph, const CNo
       } else {
         replace_node = getitem;
       }
-      abstract_list.push_back(replace_node->abstract());
+      abstract_list.emplace_back(replace_node->abstract());
       make_tuple_inputs.push_back(replace_node);
     }
     AnfNodePtr make_tuple = func_graph->NewCNode(make_tuple_inputs);
mindspore/ccsrc/backend/optimizer/ascend/ir_fission/addn_fission.cc
@@ -27,7 +27,7 @@ AnfNodePtr CreateNewAddn(const FuncGraphPtr &func_graph, const CNodePtr &origin_
   MS_EXCEPTION_IF_NULL(origin_addn_cnode);
   std::vector<AnfNodePtr> new_addn_inputs{NewValueNode(std::make_shared<Primitive>(prim::kPrimAddN->name()))};
   for (size_t i = begin_index; i < begin_index + offset; ++i) {
-    new_addn_inputs.push_back(origin_addn_cnode->input(i));
+    new_addn_inputs.emplace_back(origin_addn_cnode->input(i));
   }
   CNodePtr new_addn = func_graph->NewCNode(new_addn_inputs);
   MS_EXCEPTION_IF_NULL(new_addn);
@@ -66,7 +66,7 @@ const AnfNodePtr AddnFission::Process(const FuncGraphPtr &func_graph, const AnfN
     cur_input_index += inputs_divisor_;
   }
   for (size_t i = cur_input_index; i <= origin_input_size; i++) {
-    base_addn_inputs.push_back(new_cnode->input(i));
+    base_addn_inputs.emplace_back(new_cnode->input(i));
   }
   CNodePtr base_addn = func_graph->NewCNode(base_addn_inputs);
   MS_EXCEPTION_IF_NULL(base_addn);
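Several fission passes in the commit make the same push_back to emplace_back substitution seen above. With an argument that already has the element type the two calls are equivalent; the gain appears when emplace_back constructs the element in place from raw constructor arguments instead of building a temporary first. A small illustration:

#include <string>
#include <utility>
#include <vector>

struct Item {
  Item(std::string n, int v) : name(std::move(n)), value(v) {}
  std::string name;
  int value;
};

int main() {
  std::vector<Item> items;
  items.push_back(Item("a", 1));  // builds a temporary Item, then moves it in
  items.emplace_back("b", 2);     // constructs the Item directly inside the vector
  return items.size() == 2 ? 0 : 1;
}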
mindspore/ccsrc/backend/optimizer/ascend/ir_fission/batch_norm_bert_fission.cc
@@ -37,7 +37,7 @@ bool GetBatchNormOutputs(const FuncGraphPtr &func_graph, const AnfNodePtr &bn, s
   }
   size_t output_num = 0;
   for (const auto &node_index : manager->node_users()[bn]) {
-    AnfNodePtr output = node_index.first;
+    const AnfNodePtr &output = node_index.first;
     MS_EXCEPTION_IF_NULL(output);
     if (!IsPrimitiveCNode(output, prim::kPrimTupleGetItem)) {
       continue;
mindspore/ccsrc/backend/optimizer/ascend/ir_fission/batch_norm_grad_infer_fission.cc
@@ -32,7 +32,7 @@ bool CheckOutputsIndex(const FuncGraphPtr &func_graph, const AnfNodePtr &node) {
     return false;
   }
   for (const auto &node_index : manager->node_users()[node]) {
-    AnfNodePtr output = node_index.first;
+    const AnfNodePtr &output = node_index.first;
     MS_EXCEPTION_IF_NULL(output);
     if (!IsPrimitiveCNode(output, prim::kPrimTupleGetItem)) {
       continue;
mindspore/ccsrc/backend/optimizer/ascend/ir_fission/batch_norm_grad_split.cc
@@ -33,7 +33,7 @@ void CreateOutputsOfUpdateGrad(const FuncGraphPtr &graph, const CNodePtr &bn_gra
                                std::vector<AnfNodePtr> *bn_update_grad_outputs) {
   MS_EXCEPTION_IF_NULL(graph);
   MS_EXCEPTION_IF_NULL(bn_grad_node);
-  auto bn_grad_inputs = bn_grad_node->inputs();
+  const auto &bn_grad_inputs = bn_grad_node->inputs();
   if (bn_grad_inputs.size() < kBNGradInputNum) {
     MS_LOG(EXCEPTION) << "BNGrad has wrong inputs size";
   }
@@ -58,7 +58,8 @@ void CreateOutputsOfReduceGrad(const FuncGraphPtr &graph, const CNodePtr &bn_gra
                                std::vector<AnfNodePtr> *bn_reduce_grad_outputs) {
   MS_EXCEPTION_IF_NULL(graph);
   MS_EXCEPTION_IF_NULL(bn_grad_node);
-  auto bn_grad_inputs = bn_grad_node->inputs();
+  MS_EXCEPTION_IF_NULL(bn_reduce_grad_outputs);
+  const auto &bn_grad_inputs = bn_grad_node->inputs();
   if (bn_grad_inputs.size() < kBNGradInputNum) {
     MS_LOG(EXCEPTION) << "BNGrad has wrong inputs size";
   }
mindspore/ccsrc/backend/optimizer/ascend/ir_fission/concat_fission.cc
@@ -25,9 +25,9 @@ AnfNodePtr CreateNewConcat(const FuncGraphPtr &func_graph, const CNodePtr &origi
                            size_t offset) {
   MS_EXCEPTION_IF_NULL(func_graph);
   MS_EXCEPTION_IF_NULL(origin_concat_cnode);
-  std::vector<AnfNodePtr> new_concat_inputs{NewValueNode(std::make_shared<Primitive>(prim::kPrimConcat->name()))};
+  std::vector<AnfNodePtr> new_concat_inputs = {NewValueNode(std::make_shared<Primitive>(prim::kPrimConcat->name()))};
   for (size_t i = begin_index; i < begin_index + offset; ++i) {
-    new_concat_inputs.push_back(origin_concat_cnode->input(i));
+    new_concat_inputs.emplace_back(origin_concat_cnode->input(i));
   }
   CNodePtr new_concat = func_graph->NewCNode(new_concat_inputs);
   MS_EXCEPTION_IF_NULL(new_concat);
@@ -83,7 +83,7 @@ const AnfNodePtr ConcatFission::Process(const FuncGraphPtr &func_graph, const An
     cur_input_index += inputs_divisor_;
   }
   for (size_t i = cur_input_index; i <= origin_input_size; i++) {
-    base_concat_inputs.push_back(new_cnode->input(i));
+    base_concat_inputs.emplace_back(new_cnode->input(i));
   }
   CNodePtr base_concat = func_graph->NewCNode(base_concat_inputs);
   MS_EXCEPTION_IF_NULL(base_concat);
mindspore/ccsrc/backend/optimizer/ascend/ir_fission/lars_v2_fission.cc
@@ -31,9 +31,8 @@ void CreateOutputsOfSquareSumAll(const FuncGraphPtr &graph, const CNodePtr &lars
     MS_LOG(EXCEPTION) << "Op lars_v2's input not equal " << kLarsV2InputNum;
   }
-  std::vector<AnfNodePtr> inputs = {NewValueNode(std::make_shared<Primitive>(kSquareSumAllOpName))};
-  inputs.push_back(lars_v2->input(1));
-  inputs.push_back(lars_v2->input(2));
+  std::vector<AnfNodePtr> inputs = {NewValueNode(std::make_shared<Primitive>(kSquareSumAllOpName)),
+                                    lars_v2->input(1), lars_v2->input(2)};
   auto square_sum_all = graph->NewCNode(inputs);
   MS_EXCEPTION_IF_NULL(square_sum_all);
   square_sum_all->set_scope(lars_v2->scope());
@@ -56,13 +55,13 @@ CNodePtr CreateLarsV2Update(const FuncGraphPtr &graph, const CNodePtr &lars_v2,
   if (lars_v2->size() != kLarsV2InputNum) {
     MS_LOG(EXCEPTION) << "Op lars_v2's input not equal " << kLarsV2InputNum;
   }
-  std::vector<AnfNodePtr> inputs = {NewValueNode(std::make_shared<Primitive>(kLarsV2UpdateOpName))};
-  inputs.push_back(lars_v2->input(1));
-  inputs.push_back(lars_v2->input(2));
-  inputs.push_back(square_sum_all_outputs[0]);
-  inputs.push_back(square_sum_all_outputs[1]);
-  inputs.push_back(lars_v2->input(3));
-  inputs.push_back(lars_v2->input(4));
+  std::vector<AnfNodePtr> inputs = {NewValueNode(std::make_shared<Primitive>(kLarsV2UpdateOpName)),
+                                    lars_v2->input(1),
+                                    lars_v2->input(2),
+                                    square_sum_all_outputs[0],
+                                    square_sum_all_outputs[1],
+                                    lars_v2->input(3),
+                                    lars_v2->input(4)};
   auto lars_v2_update = graph->NewCNode(inputs);
   MS_EXCEPTION_IF_NULL(lars_v2_update);
   lars_v2_update->set_scope(lars_v2->scope());
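The lars_v2 change above replaces a one-element vector grown by six push_back calls with a single brace-initialized vector, so the storage is sized once and the elements are constructed straight into it. Sketch, with strings standing in for AnfNodePtr:

#include <string>
#include <vector>

std::vector<std::string> MakeUpdateInputs(const std::string &grad, const std::string &weight) {
  // One allocation at the right size, instead of repeated growth.
  return {"LarsV2Update", grad, weight};
}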
mindspore/ccsrc/backend/optimizer/ascend/ir_fission/layer_norm_grad_split.cc
@@ -32,6 +32,7 @@ void LayerNormGradSplit::CreateOutputsOfLayerNormXBackprop(
   std::vector<AnfNodePtr> *layer_norm_x_backprop_outputs) const {
   MS_EXCEPTION_IF_NULL(graph);
   MS_EXCEPTION_IF_NULL(layer_norm_grad);
+  MS_EXCEPTION_IF_NULL(layer_norm_x_backprop_outputs);
   auto prim = std::make_shared<Primitive>(kLayerNormXBackpropOpName);
   std::vector<AnfNodePtr> layer_norm_x_backprop_inputs = {NewValueNode(prim)};
   for (size_t i = 1; i < layer_norm_grad->inputs().size(); ++i) {
mindspore/ccsrc/backend/optimizer/ascend/ir_fission/pack_fission.cc
@@ -83,11 +83,11 @@ const AnfNodePtr PackFission::Process(const FuncGraphPtr &func_graph, const AnfN
   size_t cur_input_index = 1;
   // Divide the inputs of pack by inputs_divisor_.
   while (origin_input_size - cur_input_index + 1 >= inputs_divisor_) {
-    base_concat_inputs.push_back(CreateNewPack(func_graph, cnode, cur_input_index, inputs_divisor_));
+    base_concat_inputs.emplace_back(CreateNewPack(func_graph, cnode, cur_input_index, inputs_divisor_));
     cur_input_index += inputs_divisor_;
   }
   if (cur_input_index <= origin_input_size) {
-    base_concat_inputs.push_back(
+    base_concat_inputs.emplace_back(
       CreateNewPack(func_graph, cnode, cur_input_index, origin_input_size - cur_input_index + 1));
   }
mindspore/ccsrc/backend/optimizer/ascend/ir_fission/split_fission.cc
@@ -96,17 +96,16 @@ void CreateOutputShapeAndTypeId(const CNodePtr &origin_cnode, int split_dim, int
 void SetAttrAndAbstractForBaseSplitv(const CNodePtr &origin_cnode, const CNodePtr &base_splitv,
                                      const std::vector<int> &size_splits_base, int split_dim, int num_split) {
   SetAttrForSplitVNode(base_splitv, size_splits_base, split_dim, num_split);
-  std::vector<TypeId> base_type_ids;
-  std::vector<std::vector<size_t>> base_output_shapes_base;
   auto output_shape = AnfAlgo::GetOutputInferShape(origin_cnode, 0);
   TypeId type_id = AnfAlgo::GetOutputInferDataType(origin_cnode, 0);
+  std::vector<TypeId> base_type_ids(num_split, type_id);
+  std::vector<std::vector<size_t>> base_output_shapes_base;
   if (split_dim < 0) {
     split_dim += output_shape.size();
   }
   for (int i = 0; i < num_split; ++i) {
     output_shape[split_dim] = size_splits_base[i];
     base_output_shapes_base.emplace_back(output_shape);
-    base_type_ids.emplace_back(type_id);
   }
   AnfAlgo::SetOutputInferTypeAndShape(base_type_ids, base_output_shapes_base, base_splitv.get());
 }
@@ -118,17 +117,14 @@ AnfNodePtr DoFission(const FuncGraphPtr &func_graph, const CNodePtr &cnode, int
   // Create new size_splits for "size_splits" attr of each new Splitv node which has full inputs.
   auto small_split_size = SizeToInt(GetSmallSplitSize(cnode, split_dim, num_split));
-  std::vector<int> size_splits_new;
-  for (int i = 0; i < divisor; ++i) {
-    size_splits_new.emplace_back(small_split_size);
-  }
+  std::vector<int> size_splits_new(divisor, small_split_size);
   // Create new output shape and new output type id for each new Splitv node which has full inputs.
   std::vector<TypeId> new_type_ids;
   std::vector<std::vector<size_t>> new_output_shapes;
   CreateOutputShapeAndTypeId(cnode, split_dim, small_split_size, divisor, &new_type_ids, &new_output_shapes);
   // Create make_tuple input to create a make_tuple for replacing the old Split node.
-  std::vector<AnfNodePtr> make_tuple_inputs{NewValueNode(prim::kPrimMakeTuple)};
+  std::vector<AnfNodePtr> make_tuple_inputs = {NewValueNode(prim::kPrimMakeTuple)};
   // Start to divide the outputs of Split.
   std::vector<int> size_splits_base;
   const auto base_split_size = divisor * small_split_size;
@@ -147,10 +143,7 @@ AnfNodePtr DoFission(const FuncGraphPtr &func_graph, const CNodePtr &cnode, int
   auto last_node_num_split = num_split - cur_output_index;
   if (last_node_num_split > 1) {
     CNodePtr new_splitv = CreateSplitVNode(func_graph, CreateTupleGetItem(func_graph, base_splitv, nodes_num));
-    std::vector<int> size_splits_new_last;
-    for (int i = 0; i < last_node_num_split; ++i) {
-      size_splits_new_last.emplace_back(small_split_size);
-    }
+    std::vector<int> size_splits_new_last(last_node_num_split, small_split_size);
     SetAttrForSplitVNode(new_splitv, size_splits_new_last, split_dim, last_node_num_split);
     // Create new output shape and new output type id for the last Splitv node
     std::vector<TypeId> last_new_type_ids;
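split_fission above twice replaces a loop of identical emplace_back calls with std::vector's fill constructor, which allocates once and fills in the n equal elements directly. Reduced to its core:

#include <vector>

std::vector<int> MakeSizeSplits(int num_split, int small_split_size) {
  // Equivalent to an empty vector plus num_split emplace_back calls,
  // but with a single allocation.
  return std::vector<int>(num_split, small_split_size);
}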
mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/fused_batch_norm_fusion.cc
@@ -44,7 +44,7 @@ void GetBNOutput(const FuncGraphPtr &func_graph, const AnfNodePtr &bn, std::vect
     MS_LOG(EXCEPTION) << "The bn node " << bn->DebugString() << " should has some outputs";
   }
   for (const auto &node_index : manager->node_users()[bn]) {
-    AnfNodePtr output = node_index.first;
+    const AnfNodePtr &output = node_index.first;
     MS_EXCEPTION_IF_NULL(output);
     bn_outputs->push_back(output);
   }
mindspore/ccsrc/backend/optimizer/common/helper.cc
@@ -313,9 +313,9 @@ void CreateMultipleOutputsOfAnfNode(const FuncGraphPtr &func_graph, const AnfNod
   MS_EXCEPTION_IF_NULL(node);
   MS_EXCEPTION_IF_NULL(outputs);
   for (size_t i = 0; i < output_num; i++) {
-    auto idx = NewValueNode(SizeToInt(i));
-    MS_EXCEPTION_IF_NULL(idx);
     int temp = SizeToInt(i);
+    auto idx = NewValueNode(temp);
+    MS_EXCEPTION_IF_NULL(idx);
     auto imm = std::make_shared<Int32Imm>(temp);
     auto abstract_scalar = std::make_shared<abstract::AbstractScalar>(imm);
     idx->set_abstract(abstract_scalar);
mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc
@@ -745,7 +745,7 @@ void AnfRuntimeAlgorithm::SetOutputInferTypeAndShape(const std::vector<TypeId> &
     for (size_t i = 0; i < types.size(); ++i) {
       std::vector<int> shape_int;
       std::transform(shapes[i].begin(), shapes[i].end(), std::back_inserter(shape_int), SizeToInt);
-      abstract_list.push_back(std::make_shared<AbstractTensor>(TypeIdToType(types[i]), shape_int));
+      abstract_list.emplace_back(std::make_shared<AbstractTensor>(TypeIdToType(types[i]), shape_int));
     }
     auto abstract_tuple = std::make_shared<AbstractTuple>(abstract_list);
     node->set_abstract(abstract_tuple);
mindspore/ccsrc/runtime/device/ascend/kernel_select_ascend.cc
@@ -550,6 +550,7 @@ KernelSelectStatus SelectKernelInfo(const CNodePtr &kernel_node, KernelType kern
   kernel::KernelQuery(kernel_node, &kernel_info_list, kernel_type);
   auto select_status = SetMatchedKernelInfo(kernel_node, kernel_info_list);
+  // If aicore not find valid kernel info reloading aicpu kernel info list to find it
   if (select_status == kNoMatched) {
     MS_LOG(WARNING) << "The node [" << kernel_node->DebugString()
                     << "] cannot find valid TBE kernel info, try to get aicpu kernel info";
mindspore/ccsrc/utils/utils.h
@@ -20,6 +20,7 @@
 #include <unistd.h>
 #include <fcntl.h>
 #include <sys/stat.h>
+#include <sys/time.h>
 #include <string>
 #include <vector>
 #include <set>
@@ -337,5 +338,33 @@ static inline void ChangeFileMode(const std::string &file_name, mode_t mode) {
     MS_LOG(DEBUG) << "File `" << file_name << "` change mode failed! May be not exist.";
   }
 }
+
+static inline uint64_t GetCurrentUSec() {
+  struct timeval tv;
+  int ret = gettimeofday(&tv, nullptr);
+  if (ret != 0) {
+    MS_LOG(EXCEPTION) << "Fail gettimeofday, ret = " << ret;
+  }
+  return static_cast<uint64_t>(tv.tv_usec + tv.tv_sec * 1000000);
+}
+
+#define PROF_START(stage) uint64_t start_usec_##stage = mindspore::GetCurrentUSec()
+#define PROF_END(stage)                                                                         \
+  do {                                                                                          \
+    uint64_t end_usec_##stage = mindspore::GetCurrentUSec();                                    \
+    MS_LOG(INFO) << #stage << " costs " << (end_usec_##stage - start_usec_##stage) << " usec."; \
+  } while (0)
+
+#define PROF_MULTI_DEFINE(stage)     \
+  static uint64_t total_##stage = 0; \
+  static uint64_t count_##stage = 0;
+
+#define PROF_MULTI_START(stage) uint64_t start_usec_##stage = mindspore::GetCurrentUSec()
+
+#define PROF_MULTI_END(stage)                              \
+  ++count_##stage;                                         \
+  uint64_t end_usec_##stage = mindspore::GetCurrentUSec(); \
+  total_##stage += (end_usec_##stage - start_usec_##stage)
+
 }  // namespace mindspore
 #endif  // MINDSPORE_CCSRC_UTILS_UTILS_H_
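GetCurrentUSec and the PROF_* macros added above are the measuring side of this performance work: wrap a stage in PROF_START/PROF_END to log its wall-clock cost. A hypothetical usage sketch; the stage name and function are invented for illustration:

#include "utils/utils.h"  // assumed include path for the helpers above

void CompileGraphOnce() {
  PROF_START(compile);  // declares start_usec_compile
  // ... graph compilation work ...
  PROF_END(compile);  // logs "compile costs <n> usec." at INFO level
}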