Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
兔爷不爱我
mindspore
提交
d2727d05
M
mindspore
项目概览
兔爷不爱我
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
d2727d05
编写于
4月 17, 2020
作者:
E
Etone.Chan
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add buffer fusion bnupdate eltwise pass
上级
d1c28c1d
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
182 addition
and
1388 deletion
+182
-1388
mindspore/ccsrc/kernel/tbe/tbe_kernel_build.cc
mindspore/ccsrc/kernel/tbe/tbe_kernel_build.cc
+1
-2
mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.cc
.../ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.cc
+178
-85
mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.h
...e/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.h
+3
-3
tests/ut/cpp/pre_activate/ascend/buffer_fusion/buffer_fusion_test.cc
...p/pre_activate/ascend/buffer_fusion/buffer_fusion_test.cc
+0
-1298
未找到文件。
mindspore/ccsrc/kernel/tbe/tbe_kernel_build.cc
浏览文件 @
d2727d05
...
...
@@ -722,8 +722,7 @@ bool TbeKernelBuild::GenFusionComputeOutputJson(const mindspore::CNodePtr &cnode
std
::
vector
<
nlohmann
::
json
>
*
output_desc_list
)
{
auto
output_size
=
AnfAlgo
::
GetOutputTensorNum
(
cnode
);
if
(
AnfAlgo
::
HasNodeAttr
(
kAttrOutputUsedNum
,
cnode
))
{
// wait anther pr: auto output_used_nums = AnfAlgo::GetNodeAttr<std::vector<int>>(cnode, kAttrOutputUsedNum);
auto
output_used_nums
=
{
SizeToInt
(
AnfAlgo
::
GetNodeAttr
<
std
::
size_t
>
(
cnode
,
kAttrOutputUsedNum
))};
auto
output_used_nums
=
AnfAlgo
::
GetNodeAttr
<
std
::
vector
<
int
>>
(
cnode
,
kAttrOutputUsedNum
);
MS_LOG
(
INFO
)
<<
"This node's output has been reused, node name: "
<<
cnode
->
fullname_with_scope
();
if
(
output_used_nums
.
size
()
!=
output_size
)
{
MS_LOG
(
INFO
)
<<
"Fusion error: output tenor num("
<<
output_size
<<
")"
...
...
mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.cc
浏览文件 @
d2727d05
...
...
@@ -17,6 +17,7 @@
#include <vector>
#include <tuple>
#include <utility>
#include <unordered_set>
#include <unordered_map>
#include <deque>
...
...
@@ -282,11 +283,17 @@ kernel::KernelBuildInfoPtr CreateFusionOpKernelInfo(const std::vector<AnfNodePtr
// outputs format and data type
std
::
vector
<
std
::
string
>
outputs_format
;
std
::
vector
<
TypeId
>
outputs_data_type
;
for
(
size_t
index
=
0
;
index
<
outputs_list
.
size
();
++
index
)
{
for
(
size_t
idx
=
0
;
idx
<
AnfAlgo
::
GetOutputTensorNum
(
outputs_list
[
index
]);
++
idx
)
{
auto
kernel_with_index
=
AnfAlgo
::
VisitKernel
(
outputs_list
[
index
],
idx
);
outputs_format
.
push_back
(
AnfAlgo
::
GetOutputFormat
(
kernel_with_index
.
first
,
kernel_with_index
.
second
));
outputs_data_type
.
push_back
(
AnfAlgo
::
GetOutputDeviceDataType
(
kernel_with_index
.
first
,
kernel_with_index
.
second
));
for
(
const
auto
&
output
:
outputs_list
)
{
if
(
AnfAlgo
::
GetCNodeName
(
output
)
==
prim
::
kPrimTupleGetItem
->
name
())
{
auto
tuple_getitem
=
output
->
cast
<
CNodePtr
>
();
MS_EXCEPTION_IF_NULL
(
tuple_getitem
);
outputs_format
.
push_back
(
AnfAlgo
::
GetOutputFormat
(
tuple_getitem
->
input
(
1
),
IntToSize
(
GetValue
<
int
>
(
GetValueNode
(
tuple_getitem
->
input
(
2
))))));
outputs_data_type
.
push_back
(
AnfAlgo
::
GetOutputDeviceDataType
(
tuple_getitem
->
input
(
1
),
IntToSize
(
GetValue
<
int
>
(
GetValueNode
(
tuple_getitem
->
input
(
2
))))));
}
else
{
outputs_format
.
push_back
(
AnfAlgo
::
GetOutputFormat
(
output
,
0
));
outputs_data_type
.
push_back
(
AnfAlgo
::
GetOutputDeviceDataType
(
output
,
0
));
}
}
builder
.
SetInputsFormat
(
inputs_format
);
...
...
@@ -320,32 +327,35 @@ AnfNodePtr CreateTupleGetItem(const AnfNodePtr &buffer_fusion_kernel, session::K
return
tuple_item
;
}
void
ReplaceOldNode
(
const
std
::
vector
<
AnfNodePtr
>
&
outputs_list
,
const
AnfNodePtr
&
buffer_fusion_kernel
,
session
::
KernelGraph
*
kernel_graph
)
{
void
ReplaceInputNodeInOtherFusionScope
(
std
::
unordered_map
<
int32_t
,
BufferFusionInfo_t
>
*
buffer_fusion_infos
,
int32_t
fusion_id
,
const
AnfNodePtr
&
output_item
,
const
AnfNodePtr
&
replace_item
)
{
for
(
int32_t
id
=
fusion_id
+
1
;
id
<=
SizeToInt
(
buffer_fusion_infos
->
size
());
++
id
)
{
auto
itr
=
std
::
find
((
*
buffer_fusion_infos
)[
id
].
inputs_list
.
begin
(),
(
*
buffer_fusion_infos
)[
id
].
inputs_list
.
end
(),
output_item
);
if
(
itr
!=
(
*
buffer_fusion_infos
)[
id
].
inputs_list
.
end
())
{
MS_LOG
(
DEBUG
)
<<
"replace input of other pattern, id = "
<<
id
;
*
itr
=
replace_item
;
}
}
}
void
ReplaceOldNode
(
std
::
unordered_map
<
int32_t
,
BufferFusionInfo_t
>
*
buffer_fusion_infos
,
int32_t
fusion_id
,
const
AnfNodePtr
&
buffer_fusion_kernel
,
session
::
KernelGraph
*
kernel_graph
)
{
MS_EXCEPTION_IF_NULL
(
kernel_graph
);
auto
manager
=
kernel_graph
->
manager
();
MS_EXCEPTION_IF_NULL
(
manager
);
if
(
outputs_list
.
size
()
==
1
)
{
// single output
(
void
)
manager
->
Replace
(
outputs_list
[
0
],
buffer_fusion_kernel
);
auto
buffer_fusion_info
=
(
*
buffer_fusion_infos
)[
fusion_id
];
if
(
buffer_fusion_info
.
outputs_list
.
size
()
==
1
)
{
// single output
(
void
)
manager
->
Replace
(
buffer_fusion_info
.
outputs_list
[
0
],
buffer_fusion_kernel
);
ReplaceInputNodeInOtherFusionScope
(
buffer_fusion_infos
,
fusion_id
,
buffer_fusion_info
.
outputs_list
[
0
],
buffer_fusion_kernel
);
}
else
{
// multiple output
size_t
real_idx
=
0
;
for
(
size_t
index
=
0
;
index
<
outputs_list
.
size
();
++
index
)
{
if
(
AnfAlgo
::
GetOutputTensorNum
(
outputs_list
[
index
])
==
1
)
{
auto
tuple_item
=
CreateTupleGetItem
(
buffer_fusion_kernel
,
kernel_graph
,
real_idx
++
);
(
void
)
manager
->
Replace
(
outputs_list
[
index
],
tuple_item
);
}
else
{
std
::
vector
<
AnfNodePtr
>
make_tuple_inputs
;
AbstractBasePtrList
abstract_list
;
make_tuple_inputs
.
push_back
(
NewValueNode
(
prim
::
kPrimMakeTuple
));
for
(
size_t
idx
=
0
;
idx
<
AnfAlgo
::
GetOutputTensorNum
(
outputs_list
[
index
]);
++
idx
)
{
auto
tuple_item
=
CreateTupleGetItem
(
buffer_fusion_kernel
,
kernel_graph
,
real_idx
++
);
abstract_list
.
push_back
(
tuple_item
->
abstract
());
make_tuple_inputs
.
push_back
(
tuple_item
);
}
AnfNodePtr
make_tuple
=
kernel_graph
->
NewCNode
(
make_tuple_inputs
);
make_tuple
->
set_abstract
(
std
::
make_shared
<
abstract
::
AbstractTuple
>
(
abstract_list
));
(
void
)
manager
->
Replace
(
outputs_list
[
index
],
make_tuple
);
}
for
(
size_t
index
=
0
;
index
<
buffer_fusion_info
.
outputs_list
.
size
();
++
index
)
{
auto
tuple_item
=
CreateTupleGetItem
(
buffer_fusion_kernel
,
kernel_graph
,
index
);
(
void
)
manager
->
Replace
(
buffer_fusion_info
.
outputs_list
[
index
],
tuple_item
);
ReplaceInputNodeInOtherFusionScope
(
buffer_fusion_infos
,
fusion_id
,
buffer_fusion_info
.
outputs_list
[
index
],
tuple_item
);
}
}
}
...
...
@@ -406,38 +416,67 @@ void CheckCurrentNodeIsInput(const CNodePtr &node, const int32_t &cur_fusion_id,
}
}
void
InsertNode
(
const
AnfNodePtr
&
node
,
std
::
vector
<
AnfNodePtr
>
*
list
)
{
MS_EXCEPTION_IF_NULL
(
list
);
if
(
std
::
find
(
list
->
begin
(),
list
->
end
(),
node
)
==
list
->
end
())
{
(
void
)
list
->
insert
(
list
->
end
(),
node
);
void
GetFusionScopeComputeNodeList
(
session
::
KernelGraph
*
kernel_graph
,
std
::
unordered_map
<
int32_t
,
BufferFusionInfo_t
>
*
buffer_fusion_infos
)
{
MS_EXCEPTION_IF_NULL
(
buffer_fusion_infos
);
auto
nodes
=
TopoSort
(
kernel_graph
->
get_return
());
for
(
auto
&
node
:
nodes
)
{
MS_EXCEPTION_IF_NULL
(
node
);
if
(
AnfAlgo
::
IsRealCNodeKernel
(
node
)
&&
AnfAlgo
::
HasNodeAttr
(
kOpAttrFusionId
,
node
))
{
auto
fusion_id
=
AnfAlgo
::
GetNodeAttr
<
int32_t
>
(
node
,
kOpAttrFusionId
);
(
*
buffer_fusion_infos
)[
fusion_id
].
anf_nodes
.
push_back
(
node
);
}
}
}
void
CheckCurrentNodeIsOutput
(
const
CNodePtr
&
node
,
const
int32_t
&
cur_fusion_id
,
std
::
unordered_map
<
int32_t
,
BufferFusionInfo_t
>
*
buffer_fusion_infos
)
{
MS_EXCEPTION_IF_NULL
(
node
);
void
GetFusionScopeOutputNodeList
(
session
::
KernelGraph
*
kernel_graph
,
std
::
unordered_map
<
int32_t
,
BufferFusionInfo_t
>
*
buffer_fusion_infos
)
{
MS_EXCEPTION_IF_NULL
(
kernel_graph
);
MS_EXCEPTION_IF_NULL
(
buffer_fusion_infos
);
for
(
auto
&
input
:
node
->
inputs
())
{
MS_EXCEPTION_IF_NULL
(
input
);
if
(
AnfAlgo
::
IsRealCNodeKernel
(
input
)
&&
AnfAlgo
::
HasNodeAttr
(
kOpAttrFusionId
,
input
))
{
auto
fusion_id
=
AnfAlgo
::
GetNodeAttr
<
int32_t
>
(
input
,
kOpAttrFusionId
);
if
(
buffer_fusion_infos
->
find
(
fusion_id
)
==
buffer_fusion_infos
->
end
())
{
BufferFusionInfo_t
buffer_fusion_info
;
(
*
buffer_fusion_infos
)[
fusion_id
]
=
buffer_fusion_info
;
}
if
(
fusion_id
!=
cur_fusion_id
)
{
InsertNode
(
input
,
&
((
*
buffer_fusion_infos
)[
fusion_id
].
outputs_list
));
}
}
else
if
(
input
->
isa
<
CNode
>
())
{
for
(
auto
&
input_in
:
input
->
cast
<
CNodePtr
>
()
->
inputs
())
{
if
(
AnfAlgo
::
IsRealCNodeKernel
(
input_in
)
&&
AnfAlgo
::
HasNodeAttr
(
kOpAttrFusionId
,
input_in
))
{
auto
fusion_id
=
AnfAlgo
::
GetNodeAttr
<
int32_t
>
(
input_in
,
kOpAttrFusionId
);
if
(
buffer_fusion_infos
->
find
(
fusion_id
)
==
buffer_fusion_infos
->
end
())
{
BufferFusionInfo_t
buffer_fusion_info
;
(
*
buffer_fusion_infos
)[
fusion_id
]
=
buffer_fusion_info
;
auto
manager
=
kernel_graph
->
manager
();
MS_EXCEPTION_IF_NULL
(
manager
);
for
(
auto
&
buffer_fusion_info
:
*
buffer_fusion_infos
)
{
auto
fusion_id
=
buffer_fusion_info
.
first
;
auto
fusion_info
=
buffer_fusion_info
.
second
;
for
(
const
auto
&
node
:
fusion_info
.
anf_nodes
)
{
if
(
AnfAlgo
::
GetOutputTensorNum
(
node
)
==
1
)
{
for
(
auto
use_node
:
manager
->
node_users
()[
node
])
{
if
(
std
::
find
(
fusion_info
.
anf_nodes
.
begin
(),
fusion_info
.
anf_nodes
.
end
(),
use_node
.
first
)
==
fusion_info
.
anf_nodes
.
end
())
{
(
*
buffer_fusion_infos
)[
fusion_id
].
outputs_list
.
push_back
(
node
);
break
;
}
}
}
else
{
int
prev_idx
=
0
;
std
::
vector
<
AnfNodePtr
>
tuple_getitem_nodes
;
std
::
transform
(
manager
->
node_users
()[
node
].
begin
(),
manager
->
node_users
()[
node
].
end
(),
std
::
back_inserter
(
tuple_getitem_nodes
),
[](
const
std
::
pair
<
AnfNodePtr
,
int
>
&
use_node
)
{
return
use_node
.
first
;
});
std
::
sort
(
tuple_getitem_nodes
.
begin
(),
tuple_getitem_nodes
.
end
(),
[](
const
AnfNodePtr
&
node1
,
const
AnfNodePtr
&
node2
)
{
auto
getitem1
=
node1
->
cast
<
CNodePtr
>
();
auto
getitem2
=
node2
->
cast
<
CNodePtr
>
();
auto
output_idx1
=
GetValue
<
int
>
(
GetValueNode
(
getitem1
->
input
(
2
)));
auto
output_idx2
=
GetValue
<
int
>
(
GetValueNode
(
getitem2
->
input
(
2
)));
return
output_idx1
<
output_idx2
;
});
for
(
auto
getitem
:
tuple_getitem_nodes
)
{
auto
getitem_ptr
=
getitem
->
cast
<
CNodePtr
>
();
auto
input2
=
getitem_ptr
->
input
(
2
);
auto
output_idx
=
GetValue
<
int
>
(
GetValueNode
(
input2
));
for
(
int
stub_idx
=
prev_idx
;
stub_idx
<
output_idx
;
++
stub_idx
)
{
auto
stub_node
=
CreateTupleGetItem
(
node
,
kernel_graph
,
IntToSize
(
stub_idx
));
(
*
buffer_fusion_infos
)[
fusion_id
].
outputs_list
.
push_back
(
stub_node
);
}
if
(
fusion_id
!=
cur_fusion_id
)
{
InsertNode
(
input_in
,
&
((
*
buffer_fusion_infos
)[
fusion_id
].
outputs_list
));
prev_idx
=
output_idx
+
1
;
for
(
auto
item_use_node
:
manager
->
node_users
()[
getitem
])
{
if
(
std
::
find
(
fusion_info
.
anf_nodes
.
begin
(),
fusion_info
.
anf_nodes
.
end
(),
item_use_node
.
first
)
==
fusion_info
.
anf_nodes
.
end
())
{
(
*
buffer_fusion_infos
)[
fusion_id
].
outputs_list
.
push_back
(
getitem
);
break
;
}
}
}
}
...
...
@@ -445,15 +484,72 @@ void CheckCurrentNodeIsOutput(const CNodePtr &node, const int32_t &cur_fusion_id
}
}
void
GetFusionScopeNodeList
(
const
session
::
KernelGraph
&
kernel_graph
,
std
::
unordered_map
<
int32_t
,
BufferFusionInfo_t
>
*
buffer_fusion_infos
)
{
MS_EXCEPTION_IF_NULL
(
buffer_fusion_infos
);
auto
nodes
=
TopoSort
(
kernel_graph
.
get_return
());
for
(
auto
&
node
:
nodes
)
{
MS_EXCEPTION_IF_NULL
(
node
);
if
(
AnfAlgo
::
IsRealCNodeKernel
(
node
)
&&
AnfAlgo
::
HasNodeAttr
(
kOpAttrFusionId
,
node
))
{
auto
fusion_id
=
AnfAlgo
::
GetNodeAttr
<
int32_t
>
(
node
,
kOpAttrFusionId
);
(
*
buffer_fusion_infos
)[
fusion_id
].
anf_nodes
.
push_back
(
node
);
void
MatchConvBnreduce
(
const
CNodePtr
&
cnode
,
const
session
::
KernelGraph
&
kernel_graph
,
std
::
unordered_set
<
AnfNodePtr
>
*
fused_set
,
FusedNodeRecord
*
candidate_fusion
)
{
MS_EXCEPTION_IF_NULL
(
cnode
);
MS_EXCEPTION_IF_NULL
(
fused_set
);
MS_EXCEPTION_IF_NULL
(
candidate_fusion
);
auto
manager
=
kernel_graph
.
manager
();
MS_EXCEPTION_IF_NULL
(
manager
);
auto
conv
=
cnode
->
input
(
1
);
if
(
conv
->
isa
<
CNode
>
()
&&
AnfAlgo
::
GetCNodeName
(
conv
)
==
prim
::
kPrimConv2D
->
name
())
{
std
::
vector
<
int
>
output_used_num
{
SizeToInt
(
manager
->
node_users
()[
conv
].
size
())};
AnfAlgo
::
SetNodeAttr
(
kAttrOutputUsedNum
,
MakeValue
(
output_used_num
),
conv
);
std
::
unordered_set
<
AnfNodePtr
>
record
{
cnode
,
conv
};
candidate_fusion
->
push_back
(
record
);
fused_set
->
insert
(
record
.
begin
(),
record
.
end
());
}
}
void
MatchBnupdateRelu
(
const
CNodePtr
&
cnode
,
const
AnfNodePtr
&
relu_input
,
const
session
::
KernelGraph
&
kernel_graph
,
std
::
unordered_set
<
AnfNodePtr
>
*
fused_set
,
FusedNodeRecord
*
candidate_fusion
)
{
MS_EXCEPTION_IF_NULL
(
cnode
);
MS_EXCEPTION_IF_NULL
(
fused_set
);
MS_EXCEPTION_IF_NULL
(
candidate_fusion
);
auto
manager
=
kernel_graph
.
manager
();
MS_EXCEPTION_IF_NULL
(
manager
);
auto
getitem
=
relu_input
->
cast
<
CNodePtr
>
();
auto
bnupdate
=
getitem
->
input
(
1
);
if
(
bnupdate
->
isa
<
CNode
>
()
&&
AnfAlgo
::
GetCNodeName
(
bnupdate
)
==
kBNTrainingUpdateOpName
)
{
std
::
vector
<
int
>
output_used_num
(
AnfAlgo
::
GetOutputTensorNum
(
bnupdate
),
0
);
for
(
auto
out_getitem
:
manager
->
node_users
()[
bnupdate
])
{
auto
out_getitem_ptr
=
out_getitem
.
first
->
cast
<
CNodePtr
>
();
auto
input2
=
out_getitem_ptr
->
input
(
2
);
auto
output_idx
=
GetValue
<
int
>
(
GetValueNode
(
input2
));
output_used_num
[
output_idx
]
=
SizeToInt
(
manager
->
node_users
()[
out_getitem
.
first
].
size
());
}
AnfAlgo
::
SetNodeAttr
(
kAttrOutputUsedNum
,
MakeValue
(
output_used_num
),
bnupdate
);
std
::
unordered_set
<
AnfNodePtr
>
record
{
cnode
,
bnupdate
};
candidate_fusion
->
push_back
(
record
);
fused_set
->
insert
(
record
.
begin
(),
record
.
end
());
}
}
void
MatchBnupdateAddRelu
(
const
CNodePtr
&
cnode
,
const
AnfNodePtr
&
relu_input
,
const
session
::
KernelGraph
&
kernel_graph
,
std
::
unordered_set
<
AnfNodePtr
>
*
fused_set
,
FusedNodeRecord
*
candidate_fusion
)
{
MS_EXCEPTION_IF_NULL
(
cnode
);
MS_EXCEPTION_IF_NULL
(
fused_set
);
MS_EXCEPTION_IF_NULL
(
candidate_fusion
);
auto
manager
=
kernel_graph
.
manager
();
MS_EXCEPTION_IF_NULL
(
manager
);
auto
add
=
relu_input
->
cast
<
CNodePtr
>
();
MS_EXCEPTION_IF_NULL
(
add
);
auto
tuple_getitem
=
add
->
input
(
1
);
if
(
tuple_getitem
->
isa
<
CNode
>
()
&&
AnfAlgo
::
GetCNodeName
(
tuple_getitem
)
==
prim
::
kPrimTupleGetItem
->
name
())
{
auto
getitem
=
tuple_getitem
->
cast
<
CNodePtr
>
();
auto
bnupdate
=
getitem
->
input
(
1
);
if
(
bnupdate
->
isa
<
CNode
>
()
&&
AnfAlgo
::
GetCNodeName
(
bnupdate
)
==
kBNTrainingUpdateOpName
)
{
std
::
vector
<
int
>
output_used_num
(
AnfAlgo
::
GetOutputTensorNum
(
bnupdate
),
0
);
for
(
auto
out_getitem
:
manager
->
node_users
()[
bnupdate
])
{
auto
out_getitem_ptr
=
out_getitem
.
first
->
cast
<
CNodePtr
>
();
auto
input2
=
out_getitem_ptr
->
input
(
2
);
auto
output_idx
=
GetValue
<
int
>
(
GetValueNode
(
input2
));
output_used_num
[
output_idx
]
=
SizeToInt
(
manager
->
node_users
()[
out_getitem
.
first
].
size
());
}
AnfAlgo
::
SetNodeAttr
(
kAttrOutputUsedNum
,
MakeValue
(
output_used_num
),
bnupdate
);
std
::
unordered_set
<
AnfNodePtr
>
record
{
cnode
,
relu_input
,
bnupdate
};
candidate_fusion
->
push_back
(
record
);
fused_set
->
insert
(
record
.
begin
(),
record
.
end
());
}
}
}
...
...
@@ -470,15 +566,14 @@ void MatchOpNamePattern(const session::KernelGraph &kernel_graph, std::unordered
auto
cnode
=
node
->
cast
<
CNodePtr
>
();
MS_EXCEPTION_IF_NULL
(
cnode
);
if
(
AnfAlgo
::
GetCNodeName
(
cnode
)
==
kBNTrainingReduceOpName
)
{
auto
conv
=
cnode
->
input
(
1
);
if
(
conv
->
isa
<
CNode
>
()
&&
AnfAlgo
::
GetCNodeName
(
conv
)
==
prim
::
kPrimConv2D
->
name
())
{
auto
manager
=
kernel_graph
.
manager
();
MS_EXCEPTION_IF_NULL
(
manager
);
auto
&
users
=
manager
->
node_users
();
AnfAlgo
::
SetNodeAttr
(
kAttrOutputUsedNum
,
MakeValue
(
users
[
conv
].
size
()),
conv
);
std
::
unordered_set
<
AnfNodePtr
>
record
({
cnode
,
conv
});
candidate_fusion
->
push_back
(
record
);
fused_set
->
insert
(
record
.
begin
(),
record
.
end
());
MatchConvBnreduce
(
cnode
,
kernel_graph
,
fused_set
,
candidate_fusion
);
}
else
if
(
AnfAlgo
::
GetCNodeName
(
cnode
)
==
kReluV2OpName
||
AnfAlgo
::
GetCNodeName
(
cnode
)
==
prim
::
kPrimRelu
->
name
())
{
auto
relu_input
=
cnode
->
input
(
1
);
if
(
relu_input
->
isa
<
CNode
>
()
&&
AnfAlgo
::
GetCNodeName
(
relu_input
)
==
prim
::
kPrimTensorAdd
->
name
())
{
MatchBnupdateAddRelu
(
cnode
,
relu_input
,
kernel_graph
,
fused_set
,
candidate_fusion
);
}
else
if
(
relu_input
->
isa
<
CNode
>
()
&&
AnfAlgo
::
GetCNodeName
(
relu_input
)
==
prim
::
kPrimTupleGetItem
->
name
())
{
MatchBnupdateRelu
(
cnode
,
relu_input
,
kernel_graph
,
fused_set
,
candidate_fusion
);
}
}
}
...
...
@@ -536,27 +631,23 @@ void MatchFusionTypePattern(const session::KernelGraph &kernel_graph, std::unord
}
}
// namespace
void
BufferFusion
::
GetBufferFusionInfo
(
const
session
::
KernelGraph
&
kernel_graph
,
void
BufferFusion
::
GetBufferFusionInfo
(
session
::
KernelGraph
*
kernel_graph
,
std
::
unordered_map
<
int32_t
,
BufferFusionInfo_t
>
*
buffer_fusion_infos
)
const
{
MS_EXCEPTION_IF_NULL
(
buffer_fusion_infos
);
std
::
vector
<
AnfNodePtr
>
node_list
=
TopoSort
(
kernel_graph
.
get_return
());
std
::
vector
<
AnfNodePtr
>
node_list
=
TopoSort
(
kernel_graph
->
get_return
());
for
(
auto
&
node
:
node_list
)
{
if
(
!
AnfAlgo
::
IsRealCNodeKernel
(
node
))
{
continue
;
}
int32_t
cur_fusion_id
=
-
1
;
auto
cnode
=
node
->
cast
<
CNodePtr
>
();
MS_EXCEPTION_IF_NULL
(
cnode
);
if
(
AnfAlgo
::
HasNodeAttr
(
kOpAttrFusionId
,
cnode
))
{
cur_fusion_id
=
AnfAlgo
::
GetNodeAttr
<
int32_t
>
(
cnode
,
kOpAttrFusionId
);
auto
cur_fusion_id
=
AnfAlgo
::
GetNodeAttr
<
int32_t
>
(
cnode
,
kOpAttrFusionId
);
CheckCurrentNodeIsInput
(
cnode
,
cur_fusion_id
,
buffer_fusion_infos
);
}
// Check if current node is output
CheckCurrentNodeIsOutput
(
cnode
,
cur_fusion_id
,
buffer_fusion_infos
);
}
GetFusionScopeNodeList
(
kernel_graph
,
buffer_fusion_infos
);
GetFusionScopeComputeNodeList
(
kernel_graph
,
buffer_fusion_infos
);
GetFusionScope
Output
NodeList
(
kernel_graph
,
buffer_fusion_infos
);
for
(
auto
&
buffer_fusion_info
:
*
buffer_fusion_infos
)
{
buffer_fusion_info
.
second
.
kernel_build_info
=
CreateFusionOpKernelInfo
(
buffer_fusion_info
.
second
.
inputs_list_in
,
buffer_fusion_info
.
second
.
inputs_list
,
...
...
@@ -569,7 +660,7 @@ bool BufferFusion::FuseBufferFusionPattern(session::KernelGraph *kernel_graph) c
bool
change
=
false
;
std
::
unordered_map
<
int32_t
,
BufferFusionInfo_t
>
buffer_fusion_infos
;
buffer_fusion_infos
.
clear
();
GetBufferFusionInfo
(
*
kernel_graph
,
&
buffer_fusion_infos
);
GetBufferFusionInfo
(
kernel_graph
,
&
buffer_fusion_infos
);
std
::
vector
<
mindspore
::
kernel
::
FusionScopeInfo
>
fusion_scope_infos
;
for
(
auto
&
buffer_fusion_info
:
buffer_fusion_infos
)
{
...
...
@@ -600,7 +691,7 @@ bool BufferFusion::FuseBufferFusionPattern(session::KernelGraph *kernel_graph) c
MS_LOG
(
DEBUG
)
<<
"fusion id: "
<<
fusion_id
<<
", fusion op compiling failed"
;
continue
;
}
change
=
ReplaceFusionOp
(
buffer_fusion_infos
[
fusion_id
]
,
kernel_mods
[
fusion_id
],
kernel_graph
);
change
=
ReplaceFusionOp
(
&
buffer_fusion_infos
,
fusion_id
,
kernel_mods
[
fusion_id
],
kernel_graph
);
}
MS_LOG
(
DEBUG
)
<<
"End Buffer Fusion"
;
return
change
;
...
...
@@ -630,8 +721,10 @@ bool BufferFusion::MatchBufferFusionPattern(const session::KernelGraph &kernel_g
return
true
;
}
bool
BufferFusion
::
ReplaceFusionOp
(
const
BufferFusionInfo_t
&
buffer_fusion_info
,
const
kernel
::
KernelModPtr
&
kernel_ptr
,
bool
BufferFusion
::
ReplaceFusionOp
(
std
::
unordered_map
<
int32_t
,
BufferFusionInfo_t
>
*
buffer_fusion_infos
,
int32_t
fusion_id
,
const
kernel
::
KernelModPtr
&
kernel_ptr
,
session
::
KernelGraph
*
kernel_graph
)
const
{
auto
buffer_fusion_info
=
(
*
buffer_fusion_infos
)[
fusion_id
];
auto
buffer_fusion
=
CreateFusionOp
(
buffer_fusion_info
.
inputs_list
,
buffer_fusion_info
.
outputs_list
,
buffer_fusion_info
.
anf_nodes
,
kernel_graph
);
AnfAlgo
::
SetSelectKernelBuildInfo
(
buffer_fusion_info
.
kernel_build_info
,
buffer_fusion
.
get
());
...
...
@@ -651,7 +744,7 @@ bool BufferFusion::ReplaceFusionOp(const BufferFusionInfo_t &buffer_fusion_info,
AnfAlgo
::
SetOutputInferTypeAndShape
(
types
,
shapes
,
buffer_fusion
.
get
());
AnfAlgo
::
SetKernelMod
(
kernel_ptr
,
buffer_fusion
.
get
());
// replace node
ReplaceOldNode
(
buffer_fusion_info
.
outputs_list
,
buffer_fusion
,
kernel_graph
);
ReplaceOldNode
(
buffer_fusion_info
s
,
fusion_id
,
buffer_fusion
,
kernel_graph
);
return
true
;
}
...
...
mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.h
浏览文件 @
d2727d05
...
...
@@ -44,10 +44,10 @@ class BufferFusion : public Pass {
bool
Run
(
const
FuncGraphPtr
&
graph
)
override
;
private:
void
GetBufferFusionInfo
(
const
session
::
KernelGraph
&
kernel_graph
,
void
GetBufferFusionInfo
(
session
::
KernelGraph
*
kernel_graph
,
std
::
unordered_map
<
int32_t
,
BufferFusionInfo_t
>
*
buffer_fusion_infos
)
const
;
bool
ReplaceFusionOp
(
const
BufferFusionInfo_t
&
buffer_fusion_info
,
const
kernel
::
KernelModPtr
&
kernel_ptr
,
session
::
KernelGraph
*
kernel_graph
)
const
;
bool
ReplaceFusionOp
(
std
::
unordered_map
<
int32_t
,
BufferFusionInfo_t
>
*
buffer_fusion_infos
,
int32_t
fusion_id
,
const
kernel
::
KernelModPtr
&
kernel_ptr
,
session
::
KernelGraph
*
kernel_graph
)
const
;
bool
MatchBufferFusionPattern
(
const
session
::
KernelGraph
&
kernel_graph
)
const
;
bool
FuseBufferFusionPattern
(
session
::
KernelGraph
*
kernel_graph
)
const
;
};
...
...
tests/ut/cpp/pre_activate/ascend/buffer_fusion/buffer_fusion_test.cc
已删除
100644 → 0
浏览文件 @
d1c28c1d
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <iostream>
#include <memory>
#include <unordered_map>
#include "common/common_test.h"
#include "common/py_func_graph_fetcher.h"
#include "ir/anf.h"
#include "ir/func_graph_cloner.h"
#include "utils/context/ms_context.h"
#include "debug/draw.h"
#include "debug/anf_ir_dump.h"
#include "operator/ops.h"
#include "utils/utils.h"
#include "kernel/tbe/tbe_kernel_mod.h"
#include "session/kernel_graph.h"
#include "device/kernel_info.h"
#include "session/anf_runtime_algorithm.h"
#include "pre_activate/common/pattern_engine.h"
#define private public
#include "pre_activate/ascend/buffer_fusion/buffer_fusion.h"
namespace
mindspore
{
namespace
opt
{
using
Primitive
=
mindspore
::
Primitive
;
using
session
::
KernelGraph
;
using
KernelGraphPtr
=
std
::
shared_ptr
<
session
::
KernelGraph
>
;
using
KernelBuildInfoBuilder
=
kernel
::
KernelBuildInfo
::
KernelBuildInfoBuilder
;
class
TestHWBufferFusion
:
public
UT
::
Common
{
public:
TestHWBufferFusion
()
:
getPyFun_
(
"gtest_input.pre_activate.hw_opt_test"
,
true
)
{}
public:
UT
::
PyFuncGraphFetcher
getPyFun_
;
};
static
KernelGraphPtr
CreateKernelGraphForBufferFusionMultipleIn
(
uint32_t
after_layers
,
mindspore
::
kernel
::
FusionType
fusiontype
=
mindspore
::
kernel
::
CONVLUTION
)
{
KernelGraphPtr
g
=
std
::
make_shared
<
KernelGraph
>
();
std
::
vector
<
AnfNodePtr
>
inputs
;
std
::
vector
<
int
>
shp
=
{
1
,
3
,
3
,
4
};
TensorTypePtr
tensor_type
=
std
::
make_shared
<
TensorType
>
(
kFloat32
);
tensor
::
DeviceInfo
device_info
{
kOpFormat_NCHW
,
tensor_type
};
uint32_t
layerscount
=
1
;
CNodePtr
ptr_formerlayer
;
std
::
string
name
=
""
;
// Construct first node
tensor
::
TensorPtr
y_tensor
=
std
::
make_shared
<
tensor
::
Tensor
>
(
kFloat32
->
type_id
(),
shp
);
y_tensor
->
set_device_info
(
device_info
);
tensor
::
TensorPtr
z_tensor
=
std
::
make_shared
<
tensor
::
Tensor
>
(
kFloat32
->
type_id
(),
shp
);
z_tensor
->
set_device_info
(
device_info
);
auto
y_const
=
NewValueNode
(
y_tensor
);
auto
z_const
=
NewValueNode
(
z_tensor
);
y_const
->
set_abstract
(
y_tensor
->
ToAbstract
());
z_const
->
set_abstract
(
z_tensor
->
ToAbstract
());
g
->
MutableInputs
()
->
push_back
(
y_const
);
g
->
MutableInputs
()
->
push_back
(
z_const
);
auto
p_conv
=
std
::
make_shared
<
Primitive
>
(
"Conv2D"
);
std
::
vector
<
std
::
string
>
input_names
=
{
"x"
,
"y"
};
std
::
vector
<
std
::
string
>
output_names
=
{
"output"
};
ValuePtr
input_names_v
=
MakeValue
(
input_names
);
ValuePtr
output_names_v
=
MakeValue
(
output_names
);
p_conv
->
set_attr
(
"input_names"
,
input_names_v
);
p_conv
->
set_attr
(
"output_names"
,
output_names_v
);
inputs
.
clear
();
inputs
.
push_back
(
NewValueNode
(
p_conv
));
inputs
.
push_back
(
y_const
);
inputs
.
push_back
(
z_const
);
name
=
"test_conv_"
+
std
::
to_string
(
layerscount
)
+
"layers_graph.dot"
;
auto
kernelptr_first
=
g
->
NewCNode
(
inputs
);
kernelptr_first
->
set_abstract
(
y_tensor
->
ToAbstract
());
kernelptr_first
->
set_kernel_info
(
std
::
make_shared
<
device
::
KernelInfo
>
());
KernelBuildInfoBuilder
builder
;
builder
.
SetInputsFormat
({
kOpFormat_NCHW
,
kOpFormat_NCHW
});
builder
.
SetInputsDeviceType
({
kFloat32
->
type_id
(),
kFloat32
->
type_id
()});
builder
.
SetOutputsFormat
({
kOpFormat_NCHW
});
builder
.
SetOutputsDeviceType
({
kFloat32
->
type_id
()});
builder
.
SetKernelType
(
KernelType
::
TBE_KERNEL
);
builder
.
SetFusionType
(
fusiontype
);
builder
.
SetProcessor
(
kernel
::
Processor
::
AICORE
);
AnfAlgo
::
SetSelectKernelBuildInfo
(
builder
.
Build
(),
kernelptr_first
.
get
());
ptr_formerlayer
=
kernelptr_first
;
// configure fusion successor layers
int
layer_idx
=
0
;
while
(
after_layers
--
)
{
auto
p_relu
=
std
::
make_shared
<
Primitive
>
(
"ReLU6"
);
if
(
layer_idx
==
0
)
{
tensor
::
TensorPtr
x_tensor
=
std
::
make_shared
<
tensor
::
Tensor
>
(
kFloat32
->
type_id
(),
shp
);
x_tensor
->
set_device_info
(
device_info
);
auto
x_const
=
NewValueNode
(
x_tensor
);
x_const
->
set_abstract
(
x_tensor
->
ToAbstract
());
std
::
vector
<
std
::
string
>
input_names
=
{
"x"
,
"y"
};
std
::
vector
<
std
::
string
>
output_names
=
{
"output"
};
ValuePtr
input_names_v
=
MakeValue
(
input_names
);
ValuePtr
output_names_v
=
MakeValue
(
output_names
);
p_relu
->
set_attr
(
"input_names"
,
input_names_v
);
p_relu
->
set_attr
(
"output_names"
,
output_names_v
);
inputs
.
clear
();
inputs
.
push_back
(
NewValueNode
(
p_relu
));
inputs
.
push_back
(
ptr_formerlayer
);
inputs
.
push_back
(
x_const
);
}
else
{
std
::
vector
<
std
::
string
>
input_names
=
{
"x"
};
std
::
vector
<
std
::
string
>
output_names
=
{
"output"
};
ValuePtr
input_names_v
=
MakeValue
(
input_names
);
ValuePtr
output_names_v
=
MakeValue
(
output_names
);
p_relu
->
set_attr
(
"input_names"
,
input_names_v
);
p_relu
->
set_attr
(
"output_names"
,
output_names_v
);
inputs
.
clear
();
inputs
.
push_back
(
NewValueNode
(
p_relu
));
inputs
.
push_back
(
ptr_formerlayer
);
}
auto
kernelptr_floor
=
g
->
NewCNode
(
inputs
);
kernelptr_floor
->
set_abstract
(
y_tensor
->
ToAbstract
());
kernelptr_floor
->
set_kernel_info
(
std
::
make_shared
<
device
::
KernelInfo
>
());
KernelBuildInfoBuilder
builder
;
if
(
layer_idx
==
0
)
{
builder
.
SetInputsFormat
({
kOpFormat_NCHW
,
kOpFormat_NCHW
});
builder
.
SetInputsDeviceType
({
kFloat32
->
type_id
(),
kFloat32
->
type_id
()});
}
else
{
builder
.
SetInputsFormat
({
kOpFormat_NCHW
});
builder
.
SetInputsDeviceType
({
kFloat32
->
type_id
()});
}
builder
.
SetOutputsFormat
({
kOpFormat_NCHW
});
builder
.
SetOutputsDeviceType
({
kFloat32
->
type_id
()});
builder
.
SetKernelType
(
KernelType
::
TBE_KERNEL
);
builder
.
SetFusionType
(
kernel
::
FusionType
::
ELEMWISE
);
builder
.
SetProcessor
(
kernel
::
Processor
::
AICORE
);
AnfAlgo
::
SetSelectKernelBuildInfo
(
builder
.
Build
(),
kernelptr_floor
.
get
());
ptr_formerlayer
=
kernelptr_floor
;
layerscount
++
;
layer_idx
++
;
}
// return res
auto
p_return
=
std
::
make_shared
<
Primitive
>
(
"return"
);
inputs
.
clear
();
inputs
.
push_back
(
NewValueNode
(
p_return
));
inputs
.
push_back
(
ptr_formerlayer
);
auto
ret
=
g
->
NewCNode
(
inputs
);
ret
->
set_abstract
(
y_tensor
->
ToAbstract
());
g
->
set_return
(
ret
);
draw
::
Draw
(
name
,
g
);
return
g
;
}
static
KernelGraphPtr
CreateKernelGraphForBufferFusionEltwiseBeforeAndAfter
(
uint32_t
before_layers
,
uint32_t
after_layers
=
3
,
mindspore
::
kernel
::
FusionType
fusiontype
=
mindspore
::
kernel
::
SEGMENT
)
{
KernelGraphPtr
g
=
std
::
make_shared
<
KernelGraph
>
();
std
::
vector
<
AnfNodePtr
>
inputs
;
std
::
vector
<
int
>
shp
=
{
1
,
3
,
3
,
4
};
TensorTypePtr
tensor_type
=
std
::
make_shared
<
TensorType
>
(
kFloat32
);
tensor
::
DeviceInfo
device_info
{
kOpFormat_NCHW
,
tensor_type
};
uint32_t
layerscount
=
1
;
CNodePtr
ptr_formerlayer
;
std
::
string
name
=
""
;
tensor
::
TensorPtr
x_tensor
=
std
::
make_shared
<
tensor
::
Tensor
>
(
kFloat32
->
type_id
(),
shp
);
auto
x_abstract
=
x_tensor
->
ToAbstract
();
auto
x_const
=
NewValueNode
(
x_tensor
);
x_const
->
set_abstract
(
x_abstract
);
g
->
MutableInputs
()
->
push_back
(
x_const
);
while
(
before_layers
--
)
{
auto
p_relu
=
std
::
make_shared
<
Primitive
>
(
"ReLU6"
);
std
::
vector
<
std
::
string
>
input_names
=
{
"x"
};
std
::
vector
<
std
::
string
>
output_names
=
{
"output"
};
ValuePtr
input_names_v
=
MakeValue
(
input_names
);
ValuePtr
output_names_v
=
MakeValue
(
output_names
);
p_relu
->
set_attr
(
"input_names"
,
input_names_v
);
p_relu
->
set_attr
(
"output_names"
,
output_names_v
);
inputs
.
clear
();
if
(
layerscount
==
1
)
{
inputs
.
push_back
(
NewValueNode
(
p_relu
));
inputs
.
push_back
(
x_const
);
}
else
{
inputs
.
push_back
(
NewValueNode
(
p_relu
));
inputs
.
push_back
(
ptr_formerlayer
);
}
auto
kernelptr_floor
=
g
->
NewCNode
(
inputs
);
kernelptr_floor
->
set_abstract
(
x_abstract
);
kernelptr_floor
->
set_kernel_info
(
std
::
make_shared
<
device
::
KernelInfo
>
());
KernelBuildInfoBuilder
builder
;
builder
.
SetInputsFormat
({
kOpFormat_NCHW
});
builder
.
SetOutputsFormat
({
kOpFormat_NCHW
});
builder
.
SetInputsDeviceType
({
kFloat32
->
type_id
()});
builder
.
SetOutputsDeviceType
({
kFloat32
->
type_id
()});
builder
.
SetKernelType
(
KernelType
::
TBE_KERNEL
);
builder
.
SetFusionType
(
kernel
::
FusionType
::
ELEMWISE
);
builder
.
SetProcessor
(
kernel
::
Processor
::
AICORE
);
AnfAlgo
::
SetSelectKernelBuildInfo
(
builder
.
Build
(),
kernelptr_floor
.
get
());
ptr_formerlayer
=
kernelptr_floor
;
layerscount
++
;
}
// Construct the conv2d node
tensor
::
TensorPtr
y_tensor
=
std
::
make_shared
<
tensor
::
Tensor
>
(
kFloat32
->
type_id
(),
shp
);
y_tensor
->
set_device_info
(
device_info
);
auto
y_const
=
NewValueNode
(
y_tensor
);
y_const
->
set_abstract
(
y_tensor
->
ToAbstract
());
if
(
fusiontype
==
kernel
::
FusionType
::
CONVLUTION
)
{
auto
p_conv
=
std
::
make_shared
<
Primitive
>
(
"Conv2D"
);
std
::
vector
<
std
::
string
>
input_names
=
{
"x"
,
"y"
};
std
::
vector
<
std
::
string
>
output_names
=
{
"output"
};
ValuePtr
input_names_v
=
MakeValue
(
input_names
);
ValuePtr
output_names_v
=
MakeValue
(
output_names
);
p_conv
->
set_attr
(
"input_names"
,
input_names_v
);
p_conv
->
set_attr
(
"output_names"
,
output_names_v
);
inputs
.
clear
();
inputs
.
push_back
(
NewValueNode
(
p_conv
));
inputs
.
push_back
(
y_const
);
inputs
.
push_back
(
ptr_formerlayer
);
name
=
"test_conv_"
+
std
::
to_string
(
layerscount
)
+
"layers_graph.dot"
;
}
else
{
auto
p_red_seg
=
std
::
make_shared
<
Primitive
>
(
"ReduceOrSegment"
);
std
::
vector
<
std
::
string
>
input_names
=
{
"x"
};
std
::
vector
<
std
::
string
>
output_names
=
{
"output"
};
ValuePtr
input_names_v
=
MakeValue
(
input_names
);
ValuePtr
output_names_v
=
MakeValue
(
output_names
);
p_red_seg
->
set_attr
(
"input_names"
,
input_names_v
);
p_red_seg
->
set_attr
(
"output_names"
,
output_names_v
);
inputs
.
clear
();
inputs
.
push_back
(
NewValueNode
(
p_red_seg
));
inputs
.
push_back
(
ptr_formerlayer
);
name
=
"test_regOrSeg_"
+
std
::
to_string
(
layerscount
)
+
"layers_graph.dot"
;
}
auto
kernelptr_first
=
g
->
NewCNode
(
inputs
);
kernelptr_first
->
set_abstract
(
y_tensor
->
ToAbstract
());
kernelptr_first
->
set_kernel_info
(
std
::
make_shared
<
device
::
KernelInfo
>
());
KernelBuildInfoBuilder
builder
;
if
(
fusiontype
==
kernel
::
FusionType
::
CONVLUTION
)
{
builder
.
SetInputsFormat
({
kOpFormat_NCHW
,
kOpFormat_NCHW
});
builder
.
SetInputsDeviceType
({
kFloat32
->
type_id
(),
kFloat32
->
type_id
()});
}
else
{
builder
.
SetInputsFormat
({
kOpFormat_NCHW
});
builder
.
SetInputsDeviceType
({
kFloat32
->
type_id
()});
}
builder
.
SetOutputsFormat
({
kOpFormat_NCHW
});
builder
.
SetOutputsDeviceType
({
kFloat32
->
type_id
()});
builder
.
SetKernelType
(
KernelType
::
TBE_KERNEL
);
builder
.
SetFusionType
(
fusiontype
);
builder
.
SetProcessor
(
kernel
::
Processor
::
AICORE
);
AnfAlgo
::
SetSelectKernelBuildInfo
(
builder
.
Build
(),
kernelptr_first
.
get
());
ptr_formerlayer
=
kernelptr_first
;
// configure fusion successor layers
while
(
after_layers
--
)
{
auto
p_relu
=
std
::
make_shared
<
Primitive
>
(
"ReLU6"
);
std
::
vector
<
std
::
string
>
input_names
=
{
"x"
};
std
::
vector
<
std
::
string
>
output_names
=
{
"output"
};
ValuePtr
input_names_v
=
MakeValue
(
input_names
);
ValuePtr
output_names_v
=
MakeValue
(
output_names
);
p_relu
->
set_attr
(
"input_names"
,
input_names_v
);
p_relu
->
set_attr
(
"output_names"
,
output_names_v
);
inputs
.
clear
();
inputs
.
push_back
(
NewValueNode
(
p_relu
));
inputs
.
push_back
(
ptr_formerlayer
);
auto
kernelptr_floor
=
g
->
NewCNode
(
inputs
);
kernelptr_floor
->
set_abstract
(
y_tensor
->
ToAbstract
());
kernelptr_floor
->
set_kernel_info
(
std
::
make_shared
<
device
::
KernelInfo
>
());
KernelBuildInfoBuilder
builder
;
builder
.
SetInputsFormat
({
kOpFormat_NCHW
});
builder
.
SetOutputsFormat
({
kOpFormat_NCHW
});
builder
.
SetInputsDeviceType
({
kFloat32
->
type_id
()});
builder
.
SetOutputsDeviceType
({
kFloat32
->
type_id
()});
builder
.
SetKernelType
(
KernelType
::
TBE_KERNEL
);
builder
.
SetFusionType
(
kernel
::
FusionType
::
ELEMWISE
);
builder
.
SetProcessor
(
kernel
::
Processor
::
AICORE
);
AnfAlgo
::
SetSelectKernelBuildInfo
(
builder
.
Build
(),
kernelptr_floor
.
get
());
ptr_formerlayer
=
kernelptr_floor
;
layerscount
++
;
}
// return res
auto
p_return
=
std
::
make_shared
<
Primitive
>
(
"return"
);
inputs
.
clear
();
inputs
.
push_back
(
NewValueNode
(
p_return
));
inputs
.
push_back
(
ptr_formerlayer
);
auto
ret
=
g
->
NewCNode
(
inputs
);
ret
->
set_abstract
(
y_tensor
->
ToAbstract
());
g
->
set_return
(
ret
);
draw
::
Draw
(
name
,
g
);
return
g
;
}
// Builds a KernelGraph of shape: first-op -> ReLU6 x after_layers -> return.
// The first op is Conv2D(y, z) when fusiontype == CONVLUTION, otherwise ReduceOrSegment(y).
// Every CNode is annotated with a TBE kernel build info (format/dtype/fusion type) so the
// buffer-fusion pass under test can match it. Side effect: dumps the graph via draw::Draw.
static KernelGraphPtr CreateKernelGraphForBufferFusionSingleIn(
    uint32_t after_layers, mindspore::kernel::FusionType fusiontype = mindspore::kernel::CONVLUTION) {
  // build the func_graph manually, eg:
  /* CreateKernelGraphForBufferFusionSingleIn(1)
   * @mindspore
   * def f(x):
   *     z=conv2d(x, y)
   *     ret=relu(z)
   *     return ret
   */
  KernelGraphPtr g = std::make_shared<KernelGraph>();
  std::vector<AnfNodePtr> inputs;
  std::vector<int> shp = {1, 3, 3, 4};  // fixed NCHW test shape shared by all tensors here
  TensorTypePtr tensor_type = std::make_shared<TensorType>(kFloat32);
  tensor::DeviceInfo device_info{kOpFormat_NCHW, tensor_type};
  uint32_t layerscount = 1;
  CNodePtr ptr_formerlayer;
  std::string name = "";
  // Construct first node: two constant float32 inputs (y feeds the op; z is the Conv2D weight).
  tensor::TensorPtr y_tensor = std::make_shared<tensor::Tensor>(kFloat32->type_id(), shp);
  y_tensor->set_device_info(device_info);
  tensor::TensorPtr z_tensor = std::make_shared<tensor::Tensor>(kFloat32->type_id(), shp);
  z_tensor->set_device_info(device_info);
  auto y_const = NewValueNode(y_tensor);
  auto z_const = NewValueNode(z_tensor);
  y_const->set_abstract(y_tensor->ToAbstract());
  z_const->set_abstract(z_tensor->ToAbstract());
  g->MutableInputs()->push_back(y_const);
  g->MutableInputs()->push_back(z_const);
  if (fusiontype == kernel::FusionType::CONVLUTION) {
    // Two-input Conv2D head: inputs are the two graph constants.
    auto p_conv = std::make_shared<Primitive>("Conv2D");
    std::vector<std::string> input_names = {"x", "y"};
    std::vector<std::string> output_names = {"output"};
    ValuePtr input_names_v = MakeValue(input_names);
    ValuePtr output_names_v = MakeValue(output_names);
    p_conv->set_attr("input_names", input_names_v);
    p_conv->set_attr("output_names", output_names_v);
    inputs.clear();
    inputs.push_back(NewValueNode(p_conv));
    inputs.push_back(y_const);
    inputs.push_back(z_const);
    name = "test_conv_" + std::to_string(layerscount) + "layers_graph.dot";
  } else {
    // Single-input head used for non-convolution fusion types.
    auto p_red_seg = std::make_shared<Primitive>("ReduceOrSegment");
    std::vector<std::string> input_names = {"x"};
    std::vector<std::string> output_names = {"output"};
    ValuePtr input_names_v = MakeValue(input_names);
    ValuePtr output_names_v = MakeValue(output_names);
    p_red_seg->set_attr("input_names", input_names_v);
    p_red_seg->set_attr("output_names", output_names_v);
    inputs.clear();
    inputs.push_back(NewValueNode(p_red_seg));
    inputs.push_back(y_const);
    name = "test_regOrSeg_" + std::to_string(layerscount) + "layers_graph.dot";
  }
  auto kernelptr_first = g->NewCNode(inputs);
  kernelptr_first->set_abstract(y_tensor->ToAbstract());
  kernelptr_first->set_kernel_info(std::make_shared<device::KernelInfo>());
  KernelBuildInfoBuilder builder;
  // Input format/dtype lists must match the head op's arity (2 for Conv2D, 1 otherwise).
  if (fusiontype == kernel::FusionType::CONVLUTION) {
    builder.SetInputsFormat({kOpFormat_NCHW, kOpFormat_NCHW});
    builder.SetInputsDeviceType({kFloat32->type_id(), kFloat32->type_id()});
  } else {
    builder.SetInputsFormat({kOpFormat_NCHW});
    builder.SetInputsDeviceType({kFloat32->type_id()});
  }
  builder.SetOutputsFormat({kOpFormat_NCHW});
  builder.SetOutputsDeviceType({kFloat32->type_id()});
  builder.SetKernelType(KernelType::TBE_KERNEL);
  builder.SetFusionType(fusiontype);
  builder.SetProcessor(kernel::Processor::AICORE);
  AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), kernelptr_first.get());
  ptr_formerlayer = kernelptr_first;
  // configure fusion successor layers: chain of after_layers ELEMWISE ReLU6 nodes.
  while (after_layers--) {
    auto p_relu = std::make_shared<Primitive>("ReLU6");
    std::vector<std::string> input_names = {"x"};
    std::vector<std::string> output_names = {"output"};
    ValuePtr input_names_v = MakeValue(input_names);
    ValuePtr output_names_v = MakeValue(output_names);
    p_relu->set_attr("input_names", input_names_v);
    p_relu->set_attr("output_names", output_names_v);
    inputs.clear();
    inputs.push_back(NewValueNode(p_relu));
    inputs.push_back(ptr_formerlayer);
    auto kernelptr_floor = g->NewCNode(inputs);
    kernelptr_floor->set_abstract(y_tensor->ToAbstract());
    kernelptr_floor->set_kernel_info(std::make_shared<device::KernelInfo>());
    KernelBuildInfoBuilder builder;
    builder.SetInputsFormat({kOpFormat_NCHW});
    builder.SetOutputsFormat({kOpFormat_NCHW});
    builder.SetInputsDeviceType({kFloat32->type_id()});
    builder.SetOutputsDeviceType({kFloat32->type_id()});
    builder.SetKernelType(KernelType::TBE_KERNEL);
    builder.SetFusionType(kernel::FusionType::ELEMWISE);
    builder.SetProcessor(kernel::Processor::AICORE);
    AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), kernelptr_floor.get());
    ptr_formerlayer = kernelptr_floor;
    layerscount++;
  }
  // return res: wrap the last layer in a "return" CNode and register it as the graph output.
  auto p_return = std::make_shared<Primitive>("return");
  inputs.clear();
  inputs.push_back(NewValueNode(p_return));
  inputs.push_back(ptr_formerlayer);
  auto ret = g->NewCNode(inputs);
  ret->set_abstract(y_tensor->ToAbstract());
  g->set_return(ret);
  draw::Draw(name, g);  // debug dump; filename chosen above per head-op kind
  return g;
}
// Builds a KernelGraph consisting of targetlayers chained ELEMWISE ReLU6 nodes fed by a
// constant x. When conv_flag is set, a trailing Conv2D (or ReduceOrSegment, depending on
// fusiontype) is appended after the ReLU6 chain. Every CNode carries a TBE kernel build
// info so the buffer-fusion pass can match it. Side effect: dumps the graph via draw::Draw.
static KernelGraphPtr CreateKernelGraphForBufferFusion(
    uint32_t targetlayers, bool conv_flag = false,
    mindspore::kernel::FusionType fusiontype = mindspore::kernel::CONVLUTION) {
  // build the func_graph manually, eg:
  /* CreateKernelGraphForBufferFusion(3)
   * @mindspore
   * def f(x):
   *     y=relu(x)
   *     z=relu(y)
   *     ret=relu(z)
   *     return ret
   */
  KernelGraphPtr g = std::make_shared<KernelGraph>();
  std::vector<AnfNodePtr> inputs;
  // x is input tensor.
  std::vector<int> shp = {1, 3, 3, 4};
  tensor::TensorPtr x_tensor = std::make_shared<tensor::Tensor>(kFloat32->type_id(), shp);
  TensorTypePtr tensor_type = std::make_shared<TensorType>(kFloat32);
  tensor::DeviceInfo device_info{kOpFormat_NCHW, tensor_type};
  x_tensor->set_device_info(device_info);
  auto x_abstract = x_tensor->ToAbstract();
  auto x_const = NewValueNode(x_tensor);
  x_const->set_abstract(x_abstract);
  g->MutableInputs()->push_back(x_const);
  uint32_t layerscount = 1;
  CNodePtr ptr_formerlayer;
  // configure func_graph hiden layers: first layer consumes x_const, later ones chain.
  while (targetlayers--) {
    auto p_relu = std::make_shared<Primitive>("ReLU6");
    std::vector<std::string> input_names = {"x"};
    std::vector<std::string> output_names = {"output"};
    ValuePtr input_names_v = MakeValue(input_names);
    ValuePtr output_names_v = MakeValue(output_names);
    p_relu->set_attr("input_names", input_names_v);
    p_relu->set_attr("output_names", output_names_v);
    inputs.clear();
    if (layerscount == 1) {
      inputs.push_back(NewValueNode(p_relu));
      inputs.push_back(x_const);
    } else {
      inputs.push_back(NewValueNode(p_relu));
      inputs.push_back(ptr_formerlayer);
    }
    auto kernelptr_floor = g->NewCNode(inputs);
    kernelptr_floor->set_abstract(x_abstract);
    kernelptr_floor->set_kernel_info(std::make_shared<device::KernelInfo>());
    KernelBuildInfoBuilder builder;
    builder.SetInputsFormat({kOpFormat_NCHW});
    builder.SetOutputsFormat({kOpFormat_NCHW});
    builder.SetInputsDeviceType({kFloat32->type_id()});
    builder.SetOutputsDeviceType({kFloat32->type_id()});
    builder.SetKernelType(KernelType::TBE_KERNEL);
    builder.SetFusionType(kernel::FusionType::ELEMWISE);
    builder.SetProcessor(kernel::Processor::AICORE);
    AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), kernelptr_floor.get());
    ptr_formerlayer = kernelptr_floor;
    layerscount++;
  }
  std::string name = "test_construct_" + std::to_string(layerscount) + "layers_graph.dot";
  if (conv_flag) {
    tensor::TensorPtr y_tensor = std::make_shared<tensor::Tensor>(kFloat32->type_id(), shp);
    y_tensor->set_device_info(device_info);
    tensor::TensorPtr z_tensor = std::make_shared<tensor::Tensor>(kFloat32->type_id(), shp);
    z_tensor->set_device_info(device_info);
    auto y_const = NewValueNode(y_tensor);
    // FIX: previously NewValueNode(y_tensor) — a copy-paste slip that paired z_const's value
    // with z_tensor's abstract. Wrap z_tensor so value and abstract stay consistent, matching
    // CreateKernelGraphForBufferFusionSingleIn. NOTE(review): z_const is currently unused by
    // either branch below; kept for parity with the SingleIn variant.
    auto z_const = NewValueNode(z_tensor);
    y_const->set_abstract(y_tensor->ToAbstract());
    z_const->set_abstract(z_tensor->ToAbstract());
    g->MutableInputs()->push_back(y_const);
    if (fusiontype == kernel::FusionType::CONVLUTION) {
      // Append Conv2D(y_const, last_relu).
      auto p_conv = std::make_shared<Primitive>("Conv2D");
      std::vector<std::string> input_names = {"x", "y"};
      std::vector<std::string> output_names = {"output"};
      ValuePtr input_names_v = MakeValue(input_names);
      ValuePtr output_names_v = MakeValue(output_names);
      p_conv->set_attr("input_names", input_names_v);
      p_conv->set_attr("output_names", output_names_v);
      inputs.clear();
      inputs.push_back(NewValueNode(p_conv));
      inputs.push_back(y_const);
      inputs.push_back(ptr_formerlayer);
    } else {
      // Append single-input ReduceOrSegment(last_relu).
      auto p_conv = std::make_shared<Primitive>("ReduceOrSegment");
      std::vector<std::string> input_names = {"x"};
      std::vector<std::string> output_names = {"output"};
      ValuePtr input_names_v = MakeValue(input_names);
      ValuePtr output_names_v = MakeValue(output_names);
      p_conv->set_attr("input_names", input_names_v);
      p_conv->set_attr("output_names", output_names_v);
      inputs.clear();
      inputs.push_back(NewValueNode(p_conv));
      inputs.push_back(ptr_formerlayer);
    }
    auto kernelptr_conv = g->NewCNode(inputs);
    kernelptr_conv->set_abstract(x_abstract);
    kernelptr_conv->set_kernel_info(std::make_shared<device::KernelInfo>());
    KernelBuildInfoBuilder builder;
    // Input format/dtype lists must match the appended op's arity.
    if (fusiontype == kernel::FusionType::CONVLUTION) {
      builder.SetInputsFormat({kOpFormat_NCHW, kOpFormat_NCHW});
      builder.SetInputsDeviceType({kFloat32->type_id(), kFloat32->type_id()});
    } else {
      builder.SetInputsFormat({kOpFormat_NCHW});
      builder.SetInputsDeviceType({kFloat32->type_id()});
    }
    builder.SetOutputsFormat({kOpFormat_NCHW});
    builder.SetOutputsDeviceType({kFloat32->type_id()});
    builder.SetKernelType(KernelType::TBE_KERNEL);
    builder.SetFusionType(fusiontype);
    builder.SetProcessor(kernel::Processor::AICORE);
    AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), kernelptr_conv.get());
    ptr_formerlayer = kernelptr_conv;
    name = "test_conv_" + std::to_string(layerscount) + "layers_graph.dot";
  }
  // return res: wrap the last node in a "return" CNode and register it as the graph output.
  auto p_return = std::make_shared<Primitive>("return");
  inputs.clear();
  inputs.push_back(NewValueNode(p_return));
  inputs.push_back(ptr_formerlayer);
  auto ret = g->NewCNode(inputs);
  ret->set_abstract(x_abstract);
  g->set_return(ret);
  draw::Draw(name, g);  // debug dump
  return g;
}
// Appends one branch to an existing KernelGraph: a chain of `layers` ELEMWISE ReLU6 nodes
// starting from inputptr, terminated by a Conv2D whose second operand is the chain tail and
// whose first operand is a new constant y added to the graph inputs. Returns the Conv2D node
// so callers can join branches. The Conv2D's build info uses the given fusiontype.
CNodePtr CreateKernelGraphBranch(KernelGraphPtr g, CNodePtr inputptr, int layers,
                                 const kernel::FusionType fusiontype = kernel::FusionType::CONVLUTION) {
  std::vector<int> shp = {1, 3, 3, 4};
  tensor::TensorPtr x_tensor = std::make_shared<tensor::Tensor>(kFloat32->type_id(), shp);
  TensorTypePtr tensor_type = std::make_shared<TensorType>(kFloat32);
  tensor::DeviceInfo device_info{kOpFormat_NCHW, tensor_type};
  x_tensor->set_device_info(device_info);
  auto x_abstract = x_tensor->ToAbstract();
  auto x_const = NewValueNode(x_tensor);
  x_const->set_abstract(x_abstract);
  CNodePtr ptr_formerlayer = inputptr;
  // Build the ReLU6 chain on top of the caller-supplied node.
  while (layers--) {
    auto p_relu = std::make_shared<Primitive>("ReLU6");
    std::vector<std::string> input_names = {"x"};
    std::vector<std::string> output_names = {"output"};
    ValuePtr input_names_v = MakeValue(input_names);
    ValuePtr output_names_v = MakeValue(output_names);
    p_relu->set_attr("input_names", input_names_v);
    p_relu->set_attr("output_names", output_names_v);
    std::vector<AnfNodePtr> inputs;
    inputs.clear();
    inputs.push_back(NewValueNode(p_relu));
    inputs.push_back(ptr_formerlayer);
    auto kernelptr_floor = g->NewCNode(inputs);
    kernelptr_floor->set_abstract(x_abstract);
    kernelptr_floor->set_kernel_info(std::make_shared<device::KernelInfo>());
    KernelBuildInfoBuilder builder;
    builder.SetInputsFormat({kOpFormat_NCHW});
    builder.SetOutputsFormat({kOpFormat_NCHW});
    builder.SetInputsDeviceType({kFloat32->type_id()});
    builder.SetOutputsDeviceType({kFloat32->type_id()});
    builder.SetKernelType(KernelType::TBE_KERNEL);
    builder.SetFusionType(kernel::FusionType::ELEMWISE);
    builder.SetProcessor(kernel::Processor::AICORE);
    AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), kernelptr_floor.get());
    ptr_formerlayer = kernelptr_floor;
  }
  tensor::TensorPtr y_tensor = std::make_shared<tensor::Tensor>(kFloat32->type_id(), shp);
  y_tensor->set_device_info(device_info);
  tensor::TensorPtr z_tensor = std::make_shared<tensor::Tensor>(kFloat32->type_id(), shp);
  z_tensor->set_device_info(device_info);
  auto y_const = NewValueNode(y_tensor);
  // FIX: previously NewValueNode(y_tensor) — a copy-paste slip that paired z_const's value
  // with z_tensor's abstract. Wrap z_tensor so value and abstract stay consistent.
  // NOTE(review): z_const is not consumed below; kept for parity with the other builders.
  auto z_const = NewValueNode(z_tensor);
  y_const->set_abstract(y_tensor->ToAbstract());
  z_const->set_abstract(z_tensor->ToAbstract());
  g->MutableInputs()->push_back(y_const);
  // Terminal Conv2D(y_const, chain_tail) carrying the requested fusion type.
  auto p_conv = std::make_shared<Primitive>("Conv2D");
  std::vector<std::string> input_names = {"x", "y"};
  std::vector<std::string> output_names = {"output"};
  ValuePtr input_names_v = MakeValue(input_names);
  ValuePtr output_names_v = MakeValue(output_names);
  p_conv->set_attr("input_names", input_names_v);
  p_conv->set_attr("output_names", output_names_v);
  std::vector<AnfNodePtr> inputs;
  inputs.clear();
  inputs.push_back(NewValueNode(p_conv));
  inputs.push_back(y_const);
  inputs.push_back(ptr_formerlayer);
  auto kernelptr_conv = g->NewCNode(inputs);
  kernelptr_conv->set_abstract(x_abstract);
  kernelptr_conv->set_kernel_info(std::make_shared<device::KernelInfo>());
  KernelBuildInfoBuilder builder;
  builder.SetInputsFormat({kOpFormat_NCHW, kOpFormat_NCHW});
  builder.SetOutputsFormat({kOpFormat_NCHW});
  builder.SetInputsDeviceType({kFloat32->type_id(), kFloat32->type_id()});
  builder.SetOutputsDeviceType({kFloat32->type_id()});
  builder.SetKernelType(KernelType::TBE_KERNEL);
  builder.SetFusionType(fusiontype);
  builder.SetProcessor(kernel::Processor::AICORE);
  AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), kernelptr_conv.get());
  return kernelptr_conv;
}
// Builds a diamond-shaped KernelGraph: one MULTI_USE_ReLU6 node whose output feeds two
// branches (each built by CreateKernelGraphBranch, i.e. ReLU6 chain + Conv2D), joined by a
// final two-input ReLU6, then "return". Exercises the fusion pass on multiply-used outputs.
// Side effect: dumps the graph via draw::Draw.
static KernelGraphPtr CreateKernelGraphForMultiUse(uint32_t targetlayer1s, uint32_t targetlayer2s) {
  /* @mindspore
   * def f(x):
   *     multi_use=relu(x)
   *     y=relu(multi_use)
   *     z=relu(multi_use)
   *     ret=relu(y, z)
   *     return ret
   */
  // NOTE(review): each branch actually ends in a Conv2D (see CreateKernelGraphBranch),
  // so the pseudo-code above is only an approximation of the built topology.
  KernelGraphPtr g = std::make_shared<KernelGraph>();
  std::vector<AnfNodePtr> inputs;
  // x is input tensor.
  std::vector<int> shp = {1, 3, 3, 4};
  tensor::TensorPtr x_tensor = std::make_shared<tensor::Tensor>(kFloat32->type_id(), shp);
  TensorTypePtr tensor_type = std::make_shared<TensorType>(kFloat32);
  tensor::DeviceInfo device_info{kOpFormat_NCHW, tensor_type};
  x_tensor->set_device_info(device_info);
  auto x_abstract = x_tensor->ToAbstract();
  auto x_const = NewValueNode(x_tensor);
  x_const->set_abstract(x_abstract);
  g->MutableInputs()->push_back(x_const);
  // The multiply-used node: MULTI_USE_ReLU6(x_const).
  auto p_multi = std::make_shared<Primitive>("MULTI_USE_ReLU6");
  std::vector<std::string> input_names = {"x"};
  std::vector<std::string> output_names = {"output"};
  ValuePtr input_names_v = MakeValue(input_names);
  ValuePtr output_names_v = MakeValue(output_names);
  p_multi->set_attr("input_names", input_names_v);
  p_multi->set_attr("output_names", output_names_v);
  inputs.clear();
  inputs.push_back(NewValueNode(p_multi));
  inputs.push_back(x_const);
  auto kernelptr_multi = g->NewCNode(inputs);
  kernelptr_multi->set_abstract(x_abstract);
  kernelptr_multi->set_kernel_info(std::make_shared<device::KernelInfo>());
  KernelBuildInfoBuilder builder;
  builder.SetInputsFormat({kOpFormat_NCHW});
  builder.SetOutputsFormat({kOpFormat_NCHW});
  builder.SetInputsDeviceType({kFloat32->type_id()});
  builder.SetOutputsDeviceType({kFloat32->type_id()});
  builder.SetKernelType(KernelType::TBE_KERNEL);
  builder.SetFusionType(kernel::FusionType::ELEMWISE);
  builder.SetProcessor(kernel::Processor::AICORE);
  AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), kernelptr_multi.get());
  // Two branches consuming the same node (branch depths intentionally swapped vs arg order).
  CNodePtr outptrbranch1 = CreateKernelGraphBranch(g, kernelptr_multi, targetlayer2s);
  CNodePtr outptrbranch2 = CreateKernelGraphBranch(g, kernelptr_multi, targetlayer1s);
  // Join node: a two-input "ReLU6" (test-only primitive usage) merging both branches.
  auto p_relu = std::make_shared<Primitive>("ReLU6");
  input_names = {"x"};
  output_names = {"output"};
  input_names_v = MakeValue(input_names);
  output_names_v = MakeValue(output_names);
  p_relu->set_attr("input_names", input_names_v);
  p_relu->set_attr("output_names", output_names_v);
  inputs.clear();
  inputs.push_back(NewValueNode(p_relu));
  inputs.push_back(outptrbranch1);
  inputs.push_back(outptrbranch2);
  auto kernelptr_floor = g->NewCNode(inputs);
  kernelptr_floor->set_abstract(x_abstract);
  kernelptr_floor->set_kernel_info(std::make_shared<device::KernelInfo>());
  KernelBuildInfoBuilder builder1;
  builder1.SetInputsFormat({kOpFormat_NCHW, kOpFormat_NCHW});
  builder1.SetOutputsFormat({kOpFormat_NCHW});
  builder1.SetInputsDeviceType({kFloat32->type_id(), kFloat32->type_id()});
  builder1.SetOutputsDeviceType({kFloat32->type_id()});
  builder1.SetKernelType(KernelType::TBE_KERNEL);
  builder1.SetFusionType(kernel::FusionType::ELEMWISE);
  builder1.SetProcessor(kernel::Processor::AICORE);
  AnfAlgo::SetSelectKernelBuildInfo(builder1.Build(), kernelptr_floor.get());
  // return res: register the join node as the graph output.
  auto p_return = std::make_shared<Primitive>("return");
  inputs.clear();
  inputs.push_back(NewValueNode(p_return));
  inputs.push_back(kernelptr_floor);
  auto ret = g->NewCNode(inputs);
  ret->set_abstract(x_abstract);
  g->set_return(ret);
  string name = "multi_use_graph.dot";
  draw::Draw(name, g);  // debug dump
  return g;
}
#ifdef BUFFER_FUSION_MULTI_OUT
// Builds a KernelGraph for the multi-output fusion case where the multiply-used node has a
// linear (single-op) predecessor: ReLU6 -> MULTI_USE_ReLU6 -> one or two branches
// (CreateKernelGraphBranch) -> joining ReLU6 -> return. When use_flag is false only one
// branch is built and the join node takes a single input. Side effect: draw::Draw dump.
// NOTE(review): the MULTI_USE node is declared with float32 input but float16 output device
// type — presumably deliberate to exercise mixed-precision fusion; confirm against the pass.
static KernelGraphPtr CreateKernelGraphForMultiOutputWithLinearInput(uint32_t targetlayer1s, uint32_t targetlayer2s,
                                                                     bool use_flag = true,
                                                                     const kernel::FusionType fusion_type = kernel::FusionType::CONVLUTION) {
  KernelGraphPtr g = std::make_shared<KernelGraph>();
  std::vector<AnfNodePtr> inputs;
  // x is input tensor.
  std::vector<int> shp = {1, 3, 3, 4};
  tensor::TensorPtr x_tensor = std::make_shared<tensor::Tensor>(kFloat32->type_id(), shp);
  TensorTypePtr tensor_type = std::make_shared<TensorType>(kFloat32);
  tensor::DeviceInfo device_info{kOpFormat_NCHW, tensor_type};
  x_tensor->set_device_info(device_info);
  auto x_abstract = x_tensor->ToAbstract();
  auto x_const = NewValueNode(x_tensor);
  x_const->set_abstract(x_abstract);
  g->MutableInputs()->push_back(x_const);
  // Linear predecessor: a plain ReLU6 feeding the multiply-used node.
  auto p_relu0 = std::make_shared<Primitive>("ReLU6");
  std::vector<std::string> input_names0 = {"x"};
  std::vector<std::string> output_names0 = {"output"};
  ValuePtr input_names_v0 = MakeValue(input_names0);
  ValuePtr output_names_v0 = MakeValue(output_names0);
  p_relu0->set_attr("input_names", input_names_v0);
  p_relu0->set_attr("output_names", output_names_v0);
  inputs.clear();
  inputs.push_back(NewValueNode(p_relu0));
  inputs.push_back(x_const);
  auto kernelptr_floor0 = g->NewCNode(inputs);
  kernelptr_floor0->set_abstract(x_abstract);
  kernelptr_floor0->set_kernel_info(std::make_shared<device::KernelInfo>());
  KernelBuildInfoBuilder builder0;
  builder0.SetInputsFormat({kOpFormat_NCHW});
  builder0.SetOutputsFormat({kOpFormat_NCHW});
  builder0.SetInputsDeviceType({kFloat32->type_id()});
  builder0.SetOutputsDeviceType({kFloat32->type_id()});
  builder0.SetKernelType(KernelType::TBE_KERNEL);
  builder0.SetFusionType(kernel::FusionType::ELEMWISE);
  builder0.SetProcessor(kernel::Processor::AICORE);
  AnfAlgo::SetSelectKernelBuildInfo(builder0.Build(), kernelptr_floor0.get());
  CNodePtr ptr_formerlayer;
  ptr_formerlayer = kernelptr_floor0;
  // The multiply-used node consuming the linear predecessor.
  auto p_multi = std::make_shared<Primitive>("MULTI_USE_ReLU6");
  std::vector<std::string> input_names = {"x"};
  std::vector<std::string> output_names = {"output"};
  ValuePtr input_names_v = MakeValue(input_names);
  ValuePtr output_names_v = MakeValue(output_names);
  p_multi->set_attr("input_names", input_names_v);
  p_multi->set_attr("output_names", output_names_v);
  inputs.clear();
  inputs.push_back(NewValueNode(p_multi));
  inputs.push_back(ptr_formerlayer);
  auto kernelptr_multi = g->NewCNode(inputs);
  kernelptr_multi->set_abstract(x_abstract);
  kernelptr_multi->set_kernel_info(std::make_shared<device::KernelInfo>());
  KernelBuildInfoBuilder builder;
  builder.SetInputsFormat({kOpFormat_NCHW});
  builder.SetOutputsFormat({kOpFormat_NCHW});
  builder.SetInputsDeviceType({kFloat32->type_id()});
  builder.SetOutputsDeviceType({kFloat16->type_id()});
  builder.SetKernelType(KernelType::TBE_KERNEL);
  builder.SetFusionType(kernel::FusionType::ELEMWISE);
  builder.SetProcessor(kernel::Processor::AICORE);
  AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), kernelptr_multi.get());
  // One branch always; a second only when use_flag is set.
  CNodePtr outptrbranch2 = nullptr;
  CNodePtr outptrbranch1 = CreateKernelGraphBranch(g, kernelptr_multi, targetlayer2s, fusion_type);
  if (use_flag) {
    outptrbranch2 = CreateKernelGraphBranch(g, kernelptr_multi, targetlayer1s, fusion_type);
  }
  // Join node merging the branch outputs.
  auto p_relu = std::make_shared<Primitive>("ReLU6");
  input_names = {"x"};
  output_names = {"output"};
  input_names_v = MakeValue(input_names);
  output_names_v = MakeValue(output_names);
  p_relu->set_attr("input_names", input_names_v);
  p_relu->set_attr("output_names", output_names_v);
  inputs.clear();
  inputs.push_back(NewValueNode(p_relu));
  inputs.push_back(outptrbranch1);
  if (use_flag) {
    inputs.push_back(outptrbranch2);
  }
  auto kernelptr_floor = g->NewCNode(inputs);
  kernelptr_floor->set_abstract(x_abstract);
  kernelptr_floor->set_kernel_info(std::make_shared<device::KernelInfo>());
  KernelBuildInfoBuilder builder1;
  // Build-info arity follows the join node's actual input count.
  if (use_flag) {
    builder1.SetInputsFormat({kOpFormat_NCHW, kOpFormat_NCHW});
    builder1.SetInputsDeviceType({kFloat32->type_id(), kFloat32->type_id()});
  } else {
    builder1.SetInputsFormat({kOpFormat_NCHW});
    builder1.SetInputsDeviceType({kFloat32->type_id()});
  }
  builder1.SetOutputsFormat({kOpFormat_NCHW});
  builder1.SetOutputsDeviceType({kFloat32->type_id()});
  builder1.SetKernelType(KernelType::TBE_KERNEL);
  builder1.SetFusionType(kernel::FusionType::ELEMWISE);
  builder1.SetProcessor(kernel::Processor::AICORE);
  AnfAlgo::SetSelectKernelBuildInfo(builder1.Build(), kernelptr_floor.get());
  cout << "built two branches done" << endl;
  // return res: register the join node as the graph output.
  auto p_return = std::make_shared<Primitive>("return");
  inputs.clear();
  inputs.push_back(NewValueNode(p_return));
  inputs.push_back(kernelptr_floor);
  auto ret = g->NewCNode(inputs);
  ret->set_abstract(x_abstract);
  g->set_return(ret);
  string name = "multi_use_graph.dot";
  draw::Draw(name, g);  // debug dump
  return g;
}
// Builds a KernelGraph whose MULTI_USE_ReLU6 node is declared with TWO outputs
// (two output formats / device types in its build info) and feeds one or two branches
// (CreateKernelGraphBranch), joined by a ReLU6 and a "return". When use_flag is false only
// one branch is built and the join node takes a single input. Side effect: draw::Draw dump.
// NOTE(review): outputs are declared float16+float32 — presumably deliberate mixed-precision
// multi-output coverage; confirm against the fusion pass expectations.
static KernelGraphPtr CreateKernelGraphForMultiOutput(uint32_t targetlayer1s, uint32_t targetlayer2s,
                                                      bool use_flag = true,
                                                      const kernel::FusionType fusion_type = kernel::FusionType::CONVLUTION) {
  KernelGraphPtr g = std::make_shared<KernelGraph>();
  std::vector<AnfNodePtr> inputs;
  // x is input tensor.
  std::vector<int> shp = {1, 3, 3, 4};
  tensor::TensorPtr x_tensor = std::make_shared<tensor::Tensor>(kFloat32->type_id(), shp);
  TensorTypePtr tensor_type = std::make_shared<TensorType>(kFloat32);
  tensor::DeviceInfo device_info{kOpFormat_NCHW, tensor_type};
  x_tensor->set_device_info(device_info);
  auto x_abstract = x_tensor->ToAbstract();
  auto x_const = NewValueNode(x_tensor);
  x_const->set_abstract(x_abstract);
  g->MutableInputs()->push_back(x_const);
  // The multi-output, multiply-used node: MULTI_USE_ReLU6(x_const).
  auto p_multi = std::make_shared<Primitive>("MULTI_USE_ReLU6");
  std::vector<std::string> input_names = {"x"};
  std::vector<std::string> output_names = {"output"};
  ValuePtr input_names_v = MakeValue(input_names);
  ValuePtr output_names_v = MakeValue(output_names);
  p_multi->set_attr("input_names", input_names_v);
  p_multi->set_attr("output_names", output_names_v);
  inputs.clear();
  inputs.push_back(NewValueNode(p_multi));
  inputs.push_back(x_const);
  auto kernelptr_multi = g->NewCNode(inputs);
  kernelptr_multi->set_abstract(x_abstract);
  kernelptr_multi->set_kernel_info(std::make_shared<device::KernelInfo>());
  KernelBuildInfoBuilder builder;
  builder.SetInputsFormat({kOpFormat_NCHW});
  // Two output slots declared — this is what makes the node "multi output" for the pass.
  builder.SetOutputsFormat({kOpFormat_NCHW, kOpFormat_NCHW});
  builder.SetInputsDeviceType({kFloat32->type_id()});
  builder.SetOutputsDeviceType({kFloat16->type_id(), kFloat32->type_id()});
  builder.SetKernelType(KernelType::TBE_KERNEL);
  builder.SetFusionType(kernel::FusionType::ELEMWISE);
  builder.SetProcessor(kernel::Processor::AICORE);
  AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), kernelptr_multi.get());
  // One branch always; a second only when use_flag is set.
  CNodePtr outptrbranch2 = nullptr;
  CNodePtr outptrbranch1 = CreateKernelGraphBranch(g, kernelptr_multi, targetlayer2s, fusion_type);
  if (use_flag) {
    outptrbranch2 = CreateKernelGraphBranch(g, kernelptr_multi, targetlayer1s, fusion_type);
  }
  // Join node merging the branch outputs.
  auto p_relu = std::make_shared<Primitive>("ReLU6");
  input_names = {"x"};
  output_names = {"output"};
  input_names_v = MakeValue(input_names);
  output_names_v = MakeValue(output_names);
  p_relu->set_attr("input_names", input_names_v);
  p_relu->set_attr("output_names", output_names_v);
  inputs.clear();
  inputs.push_back(NewValueNode(p_relu));
  inputs.push_back(outptrbranch1);
  if (use_flag) {
    inputs.push_back(outptrbranch2);
  }
  auto kernelptr_floor = g->NewCNode(inputs);
  kernelptr_floor->set_abstract(x_abstract);
  kernelptr_floor->set_kernel_info(std::make_shared<device::KernelInfo>());
  KernelBuildInfoBuilder builder1;
  // Build-info arity follows the join node's actual input count.
  if (use_flag) {
    builder1.SetInputsFormat({kOpFormat_NCHW, kOpFormat_NCHW});
    builder1.SetInputsDeviceType({kFloat32->type_id(), kFloat32->type_id()});
  } else {
    builder1.SetInputsFormat({kOpFormat_NCHW});
    builder1.SetInputsDeviceType({kFloat32->type_id()});
  }
  builder1.SetOutputsFormat({kOpFormat_NCHW});
  builder1.SetOutputsDeviceType({kFloat32->type_id()});
  builder1.SetKernelType(KernelType::TBE_KERNEL);
  builder1.SetFusionType(kernel::FusionType::ELEMWISE);
  builder1.SetProcessor(kernel::Processor::AICORE);
  AnfAlgo::SetSelectKernelBuildInfo(builder1.Build(), kernelptr_floor.get());
  // return res: register the join node as the graph output.
  auto p_return = std::make_shared<Primitive>("return");
  inputs.clear();
  inputs.push_back(NewValueNode(p_return));
  inputs.push_back(kernelptr_floor);
  auto ret = g->NewCNode(inputs);
  ret->set_abstract(x_abstract);
  g->set_return(ret);
  string name = "multi_use_graph.dot";
  draw::Draw(name, g);  // debug dump
  return g;
}
#endif
// Fusing a single-input eltwise chain of depth 1 should shrink the graph
// from 8 nodes to 6 after the BufferFusion pass runs end-to-end.
TEST_F(TestHWBufferFusion, BufferFusionlayerSingleIn1) {
  KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusionSingleIn(1);
  ASSERT_TRUE(nullptr != graph_ptr);
  draw::Draw("before_BufferFusionlayerSingleIn1.dot", graph_ptr);
  // Default-construct the pass directly; copy-initializing from a temporary is redundant.
  mindspore::opt::BufferFusion buffer_fusion;
  std::vector<FuncGraphPtr> graphs{graph_ptr};
  FuncGraphManagerPtr manager = std::make_shared<FuncGraphManager>(graphs);
  manager->AddFuncGraph(graph_ptr);
  ASSERT_EQ(manager->all_nodes().size(), 8);
  buffer_fusion.Run(graph_ptr);
  draw::Draw("after_BufferFusionlayerSingleIn1.dot", graph_ptr);
  // Fusion replaces the chain with a fused op, reducing the node count.
  ASSERT_EQ(manager->all_nodes().size(), 6);
}
// Same as BufferFusionlayerSingleIn1 but with a chain depth of 2:
// 10 nodes before fusion, 6 after.
TEST_F(TestHWBufferFusion, BufferFusionlayerSingleIn2) {
  KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusionSingleIn(2);
  ASSERT_TRUE(nullptr != graph_ptr);
  draw::Draw("before_BufferFusionlayerSingleIn2.dot", graph_ptr);
  // Default-construct the pass directly; copy-initializing from a temporary is redundant.
  mindspore::opt::BufferFusion buffer_fusion;
  std::vector<FuncGraphPtr> graphs{graph_ptr};
  FuncGraphManagerPtr manager = std::make_shared<FuncGraphManager>(graphs);
  manager->AddFuncGraph(graph_ptr);
  ASSERT_EQ(manager->all_nodes().size(), 10);
  buffer_fusion.Run(graph_ptr);
  draw::Draw("after_BufferFusionlayerSingleIn2.dot", graph_ptr);
  ASSERT_EQ(manager->all_nodes().size(), 6);
}
// Same as BufferFusionlayerSingleIn1 but with a chain depth of 3:
// 12 nodes before fusion, 6 after.
TEST_F(TestHWBufferFusion, BufferFusionlayerSingleIn3) {
  KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusionSingleIn(3);
  ASSERT_TRUE(nullptr != graph_ptr);
  draw::Draw("before_BufferFusionlayerSingleIn3.dot", graph_ptr);
  // Default-construct the pass directly; copy-initializing from a temporary is redundant.
  mindspore::opt::BufferFusion buffer_fusion;
  std::vector<FuncGraphPtr> graphs{graph_ptr};
  FuncGraphManagerPtr manager = std::make_shared<FuncGraphManager>(graphs);
  manager->AddFuncGraph(graph_ptr);
  ASSERT_EQ(manager->all_nodes().size(), 12);
  buffer_fusion.Run(graph_ptr);
  draw::Draw("after_BufferFusionlayerSingleIn3.dot", graph_ptr);
  ASSERT_EQ(manager->all_nodes().size(), 6);
}
// A single-layer graph offers nothing to fuse: node count stays at 5
// before and after the pass runs.
TEST_F(TestHWBufferFusion, BufferFusionlayer1) {
  KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusion(1);
  ASSERT_TRUE(nullptr != graph_ptr);
  // Default-construct the pass directly; copy-initializing from a temporary is redundant.
  mindspore::opt::BufferFusion buffer_fusion;
  std::vector<FuncGraphPtr> graphs{graph_ptr};
  FuncGraphManagerPtr manager = std::make_shared<FuncGraphManager>(graphs);
  manager->AddFuncGraph(graph_ptr);
  ASSERT_EQ(manager->all_nodes().size(), 5);
  buffer_fusion.Run(graph_ptr);
  // Nothing fusible, so the graph must be unchanged.
  ASSERT_EQ(manager->all_nodes().size(), 5);
}
// A two-layer graph fuses down from 7 nodes to 5.
TEST_F(TestHWBufferFusion, BufferFusionlayer2) {
  KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusion(2);
  ASSERT_TRUE(nullptr != graph_ptr);
  // Default-construct the pass directly; copy-initializing from a temporary is redundant.
  mindspore::opt::BufferFusion buffer_fusion;
  std::vector<FuncGraphPtr> graphs{graph_ptr};
  FuncGraphManagerPtr manager = std::make_shared<FuncGraphManager>(graphs);
  manager->AddFuncGraph(graph_ptr);
  ASSERT_EQ(manager->all_nodes().size(), 7);
  buffer_fusion.Run(graph_ptr);
  ASSERT_EQ(manager->all_nodes().size(), 5);
}
// A four-layer graph fuses down from 11 nodes to 5.
TEST_F(TestHWBufferFusion, BufferFusionlayer4) {
  KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusion(4);
  ASSERT_TRUE(nullptr != graph_ptr);
  // Default-construct the pass directly; copy-initializing from a temporary is redundant.
  mindspore::opt::BufferFusion buffer_fusion;
  std::vector<FuncGraphPtr> graphs{graph_ptr};
  FuncGraphManagerPtr manager = std::make_shared<FuncGraphManager>(graphs);
  manager->AddFuncGraph(graph_ptr);
  ASSERT_EQ(manager->all_nodes().size(), 11);
  buffer_fusion.Run(graph_ptr);
  ASSERT_EQ(manager->all_nodes().size(), 5);
}
// A six-layer graph fuses down from 15 nodes to 7.
TEST_F(TestHWBufferFusion, BufferFusionlayer6) {
  KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusion(6);
  ASSERT_TRUE(nullptr != graph_ptr);
  // Default-construct the pass directly; copy-initializing from a temporary is redundant.
  mindspore::opt::BufferFusion buffer_fusion;
  std::vector<FuncGraphPtr> graphs{graph_ptr};
  FuncGraphManagerPtr manager = std::make_shared<FuncGraphManager>(graphs);
  manager->AddFuncGraph(graph_ptr);
  ASSERT_EQ(manager->all_nodes().size(), 15);
  buffer_fusion.Run(graph_ptr);
  ASSERT_EQ(manager->all_nodes().size(), 7);
}
// An eight-layer graph fuses down from 19 nodes to 7.
TEST_F(TestHWBufferFusion, BufferFusionlayer8) {
  KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusion(8);
  ASSERT_TRUE(nullptr != graph_ptr);
  // Default-construct the pass directly; copy-initializing from a temporary is redundant.
  mindspore::opt::BufferFusion buffer_fusion;
  std::vector<FuncGraphPtr> graphs{graph_ptr};
  FuncGraphManagerPtr manager = std::make_shared<FuncGraphManager>(graphs);
  manager->AddFuncGraph(graph_ptr);
  ASSERT_EQ(manager->all_nodes().size(), 19);
  buffer_fusion.Run(graph_ptr);
  ASSERT_EQ(manager->all_nodes().size(), 7);
}
// With a conv op and only one layer there is no fusible pattern:
// MatchBufferFusionPattern must report false.
TEST_F(TestHWBufferFusion, BufferFusionconv1) {
  KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusion(1, true);
  ASSERT_TRUE(nullptr != graph_ptr);
  // Default-construct the pass directly; copy-initializing from a temporary is redundant.
  mindspore::opt::BufferFusion buffer_fusion;
  std::vector<FuncGraphPtr> graphs{graph_ptr};
  FuncGraphManagerPtr manager = std::make_shared<FuncGraphManager>(graphs);
  manager->AddFuncGraph(graph_ptr);
  ASSERT_EQ(buffer_fusion.MatchBufferFusionPattern(*graph_ptr), false);
}
// Conv graph with eight layers: the pattern matcher must succeed, and
// replacing each matched fusion scope (in deterministic, sorted id order)
// must leave the graph with 10 nodes.
TEST_F(TestHWBufferFusion, BufferFusionconv8) {
  KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusion(8, true);
  draw::Draw("before_BufferFusionconv8.dot", graph_ptr);
  ASSERT_TRUE(nullptr != graph_ptr);
  // Default-construct the pass directly; copy-initializing from a temporary is redundant.
  mindspore::opt::BufferFusion buffer_fusion;
  std::vector<FuncGraphPtr> graphs{graph_ptr};
  FuncGraphManagerPtr manager = std::make_shared<FuncGraphManager>(graphs);
  manager->AddFuncGraph(graph_ptr);
  ASSERT_EQ(buffer_fusion.MatchBufferFusionPattern(*graph_ptr), true);
  kernel::KernelPackPtr kernel_pack = std::make_shared<kernel::KernelPack>();
  auto kernel_ptr = std::make_shared<kernel::TbeKernelMod>(kernel_pack);
  std::unordered_map<int, BufferFusionInfo_t> buffer_fusion_infos;
  buffer_fusion.GetBufferFusionInfo(*graph_ptr, &buffer_fusion_infos);
  // Collect and sort the fusion ids so ReplaceFusionOp runs in a stable order
  // (unordered_map iteration order is unspecified).
  std::vector<int32_t> fusion_ids;
  for (auto &buffer_fusion_info : buffer_fusion_infos) {
    fusion_ids.push_back(buffer_fusion_info.first);
  }
  std::sort(fusion_ids.begin(), fusion_ids.end());
  for (auto &fusion_id : fusion_ids) {
    buffer_fusion.ReplaceFusionOp(buffer_fusion_infos[fusion_id], kernel_ptr, graph_ptr.get());
  }
  draw::Draw("after_BufferFusionconv8.dot", graph_ptr);
  ASSERT_EQ(manager->all_nodes().size(), 10);
}
#ifdef BUFFER_FUSION_MULTI_OUT
// Multi-output fusion scope fed by a linear input: each matched scope must
// contain 3 anf nodes, 1 input and 2 outputs; after replacement the graph
// grows from 19 to 21 nodes (extra tuple-getitem plumbing).
TEST_F(TestHWBufferFusion, BufferFusionMultiOutWithLinearInput) {
  KernelGraphPtr graph_ptr = CreateKernelGraphForMultiOutputWithLinearInput(1, 1, true, mindspore::kernel::OPAQUE);
  ASSERT_TRUE(nullptr != graph_ptr);
  // Default-construct the pass directly; copy-initializing from a temporary is redundant.
  mindspore::opt::BufferFusion buffer_fusion;
  std::vector<FuncGraphPtr> graphs{graph_ptr};
  FuncGraphManagerPtr manager = std::make_shared<FuncGraphManager>(graphs);
  manager->AddFuncGraph(graph_ptr);
  ASSERT_EQ(manager->all_nodes().size(), 19);
  ASSERT_EQ(buffer_fusion.MatchBufferFusionPattern(*graph_ptr), true);
  kernel::KernelPackPtr kernel_pack = std::make_shared<kernel::KernelPack>();
  auto kernel_ptr = std::make_shared<kernel::TbeKernelMod>(kernel_pack);
  std::unordered_map<int, BufferFusionInfo_t> buffer_fusion_infos;
  buffer_fusion.GetBufferFusionInfo(*graph_ptr, &buffer_fusion_infos);
  for (auto &buffer_fusion_info : buffer_fusion_infos) {
    EXPECT_EQ(buffer_fusion_info.second.anf_nodes.size(), 3);
    EXPECT_EQ(buffer_fusion_info.second.inputs_list.size(), 1);
    EXPECT_EQ(buffer_fusion_info.second.outputs_list.size(), 2);
    buffer_fusion.ReplaceFusionOp(buffer_fusion_info.second, kernel_ptr, graph_ptr.get());
  }
  ASSERT_EQ(manager->all_nodes().size(), 21);
}
// Multi-output fusion scope: each matched scope must contain 2 anf nodes,
// 1 input and 2 outputs; after replacement the graph grows from 17 to 21
// nodes (extra tuple-getitem plumbing).
TEST_F(TestHWBufferFusion, BufferFusionMultiOut) {
  KernelGraphPtr graph_ptr = CreateKernelGraphForMultiOutput(1, 1, true, mindspore::kernel::OPAQUE);
  draw::Draw("before_BufferFusionMultiOut.dot", graph_ptr);
  ASSERT_TRUE(nullptr != graph_ptr);
  // Default-construct the pass directly; copy-initializing from a temporary is redundant.
  mindspore::opt::BufferFusion buffer_fusion;
  std::vector<FuncGraphPtr> graphs{graph_ptr};
  FuncGraphManagerPtr manager = std::make_shared<FuncGraphManager>(graphs);
  manager->AddFuncGraph(graph_ptr);
  ASSERT_EQ(manager->all_nodes().size(), 17);
  ASSERT_EQ(buffer_fusion.MatchBufferFusionPattern(*graph_ptr), true);
  kernel::KernelPackPtr kernel_pack = std::make_shared<kernel::KernelPack>();
  auto kernel_ptr = std::make_shared<kernel::TbeKernelMod>(kernel_pack);
  std::unordered_map<int, BufferFusionInfo_t> buffer_fusion_infos;
  buffer_fusion.GetBufferFusionInfo(*graph_ptr, &buffer_fusion_infos);
  for (auto &buffer_fusion_info : buffer_fusion_infos) {
    EXPECT_EQ(buffer_fusion_info.second.anf_nodes.size(), 2);
    EXPECT_EQ(buffer_fusion_info.second.inputs_list.size(), 1);
    EXPECT_EQ(buffer_fusion_info.second.outputs_list.size(), 2);
    buffer_fusion.ReplaceFusionOp(buffer_fusion_info.second, kernel_ptr, graph_ptr.get());
  }
  draw::Draw("after_BufferFusionMultiOut.dot", graph_ptr);
  ASSERT_EQ(manager->all_nodes().size(), 21);
}
#endif
// A node whose output is consumed by multiple users: the matcher must still
// find fusible scopes, and after replacing them (in sorted id order) the
// graph must contain 15 nodes.
TEST_F(TestHWBufferFusion, BufferMultiUse) {
  KernelGraphPtr graph_ptr = CreateKernelGraphForMultiUse(3, 4);
  draw::Draw("before_BufferMultiUse.dot", graph_ptr);
  ASSERT_TRUE(nullptr != graph_ptr);
  // Default-construct the pass directly; copy-initializing from a temporary is redundant.
  mindspore::opt::BufferFusion buffer_fusion;
  std::vector<FuncGraphPtr> graphs{graph_ptr};
  FuncGraphManagerPtr manager = std::make_shared<FuncGraphManager>(graphs);
  manager->AddFuncGraph(graph_ptr);
  ASSERT_EQ(buffer_fusion.MatchBufferFusionPattern(*graph_ptr), true);
  kernel::KernelPackPtr kernel_pack = std::make_shared<kernel::KernelPack>();
  auto kernel_ptr = std::make_shared<kernel::TbeKernelMod>(kernel_pack);
  std::unordered_map<int, BufferFusionInfo_t> buffer_fusion_infos;
  buffer_fusion.GetBufferFusionInfo(*graph_ptr, &buffer_fusion_infos);
  // Sort fusion ids for a deterministic replacement order
  // (unordered_map iteration order is unspecified).
  std::vector<int32_t> fusion_ids;
  for (auto &buffer_fusion_info : buffer_fusion_infos) {
    fusion_ids.push_back(buffer_fusion_info.first);
  }
  std::sort(fusion_ids.begin(), fusion_ids.end());
  for (auto &fusion_id : fusion_ids) {
    buffer_fusion.ReplaceFusionOp(buffer_fusion_infos[fusion_id], kernel_ptr, graph_ptr.get());
  }
  draw::Draw("after_BufferMultiUse.dot", graph_ptr);
  ASSERT_EQ(manager->all_nodes().size(), 15);
}
// Eltwise chain ending in a COMMREDUCE op: one fusion scope with 3 anf
// nodes, 1 input, 1 output; 5 nodes remain after replacement.
TEST_F(TestHWBufferFusion, BufferFusionReduce) {
  KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusion(2, true, mindspore::kernel::COMMREDUCE);
  ASSERT_TRUE(nullptr != graph_ptr);
  // Default-construct the pass directly; copy-initializing from a temporary is redundant.
  mindspore::opt::BufferFusion buffer_fusion;
  std::vector<FuncGraphPtr> graphs{graph_ptr};
  FuncGraphManagerPtr manager = std::make_shared<FuncGraphManager>(graphs);
  manager->AddFuncGraph(graph_ptr);
  ASSERT_EQ(buffer_fusion.MatchBufferFusionPattern(*graph_ptr), true);
  kernel::KernelPackPtr kernel_pack = std::make_shared<kernel::KernelPack>();
  auto kernel_ptr = std::make_shared<kernel::TbeKernelMod>(kernel_pack);
  std::unordered_map<int, BufferFusionInfo_t> buffer_fusion_infos;
  buffer_fusion.GetBufferFusionInfo(*graph_ptr, &buffer_fusion_infos);
  for (auto &buffer_fusion_info : buffer_fusion_infos) {
    EXPECT_EQ(buffer_fusion_info.second.anf_nodes.size(), 3);
    EXPECT_EQ(buffer_fusion_info.second.inputs_list.size(), 1);
    EXPECT_EQ(buffer_fusion_info.second.outputs_list.size(), 1);
    buffer_fusion.ReplaceFusionOp(buffer_fusion_info.second, kernel_ptr, graph_ptr.get());
  }
  ASSERT_EQ(manager->all_nodes().size(), 5);
}
// Eltwise chain ending in a SEGMENT op: one fusion scope with 3 anf nodes,
// 1 input, 1 output; 5 nodes remain after replacement.
TEST_F(TestHWBufferFusion, BufferFusionSegment) {
  KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusion(2, true, mindspore::kernel::SEGMENT);
  ASSERT_TRUE(nullptr != graph_ptr);
  // Default-construct the pass directly; copy-initializing from a temporary is redundant.
  mindspore::opt::BufferFusion buffer_fusion;
  std::vector<FuncGraphPtr> graphs{graph_ptr};
  FuncGraphManagerPtr manager = std::make_shared<FuncGraphManager>(graphs);
  manager->AddFuncGraph(graph_ptr);
  ASSERT_EQ(buffer_fusion.MatchBufferFusionPattern(*graph_ptr), true);
  kernel::KernelPackPtr kernel_pack = std::make_shared<kernel::KernelPack>();
  auto kernel_ptr = std::make_shared<kernel::TbeKernelMod>(kernel_pack);
  std::unordered_map<int, BufferFusionInfo_t> buffer_fusion_infos;
  buffer_fusion.GetBufferFusionInfo(*graph_ptr, &buffer_fusion_infos);
  for (auto &buffer_fusion_info : buffer_fusion_infos) {
    EXPECT_EQ(buffer_fusion_info.second.anf_nodes.size(), 3);
    EXPECT_EQ(buffer_fusion_info.second.inputs_list.size(), 1);
    EXPECT_EQ(buffer_fusion_info.second.outputs_list.size(), 1);
    buffer_fusion.ReplaceFusionOp(buffer_fusion_info.second, kernel_ptr, graph_ptr.get());
  }
  ASSERT_EQ(manager->all_nodes().size(), 5);
}
// One eltwise before and three after the anchor op: the full end-to-end pass
// shrinks the graph from 13 nodes to 5.
TEST_F(TestHWBufferFusion, BufferFusionEltwise1BeforeAnd3After) {
  KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusionEltwiseBeforeAndAfter(1);
  ASSERT_TRUE(nullptr != graph_ptr);
  draw::Draw("before_BufferFusionEltwiseBeforeAndAfter1.dot", graph_ptr);
  // Default-construct the pass directly; copy-initializing from a temporary is redundant.
  mindspore::opt::BufferFusion buffer_fusion;
  std::vector<FuncGraphPtr> graphs{graph_ptr};
  FuncGraphManagerPtr manager = std::make_shared<FuncGraphManager>(graphs);
  manager->AddFuncGraph(graph_ptr);
  ASSERT_EQ(manager->all_nodes().size(), 13);
  buffer_fusion.Run(graph_ptr);
  draw::Draw("after_BufferFusionEltwiseBeforeAndAfter1.dot", graph_ptr);
  ASSERT_EQ(manager->all_nodes().size(), 5);
}
// Two eltwise ops before and three after the anchor op: the pass shrinks
// the graph from 15 nodes to 5.
TEST_F(TestHWBufferFusion, BufferFusionEltwise2BeforeAnd3After) {
  KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusionEltwiseBeforeAndAfter(2);
  ASSERT_TRUE(nullptr != graph_ptr);
  draw::Draw("before_BufferFusionEltwiseBeforeAndAfter2.dot", graph_ptr);
  // Default-construct the pass directly; copy-initializing from a temporary is redundant.
  mindspore::opt::BufferFusion buffer_fusion;
  std::vector<FuncGraphPtr> graphs{graph_ptr};
  FuncGraphManagerPtr manager = std::make_shared<FuncGraphManager>(graphs);
  manager->AddFuncGraph(graph_ptr);
  ASSERT_EQ(manager->all_nodes().size(), 15);
  buffer_fusion.Run(graph_ptr);
  draw::Draw("after_BufferFusionEltwiseBeforeAndAfter2.dot", graph_ptr);
  ASSERT_EQ(manager->all_nodes().size(), 5);
}
// Three eltwise ops before and three after the anchor op: the pass shrinks
// the graph from 17 nodes to 5.
TEST_F(TestHWBufferFusion, BufferFusionEltwise3BeforeAnd3After) {
  KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusionEltwiseBeforeAndAfter(3);
  ASSERT_TRUE(nullptr != graph_ptr);
  draw::Draw("before_BufferFusionEltwiseBeforeAndAfter3.dot", graph_ptr);
  // Default-construct the pass directly; copy-initializing from a temporary is redundant.
  mindspore::opt::BufferFusion buffer_fusion;
  std::vector<FuncGraphPtr> graphs{graph_ptr};
  FuncGraphManagerPtr manager = std::make_shared<FuncGraphManager>(graphs);
  manager->AddFuncGraph(graph_ptr);
  ASSERT_EQ(manager->all_nodes().size(), 17);
  buffer_fusion.Run(graph_ptr);
  draw::Draw("after_BufferFusionEltwiseBeforeAndAfter3.dot", graph_ptr);
  ASSERT_EQ(manager->all_nodes().size(), 5);
}
// Fusion scope with multiple graph inputs: the pass shrinks the graph
// from 11 nodes to 7.
TEST_F(TestHWBufferFusion, BufferFusionMultipleIn) {
  KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusionMultipleIn(2);
  ASSERT_TRUE(nullptr != graph_ptr);
  draw::Draw("before_BufferFusionMultipleIn.dot", graph_ptr);
  // Default-construct the pass directly; copy-initializing from a temporary is redundant.
  mindspore::opt::BufferFusion buffer_fusion;
  std::vector<FuncGraphPtr> graphs{graph_ptr};
  FuncGraphManagerPtr manager = std::make_shared<FuncGraphManager>(graphs);
  manager->AddFuncGraph(graph_ptr);
  ASSERT_EQ(manager->all_nodes().size(), 11);
  buffer_fusion.Run(graph_ptr);
  draw::Draw("after_BufferFusionMultipleIn.dot", graph_ptr);
  ASSERT_EQ(manager->all_nodes().size(), 7);
}
}
// namespace opt
}
// namespace mindspore
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录