magicwindyyd / mindspore (fork of MindSpore / mindspore, in sync with the fork source)
Commit fc4bf192
Authored on Sep 07, 2020 by mindspore-ci-bot
Committed by Gitee on Sep 07, 2020
!5820 fix ub fusion
Merge pull request !5820 from jjfeing/r0.7_ub_fusion
Parents: 5de9578a bd1261e5
Showing 18 changed files with 217 additions and 256 deletions (+217, -256)
mindspore/_extends/parallel_compile/tbe_compiler/compiler.py  (+2, -33)
mindspore/ccsrc/backend/kernel_compiler/kernel.h  (+2, -3)
mindspore/ccsrc/backend/kernel_compiler/kernel_fusion.cc  (+11, -28)
mindspore/ccsrc/backend/kernel_compiler/kernel_fusion.h  (+4, -5)
mindspore/ccsrc/backend/kernel_compiler/oplib/oplib.h  (+3, -4)
mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_adapter.cc  (+1, -1)
mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_adapter.h  (+1, -1)
mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_convert_utils.cc  (+1, -1)
mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.cc  (+175, -53)
mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.h  (+10, -4)
mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_parallel_build.cc  (+0, -74)
mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_parallel_build.h  (+1, -6)
mindspore/ccsrc/backend/session/ascend_session.cc  (+0, -1)
mindspore/ccsrc/runtime/device/ascend/kernel_build_ascend.cc  (+2, -33)
mindspore/ccsrc/runtime/device/ascend/kernel_build_ascend.h  (+0, -4)
mindspore/ccsrc/utils/utils.h  (+1, -0)
mindspore/ops/_op_impl/tbe/matmul.py  (+1, -1)
tests/st/networks/models/resnet50/test_resnet50_imagenet.py  (+2, -4)
mindspore/_extends/parallel_compile/tbe_compiler/compiler.py

@@ -17,8 +17,6 @@ import json
 import os
 import sys
 from te.platform.cce_conf import te_set_version
-from te.platform.fusion_manager import op_build_cfg_dis, op_build_cfg_en, set_current_op_name, \
-    init_op_pattern, set_op_params, set_op_build_type, get_op_pattern, set_current_op_func_name
 from te.platform.fusion_util import fusion_op
 from common import check_kernel_info, get_args, get_build_in_impl_path, get_ddk_version

@@ -27,7 +25,6 @@ build_in_impl_path = get_build_in_impl_path()
 # op function list
 op_build = "compile"
-op_pre_build = "pre_build"
 fusion_pattern_start_flag = "fusion_pattern_start"
 fusion_pattern_end_flag = "fusion_pattern_end"

@@ -83,19 +80,7 @@ def build_op(build_type, json_str):
         else:
             op_module = __import__("impl." + op_name, globals(), locals(), [op_name], 0)
         # get function
-        if build_type == op_pre_build:
-            # set op parameter
-            op_build_cfg_dis()
-            set_current_op_func_name(op_name)
-            set_current_op_name(kernel_name)
-            init_op_pattern()
-            set_op_params(*outputs_args, *attrs_args, kernel_name=kernel_name)
-            set_op_build_type('prebuild')
-            if custom_flag:
-                py_fn_name = kernel_info['op_info']['name']
-            else:
-                py_fn_name = op_name
-        elif build_type == op_build:
+        if build_type == op_build:
             if custom_flag:
                 py_fn_name = kernel_info['op_info']['name']
             else:

@@ -106,13 +91,6 @@ def build_op(build_type, json_str):
         if op_func is None:
             raise ValueError("Op:{} function {} is not supported by Tbe.".format(op_name, build_type))

-        # pre build
-        if build_type == op_pre_build:
-            op_func(*inputs_args, *outputs_args, *attrs_args, kernel_name=kernel_name)
-            # disable only pattern configuration
-            op_build_cfg_en()
-            return get_op_pattern()
-
         # call function
         if kernel_name[0:19] == "bounding_box_encode":
             return op_func(*inputs_args, *outputs_args, *attrs_args, kernel_name_val=kernel_name)

@@ -120,8 +98,6 @@ def build_op(build_type, json_str):
         return op_func(*inputs_args, *outputs_args, *attrs_args, kernel_name=kernel_name)
     except Exception as e:
-        if build_type == op_pre_build:
-            op_build_cfg_en()
         raise RuntimeError(e)

@@ -136,14 +112,9 @@ def compile_fusion_op(json_str):
         Exception: If specific keyword is not found.
     """
     args = json.loads(json_str)
     te_set_version(ddk_version)
     if 'fusion_op' not in args or not args['fusion_op']:
         raise ValueError("Json string Errors, key:fusion_op not found.")
-    if 'prebuild_ops' not in args or not args['prebuild_ops']:
-        raise ValueError("Json string Errors, key:prebuild_ops not found.")
-    pre_build_op_list = args['prebuild_ops']
-    for op in pre_build_op_list:
-        build_op(op_pre_build, json.dumps(op))
     fusion_op_arg = args['fusion_op']
     return fusion_op(json.dumps(fusion_op_arg))

@@ -159,8 +130,6 @@ def compile_with_json(json_str):
     json_info = json.loads(json_str)
     if "fusion_op" in json_info:
         ret = compile_fusion_op(json_str)
-    elif "compile_type" in json_info:
-        ret = build_op(op_pre_build, json_str)
     else:
         ret = build_op(op_build, json_str)
     return ret
mindspore/ccsrc/backend/kernel_compiler/kernel.h

@@ -37,7 +37,6 @@ enum FusionType {
   COMMREDUCE,
   SEGMENT,
   OPAQUE,
-  DYNAMIC,
   UNKNOWN_FUSION_TYPE = -1,
 };
 enum OpPattern {

@@ -80,8 +79,8 @@ class KernelPack {
   bool LoadKernelMeta(const std::string &json_f, const std::string &processor);
   bool ReadFromJsonFile(const std::string &json_f, const std::string &processor);
   const std::string Serialize() const;
-  const FlexArray *const GetJson() const { return json_; }
-  const FlexArray *const GetKernel() const { return kernel_; }
+  const FlexArray *GetJson() const { return json_; }
+  const FlexArray *GetKernel() const { return kernel_; }
   ~KernelPack() {
     if (json_) {
       delete[] json_;
mindspore/ccsrc/backend/kernel_compiler/kernel_fusion.cc

@@ -19,53 +19,36 @@
 #include <map>
 #include <string>
 #include <memory>
 #include <utility>
 #include "backend/kernel_compiler/tbe/tbe_kernel_build.h"
 #include "backend/kernel_compiler/tbe/tbe_kernel_parallel_build.h"
 #include "backend/kernel_compiler/tbe/tbe_utils.h"
 #include "backend/kernel_compiler/tbe/tbe_convert_utils.h"
 #include "utils/ms_context.h"

 namespace mindspore {
 namespace kernel {
 using mindspore::kernel::tbe::TbeUtils;
-static bool GenPreBuildKernelJson(const std::vector<AnfNodePtr> &compute_nodes,
-                                  std::vector<nlohmann::json> *prebuild_op_list) {
-  MS_EXCEPTION_IF_NULL(prebuild_op_list);
-  TbeKernelJsonCreator creator(PREBUILD);
-  for (const auto &anf_node : compute_nodes) {
-    nlohmann::json prebuild;
-    if (!creator.GenTbeSingleKernelJson(anf_node, &prebuild)) {
-      MS_LOG(ERROR) << "GenTbeSingleKernelJson failed";
-      return false;
-    }
-    (*prebuild_op_list).push_back(prebuild);
-  }
-  return true;
-}
 std::map<int32_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo> &fusion_scopes) {
   MS_LOG(INFO) << "kernel fusion build start, scope size:" << fusion_scopes.size();
   std::map<int32_t, KernelModPtr> kernel_mod_ret;
   auto build_manger = std::make_shared<ParallelBuildManager>();
   MS_EXCEPTION_IF_NULL(build_manger);
   for (const auto &fusion_scope_iter : fusion_scopes) {
-    auto scope_id = fusion_scope_iter.scope_id;
+    string fusion_kernel_name;
     nlohmann::json fusion_op;
-    string fusion_kernel = "te_fusion";
     if (!TbeKernelBuild::GenFusionScopeJson(fusion_scope_iter.input_nodes, fusion_scope_iter.compute_nodes, &fusion_op,
-                                            &fusion_kernel)) {
+                                            &fusion_kernel_name)) {
       continue;
     }
     // gen kernel_name & check cache
     std::string json_str = fusion_op.dump();
     size_t hash_id = std::hash<std::string>()(json_str);
-    auto json_name = fusion_kernel.append("_").append(std::to_string(hash_id));
+    auto context_ptr = MsContext::GetInstance();
+    MS_EXCEPTION_IF_NULL(context_ptr);
+    auto device_id = context_ptr->device_id();
+    auto json_name =
+      fusion_kernel_name.append("_").append(std::to_string(hash_id)).append("_").append(std::to_string(device_id));
     fusion_op["fusion_op_name"] = json_name;
-    // gen json for prebuild
-    std::vector<nlohmann::json> prebuild_op_list;
-    if (!GenPreBuildKernelJson(fusion_scope_iter.compute_nodes, &prebuild_op_list)) {
-      continue;
-    }
     // get io size
     std::vector<size_t> input_size_list;
     std::vector<size_t> output_size_list;

@@ -80,20 +63,20 @@ std::map<int32_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo>
     auto kernel_mod =
       build_manger->GenKernelMod(json_name, tbe::kProcessorAiCore, input_size_list, output_size_list, kernel_pack);
     if (kernel_mod != nullptr) {
-      kernel_mod_ret[scope_id] = kernel_mod;
+      kernel_mod_ret[fusion_scope_iter.scope_id] = kernel_mod;
       continue;
     }
     // fusion build
     nlohmann::json fusion_json;
     fusion_json["fusion_op"] = fusion_op;
-    fusion_json["prebuild_ops"] = prebuild_op_list;
     auto task_id = build_manger->StartCompileOp(fusion_json);
     TbeUtils::SaveJsonInfo(json_name, fusion_json.dump());
     if (task_id < 0) {
       MS_EXCEPTION(ArgumentError) << "start compile failed.";
     }
-    build_manger->SaveTaskInfo(task_id, nullptr, json_name, input_size_list, output_size_list, scope_id);
+    build_manger->SaveTaskInfo(task_id, nullptr, json_name, input_size_list, output_size_list,
+                               fusion_scope_iter.scope_id);
   }
   int build_failed_num = 0;
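The renamed json_name above now folds the device id into the fused kernel name, next to the hash of the serialized fusion JSON. A minimal standalone sketch of that naming scheme follows; the prefix that GenFusionScopeJson would produce and the device id that MsContext would supply are passed in as plain parameters here, which is an assumption made only to keep the sketch self-contained.

#include <cstdint>
#include <functional>
#include <iostream>
#include <string>

// Hash the dumped fusion JSON with std::hash, then build "<prefix>_<hash>_<device_id>".
std::string FusedKernelJsonName(const std::string &fusion_kernel_name, const std::string &json_str,
                                uint32_t device_id) {
  size_t hash_id = std::hash<std::string>()(json_str);
  return fusion_kernel_name + "_" + std::to_string(hash_id) + "_" + std::to_string(device_id);
}

int main() {
  // Prints te_fusion_convolution_elemwise_<hash>_0
  std::cout << FusedKernelJsonName("te_fusion_convolution_elemwise", R"({"fusion_op":[]})", 0) << std::endl;
  return 0;
}

Appending the device id presumably keeps fused kernels built for different devices from colliding under the same cached name; that reading is an inference from the diff, not something stated in the commit message.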
mindspore/ccsrc/backend/kernel_compiler/kernel_fusion.h

@@ -16,6 +16,7 @@
 #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_KERNELFUSION_H_
 #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_KERNELFUSION_H_
+#include <utility>
 #include <vector>
 #include <map>
 #include "backend/kernel_compiler/kernel.h"

@@ -25,11 +26,9 @@ namespace kernel {
  * @brief fuse op and return a callable mod
  */
 struct FusionScopeInfo {
-  FusionScopeInfo() {}
-  FusionScopeInfo(int32_t id, const std::vector<AnfNodePtr> &in, const std::vector<AnfNodePtr> &comp,
-                  const std::vector<AnfNodePtr> &out)
-      : scope_id(id), input_nodes(in), compute_nodes(comp), output_nodes(out) {}
-  int32_t scope_id;
+  FusionScopeInfo(int32_t id, std::vector<AnfNodePtr> in, std::vector<AnfNodePtr> comp, std::vector<AnfNodePtr> out)
+      : scope_id(id), input_nodes(std::move(in)), compute_nodes(std::move(comp)), output_nodes(std::move(out)) {}
+  int32_t scope_id{};
   std::vector<AnfNodePtr> input_nodes;
   std::vector<AnfNodePtr> compute_nodes;
   std::vector<AnfNodePtr> output_nodes;
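The new constructor takes the node vectors by value and moves them into the members, the standard sink-argument idiom, and scope_id picks up a brace initializer so it never sits indeterminate. A reduced, self-contained sketch of the idiom; std::string stands in for AnfNodePtr, which is purely an illustration choice.

#include <cstdint>
#include <string>
#include <utility>
#include <vector>

// Sink-argument idiom: accept by value, then move into the members. Callers
// passing lvalues pay one copy plus a cheap move (about the same as the old
// const& constructor); callers passing temporaries pay only moves.
struct ScopeInfoSketch {
  ScopeInfoSketch(int32_t id, std::vector<std::string> in, std::vector<std::string> comp,
                  std::vector<std::string> out)
      : scope_id(id), input_nodes(std::move(in)), compute_nodes(std::move(comp)), output_nodes(std::move(out)) {}
  int32_t scope_id{};  // mirrors the header change from "int32_t scope_id;" to "int32_t scope_id{};"
  std::vector<std::string> input_nodes;
  std::vector<std::string> compute_nodes;
  std::vector<std::string> output_nodes;
};

int main() {
  ScopeInfoSketch info(1, {"conv2d"}, {"conv2d", "relu"}, {"relu"});  // temporaries are moved in
  (void)info;
  return 0;
}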
mindspore/ccsrc/backend/kernel_compiler/oplib/oplib.h

@@ -40,14 +40,13 @@ class OpLib {
  private:
   static bool RegOpFromLocalInfo();
-  static bool DecodeOpInfo(const nlohmann::json &obj, const OpImplyType imply_type, const std::string &impl_path);
-  static bool DecodeAttr(const nlohmann::json &obj, const OpImplyType imply_type,
-                         const std::shared_ptr<OpInfo> &op_info);
+  static bool DecodeOpInfo(const nlohmann::json &obj, OpImplyType imply_type, const std::string &impl_path);
+  static bool DecodeAttr(const nlohmann::json &obj, OpImplyType imply_type, const std::shared_ptr<OpInfo> &op_info);
   static bool DecodeDtypeFormat(const nlohmann::json &dtype_format, const std::shared_ptr<OpIOInfo> &op_io,
                                 size_t index);
   static void DecodeTBESpecificInfo(const nlohmann::json &obj, const std::shared_ptr<OpInfo> &op_info);
   static void DecodeAKGSpecificInfo(const nlohmann::json &obj, const std::shared_ptr<OpInfo> &op_info);
-  static bool DecodeInputOutput(const nlohmann::json &obj, const OpImplyType imply_type, const OpIOType io_type,
+  static bool DecodeInputOutput(const nlohmann::json &obj, OpImplyType imply_type, OpIOType io_type,
                                 const std::shared_ptr<OpInfo> &op_info, const nlohmann::json &dtype_format);
   static bool GetRefInfo(const std::shared_ptr<OpInfo> &op_info);
   static bool CheckRepetition(const std::shared_ptr<OpInfo> &op_info);
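This hunk only drops top-level const from parameters that are passed by value (OpImplyType, OpIOType). That is a pure header cleanup: top-level const on a by-value parameter is not part of the function type, so callers see the same declarations before and after. A small sketch; the enum name and its values here are placeholders, not MindSpore's real definition.

#include <iostream>

enum class ImplyTypeSketch { kAKG, kTBE };

// These two lines refer to the same function: top-level const on a by-value
// parameter is ignored in the function type, so removing it from a declaration
// (as the header above does) changes nothing for callers.
void DecodeSketch(ImplyTypeSketch imply_type);
void DecodeSketch(const ImplyTypeSketch imply_type) {  // the definition may still keep const locally
  std::cout << (imply_type == ImplyTypeSketch::kTBE ? "tbe" : "akg") << std::endl;
}

int main() {
  DecodeSketch(ImplyTypeSketch::kTBE);  // prints "tbe"
  return 0;
}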
mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_adapter.cc

@@ -173,7 +173,7 @@ void TbeAdapter::NormalizeFuncName(std::string *func_name) {
   *func_name = name_tmp;
   auto iter = tbe_func_adapter_map.find(*func_name);
   if (iter != tbe_func_adapter_map.end()) {
-    MS_LOG(INFO) << " map actual op from me " << *func_name << " to tbe op " << iter->second;
+    MS_LOG(INFO) << " Map actual op from me: " << *func_name << " to tbe op: " << iter->second;
     *func_name = iter->second;
   }
 }
mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_adapter.h

@@ -27,7 +27,7 @@
 // the TBE back-end operator implementation difference
 namespace mindspore {
 namespace kernel {
-enum kCreaterType : int { SINGLE_BUILD = 0, PREBUILD, OP_SELECT_FORMAT, CHECK_SUPPORTED, OP_PRE_COMPILE };
+enum kCreaterType : int { SINGLE_BUILD = 0, OP_SELECT_FORMAT, CHECK_SUPPORTED, OP_PRE_COMPILE };
 namespace tbe {
 using FAttrsPass = void (*)(const AnfNodePtr &anf_node, const std::vector<std::shared_ptr<OpAttr>> &op_info_attrs,
                             nlohmann::json *attrs_json);
mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_convert_utils.cc

@@ -63,7 +63,7 @@ const std::unordered_map<std::string, size_t> type_nbyte_maps = {
 const std::unordered_map<std::string, FusionType> fusion_type_maps = {
   {"CONVLUTION", FusionType::CONVLUTION}, {"ELEMWISE", FusionType::ELEMWISE}, {"COMMREDUCE", FusionType::COMMREDUCE},
-  {"SEGMENT", FusionType::SEGMENT},       {"DYNAMIC", FusionType::DYNAMIC},   {"OPAQUE", FusionType::OPAQUE},
+  {"SEGMENT", FusionType::SEGMENT},       {"OPAQUE", FusionType::OPAQUE},
 };

 TypeId DtypeToTypeId(const std::string &dtypes) {
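With the DYNAMIC entry gone, a "DYNAMIC" fusion-type string no longer maps to a dedicated enum value. A reduced sketch of the string-to-enum lookup this table feeds; the fallback to UNKNOWN_FUSION_TYPE mirrors the enum's -1 sentinel and is illustrative, not necessarily the exact behavior of tbe::GetFusionType.

#include <iostream>
#include <string>
#include <unordered_map>

// Reduced sketch of the FusionType enum after this commit (DYNAMIC removed).
enum FusionTypeSketch { CONVLUTION, ELEMWISE, COMMREDUCE, SEGMENT, OPAQUE, UNKNOWN_FUSION_TYPE = -1 };

FusionTypeSketch GetFusionTypeSketch(const std::string &name) {
  static const std::unordered_map<std::string, FusionTypeSketch> fusion_type_maps = {
    {"CONVLUTION", CONVLUTION}, {"ELEMWISE", ELEMWISE}, {"COMMREDUCE", COMMREDUCE},
    {"SEGMENT", SEGMENT},       {"OPAQUE", OPAQUE},
  };
  auto iter = fusion_type_maps.find(name);
  return iter == fusion_type_maps.end() ? UNKNOWN_FUSION_TYPE : iter->second;
}

int main() {
  // A lookup for "DYNAMIC" now misses and falls back to the sentinel.
  std::cout << GetFusionTypeSketch("ELEMWISE") << " " << GetFusionTypeSketch("DYNAMIC") << std::endl;  // 1 -1
  return 0;
}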
mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.cc

This diff is collapsed.
mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.h

@@ -41,8 +41,8 @@ class TbeKernelBuild {
                        std::vector<size_t> *output_size_list);
   // Ub Fuison
   static bool GenFusionScopeJson(const std::vector<AnfNodePtr> &input_nodes,
-                                 const std::vector<AnfNodePtr> &compute_nodes, nlohmann::json *fusion_str,
-                                 std::string *fusion_kernel);
+                                 const std::vector<AnfNodePtr> &compute_nodes, nlohmann::json *fusion_json,
+                                 std::string *fusion_kernel_name);
   static bool GetIOSize(const nlohmann::json &fusion_op_list, const std::vector<AnfNodePtr> &output_nodes,
                         std::vector<size_t> *input_size_list, std::vector<size_t> *output_size_list);

@@ -61,9 +61,14 @@ class TbeKernelBuild {
   static std::vector<size_t> GetDescOutputIndex(const std::vector<int> &output_used_nums);
   static bool GenFusionComputeOutputJson(const mindspore::CNodePtr &cnode,
                                          std::vector<nlohmann::json> *output_desc_list);
+  static void GenPreDescJson(nlohmann::json *output_desc);
+  static void GenFusionComputeCommonJson(const mindspore::CNodePtr &cnode, nlohmann::json *compute_op_str,
+                                         std::string *fusion_kernel_name);
+  static void GenFusionComputePreBuildJson(const mindspore::CNodePtr &cnode, nlohmann::json *compute_op_str);
   static void GenDescJson(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t node_out_idx,
                           size_t desc_output_idx, nlohmann::json *output_desc,
                           FusionDataType fusion_data_type = kFusionNormal);
+  static void GenSuffixDescJson(nlohmann::json *output_desc);
   static void GenReusedOutputDesc(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t index,
                                   size_t output_index, nlohmann::json *output_desc);
   static size_t GetIOSizeImpl(const nlohmann::json &desc);

@@ -76,6 +81,7 @@ class TbeKernelBuild {
   static bool IsDynamicInput(const CNodePtr &cnode);
   static size_t GetOptionalInput(const CNodePtr &cnode, bool is_dynamic_input);
   static std::string GetRealOpType(const std::string &origin_type);
+  static std::string GetNodeFusionType(const CNodePtr &cnode);
 };

 class TbeKernelJsonCreator {

@@ -84,14 +90,14 @@ class TbeKernelJsonCreator {
   ~TbeKernelJsonCreator() = default;
   bool GenTbeSingleKernelJson(const std::shared_ptr<AnfNode> &anf_node, nlohmann::json *kernel_json);
   std::string json_name() { return json_name_; }
+  bool GenTbeAttrJson(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<OpInfo> &op_info,
+                      nlohmann::json *attrs_json);

  private:
   bool GenTbeInputsJson(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<OpInfo> &op_info,
                         nlohmann::json *inputs_json);
   bool GenTbeOutputsJson(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<OpInfo> &op_info,
                          nlohmann::json *outputs_json);
-  bool GenTbeAttrJson(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<OpInfo> &op_info,
-                      nlohmann::json *attrs_json);
   static void ParseAttrValue(const std::string &type, const ValuePtr &value, nlohmann::json *attr_obj);
   bool GenInputDescJson(const std::shared_ptr<AnfNode> &anf_node, size_t real_input_index, bool value,
                         const std::shared_ptr<OpIOInfo> &input_ptr, const string &op_input_name, size_t input_i,
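GenFusionScopeJson keeps its bool-status plus out-parameter shape; the rename just makes the out parameters say what they carry (the fusion JSON and the fusion kernel name that GenFusionComputeCommonJson also takes). A reduced sketch of that calling convention with an invented body, only to show how a caller such as KernelFusion consumes it.

#include <iostream>
#include <string>
#include <vector>

// Status comes back as the return value; the generated fusion kernel name
// comes back through a pointer out-parameter. The body below is invented for
// illustration and is not the real name-building logic.
bool GenFusionScopeNameSketch(const std::vector<std::string> &compute_op_types, std::string *fusion_kernel_name) {
  if (fusion_kernel_name == nullptr || compute_op_types.empty()) {
    return false;
  }
  *fusion_kernel_name = "te_fusion";
  for (const auto &op_type : compute_op_types) {
    *fusion_kernel_name += "_" + op_type;
  }
  return true;
}

int main() {
  std::string fusion_kernel_name;
  if (GenFusionScopeNameSketch({"conv2d", "relu"}, &fusion_kernel_name)) {
    std::cout << fusion_kernel_name << std::endl;  // te_fusion_conv2d_relu
  }
  return 0;
}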
mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_parallel_build.cc

@@ -33,42 +33,6 @@
 namespace mindspore {
 namespace kernel {
 using mindspore::kernel::tbe::TbeUtils;
-bool TbeOpParallelPreBuild(const std::vector<AnfNodePtr> &anf_nodes) {
-  auto build_manger = std::make_shared<ParallelBuildManager>();
-  MS_EXCEPTION_IF_NULL(build_manger);
-  for (const auto &anf_node : anf_nodes) {
-    // gen kernel json
-    MS_EXCEPTION_IF_NULL(anf_node);
-    nlohmann::json kernel_json;
-    TbeKernelJsonCreator creator(OP_PRE_COMPILE);
-    if (!creator.GenTbeSingleKernelJson(anf_node, &kernel_json)) {
-      MS_LOG(ERROR) << "GenTbeSingleKernelJson failed";
-      return false;
-    }
-    kernel_json["compile_type"] = "pre_build";
-    // op build
-    auto task_id = build_manger->StartCompileOp(kernel_json);
-    build_manger->SavePreTaskInfo(task_id, anf_node);
-  }
-  while (!build_manger->IsAllPreTaskFinish()) {
-    int task_id = -1;
-    std::string task_result;
-    std::string pre_build_result;
-    auto ret = build_manger->WaitOne(&task_id, &task_result, &pre_build_result);
-    if (!ret) {
-      MS_EXCEPTION(ArgumentError) << "Pre Build Failed. wait one ret:" << ret << ", task id:" << task_id;
-    }
-    if (task_result != "Success") {
-      MS_EXCEPTION(ArgumentError) << "task pre compile Failed, task id:" << task_id << ", cause:" << task_result;
-    }
-    build_manger->PreTaskFinishProcess(task_id, pre_build_result);
-  }
-  return true;
-}
 bool TbeOpParallelBuild(const std::vector<AnfNodePtr> &anf_nodes) {
   auto build_manger = std::make_shared<ParallelBuildManager>();
   MS_EXCEPTION_IF_NULL(build_manger);

@@ -123,15 +87,8 @@ bool TbeOpParallelBuild(const std::vector<AnfNodePtr> &anf_nodes) {
   return build_manger->GenSameOpKernelMod();
 }

-ParallelBuildManager::ParallelBuildManager() {}
 ParallelBuildManager::~ParallelBuildManager() { ResetTaskInfo(); }

-void ParallelBuildManager::SavePreTaskInfo(int32_t task_id, const mindspore::AnfNodePtr &anf_node) {
-  MS_LOG(INFO) << "SavePreTaskInfo, task id: " << task_id;
-  pre_task_map_[task_id] = anf_node;
-}
 void ParallelBuildManager::SaveTaskInfo(int32_t task_id, const mindspore::AnfNodePtr &anf_node,
                                         const std::string &json_name, const std::vector<size_t> &input_size_list,
                                         const std::vector<size_t> &output_size_list, int32_t scope_id) {

@@ -150,42 +107,11 @@ void ParallelBuildManager::SaveTaskInfo(int32_t task_id, const mindspore::AnfNod
   task_map_[task_id] = task_info;
 }

-bool ParallelBuildManager::IsAllPreTaskFinish() const {
-  MS_LOG(INFO) << "wait pre build process task_num: " << pre_task_map_.size();
-  return pre_task_map_.empty();
-}
 bool ParallelBuildManager::IsAllTaskFinish() const {
   MS_LOG(INFO) << "wait process task_num: " << task_map_.size();
   return task_map_.empty();
 }

-void ParallelBuildManager::PreTaskFinishProcess(int32_t task_id, const std::string &pre_build_result) {
-  auto task_iter = pre_task_map_.find(task_id);
-  if (task_iter == pre_task_map_.end()) {
-    MS_EXCEPTION(ArgumentError) << "can find pre task_id:" << task_id;
-  }
-  auto node = task_iter->second;
-  auto builder =
-    std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>(AnfAlgo::GetSelectKernelBuildInfo(node));
-  std::string start_flag = "fusion_pattern_start";
-  std::string end_flag = "fusion_pattern_end";
-  int start = pre_build_result.find(start_flag);
-  int end = pre_build_result.find(end_flag);
-  if (start != -1 && end != -1 && end >= start) {
-    std::string result = pre_build_result.substr(start + start_flag.size(), end - start - start_flag.size());
-    if (result == "") {
-      (void)pre_task_map_.erase(task_iter);
-      return;
-    }
-    transform(result.begin(), result.end(), result.begin(), ::toupper);
-    FusionType fusion_type = tbe::GetFusionType(result);
-    builder->SetFusionType(fusion_type);
-    AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), node.get());
-  }
-  (void)pre_task_map_.erase(task_iter);
-}
 std::pair<int32_t, KernelModPtr> ParallelBuildManager::TaskFinishProcess(int32_t task_id, bool set_kernel_mod) {
   auto task_iter = task_map_.find(task_id);
   if (task_iter == task_map_.end()) {
mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_parallel_build.h

@@ -28,7 +28,6 @@
 namespace mindspore {
 namespace kernel {
-bool TbeOpParallelPreBuild(const std::vector<AnfNodePtr> &anf_nodes);
 bool TbeOpParallelBuild(const std::vector<AnfNodePtr> &anf_nodes);

 struct KernelBuildTaskInfo {

@@ -42,9 +41,8 @@ struct KernelBuildTaskInfo {
 class ParallelBuildManager {
  public:
-  ParallelBuildManager();
+  ParallelBuildManager() = default;
   ~ParallelBuildManager();
-  void SavePreTaskInfo(int32_t task_id, const AnfNodePtr &anf_node);
   void SaveTaskInfo(int32_t task_id, const AnfNodePtr &anf_node, const std::string &json_name,
                     const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list,
                     int32_t scope_id = 0);

@@ -54,10 +52,7 @@ class ParallelBuildManager {
   bool SearchInCache(const std::string &json_name, const std::string &processor,
                      const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list,
                      AnfNode *node) const;
-  bool IsAllPreTaskFinish() const;
   bool IsAllTaskFinish() const;
-  void PreTaskFinishProcess(int32_t task_id, const std::string &pre_build_result);
   std::pair<int32_t, KernelModPtr> TaskFinishProcess(int32_t task_id, bool set_kernel_mod = true);
   KernelModPtr GenKernelMod(const string &json_name, const string &processor,
                             const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list,
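Swapping the user-declared empty constructor for "= default" (and deleting its empty definition from the .cc) is behavior preserving. A minimal sketch of the before/after; names are abbreviated and, unlike the real class, the sketch also defaults the destructor, which in ParallelBuildManager stays user-defined because it calls ResetTaskInfo().

// New form: the compiler generates the constructor, so no out-of-line "{}"
// body is needed in the .cc file.
class BuildManagerSketch {
 public:
  BuildManagerSketch() = default;
  // BuildManagerSketch();  // old form, previously defined out of line as "ParallelBuildManager::ParallelBuildManager() {}"
  ~BuildManagerSketch() = default;
};

int main() {
  BuildManagerSketch manager;
  (void)manager;
  return 0;
}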
mindspore/ccsrc/backend/session/ascend_session.cc

@@ -474,7 +474,6 @@ void AscendSession::InitRuntimeResource() {
 }
 void AscendSession::HardwareOptimize(const std::shared_ptr<KernelGraph> &kernel_graph) const {
-  device::ascend::KernelPreBuild(kernel_graph.get());
   MS_LOG(INFO) << "HardwareOptimize start!";
   opt::AscendBackendOptimization(kernel_graph);
   opt::AscendGraphKernelCommonProcess(kernel_graph);
mindspore/ccsrc/runtime/device/ascend/kernel_build_ascend.cc

@@ -19,7 +19,8 @@
 #include <vector>
 #include <string>
 #include <memory>
 #include <set>
 #include <map>
 #include "runtime/device/ascend/kernel_select_ascend.h"
 #include "runtime/device/kernel_info.h"
 #include "backend/kernel_compiler/kernel.h"

@@ -61,32 +62,6 @@ static kernel::KernelModPtr SerialCompileImpl(const AnfNodePtr &anf_node) {
   return kernel_mod_ptr;
 }

-static bool KernelPreBuildParallelCompile(const mindspore::session::KernelGraph *kernel_graph_ptr) {
-  MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
-  std::vector<AnfNodePtr> tbe_nodes;
-  for (const auto &anf_node : kernel_graph_ptr->execution_order()) {
-    MS_EXCEPTION_IF_NULL(anf_node);
-    if (!AnfAlgo::IsRealKernel(anf_node)) {
-      continue;
-    }
-    KernelType kernel_type = AnfAlgo::GetKernelType(anf_node);
-    switch (kernel_type) {
-      case KernelType::TBE_KERNEL: {
-        if (AnfAlgo::GetKernelMod(anf_node) == nullptr &&
-            AnfAlgo::GetFusionType(anf_node) == kernel::FusionType::DYNAMIC) {
-          tbe_nodes.push_back(anf_node);
-        }
-        break;
-      }
-      default: {
-        break;
-      }
-    }
-  }
-  bool ret = kernel::TbeOpParallelPreBuild(tbe_nodes);
-  return ret;
-}
 static bool KernelBuildParallelCompile(const mindspore::session::KernelGraph *kernel_graph_ptr) {
   MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
   std::vector<AnfNodePtr> tbe_nodes;

@@ -230,12 +205,6 @@ static bool IsAtomicNode(const CNodePtr &kernel_node) {
   return !(workspace_indexs.empty() && output_indexs.empty());
 }

-bool KernelPreBuild(const mindspore::session::KernelGraph *kernel_graph_ptr) {
-  MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
-  bool ret = device::ascend::KernelPreBuildParallelCompile(kernel_graph_ptr);
-  return ret;
-}
 bool KernelBuild(const mindspore::session::KernelGraph *kernel_graph_ptr) {
   MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
   TbeUtils::LoadCache();
mindspore/ccsrc/runtime/device/ascend/kernel_build_ascend.h

@@ -22,10 +22,6 @@
 namespace mindspore {
 namespace device {
 namespace ascend {
-/**
- * @brief kernel pre build for ascend.
- */
-bool KernelPreBuild(const mindspore::session::KernelGraph *kernel_graph_ptr);
 /**
  * @brief kernel build for ascend.
  */
mindspore/ccsrc/utils/utils.h

@@ -32,6 +32,7 @@ namespace mindspore {
 // op name. Op which not exists in operator/ops.h, so define it's name here
 constexpr auto kFour2FiveOpName = "Four2Five";
+constexpr auto kFive2FourOpName = "Five2Four";
 constexpr auto kConv2DOpName = "Conv2D";
 constexpr auto kConvBN1OpName = "ConvBN1";
 constexpr auto kBN2AddReluOpName = "BN2AddRelu";
 constexpr auto kBN2ReLUOpName = "BN2Relu";
mindspore/ops/_op_impl/tbe/matmul.py

@@ -17,7 +17,7 @@
 from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType

 matmul_op_info = TBERegOp("MatMul") \
-    .fusion_type("ELEMWISE") \
+    .fusion_type("OPAQUE") \
     .async_flag(False) \
     .binfile_name("matmul.so") \
     .compute_cost(10) \
tests/st/networks/models/resnet50/test_resnet50_imagenet.py

@@ -367,8 +367,7 @@ def test_resnet_and_resnet_thor_imagenet_4p():
         os.system("rm -rf " + str(i))
     print("End training...")
     assert acc > 0.15
-    # the original perf is: 20 in C75B100
-    assert cost < 22
+    assert cost < 20

     # THOR
     thor_acc = 0.0

@@ -384,5 +383,4 @@ def test_resnet_and_resnet_thor_imagenet_4p():
         os.system("rm -rf " + str(i))
     print("End training...")
     assert thor_acc > 0.22
-    # the original perf is: 21 in C75B100
-    assert thor_cost < 23
+    assert thor_cost < 21