Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
bd1261e5
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
bd1261e5
编写于
9月 07, 2020
作者:
J
jjfeing
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
adapt tensorengin modify, fix ub fusion
上级
5de9578a
变更
18
展开全部
隐藏空白更改
内联
并排
Showing
18 changed file
with
217 addition
and
256 deletion
+217
-256
mindspore/_extends/parallel_compile/tbe_compiler/compiler.py
mindspore/_extends/parallel_compile/tbe_compiler/compiler.py
+2
-33
mindspore/ccsrc/backend/kernel_compiler/kernel.h
mindspore/ccsrc/backend/kernel_compiler/kernel.h
+2
-3
mindspore/ccsrc/backend/kernel_compiler/kernel_fusion.cc
mindspore/ccsrc/backend/kernel_compiler/kernel_fusion.cc
+11
-28
mindspore/ccsrc/backend/kernel_compiler/kernel_fusion.h
mindspore/ccsrc/backend/kernel_compiler/kernel_fusion.h
+4
-5
mindspore/ccsrc/backend/kernel_compiler/oplib/oplib.h
mindspore/ccsrc/backend/kernel_compiler/oplib/oplib.h
+3
-4
mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_adapter.cc
mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_adapter.cc
+1
-1
mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_adapter.h
mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_adapter.h
+1
-1
mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_convert_utils.cc
...re/ccsrc/backend/kernel_compiler/tbe/tbe_convert_utils.cc
+1
-1
mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.cc
...ore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.cc
+175
-53
mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.h
...pore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.h
+10
-4
mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_parallel_build.cc
.../backend/kernel_compiler/tbe/tbe_kernel_parallel_build.cc
+0
-74
mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_parallel_build.h
...c/backend/kernel_compiler/tbe/tbe_kernel_parallel_build.h
+1
-6
mindspore/ccsrc/backend/session/ascend_session.cc
mindspore/ccsrc/backend/session/ascend_session.cc
+0
-1
mindspore/ccsrc/runtime/device/ascend/kernel_build_ascend.cc
mindspore/ccsrc/runtime/device/ascend/kernel_build_ascend.cc
+2
-33
mindspore/ccsrc/runtime/device/ascend/kernel_build_ascend.h
mindspore/ccsrc/runtime/device/ascend/kernel_build_ascend.h
+0
-4
mindspore/ccsrc/utils/utils.h
mindspore/ccsrc/utils/utils.h
+1
-0
mindspore/ops/_op_impl/tbe/matmul.py
mindspore/ops/_op_impl/tbe/matmul.py
+1
-1
tests/st/networks/models/resnet50/test_resnet50_imagenet.py
tests/st/networks/models/resnet50/test_resnet50_imagenet.py
+2
-4
未找到文件。
mindspore/_extends/parallel_compile/tbe_compiler/compiler.py
浏览文件 @
bd1261e5
...
...
@@ -17,8 +17,6 @@ import json
import
os
import
sys
from
te.platform.cce_conf
import
te_set_version
from
te.platform.fusion_manager
import
op_build_cfg_dis
,
op_build_cfg_en
,
set_current_op_name
,
\
init_op_pattern
,
set_op_params
,
set_op_build_type
,
get_op_pattern
,
set_current_op_func_name
from
te.platform.fusion_util
import
fusion_op
from
common
import
check_kernel_info
,
get_args
,
get_build_in_impl_path
,
get_ddk_version
...
...
@@ -27,7 +25,6 @@ build_in_impl_path = get_build_in_impl_path()
# op function list
op_build
=
"compile"
op_pre_build
=
"pre_build"
fusion_pattern_start_flag
=
"fusion_pattern_start"
fusion_pattern_end_flag
=
"fusion_pattern_end"
...
...
@@ -83,19 +80,7 @@ def build_op(build_type, json_str):
else
:
op_module
=
__import__
(
"impl."
+
op_name
,
globals
(),
locals
(),
[
op_name
],
0
)
# get function
if
build_type
==
op_pre_build
:
# set op parameter
op_build_cfg_dis
()
set_current_op_func_name
(
op_name
)
set_current_op_name
(
kernel_name
)
init_op_pattern
()
set_op_params
(
*
outputs_args
,
*
attrs_args
,
kernel_name
=
kernel_name
)
set_op_build_type
(
'prebuild'
)
if
custom_flag
:
py_fn_name
=
kernel_info
[
'op_info'
][
'name'
]
else
:
py_fn_name
=
op_name
elif
build_type
==
op_build
:
if
build_type
==
op_build
:
if
custom_flag
:
py_fn_name
=
kernel_info
[
'op_info'
][
'name'
]
else
:
...
...
@@ -106,13 +91,6 @@ def build_op(build_type, json_str):
if
op_func
is
None
:
raise
ValueError
(
"Op:{} function {} is not supported by Tbe."
.
format
(
op_name
,
build_type
))
# pre build
if
build_type
==
op_pre_build
:
op_func
(
*
inputs_args
,
*
outputs_args
,
*
attrs_args
,
kernel_name
=
kernel_name
)
# disable only pattern configuration
op_build_cfg_en
()
return
get_op_pattern
()
# call function
if
kernel_name
[
0
:
19
]
==
"bounding_box_encode"
:
return
op_func
(
*
inputs_args
,
*
outputs_args
,
*
attrs_args
,
kernel_name_val
=
kernel_name
)
...
...
@@ -120,8 +98,6 @@ def build_op(build_type, json_str):
return
op_func
(
*
inputs_args
,
*
outputs_args
,
*
attrs_args
,
kernel_name
=
kernel_name
)
except
Exception
as
e
:
if
build_type
==
op_pre_build
:
op_build_cfg_en
()
raise
RuntimeError
(
e
)
...
...
@@ -136,14 +112,9 @@ def compile_fusion_op(json_str):
Exception: If specific keyword is not found.
"""
args
=
json
.
loads
(
json_str
)
te_set_version
(
ddk_version
)
if
'fusion_op'
not
in
args
or
not
args
[
'fusion_op'
]:
raise
ValueError
(
"Json string Errors, key:fusion_op not found."
)
if
'prebuild_ops'
not
in
args
or
not
args
[
'prebuild_ops'
]:
raise
ValueError
(
"Json string Errors, key:prebuild_ops not found."
)
pre_build_op_list
=
args
[
'prebuild_ops'
]
for
op
in
pre_build_op_list
:
build_op
(
op_pre_build
,
json
.
dumps
(
op
))
fusion_op_arg
=
args
[
'fusion_op'
]
return
fusion_op
(
json
.
dumps
(
fusion_op_arg
))
...
...
@@ -159,8 +130,6 @@ def compile_with_json(json_str):
json_info
=
json
.
loads
(
json_str
)
if
"fusion_op"
in
json_info
:
ret
=
compile_fusion_op
(
json_str
)
elif
"compile_type"
in
json_info
:
ret
=
build_op
(
op_pre_build
,
json_str
)
else
:
ret
=
build_op
(
op_build
,
json_str
)
return
ret
...
...
mindspore/ccsrc/backend/kernel_compiler/kernel.h
浏览文件 @
bd1261e5
...
...
@@ -37,7 +37,6 @@ enum FusionType {
COMMREDUCE
,
SEGMENT
,
OPAQUE
,
DYNAMIC
,
UNKNOWN_FUSION_TYPE
=
-
1
,
};
enum
OpPattern
{
...
...
@@ -80,8 +79,8 @@ class KernelPack {
bool
LoadKernelMeta
(
const
std
::
string
&
json_f
,
const
std
::
string
&
processor
);
bool
ReadFromJsonFile
(
const
std
::
string
&
json_f
,
const
std
::
string
&
processor
);
const
std
::
string
Serialize
()
const
;
const
FlexArray
*
const
GetJson
()
const
{
return
json_
;
}
const
FlexArray
*
const
GetKernel
()
const
{
return
kernel_
;
}
const
FlexArray
*
GetJson
()
const
{
return
json_
;
}
const
FlexArray
*
GetKernel
()
const
{
return
kernel_
;
}
~
KernelPack
()
{
if
(
json_
)
{
delete
[]
json_
;
...
...
mindspore/ccsrc/backend/kernel_compiler/kernel_fusion.cc
浏览文件 @
bd1261e5
...
...
@@ -19,53 +19,36 @@
#include <map>
#include <string>
#include <memory>
#include <utility>
#include "backend/kernel_compiler/tbe/tbe_kernel_build.h"
#include "backend/kernel_compiler/tbe/tbe_kernel_parallel_build.h"
#include "backend/kernel_compiler/tbe/tbe_utils.h"
#include "backend/kernel_compiler/tbe/tbe_convert_utils.h"
#include "utils/ms_context.h"
namespace
mindspore
{
namespace
kernel
{
using
mindspore
::
kernel
::
tbe
::
TbeUtils
;
static
bool
GenPreBuildKernelJson
(
const
std
::
vector
<
AnfNodePtr
>
&
compute_nodes
,
std
::
vector
<
nlohmann
::
json
>
*
prebuild_op_list
)
{
MS_EXCEPTION_IF_NULL
(
prebuild_op_list
);
TbeKernelJsonCreator
creator
(
PREBUILD
);
for
(
const
auto
&
anf_node
:
compute_nodes
)
{
nlohmann
::
json
prebuild
;
if
(
!
creator
.
GenTbeSingleKernelJson
(
anf_node
,
&
prebuild
))
{
MS_LOG
(
ERROR
)
<<
"GenTbeSingleKernelJson failed"
;
return
false
;
}
(
*
prebuild_op_list
).
push_back
(
prebuild
);
}
return
true
;
}
std
::
map
<
int32_t
,
KernelModPtr
>
KernelFusion
(
const
std
::
vector
<
FusionScopeInfo
>
&
fusion_scopes
)
{
MS_LOG
(
INFO
)
<<
"kernel fusion build start, scope size:"
<<
fusion_scopes
.
size
();
std
::
map
<
int32_t
,
KernelModPtr
>
kernel_mod_ret
;
auto
build_manger
=
std
::
make_shared
<
ParallelBuildManager
>
();
MS_EXCEPTION_IF_NULL
(
build_manger
);
for
(
const
auto
&
fusion_scope_iter
:
fusion_scopes
)
{
auto
scope_id
=
fusion_scope_iter
.
scope_id
;
string
fusion_kernel_name
;
nlohmann
::
json
fusion_op
;
string
fusion_kernel
=
"te_fusion"
;
if
(
!
TbeKernelBuild
::
GenFusionScopeJson
(
fusion_scope_iter
.
input_nodes
,
fusion_scope_iter
.
compute_nodes
,
&
fusion_op
,
&
fusion_kernel
))
{
&
fusion_kernel
_name
))
{
continue
;
}
// gen kernel_name & check cache
std
::
string
json_str
=
fusion_op
.
dump
();
size_t
hash_id
=
std
::
hash
<
std
::
string
>
()(
json_str
);
auto
json_name
=
fusion_kernel
.
append
(
"_"
).
append
(
std
::
to_string
(
hash_id
));
auto
context_ptr
=
MsContext
::
GetInstance
();
MS_EXCEPTION_IF_NULL
(
context_ptr
);
auto
device_id
=
context_ptr
->
device_id
();
auto
json_name
=
fusion_kernel_name
.
append
(
"_"
).
append
(
std
::
to_string
(
hash_id
)).
append
(
"_"
).
append
(
std
::
to_string
(
device_id
));
fusion_op
[
"fusion_op_name"
]
=
json_name
;
// gen json for prebuild
std
::
vector
<
nlohmann
::
json
>
prebuild_op_list
;
if
(
!
GenPreBuildKernelJson
(
fusion_scope_iter
.
compute_nodes
,
&
prebuild_op_list
))
{
continue
;
}
// get io size
std
::
vector
<
size_t
>
input_size_list
;
std
::
vector
<
size_t
>
output_size_list
;
...
...
@@ -80,20 +63,20 @@ std::map<int32_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo>
auto
kernel_mod
=
build_manger
->
GenKernelMod
(
json_name
,
tbe
::
kProcessorAiCore
,
input_size_list
,
output_size_list
,
kernel_pack
);
if
(
kernel_mod
!=
nullptr
)
{
kernel_mod_ret
[
scope_id
]
=
kernel_mod
;
kernel_mod_ret
[
fusion_scope_iter
.
scope_id
]
=
kernel_mod
;
continue
;
}
}
// fusion build
nlohmann
::
json
fusion_json
;
fusion_json
[
"fusion_op"
]
=
fusion_op
;
fusion_json
[
"prebuild_ops"
]
=
prebuild_op_list
;
auto
task_id
=
build_manger
->
StartCompileOp
(
fusion_json
);
TbeUtils
::
SaveJsonInfo
(
json_name
,
fusion_json
.
dump
());
if
(
task_id
<
0
)
{
MS_EXCEPTION
(
ArgumentError
)
<<
"start compile failed."
;
}
build_manger
->
SaveTaskInfo
(
task_id
,
nullptr
,
json_name
,
input_size_list
,
output_size_list
,
scope_id
);
build_manger
->
SaveTaskInfo
(
task_id
,
nullptr
,
json_name
,
input_size_list
,
output_size_list
,
fusion_scope_iter
.
scope_id
);
}
int
build_failed_num
=
0
;
...
...
mindspore/ccsrc/backend/kernel_compiler/kernel_fusion.h
浏览文件 @
bd1261e5
...
...
@@ -16,6 +16,7 @@
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_KERNELFUSION_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_KERNELFUSION_H_
#include <utility>
#include <vector>
#include <map>
#include "backend/kernel_compiler/kernel.h"
...
...
@@ -25,11 +26,9 @@ namespace kernel {
* @brief fuse op and return a callable mod
*/
struct
FusionScopeInfo
{
FusionScopeInfo
()
{}
FusionScopeInfo
(
int32_t
id
,
const
std
::
vector
<
AnfNodePtr
>
&
in
,
const
std
::
vector
<
AnfNodePtr
>
&
comp
,
const
std
::
vector
<
AnfNodePtr
>
&
out
)
:
scope_id
(
id
),
input_nodes
(
in
),
compute_nodes
(
comp
),
output_nodes
(
out
)
{}
int32_t
scope_id
;
FusionScopeInfo
(
int32_t
id
,
std
::
vector
<
AnfNodePtr
>
in
,
std
::
vector
<
AnfNodePtr
>
comp
,
std
::
vector
<
AnfNodePtr
>
out
)
:
scope_id
(
id
),
input_nodes
(
std
::
move
(
in
)),
compute_nodes
(
std
::
move
(
comp
)),
output_nodes
(
std
::
move
(
out
))
{}
int32_t
scope_id
{};
std
::
vector
<
AnfNodePtr
>
input_nodes
;
std
::
vector
<
AnfNodePtr
>
compute_nodes
;
std
::
vector
<
AnfNodePtr
>
output_nodes
;
...
...
mindspore/ccsrc/backend/kernel_compiler/oplib/oplib.h
浏览文件 @
bd1261e5
...
...
@@ -40,14 +40,13 @@ class OpLib {
private:
static
bool
RegOpFromLocalInfo
();
static
bool
DecodeOpInfo
(
const
nlohmann
::
json
&
obj
,
const
OpImplyType
imply_type
,
const
std
::
string
&
impl_path
);
static
bool
DecodeAttr
(
const
nlohmann
::
json
&
obj
,
const
OpImplyType
imply_type
,
const
std
::
shared_ptr
<
OpInfo
>
&
op_info
);
static
bool
DecodeOpInfo
(
const
nlohmann
::
json
&
obj
,
OpImplyType
imply_type
,
const
std
::
string
&
impl_path
);
static
bool
DecodeAttr
(
const
nlohmann
::
json
&
obj
,
OpImplyType
imply_type
,
const
std
::
shared_ptr
<
OpInfo
>
&
op_info
);
static
bool
DecodeDtypeFormat
(
const
nlohmann
::
json
&
dtype_format
,
const
std
::
shared_ptr
<
OpIOInfo
>
&
op_io
,
size_t
index
);
static
void
DecodeTBESpecificInfo
(
const
nlohmann
::
json
&
obj
,
const
std
::
shared_ptr
<
OpInfo
>
&
op_info
);
static
void
DecodeAKGSpecificInfo
(
const
nlohmann
::
json
&
obj
,
const
std
::
shared_ptr
<
OpInfo
>
&
op_info
);
static
bool
DecodeInputOutput
(
const
nlohmann
::
json
&
obj
,
const
OpImplyType
imply_type
,
const
OpIOType
io_type
,
static
bool
DecodeInputOutput
(
const
nlohmann
::
json
&
obj
,
OpImplyType
imply_type
,
OpIOType
io_type
,
const
std
::
shared_ptr
<
OpInfo
>
&
op_info
,
const
nlohmann
::
json
&
dtype_format
);
static
bool
GetRefInfo
(
const
std
::
shared_ptr
<
OpInfo
>
&
op_info
);
static
bool
CheckRepetition
(
const
std
::
shared_ptr
<
OpInfo
>
&
op_info
);
...
...
mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_adapter.cc
浏览文件 @
bd1261e5
...
...
@@ -173,7 +173,7 @@ void TbeAdapter::NormalizeFuncName(std::string *func_name) {
*
func_name
=
name_tmp
;
auto
iter
=
tbe_func_adapter_map
.
find
(
*
func_name
);
if
(
iter
!=
tbe_func_adapter_map
.
end
())
{
MS_LOG
(
INFO
)
<<
"
map actual op from me "
<<
*
func_name
<<
" to tbe op
"
<<
iter
->
second
;
MS_LOG
(
INFO
)
<<
"
Map actual op from me: "
<<
*
func_name
<<
" to tbe op:
"
<<
iter
->
second
;
*
func_name
=
iter
->
second
;
}
}
...
...
mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_adapter.h
浏览文件 @
bd1261e5
...
...
@@ -27,7 +27,7 @@
// the TBE back-end operator implementation difference
namespace
mindspore
{
namespace
kernel
{
enum
kCreaterType
:
int
{
SINGLE_BUILD
=
0
,
PREBUILD
,
OP_SELECT_FORMAT
,
CHECK_SUPPORTED
,
OP_PRE_COMPILE
};
enum
kCreaterType
:
int
{
SINGLE_BUILD
=
0
,
OP_SELECT_FORMAT
,
CHECK_SUPPORTED
,
OP_PRE_COMPILE
};
namespace
tbe
{
using
FAttrsPass
=
void
(
*
)(
const
AnfNodePtr
&
anf_node
,
const
std
::
vector
<
std
::
shared_ptr
<
OpAttr
>>
&
op_info_attrs
,
nlohmann
::
json
*
attrs_json
);
...
...
mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_convert_utils.cc
浏览文件 @
bd1261e5
...
...
@@ -63,7 +63,7 @@ const std::unordered_map<std::string, size_t> type_nbyte_maps = {
const
std
::
unordered_map
<
std
::
string
,
FusionType
>
fusion_type_maps
=
{
{
"CONVLUTION"
,
FusionType
::
CONVLUTION
},
{
"ELEMWISE"
,
FusionType
::
ELEMWISE
},
{
"COMMREDUCE"
,
FusionType
::
COMMREDUCE
},
{
"SEGMENT"
,
FusionType
::
SEGMENT
},
{
"
DYNAMIC"
,
FusionType
::
DYNAMIC
},
{
"
OPAQUE"
,
FusionType
::
OPAQUE
},
{
"SEGMENT"
,
FusionType
::
SEGMENT
},
{
"OPAQUE"
,
FusionType
::
OPAQUE
},
};
TypeId
DtypeToTypeId
(
const
std
::
string
&
dtypes
)
{
...
...
mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.cc
浏览文件 @
bd1261e5
此差异已折叠。
点击以展开。
mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.h
浏览文件 @
bd1261e5
...
...
@@ -41,8 +41,8 @@ class TbeKernelBuild {
std
::
vector
<
size_t
>
*
output_size_list
);
// Ub Fuison
static
bool
GenFusionScopeJson
(
const
std
::
vector
<
AnfNodePtr
>
&
input_nodes
,
const
std
::
vector
<
AnfNodePtr
>
&
compute_nodes
,
nlohmann
::
json
*
fusion_
str
,
std
::
string
*
fusion_kernel
);
const
std
::
vector
<
AnfNodePtr
>
&
compute_nodes
,
nlohmann
::
json
*
fusion_
json
,
std
::
string
*
fusion_kernel
_name
);
static
bool
GetIOSize
(
const
nlohmann
::
json
&
fusion_op_list
,
const
std
::
vector
<
AnfNodePtr
>
&
output_nodes
,
std
::
vector
<
size_t
>
*
input_size_list
,
std
::
vector
<
size_t
>
*
output_size_list
);
...
...
@@ -61,9 +61,14 @@ class TbeKernelBuild {
static
std
::
vector
<
size_t
>
GetDescOutputIndex
(
const
std
::
vector
<
int
>
&
output_used_nums
);
static
bool
GenFusionComputeOutputJson
(
const
mindspore
::
CNodePtr
&
cnode
,
std
::
vector
<
nlohmann
::
json
>
*
output_desc_list
);
static
void
GenPreDescJson
(
nlohmann
::
json
*
output_desc
);
static
void
GenFusionComputeCommonJson
(
const
mindspore
::
CNodePtr
&
cnode
,
nlohmann
::
json
*
compute_op_str
,
std
::
string
*
fusion_kernel_name
);
static
void
GenFusionComputePreBuildJson
(
const
mindspore
::
CNodePtr
&
cnode
,
nlohmann
::
json
*
compute_op_str
);
static
void
GenDescJson
(
const
std
::
shared_ptr
<
mindspore
::
AnfNode
>
&
anf_node
,
size_t
node_out_idx
,
size_t
desc_output_idx
,
nlohmann
::
json
*
output_desc
,
FusionDataType
fusion_data_type
=
kFusionNormal
);
static
void
GenSuffixDescJson
(
nlohmann
::
json
*
output_desc
);
static
void
GenReusedOutputDesc
(
const
std
::
shared_ptr
<
mindspore
::
AnfNode
>
&
anf_node
,
size_t
index
,
size_t
output_index
,
nlohmann
::
json
*
output_desc
);
static
size_t
GetIOSizeImpl
(
const
nlohmann
::
json
&
desc
);
...
...
@@ -76,6 +81,7 @@ class TbeKernelBuild {
static
bool
IsDynamicInput
(
const
CNodePtr
&
cnode
);
static
size_t
GetOptionalInput
(
const
CNodePtr
&
cnode
,
bool
is_dynamic_input
);
static
std
::
string
GetRealOpType
(
const
std
::
string
&
origin_type
);
static
std
::
string
GetNodeFusionType
(
const
CNodePtr
&
cnode
);
};
class
TbeKernelJsonCreator
{
...
...
@@ -84,14 +90,14 @@ class TbeKernelJsonCreator {
~
TbeKernelJsonCreator
()
=
default
;
bool
GenTbeSingleKernelJson
(
const
std
::
shared_ptr
<
AnfNode
>
&
anf_node
,
nlohmann
::
json
*
kernel_json
);
std
::
string
json_name
()
{
return
json_name_
;
}
bool
GenTbeAttrJson
(
const
std
::
shared_ptr
<
AnfNode
>
&
anf_node
,
const
std
::
shared_ptr
<
OpInfo
>
&
op_info
,
nlohmann
::
json
*
attrs_json
);
private:
bool
GenTbeInputsJson
(
const
std
::
shared_ptr
<
AnfNode
>
&
anf_node
,
const
std
::
shared_ptr
<
OpInfo
>
&
op_info
,
nlohmann
::
json
*
inputs_json
);
bool
GenTbeOutputsJson
(
const
std
::
shared_ptr
<
AnfNode
>
&
anf_node
,
const
std
::
shared_ptr
<
OpInfo
>
&
op_info
,
nlohmann
::
json
*
outputs_json
);
bool
GenTbeAttrJson
(
const
std
::
shared_ptr
<
AnfNode
>
&
anf_node
,
const
std
::
shared_ptr
<
OpInfo
>
&
op_info
,
nlohmann
::
json
*
attrs_json
);
static
void
ParseAttrValue
(
const
std
::
string
&
type
,
const
ValuePtr
&
value
,
nlohmann
::
json
*
attr_obj
);
bool
GenInputDescJson
(
const
std
::
shared_ptr
<
AnfNode
>
&
anf_node
,
size_t
real_input_index
,
bool
value
,
const
std
::
shared_ptr
<
OpIOInfo
>
&
input_ptr
,
const
string
&
op_input_name
,
size_t
input_i
,
...
...
mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_parallel_build.cc
浏览文件 @
bd1261e5
...
...
@@ -33,42 +33,6 @@
namespace
mindspore
{
namespace
kernel
{
using
mindspore
::
kernel
::
tbe
::
TbeUtils
;
bool
TbeOpParallelPreBuild
(
const
std
::
vector
<
AnfNodePtr
>
&
anf_nodes
)
{
auto
build_manger
=
std
::
make_shared
<
ParallelBuildManager
>
();
MS_EXCEPTION_IF_NULL
(
build_manger
);
for
(
const
auto
&
anf_node
:
anf_nodes
)
{
// gen kernel json
MS_EXCEPTION_IF_NULL
(
anf_node
);
nlohmann
::
json
kernel_json
;
TbeKernelJsonCreator
creator
(
OP_PRE_COMPILE
);
if
(
!
creator
.
GenTbeSingleKernelJson
(
anf_node
,
&
kernel_json
))
{
MS_LOG
(
ERROR
)
<<
"GenTbeSingleKernelJson failed"
;
return
false
;
}
kernel_json
[
"compile_type"
]
=
"pre_build"
;
// op build
auto
task_id
=
build_manger
->
StartCompileOp
(
kernel_json
);
build_manger
->
SavePreTaskInfo
(
task_id
,
anf_node
);
}
while
(
!
build_manger
->
IsAllPreTaskFinish
())
{
int
task_id
=
-
1
;
std
::
string
task_result
;
std
::
string
pre_build_result
;
auto
ret
=
build_manger
->
WaitOne
(
&
task_id
,
&
task_result
,
&
pre_build_result
);
if
(
!
ret
)
{
MS_EXCEPTION
(
ArgumentError
)
<<
"Pre Build Failed. wait one ret:"
<<
ret
<<
", task id:"
<<
task_id
;
}
if
(
task_result
!=
"Success"
)
{
MS_EXCEPTION
(
ArgumentError
)
<<
"task pre compile Failed, task id:"
<<
task_id
<<
", cause:"
<<
task_result
;
}
build_manger
->
PreTaskFinishProcess
(
task_id
,
pre_build_result
);
}
return
true
;
}
bool
TbeOpParallelBuild
(
const
std
::
vector
<
AnfNodePtr
>
&
anf_nodes
)
{
auto
build_manger
=
std
::
make_shared
<
ParallelBuildManager
>
();
MS_EXCEPTION_IF_NULL
(
build_manger
);
...
...
@@ -123,15 +87,8 @@ bool TbeOpParallelBuild(const std::vector<AnfNodePtr> &anf_nodes) {
return
build_manger
->
GenSameOpKernelMod
();
}
ParallelBuildManager
::
ParallelBuildManager
()
{}
ParallelBuildManager
::~
ParallelBuildManager
()
{
ResetTaskInfo
();
}
void
ParallelBuildManager
::
SavePreTaskInfo
(
int32_t
task_id
,
const
mindspore
::
AnfNodePtr
&
anf_node
)
{
MS_LOG
(
INFO
)
<<
"SavePreTaskInfo, task id: "
<<
task_id
;
pre_task_map_
[
task_id
]
=
anf_node
;
}
void
ParallelBuildManager
::
SaveTaskInfo
(
int32_t
task_id
,
const
mindspore
::
AnfNodePtr
&
anf_node
,
const
std
::
string
&
json_name
,
const
std
::
vector
<
size_t
>
&
input_size_list
,
const
std
::
vector
<
size_t
>
&
output_size_list
,
int32_t
scope_id
)
{
...
...
@@ -150,42 +107,11 @@ void ParallelBuildManager::SaveTaskInfo(int32_t task_id, const mindspore::AnfNod
task_map_
[
task_id
]
=
task_info
;
}
bool
ParallelBuildManager
::
IsAllPreTaskFinish
()
const
{
MS_LOG
(
INFO
)
<<
"wait pre build process task_num: "
<<
pre_task_map_
.
size
();
return
pre_task_map_
.
empty
();
}
bool
ParallelBuildManager
::
IsAllTaskFinish
()
const
{
MS_LOG
(
INFO
)
<<
"wait process task_num: "
<<
task_map_
.
size
();
return
task_map_
.
empty
();
}
void
ParallelBuildManager
::
PreTaskFinishProcess
(
int32_t
task_id
,
const
std
::
string
&
pre_build_result
)
{
auto
task_iter
=
pre_task_map_
.
find
(
task_id
);
if
(
task_iter
==
pre_task_map_
.
end
())
{
MS_EXCEPTION
(
ArgumentError
)
<<
"can find pre task_id:"
<<
task_id
;
}
auto
node
=
task_iter
->
second
;
auto
builder
=
std
::
make_shared
<
kernel
::
KernelBuildInfo
::
KernelBuildInfoBuilder
>
(
AnfAlgo
::
GetSelectKernelBuildInfo
(
node
));
std
::
string
start_flag
=
"fusion_pattern_start"
;
std
::
string
end_flag
=
"fusion_pattern_end"
;
int
start
=
pre_build_result
.
find
(
start_flag
);
int
end
=
pre_build_result
.
find
(
end_flag
);
if
(
start
!=
-
1
&&
end
!=
-
1
&&
end
>=
start
)
{
std
::
string
result
=
pre_build_result
.
substr
(
start
+
start_flag
.
size
(),
end
-
start
-
start_flag
.
size
());
if
(
result
==
""
)
{
(
void
)
pre_task_map_
.
erase
(
task_iter
);
return
;
}
transform
(
result
.
begin
(),
result
.
end
(),
result
.
begin
(),
::
toupper
);
FusionType
fusion_type
=
tbe
::
GetFusionType
(
result
);
builder
->
SetFusionType
(
fusion_type
);
AnfAlgo
::
SetSelectKernelBuildInfo
(
builder
->
Build
(),
node
.
get
());
}
(
void
)
pre_task_map_
.
erase
(
task_iter
);
}
std
::
pair
<
int32_t
,
KernelModPtr
>
ParallelBuildManager
::
TaskFinishProcess
(
int32_t
task_id
,
bool
set_kernel_mod
)
{
auto
task_iter
=
task_map_
.
find
(
task_id
);
if
(
task_iter
==
task_map_
.
end
())
{
...
...
mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_parallel_build.h
浏览文件 @
bd1261e5
...
...
@@ -28,7 +28,6 @@
namespace
mindspore
{
namespace
kernel
{
bool
TbeOpParallelPreBuild
(
const
std
::
vector
<
AnfNodePtr
>
&
anf_nodes
);
bool
TbeOpParallelBuild
(
const
std
::
vector
<
AnfNodePtr
>
&
anf_nodes
);
struct
KernelBuildTaskInfo
{
...
...
@@ -42,9 +41,8 @@ struct KernelBuildTaskInfo {
class
ParallelBuildManager
{
public:
ParallelBuildManager
();
ParallelBuildManager
()
=
default
;
~
ParallelBuildManager
();
void
SavePreTaskInfo
(
int32_t
task_id
,
const
AnfNodePtr
&
anf_node
);
void
SaveTaskInfo
(
int32_t
task_id
,
const
AnfNodePtr
&
anf_node
,
const
std
::
string
&
json_name
,
const
std
::
vector
<
size_t
>
&
input_size_list
,
const
std
::
vector
<
size_t
>
&
output_size_list
,
int32_t
scope_id
=
0
);
...
...
@@ -54,10 +52,7 @@ class ParallelBuildManager {
bool
SearchInCache
(
const
std
::
string
&
json_name
,
const
std
::
string
&
processor
,
const
std
::
vector
<
size_t
>
&
input_size_list
,
const
std
::
vector
<
size_t
>
&
output_size_list
,
AnfNode
*
node
)
const
;
bool
IsAllPreTaskFinish
()
const
;
bool
IsAllTaskFinish
()
const
;
void
PreTaskFinishProcess
(
int32_t
task_id
,
const
std
::
string
&
pre_build_result
);
std
::
pair
<
int32_t
,
KernelModPtr
>
TaskFinishProcess
(
int32_t
task_id
,
bool
set_kernel_mod
=
true
);
KernelModPtr
GenKernelMod
(
const
string
&
json_name
,
const
string
&
processor
,
const
std
::
vector
<
size_t
>
&
input_size_list
,
const
std
::
vector
<
size_t
>
&
output_size_list
,
...
...
mindspore/ccsrc/backend/session/ascend_session.cc
浏览文件 @
bd1261e5
...
...
@@ -474,7 +474,6 @@ void AscendSession::InitRuntimeResource() {
}
void
AscendSession
::
HardwareOptimize
(
const
std
::
shared_ptr
<
KernelGraph
>
&
kernel_graph
)
const
{
device
::
ascend
::
KernelPreBuild
(
kernel_graph
.
get
());
MS_LOG
(
INFO
)
<<
"HardwareOptimize start!"
;
opt
::
AscendBackendOptimization
(
kernel_graph
);
opt
::
AscendGraphKernelCommonProcess
(
kernel_graph
);
...
...
mindspore/ccsrc/runtime/device/ascend/kernel_build_ascend.cc
浏览文件 @
bd1261e5
...
...
@@ -19,7 +19,8 @@
#include <vector>
#include <string>
#include <memory>
#include <set>
#include <map>
#include "runtime/device/ascend/kernel_select_ascend.h"
#include "runtime/device/kernel_info.h"
#include "backend/kernel_compiler/kernel.h"
...
...
@@ -61,32 +62,6 @@ static kernel::KernelModPtr SerialCompileImpl(const AnfNodePtr &anf_node) {
return
kernel_mod_ptr
;
}
static
bool
KernelPreBuildParallelCompile
(
const
mindspore
::
session
::
KernelGraph
*
kernel_graph_ptr
)
{
MS_EXCEPTION_IF_NULL
(
kernel_graph_ptr
);
std
::
vector
<
AnfNodePtr
>
tbe_nodes
;
for
(
const
auto
&
anf_node
:
kernel_graph_ptr
->
execution_order
())
{
MS_EXCEPTION_IF_NULL
(
anf_node
);
if
(
!
AnfAlgo
::
IsRealKernel
(
anf_node
))
{
continue
;
}
KernelType
kernel_type
=
AnfAlgo
::
GetKernelType
(
anf_node
);
switch
(
kernel_type
)
{
case
KernelType
::
TBE_KERNEL
:
{
if
(
AnfAlgo
::
GetKernelMod
(
anf_node
)
==
nullptr
&&
AnfAlgo
::
GetFusionType
(
anf_node
)
==
kernel
::
FusionType
::
DYNAMIC
)
{
tbe_nodes
.
push_back
(
anf_node
);
}
break
;
}
default:
{
break
;
}
}
}
bool
ret
=
kernel
::
TbeOpParallelPreBuild
(
tbe_nodes
);
return
ret
;
}
static
bool
KernelBuildParallelCompile
(
const
mindspore
::
session
::
KernelGraph
*
kernel_graph_ptr
)
{
MS_EXCEPTION_IF_NULL
(
kernel_graph_ptr
);
std
::
vector
<
AnfNodePtr
>
tbe_nodes
;
...
...
@@ -230,12 +205,6 @@ static bool IsAtomicNode(const CNodePtr &kernel_node) {
return
!
(
workspace_indexs
.
empty
()
&&
output_indexs
.
empty
());
}
bool
KernelPreBuild
(
const
mindspore
::
session
::
KernelGraph
*
kernel_graph_ptr
)
{
MS_EXCEPTION_IF_NULL
(
kernel_graph_ptr
);
bool
ret
=
device
::
ascend
::
KernelPreBuildParallelCompile
(
kernel_graph_ptr
);
return
ret
;
}
bool
KernelBuild
(
const
mindspore
::
session
::
KernelGraph
*
kernel_graph_ptr
)
{
MS_EXCEPTION_IF_NULL
(
kernel_graph_ptr
);
TbeUtils
::
LoadCache
();
...
...
mindspore/ccsrc/runtime/device/ascend/kernel_build_ascend.h
浏览文件 @
bd1261e5
...
...
@@ -22,10 +22,6 @@
namespace
mindspore
{
namespace
device
{
namespace
ascend
{
/**
* @brief kernel pre build for ascend.
*/
bool
KernelPreBuild
(
const
mindspore
::
session
::
KernelGraph
*
kernel_graph_ptr
);
/**
* @brief kernel build for ascend.
*/
...
...
mindspore/ccsrc/utils/utils.h
浏览文件 @
bd1261e5
...
...
@@ -32,6 +32,7 @@ namespace mindspore {
// op name. Op which not exists in operator/ops.h, so define it's name here
constexpr
auto
kFour2FiveOpName
=
"Four2Five"
;
constexpr
auto
kFive2FourOpName
=
"Five2Four"
;
constexpr
auto
kConv2DOpName
=
"Conv2D"
;
constexpr
auto
kConvBN1OpName
=
"ConvBN1"
;
constexpr
auto
kBN2AddReluOpName
=
"BN2AddRelu"
;
constexpr
auto
kBN2ReLUOpName
=
"BN2Relu"
;
...
...
mindspore/ops/_op_impl/tbe/matmul.py
浏览文件 @
bd1261e5
...
...
@@ -17,7 +17,7 @@
from
mindspore.ops.op_info_register
import
op_info_register
,
TBERegOp
,
DataType
matmul_op_info
=
TBERegOp
(
"MatMul"
)
\
.
fusion_type
(
"
ELEMWIS
E"
)
\
.
fusion_type
(
"
OPAQU
E"
)
\
.
async_flag
(
False
)
\
.
binfile_name
(
"matmul.so"
)
\
.
compute_cost
(
10
)
\
...
...
tests/st/networks/models/resnet50/test_resnet50_imagenet.py
浏览文件 @
bd1261e5
...
...
@@ -367,8 +367,7 @@ def test_resnet_and_resnet_thor_imagenet_4p():
os
.
system
(
"rm -rf "
+
str
(
i
))
print
(
"End training..."
)
assert
acc
>
0.15
# the original perf is: 20 in C75B100
assert
cost
<
22
assert
cost
<
20
# THOR
thor_acc
=
0.0
...
...
@@ -384,5 +383,4 @@ def test_resnet_and_resnet_thor_imagenet_4p():
os
.
system
(
"rm -rf "
+
str
(
i
))
print
(
"End training..."
)
assert
thor_acc
>
0.22
# the original perf is: 21 in C75B100
assert
thor_cost
<
23
assert
thor_cost
<
21
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录