Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
2301_77200941
mindspore
提交
6fdd5208
M
mindspore
项目概览
2301_77200941
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
6fdd5208
编写于
8月 29, 2020
作者:
L
lizhenyu
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add mode black list checker
上级
81004f5e
变更
4
显示空白变更内容
内联
并排
Showing
4 changed files
with
33 additions
and
5 deletions
+33
-5
mindspore/ccsrc/backend/session/gpu_session.cc
mindspore/ccsrc/backend/session/gpu_session.cc
+25
-2
mindspore/ccsrc/backend/session/gpu_session.h
mindspore/ccsrc/backend/session/gpu_session.h
+5
-0
mindspore/ccsrc/runtime/device/gpu/kernel_info_setter.cc
mindspore/ccsrc/runtime/device/gpu/kernel_info_setter.cc
+2
-2
mindspore/ccsrc/runtime/device/gpu/kernel_info_setter.h
mindspore/ccsrc/runtime/device/gpu/kernel_info_setter.h
+1
-1
未找到文件。
mindspore/ccsrc/backend/session/gpu_session.cc
浏览文件 @
6fdd5208
...
...
@@ -49,9 +49,10 @@ using AnfAlgo = mindspore::session::AnfRuntimeAlgorithm;
// Sets kernel info for every kernel node in the graph's execution order.
//
// The black-list check is evaluated once for the whole graph and its result is
// forwarded to each SetKernelInfo call.
//
// Raises (via MS_EXCEPTION_IF_NULL) when kernel_graph or any kernel node is null.
void GPUSession::SelectKernel(const std::shared_ptr<KernelGraph> &kernel_graph) const {
  MS_EXCEPTION_IF_NULL(kernel_graph);
  const bool in_black_list = CheckInModeBlackList(kernel_graph);
  for (const auto &kernel_node : kernel_graph->execution_order()) {
    MS_EXCEPTION_IF_NULL(kernel_node);
    // Exactly one call per node: the one-argument SetKernelInfo(kernel_node) call was the
    // pre-change line of this hunk and is superseded by the two-argument form.
    device::gpu::SetKernelInfo(kernel_node, in_black_list);
  }
}
...
...
@@ -75,7 +76,7 @@ void GPUSession::Optimize(const std::shared_ptr<KernelGraph> &kernel_graph) {
pm
->
AddPass
(
std
::
make_shared
<
opt
::
ReplaceBNGradCastFusion
>
());
pm
->
AddPass
(
std
::
make_shared
<
opt
::
ReplaceMomentumCastFusion
>
());
pm
->
AddPass
(
std
::
make_shared
<
opt
::
ReplaceAddNFusion
>
());
if
(
context_ptr
->
execution_mode
()
!=
kPynativeMode
)
{
if
(
!
CheckInModeBlackList
(
kernel_graph
)
&&
context_ptr
->
execution_mode
()
!=
kPynativeMode
)
{
pm
->
AddPass
(
std
::
make_shared
<
opt
::
BatchNormReluFusion
>
());
pm
->
AddPass
(
std
::
make_shared
<
opt
::
BatchNormReluGradFusion
>
());
pm
->
AddPass
(
std
::
make_shared
<
opt
::
BatchNormAddReluFusion
>
());
...
...
@@ -192,6 +193,28 @@ void GPUSession::Execute(const std::shared_ptr<KernelGraph> &kernel_graph) const
}
}
bool
GPUSession
::
CheckInModeBlackList
(
const
std
::
shared_ptr
<
KernelGraph
>
&
kernel_graph
)
const
{
auto
kernels
=
kernel_graph
->
execution_order
();
size_t
conv_cnt
=
0
;
size_t
bn_cnt
=
0
;
for
(
const
auto
&
kernel
:
kernels
)
{
auto
kernel_name
=
AnfAlgo
::
GetCNodeName
(
kernel
);
if
(
kernel_name
==
prim
::
kPrimLayerNorm
->
name
())
{
return
true
;
}
if
(
kernel_name
==
prim
::
kPrimConv2D
->
name
())
{
conv_cnt
++
;
}
if
(
kernel_name
==
prim
::
kPrimFusedBatchNormEx
->
name
())
{
bn_cnt
++
;
}
}
if
(
conv_cnt
==
kConv2dCount
&&
bn_cnt
==
kFusedBatchNormCount
)
{
return
true
;
}
return
false
;
}
GraphId
GPUSession
::
CompileGraph
(
const
AnfNodePtrList
&
lst
,
const
AnfNodePtrList
&
outputs
)
{
// Construct graph, if successfully, graph_sum_ + 1
auto
graph_id
=
graph_sum_
;
...
...
mindspore/ccsrc/backend/session/gpu_session.h
浏览文件 @
6fdd5208
...
...
@@ -67,6 +67,8 @@ class GPUSession : public SessionBasic {
void
Execute
(
const
std
::
shared_ptr
<
KernelGraph
>
&
kernel_graph
)
const
;
// Returns true when the graph's execution order contains a LayerNorm kernel, or
// contains exactly kConv2dCount Conv2D kernels and exactly kFusedBatchNormCount
// FusedBatchNormEx kernels; returns false otherwise.
bool
CheckInModeBlackList
(
const
std
::
shared_ptr
<
KernelGraph
>
&
kernel_graph
)
const
;
#ifdef ENABLE_DEBUGGER
void
Dump
(
const
std
::
shared_ptr
<
KernelGraph
>
&
kernel_graph
)
const
;
...
...
@@ -80,6 +82,9 @@ class GPUSession : public SessionBasic {
void
PostLoadTensor
(
const
std
::
shared_ptr
<
KernelGraph
>
&
kernel_graph
)
const
;
#endif
// Kernel counts that CheckInModeBlackList compares against: a graph whose execution
// order holds exactly kConv2dCount Conv2D kernels and exactly kFusedBatchNormCount
// FusedBatchNormEx kernels is reported as black-listed.
// NOTE(review): these values look like the fingerprint of one specific network —
// confirm which model they are meant to identify.
static
constexpr
size_t
kConv2dCount
=
96
;
static
constexpr
size_t
kFusedBatchNormCount
=
94
;
};
using
GPUSessionPtr
=
std
::
shared_ptr
<
GPUSession
>
;
MS_REG_SESSION
(
kGPUDevice
,
GPUSession
);
...
...
mindspore/ccsrc/runtime/device/gpu/kernel_info_setter.cc
浏览文件 @
6fdd5208
...
...
@@ -223,7 +223,7 @@ void UpdateKernelFormatInfo(const CNodePtr &kernel_node, const std::vector<TypeI
}
}
// namespace
void
SetKernelInfo
(
const
CNodePtr
&
kernel_node
)
{
void
SetKernelInfo
(
const
CNodePtr
&
kernel_node
,
bool
in_black_list
)
{
std
::
vector
<
std
::
string
>
inputs_format
;
std
::
vector
<
TypeId
>
inputs_type
;
for
(
size_t
input_index
=
0
;
input_index
<
AnfAlgo
::
GetInputTensorNum
(
kernel_node
);
++
input_index
)
{
...
...
@@ -237,7 +237,7 @@ void SetKernelInfo(const CNodePtr &kernel_node) {
outputs_type
.
push_back
(
AnfAlgo
::
GetOutputInferDataType
(
kernel_node
,
output_index
));
}
std
::
string
origin_data_format
=
kOpFormat_DEFAULT
;
if
(
IsNeedProcessFormatInfo
(
kernel_node
,
inputs_type
))
{
if
(
!
in_black_list
&&
IsNeedProcessFormatInfo
(
kernel_node
,
inputs_type
))
{
UpdateKernelFormatInfo
(
kernel_node
,
inputs_type
,
&
inputs_format
,
&
outputs_format
,
&
origin_data_format
);
}
std
::
shared_ptr
<
KernelBuildInfo
::
KernelBuildInfoBuilder
>
builder
=
...
...
mindspore/ccsrc/runtime/device/gpu/kernel_info_setter.h
浏览文件 @
6fdd5208
...
...
@@ -53,7 +53,7 @@ static std::map<std::string, std::pair<std::vector<size_t>, std::vector<size_t>>
{
prim
::
kPrimAddN
->
name
(),
{{},
{
0
}}},
};
// Sets the kernel info for kernel_node.
//
// in_black_list: when true, the format-update step (UpdateKernelFormatInfo) is
// skipped for this node. Defaults to false so existing single-argument callers keep
// their previous behavior.
//
// Only this declaration is kept: the older one-argument declaration is superseded,
// and leaving both would make a one-argument call ambiguous.
void SetKernelInfo(const CNodePtr &kernel_node, bool in_black_list = false);
class
KernelAttr
{
public:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录