Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
341200ab
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
341200ab
编写于
8月 29, 2020
作者:
L
limingqi107
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
gpu kernel_info_setter code review
上级
7b0370f7
变更
5
隐藏空白更改
内联
并排
Showing
5 changed files
with
26 additions
and
15 deletions
+26
-15
mindspore/ccsrc/backend/session/gpu_session.cc
mindspore/ccsrc/backend/session/gpu_session.cc
+7
-7
mindspore/ccsrc/backend/session/gpu_session.h
mindspore/ccsrc/backend/session/gpu_session.h
+1
-1
mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc
mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc
+3
-1
mindspore/ccsrc/runtime/device/gpu/kernel_info_setter.cc
mindspore/ccsrc/runtime/device/gpu/kernel_info_setter.cc
+14
-5
mindspore/ccsrc/runtime/device/gpu/kernel_info_setter.h
mindspore/ccsrc/runtime/device/gpu/kernel_info_setter.h
+1
-1
未找到文件。
mindspore/ccsrc/backend/session/gpu_session.cc
浏览文件 @
341200ab
...
...
@@ -49,10 +49,10 @@ using AnfAlgo = mindspore::session::AnfRuntimeAlgorithm;
void
GPUSession
::
SelectKernel
(
const
std
::
shared_ptr
<
KernelGraph
>
&
kernel_graph
)
const
{
MS_EXCEPTION_IF_NULL
(
kernel_graph
);
bool
in_black_list
=
CheckInModeBlackList
(
kernel_graph
);
bool
graph_format_transform
=
IsSupportFormatTransform
(
kernel_graph
);
for
(
const
auto
&
kernel_node
:
kernel_graph
->
execution_order
())
{
MS_EXCEPTION_IF_NULL
(
kernel_node
);
device
::
gpu
::
SetKernelInfo
(
kernel_node
,
in_black_list
);
device
::
gpu
::
SetKernelInfo
(
kernel_node
,
graph_format_transform
);
}
}
...
...
@@ -76,7 +76,7 @@ void GPUSession::Optimize(const std::shared_ptr<KernelGraph> &kernel_graph) {
pm
->
AddPass
(
std
::
make_shared
<
opt
::
ReplaceBNGradCastFusion
>
());
pm
->
AddPass
(
std
::
make_shared
<
opt
::
ReplaceMomentumCastFusion
>
());
pm
->
AddPass
(
std
::
make_shared
<
opt
::
ReplaceAddNFusion
>
());
if
(
!
CheckInModeBlackList
(
kernel_graph
)
&&
context_ptr
->
get_param
<
int
>
(
MS_CTX_EXECUTION_MODE
)
!=
kPynativeMode
)
{
if
(
IsSupportFormatTransform
(
kernel_graph
)
&&
context_ptr
->
get_param
<
int
>
(
MS_CTX_EXECUTION_MODE
)
!=
kPynativeMode
)
{
pm
->
AddPass
(
std
::
make_shared
<
opt
::
BatchNormReluFusion
>
());
pm
->
AddPass
(
std
::
make_shared
<
opt
::
BatchNormReluGradFusion
>
());
pm
->
AddPass
(
std
::
make_shared
<
opt
::
BatchNormAddReluFusion
>
());
...
...
@@ -193,14 +193,14 @@ void GPUSession::Execute(const std::shared_ptr<KernelGraph> &kernel_graph) const
}
}
bool
GPUSession
::
CheckInModeBlackList
(
const
std
::
shared_ptr
<
KernelGraph
>
&
kernel_graph
)
const
{
bool
GPUSession
::
IsSupportFormatTransform
(
const
std
::
shared_ptr
<
KernelGraph
>
&
kernel_graph
)
const
{
auto
kernels
=
kernel_graph
->
execution_order
();
size_t
conv_cnt
=
0
;
size_t
bn_cnt
=
0
;
for
(
const
auto
&
kernel
:
kernels
)
{
auto
kernel_name
=
AnfAlgo
::
GetCNodeName
(
kernel
);
if
(
kernel_name
==
prim
::
kPrimLayerNorm
->
name
())
{
return
true
;
return
false
;
}
if
(
kernel_name
==
prim
::
kPrimConv2D
->
name
())
{
conv_cnt
++
;
...
...
@@ -210,9 +210,9 @@ bool GPUSession::CheckInModeBlackList(const std::shared_ptr<KernelGraph> &kernel
}
}
if
(
conv_cnt
==
kConv2dCount
&&
bn_cnt
==
kFusedBatchNormCount
)
{
return
true
;
return
false
;
}
return
false
;
return
true
;
}
GraphId
GPUSession
::
CompileGraph
(
const
AnfNodePtrList
&
lst
,
const
AnfNodePtrList
&
outputs
)
{
...
...
mindspore/ccsrc/backend/session/gpu_session.h
浏览文件 @
341200ab
...
...
@@ -67,7 +67,7 @@ class GPUSession : public SessionBasic {
void
Execute
(
const
std
::
shared_ptr
<
KernelGraph
>
&
kernel_graph
)
const
;
bool
CheckInModeBlackList
(
const
std
::
shared_ptr
<
KernelGraph
>
&
kernel_graph
)
const
;
bool
IsSupportFormatTransform
(
const
std
::
shared_ptr
<
KernelGraph
>
&
kernel_graph
)
const
;
#ifdef ENABLE_DEBUGGER
void
Dump
(
const
std
::
shared_ptr
<
KernelGraph
>
&
kernel_graph
)
const
;
...
...
mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc
浏览文件 @
341200ab
...
...
@@ -404,7 +404,9 @@ void GPUKernelRuntime::ClearGraphRuntimeResource(uint32_t graph_id, const std::v
// Release the kernel resource.
for
(
const
auto
&
kernel
:
execution_order
)
{
auto
kernel_mod
=
AnfAlgo
::
GetKernelMod
(
kernel
);
MS_EXCEPTION_IF_NULL
(
kernel_mod
);
if
(
kernel_mod
==
nullptr
)
{
continue
;
}
kernel_mod
->
ReleaseResource
();
}
}
...
...
mindspore/ccsrc/runtime/device/gpu/kernel_info_setter.cc
浏览文件 @
341200ab
...
...
@@ -176,9 +176,18 @@ bool IsNeedProcessFormatInfo(const CNodePtr &kernel_node, const std::vector<Type
if
(
inputs_type
.
size
()
==
0
)
{
return
false
;
}
auto
input_shape
=
AnfAlgo
::
GetPrevNodeOutputInferShape
(
kernel_node
,
0
);
if
(
input_shape
.
size
()
!=
4
)
{
return
false
;
auto
inputs_format_position
=
iter
->
second
.
first
;
// If input position is empty, then insert all the input positions, because the input numbers of this op are variable.
if
(
inputs_format_position
.
size
()
==
0
)
{
for
(
size_t
input_index
=
0
;
input_index
<
AnfAlgo
::
GetInputTensorNum
(
kernel_node
);
input_index
++
)
{
inputs_format_position
.
push_back
(
input_index
);
}
}
for
(
const
auto
&
input_format_position
:
inputs_format_position
)
{
auto
input_shape
=
AnfAlgo
::
GetPrevNodeOutputInferShape
(
kernel_node
,
input_format_position
);
if
(
input_shape
.
size
()
!=
4
)
{
return
false
;
}
}
return
true
;
}
...
...
@@ -223,7 +232,7 @@ void UpdateKernelFormatInfo(const CNodePtr &kernel_node, const std::vector<TypeI
}
}
// namespace
void
SetKernelInfo
(
const
CNodePtr
&
kernel_node
,
bool
in_black_list
)
{
void
SetKernelInfo
(
const
CNodePtr
&
kernel_node
,
bool
graph_format_transform
)
{
std
::
vector
<
std
::
string
>
inputs_format
;
std
::
vector
<
TypeId
>
inputs_type
;
for
(
size_t
input_index
=
0
;
input_index
<
AnfAlgo
::
GetInputTensorNum
(
kernel_node
);
++
input_index
)
{
...
...
@@ -237,7 +246,7 @@ void SetKernelInfo(const CNodePtr &kernel_node, bool in_black_list) {
outputs_type
.
push_back
(
AnfAlgo
::
GetOutputInferDataType
(
kernel_node
,
output_index
));
}
std
::
string
origin_data_format
=
kOpFormat_DEFAULT
;
if
(
!
in_black_list
&&
IsNeedProcessFormatInfo
(
kernel_node
,
inputs_type
))
{
if
(
graph_format_transform
&&
IsNeedProcessFormatInfo
(
kernel_node
,
inputs_type
))
{
UpdateKernelFormatInfo
(
kernel_node
,
inputs_type
,
&
inputs_format
,
&
outputs_format
,
&
origin_data_format
);
}
std
::
shared_ptr
<
KernelBuildInfo
::
KernelBuildInfoBuilder
>
builder
=
...
...
mindspore/ccsrc/runtime/device/gpu/kernel_info_setter.h
浏览文件 @
341200ab
...
...
@@ -53,7 +53,7 @@ static std::map<std::string, std::pair<std::vector<size_t>, std::vector<size_t>>
{
prim
::
kPrimAddN
->
name
(),
{{},
{
0
}}},
};
void
SetKernelInfo
(
const
CNodePtr
&
kernel_node
,
bool
in_black_list
=
false
);
void
SetKernelInfo
(
const
CNodePtr
&
kernel_node
,
bool
graph_format_transform
=
false
);
class
KernelAttr
{
public:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录