Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
ee7a6401
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
ee7a6401
编写于
4月 23, 2020
作者:
V
VectorSL
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
gpu update conv kernel for auto-mixed-precision
上级
eefb6edd
变更
4
隐藏空白更改
内联
并排
Showing
4 changed files
with
28 additions
and
7 deletions
+28
-7
mindspore/ccsrc/kernel/gpu/math/binary_op_gpu_kernel.h
mindspore/ccsrc/kernel/gpu/math/binary_op_gpu_kernel.h
+1
-1
mindspore/ccsrc/kernel/gpu/nn/conv2d_gpu_kernel.h
mindspore/ccsrc/kernel/gpu/nn/conv2d_gpu_kernel.h
+9
-2
mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_filter_gpu_kernel.h
mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_filter_gpu_kernel.h
+9
-2
mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_input_gpu_kernel.h
mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_input_gpu_kernel.h
+9
-2
未找到文件。
mindspore/ccsrc/kernel/gpu/math/binary_op_gpu_kernel.h
浏览文件 @
ee7a6401
...
...
@@ -218,7 +218,7 @@ class BinaryOpGpuKernel : public GpuKernel {
}
}
CHECK_CUDNN_RET_WITH_EXCEPT
(
cudnnSetOpTensorDescriptor
(
opTensor_descriptor_
,
tensor_op_
,
cudnn_data_type_
,
CUDNN_NOT_PROPAGATE_NAN
),
cudnnSetOpTensorDescriptor
(
opTensor_descriptor_
,
tensor_op_
,
CUDNN_DATA_FLOAT
,
CUDNN_NOT_PROPAGATE_NAN
),
"cudnnSetOpTensorDescriptor failed"
);
return
;
}
...
...
mindspore/ccsrc/kernel/gpu/nn/conv2d_gpu_kernel.h
浏览文件 @
ee7a6401
...
...
@@ -142,10 +142,14 @@ class Conv2dGpuFwdKernel : public GpuKernel {
}
CHECK_CUDNN_RET_WITH_EXCEPT
(
cudnnSetConvolution2dDescriptor
(
conv_desc_
,
pad_height_
,
pad_width_
,
stride_
,
stride_
,
dilation_
,
dilation_
,
CUDNN_CROSS_CORRELATION
,
cudnn_data_type_
),
CUDNN_CROSS_CORRELATION
,
CUDNN_DATA_FLOAT
),
"cudnnSetConvolution2dDescriptor failed"
);
input_descriptor_real
=
input_desc_
;
}
if
(
cudnn_data_type_
==
CUDNN_DATA_HALF
)
{
CHECK_CUDNN_RET_WITH_EXCEPT
(
cudnnSetConvolutionMathType
(
conv_desc_
,
CUDNN_TENSOR_OP_MATH
),
"cudnnSetConvolutionMathType failed."
)
}
SelectAlgorithm
(
input_descriptor_real
);
InitSizeLists
();
return
true
;
...
...
@@ -240,7 +244,7 @@ class Conv2dGpuFwdKernel : public GpuKernel {
"cudnnSetTensor4dDescriptor failed"
);
CHECK_CUDNN_RET_WITH_EXCEPT
(
cudnnSetConvolution2dDescriptor
(
conv_desc_
,
use_pad_
?
0
:
pad_top_
,
use_pad_
?
0
:
pad_left_
,
stride_
,
stride_
,
dilation_
,
dilation_
,
CUDNN_CROSS_CORRELATION
,
cudnn_data_type_
),
dilation_
,
dilation_
,
CUDNN_CROSS_CORRELATION
,
CUDNN_DATA_FLOAT
),
"cudnnSetConvolution2dDescriptor failed"
);
}
...
...
@@ -276,6 +280,9 @@ class Conv2dGpuFwdKernel : public GpuKernel {
"cudnnGetConvolutionForwardAlgorithm_v7 failed"
);
conv_algorithm_
=
perf_results
.
algo
;
}
if
(
cudnn_data_type_
==
CUDNN_DATA_HALF
)
{
conv_algorithm_
=
CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM
;
}
}
cudnnHandle_t
cudnn_handle_
;
cudnnTensorDescriptor_t
input_desc_
;
...
...
mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_filter_gpu_kernel.h
浏览文件 @
ee7a6401
...
...
@@ -141,10 +141,14 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
}
CHECK_CUDNN_RET_WITH_EXCEPT
(
cudnnSetConvolution2dDescriptor
(
conv_desc_
,
pad_height_
,
pad_width_
,
stride_
,
stride_
,
dilation_
,
dilation_
,
CUDNN_CROSS_CORRELATION
,
cudnn_data_type_
),
CUDNN_CROSS_CORRELATION
,
CUDNN_DATA_FLOAT
),
"GetConvolution2dDescriptor failed"
);
x_desc_real
=
x_desc_
;
}
if
(
cudnn_data_type_
==
CUDNN_DATA_HALF
)
{
CHECK_CUDNN_RET_WITH_EXCEPT
(
cudnnSetConvolutionMathType
(
conv_desc_
,
CUDNN_TENSOR_OP_MATH
),
"cudnnSetConvolutionMathType failed."
)
}
SelectAlgorithm
(
x_desc_real
);
InitSizeLists
();
return
true
;
...
...
@@ -239,7 +243,7 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
"cudnnSetTensor4dDescriptor failed"
);
CHECK_CUDNN_RET_WITH_EXCEPT
(
cudnnSetConvolution2dDescriptor
(
conv_desc_
,
use_pad_
?
0
:
pad_top_
,
use_pad_
?
0
:
pad_left_
,
stride_
,
stride_
,
dilation_
,
dilation_
,
CUDNN_CROSS_CORRELATION
,
cudnn_data_type_
),
dilation_
,
dilation_
,
CUDNN_CROSS_CORRELATION
,
CUDNN_DATA_FLOAT
),
"cudnnSetConvolution2dDescriptor failed"
);
}
void
SelectAlgorithm
(
cudnnTensorDescriptor_t
x_desc_real
)
{
...
...
@@ -258,6 +262,9 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
"GetConvolutionBackwardFilterAlgorithm failed"
);
algo_
=
perf_results
.
algo
;
}
if
(
cudnn_data_type_
==
CUDNN_DATA_HALF
)
{
algo_
=
CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1
;
}
}
void
GetFilterShape
(
const
CNodePtr
&
kernel_node
,
std
::
vector
<
int
>
*
filter_shape
)
{
auto
shp_tuple_x
=
AnfAlgo
::
GetCNodePrimitive
(
kernel_node
)
->
GetAttr
(
"filter_sizes"
)
->
cast
<
ValueTuplePtr
>
()
->
value
();
...
...
mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_input_gpu_kernel.h
浏览文件 @
ee7a6401
...
...
@@ -142,10 +142,14 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
}
CHECK_CUDNN_RET_WITH_EXCEPT
(
cudnnSetConvolution2dDescriptor
(
conv_desc_
,
pad_height_
,
pad_width_
,
stride_
,
stride_
,
dilation_
,
dilation_
,
CUDNN_CROSS_CORRELATION
,
cudnn_data_type_
),
CUDNN_CROSS_CORRELATION
,
CUDNN_DATA_FLOAT
),
"cudnnSetConvolution2dDescriptor failed"
);
dx_desc_real
=
dx_desc_
;
}
if
(
cudnn_data_type_
==
CUDNN_DATA_HALF
)
{
CHECK_CUDNN_RET_WITH_EXCEPT
(
cudnnSetConvolutionMathType
(
conv_desc_
,
CUDNN_TENSOR_OP_MATH
),
"cudnnSetConvolutionMathType failed."
)
}
SelectAlgorithm
(
dx_desc_real
);
InitSizeLists
();
return
true
;
...
...
@@ -239,7 +243,7 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
"cudnnSetTensor4dDescriptor failed"
);
CHECK_CUDNN_RET_WITH_EXCEPT
(
cudnnSetConvolution2dDescriptor
(
conv_desc_
,
use_pad_
?
0
:
pad_top_
,
use_pad_
?
0
:
pad_left_
,
stride_
,
stride_
,
dilation_
,
dilation_
,
CUDNN_CROSS_CORRELATION
,
cudnn_data_type_
),
dilation_
,
dilation_
,
CUDNN_CROSS_CORRELATION
,
CUDNN_DATA_FLOAT
),
"cudnnSetConvolution2dDescriptor failed"
);
}
void
SelectAlgorithm
(
cudnnTensorDescriptor_t
dx_desc_real
)
{
...
...
@@ -258,6 +262,9 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
"cudnnGetConvolutionBackwardDataAlgorithm_v7 failed"
);
algo_
=
perf_results
.
algo
;
}
if
(
cudnn_data_type_
==
CUDNN_DATA_HALF
)
{
algo_
=
CUDNN_CONVOLUTION_BWD_DATA_ALGO_1
;
}
}
void
GetInputShape
(
const
CNodePtr
&
kernel_node
,
std
::
vector
<
int
>
*
input_shape
)
{
auto
shp_tuple_x
=
AnfAlgo
::
GetCNodePrimitive
(
kernel_node
)
->
GetAttr
(
"input_sizes"
)
->
cast
<
ValueTuplePtr
>
()
->
value
();
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录