magicwindyyd / mindspore (forked from MindSpore / mindspore)
Commit 9996e0d4
Authored May 14, 2020 by VectorSL

gpu update shape infer

Parent 4bb5c7b3
Showing 10 changed files with 81 additions and 106 deletions (+81 -106)
mindspore/ccsrc/device/gpu/kernel_info_setter.cc  +10 -3
mindspore/ccsrc/kernel/gpu/arrays/array_reduce_gpu_kernel.h  +13 -26
mindspore/ccsrc/kernel/gpu/arrays/slice_gpu_kernel.h  +2 -10
mindspore/ccsrc/kernel/gpu/arrays/slice_grad_gpu_kernel.h  +2 -9
mindspore/ccsrc/kernel/gpu/gpu_kernel.h  +18 -1
mindspore/ccsrc/kernel/gpu/math/tensoradd_gpu_kernel.h  +9 -17
mindspore/ccsrc/kernel/gpu/nn/pooling_gpu_kernel.h  +2 -2
mindspore/ccsrc/kernel/gpu/nn/pooling_grad_gpu_kernel.h  +2 -2
mindspore/ccsrc/kernel/gpu/nn/relu_gpu_kernel.h  +12 -23
mindspore/ccsrc/kernel/gpu/nn/relu_grad_kernel.h  +11 -13
mindspore/ccsrc/device/gpu/kernel_info_setter.cc
@@ -184,10 +184,17 @@ void SetKernelInfo(const CNodePtr &kernel_node) {
   if (!result) {
     auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
+    std::string build_type = "in [";
+    std::for_each(std::begin(inputs_type), std::end(inputs_type),
+                  [&build_type](auto i) { build_type += mindspore::kernel::TypeId2String(i) + " "; });
+    build_type += "] out [";
+    std::for_each(std::begin(outputs_type), std::end(outputs_type),
+                  [&build_type](auto i) { build_type += mindspore::kernel::TypeId2String(i) + " "; });
+    build_type += "]";
     auto supported_type_lists = SupportedTypeList(kernel_node);
-    MS_LOG(EXCEPTION) << "Select GPU kernel op[" << kernel_name
-                      << "] fail! Incompatible data type!\nThe supported data types are " << supported_type_lists;
+    MS_EXCEPTION(TypeError) << "Select GPU kernel op[" << kernel_name
+                            << "] fail! Incompatible data type!\nThe supported data types are "
+                            << supported_type_lists << ", but get " << build_type;
   }
   builder->SetKernelType(kernel_type);
   builder->SetProcessor(kernel::Processor::CUDA);
...
mindspore/ccsrc/kernel/gpu/arrays/array_reduce_gpu_kernel.h
@@ -178,46 +178,33 @@ class ArrayReduceGpuKernel : public GpuKernel {
     return;
   }
   void InferInAndOutDesc(const std::vector<size_t> &input_shape, const std::vector<size_t> &output_shape) {
-    std::vector<size_t> inputA_shape = input_shape;
+    std::vector<int> inputA;
     std::vector<size_t> outputC_shape = output_shape;
-    std::vector<int> real_input_shape;
-    int shapeA_n, shapeA_c, shapeA_h, shapeA_w;
-    shapeA_n = inputA_shape.size() < 4 ? 1 : SizeToInt(inputA_shape[inputA_shape.size() - 4]);
-    shapeA_c = inputA_shape.size() < 3 ? 1 : SizeToInt(inputA_shape[inputA_shape.size() - 3]);
-    shapeA_h = inputA_shape.size() < 2 ? 1 : SizeToInt(inputA_shape[inputA_shape.size() - 2]);
-    shapeA_w = inputA_shape.size() == 0 ? 1 : SizeToInt(inputA_shape[inputA_shape.size() - 1]);
-    CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetTensor4dDescriptor(inputA_descriptor_, CUDNN_TENSOR_NCHW, data_type_,
-                                                           shapeA_n, shapeA_c, shapeA_h, shapeA_w),
+    ShapeNdTo4d(input_shape, &inputA);
+    CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetTensor4dDescriptor(inputA_descriptor_, CUDNN_TENSOR_NCHW, data_type_,
+                                                           inputA[0], inputA[1], inputA[2], inputA[3]),
                                 "cudnnSetTensor4dDescriptor failed");
-    int shapeC_n, shapeC_c, shapeC_h, shapeC_w;
     if (axis_[0] == -1) {
-      shapeC_n = 1;
-      shapeC_c = 1;
-      shapeC_h = 1;
-      shapeC_w = 1;
-      CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetTensor4dDescriptor(outputC_descriptor_, CUDNN_TENSOR_NCHW, data_type_,
-                                                             shapeC_n, shapeC_c, shapeC_h, shapeC_w),
+      CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetTensor4dDescriptor(outputC_descriptor_, CUDNN_TENSOR_NCHW, data_type_,
+                                                             1, 1, 1, 1),
                                   "cudnnSetTensor4dDescriptor failed");
-      if (shapeA_n == shapeC_n && shapeA_c == shapeC_c && shapeA_h == shapeC_h && shapeA_w == shapeC_w) {
+      if (inputA[0] == 1 && inputA[1] == 1 && inputA[2] == 1 && inputA[3] == 1) {
         all_match_ = true;
       }
       return;
     }
     if (!keep_dims_) {
       for (auto i : axis_) {
         (void)(outputC_shape.insert(outputC_shape.begin() + i, 1));
       }
     }
-    shapeC_n = outputC_shape.size() < 4 ? 1 : SizeToInt(outputC_shape[outputC_shape.size() - 4]);
-    shapeC_c = outputC_shape.size() < 3 ? 1 : SizeToInt(outputC_shape[outputC_shape.size() - 3]);
-    shapeC_h = outputC_shape.size() < 2 ? 1 : SizeToInt(outputC_shape[outputC_shape.size() - 2]);
-    shapeC_w = outputC_shape.size() == 0 ? 1 : SizeToInt(outputC_shape[outputC_shape.size() - 1]);
-    CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetTensor4dDescriptor(outputC_descriptor_, CUDNN_TENSOR_NCHW, data_type_,
-                                                           shapeC_n, shapeC_c, shapeC_h, shapeC_w),
+    std::vector<int> outputC;
+    ShapeNdTo4d(outputC_shape, &outputC);
+    CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetTensor4dDescriptor(outputC_descriptor_, CUDNN_TENSOR_NCHW, data_type_,
+                                                           outputC[0], outputC[1], outputC[2], outputC[3]),
                                 "cudnnSetTensor4dDescriptor failed");
-    if (shapeA_n == shapeC_n && shapeA_c == shapeC_c && shapeA_h == shapeC_h && shapeA_w == shapeC_w) {
+    if (inputA == outputC) {
       all_match_ = true;
     }
     return;
...
mindspore/ccsrc/kernel/gpu/arrays/slice_gpu_kernel.h
@@ -52,15 +52,7 @@ class SliceGpuFwdKernel : public GpuKernel {
       return false;
     }
     auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
-    int shape_n = input_shape.size() < 4 ? 1 : SizeToInt(input_shape[input_shape.size() - 4]);
-    int shape_c = input_shape.size() < 3 ? 1 : SizeToInt(input_shape[input_shape.size() - 3]);
-    int shape_h = input_shape.size() < 2 ? 1 : SizeToInt(input_shape[input_shape.size() - 2]);
-    int shape_w = SizeToInt(input_shape[input_shape.size() - 1]);
-    input_shape_.push_back(shape_n);
-    input_shape_.push_back(shape_c);
-    input_shape_.push_back(shape_h);
-    input_shape_.push_back(shape_w);
+    ShapeNdTo4d(input_shape, &input_shape_);
     auto strides = AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("strides");
     if (strides) {
       strides_ = GetAttr<std::vector<int>>(kernel_node, "strides");
...
@@ -89,7 +81,7 @@ class SliceGpuFwdKernel : public GpuKernel {
       }
     }
-    input_size_ = IntToSize(shape_n * shape_c * shape_h * shape_w) * sizeof(T);
+    input_size_ = IntToSize(input_shape_[0] * input_shape_[1] * input_shape_[2] * input_shape_[3]) * sizeof(T);
     auto out_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
     output_size_ = sizeof(T);
...
mindspore/ccsrc/kernel/gpu/arrays/slice_grad_gpu_kernel.h
@@ -66,19 +66,12 @@ class SliceGradGpuKernel : public GpuKernel {
       size_ = GetAttr<std::vector<int>>(kernel_node, "end");
     } else {
       auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
-      input_shape_.push_back(input_shape.size() < 4 ? 1 : SizeToInt(input_shape[input_shape.size() - 4]));
-      input_shape_.push_back(input_shape.size() < 3 ? 1 : SizeToInt(input_shape[input_shape.size() - 3]));
-      input_shape_.push_back(input_shape.size() < 2 ? 1 : SizeToInt(input_shape[input_shape.size() - 2]));
-      input_shape_.push_back(SizeToInt(input_shape[input_shape.size() - 1]));
+      ShapeNdTo4d(input_shape, &input_shape_);
       size_ = GetAttr<std::vector<int>>(kernel_node, "size");
     }
     auto dy_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
-    dy_shape_.push_back(dy_shape.size() < 4 ? 1 : SizeToInt(dy_shape[dy_shape.size() - 4]));
-    dy_shape_.push_back(dy_shape.size() < 3 ? 1 : SizeToInt(dy_shape[dy_shape.size() - 3]));
-    dy_shape_.push_back(dy_shape.size() < 2 ? 1 : SizeToInt(dy_shape[dy_shape.size() - 2]));
-    dy_shape_.push_back(SizeToInt(dy_shape[dy_shape.size() - 1]));
+    ShapeNdTo4d(dy_shape, &dy_shape_);
     begin_ = GetAttr<std::vector<int>>(kernel_node, "begin");
     DealParam();
     input_size_ = IntToSize(input_shape_[0] * input_shape_[1] * input_shape_[2] * input_shape_[3]) * sizeof(T);
...
mindspore/ccsrc/kernel/gpu/gpu_kernel.h
@@ -39,7 +39,6 @@ class GpuKernel : public KernelMod {
   virtual void InitSizeLists() = 0;
   template <typename T>
   inline T *GetDeviceAddress(const std::vector<AddressPtr> &addr_list, size_t index) {
     if (index >= addr_list.size()) {
       MS_LOG(EXCEPTION) << "Address index(" << index << ") out of range(" << addr_list.size() << ")";
...
@@ -62,6 +61,24 @@ class GpuKernel : public KernelMod {
     }
     return GetValue<T>(attr);
   }
+  // expand Nd Shape to 4d (N in [0,4])
+  void ShapeNdTo4d(const std::vector<size_t> &src, std::vector<int> *dst) {
+    dst->push_back(src.size() < 4 ? 1 : SizeToInt(src[src.size() - 4]));
+    dst->push_back(src.size() < 3 ? 1 : SizeToInt(src[src.size() - 3]));
+    dst->push_back(src.size() < 2 ? 1 : SizeToInt(src[src.size() - 2]));
+    dst->push_back(src.size() == 0 ? 1 : SizeToInt(src[src.size() - 1]));
+  }
+  inline void CheckBroadcast4TensorOp(const std::vector<int> &A, const std::vector<int> &B,
+                                      const std::vector<int> &Out) {
+    if (A != Out && B != Out) {
+      MS_EXCEPTION(ValueError) << "Double-sided broadcast was not supported in cudnn of cudnnOpTensor:\n"
+                                  "InputA must match the corresponding dimension of the destination tensor outC, and each "
+                                  "dimension of the inputB "
+                                  "must match the corresponding dimension of outC or must be equal to 1.";
+    }
+  }
 };
 }  // namespace kernel
 }  // namespace mindspore
...
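For reference, the ShapeNdTo4d helper added above left-pads a shape of rank 0-4 with 1s so that the cuDNN NCHW descriptors always receive exactly four dimensions. Below is a minimal standalone sketch of that padding behaviour, not part of the patch; SizeToInt is replaced by a plain static_cast here purely for illustration.

#include <cstddef>
#include <iostream>
#include <vector>

// Illustrative stand-in for the helper in gpu_kernel.h: pad an N-d shape
// (N in [0, 4]) on the left with 1s to obtain an NCHW 4-d shape.
void ShapeNdTo4d(const std::vector<size_t> &src, std::vector<int> *dst) {
  dst->push_back(src.size() < 4 ? 1 : static_cast<int>(src[src.size() - 4]));
  dst->push_back(src.size() < 3 ? 1 : static_cast<int>(src[src.size() - 3]));
  dst->push_back(src.size() < 2 ? 1 : static_cast<int>(src[src.size() - 2]));
  dst->push_back(src.size() == 0 ? 1 : static_cast<int>(src[src.size() - 1]));
}

int main() {
  std::vector<size_t> shape = {32, 10};  // a hypothetical 2-d shape, e.g. [batch, features]
  std::vector<int> shape4d;
  ShapeNdTo4d(shape, &shape4d);
  for (int d : shape4d) std::cout << d << " ";  // prints: 1 1 32 10
  std::cout << std::endl;
  return 0;
}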
mindspore/ccsrc/kernel/gpu/math/tensoradd_gpu_kernel.h
@@ -87,32 +87,24 @@ class TensorAddGpuFwdKernel : public GpuKernel {
     auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
     auto input_shapeB = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
     auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
-    if (input_shape != output_shape && input_shapeB != output_shape) {
-      MS_LOG(ERROR) << "Double-sided broadcast was not supported in cudnn of cudnnOpTensor:\n"
-                       "InputA must match the corresponding dimension of the destination tensor outC, and each "
-                       "dimension of the inputB "
-                       "must match the corresponding dimension of outC or must be equal to 1.";
-      return false;
-    }
     is_null_input_ = CHECK_NULL_INPUT(input_shape) || CHECK_NULL_INPUT(input_shapeB);
     if (is_null_input_) {
       MS_LOG(WARNING) << "TensorAddGpuFwdKernel input is null";
       InitSizeLists();
       return true;
     }
-    int shape_n = input_shape.size() < 4 ? 1 : SizeToInt(input_shape[input_shape.size() - 4]);
-    int shape_c = input_shape.size() < 3 ? 1 : SizeToInt(input_shape[input_shape.size() - 3]);
-    int shape_h = input_shape.size() < 2 ? 1 : SizeToInt(input_shape[input_shape.size() - 2]);
-    int shape_w = input_shape.size() == 0 ? 1 : SizeToInt(input_shape[input_shape.size() - 1]);
+    std::vector<int> shapeA;
+    std::vector<int> shapeB;
+    std::vector<int> shapeOut;
+    ShapeNdTo4d(input_shape, &shapeA);
+    ShapeNdTo4d(input_shapeB, &shapeB);
+    ShapeNdTo4d(output_shape, &shapeOut);
+    CheckBroadcast4TensorOp(shapeA, shapeB, shapeOut);
     CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetTensor4dDescriptor(inputA_descriptor_, CUDNN_TENSOR_NCHW, cudnn_data_type_,
-                                                           shape_n, shape_c, shape_h, shape_w),
+                                                           shapeA[0], shapeA[1], shapeA[2], shapeA[3]),
                                 "cudnnSetTensor4dDescriptor failed");
-    int shapeB_n = input_shapeB.size() < 4 ? 1 : SizeToInt(input_shapeB[input_shapeB.size() - 4]);
-    int shapeB_c = input_shapeB.size() < 3 ? 1 : SizeToInt(input_shapeB[input_shapeB.size() - 3]);
-    int shapeB_h = input_shapeB.size() < 2 ? 1 : SizeToInt(input_shapeB[input_shapeB.size() - 2]);
-    int shapeB_w = input_shapeB.size() == 0 ? 1 : SizeToInt(input_shapeB[input_shapeB.size() - 1]);
     CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetTensor4dDescriptor(inputB_descriptor_, CUDNN_TENSOR_NCHW, cudnn_data_type_,
-                                                           shapeB_n, shapeB_c, shapeB_h, shapeB_w),
+                                                           shapeB[0], shapeB[1], shapeB[2], shapeB[3]),
                                 "cudnnSetTensor4dDescriptor failed");
     CHECK_CUDNN_RET_WITH_EXCEPT(
...
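As a reading aid (not part of the patch), the rule that CheckBroadcast4TensorOp enforces for cudnnOpTensor can be sketched as follows: at least one operand shape must equal the output shape exactly, while the other may broadcast along size-1 dimensions. The shapes below and the BroadcastSupported name are hypothetical, for illustration only.

#include <iostream>
#include <vector>

// Mirrors the condition in CheckBroadcast4TensorOp: double-sided broadcast
// (neither input matching the output shape) is rejected by the GPU kernel.
bool BroadcastSupported(const std::vector<int> &A, const std::vector<int> &B, const std::vector<int> &Out) {
  return A == Out || B == Out;
}

int main() {
  // One-sided broadcast: A matches Out, B broadcasts -> supported (prints 1).
  std::cout << BroadcastSupported({2, 3, 4, 5}, {1, 3, 1, 1}, {2, 3, 4, 5}) << "\n";
  // Double-sided broadcast: neither input matches Out -> rejected (prints 0).
  std::cout << BroadcastSupported({2, 1, 4, 5}, {1, 3, 4, 5}, {2, 3, 4, 5}) << "\n";
  return 0;
}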
mindspore/ccsrc/kernel/gpu/nn/pooling_gpu_kernel.h
@@ -107,8 +107,8 @@ class PoolingGpuFwdKernel : public GpuKernel {
                                   SizeToInt(output_shape[1]), SizeToInt(output_shape[2]),
                                   SizeToInt(output_shape[3])),
       "cudnnSetTensor4dDescriptor failed");
     auto window = GetValue<std::vector<int>>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("ksize"));
-    int window_height = window[3];
-    int window_width = window[2];
+    int window_height = window[2];
+    int window_width = window[3];
     stride_ = GetValue<std::vector<int>>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("strides"));
     SetPoolingMode(kernel_node);
     if (pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase) {
...
mindspore/ccsrc/kernel/gpu/nn/pooling_grad_gpu_kernel.h
@@ -101,8 +101,8 @@ class PoolingGradGpuFwdKernel : public GpuKernel {
       return false;
     }
     auto window = GetAttr<std::vector<int>>(kernel_node, "ksize");
-    int window_height = window[3];
-    int window_width = window[2];
+    int window_height = window[2];
+    int window_width = window[3];
     SetPoolingMode(kernel_node);
     auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
     auto input_mask = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
...
mindspore/ccsrc/kernel/gpu/nn/relu_gpu_kernel.h
@@ -31,8 +31,7 @@ class ReLUGpuFwdKernel : public GpuKernel {
       : cudnn_handle_(nullptr),
         activation_desc_(nullptr),
         mode_(CUDNN_ACTIVATION_RELU),
-        input_descriptor_(nullptr),
-        output_descriptor_(nullptr),
+        data_descriptor_(nullptr),
         is_null_input_(false),
         cudnn_data_type_(CUDNN_DATA_FLOAT),
         input_size_(0),
...
@@ -53,8 +52,8 @@ class ReLUGpuFwdKernel : public GpuKernel {
     const float alpha = 1;
     const float beta = 0;
-    CHECK_CUDNN_RET_WITH_EXCEPT(cudnnActivationForward(cudnn_handle_, activation_desc_, &alpha, input_descriptor_,
-                                                       input, &beta, output_descriptor_, output),
+    CHECK_CUDNN_RET_WITH_EXCEPT(cudnnActivationForward(cudnn_handle_, activation_desc_, &alpha, data_descriptor_,
+                                                       input, &beta, data_descriptor_, output),
                                 "ReLUGpuFwdKernel failed");
     return true;
...
@@ -75,18 +74,12 @@ class ReLUGpuFwdKernel : public GpuKernel {
       return true;
     }
     mode_ = CUDNN_ACTIVATION_RELU;
-    int batch_size = input_shape.size() < 4 ? 1 : SizeToInt(input_shape[input_shape.size() - 4]);
-    int channel_size = input_shape.size() < 3 ? 1 : SizeToInt(input_shape[input_shape.size() - 3]);
-    int height = input_shape.size() < 2 ? 1 : SizeToInt(input_shape[input_shape.size() - 2]);
-    int width = input_shape.size() == 0 ? 1 : SizeToInt(input_shape[input_shape.size() - 1]);
+    std::vector<int> shape;
+    ShapeNdTo4d(input_shape, &shape);
     CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetActivationDescriptor(activation_desc_, mode_, CUDNN_NOT_PROPAGATE_NAN, 0.0),
                                 "SetActivationDescriptor failed");
-    CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetTensor4dDescriptor(input_descriptor_, CUDNN_TENSOR_NCHW, cudnn_data_type_,
-                                                           batch_size, channel_size, height, width),
-                                "SetTensor4dDescriptor failed");
-    CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetTensor4dDescriptor(output_descriptor_, CUDNN_TENSOR_NCHW, cudnn_data_type_,
-                                                           batch_size, channel_size, height, width),
+    CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetTensor4dDescriptor(data_descriptor_, CUDNN_TENSOR_NCHW, cudnn_data_type_,
+                                                           shape[0], shape[1], shape[2], shape[3]),
                                 "SetTensor4dDescriptor failed");
     InitSizeLists();
     return true;
...
@@ -95,18 +88,16 @@ class ReLUGpuFwdKernel : public GpuKernel {
  protected:
   void InitResource() override {
     cudnn_handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCudnnHandle();
-    CHECK_CUDNN_RET_WITH_EXCEPT(cudnnCreateTensorDescriptor(&input_descriptor_), "cudnnCreateTensorDescriptor failed");
-    CHECK_CUDNN_RET_WITH_EXCEPT(cudnnCreateTensorDescriptor(&output_descriptor_),
-                                "cudnnCreateTensorDescriptor failed");
+    CHECK_CUDNN_RET_WITH_EXCEPT(cudnnCreateTensorDescriptor(&data_descriptor_), "cudnnCreateTensorDescriptor failed");
     CHECK_CUDNN_RET_WITH_EXCEPT(cudnnCreateActivationDescriptor(&activation_desc_),
                                 "cudnnCreateActivationDescriptor failed");
   }
   void InitSizeLists() override {
     if (!is_null_input_) {
-      CHECK_CUDNN_RET_WITH_EXCEPT(cudnnGetTensorSizeInBytes(input_descriptor_, &input_size_),
-                                  "cudnnGetTensorSizeInBytes failed");
-      CHECK_CUDNN_RET_WITH_EXCEPT(cudnnGetTensorSizeInBytes(output_descriptor_, &output_size_),
+      CHECK_CUDNN_RET_WITH_EXCEPT(cudnnGetTensorSizeInBytes(data_descriptor_, &input_size_),
                                   "cudnnGetTensorSizeInBytes failed");
+      output_size_ = input_size_;
     }
     input_size_list_.push_back(input_size_);
     output_size_list_.push_back(output_size_);
...
@@ -116,15 +107,13 @@ class ReLUGpuFwdKernel : public GpuKernel {
   void DestroyResource() noexcept {
     CHECK_CUDNN_RET_WITH_ERROR(cudnnDestroyActivationDescriptor(activation_desc_),
                                "cudnnDestroyActivationDescriptor failed");
-    CHECK_CUDNN_RET_WITH_ERROR(cudnnDestroyTensorDescriptor(output_descriptor_), "cudnnDestroyTensorDescriptor failed");
-    CHECK_CUDNN_RET_WITH_ERROR(cudnnDestroyTensorDescriptor(input_descriptor_), "cudnnDestroyTensorDescriptor failed");
+    CHECK_CUDNN_RET_WITH_ERROR(cudnnDestroyTensorDescriptor(data_descriptor_), "cudnnDestroyTensorDescriptor failed");
   }
   cudnnHandle_t cudnn_handle_;
   cudnnActivationDescriptor_t activation_desc_;
   cudnnActivationMode_t mode_;
-  cudnnTensorDescriptor_t input_descriptor_;
-  cudnnTensorDescriptor_t output_descriptor_;
+  cudnnTensorDescriptor_t data_descriptor_;
   bool is_null_input_;
   std::vector<size_t> input_size_list_;
   std::vector<size_t> output_size_list_;
...
mindspore/ccsrc/kernel/gpu/nn/relu_grad_kernel.h
@@ -31,7 +31,7 @@ class ReluGradGpuFwdKernel : public GpuKernel {
       : cudnn_handle_(nullptr),
         activation_desc_(nullptr),
         mode_(CUDNN_ACTIVATION_RELU),
-        input_descriptor_(nullptr),
+        data_descriptor_(nullptr),
         is_null_input_(false),
         cudnn_data_type_(CUDNN_DATA_FLOAT),
         input_size_(0) {}
...
@@ -52,8 +52,8 @@ class ReluGradGpuFwdKernel : public GpuKernel {
     const float alpha = 1;
     const float beta = 0;
     CHECK_CUDNN_RET_WITH_EXCEPT(
-      cudnnActivationBackward(cudnn_handle_, activation_desc_, &alpha, input_descriptor_, y, input_descriptor_, dy,
-                              input_descriptor_, y, &beta, input_descriptor_, dx),
+      cudnnActivationBackward(cudnn_handle_, activation_desc_, &alpha, data_descriptor_, y, data_descriptor_, dy,
+                              data_descriptor_, y, &beta, data_descriptor_, dx),
       "cudnnActivationBackward failed");
     return true;
...
@@ -74,14 +74,12 @@ class ReluGradGpuFwdKernel : public GpuKernel {
       InitSizeLists();
       return true;
     }
-    int batch_size = input_shape.size() < 4 ? 1 : SizeToInt(input_shape[input_shape.size() - 4]);
-    int channel_size = input_shape.size() < 3 ? 1 : SizeToInt(input_shape[input_shape.size() - 3]);
-    int height = input_shape.size() < 2 ? 1 : SizeToInt(input_shape[input_shape.size() - 2]);
-    int width = input_shape.size() == 0 ? 1 : SizeToInt(input_shape[input_shape.size() - 1]);
+    std::vector<int> shape;
+    ShapeNdTo4d(input_shape, &shape);
     CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetActivationDescriptor(activation_desc_, mode_, CUDNN_PROPAGATE_NAN, 0.0),
                                 "SetActivationDescriptor failed");
-    CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetTensor4dDescriptor(input_descriptor_, CUDNN_TENSOR_NCHW, cudnn_data_type_,
-                                                           batch_size, channel_size, height, width),
+    CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetTensor4dDescriptor(data_descriptor_, CUDNN_TENSOR_NCHW, cudnn_data_type_,
+                                                           shape[0], shape[1], shape[2], shape[3]),
                                 "SetTensor4dDescriptor failed");
     InitSizeLists();
...
@@ -91,13 +89,13 @@ class ReluGradGpuFwdKernel : public GpuKernel {
  protected:
   void InitResource() override {
     cudnn_handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCudnnHandle();
-    CHECK_CUDNN_RET_WITH_EXCEPT(cudnnCreateTensorDescriptor(&input_descriptor_), "cudnnCreateTensorDescriptor failed");
+    CHECK_CUDNN_RET_WITH_EXCEPT(cudnnCreateTensorDescriptor(&data_descriptor_), "cudnnCreateTensorDescriptor failed");
     CHECK_CUDNN_RET_WITH_EXCEPT(cudnnCreateActivationDescriptor(&activation_desc_),
                                 "cudnnCreateActivationDescriptor failed");
   }
   void InitSizeLists() override {
     if (!is_null_input_) {
-      CHECK_CUDNN_RET_WITH_EXCEPT(cudnnGetTensorSizeInBytes(input_descriptor_, &input_size_),
+      CHECK_CUDNN_RET_WITH_EXCEPT(cudnnGetTensorSizeInBytes(data_descriptor_, &input_size_),
                                   "cudnnGetTensorSizeInBytes failed");
     }
     input_size_list_.push_back(input_size_);
...
@@ -109,13 +107,13 @@ class ReluGradGpuFwdKernel : public GpuKernel {
   void DestroyResource() noexcept {
     CHECK_CUDNN_RET_WITH_ERROR(cudnnDestroyActivationDescriptor(activation_desc_),
                               "cudnnDestroyActivationDescriptor failed");
-    CHECK_CUDNN_RET_WITH_ERROR(cudnnDestroyTensorDescriptor(input_descriptor_), "cudnnDestroyTensorDescriptor failed");
+    CHECK_CUDNN_RET_WITH_ERROR(cudnnDestroyTensorDescriptor(data_descriptor_), "cudnnDestroyTensorDescriptor failed");
   }
   cudnnHandle_t cudnn_handle_;
   cudnnActivationDescriptor_t activation_desc_;
   cudnnActivationMode_t mode_;
-  cudnnTensorDescriptor_t input_descriptor_;
+  cudnnTensorDescriptor_t data_descriptor_;
   bool is_null_input_;
   std::vector<size_t> input_size_list_;
   std::vector<size_t> output_size_list_;
...