Project: magicwindyyd / mindspore (forked from MindSpore / mindspore)
Commit 19e66f06
Authored by mindspore-ci-bot on Jun 18, 2020; committed via Gitee on Jun 18, 2020.

!2150 Gpu Tanh kernel support fp16

Merge pull request !2150 from chenweifeng/tanh-fp16

Parents: 0c3d96a9, 9201ea5e
Showing 11 changed files with 106 additions and 298 deletions (+106 −298).
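Taken together, the change deletes the handwritten element-wise Tanh CUDA kernels and instead registers Tanh and TanhGrad on the cuDNN-backed activation kernels, which are registered for both float32 and float16. A minimal sketch of exercising the new fp16 path, assuming the MindSpore-0.x-era Python API that the updated test also uses (`context`, `Tensor`, and the `P.Tanh` primitive are real names from that API; the driver code itself is illustrative):

```python
import numpy as np
import mindspore.context as context
import mindspore.ops.operations as P
from mindspore import Tensor

# Select the GPU backend so the newly registered GPU kernel handles the op.
context.set_context(mode=context.GRAPH_MODE, device_target="GPU")

x = Tensor(np.random.randn(5, 3, 6).astype(np.float16))  # float16 input, now supported
y = P.Tanh()(x)
print(y.asnumpy().dtype)  # float16
```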
Files changed:

- mindspore/ccsrc/kernel/gpu/cuda_impl/tanh_impl.cu (+0, −46)
- mindspore/ccsrc/kernel/gpu/cuda_impl/tanh_impl.cuh (+0, −28)
- mindspore/ccsrc/kernel/gpu/nn/activation_gpu_kernel.cc (+8, −3)
- mindspore/ccsrc/kernel/gpu/nn/activation_gpu_kernel.h (+22, −9)
- mindspore/ccsrc/kernel/gpu/nn/activation_grad_kernel.cc (+12, −3)
- mindspore/ccsrc/kernel/gpu/nn/activation_grad_kernel.h (+27, −8)
- mindspore/ccsrc/kernel/gpu/nn/tanh_gpu_kernel.cc (+0, −24)
- mindspore/ccsrc/kernel/gpu/nn/tanh_gpu_kernel.h (+0, −75)
- mindspore/ccsrc/kernel/gpu/nn/tanh_grad_kernel.cc (+0, −26)
- mindspore/ccsrc/kernel/gpu/nn/tanh_grad_kernel.h (+0, −76)
- tests/st/ops/gpu/test_tanh_op.py (+37, −0)
mindspore/ccsrc/kernel/gpu/cuda_impl/tanh_impl.cu (deleted, 100644 → 0; view file @ 0c3d96a9)
```cpp
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "kernel/gpu/cuda_impl/tanh_impl.cuh"
#include <cuda_runtime.h>

template <typename T>
__global__ void TanhKernel(const size_t size, const T *x_addr, T *y_addr) {
  for (int pos = blockIdx.x * blockDim.x + threadIdx.x; pos < size; pos += blockDim.x * gridDim.x) {
    y_addr[pos] = tanh(x_addr[pos]);
  }
}

template <typename T>
__global__ void TanhGradKernel(const size_t size, const T *y_addr, const T *dy_addr, T *dx_addr) {
  for (int pos = blockIdx.x * blockDim.x + threadIdx.x; pos < size; pos += blockDim.x * gridDim.x) {
    dx_addr[pos] = dy_addr[pos] * (1 - y_addr[pos] * y_addr[pos]);
  }
}

template <typename T>
void Tanh(const size_t size, const T *x_addr, T *y_addr, cudaStream_t cuda_stream) {
  TanhKernel<<<GET_BLOCKS(size), GET_THREADS, 0, cuda_stream>>>(size, x_addr, y_addr);
}

template <typename T>
void TanhGrad(const size_t size, const T *y_addr, const T *dy_addr, T *dx_addr, cudaStream_t cuda_stream) {
  TanhGradKernel<<<GET_BLOCKS(size), GET_THREADS, 0, cuda_stream>>>(size, y_addr, dy_addr, dx_addr);
}

template void Tanh(const size_t size, const float *x_addr, float *y_addr, cudaStream_t cuda_stream);
template void TanhGrad(const size_t size, const float *y_addr, const float *dy_addr, float *dx_addr,
                       cudaStream_t cuda_stream);
```
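For reference, the deleted backward kernel encodes the tanh derivative identity. Since the forward output is $y = \tanh(x)$ and $\tanh'(x) = 1 - \tanh^2(x)$, the gradient can be computed from the saved output alone:

$$\frac{\partial L}{\partial x} = \frac{\partial L}{\partial y}\,\bigl(1 - y^2\bigr),$$

which is exactly the `dy_addr[pos] * (1 - y_addr[pos] * y_addr[pos])` line above.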
mindspore/ccsrc/kernel/gpu/cuda_impl/tanh_impl.cuh (deleted, 100644 → 0; view file @ 0c3d96a9)
```cpp
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_TAN_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_TAN_H_

#include "device/gpu/cuda_common.h"

template <typename T>
void Tanh(const size_t size, const T *x_addr, T *y_addr, cudaStream_t cuda_stream);

template <typename T>
void TanhGrad(const size_t size, const T *y_addr, const T *dy_addr, T *dx_addr, cudaStream_t cuda_stream);

#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_TAN_H_
```
mindspore/ccsrc/kernel/gpu/nn/relu_gpu_kernel.cc → mindspore/ccsrc/kernel/gpu/nn/activation_gpu_kernel.cc (renamed; view file @ 19e66f06)
```diff
@@ -14,13 +14,18 @@
  * limitations under the License.
  */
-#include "kernel/gpu/nn/relu_gpu_kernel.h"
+#include "kernel/gpu/nn/activation_gpu_kernel.h"
 
 namespace mindspore {
 namespace kernel {
 MS_REG_GPU_KERNEL_ONE(ReLU, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
-                      ReLUGpuFwdKernel, float)
+                      ActivationGpuFwdKernel, float)
 MS_REG_GPU_KERNEL_ONE(ReLU, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
-                      ReLUGpuFwdKernel, half)
+                      ActivationGpuFwdKernel, half)
+MS_REG_GPU_KERNEL_ONE(Tanh, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
+                      ActivationGpuFwdKernel, float)
+MS_REG_GPU_KERNEL_ONE(Tanh, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
+                      ActivationGpuFwdKernel, half)
 }  // namespace kernel
 }  // namespace mindspore
```
mindspore/ccsrc/kernel/gpu/nn/relu_gpu_kernel.h → mindspore/ccsrc/kernel/gpu/nn/activation_gpu_kernel.h (renamed; view file @ 19e66f06)
```diff
@@ -18,6 +18,8 @@
 #define MINDSPORE_CCSRC_KERNEL_GPU_NN_RELU_GPU_KERNEL_H_
 
 #include <vector>
+#include <map>
+#include <string>
 #include "kernel/gpu/gpu_kernel.h"
 #include "kernel/gpu/gpu_kernel_factory.h"
 #include "kernel/gpu/kernel_constants.h"
@@ -25,9 +27,9 @@
 namespace mindspore {
 namespace kernel {
 template <typename T>
-class ReLUGpuFwdKernel : public GpuKernel {
+class ActivationGpuFwdKernel : public GpuKernel {
  public:
-  ReLUGpuFwdKernel()
+  ActivationGpuFwdKernel()
       : cudnn_handle_(nullptr),
         activation_desc_(nullptr),
         mode_(CUDNN_ACTIVATION_RELU),
@@ -37,7 +39,7 @@ class ReLUGpuFwdKernel : public GpuKernel {
         input_size_(0),
         output_size_(0),
         workspace_size_(0) {}
-  ~ReLUGpuFwdKernel() override { DestroyResource(); }
+  ~ActivationGpuFwdKernel() override { DestroyResource(); }
 
   const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
   const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
   const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
@@ -54,33 +56,39 @@ class ReLUGpuFwdKernel : public GpuKernel {
     const float beta = 0;
     CHECK_CUDNN_RET_WITH_EXCEPT(cudnnActivationForward(cudnn_handle_, activation_desc_, &alpha, data_descriptor_,
                                                        input, &beta, data_descriptor_, output),
-                                "ReLUGpuFwdKernel failed");
+                                "cudnnActivationForward failed");
     return true;
   }
   bool Init(const CNodePtr &kernel_node) override {
+    auto node_name = AnfAlgo::GetCNodeName(kernel_node);
+    auto iter = kernel_map.find(node_name);
+    if (iter == kernel_map.end()) {
+      MS_LOG(EXCEPTION) << "Kernel: " << node_name << " not support.";
+    }
+    mode_ = iter->second;
     InitResource();
     cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0)));
     size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
     if (input_num != 1) {
-      MS_LOG(ERROR) << "Argument number is " << input_num << ", but ReLUGpuFwdKernel needs 1.";
+      MS_LOG(ERROR) << "Argument number is " << input_num << ", but ActivationGpuFwdKernel needs 1.";
       return false;
     }
     auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
     is_null_input_ = CHECK_NULL_INPUT(input_shape);
     if (is_null_input_) {
-      MS_LOG(WARNING) << "ReLUGpuFwdKernel input is null.";
+      MS_LOG(WARNING) << "ActivationGpuFwdKernel input is null.";
       InitSizeLists();
       return true;
     }
-    mode_ = CUDNN_ACTIVATION_RELU;
     std::vector<int> shape;
     ShapeNdTo4d(input_shape, &shape);
     CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetActivationDescriptor(activation_desc_, mode_, CUDNN_NOT_PROPAGATE_NAN, 0.0),
-                                "SetActivationDescriptor failed");
+                                "cudnnSetActivationDescriptor failed");
     CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetTensor4dDescriptor(data_descriptor_, CUDNN_TENSOR_NCHW, cudnn_data_type_,
                                                            shape[0], shape[1], shape[2], shape[3]),
-                                "SetTensor4dDescriptor failed");
+                                "cudnnSetTensor4dDescriptor failed");
     InitSizeLists();
     return true;
   }
@@ -110,6 +118,11 @@ class ReLUGpuFwdKernel : public GpuKernel {
     CHECK_CUDNN_RET_WITH_ERROR(cudnnDestroyTensorDescriptor(data_descriptor_),
                                "cudnnDestroyTensorDescriptor failed");
   }
+
+  std::map<std::string, cudnnActivationMode_t> kernel_map = {{"ReLU", CUDNN_ACTIVATION_RELU},
+                                                             {"Tanh", CUDNN_ACTIVATION_TANH},
+                                                             {"ELU", CUDNN_ACTIVATION_ELU},
+                                                             {"Sigmoid", CUDNN_ACTIVATION_SIGMOID}};
   cudnnHandle_t cudnn_handle_;
   cudnnActivationDescriptor_t activation_desc_;
   cudnnActivationMode_t mode_;
```
mindspore/ccsrc/kernel/gpu/nn/relu_grad_kernel.cc → mindspore/ccsrc/kernel/gpu/nn/activation_grad_kernel.cc (renamed; view file @ 19e66f06)
```diff
@@ -14,17 +14,26 @@
  * limitations under the License.
  */
-#include "kernel/gpu/nn/relu_grad_kernel.h"
+#include "kernel/gpu/nn/activation_grad_kernel.h"
 
 namespace mindspore {
 namespace kernel {
 MS_REG_GPU_KERNEL_ONE(
   ReluGrad,
   KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
-  ReluGradGpuKernel, float)
+  ActivationGradGpuKernel, float)
 MS_REG_GPU_KERNEL_ONE(
   ReluGrad,
   KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
-  ReluGradGpuKernel, half)
+  ActivationGradGpuKernel, half)
+MS_REG_GPU_KERNEL_ONE(
+  TanhGrad,
+  KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
+  ActivationGradGpuKernel, float)
+MS_REG_GPU_KERNEL_ONE(
+  TanhGrad,
+  KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
+  ActivationGradGpuKernel, half)
 }  // namespace kernel
 }  // namespace mindspore
```
mindspore/ccsrc/kernel/gpu/nn/relu_grad_kernel.h → mindspore/ccsrc/kernel/gpu/nn/activation_grad_kernel.h (renamed; view file @ 19e66f06)
```diff
@@ -18,6 +18,8 @@
 #define MINDSPORE_CCSRC_KERNEL_GPU_NN_RELU_GRAD_KERNEL_H_
 
 #include <vector>
+#include <map>
+#include <string>
 #include "kernel/gpu/gpu_kernel.h"
 #include "kernel/gpu/gpu_kernel_factory.h"
 #include "kernel/gpu/kernel_constants.h"
@@ -25,9 +27,9 @@
 namespace mindspore {
 namespace kernel {
 template <typename T>
-class ReluGradGpuKernel : public GpuKernel {
+class ActivationGradGpuKernel : public GpuKernel {
  public:
-  ReluGradGpuKernel()
+  ActivationGradGpuKernel()
       : cudnn_handle_(nullptr),
         activation_desc_(nullptr),
         mode_(CUDNN_ACTIVATION_RELU),
@@ -35,7 +37,7 @@ class ReluGradGpuKernel : public GpuKernel {
         is_null_input_(false),
         cudnn_data_type_(CUDNN_DATA_FLOAT),
         input_size_(0) {}
-  ~ReluGradGpuKernel() override { DestroyResource(); }
+  ~ActivationGradGpuKernel() override { DestroyResource(); }
 
   const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
   const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
   const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
@@ -45,8 +47,15 @@ class ReluGradGpuKernel : public GpuKernel {
     if (is_null_input_) {
       return true;
     }
-    T *y = GetDeviceAddress<T>(inputs, 1);
-    T *dy = GetDeviceAddress<T>(inputs, 0);
+    T *dy = nullptr;
+    T *y = nullptr;
+    if (mode_ == CUDNN_ACTIVATION_RELU || mode_ == CUDNN_ACTIVATION_ELU) {
+      dy = GetDeviceAddress<T>(inputs, 0);
+      y = GetDeviceAddress<T>(inputs, 1);
+    } else {
+      y = GetDeviceAddress<T>(inputs, 0);
+      dy = GetDeviceAddress<T>(inputs, 1);
+    }
     T *dx = GetDeviceAddress<T>(outputs, 0);
 
     const float alpha = 1;
@@ -59,18 +68,24 @@ class ReluGradGpuKernel : public GpuKernel {
     return true;
   }
   bool Init(const CNodePtr &kernel_node) override {
+    auto node_name = AnfAlgo::GetCNodeName(kernel_node);
+    auto iter = kernel_map.find(node_name);
+    if (iter == kernel_map.end()) {
+      MS_LOG(EXCEPTION) << "Kernel: " << node_name << " not support.";
+    }
+    mode_ = iter->second;
     InitResource();
     cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0)));
     size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
     if (input_num != 2) {
-      MS_LOG(ERROR) << "Argument number is " << input_num << ", but ReluGradGpuKernel needs 2.";
+      MS_LOG(ERROR) << "Argument number is " << input_num << ", but ActivationGradGpuKernel needs 2.";
       return false;
     }
     auto input_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
-    mode_ = CUDNN_ACTIVATION_RELU;
     is_null_input_ = CHECK_NULL_INPUT(input_shape);
     if (is_null_input_) {
-      MS_LOG(WARNING) << "ReluGradGpuKernel input is null.";
+      MS_LOG(WARNING) << "ActivationGradGpuKernel input is null.";
       InitSizeLists();
       return true;
     }
@@ -110,6 +125,10 @@ class ReluGradGpuKernel : public GpuKernel {
     CHECK_CUDNN_RET_WITH_ERROR(cudnnDestroyTensorDescriptor(data_descriptor_),
                                "cudnnDestroyTensorDescriptor failed");
   }
+  std::map<std::string, cudnnActivationMode_t> kernel_map = {{"ReluGrad", CUDNN_ACTIVATION_RELU},
+                                                             {"TanhGrad", CUDNN_ACTIVATION_TANH},
+                                                             {"ELUGrad", CUDNN_ACTIVATION_ELU},
+                                                             {"SigmoidGrad", CUDNN_ACTIVATION_SIGMOID}};
   cudnnHandle_t cudnn_handle_;
   cudnnActivationDescriptor_t activation_desc_;
   cudnnActivationMode_t mode_;
```
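Note the new branch in `Launch`: for `ReluGrad` and `ELUGrad` the node's inputs arrive as `(dy, y)`, while for `TanhGrad` and `SigmoidGrad` they arrive as `(y, dy)`, so the kernel picks device addresses by activation mode before calling into cuDNN. The TanhGrad contract itself is easy to sanity-check in NumPy (a standalone sketch, not MindSpore code):

```python
import numpy as np

x = np.random.randn(8).astype(np.float32)
y = np.tanh(x)                      # forward output, saved for backward
dy = np.random.randn(8).astype(np.float32)

dx = dy * (1 - y * y)               # what TanhGrad computes from (y, dy)

# Cross-check against a central-difference derivative of tanh.
eps = 1e-3
numeric = (np.tanh(x + eps) - np.tanh(x - eps)) / (2 * eps)
assert np.allclose(dx, dy * numeric, atol=1e-4)
```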
mindspore/ccsrc/kernel/gpu/nn/tanh_gpu_kernel.cc (deleted, 100644 → 0; view file @ 0c3d96a9)
```cpp
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "kernel/gpu/nn/tanh_gpu_kernel.h"

namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_ONE(Tanh, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
                      TanhGpuKernel, float)
}  // namespace kernel
}  // namespace mindspore
```
mindspore/ccsrc/kernel/gpu/nn/tanh_gpu_kernel.h (deleted, 100644 → 0; view file @ 0c3d96a9)
```cpp
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_NN_TANH_GPU_KERNEL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_NN_TANH_GPU_KERNEL_H_

#include <cuda_runtime_api.h>
#include <vector>
#include <memory>
#include "kernel/gpu/gpu_kernel.h"
#include "kernel/gpu/gpu_kernel_factory.h"
#include "kernel/gpu/cuda_impl/tanh_impl.cuh"

namespace mindspore {
namespace kernel {
template <typename T>
class TanhGpuKernel : public GpuKernel {
 public:
  TanhGpuKernel() : input_size_(0) {}
  ~TanhGpuKernel() override = default;

  const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
  const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
  const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }

  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
              const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
    auto x_addr = GetDeviceAddress<T>(inputs, 0);
    auto y_addr = GetDeviceAddress<T>(outputs, 0);
    Tanh(input_size_ / sizeof(T), x_addr, y_addr, reinterpret_cast<cudaStream_t>(stream_ptr));
    return true;
  }

  bool Init(const CNodePtr &kernel_node) override {
    auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
    input_size_ = sizeof(T);
    for (auto dim : input_shape) {
      input_size_ *= dim;
    }
    InitSizeLists();
    return true;
  }

 protected:
  void InitSizeLists() override {
    input_size_list_.push_back(input_size_);
    input_size_list_.push_back(input_size_);
    output_size_list_.push_back(input_size_);
  }

 private:
  std::vector<size_t> input_size_list_;
  std::vector<size_t> output_size_list_;
  std::vector<size_t> workspace_size_list_;
  size_t input_size_;
};
}  // namespace kernel
}  // namespace mindspore

#endif  // MINDSPORE_CCSRC_KERNEL_GPU_NN_LSTM_GPU_KERNEL_H_
```
mindspore/ccsrc/kernel/gpu/nn/tanh_grad_kernel.cc (deleted, 100644 → 0; view file @ 0c3d96a9)
```cpp
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "kernel/gpu/nn/tanh_grad_kernel.h"

namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_ONE(
  TanhGrad,
  KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
  TanhGradKernel, float)
}  // namespace kernel
}  // namespace mindspore
```
mindspore/ccsrc/kernel/gpu/nn/tanh_grad_kernel.h (deleted, 100644 → 0; view file @ 0c3d96a9)
```cpp
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_NN_TANH_GRAD_KERNEL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_NN_TANH_GRAD_KERNEL_H_

#include <cuda_runtime_api.h>
#include <vector>
#include <memory>
#include "kernel/gpu/gpu_kernel.h"
#include "kernel/gpu/gpu_kernel_factory.h"
#include "kernel/gpu/cuda_impl/tanh_impl.cuh"

namespace mindspore {
namespace kernel {
template <typename T>
class TanhGradKernel : public GpuKernel {
 public:
  TanhGradKernel() : input_size_(0) {}
  ~TanhGradKernel() override = default;

  const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
  const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
  const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }

  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
              const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
    auto y_addr = GetDeviceAddress<T>(inputs, 0);
    auto dy_addr = GetDeviceAddress<T>(inputs, 1);
    auto dx_addr = GetDeviceAddress<T>(outputs, 0);
    TanhGrad(input_size_ / sizeof(T), y_addr, dy_addr, dx_addr, reinterpret_cast<cudaStream_t>(stream_ptr));
    return true;
  }

  bool Init(const CNodePtr &kernel_node) override {
    auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
    input_size_ = sizeof(T);
    for (auto dim : input_shape) {
      input_size_ *= dim;
    }
    InitSizeLists();
    return true;
  }

 protected:
  void InitSizeLists() override {
    input_size_list_.push_back(input_size_);
    input_size_list_.push_back(input_size_);
    output_size_list_.push_back(input_size_);
  }

 private:
  std::vector<size_t> input_size_list_;
  std::vector<size_t> output_size_list_;
  std::vector<size_t> workspace_size_list_;
  size_t input_size_;
};
}  // namespace kernel
}  // namespace mindspore

#endif  // MINDSPORE_CCSRC_KERNEL_GPU_NN_TANH_GRAD_KERNEL_H_
```
tests/st/ops/gpu/test_tanh_op.py (view file @ 19e66f06)
```diff
@@ -72,3 +72,40 @@ def test_Tanh():
               [1.78391056, 0.44159236, 0.33690308, 0.16800483, -0.13651318, -0.63878956, 0.18175511, 0.65280384]]
     assert np.allclose(output[0].asnumpy(), expect)
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_Tanh_fp16():
+    np.random.seed(42)
+    x_np = np.random.randn(5, 3, 6).astype(np.float16)
+    dy_np = np.random.randn(5, 3, 6).astype(np.float16)
+
+    x_ms = Tensor(x_np)
+    dy_ms = Tensor(dy_np)
+
+    net = TanhNet()
+    grad = Grad(net)
+    output = grad(x_ms, dy_ms)
+
+    expect = [[[0.0766, 0.95, -0.474, -0.0568, -0.3713, -1.387],
+               [0.04626, 0.1521, 0.004135, -0.1771, -1.149, -0.341],
+               [-0.3235, -0.0666, -0.01921, 0.299, 0.7764, 0.1583]],
+              [[0.124, -0.0157, -0.3682, -0.0252, 0.05997, 0.51],
+               [-0.145, 0.2979, -0.01145, -1.019, 0.8125, 0.6914],
+               [0.562, -0.0848, 1.402, -0.5386, 0.318, 0.645]],
+              [[-0.9487, -0.04343, 0.02448, -0.4844, -0.939, 0.0666],
+               [-1.049, 0.433, -0.1724, 0.9604, -0.6377, -0.1241],
+               [0.7246, -0.1364, 0.2051, 1.132, -1.049, 0.1298]],
+              [[0.104, 0.3643, -0.6562, -1.202, 0.4688, 0.1294],
+               [0.2008, 0.3347, -0.2418, 0.07135, 0.1611, -0.1667],
+               [1.856, 0.1979, -1.048, 0.4443, -0.8574, 0.1329]],
+              [[1.156, -0.1322, 0.02069, 0.2241, 0.8164, 1.736],
+               [-0.2433, -0.05484, -0.848, -0.7197, -0.01453, 0.2637],
+               [0.1528, 0.6494, 0.006195, 1.307, -0.2024, 2.113]]]
+    assert np.allclose(output[0].asnumpy(), expect, rtol=1e-3, atol=1e-3)
```
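The looser tolerances in the new test (`rtol=1e-3, atol=1e-3`) reflect float16 precision: with a 10-bit mantissa, half-precision machine epsilon is 2^-10 ≈ 9.8e-4, so agreement much tighter than about 1e-3 cannot be expected. A quick check:

```python
import numpy as np
print(np.finfo(np.float16).eps)  # 0.000977, i.e. 2**-10
```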