Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
86434818
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
86434818
编写于
1月 12, 2022
作者:
Z
Zhang Ting
提交者:
GitHub
1月 12, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[part 2]change type of function args (#38886)
上级
df5d55bb
变更
1
显示空白变更内容
内联
并排
Showing
1 changed file
with
111 addition
and
112 deletion
+111
-112
paddle/fluid/operators/activation_op.cu
paddle/fluid/operators/activation_op.cu
+111
-112
未找到文件。
paddle/fluid/operators/activation_op.cu
浏览文件 @
86434818
...
...
@@ -24,7 +24,7 @@ struct CudaReluFunctor : public BaseActivationFunctor<T> {
T
zero
=
static_cast
<
T
>
(
0.0
f
);
// relu(x) = max(x, 0)
__device__
__forceinline__
T
operator
()(
const
T
&
x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
x
)
const
{
return
x
>
zero
?
x
:
zero
;
}
};
...
...
@@ -34,7 +34,7 @@ struct CudaReluGradFunctor : public BaseActivationFunctor<T> {
T
zero
=
static_cast
<
T
>
(
0.0
f
);
// dx = dout * (out > 0)
__device__
__forceinline__
T
operator
()(
const
T
&
dout
,
const
T
&
out
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
dout
,
const
T
out
)
const
{
return
out
>
zero
?
dout
:
zero
;
}
...
...
@@ -51,7 +51,7 @@ struct CudaLeakyReluFunctor : public BaseActivationFunctor<T> {
}
// leakyrelu(x) = x > 0 ? x : alpha * x
__device__
__forceinline__
T
operator
()(
const
T
&
x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
x
)
const
{
return
x
>
zero
?
x
:
static_cast
<
T
>
(
alpha
)
*
x
;
}
};
...
...
@@ -66,7 +66,7 @@ struct CudaLeakyReluGradFunctor : public BaseActivationFunctor<T> {
}
// dx = dout * (x > 0 ? 1 : alpha)
__device__
__forceinline__
T
operator
()(
const
T
&
dout
,
const
T
&
x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
dout
,
const
T
x
)
const
{
return
x
>
zero
?
dout
:
static_cast
<
T
>
(
alpha
)
*
dout
;
}
...
...
@@ -79,7 +79,7 @@ struct CudaSigmoidFunctor : public BaseActivationFunctor<T> {
MPType
one
=
static_cast
<
MPType
>
(
1.0
f
);
// sigmoid(x) = 1 / (1 + exp(-x))
__device__
__forceinline__
T
operator
()(
const
T
&
arg_x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
arg_x
)
const
{
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
return
static_cast
<
T
>
(
one
/
(
one
+
exp
(
-
x
)));
}
...
...
@@ -90,7 +90,7 @@ struct CudaSigmoidGradFunctor : public BaseActivationFunctor<T> {
T
one
=
static_cast
<
T
>
(
1.0
f
);
// dx = dout * out * (1 - out)
__device__
__forceinline__
T
operator
()(
const
T
&
dout
,
const
T
&
out
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
dout
,
const
T
out
)
const
{
return
dout
*
out
*
(
one
-
out
);
}
...
...
@@ -103,7 +103,7 @@ struct CudaSiluFunctor : public BaseActivationFunctor<T> {
MPType
one
=
static_cast
<
MPType
>
(
1.0
f
);
// silu(x) = x / (1 + exp(-x))
__device__
__forceinline__
T
operator
()(
const
T
&
arg_x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
arg_x
)
const
{
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
return
static_cast
<
T
>
(
x
/
(
one
+
exp
(
-
x
)));
}
...
...
@@ -115,8 +115,8 @@ struct CudaSiluGradFunctor : public BaseActivationFunctor<T> {
MPType
one
=
static_cast
<
MPType
>
(
1.0
f
);
// dx = dout * (1 + exp(-x) + x * exp(-x)) / (1 + exp(-x))^2
__device__
__forceinline__
T
operator
()(
const
T
&
arg_dout
,
const
T
&
arg_x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
arg_dout
,
const
T
arg_x
)
const
{
MPType
dout
=
static_cast
<
MPType
>
(
arg_dout
);
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
MPType
temp
=
one
/
(
one
+
exp
(
-
x
));
...
...
@@ -135,7 +135,7 @@ struct CudaLogSigmoidFunctor : public BaseActivationFunctor<T> {
// For numerical stability,
// logsigmoid(x) =
// - (max(-x, 0) + log(exp(-max(-x, 0)) + exp(-x - max(-x, 0))))
__device__
__forceinline__
T
operator
()(
const
T
&
arg_x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
arg_x
)
const
{
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
MPType
temp
=
x
>
zero
?
zero
:
-
x
;
return
static_cast
<
T
>
(
-
temp
-
log
(
exp
(
-
temp
)
+
exp
(
-
x
-
temp
)));
...
...
@@ -151,8 +151,8 @@ struct CudaLogSigmoidGradFunctor : public BaseActivationFunctor<T> {
// For numerical stability:
// dx = dout * exp(-x - max(-x, 0)) / (exp(-max(-x, 0)) + exp(-x - max(-x,
// 0)))
__device__
__forceinline__
T
operator
()(
const
T
&
arg_dout
,
const
T
&
arg_x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
arg_dout
,
const
T
arg_x
)
const
{
MPType
dout
=
static_cast
<
MPType
>
(
arg_dout
);
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
MPType
temp1
=
x
>
zero
?
zero
:
-
x
;
...
...
@@ -168,7 +168,7 @@ struct CudaAtanFunctor : public BaseActivationFunctor<T> {
using
MPType
=
typename
details
::
MPTypeTrait
<
T
>::
Type
;
// atan(x) = atan(x)
__device__
__forceinline__
T
operator
()(
const
T
&
arg_x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
arg_x
)
const
{
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
return
static_cast
<
T
>
(
atan
(
x
));
}
...
...
@@ -179,7 +179,7 @@ struct CudaAtanGradFunctor : public BaseActivationFunctor<T> {
T
one
=
static_cast
<
T
>
(
1.0
f
);
// dx = dout / (1 + x^2)
__device__
__forceinline__
T
operator
()(
const
T
&
dout
,
const
T
&
x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
dout
,
const
T
x
)
const
{
return
dout
/
(
one
+
x
*
x
);
}
...
...
@@ -197,7 +197,7 @@ struct CudaSoftShrinkFunctor : public BaseActivationFunctor<T> {
// softshrink(x) = x - lambda, if x > lambda;
// x + lambda, if x < -lambda;
// 0, otherwise.
__device__
__forceinline__
T
operator
()(
const
T
&
x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
x
)
const
{
T
l
=
static_cast
<
T
>
(
lambda
);
T
temp1
=
static_cast
<
T
>
(
x
>
l
);
T
temp2
=
static_cast
<
T
>
(
x
<
-
l
);
...
...
@@ -215,7 +215,7 @@ struct CudaSoftShrinkGradFunctor : public BaseActivationFunctor<T> {
}
// dx = dout, if x > lambda or x < -lambda else 0
__device__
__forceinline__
T
operator
()(
const
T
&
dout
,
const
T
&
x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
dout
,
const
T
x
)
const
{
T
l
=
static_cast
<
T
>
(
lambda
);
return
(
x
>=
-
l
&&
x
<=
l
)
?
zero
:
dout
;
}
...
...
@@ -228,7 +228,7 @@ struct CudaCeilFunctor : public BaseActivationFunctor<T> {
using
MPType
=
typename
details
::
MPTypeTrait
<
T
>::
Type
;
// ceil(x) = ceil(x)
__device__
__forceinline__
T
operator
()(
const
T
&
arg_x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
arg_x
)
const
{
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
return
static_cast
<
T
>
(
ceil
(
x
));
}
...
...
@@ -239,7 +239,7 @@ struct CudaFloorFunctor : public BaseActivationFunctor<T> {
using
MPType
=
typename
details
::
MPTypeTrait
<
T
>::
Type
;
// floor(x) = floor(x)
__device__
__forceinline__
T
operator
()(
const
T
&
arg_x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
arg_x
)
const
{
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
return
static_cast
<
T
>
(
floor
(
x
));
}
...
...
@@ -250,7 +250,7 @@ struct CudaRoundFunctor : public BaseActivationFunctor<T> {
using
MPType
=
typename
details
::
MPTypeTrait
<
T
>::
Type
;
// round(x) = round(x)
__device__
__forceinline__
T
operator
()(
const
T
&
arg_x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
arg_x
)
const
{
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
return
static_cast
<
T
>
(
round
(
x
));
}
...
...
@@ -259,7 +259,7 @@ struct CudaRoundFunctor : public BaseActivationFunctor<T> {
// GradFunctor for ceil, floor and round
template
<
typename
T
>
struct
CudaZeroGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
__device__
__forceinline__
T
operator
()(
const
T
&
x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
x
)
const
{
return
static_cast
<
T
>
(
0.0
f
);
}
...
...
@@ -271,7 +271,7 @@ struct CudaCosFunctor : public BaseActivationFunctor<T> {
using
MPType
=
typename
details
::
MPTypeTrait
<
T
>::
Type
;
// cos(x) = cos(x)
__device__
__forceinline__
T
operator
()(
const
T
&
arg_x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
arg_x
)
const
{
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
return
static_cast
<
T
>
(
cos
(
x
));
}
...
...
@@ -282,8 +282,8 @@ struct CudaCosGradFunctor : public BaseActivationFunctor<T> {
using
MPType
=
typename
details
::
MPTypeTrait
<
T
>::
Type
;
// dx = dout * (-sin(x))
__device__
__forceinline__
T
operator
()(
const
T
&
arg_dout
,
const
T
&
arg_x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
arg_dout
,
const
T
arg_x
)
const
{
MPType
dout
=
static_cast
<
MPType
>
(
arg_dout
);
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
return
static_cast
<
T
>
(
-
dout
*
sin
(
x
));
...
...
@@ -297,7 +297,7 @@ struct CudaSinFunctor : public BaseActivationFunctor<T> {
using
MPType
=
typename
details
::
MPTypeTrait
<
T
>::
Type
;
// sin(x) = sin(x)
__device__
__forceinline__
T
operator
()(
const
T
&
arg_x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
arg_x
)
const
{
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
return
static_cast
<
T
>
(
sin
(
x
));
}
...
...
@@ -308,8 +308,8 @@ struct CudaSinGradFunctor : public BaseActivationFunctor<T> {
using
MPType
=
typename
details
::
MPTypeTrait
<
T
>::
Type
;
// dx = dout * cos(x)
__device__
__forceinline__
T
operator
()(
const
T
&
arg_dout
,
const
T
&
arg_x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
arg_dout
,
const
T
arg_x
)
const
{
MPType
dout
=
static_cast
<
MPType
>
(
arg_dout
);
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
return
static_cast
<
T
>
(
dout
*
cos
(
x
));
...
...
@@ -323,7 +323,7 @@ struct CudaTanFunctor : public BaseActivationFunctor<T> {
using
MPType
=
typename
details
::
MPTypeTrait
<
T
>::
Type
;
// tan(x) = tan(x)
__device__
__forceinline__
T
operator
()(
const
T
&
arg_x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
arg_x
)
const
{
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
return
static_cast
<
T
>
(
tan
(
x
));
}
...
...
@@ -334,8 +334,8 @@ struct CudaTanGradFunctor : public BaseActivationFunctor<T> {
using
MPType
=
typename
details
::
MPTypeTrait
<
T
>::
Type
;
// dx = dout / cos(x)^2
__device__
__forceinline__
T
operator
()(
const
T
&
arg_dout
,
const
T
&
arg_x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
arg_dout
,
const
T
arg_x
)
const
{
MPType
dout
=
static_cast
<
MPType
>
(
arg_dout
);
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
return
static_cast
<
T
>
(
dout
/
(
cos
(
x
)
*
cos
(
x
)));
...
...
@@ -349,7 +349,7 @@ struct CudaAsinFunctor : public BaseActivationFunctor<T> {
using
MPType
=
typename
details
::
MPTypeTrait
<
T
>::
Type
;
// asin(x) = asin(x)
__device__
__forceinline__
T
operator
()(
const
T
&
arg_x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
arg_x
)
const
{
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
return
static_cast
<
T
>
(
asin
(
x
));
}
...
...
@@ -361,8 +361,8 @@ struct CudaAsinGradFunctor : public BaseActivationFunctor<T> {
MPType
one
=
static_cast
<
MPType
>
(
1.0
f
);
// dx = dout / sqrt(1 - x^2)
__device__
__forceinline__
T
operator
()(
const
T
&
arg_dout
,
const
T
&
arg_x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
arg_dout
,
const
T
arg_x
)
const
{
MPType
dout
=
static_cast
<
MPType
>
(
arg_dout
);
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
return
static_cast
<
T
>
(
dout
/
sqrt
(
one
-
x
*
x
));
...
...
@@ -376,7 +376,7 @@ struct CudaAcosFunctor : public BaseActivationFunctor<T> {
using
MPType
=
typename
details
::
MPTypeTrait
<
T
>::
Type
;
// acos(x) = acos(x)
__device__
__forceinline__
T
operator
()(
const
T
&
arg_x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
arg_x
)
const
{
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
return
static_cast
<
T
>
(
acos
(
x
));
}
...
...
@@ -388,8 +388,8 @@ struct CudaAcosGradFunctor : public BaseActivationFunctor<T> {
MPType
one
=
static_cast
<
MPType
>
(
1.0
f
);
// dx = -dout / sqrt(1 - x^2)
__device__
__forceinline__
T
operator
()(
const
T
&
arg_dout
,
const
T
&
arg_x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
arg_dout
,
const
T
arg_x
)
const
{
MPType
dout
=
static_cast
<
MPType
>
(
arg_dout
);
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
return
static_cast
<
T
>
(
-
dout
/
sqrt
(
one
-
x
*
x
));
...
...
@@ -403,7 +403,7 @@ struct CudaCoshFunctor : public BaseActivationFunctor<T> {
using
MPType
=
typename
details
::
MPTypeTrait
<
T
>::
Type
;
// cosh(x) = cosh(x)
__device__
__forceinline__
T
operator
()(
const
T
&
arg_x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
arg_x
)
const
{
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
return
static_cast
<
T
>
(
cosh
(
x
));
}
...
...
@@ -414,8 +414,8 @@ struct CudaCoshGradFunctor : public BaseActivationFunctor<T> {
using
MPType
=
typename
details
::
MPTypeTrait
<
T
>::
Type
;
// dx = dout * sinh(x)
__device__
__forceinline__
T
operator
()(
const
T
&
arg_dout
,
const
T
&
arg_x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
arg_dout
,
const
T
arg_x
)
const
{
MPType
dout
=
static_cast
<
MPType
>
(
arg_dout
);
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
return
static_cast
<
T
>
(
dout
*
sinh
(
x
));
...
...
@@ -429,7 +429,7 @@ struct CudaSinhFunctor : public BaseActivationFunctor<T> {
using
MPType
=
typename
details
::
MPTypeTrait
<
T
>::
Type
;
// sinh(x) = sinh(x)
__device__
__forceinline__
T
operator
()(
const
T
&
arg_x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
arg_x
)
const
{
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
return
static_cast
<
T
>
(
sinh
(
x
));
}
...
...
@@ -440,8 +440,8 @@ struct CudaSinhGradFunctor : public BaseActivationFunctor<T> {
using
MPType
=
typename
details
::
MPTypeTrait
<
T
>::
Type
;
// dx = dout * cosh(x)
__device__
__forceinline__
T
operator
()(
const
T
&
arg_dout
,
const
T
&
arg_x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
arg_dout
,
const
T
arg_x
)
const
{
MPType
dout
=
static_cast
<
MPType
>
(
arg_dout
);
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
return
static_cast
<
T
>
(
dout
*
cosh
(
x
));
...
...
@@ -455,7 +455,7 @@ struct CudaTanhFunctor : public BaseActivationFunctor<T> {
using
MPType
=
typename
details
::
MPTypeTrait
<
T
>::
Type
;
// tanh(x) = tanh(x)
__device__
__forceinline__
T
operator
()(
const
T
&
arg_x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
arg_x
)
const
{
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
return
static_cast
<
T
>
(
tanh
(
x
));
}
...
...
@@ -466,7 +466,7 @@ struct CudaTanhGradFunctor : public BaseActivationFunctor<T> {
T
one
=
static_cast
<
T
>
(
1.0
f
);
// dx = dout * (1 - out^2)
__device__
__forceinline__
T
operator
()(
const
T
&
dout
,
const
T
&
out
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
dout
,
const
T
out
)
const
{
return
dout
*
(
one
-
out
*
out
);
}
...
...
@@ -478,7 +478,7 @@ struct CudaAcoshFunctor : public BaseActivationFunctor<T> {
using
MPType
=
typename
details
::
MPTypeTrait
<
T
>::
Type
;
// Acosh(x) = acosh(x)
__device__
__forceinline__
T
operator
()(
const
T
&
arg_x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
arg_x
)
const
{
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
return
static_cast
<
T
>
(
acosh
(
x
));
}
...
...
@@ -489,8 +489,8 @@ struct CudaAcoshGradFunctor : public BaseActivationFunctor<T> {
using
MPType
=
typename
details
::
MPTypeTrait
<
T
>::
Type
;
MPType
one
=
static_cast
<
MPType
>
(
1.0
f
);
// dx = dout * 1 / sqrt(x^2 - 1)
__device__
__forceinline__
T
operator
()(
const
T
&
arg_dout
,
const
T
&
arg_x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
arg_dout
,
const
T
arg_x
)
const
{
MPType
dout
=
static_cast
<
MPType
>
(
arg_dout
);
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
return
static_cast
<
T
>
(
dout
*
one
/
sqrt
(
x
*
x
-
one
));
...
...
@@ -504,7 +504,7 @@ struct CudaAsinhFunctor : public BaseActivationFunctor<T> {
using
MPType
=
typename
details
::
MPTypeTrait
<
T
>::
Type
;
// Asinh(x) = asinh(x)
__device__
__forceinline__
T
operator
()(
const
T
&
arg_x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
arg_x
)
const
{
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
return
static_cast
<
T
>
(
asinh
(
x
));
}
...
...
@@ -516,8 +516,8 @@ struct CudaAsinhGradFunctor : public BaseActivationFunctor<T> {
MPType
one
=
static_cast
<
MPType
>
(
1.0
f
);
// dx = dout * 1/sqrt(x^2 + 1)
__device__
__forceinline__
T
operator
()(
const
T
&
arg_dout
,
const
T
&
arg_x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
arg_dout
,
const
T
arg_x
)
const
{
MPType
dout
=
static_cast
<
MPType
>
(
arg_dout
);
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
return
static_cast
<
T
>
(
dout
*
one
/
sqrt
(
x
*
x
+
one
));
...
...
@@ -531,7 +531,7 @@ struct CudaAtanhFunctor : public BaseActivationFunctor<T> {
using
MPType
=
typename
details
::
MPTypeTrait
<
T
>::
Type
;
// Atanh(x) = atanh(x)
__device__
__forceinline__
T
operator
()(
const
T
&
arg_x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
arg_x
)
const
{
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
return
static_cast
<
T
>
(
atanh
(
x
));
}
...
...
@@ -542,8 +542,8 @@ struct CudaAtanhGradFunctor : public BaseActivationFunctor<T> {
using
MPType
=
typename
details
::
MPTypeTrait
<
T
>::
Type
;
MPType
one
=
static_cast
<
MPType
>
(
1.0
f
);
// dx = dout * 1/(1- x^2)
__device__
__forceinline__
T
operator
()(
const
T
&
arg_dout
,
const
T
&
arg_x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
arg_dout
,
const
T
arg_x
)
const
{
MPType
dout
=
static_cast
<
MPType
>
(
arg_dout
);
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
return
static_cast
<
T
>
(
dout
*
one
/
(
one
-
x
*
x
));
...
...
@@ -557,13 +557,13 @@ struct CudaReciprocalFunctor : public BaseActivationFunctor<T> {
T
one
=
static_cast
<
T
>
(
1.0
f
);
// reciprocal(x) = 1 / x
__device__
__forceinline__
T
operator
()(
const
T
&
x
)
const
{
return
one
/
x
;
}
__device__
__forceinline__
T
operator
()(
const
T
x
)
const
{
return
one
/
x
;
}
};
template
<
typename
T
>
struct
CudaReciprocalGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
// dx = -dout * out^2
__device__
__forceinline__
T
operator
()(
const
T
&
dout
,
const
T
&
out
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
dout
,
const
T
out
)
const
{
return
-
dout
*
out
*
out
;
}
...
...
@@ -575,7 +575,7 @@ struct CudaExpFunctor : public BaseActivationFunctor<T> {
using
MPType
=
typename
details
::
MPTypeTrait
<
T
>::
Type
;
// exp(x) = exp(x)
__device__
__forceinline__
T
operator
()(
const
T
&
arg_x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
arg_x
)
const
{
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
return
static_cast
<
T
>
(
exp
(
x
));
}
...
...
@@ -584,7 +584,7 @@ struct CudaExpFunctor : public BaseActivationFunctor<T> {
template
<
typename
T
>
struct
CudaExpGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
// dx = dout * out
__device__
__forceinline__
T
operator
()(
const
T
&
dout
,
const
T
&
out
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
dout
,
const
T
out
)
const
{
return
dout
*
out
;
}
...
...
@@ -596,7 +596,7 @@ struct CudaExpm1Functor : public BaseActivationFunctor<T> {
using
MPType
=
typename
details
::
MPTypeTrait
<
T
>::
Type
;
// expm1(x) = expm1(x)
__device__
__forceinline__
T
operator
()(
const
T
&
arg_x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
arg_x
)
const
{
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
return
static_cast
<
T
>
(
expm1
(
x
));
}
...
...
@@ -605,7 +605,7 @@ struct CudaExpm1Functor : public BaseActivationFunctor<T> {
template
<
typename
T
>
struct
CudaExpm1GradFunctor
:
public
BaseActivationFunctor
<
T
>
{
// dx = dout * out + dout  (i.e. dout * exp(x), since out = expm1(x))
__device__
__forceinline__
T
operator
()(
const
T
&
dout
,
const
T
&
out
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
dout
,
const
T
out
)
const
{
return
dout
*
out
+
dout
;
}
...
...
@@ -617,7 +617,7 @@ struct CudaLogFunctor : public BaseActivationFunctor<T> {
using
MPType
=
typename
details
::
MPTypeTrait
<
T
>::
Type
;
// log(x) = log(x)
__device__
__forceinline__
T
operator
()(
const
T
&
arg_x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
arg_x
)
const
{
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
return
static_cast
<
T
>
(
log
(
x
));
}
...
...
@@ -626,7 +626,7 @@ struct CudaLogFunctor : public BaseActivationFunctor<T> {
template
<
typename
T
>
struct
CudaLogGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
// dx = dout / x
__device__
__forceinline__
T
operator
()(
const
T
&
dout
,
const
T
&
x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
dout
,
const
T
x
)
const
{
return
dout
/
x
;
}
...
...
@@ -636,7 +636,7 @@ struct CudaLogGradFunctor : public BaseActivationFunctor<T> {
// Element-wise square functor: square(x) = x * x.
//
// The call operator is callable from device code and takes its argument by
// value, like the other functors' updated signatures in this file. Only one
// overload is kept: declaring both `operator()(const T&)` and
// `operator()(const T)` would make every call `functor(x)` ambiguous,
// because overload resolution cannot rank the two.
template <typename T>
struct CudaSquareFunctor : public BaseActivationFunctor<T> {
  // square(x) = x * x
  __device__ __forceinline__ T operator()(const T x) const { return x * x; }
};
template
<
typename
T
>
...
...
@@ -644,7 +644,7 @@ struct CudaSquareGradFunctor : public BaseActivationFunctor<T> {
T
two
=
static_cast
<
T
>
(
2.0
f
);
// dx = dout * 2 * x
__device__
__forceinline__
T
operator
()(
const
T
&
dout
,
const
T
&
x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
dout
,
const
T
x
)
const
{
return
dout
*
two
*
x
;
}
...
...
@@ -656,7 +656,7 @@ struct CudaSqrtFunctor : public BaseActivationFunctor<T> {
using
MPType
=
typename
details
::
MPTypeTrait
<
T
>::
Type
;
// sqrt(x) = sqrt(x)
__device__
__forceinline__
T
operator
()(
const
T
&
arg_x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
arg_x
)
const
{
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
return
static_cast
<
T
>
(
sqrt
(
x
));
}
...
...
@@ -667,7 +667,7 @@ struct CudaSqrtGradFunctor : public BaseActivationFunctor<T> {
T
one_half
=
static_cast
<
T
>
(
0.5
f
);
// dx = dout * 0.5 / out
__device__
__forceinline__
T
operator
()(
const
T
&
dout
,
const
T
&
out
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
dout
,
const
T
out
)
const
{
return
one_half
*
dout
/
out
;
}
...
...
@@ -679,7 +679,7 @@ struct CudaRsqrtFunctor : public BaseActivationFunctor<T> {
using
MPType
=
typename
details
::
MPTypeTrait
<
T
>::
Type
;
// rsqrt(x) = rsqrt(x)
__device__
__forceinline__
T
operator
()(
const
T
&
arg_x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
arg_x
)
const
{
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
return
static_cast
<
T
>
(
rsqrt
(
x
));
}
...
...
@@ -690,7 +690,7 @@ struct CudaRsqrtGradFunctor : public BaseActivationFunctor<T> {
T
minus_one_half
=
static_cast
<
T
>
(
-
0.5
f
);
// dx = -0.5 * dout * out^3
__device__
__forceinline__
T
operator
()(
const
T
&
dout
,
const
T
&
out
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
dout
,
const
T
out
)
const
{
return
minus_one_half
*
dout
*
out
*
out
*
out
;
}
...
...
@@ -703,7 +703,7 @@ struct CudaLog1pFunctor : public BaseActivationFunctor<T> {
MPType
one
=
static_cast
<
MPType
>
(
1.0
f
);
// log1p(x) = log(1 + x)
__device__
__forceinline__
T
operator
()(
const
T
&
arg_x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
arg_x
)
const
{
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
return
static_cast
<
T
>
(
log
(
one
+
x
));
}
...
...
@@ -714,7 +714,7 @@ struct CudaLog1pGradFunctor : public BaseActivationFunctor<T> {
T
one
=
static_cast
<
T
>
(
1.0
f
);
// dx = dout / (1 + x)
__device__
__forceinline__
T
operator
()(
const
T
&
dout
,
const
T
&
x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
dout
,
const
T
x
)
const
{
return
dout
/
(
one
+
x
);
}
...
...
@@ -726,7 +726,7 @@ struct CudaLog2Functor : public BaseActivationFunctor<T> {
using
MPType
=
typename
details
::
MPTypeTrait
<
T
>::
Type
;
// log2(x) = log2(x)
__device__
__forceinline__
T
operator
()(
const
T
&
arg_x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
arg_x
)
const
{
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
return
static_cast
<
T
>
(
log2
(
x
));
}
...
...
@@ -738,7 +738,7 @@ struct CudaLog2GradFunctor : public BaseActivationFunctor<T> {
T
log_two
=
static_cast
<
T
>
(
log
(
static_cast
<
MPType
>
(
2.0
f
)));
// dx = dout / (x * log(2))
__device__
__forceinline__
T
operator
()(
const
T
&
dout
,
const
T
&
x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
dout
,
const
T
x
)
const
{
return
dout
/
(
x
*
log_two
);
}
...
...
@@ -750,7 +750,7 @@ struct CudaLog10Functor : public BaseActivationFunctor<T> {
using
MPType
=
typename
details
::
MPTypeTrait
<
T
>::
Type
;
// log10(x) = log10(x)
__device__
__forceinline__
T
operator
()(
const
T
&
arg_x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
arg_x
)
const
{
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
return
static_cast
<
T
>
(
log10
(
x
));
}
...
...
@@ -762,7 +762,7 @@ struct CudaLog10GradFunctor : public BaseActivationFunctor<T> {
T
log_ten
=
static_cast
<
T
>
(
log
(
static_cast
<
MPType
>
(
10.0
f
)));
// dx = dout / (x * log(10))
__device__
__forceinline__
T
operator
()(
const
T
&
dout
,
const
T
&
x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
dout
,
const
T
x
)
const
{
return
dout
/
(
x
*
log_ten
);
}
...
...
@@ -779,7 +779,7 @@ struct CudaBReluFunctor : public BaseActivationFunctor<T> {
}
// brelu(x) = min(max(x, t_min), t_max)
__device__
__forceinline__
T
operator
()(
const
T
&
x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
x
)
const
{
T
t_min_cast
=
static_cast
<
T
>
(
t_min
);
T
t_max_cast
=
static_cast
<
T
>
(
t_max
);
T
temp_max
=
x
>
t_min_cast
?
x
:
t_min_cast
;
...
...
@@ -799,7 +799,7 @@ struct CudaBReluGradFunctor : public BaseActivationFunctor<T> {
}
// dx = (x > t_min && x < t_max) ? dout : 0
__device__
__forceinline__
T
operator
()(
const
T
&
dout
,
const
T
&
x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
dout
,
const
T
x
)
const
{
T
t_min_cast
=
static_cast
<
T
>
(
t_min
);
T
t_max_cast
=
static_cast
<
T
>
(
t_max
);
return
(
x
>
t_min_cast
&&
x
<
t_max_cast
)
?
dout
:
zero
;
...
...
@@ -820,7 +820,7 @@ struct CudaSoftReluFunctor : public BaseActivationFunctor<T> {
// soft_relu(x) = log(1 + exp(max(min(x, threshold), -threshold)))
// threshold should not be negative
__device__
__forceinline__
T
operator
()(
const
T
&
arg_x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
arg_x
)
const
{
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
MPType
t
=
static_cast
<
MPType
>
(
threshold
);
MPType
temp_min
=
x
<
t
?
x
:
t
;
...
...
@@ -841,8 +841,8 @@ struct CudaSoftReluGradFunctor : public BaseActivationFunctor<T> {
// dx = (out > -threshold && out < threshold) ? dout * (1 - exp(-out)) : 0
// threshold should not be negative
__device__
__forceinline__
T
operator
()(
const
T
&
arg_dout
,
const
T
&
arg_out
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
arg_dout
,
const
T
arg_out
)
const
{
MPType
dout
=
static_cast
<
MPType
>
(
arg_dout
);
MPType
out
=
static_cast
<
MPType
>
(
arg_out
);
MPType
t
=
static_cast
<
MPType
>
(
threshold
);
...
...
@@ -864,7 +864,7 @@ struct CudaSTanhFunctor : public BaseActivationFunctor<T> {
}
// stanh(x) = b * tanh(a * x)
__device__
__forceinline__
T
operator
()(
const
T
&
arg_x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
arg_x
)
const
{
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
MPType
a
=
static_cast
<
MPType
>
(
scale_a
);
MPType
b
=
static_cast
<
MPType
>
(
scale_b
);
...
...
@@ -884,8 +884,8 @@ struct CudaSTanhGradFunctor : public BaseActivationFunctor<T> {
}
// dx = dout * a * b * (1 - tanh(a * x) * tanh(a * x))
__device__
__forceinline__
T
operator
()(
const
T
&
arg_dout
,
const
T
&
arg_x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
arg_dout
,
const
T
arg_x
)
const
{
MPType
dout
=
static_cast
<
MPType
>
(
arg_dout
);
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
MPType
a
=
static_cast
<
MPType
>
(
scale_a
);
...
...
@@ -909,7 +909,7 @@ struct CudaSoftplusFunctor : public BaseActivationFunctor<T> {
}
// softplus(x) = beta * x > threshold ? x : log(1 + exp(beta * x)) / beta
__device__
__forceinline__
T
operator
()(
const
T
&
arg_x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
arg_x
)
const
{
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
MPType
b
=
static_cast
<
MPType
>
(
beta
);
MPType
t
=
static_cast
<
MPType
>
(
threshold
);
...
...
@@ -930,8 +930,8 @@ struct CudaSoftplusGradFunctor : public BaseActivationFunctor<T> {
}
// dx = x * beta > threshold ? dout : dout / (1 + exp(-beta * x))
__device__
__forceinline__
T
operator
()(
const
T
&
arg_dout
,
const
T
&
arg_x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
arg_dout
,
const
T
arg_x
)
const
{
MPType
dout
=
static_cast
<
MPType
>
(
arg_dout
);
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
MPType
b
=
static_cast
<
MPType
>
(
beta
);
...
...
@@ -948,7 +948,7 @@ struct CudaSoftsignFunctor : public BaseActivationFunctor<T> {
T
one
=
static_cast
<
T
>
(
1.0
f
);
// softsign(x) = x / (1 + abs(x))
__device__
__forceinline__
T
operator
()(
const
T
&
x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
x
)
const
{
return
x
/
(
one
+
abs
(
x
));
}
};
...
...
@@ -958,7 +958,7 @@ struct CudaSoftsignGradFunctor : public BaseActivationFunctor<T> {
T
one
=
static_cast
<
T
>
(
1.0
f
);
// dx = dout / (1 + abs(x))^2
__device__
__forceinline__
T
operator
()(
const
T
&
dout
,
const
T
&
x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
dout
,
const
T
x
)
const
{
T
temp
=
one
+
abs
(
x
);
return
dout
/
(
temp
*
temp
);
}
...
...
@@ -976,7 +976,7 @@ struct CudaRelu6Functor : public BaseActivationFunctor<T> {
}
// relu6(x) = min(max(0, x), 6)
__device__
__forceinline__
T
operator
()(
const
T
&
x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
x
)
const
{
T
t
=
static_cast
<
T
>
(
threshold
);
return
x
<=
zero
?
zero
:
(
x
<
t
?
x
:
t
);
}
...
...
@@ -992,7 +992,7 @@ struct CudaRelu6GradFunctor : public BaseActivationFunctor<T> {
}
// dx = (out > 0 && out < t) ? dout : 0
__device__
__forceinline__
T
operator
()(
const
T
&
dout
,
const
T
&
out
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
dout
,
const
T
out
)
const
{
T
t
=
static_cast
<
T
>
(
threshold
);
return
(
out
>
zero
&&
out
<
t
)
?
dout
:
zero
;
}
...
...
@@ -1005,7 +1005,7 @@ struct CudaTanhShrinkFunctor : public BaseActivationFunctor<T> {
using
MPType
=
typename
details
::
MPTypeTrait
<
T
>::
Type
;
// tanhshrink(x) = x - tanh(x)
__device__
__forceinline__
T
operator
()(
const
T
&
arg_x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
arg_x
)
const
{
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
return
static_cast
<
T
>
(
x
-
tanh
(
x
));
}
...
...
@@ -1016,8 +1016,8 @@ struct CudaTanhShrinkGradFunctor : public BaseActivationFunctor<T> {
using
MPType
=
typename
details
::
MPTypeTrait
<
T
>::
Type
;
// dx = dout * tanh(x)^2
__device__
__forceinline__
T
operator
()(
const
T
&
arg_dout
,
const
T
&
arg_x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
arg_dout
,
const
T
arg_x
)
const
{
MPType
dout
=
static_cast
<
MPType
>
(
arg_dout
);
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
return
static_cast
<
T
>
(
dout
*
tanh
(
x
)
*
tanh
(
x
));
...
...
@@ -1036,7 +1036,7 @@ struct CudaHardShrinkFunctor : public BaseActivationFunctor<T> {
}
// hardshrink(x) = (x > -threshold && x < threshold) ? 0 : x
__device__
__forceinline__
T
operator
()(
const
T
&
x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
x
)
const
{
T
t
=
static_cast
<
T
>
(
threshold
);
return
(
x
>
-
t
&&
x
<
t
)
?
zero
:
x
;
}
...
...
@@ -1052,7 +1052,7 @@ struct CudaHardShrinkGradFunctor : public BaseActivationFunctor<T> {
}
// dx = (x > -threshold && x < threshold) ? 0 : dout
__device__
__forceinline__
T
operator
()(
const
T
&
dout
,
const
T
&
x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
dout
,
const
T
x
)
const
{
T
t
=
static_cast
<
T
>
(
threshold
);
return
(
x
>
-
t
&&
x
<
t
)
?
zero
:
dout
;
}
...
...
@@ -1074,7 +1074,7 @@ struct CudaHardSigmoidFunctor : public BaseActivationFunctor<T> {
// hard_sigmoid(x) = 0, when x <= -3
// 1, when x >= 3
// x * slope + offset, otherwise
__device__
__forceinline__
T
operator
()(
const
T
&
x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
x
)
const
{
T
temp
=
x
*
static_cast
<
T
>
(
slope
)
+
static_cast
<
T
>
(
offset
);
T
temp_max
=
temp
>
zero
?
temp
:
zero
;
T
temp_min
=
temp_max
<
one
?
temp_max
:
one
;
...
...
@@ -1094,7 +1094,7 @@ struct CudaHardSigmoidGradFunctor : public BaseActivationFunctor<T> {
}
// dx = (out > 0 && out < 1) ? dout * slope : 0
__device__
__forceinline__
T
operator
()(
const
T
&
dout
,
const
T
&
out
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
dout
,
const
T
out
)
const
{
return
(
out
>
zero
&&
out
<
one
)
?
dout
*
static_cast
<
T
>
(
slope
)
:
zero
;
}
...
...
@@ -1112,7 +1112,7 @@ struct CudaSwishFunctor : public BaseActivationFunctor<T> {
}
// swish(x) = x / (1 + exp(-beta * x))
__device__
__forceinline__
T
operator
()(
const
T
&
arg_x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
arg_x
)
const
{
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
MPType
b
=
static_cast
<
MPType
>
(
beta
);
return
static_cast
<
T
>
(
x
/
(
one
+
exp
(
-
b
*
x
)));
...
...
@@ -1130,8 +1130,8 @@ struct CudaSwishGradFunctor : public BaseActivationFunctor<T> {
}
// dx = dout * (1 + exp(-b * x) + b * x * exp(-b * x)) / (1 + exp(-b * x))^2
__device__
__forceinline__
T
operator
()(
const
T
&
arg_dout
,
const
T
&
arg_x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
arg_dout
,
const
T
arg_x
)
const
{
MPType
dout
=
static_cast
<
MPType
>
(
arg_dout
);
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
MPType
b
=
static_cast
<
MPType
>
(
beta
);
...
...
@@ -1159,7 +1159,7 @@ struct CudaMishFunctor : public BaseActivationFunctor<T> {
// softplus(x) = x, if x > threshold
// = ln(1 + exp(x)), otherwise
// Inputs: args[0], the input x
__device__
__forceinline__
T
operator
()(
const
T
&
arg_x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
arg_x
)
const
{
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
MPType
sp
=
(
x
>
static_cast
<
MPType
>
(
threshold
))
?
x
:
log
(
one
+
exp
(
x
));
return
static_cast
<
T
>
(
x
*
tanh
(
sp
));
...
...
@@ -1180,8 +1180,8 @@ struct CudaMishGradFunctor : public BaseActivationFunctor<T> {
// sp = softplus(x)
// Inputs: args[0], the input dout
// args[1], the input x
__device__
__forceinline__
T
operator
()(
const
T
&
arg_dout
,
const
T
&
arg_x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
arg_dout
,
const
T
arg_x
)
const
{
MPType
dout
=
static_cast
<
MPType
>
(
arg_dout
);
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
MPType
sp
=
(
x
>
static_cast
<
MPType
>
(
threshold
))
?
x
:
log
(
one
+
exp
(
x
));
...
...
@@ -1204,7 +1204,7 @@ struct CudaThresholdedReluFunctor : public BaseActivationFunctor<T> {
}
// thresholded_relu(x) = x > threshold ? x : 0
__device__
__forceinline__
T
operator
()(
const
T
&
x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
x
)
const
{
return
x
>
static_cast
<
T
>
(
threshold
)
?
x
:
zero
;
}
};
...
...
@@ -1219,7 +1219,7 @@ struct CudaThresholdedReluGradFunctor : public BaseActivationFunctor<T> {
}
// dx = x > threshold ? dout : 0
__device__
__forceinline__
T
operator
()(
const
T
&
dout
,
const
T
&
x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
dout
,
const
T
x
)
const
{
return
x
>
static_cast
<
T
>
(
threshold
)
?
dout
:
zero
;
}
...
...
@@ -1241,7 +1241,7 @@ struct CudaHardSwishFunctor : public BaseActivationFunctor<T> {
// x , when x >= threshold - offset
// x * (x + offset) / scale, otherwise
// threshold = scale = 6, offset = 3 by default
__device__
__forceinline__
T
operator
()(
const
T
&
x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
x
)
const
{
T
t
=
static_cast
<
T
>
(
threshold
);
T
temp
=
x
+
static_cast
<
T
>
(
offset
);
T
temp_max
=
temp
>
zero
?
temp
:
zero
;
...
...
@@ -1267,7 +1267,7 @@ struct CudaHardSwishGradFunctor : public BaseActivationFunctor<T> {
// dout , when x >= threshold - offset
// dout * (2 * x / scale + offset / scale), otherwise
// threshold = scale = 6, offset = 3 by default
__device__
__forceinline__
T
operator
()(
const
T
&
dout
,
const
T
&
x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
dout
,
const
T
x
)
const
{
T
o
=
static_cast
<
T
>
(
offset
);
T
s
=
static_cast
<
T
>
(
scale
);
T
temp1
=
static_cast
<
T
>
(
x
+
o
>
zero
);
...
...
@@ -1291,7 +1291,7 @@ struct CudaELUFunctor : public BaseActivationFunctor<T> {
// elu(x) = x, if x > 0
// elu(x) = alpha * (e^x - 1), if x <= 0
__device__
__forceinline__
T
operator
()(
const
T
&
arg_x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
arg_x
)
const
{
CT
x
=
static_cast
<
CT
>
(
arg_x
);
CT
temp
=
static_cast
<
CT
>
(
alpha
)
*
(
exp
(
x
)
-
one
);
CT
res
=
x
>
zero
?
x
:
temp
;
...
...
@@ -1312,8 +1312,7 @@ struct CudaELUGradFunctor : public BaseActivationFunctor<T> {
// case 1: alpha >= 0
// dx = dout, if out > 0
// dx = dout * (out + alpha), if out <= 0
__device__
__forceinline__
T
operator
()(
const
T
&
arg_dout
,
const
T
&
arg_out
)
const
{
__device__
__forceinline__
T
operator
()(
T
arg_dout
,
T
arg_out
)
const
{
MPType
dout
=
static_cast
<
MPType
>
(
arg_dout
);
MPType
out
=
static_cast
<
MPType
>
(
arg_out
);
MPType
a
=
static_cast
<
MPType
>
(
alpha
);
...
...
@@ -1338,8 +1337,8 @@ struct CudaELUGradNegativeAlphaFunctor : public BaseActivationFunctor<T> {
// case 2: alpha < 0
// dx = dout, if x > 0
// dx = dout * (out + alpha), if x <=0
__device__
__forceinline__
T
operator
()(
const
T
&
arg_dout
,
const
T
&
arg_out
,
const
T
&
arg_x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
arg_dout
,
const
T
arg_out
,
const
T
arg_x
)
const
{
MPType
dout
=
static_cast
<
MPType
>
(
arg_dout
);
MPType
out
=
static_cast
<
MPType
>
(
arg_out
);
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
...
...
@@ -1393,7 +1392,7 @@ struct CudaCELUFunctor : public BaseActivationFunctor<T> {
}
// celu(x) = max(0, x) + min(0, alpha * (exp(x/alpha) - 1))
__device__
__forceinline__
T
operator
()(
const
T
&
arg_x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
arg_x
)
const
{
CT
x
=
static_cast
<
CT
>
(
arg_x
);
CT
temp
=
static_cast
<
CT
>
(
alpha
)
*
(
exp
(
x
/
static_cast
<
CT
>
(
alpha
))
-
one
);
CT
res
=
(
x
>
zero
?
x
:
zero
)
+
(
temp
>
zero
?
zero
:
temp
);
...
...
@@ -1416,8 +1415,8 @@ struct CudaCELUGradFunctor : public BaseActivationFunctor<T> {
// dx = dout * (x/alpha).exp(), if alpha > 0 and x <= 0
// dx = dout , if alpha < 0 and x > 0
// dx = dout * (x/alpha).exp(), if alpha < 0 and x <=0
__device__
__forceinline__
T
operator
()(
const
T
&
arg_dout
,
const
T
&
arg_x
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
arg_dout
,
const
T
arg_x
)
const
{
MPType
dout
=
static_cast
<
MPType
>
(
arg_dout
);
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
MPType
a
=
static_cast
<
MPType
>
(
alpha
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录