Crayon鑫 / Paddle (forked from PaddlePaddle / Paddle)
Commit 57f54d3b (unverified)
Authored March 16, 2022 by YuanRisheng; committed by GitHub on March 16, 2022
move activation kernel (#40565)
Parent: 603f8425

Showing 12 changed files with 919 additions and 740 deletions (+919 / -740)
paddle/fluid/operators/activation_op.cc            +7    -16
paddle/fluid/operators/activation_op.h             +27   -231
paddle/fluid/operators/activation_op.kps           +9    -251
paddle/phi/kernels/activation_grad_kernel.h        +50   -20
paddle/phi/kernels/activation_kernel.h             +24   -18
paddle/phi/kernels/cpu/activation_grad_kernel.cc   +114  -69
paddle/phi/kernels/cpu/activation_kernel.cc        +76   -61
paddle/phi/kernels/funcs/activation_functor.h      +435  -0
paddle/phi/kernels/gpu/activation_grad_kernel.cu   +58   -22
paddle/phi/kernels/gpu/activation_kernel.cu        +32   -19
paddle/phi/kernels/impl/activation_grad_impl.h     +20   -0
paddle/phi/ops/compat/activation_sig.cc            +67   -33
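The pattern repeated across these files: each activation is an Eigen functor that carries its float attributes as members, a DEFINE_* macro wraps the functor into a phi kernel, and a PD_REGISTER_* macro exposes it. As a minimal standalone sketch of the functor half (all types below are simplified stand-ins for illustration, not the real phi API):

#include <cstdio>
#include <utility>
#include <vector>

// Stand-in for phi's BaseActivationFunctor: derived functors expose their
// float attributes through GetAttrs() as name/pointer pairs.
template <typename T>
struct BaseActivationFunctor {
  using AttrPair = std::vector<std::pair<const char*, float*>>;
};

// Hypothetical scalar version of a LeakyRelu-style functor.
template <typename T>
struct LeakyReluFunctorMimic : public BaseActivationFunctor<T> {
  float alpha;
  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"alpha", &alpha}};
  }
  T operator()(T x) const { return x > T(0) ? x : T(alpha) * x; }
};

int main() {
  LeakyReluFunctorMimic<double> f;
  *(f.GetAttrs()[0].second) = 0.1f;  // what the DEFINE_* macros do
  std::printf("%f %f\n", f(2.0), f(-2.0));  // prints 2.0 and -0.2
}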
paddle/fluid/operators/activation_op.cc
...
@@ -1485,6 +1485,13 @@ REGISTER_ACTIVATION_OP(atanh, Atanh, AtanhFunctor, AtanhGradFunctor);
REGISTER_ACTIVATION_OP(brelu, BRelu, BReluFunctor, BReluGradFunctor);
REGISTER_ACTIVATION_OP(thresholded_relu, ThresholdedRelu,
                       ThresholdedReluFunctor, ThresholdedReluGradFunctor);
REGISTER_ACTIVATION_OP(hard_shrink, HardShrink, HardShrinkFunctor,
                       HardShrinkGradFunctor);
REGISTER_ACTIVATION_OP(softshrink, SoftShrink, SoftShrinkFunctor,
                       SoftShrinkGradFunctor);
REGISTER_ACTIVATION_OP(tanh_shrink, TanhShrink, TanhShrinkFunctor,
                       TanhShrinkGradFunctor);
REGISTER_ACTIVATION_OP(silu, Silu, SiluFunctor, SiluGradFunctor);

/* ========================== sigmoid register =============================
 */
...
@@ -1626,22 +1633,6 @@ REGISTER_OPERATOR(
    ops::ActivationOpDoubleGrad<ops::ELUGradFunctor<float>::FwdDeps()>,
    ops::ActivationDoubleGradOpInplaceInferer);

REGISTER_OP_CPU_KERNEL(
    elu, ops::ActivationKernel<paddle::platform::CPUDeviceContext,
                               ops::ELUFunctor<float>>,
    ops::ActivationKernel<paddle::platform::CPUDeviceContext,
                          ops::ELUFunctor<double>>);
REGISTER_OP_CPU_KERNEL(
    elu_grad, ops::ELUGradKernel<paddle::platform::CPUDeviceContext, float>,
    ops::ELUGradKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
    elu_grad_grad, ops::ELUDoubleGradKernel<plat::CPUDeviceContext,
                                            ops::ELUGradGradFunctor<float>>,
    ops::ELUDoubleGradKernel<plat::CPUDeviceContext,
                             ops::ELUGradGradFunctor<double>>,
    ops::ELUDoubleGradKernel<plat::CPUDeviceContext,
                             ops::ELUGradGradFunctor<plat::float16>>);

/* ========================================================================== */

/* ======================== logit register ============================
...
paddle/fluid/operators/activation_op.h
...
@@ -279,6 +279,15 @@ USE_PHI_FUNCTOR(BRelu)
USE_PHI_FUNCTOR(ThresholdedRelu)
USE_PHI_FUNCTOR(LeakyRelu)
USE_PHI_DOUBLE_GRAD_FUNCTOR(LeakyRelu)
USE_PHI_FUNCTOR(HardShrink)
USE_PHI_FUNCTOR(SoftShrink)
USE_PHI_FUNCTOR(TanhShrink)
USE_PHI_FUNCTOR(Silu)
USE_PHI_FUNCTOR(ELU)
USE_PHI_DOUBLE_GRAD_FUNCTOR(ELU)

template <typename T>
using ELUGradNegativeAlphaFunctor = phi::funcs::ELUGradNegativeAlphaFunctor<T>;

template <typename T>
struct SigmoidGradFunctor : public BaseActivationFunctor<T> {
...
@@ -392,31 +401,6 @@ struct SigmoidTripleGradFunctor : public BaseActivationFunctor<T> {
  }
};

// silu(x) = x / (1 + exp(-x))
template <typename T>
struct SiluFunctor : public BaseActivationFunctor<T> {
  template <typename Device, typename X, typename Out>
  void operator()(Device d, X x, Out out) const {
    auto temp = static_cast<T>(1) / (static_cast<T>(1) + (-x).exp());
    out.device(d) = x * temp;
  }
};

// silu'(x) = (1 / (1 + e^{-x})) * (1 + out * e^{-x}))
template <typename T>
struct SiluGradFunctor : public BaseActivationFunctor<T> {
  template <typename Device, typename X, typename Out, typename dOut,
            typename dX>
  void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
    auto temp1 = static_cast<T>(1) + (-x).exp();  // 1+e^(-x)
    auto temp2 = x * (-x).exp();                  // x*e^(-x)
    dx.device(d) = dout * ((static_cast<T>(1) / temp1) *
                           (static_cast<T>(1) + (temp2 / temp1)));
  }
  static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};

// Originally: logsigmoid(x) = -log (1 + exp(-x))
// For numerical stability, we can use the log-sum-exp trick:
// https://hips.seas.harvard.edu/blog/2013/01/09/computing-log-sum-exp/
...
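As a quick sanity check of the SiluGradFunctor expression above: with s(x) = 1/(1+e^(-x)), the analytic gradient s(x)*(1 + x*e^(-x)*s(x)) should match a central finite difference of silu(x) = x/(1+e^(-x)). A standalone sketch (plain C++, no Paddle dependencies):

#include <cmath>
#include <cstdio>

double silu(double x) { return x / (1.0 + std::exp(-x)); }

// Mirrors SiluGradFunctor: (1/temp1) * (1 + temp2/temp1) with
// temp1 = 1 + e^(-x) and temp2 = x * e^(-x).
double silu_grad(double x) {
  double temp1 = 1.0 + std::exp(-x);
  double temp2 = x * std::exp(-x);
  return (1.0 / temp1) * (1.0 + temp2 / temp1);
}

int main() {
  const double h = 1e-6;
  for (double x : {-3.0, -0.5, 0.0, 0.5, 3.0}) {
    double fd = (silu(x + h) - silu(x - h)) / (2 * h);  // central difference
    std::printf("x=%5.2f analytic=%.8f fd=%.8f\n", x, silu_grad(x), fd);
  }
}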
@@ -512,99 +496,6 @@ using ReluGradGradFunctor = phi::funcs::ReluGradGradFunctor<T>;
template <typename T>
using ReluCUDAFunctor = phi::funcs::ReluCUDAFunctor<T>;

// tanhshrink(x) = x - tanh(x)
// where tanh(x) = (exp(x) - exp(-x)) / (exp(x) + exp(-x))
template <typename T>
struct TanhShrinkFunctor : public BaseActivationFunctor<T> {
  template <typename Device, typename X, typename Out>
  void operator()(Device d, X x, Out out) const {
    out.device(d) = x - x.tanh();
  }
};

template <typename T>
struct TanhShrinkGradFunctor : public BaseActivationFunctor<T> {
  template <typename Device, typename X, typename Out, typename dOut,
            typename dX>
  void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
    dx.device(d) = dout * (x.tanh() * x.tanh());
  }
  static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};

// tanhshrink(x) = x - tanh(x)
// where tanh(x) = (exp(x) - exp(-x)) / (exp(x) + exp(-x))
template <typename T>
struct HardShrinkFunctor : public BaseActivationFunctor<T> {
  float threshold;
  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"threshold", &threshold}};
  }
  template <typename Device, typename X, typename Out>
  void operator()(Device d, X x, Out out) const {
    auto temp1 = x < static_cast<T>(threshold * -1.f);
    auto temp2 = x > static_cast<T>(threshold);
    out.device(d) = x * (temp1 || temp2).template cast<T>();
  }
};

template <typename T>
struct HardShrinkGradFunctor : public BaseActivationFunctor<T> {
  float threshold;
  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"threshold", &threshold}};
  }
  template <typename Device, typename X, typename Out, typename dOut,
            typename dX>
  void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
    auto temp1 = x < static_cast<T>(threshold * -1.f);
    auto temp2 = x > static_cast<T>(threshold);
    dx.device(d) = dout * (temp1 || temp2).template cast<T>();
  }
  static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};

// softshrink(x) = x - lambda, if x > lambda; x + lambda, if x < -lambda; 0
// otherwise
template <typename T>
struct SoftShrinkFunctor : public BaseActivationFunctor<T> {
  float lambda;
  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"lambda", &lambda}};
  }
  template <typename Device, typename X, typename Out>
  void operator()(Device d, X x, Out out) const {
    auto lambdaT = static_cast<T>(lambda);
    auto temp1 = (x > lambdaT).template cast<T>();
    auto temp2 = (x < -lambdaT).template cast<T>();
    out.device(d) = temp1 * (x - lambdaT) + temp2 * (x + lambdaT);
  }
};

template <typename T>
struct SoftShrinkGradFunctor : public BaseActivationFunctor<T> {
  float lambda;
  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"lambda", &lambda}};
  }
  template <typename Device, typename X, typename Out, typename dOut,
            typename dX>
  void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
    auto lambdaT = static_cast<T>(lambda);
    auto temp1 = (x > lambdaT).template cast<T>();
    auto temp2 = (x < -lambdaT).template cast<T>();
    dx.device(d) = dout * (temp1 + temp2).template cast<T>();
  }
  static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};

// sqrt(x) = x^(1/2)
template <typename T>
struct SqrtFunctor : public BaseActivationFunctor<T> {
...
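The three shrink functors above are simple piecewise maps expressed with Eigen boolean masks. A scalar restatement (standalone C++, mirroring the masked expressions) makes the branch structure explicit:

#include <cmath>
#include <cstdio>

// hard_shrink: pass x through outside [-threshold, threshold], else 0.
double hard_shrink(double x, float threshold) {
  return (x < -threshold || x > threshold) ? x : 0.0;
}

// softshrink: shift x toward zero by lambda; the middle band maps to 0.
double softshrink(double x, float lambda) {
  if (x > lambda) return x - lambda;
  if (x < -lambda) return x + lambda;
  return 0.0;
}

// tanhshrink: x - tanh(x); its derivative is tanh(x)^2, matching
// TanhShrinkGradFunctor.
double tanhshrink(double x) { return x - std::tanh(x); }

int main() {
  for (double x : {-2.0, -0.3, 0.0, 0.3, 2.0})
    std::printf("x=%5.2f hard=%6.2f soft=%6.2f tanhshrink=%8.4f\n", x,
                hard_shrink(x, 0.5f), softshrink(x, 0.5f), tanhshrink(x));
}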
@@ -1036,59 +927,6 @@ struct SoftReluGradFunctor : public BaseActivationFunctor<T> {
  }
};

template <typename T>
struct ELUFunctor : public BaseActivationFunctor<T> {
  float alpha;
  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"alpha", &alpha}};
  }
  template <typename Device, typename X, typename Out>
  void operator()(Device d, X x, Out out) const {
    out.device(d) =
        (x < static_cast<T>(0))
            .select(static_cast<T>(alpha) * (x.exp() - static_cast<T>(1)), x);
  }
};

template <typename T>
struct ELUGradFunctor : public BaseActivationFunctor<T> {
  float alpha;
  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"alpha", &alpha}};
  }
  template <typename Device, typename X, typename Out, typename dOut,
            typename dX>
  void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
    // case 1: alpha >= 0
    // dx = dout, if out > 0
    // dx = dout * (out + alpha), if out <= 0
    dx.device(d) = (out > static_cast<T>(0))
                       .select(dout, dout * (out + static_cast<T>(alpha)));
  }
  static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};

template <typename T>
struct ELUGradNegativeAlphaFunctor : public BaseActivationFunctor<T> {
  float alpha;
  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"alpha", &alpha}};
  }
  template <typename Device, typename X, typename Out, typename dOut,
            typename dX>
  void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
    // case 2: alpha < 0
    // dx = dout, if x > 0
    // dx = dout * (out + alpha), if x <=0
    dx.device(d) = (x > static_cast<T>(0))
                       .select(dout, dout * static_cast<T>(alpha) * x.exp());
  }
  static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};

template <typename DeviceContext, typename T>
class ELUGradKernel : public framework::OpKernel<T> {
 public:
...
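The two ELU gradient functors split on the sign of alpha. For alpha >= 0, out <= 0 implies x <= 0, and out + alpha = alpha*(e^x - 1) + alpha = alpha*e^x, so conditioning on out is safe; for alpha < 0 the sign of out no longer tracks the sign of x, which is why ELUGradNegativeAlphaFunctor conditions on x. A standalone check that both branch forms agree when alpha >= 0:

#include <cmath>
#include <cstdio>

double elu(double x, double a) { return x > 0 ? x : a * (std::exp(x) - 1); }

// alpha >= 0 branch (ELUGradFunctor): gradient computed from `out`.
double grad_from_out(double out, double a) { return out > 0 ? 1.0 : out + a; }

// alpha < 0 branch (ELUGradNegativeAlphaFunctor): gradient computed from `x`.
double grad_from_x(double x, double a) {
  return x > 0 ? 1.0 : a * std::exp(x);
}

int main() {
  const double a = 1.2;  // alpha >= 0: both forms agree, since out + a = a*e^x
  for (double x : {-2.0, -0.1, 0.5})
    std::printf("%f vs %f\n", grad_from_out(elu(x, a), a), grad_from_x(x, a));
}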
@@ -1354,44 +1192,6 @@ struct AbsGradGradFunctor : public BaseActivationFunctor<T> {
  static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};

template <typename T>
struct ELUGradGradFunctor : public BaseActivationFunctor<T> {
  float alpha;
  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"alpha", &alpha}};
  }
  template <typename Device>
  void operator()(const Device& dev,
                  const framework::Tensor* X,
                  const framework::Tensor* ddX,
                  framework::Tensor* ddOut,
                  const framework::Tensor* dOut,
                  framework::Tensor* dX) const {
    auto* d = dev.eigen_device();
    auto ddx = framework::EigenVector<T>::Flatten(
        GET_DATA_SAFELY(ddX, "Input", "DDX", "ELUGradGrad"));
    auto x = framework::EigenVector<T>::Flatten(
        GET_DATA_SAFELY(X, "Input", "X", "ELUGradGrad"));
    if (dX) {
      auto dx = framework::EigenVector<T>::Flatten(
          GET_DATA_SAFELY(dX, "Output", "DX", "ELUGradGrad"));
      auto dout = framework::EigenVector<T>::Flatten(
          GET_DATA_SAFELY(dOut, "Output", "DOut", "ELUGradGrad"));
      dx.device(*d) = ddx * dout * static_cast<T>(alpha) * x.exp() *
                      (x <= static_cast<T>(0)).template cast<T>();
    }
    if (ddOut) {
      auto ddout = framework::EigenVector<T>::Flatten(
          GET_DATA_SAFELY(ddOut, "Output", "DDOut", "ELUGradGrad"));
      ddout.device(*d) = ddx * ((x > static_cast<T>(0)).template cast<T>() +
                                static_cast<T>(alpha) * x.exp() *
                                    (x <= static_cast<T>(0)).template cast<T>())
                                   .template cast<T>();
    }
  }
  static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};

template <typename T>
struct CELUGradGradFunctor : public BaseActivationFunctor<T> {
  float alpha;
...
@@ -2151,26 +1951,22 @@ struct LogGradGradFunctor : public BaseActivationFunctor<T> {
}  // namespace operators
}  // namespace paddle
#define FOR_EACH_ACTIVATION_OP(__macro) \
__macro(silu, Silu, SiluFunctor, SiluGradFunctor); \
__macro(logsigmoid, LogSigmoid, LogSigmoidFunctor, LogSigmoidGradFunctor); \
__macro(softshrink, SoftShrink, SoftShrinkFunctor, SoftShrinkGradFunctor); \
__macro(ceil, Ceil, CeilFunctor, ZeroGradFunctor); \
__macro(floor, Floor, FloorFunctor, ZeroGradFunctor); \
__macro(round, Round, RoundFunctor, ZeroGradFunctor); \
__macro(reciprocal, Reciprocal, ReciprocalFunctor, ReciprocalGradFunctor); \
__macro(log1p, Log1p, Log1pFunctor, Log1pGradFunctor); \
__macro(log2, Log2, Log2Functor, Log2GradFunctor); \
__macro(log10, Log10, Log10Functor, Log10GradFunctor); \
__macro(soft_relu, SoftRelu, SoftReluFunctor, SoftReluGradFunctor); \
__macro(stanh, STanh, STanhFunctor, STanhGradFunctor); \
__macro(softplus, Softplus, SoftplusFunctor, SoftplusGradFunctor); \
__macro(softsign, Softsign, SoftsignFunctor, SoftsignGradFunctor); \
__macro(relu6, Relu6, Relu6Functor, Relu6GradFunctor); \
__macro(tanh_shrink, TanhShrink, TanhShrinkFunctor, TanhShrinkGradFunctor); \
__macro(hard_shrink, HardShrink, HardShrinkFunctor, HardShrinkGradFunctor); \
__macro(hard_sigmoid, HardSigmoid, HardSigmoidFunctor, \
HardSigmoidGradFunctor); \
__macro(swish, Swish, SwishFunctor, SwishGradFunctor); \
__macro(mish, Mish, MishFunctor, MishGradFunctor); \
The silu, softshrink, tanh_shrink, and hard_shrink entries are dropped from
the macro, which becomes:

#define FOR_EACH_ACTIVATION_OP(__macro)                                       \
__macro(logsigmoid, LogSigmoid, LogSigmoidFunctor, LogSigmoidGradFunctor); \
__macro(ceil, Ceil, CeilFunctor, ZeroGradFunctor); \
__macro(floor, Floor, FloorFunctor, ZeroGradFunctor); \
__macro(round, Round, RoundFunctor, ZeroGradFunctor); \
__macro(reciprocal, Reciprocal, ReciprocalFunctor, ReciprocalGradFunctor); \
__macro(log1p, Log1p, Log1pFunctor, Log1pGradFunctor); \
__macro(log2, Log2, Log2Functor, Log2GradFunctor); \
__macro(log10, Log10, Log10Functor, Log10GradFunctor); \
__macro(soft_relu, SoftRelu, SoftReluFunctor, SoftReluGradFunctor); \
__macro(stanh, STanh, STanhFunctor, STanhGradFunctor); \
__macro(softplus, Softplus, SoftplusFunctor, SoftplusGradFunctor); \
__macro(softsign, Softsign, SoftsignFunctor, SoftsignGradFunctor); \
__macro(relu6, Relu6, Relu6Functor, Relu6GradFunctor); \
__macro(hard_sigmoid, HardSigmoid, HardSigmoidFunctor, \
HardSigmoidGradFunctor); \
__macro(swish, Swish, SwishFunctor, SwishGradFunctor); \
__macro(mish, Mish, MishFunctor, MishGradFunctor); \
__macro(hard_swish, HardSwish, HardSwishFunctor, HardSwishGradFunctor);
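FOR_EACH_ACTIVATION_OP is an X-macro: the caller supplies __macro and the list stamps out one registration per remaining activation. A toy standalone version of the same pattern (the names and the printf body here are illustrative only; the real expansion is the REGISTER_* machinery above):

#include <cstdio>

#define FOR_EACH_TOY_OP(__macro) \
  __macro(relu);                 \
  __macro(tanh_op);              \
  __macro(sigmoid)

// Each expansion defines one registration function; real code would expand
// to op/kernel registration statements instead.
#define DEFINE_TOY_OP(name) \
  void name##_register() { std::printf("registered %s\n", #name); }

FOR_EACH_TOY_OP(DEFINE_TOY_OP);

int main() {
  relu_register();
  tanh_op_register();
  sigmoid_register();
}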
paddle/fluid/operators/activation_op.kps
...
@@ -44,35 +44,6 @@ struct CudaSigmoidGradFunctor : public BaseActivationFunctor<T> {
}
};
template <typename T>
struct CudaSiluFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type;
MPType one = static_cast<MPType>(1.0f);
// silu(x) = x / (1 + exp(-x))
__device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(x / (one + exp(-x)));
}
};
template <typename T>
struct CudaSiluGradFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type;
MPType one = static_cast<MPType>(1.0f);
// dx = dout * (1 + exp(-x) + x * exp(-x) / (1 + exp(-x))^2)
__device__ __forceinline__ T operator()(const T arg_dout,
const T arg_x) const {
MPType dout = static_cast<MPType>(arg_dout);
MPType x = static_cast<MPType>(arg_x);
MPType temp = one / (one + exp(-x));
return static_cast<T>(dout * (temp * (one + x * (one - temp))));
}
static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};
template <typename T>
struct CudaLogSigmoidFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type;
...
...
@@ -110,43 +81,6 @@ struct CudaLogSigmoidGradFunctor : public BaseActivationFunctor<T> {
static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};
template <typename T>
struct CudaSoftShrinkFunctor : public BaseActivationFunctor<T> {
float lambda;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"lambda", &lambda}};
}
// softshrink(x) = x - lambda, if x > lambda;
// x + lambda, if x < -lambda;
// 0, otherwise.
__device__ __forceinline__ T operator()(const T x) const {
T l = static_cast<T>(lambda);
T temp1 = static_cast<T>(x > l);
T temp2 = static_cast<T>(x < -l);
return temp1 * (x - l) + temp2 * (x + l);
}
};
template <typename T>
struct CudaSoftShrinkGradFunctor : public BaseActivationFunctor<T> {
T zero = static_cast<T>(0.0f);
float lambda;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"lambda", &lambda}};
}
// dx = dout, if x > lambda or x < -lambda else 0
__device__ __forceinline__ T operator()(const T dout, const T x) const {
T l = static_cast<T>(lambda);
return (x >= -l && x <= l) ? zero : dout;
}
static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};
template <typename T>
struct CudaCeilFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type;
...
...
@@ -615,66 +549,6 @@ struct CudaRelu6GradFunctor : public BaseActivationFunctor<T> {
}
};
template <typename T>
struct CudaTanhShrinkFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type;
// tanhshrink(x) = x - tanh(x)
__device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(x - tanh(x));
}
};
template <typename T>
struct CudaTanhShrinkGradFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type;
// dx = dout * tanh(x)^2
__device__ __forceinline__ T operator()(const T arg_dout,
const T arg_x) const {
MPType dout = static_cast<MPType>(arg_dout);
MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(dout * tanh(x) * tanh(x));
}
static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};
template <typename T>
struct CudaHardShrinkFunctor : public BaseActivationFunctor<T> {
T zero = static_cast<T>(0.0f);
float threshold;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"threshold", &threshold}};
}
// hardshrink(x) = (x > -threshold && x < threshold) ? 0 : x
__device__ __forceinline__ T operator()(const T x) const {
T t = static_cast<T>(threshold);
return (x > -t && x < t) ? zero : x;
}
};
template <typename T>
struct CudaHardShrinkGradFunctor : public BaseActivationFunctor<T> {
T zero = static_cast<T>(0.0f);
float threshold;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"threshold", &threshold}};
}
// dx = (x > -threshold && x < threshold) ? 0 : dout
__device__ __forceinline__ T operator()(const T dout, const T x) const {
T t = static_cast<T>(threshold);
return (x > -t && x < t) ? zero : dout;
}
static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};
template <typename T>
struct CudaHardSigmoidFunctor : public BaseActivationFunctor<T> {
T zero = static_cast<T>(0.0f);
...
...
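The Cuda* functors above compute in MPType — MPTypeTrait<T>::Type — so reduced-precision inputs such as float16 are promoted to float for the arithmetic and cast back on return. A standalone mimic of that trait (the Half type and trait name here are toy stand-ins; the real trait lives in paddle/phi/common/amp_type_traits.h):

#include <cstdio>

struct Half {  // toy stand-in for phi::dtype::float16
  unsigned short bits;
};

template <typename T>
struct MPTypeTraitMimic {
  using Type = T;  // full-precision types compute as themselves
};
template <>
struct MPTypeTraitMimic<Half> {
  using Type = float;  // half-precision math is promoted to float
};

template <typename T>
void report() {
  using MPType = typename MPTypeTraitMimic<T>::Type;
  std::printf("compute width: %zu bytes\n", sizeof(MPType));
}

int main() {
  report<float>();   // 4
  report<double>();  // 8
  report<Half>();    // 4, not 2: the math runs in float
}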
@@ -863,110 +737,6 @@ struct CudaHardSwishGradFunctor : public BaseActivationFunctor<T> {
static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};
template <typename T>
struct CudaELUFunctor : public BaseActivationFunctor<T> {
using CT = typename details::MPTypeTrait<T>::Type;
CT zero = static_cast<CT>(0.0f);
CT one = static_cast<CT>(1.0f);
float alpha;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"alpha", &alpha}};
}
// elu(x) = x, if x > 0
// elu(x) = alpha * (e^x - 1), if x <= 0
__device__ __forceinline__ T operator()(const T arg_x) const {
CT x = static_cast<CT>(arg_x);
CT temp = static_cast<CT>(alpha) * (exp(x) - one);
CT res = x > zero ? x : temp;
return static_cast<T>(res);
}
};
template <typename T>
struct CudaELUGradFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type;
MPType zero = static_cast<MPType>(0.0f);
float alpha;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"alpha", &alpha}};
}
// case 1: alpha >= 0
// dx = dout, if out > 0
// dx = dout * (out + alpha), if out <= 0
__device__ __forceinline__ T operator()(T arg_dout, T arg_out) const {
MPType dout = static_cast<MPType>(arg_dout);
MPType out = static_cast<MPType>(arg_out);
MPType a = static_cast<MPType>(alpha);
MPType out_pos = static_cast<MPType>(out > zero);
MPType out_neg = static_cast<MPType>(out <= zero);
return static_cast<T>(dout * (out_pos + out_neg * (out + a)));
}
static constexpr ActBwdOpFwdDeps FwdDeps() {
return ActBwdOpFwdDeps::kDepOut;
}
};
template <typename T>
struct CudaELUGradNegativeAlphaFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type;
MPType zero = static_cast<MPType>(0.0f);
float alpha;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"alpha", &alpha}};
}
// case 2: alpha < 0
// dx = dout, if x > 0
// dx = dout * (out + alpha), if x <=0
__device__ __forceinline__ T operator()(const T arg_dout, const T arg_out,
const T arg_x) const {
MPType dout = static_cast<MPType>(arg_dout);
MPType out = static_cast<MPType>(arg_out);
MPType x = static_cast<MPType>(arg_x);
MPType a = static_cast<MPType>(alpha);
MPType x_pos = static_cast<MPType>(x > zero);
MPType x_neg = static_cast<MPType>(x <= zero);
return static_cast<T>(dout * (x_pos + x_neg * (out + a)));
}
static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};
template <typename DeviceContext, typename T>
class ELUGradCudaKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const {
auto* d_out = ctx.Input<framework::Tensor>(framework::GradVarName("Out"));
auto* out = ctx.Input<framework::Tensor>("Out");
auto* x = ctx.Input<framework::Tensor>("X");
auto* d_x = ctx.Output<framework::Tensor>(framework::GradVarName("X"));
d_x->mutable_data<T>(ctx.GetPlace());
const float alpha = ctx.Attr<float>("alpha");
auto& dev_ctx = ctx.device_context<DeviceContext>();
std::vector<const framework::Tensor*> ins = {d_out, out};
std::vector<framework::Tensor*> outs = {d_x};
if (alpha > 0) {
CudaELUGradFunctor<T> functor;
functor.alpha = alpha;
paddle::operators::LaunchSameDimsElementwiseCudaKernel<T>(dev_ctx, ins,
&outs, functor);
} else {
CudaELUGradNegativeAlphaFunctor<T> functor;
functor.alpha = alpha;
ins.push_back(x);
paddle::operators::LaunchSameDimsElementwiseCudaKernel<T>(dev_ctx, ins,
&outs, functor);
}
}
};
template <typename T>
struct CudaCELUFunctor : public BaseActivationFunctor<T> {
using CT = typename details::MPTypeTrait<T>::Type;
...
...
@@ -1099,6 +869,15 @@ USE_PHI_FUNCTOR(CudaTanh)
USE_PHI_FUNCTOR(CudaBRelu)
USE_PHI_FUNCTOR(CudaLeakyRelu)
USE_PHI_FUNCTOR(CudaThresholdedRelu)
USE_PHI_FUNCTOR(CudaHardShrink)
USE_PHI_FUNCTOR(CudaSoftShrink)
USE_PHI_FUNCTOR(CudaTanhShrink)
USE_PHI_FUNCTOR(CudaSilu)
USE_PHI_FUNCTOR(CudaELU)
template <typename T>
using CudaELUGradNegativeAlphaFunctor =
phi::funcs::CudaELUGradNegativeAlphaFunctor<T>;
} // namespace operators
} // namespace paddle
...
...
@@ -1158,26 +937,6 @@ namespace plat = paddle::platform;
ops::ActivationGradCudaKernel<plat::CUDADeviceContext, \
ops::grad_functor<plat::bfloat16>>);
/* ======================== elu register ============================ */
REGISTER_OP_CUDA_KERNEL(
elu, ops::ActivationCudaKernel<paddle::platform::CUDADeviceContext,
ops::CudaELUFunctor<float>>,
ops::ActivationCudaKernel<paddle::platform::CUDADeviceContext,
ops::CudaELUFunctor<double>>,
ops::ActivationCudaKernel<plat::CUDADeviceContext,
ops::CudaELUFunctor<plat::float16>>);
REGISTER_OP_CUDA_KERNEL(
elu_grad, ops::ELUGradCudaKernel<plat::CUDADeviceContext, float>,
ops::ELUGradCudaKernel<plat::CUDADeviceContext, double>,
ops::ELUGradCudaKernel<plat::CUDADeviceContext, plat::float16>);
REGISTER_OP_CUDA_KERNEL(
elu_grad_grad, ops::ELUDoubleGradKernel<plat::CUDADeviceContext,
ops::ELUGradGradFunctor<float>>,
ops::ELUDoubleGradKernel<plat::CUDADeviceContext,
ops::ELUGradGradFunctor<double>>,
ops::ELUDoubleGradKernel<plat::CUDADeviceContext,
ops::ELUGradGradFunctor<plat::float16>>);
/* ========================================================================== */
/* ======================== celu register ============================ */
...
...
@@ -1359,7 +1118,6 @@ REGISTER_OP_CUDA_KERNEL(
/* ========================================================================== */
#define FOR_EACH_ACTIVATION_CUDA_OP(__macro) \
__macro(silu, Silu, CudaSiluFunctor, CudaSiluGradFunctor); \
__macro(logsigmoid, LogSigmoid, CudaLogSigmoidFunctor, \
CudaLogSigmoidGradFunctor); \
__macro(softshrink, SoftShrink, CudaSoftShrinkFunctor, \
...
...
paddle/phi/kernels/activation_grad_kernel.h
...
@@ -26,6 +26,23 @@ namespace phi {
const DenseTensor& dout, \
DenseTensor* dx);
#define DECLARE_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DepX(name, attr) \
template <typename T, typename Context> \
void name##GradKernel(const Context& dev_ctx, \
const DenseTensor& x, \
const DenseTensor& dout, \
float attr, \
DenseTensor* dx);
#define DECLARE_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DepX(name, attr1, attr2) \
template <typename T, typename Context> \
void name##GradKernel(const Context& dev_ctx, \
const DenseTensor& x, \
const DenseTensor& dout, \
float attr1, \
float attr2, \
DenseTensor* dx);
#define DECLARE_ACTIVATION_GRAD_KERNEL_DepOut(name) \
template <typename T, typename Context> \
void name##GradKernel(const Context& dev_ctx, \
...
...
@@ -33,6 +50,14 @@ namespace phi {
const DenseTensor& dout, \
DenseTensor* dx);
#define DECLARE_ACTIVATION_GRAD_KERNEL_WITH_ONE_ATTRS_DepOut(name, attr) \
template <typename T, typename Context> \
void name##GradKernel(const Context& dev_ctx, \
const DenseTensor& out, \
const DenseTensor& dout, \
float attr, \
DenseTensor* dx);
template <typename T, typename Context>
void ReluDoubleGradKernel(const Context& dev_ctx,
                          const DenseTensor& out,
...
@@ -58,21 +83,6 @@ void TanhTripleGradKernel(const Context& dev_ctx,
                          DenseTensor* d_dout,
                          DenseTensor* d_ddx);

template <typename T, typename Context>
void BReluGradKernel(const Context& dev_ctx,
                     const DenseTensor& x,
                     const DenseTensor& dout,
                     float t_min,
                     float t_max,
                     DenseTensor* dx);

template <typename T, typename Context>
void LeakyReluGradKernel(const Context& dev_ctx,
                         const DenseTensor& x,
                         const DenseTensor& dout,
                         float alpha,
                         DenseTensor* dx);

template <typename T, typename Context>
void LeakyReluDoubleGradKernel(const Context& dev_ctx,
                               const DenseTensor& x,
...
@@ -81,11 +91,21 @@ void LeakyReluDoubleGradKernel(const Context& dev_ctx,
                               DenseTensor* ddout);

template <typename T, typename Context>
void ThresholdedReluGradKernel(const Context& dev_ctx,
                               const DenseTensor& x,
                               const DenseTensor& dout,
                               float threshold,
                               DenseTensor* dx);

template <typename T, typename Context>
void EluGradKernel(const Context& dev_ctx,
                   const DenseTensor& x,
                   const DenseTensor& out,
                   const DenseTensor& dout,
                   float alpha,
                   DenseTensor* dx);

template <typename T, typename Context>
void EluDoubleGradKernel(const Context& dev_ctx,
                         const DenseTensor& x,
                         const DenseTensor& dout,
                         const DenseTensor& ddx,
                         float alpha,
                         DenseTensor* dx,
                         DenseTensor* ddout);

DECLARE_ACTIVATION_GRAD_KERNEL_DepX(Cos);
DECLARE_ACTIVATION_GRAD_KERNEL_DepX(Tan);
...
@@ -98,7 +118,17 @@ DECLARE_ACTIVATION_GRAD_KERNEL_DepX(Cosh);
DECLARE_ACTIVATION_GRAD_KERNEL_DepX(Asinh);
DECLARE_ACTIVATION_GRAD_KERNEL_DepX(Acosh);
DECLARE_ACTIVATION_GRAD_KERNEL_DepX(Atanh);
DECLARE_ACTIVATION_GRAD_KERNEL_DepX(TanhShrink);
DECLARE_ACTIVATION_GRAD_KERNEL_DepX(Silu);

DECLARE_ACTIVATION_GRAD_KERNEL_DepOut(Relu);
DECLARE_ACTIVATION_GRAD_KERNEL_DepOut(Tanh);

DECLARE_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DepX(LeakyRelu, alpha)
DECLARE_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DepX(ThresholdedRelu, threshold)
DECLARE_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DepX(SoftShrink, lambda)
DECLARE_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DepX(HardShrink, threshold)
DECLARE_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DepX(BRelu, t_min, t_max)

}  // namespace phi
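Each DECLARE_* macro pastes the activation name onto the GradKernel suffix with ##, so DECLARE_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DepX(LeakyRelu, alpha) declares LeakyReluGradKernel. A minimal standalone demonstration of the same ## mechanics (stub Ctx/Tensor types, not the phi signatures):

#include <cstdio>

struct Ctx {};
struct Tensor {};

#define DECLARE_TOY_GRAD_KERNEL(name, attr)                          \
  void name##GradKernel(const Ctx& ctx, const Tensor& x, float attr, \
                        Tensor* dx)

DECLARE_TOY_GRAD_KERNEL(LeakyRelu, alpha);  // declares LeakyReluGradKernel

// The definition can use the pasted name directly.
void LeakyReluGradKernel(const Ctx&, const Tensor&, float alpha, Tensor*) {
  std::printf("LeakyReluGradKernel(alpha=%f)\n", alpha);
}

int main() {
  Ctx ctx;
  Tensor x, dx;
  LeakyReluGradKernel(ctx, x, 0.02f, &dx);
}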
paddle/phi/kernels/activation_kernel.h
...
@@ -24,6 +24,21 @@ namespace phi {
void name##Kernel( \
const Context& dev_ctx, const DenseTensor& x, DenseTensor* out);
#define DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(name, attr) \
template <typename T, typename Context> \
void name##Kernel(const Context& dev_ctx, \
const DenseTensor& x, \
float attr, \
DenseTensor* out);
#define DECLARE_ACTIVATION_KERNEL_WITH_TWO_ATTRS(name, attr1, attr2) \
template <typename T, typename Context> \
void name##Kernel(const Context& dev_ctx, \
const DenseTensor& x, \
float attr1, \
float attr2, \
DenseTensor* out);
DECLARE_ACTIVATION_KERNEL(Cos)
DECLARE_ACTIVATION_KERNEL(Tan)
DECLARE_ACTIVATION_KERNEL(Acos)
...
...
@@ -37,24 +52,15 @@ DECLARE_ACTIVATION_KERNEL(Acosh)
DECLARE_ACTIVATION_KERNEL(Atanh)
DECLARE_ACTIVATION_KERNEL(Relu)
DECLARE_ACTIVATION_KERNEL(Tanh)
DECLARE_ACTIVATION_KERNEL(TanhShrink)
DECLARE_ACTIVATION_KERNEL(Silu)

DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(LeakyRelu, alpha)
DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(ThresholdedRelu, threshold)
DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(SoftShrink, lambda)
DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(HardShrink, threshold)
DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(Elu, alpha)

template <typename T, typename Context>
void BReluKernel(const Context& dev_ctx,
                 const DenseTensor& x,
                 float t_min,
                 float t_max,
                 DenseTensor* out);

template <typename T, typename Context>
void LeakyReluKernel(const Context& dev_ctx,
                     const DenseTensor& x,
                     float alpha,
                     DenseTensor* out);

template <typename T, typename Context>
void ThresholdedReluKernel(const Context& dev_ctx,
                           const DenseTensor& x,
                           float threshold,
                           DenseTensor* out);

DECLARE_ACTIVATION_KERNEL_WITH_TWO_ATTRS(BRelu, t_min, t_max)

}  // namespace phi
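The WITH_ONE/TWO_ATTRS variants exist because the functors carry their float attributes as members and publish them positionally through GetAttrs(); the defining macros write each incoming kernel argument through the matching pointer. A standalone sketch of that plumbing (simplified types, mimic names):

#include <cstdio>
#include <utility>
#include <vector>

using AttrPair = std::vector<std::pair<const char*, float*>>;

struct BReluFunctorMimic {
  float t_min;
  float t_max;
  AttrPair GetAttrs() { return {{"t_min", &t_min}, {"t_max", &t_max}}; }
};

// What DEFINE_CPU_ACT_KERNEL_WITH_TWO_ATTRS does with attr1/attr2:
void BReluKernelMimic(float attr1, float attr2) {
  BReluFunctorMimic functor;
  auto attrs = functor.GetAttrs();
  *(attrs[0].second) = attr1;  // fills t_min
  *(attrs[1].second) = attr2;  // fills t_max
  std::printf("t_min=%f t_max=%f\n", functor.t_min, functor.t_max);
}

int main() { BReluKernelMimic(0.0f, 24.0f); }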
paddle/phi/kernels/cpu/activation_grad_kernel.cc
...
@@ -21,101 +21,140 @@ limitations under the License. */

namespace phi {

The kernel-defining macros are renamed from ..._DepX / ..._DepOut to
..._DEPX / ..._DEPOUT, and now qualify the functor class with funcs::
(the call sites below drop the explicit funcs:: and <T> accordingly):

#define DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(name, functor_class) \
  template <typename T, typename Context>                           \
  void name##GradKernel(const Context& dev_ctx,                     \
                        const DenseTensor& x,                       \
                        const DenseTensor& dout,                    \
                        DenseTensor* dx) {                          \
    funcs::functor_class<T> functor;                                \
    ActivationGradImpl<T, Context, funcs::functor_class<T>>(        \
        dev_ctx, &x, nullptr, &dout, dx, functor);                  \
  }

#define DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX( \
    name, functor_class, attr)                          \
  template <typename T, typename Context>               \
  void name##GradKernel(const Context& dev_ctx,         \
                        const DenseTensor& x,           \
                        const DenseTensor& dout,        \
                        float attr,                     \
                        DenseTensor* dx) {              \
    funcs::functor_class<T> functor;                    \
    auto attrs = functor.GetAttrs();                    \
    *(attrs[0].second) = attr;                          \
    ActivationGradImpl<T, Context, funcs::functor_class<T>>( \
        dev_ctx, &x, nullptr, &dout, dx, functor);      \
  }

#define DEFINE_CPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX( \
    name, functor_class, attr1, attr2)                  \
  template <typename T, typename Context>               \
  void name##GradKernel(const Context& dev_ctx,         \
                        const DenseTensor& x,           \
                        const DenseTensor& dout,        \
                        float attr1,                    \
                        float attr2,                    \
                        DenseTensor* dx) {              \
    funcs::functor_class<T> functor;                    \
    auto attrs = functor.GetAttrs();                    \
    *(attrs[0].second) = attr1;                         \
    *(attrs[1].second) = attr2;                         \
    ActivationGradImpl<T, Context, funcs::functor_class<T>>( \
        dev_ctx, &x, nullptr, &dout, dx, functor);      \
  }

#define DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT(name, functor_class) \
  template <typename T, typename Context>                             \
  void name##GradKernel(const Context& dev_ctx,                       \
                        const DenseTensor& out,                       \
                        const DenseTensor& dout,                      \
                        DenseTensor* dx) {                            \
    funcs::functor_class<T> functor;                                  \
    ActivationGradImpl<T, Context, funcs::functor_class<T>>(          \
        dev_ctx, nullptr, &out, &dout, dx, functor);                  \
  }

#define DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPOUT( \
    name, functor_class, attr)                            \
  template <typename T, typename Context>                 \
  void name##GradKernel(const Context& dev_ctx,           \
                        const DenseTensor& out,           \
                        const DenseTensor& dout,          \
                        float attr,                       \
                        DenseTensor* dx) {                \
    funcs::functor_class<T> functor;                      \
    auto attrs = functor.GetAttrs();                      \
    *(attrs[0].second) = attr;                            \
    ActivationGradImpl<T, Context, funcs::functor_class<T>>( \
        dev_ctx, nullptr, &out, &dout, dx, functor);      \
  }
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(Cos, CosGradFunctor);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(Tan, TanGradFunctor);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(Acos, AcosGradFunctor);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(Sin, SinGradFunctor);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(Asin, AsinGradFunctor);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(Atan, AtanGradFunctor);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(Sinh, SinhGradFunctor);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(Cosh, CoshGradFunctor);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(Asinh, AsinhGradFunctor);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(Acosh, AcoshGradFunctor);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(Atanh, AtanhGradFunctor);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(TanhShrink, TanhShrinkGradFunctor);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(Silu, SiluGradFunctor);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Relu, ReluGradFunctor);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Tanh, TanhGradFunctor);
DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(LeakyRelu,
                                               LeakyReluGradFunctor,
                                               alpha);
DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(ThresholdedRelu,
                                               ThresholdedReluGradFunctor,
                                               threshold);
DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(SoftShrink,
                                               SoftShrinkGradFunctor,
                                               lambda);
DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(HardShrink,
                                               HardShrinkGradFunctor,
                                               threshold);
DEFINE_CPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX(BRelu,
                                               BReluGradFunctor,
                                               t_min,
                                               t_max);
template <typename T, typename Context>
void EluGradKernel(const Context& dev_ctx,
                   const DenseTensor& x,
                   const DenseTensor& out,
                   const DenseTensor& dout,
                   float alpha,
                   DenseTensor* dx) {
  dev_ctx.template Alloc<T>(dx);
  auto x_flatten =
      EigenVector<T>::Flatten(GET_DATA_SAFELY(&x, "Input", "X", "elu_grad"));
  auto out_flatten = EigenVector<T>::Flatten(
      GET_DATA_SAFELY(&out, "Input", "Out", "elu_grad"));
  auto dout_flatten = EigenVector<T>::Flatten(
      GET_DATA_SAFELY(&dout, "Input", "dOut", "elu_grad"));
  auto dx_flatten =
      EigenVector<T>::Flatten(GET_DATA_SAFELY(dx, "Output", "dX", "elu_grad"));
  auto* place = dev_ctx.eigen_device();

  if (alpha > 0) {
    funcs::ELUGradFunctor<T> functor;
    functor.alpha = alpha;
    functor(*place, x_flatten, out_flatten, dout_flatten, dx_flatten);
  } else {
    funcs::ELUGradNegativeAlphaFunctor<T> functor;
    functor.alpha = alpha;
    functor(*place, x_flatten, out_flatten, dout_flatten, dx_flatten);
  }
}

}  // namespace phi

PD_REGISTER_KERNEL(
...
...
@@ -144,6 +183,11 @@ PD_REGISTER_ACTIVATION_GRAD_KERNEL(brelu_grad, BReluGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(leaky_relu_grad, LeakyReluGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(thresholded_relu_grad,
                                   ThresholdedReluGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(soft_shrink_grad, SoftShrinkGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(hard_shrink_grad, HardShrinkGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(tanh_shrink_grad, TanhShrinkGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(elu_grad, EluGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(silu_grad, SiluGradKernel)
PD_REGISTER_ACTIVATION_DOUBLE_GRAD_KERNEL(relu_double_grad,
                                          ReluDoubleGradKernel)
...
@@ -151,6 +195,7 @@ PD_REGISTER_ACTIVATION_DOUBLE_GRAD_KERNEL(tanh_double_grad,
                                          TanhDoubleGradKernel)
PD_REGISTER_ACTIVATION_DOUBLE_GRAD_KERNEL(leaky_relu_double_grad,
                                          LeakyReluDoubleGradKernel)
PD_REGISTER_ACTIVATION_DOUBLE_GRAD_KERNEL(elu_double_grad, EluDoubleGradKernel)

PD_REGISTER_KERNEL(tanh_triple_grad,
                   CPU,
...
...
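The DEPX/DEPOUT macro pairs differ only in which tensor they forward: gradients that need the forward input pass (&x, nullptr), gradients that only need the forward output pass (nullptr, &out), and a shared impl consumes whichever is non-null. A standalone mimic of that dispatch (stub types and mimic names; the real shared impl is ActivationGradImpl in paddle/phi/kernels/impl/activation_grad_impl.h):

#include <cstdio>

struct Tensor { const char* name; };

// Stands in for ActivationGradImpl: exactly one of x/out is non-null.
template <typename Functor>
void GradImplMimic(const Tensor* x, const Tensor* out, Functor f) {
  const Tensor* dep = x ? x : out;
  std::printf("grad uses %s\n", dep->name);
  (void)f;
}

struct SiluGradMimic {};  // kDepX-style functor
struct TanhGradMimic {};  // kDepOut-style functor

int main() {
  Tensor x{"x (forward input)"}, out{"out (forward output)"};
  GradImplMimic(&x, nullptr, SiluGradMimic{});    // DEPX pattern
  GradImplMimic(nullptr, &out, TanhGradMimic{});  // DEPOUT pattern
}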
paddle/phi/kernels/cpu/activation_kernel.cc
...
@@ -19,78 +19,93 @@ limitations under the License. */
namespace phi {
#define DEFINE_CPU_ACTIVATION_KERNEL(name, functor_class) \
template <typename T, typename Context> \
void name##Kernel( \
const Context& dev_ctx, const DenseTensor& x, DenseTensor* out) { \
functor_class functor; \
ActivationImpl<T, Context, functor_class>(dev_ctx, x, out, functor); \
#define DEFINE_CPU_ACTIVATION_KERNEL(name, functor_class) \
template <typename T, typename Context> \
void name##Kernel( \
const Context& dev_ctx, const DenseTensor& x, DenseTensor* out) { \
funcs::functor_class<T> functor; \
ActivationImpl<T, Context, funcs::functor_class<T>>( \
dev_ctx, x, out, functor); \
}
#define DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(name, functor_class, attr) \
template <typename T, typename Context> \
void name##Kernel(const Context& dev_ctx, \
const DenseTensor& x, \
float attr, \
DenseTensor* out) { \
functor_class<T> functor; \
auto attrs = functor.GetAttrs(); \
*(attrs[0].second) = attr; \
ActivationImpl<T, Context, functor_class<T>>(dev_ctx, x, out, functor); \
#define DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(name, functor_class, attr) \
template <typename T, typename Context> \
void name##Kernel(const Context& dev_ctx, \
const DenseTensor& x, \
float attr, \
DenseTensor* out) { \
funcs::functor_class<T> functor; \
auto attrs = functor.GetAttrs(); \
*(attrs[0].second) = attr; \
ActivationImpl<T, Context, funcs::functor_class<T>>( \
dev_ctx, x, out, functor); \
}
#define DEFINE_CPU_ACT_KERNEL_WITH_TWO_ATTRS( \
name, functor_class, attr1, attr2) \
template <typename T, typename Context> \
void name##Kernel(const Context& dev_ctx, \
const DenseTensor& x, \
float attr1, \
float attr2, \
DenseTensor* out) { \
functor_class<T> functor; \
auto attrs = functor.GetAttrs(); \
*(attrs[0].second) = attr1; \
*(attrs[1].second) = attr2; \
ActivationImpl<T, Context, functor_class<T>>(dev_ctx, x, out, functor); \
#define DEFINE_CPU_ACT_KERNEL_WITH_TWO_ATTRS( \
name, functor_class, attr1, attr2) \
template <typename T, typename Context> \
void name##Kernel(const Context& dev_ctx, \
const DenseTensor& x, \
float attr1, \
float attr2, \
DenseTensor* out) { \
funcs::functor_class<T> functor; \
auto attrs = functor.GetAttrs(); \
*(attrs[0].second) = attr1; \
*(attrs[1].second) = attr2; \
ActivationImpl<T, Context, funcs::functor_class<T>>( \
dev_ctx, x, out, functor); \
}
Call sites now drop the funcs:: qualifier and the explicit <T>, which the
macros add:

DEFINE_CPU_ACTIVATION_KERNEL(Sin, SinFunctor)
DEFINE_CPU_ACTIVATION_KERNEL(Cos, CosFunctor)
DEFINE_CPU_ACTIVATION_KERNEL(Tan, TanFunctor)
DEFINE_CPU_ACTIVATION_KERNEL(Asin, AsinFunctor)
DEFINE_CPU_ACTIVATION_KERNEL(Atan, AtanFunctor)
DEFINE_CPU_ACTIVATION_KERNEL(Acos, AcosFunctor)
DEFINE_CPU_ACTIVATION_KERNEL(Sinh, SinhFunctor)
DEFINE_CPU_ACTIVATION_KERNEL(Cosh, CoshFunctor)
DEFINE_CPU_ACTIVATION_KERNEL(Asinh, AsinhFunctor)
DEFINE_CPU_ACTIVATION_KERNEL(Acosh, AcoshFunctor)
DEFINE_CPU_ACTIVATION_KERNEL(Atanh, AtanhFunctor)
DEFINE_CPU_ACTIVATION_KERNEL(Relu, ReluCPUFunctor)
DEFINE_CPU_ACTIVATION_KERNEL(Tanh, TanhFunctor)
DEFINE_CPU_ACTIVATION_KERNEL(TanhShrink, TanhShrinkFunctor)
DEFINE_CPU_ACTIVATION_KERNEL(Silu, SiluFunctor)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(LeakyRelu, LeakyReluFunctor, alpha)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(ThresholdedRelu,
                                     ThresholdedReluFunctor,
                                     threshold)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(HardShrink, HardShrinkFunctor, threshold)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(SoftShrink, SoftShrinkFunctor, lambda)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(Elu, ELUFunctor, alpha)
DEFINE_CPU_ACT_KERNEL_WITH_TWO_ATTRS(BRelu, BReluFunctor, t_min, t_max)

}  // namespace phi
PD_REGISTER_KERNEL(relu, CPU, ALL_LAYOUT, phi::ReluKernel, float, double) {}

The registration helper's body changes from phi::func##Kernel to plain
phi::func, so call sites now pass the full kernel name:

#define PD_REGISTER_ACTIVATION_KERNEL(name, func) \
  PD_REGISTER_KERNEL(name, CPU, ALL_LAYOUT, phi::func, float, double) {}
PD_REGISTER_ACTIVATION_KERNEL(sin, SinKernel)
PD_REGISTER_ACTIVATION_KERNEL(cos, CosKernel)
PD_REGISTER_ACTIVATION_KERNEL(tan, TanKernel)
PD_REGISTER_ACTIVATION_KERNEL(acos, AcosKernel)
PD_REGISTER_ACTIVATION_KERNEL(asin, AsinKernel)
PD_REGISTER_ACTIVATION_KERNEL(atan, AtanKernel)
PD_REGISTER_ACTIVATION_KERNEL(sinh, SinhKernel)
PD_REGISTER_ACTIVATION_KERNEL(cosh, CoshKernel)
PD_REGISTER_ACTIVATION_KERNEL(asinh, AsinhKernel)
PD_REGISTER_ACTIVATION_KERNEL(acosh, AcoshKernel)
PD_REGISTER_ACTIVATION_KERNEL(atanh, AtanhKernel)
PD_REGISTER_ACTIVATION_KERNEL(tanh, TanhKernel)
PD_REGISTER_ACTIVATION_KERNEL(brelu, BReluKernel)
PD_REGISTER_ACTIVATION_KERNEL(leaky_relu, LeakyReluKernel)
PD_REGISTER_ACTIVATION_KERNEL(thresholded_relu, ThresholdedReluKernel)
PD_REGISTER_ACTIVATION_KERNEL(hard_shrink, HardShrinkKernel)
PD_REGISTER_ACTIVATION_KERNEL(soft_shrink, SoftShrinkKernel)
PD_REGISTER_ACTIVATION_KERNEL(tanh_shrink, TanhShrinkKernel)
PD_REGISTER_ACTIVATION_KERNEL(elu, EluKernel)
PD_REGISTER_ACTIVATION_KERNEL(silu, SiluKernel)
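PD_REGISTER_ACTIVATION_KERNEL just forwards to PD_REGISTER_KERNEL with CPU, ALL_LAYOUT, and the float/double type list baked in. A toy standalone registry showing the shape of such a registration macro (the map-based registry below is invented for illustration and is not phi's real mechanism):

#include <cstdio>
#include <map>
#include <string>

using KernelFn = void (*)();

std::map<std::string, KernelFn>& Registry() {
  static std::map<std::string, KernelFn> r;
  return r;
}

struct Registrar {
  Registrar(const char* name, KernelFn fn) { Registry()[name] = fn; }
};

// Static-initializer trick: the macro registers the kernel before main().
#define REGISTER_TOY_KERNEL(name, fn) \
  static Registrar registrar_##name(#name, fn)

void SiluKernelMimic() { std::printf("silu kernel ran\n"); }
REGISTER_TOY_KERNEL(silu, SiluKernelMimic);

int main() { Registry()["silu"](); }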
paddle/phi/kernels/funcs/activation_functor.h
...
@@ -29,11 +29,13 @@
#include <type_traits>
#include "paddle/phi/common/amp_type_traits.h"
#include "paddle/phi/common/bfloat16.h"
#include "paddle/phi/common/float16.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/enforce.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
#include "paddle/phi/kernels/funcs/eigen/extensions.h"
#ifdef PADDLE_WITH_XPU_KP
#define __forceinline__ __inline__
...
...
@@ -780,6 +782,236 @@ struct ThresholdedReluGradFunctor : public BaseActivationFunctor<T> {
  static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};

// tanhshrink(x) = x - tanh(x)
// where tanh(x) = (exp(x) - exp(-x)) / (exp(x) + exp(-x))
template <typename T>
struct TanhShrinkFunctor : public BaseActivationFunctor<T> {
  template <typename Device, typename X, typename Out>
  void operator()(Device d, X x, Out out) const {
    out.device(d) = x - x.tanh();
  }
};

template <typename T>
struct TanhShrinkGradFunctor : public BaseActivationFunctor<T> {
  template <typename Device, typename X, typename Out, typename dOut,
            typename dX>
  void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
    dx.device(d) = dout * (x.tanh() * x.tanh());
  }
  static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};

// tanhshrink(x) = x - tanh(x)
// where tanh(x) = (exp(x) - exp(-x)) / (exp(x) + exp(-x))
template <typename T>
struct HardShrinkFunctor : public BaseActivationFunctor<T> {
  float threshold;
  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"threshold", &threshold}};
  }
  template <typename Device, typename X, typename Out>
  void operator()(Device d, X x, Out out) const {
    auto temp1 = x < static_cast<T>(threshold * -1.f);
    auto temp2 = x > static_cast<T>(threshold);
    out.device(d) = x * (temp1 || temp2).template cast<T>();
  }
};

template <typename T>
struct HardShrinkGradFunctor : public BaseActivationFunctor<T> {
  float threshold;
  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"threshold", &threshold}};
  }
  template <typename Device, typename X, typename Out, typename dOut,
            typename dX>
  void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
    auto temp1 = x < static_cast<T>(threshold * -1.f);
    auto temp2 = x > static_cast<T>(threshold);
    dx.device(d) = dout * (temp1 || temp2).template cast<T>();
  }
  static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};

// softshrink(x) = x - lambda, if x > lambda; x + lambda, if x < -lambda; 0
// otherwise
template <typename T>
struct SoftShrinkFunctor : public BaseActivationFunctor<T> {
  float lambda;
  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"lambda", &lambda}};
  }
  template <typename Device, typename X, typename Out>
  void operator()(Device d, X x, Out out) const {
    auto lambdaT = static_cast<T>(lambda);
    auto temp1 = (x > lambdaT).template cast<T>();
    auto temp2 = (x < -lambdaT).template cast<T>();
    out.device(d) = temp1 * (x - lambdaT) + temp2 * (x + lambdaT);
  }
};

template <typename T>
struct SoftShrinkGradFunctor : public BaseActivationFunctor<T> {
  float lambda;
  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"lambda", &lambda}};
  }
  template <typename Device, typename X, typename Out, typename dOut,
            typename dX>
  void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
    auto lambdaT = static_cast<T>(lambda);
    auto temp1 = (x > lambdaT).template cast<T>();
    auto temp2 = (x < -lambdaT).template cast<T>();
    dx.device(d) = dout * (temp1 + temp2).template cast<T>();
  }
  static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};

template <typename T>
struct ELUFunctor : public BaseActivationFunctor<T> {
  float alpha;
  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"alpha", &alpha}};
  }
  template <typename Device, typename X, typename Out>
  void operator()(Device d, X x, Out out) const {
    out.device(d) =
        (x < static_cast<T>(0))
            .select(static_cast<T>(alpha) * (x.exp() - static_cast<T>(1)), x);
  }
};

template <typename T>
struct ELUGradFunctor : public BaseActivationFunctor<T> {
  float alpha;
  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"alpha", &alpha}};
  }
  template <typename Device, typename X, typename Out, typename dOut,
            typename dX>
  void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
    // case 1: alpha >= 0
    // dx = dout, if out > 0
    // dx = dout * (out + alpha), if out <= 0
    dx.device(d) = (out > static_cast<T>(0))
                       .select(dout, dout * (out + static_cast<T>(alpha)));
  }
  static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};

template <typename T>
struct ELUGradNegativeAlphaFunctor : public BaseActivationFunctor<T> {
  float alpha;
  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"alpha", &alpha}};
  }
  template <typename Device, typename X, typename Out, typename dOut,
            typename dX>
  void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
    // case 2: alpha < 0
    // dx = dout, if x > 0
    // dx = dout * (out + alpha), if x <=0
    dx.device(d) = (x > static_cast<T>(0))
                       .select(dout, dout * static_cast<T>(alpha) * x.exp());
  }
  static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};

template <typename T>
struct ELUGradGradFunctor : public BaseActivationFunctor<T> {
  float alpha;
  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"alpha", &alpha}};
  }
  template <typename Device>
  void operator()(const Device& dev,
                  const DenseTensor* X,
                  const DenseTensor* ddX,
                  DenseTensor* ddOut,
                  const DenseTensor* dOut,
                  DenseTensor* dX) const {
    auto* d = dev.eigen_device();
    auto ddx = EigenVector<T>::Flatten(
        GET_DATA_SAFELY(ddX, "Input", "DDX", "ELUGradGrad"));
    auto x = EigenVector<T>::Flatten(
        GET_DATA_SAFELY(X, "Input", "X", "ELUGradGrad"));
    if (dX) {
      auto dx = EigenVector<T>::Flatten(
          GET_DATA_SAFELY(dX, "Output", "DX", "ELUGradGrad"));
      auto dout = EigenVector<T>::Flatten(
          GET_DATA_SAFELY(dOut, "Output", "DOut", "ELUGradGrad"));
      dx.device(*d) = ddx * dout * static_cast<T>(alpha) * x.exp() *
                      (x <= static_cast<T>(0)).template cast<T>();
    }
    if (ddOut) {
      auto ddout = EigenVector<T>::Flatten(
          GET_DATA_SAFELY(ddOut, "Output", "DDOut", "ELUGradGrad"));
      ddout.device(*d) = ddx * ((x > static_cast<T>(0)).template cast<T>() +
                                static_cast<T>(alpha) * x.exp() *
                                    (x <= static_cast<T>(0)).template cast<T>())
                                   .template cast<T>();
    }
  }
  static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};

// silu(x) = x / (1 + exp(-x))
template <typename T>
struct SiluFunctor : public BaseActivationFunctor<T> {
  template <typename Device, typename X, typename Out>
  void operator()(Device d, X x, Out out) const {
    auto temp = static_cast<T>(1) / (static_cast<T>(1) + (-x).exp());
    out.device(d) = x * temp;
  }
};

// silu'(x) = (1 / (1 + e^{-x})) * (1 + out * e^{-x}))
template <typename T>
struct SiluGradFunctor : public BaseActivationFunctor<T> {
  template <typename Device, typename X, typename Out, typename dOut,
            typename dX>
  void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
    auto temp1 = static_cast<T>(1) + (-x).exp();  // 1+e^(-x)
    auto temp2 = x * (-x).exp();                  // x*e^(-x)
    dx.device(d) = dout * ((static_cast<T>(1) / temp1) *
                           (static_cast<T>(1) + (temp2 / temp1)));
  }
  static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};

#if defined(__NVCC__) || defined(__HIPCC__) || defined(__xpu__)
template <typename T>
struct CudaReluFunctor : public BaseActivationFunctor<T> {
...
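ELUGradGradFunctor encodes the second derivative of elu against the forward input: for x > 0 the ddOut term multiplies ddx by 1, for x <= 0 by alpha*e^x, and dX additionally picks up dout * alpha * e^x on the negative side. A standalone numeric check of that second derivative against a finite difference of elu'(x):

#include <cmath>
#include <cstdio>

double elu_grad(double x, double a) { return x > 0 ? 1.0 : a * std::exp(x); }

// Second derivative used by ELUGradGradFunctor's dX term.
double elu_grad_grad(double x, double a) {
  return x > 0 ? 0.0 : a * std::exp(x);
}

int main() {
  const double a = 1.5, h = 1e-6;
  for (double x : {-2.0, -0.5, 0.5, 2.0}) {
    double fd = (elu_grad(x + h, a) - elu_grad(x - h, a)) / (2 * h);
    std::printf("x=%5.2f analytic=%.8f fd=%.8f\n", x, elu_grad_grad(x, a), fd);
  }
}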
@@ -1218,6 +1450,209 @@ struct CudaLeakyReluGradFunctor : public BaseActivationFunctor<T> {
static
constexpr
ActBwdOpFwdDeps
FwdDeps
()
{
return
ActBwdOpFwdDeps
::
kDepX
;
}
};
template
<
typename
T
>
struct
CudaSoftShrinkFunctor
:
public
BaseActivationFunctor
<
T
>
{
float
lambda
;
typename
BaseActivationFunctor
<
T
>::
AttrPair
GetAttrs
()
{
return
{{
"lambda"
,
&
lambda
}};
}
// softshrink(x) = x - lambda, if x > lambda;
// x + lambda, if x < -lambda;
// 0, otherwise.
__device__
__forceinline__
T
operator
()(
const
T
x
)
const
{
T
l
=
static_cast
<
T
>
(
lambda
);
T
temp1
=
static_cast
<
T
>
(
x
>
l
);
T
temp2
=
static_cast
<
T
>
(
x
<
-
l
);
return
temp1
*
(
x
-
l
)
+
temp2
*
(
x
+
l
);
}
};
template
<
typename
T
>
struct
CudaSoftShrinkGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
T
zero
=
static_cast
<
T
>
(
0.0
f
);
float
lambda
;
typename
BaseActivationFunctor
<
T
>::
AttrPair
GetAttrs
()
{
return
{{
"lambda"
,
&
lambda
}};
}
// dx = dout, if x > lambda or x < -lambda else 0
__device__
__forceinline__
T
operator
()(
const
T
dout
,
const
T
x
)
const
{
T
l
=
static_cast
<
T
>
(
lambda
);
return
(
x
>=
-
l
&&
x
<=
l
)
?
zero
:
dout
;
}
static
constexpr
ActBwdOpFwdDeps
FwdDeps
()
{
return
ActBwdOpFwdDeps
::
kDepX
;
}
};
template
<
typename
T
>
struct
CudaTanhShrinkFunctor
:
public
BaseActivationFunctor
<
T
>
{
using
MPType
=
typename
phi
::
dtype
::
MPTypeTrait
<
T
>::
Type
;
// tanhshrink(x) = x - tanh(x)
__device__
__forceinline__
T
operator
()(
const
T
arg_x
)
const
{
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
return
static_cast
<
T
>
(
x
-
tanh
(
x
));
}
};
template <typename T>
struct CudaTanhShrinkGradFunctor : public BaseActivationFunctor<T> {
  using MPType = typename phi::dtype::MPTypeTrait<T>::Type;

  // dx = dout * tanh(x)^2
  __device__ __forceinline__ T operator()(const T arg_dout,
                                          const T arg_x) const {
    MPType dout = static_cast<MPType>(arg_dout);
    MPType x = static_cast<MPType>(arg_x);
    return static_cast<T>(dout * tanh(x) * tanh(x));
  }

  static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};
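For reference, the backward rule follows from d/dx tanh(x) = 1 - tanh(x)^2: the derivative of tanhshrink is d/dx (x - tanh(x)) = 1 - (1 - tanh(x)^2) = tanh(x)^2, which is exactly the factor applied to dout above.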
template <typename T>
struct CudaHardShrinkFunctor : public BaseActivationFunctor<T> {
  T zero = static_cast<T>(0.0f);
  float threshold;

  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"threshold", &threshold}};
  }

  // hardshrink(x) = (x > -threshold && x < threshold) ? 0 : x
  __device__ __forceinline__ T operator()(const T x) const {
    T t = static_cast<T>(threshold);
    return (x > -t && x < t) ? zero : x;
  }
};
template <typename T>
struct CudaHardShrinkGradFunctor : public BaseActivationFunctor<T> {
  T zero = static_cast<T>(0.0f);
  float threshold;

  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"threshold", &threshold}};
  }

  // dx = (x > -threshold && x < threshold) ? 0 : dout
  __device__ __forceinline__ T operator()(const T dout, const T x) const {
    T t = static_cast<T>(threshold);
    return (x > -t && x < t) ? zero : dout;
  }

  static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};
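The shrink variants differ only in how they treat the band around zero: soft shrink shifts values outside the band toward zero by lambda, hard shrink passes them through unchanged, and tanh shrink is smooth with no band at all. A minimal host-side contrast (standalone C++, not the CUDA functors; inputs are arbitrary assumptions):

#include <cmath>
#include <cstdio>

double softshrink(double x, double l) {
  return x > l ? x - l : (x < -l ? x + l : 0.0);  // shifts by lambda outside the band
}
double hardshrink(double x, double t) {
  return (x > -t && x < t) ? 0.0 : x;  // keeps x unchanged outside the band
}
double tanhshrink(double x) { return x - std::tanh(x); }  // smooth, no band

int main() {
  const double x = 1.2, band = 0.5;  // arbitrary sample values (assumptions)
  std::printf("soft=%f hard=%f tanh=%f\n",
              softshrink(x, band), hardshrink(x, band), tanhshrink(x));
  return 0;
}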
template <typename T>
struct CudaELUFunctor : public BaseActivationFunctor<T> {
  using CT = typename phi::dtype::MPTypeTrait<T>::Type;
  CT zero = static_cast<CT>(0.0f);
  CT one = static_cast<CT>(1.0f);
  float alpha;

  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"alpha", &alpha}};
  }

  // elu(x) = x, if x > 0
  // elu(x) = alpha * (e^x - 1), if x <= 0
  __device__ __forceinline__ T operator()(const T arg_x) const {
    CT x = static_cast<CT>(arg_x);
    CT temp = static_cast<CT>(alpha) * (exp(x) - one);
    CT res = x > zero ? x : temp;
    return static_cast<T>(res);
  }
};
template <typename T>
struct CudaELUGradFunctor : public BaseActivationFunctor<T> {
  using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
  MPType zero = static_cast<MPType>(0.0f);
  float alpha;

  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"alpha", &alpha}};
  }

  // case 1: alpha >= 0
  // dx = dout, if out > 0
  // dx = dout * (out + alpha), if out <= 0
  __device__ __forceinline__ T operator()(T arg_dout, T arg_out) const {
    MPType dout = static_cast<MPType>(arg_dout);
    MPType out = static_cast<MPType>(arg_out);
    MPType a = static_cast<MPType>(alpha);
    MPType out_pos = static_cast<MPType>(out > zero);
    MPType out_neg = static_cast<MPType>(out <= zero);
    return static_cast<T>(dout * (out_pos + out_neg * (out + a)));
  }

  static constexpr ActBwdOpFwdDeps FwdDeps() {
    return ActBwdOpFwdDeps::kDepOut;
  }
};
template <typename T>
struct CudaELUGradNegativeAlphaFunctor : public BaseActivationFunctor<T> {
  using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
  MPType zero = static_cast<MPType>(0.0f);
  float alpha;

  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"alpha", &alpha}};
  }

  // case 2: alpha < 0
  // dx = dout, if x > 0
  // dx = dout * (out + alpha), if x <= 0
  __device__ __forceinline__ T operator()(const T arg_dout,
                                          const T arg_out,
                                          const T arg_x) const {
    MPType dout = static_cast<MPType>(arg_dout);
    MPType out = static_cast<MPType>(arg_out);
    MPType x = static_cast<MPType>(arg_x);
    MPType a = static_cast<MPType>(alpha);
    MPType x_pos = static_cast<MPType>(x > zero);
    MPType x_neg = static_cast<MPType>(x <= zero);
    return static_cast<T>(dout * (x_pos + x_neg * (out + a)));
  }

  static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};
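Why a separate functor for alpha < 0: when alpha >= 0, out > 0 holds exactly when x > 0 (the x <= 0 branch yields alpha * (e^x - 1) <= 0), so the gradient can branch on out alone and declare kDepOut. When alpha < 0, that same branch yields out >= 0, so the sign of out no longer identifies which formula produced it; the functor must take x as a third input and declare kDepX instead.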
template <typename T>
struct CudaSiluFunctor : public BaseActivationFunctor<T> {
  using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
  MPType one = static_cast<MPType>(1.0f);

  // silu(x) = x / (1 + exp(-x))
  __device__ __forceinline__ T operator()(const T arg_x) const {
    MPType x = static_cast<MPType>(arg_x);
    return static_cast<T>(x / (one + exp(-x)));
  }
};
template <typename T>
struct CudaSiluGradFunctor : public BaseActivationFunctor<T> {
  using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
  MPType one = static_cast<MPType>(1.0f);

  // dx = dout * (1 + exp(-x) + x * exp(-x)) / (1 + exp(-x))^2
  __device__ __forceinline__ T operator()(const T arg_dout,
                                          const T arg_x) const {
    MPType dout = static_cast<MPType>(arg_dout);
    MPType x = static_cast<MPType>(arg_x);
    MPType temp = one / (one + exp(-x));
    return static_cast<T>(dout * (temp * (one + x * (one - temp))));
  }

  static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};
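All of these CUDA functors compute through MPType rather than T, so half-precision tensors do their exp()/tanh() math in float and only narrow on the way out. A minimal sketch of the trait pattern assumed here (an illustration, not the phi source; float16 is a stand-in type):

#include <type_traits>

struct float16 {};  // stand-in for phi::dtype::float16 (assumption)

template <typename T>
struct MPTypeTraitSketch {
  using Type = T;  // default: compute in the input type itself
};
template <>
struct MPTypeTraitSketch<float16> {
  using Type = float;  // half-precision inputs compute in float
};

static_assert(std::is_same<MPTypeTraitSketch<float16>::Type, float>::value,
              "float16 math is carried out in float");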
#endif
}  // namespace funcs
...
...
paddle/phi/kernels/gpu/activation_grad_kernel.cu
...
...
@@ -73,7 +73,7 @@ void ActivationGradGPUImpl(const Context& dev_ctx,
}
}
#define DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DepX(name, functor_class) \
#define DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(name, functor_class) \
template <typename T, typename Context> \
void name##GradKernel(const Context& dev_ctx, \
const DenseTensor& x, \
...
...
@@ -84,7 +84,7 @@ void ActivationGradGPUImpl(const Context& dev_ctx,
dev_ctx, &x, nullptr, &dout, dx, functor); \
}
#define DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DepX( \
#define DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX( \
name, functor_class, attr) \
template <typename T, typename Context> \
void name##GradKernel(const Context& dev_ctx, \
...
...
@@ -99,7 +99,7 @@ void ActivationGradGPUImpl(const Context& dev_ctx,
dev_ctx, &x, nullptr, &dout, dx, functor); \
}
#define DEFINE_GPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DepX( \
#define DEFINE_GPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX( \
name, functor_class, attr1, attr2) \
template <typename T, typename Context> \
void name##GradKernel(const Context& dev_ctx, \
...
...
@@ -116,7 +116,7 @@ void ActivationGradGPUImpl(const Context& dev_ctx,
dev_ctx, &x, nullptr, &dout, dx, functor); \
}
#define DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DepOut(name, functor_class) \
#define DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPOUT(name, functor_class) \
template <typename T, typename Context> \
void name##GradKernel(const Context& dev_ctx, \
const DenseTensor& out, \
...
...
@@ -127,7 +127,7 @@ void ActivationGradGPUImpl(const Context& dev_ctx,
dev_ctx, nullptr, &out, &dout, dx, functor); \
}
#define DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DepOut( \
#define DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPOUT( \
name, functor_class, attr) \
template <typename T, typename Context> \
void name##GradKernel(const Context& dev_ctx, \
...
...
@@ -142,32 +142,62 @@ void ActivationGradGPUImpl(const Context& dev_ctx,
dev_ctx, nullptr, &out, &dout, dx, functor); \
}
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DepOut(Relu, CudaReluGradFunctor);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DepOut(Tanh, CudaTanhGradFunctor);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DepX(Cos, CudaCosGradFunctor);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DepX(Tan, CudaTanGradFunctor);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DepX(Acos, CudaAcosGradFunctor);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DepX(Sin, CudaSinGradFunctor);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DepX(Asin, CudaAsinGradFunctor);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DepX(Atan, CudaAtanGradFunctor);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DepX(Sinh, CudaSinhGradFunctor);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DepX(Cosh, CudaCoshGradFunctor);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DepX(Asinh, CudaAsinhGradFunctor);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DepX(Acosh, CudaAcoshGradFunctor);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DepX(Atanh, CudaAtanhGradFunctor);
DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DepX(LeakyRelu,
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Relu, CudaReluGradFunctor);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Tanh, CudaTanhGradFunctor);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Cos, CudaCosGradFunctor);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Tan, CudaTanGradFunctor);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Acos, CudaAcosGradFunctor);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Sin, CudaSinGradFunctor);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Asin, CudaAsinGradFunctor);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Atan, CudaAtanGradFunctor);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Sinh, CudaSinhGradFunctor);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Cosh, CudaCoshGradFunctor);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Asinh, CudaAsinhGradFunctor);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Acosh, CudaAcoshGradFunctor);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Atanh, CudaAtanhGradFunctor);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(TanhShrink, CudaTanhShrinkGradFunctor);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Silu, CudaSiluGradFunctor);
DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(LeakyRelu,
                                               CudaLeakyReluGradFunctor,
                                               alpha);
DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DepX(ThresholdedRelu,
DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(ThresholdedRelu,
                                               CudaThresholdedReluGradFunctor,
                                               threshold);
DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(SoftShrink,
                                               CudaSoftShrinkGradFunctor,
                                               lambda);
DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(HardShrink,
                                               CudaHardShrinkGradFunctor,
                                               threshold);
DEFINE_GPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DepX(BRelu,
DEFINE_GPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX(BRelu,
                                               CudaBReluGradFunctor,
                                               t_min,
                                               t_max);
template <typename T, typename Context>
void EluGradKernel(const Context& dev_ctx,
                   const DenseTensor& x,
                   const DenseTensor& out,
                   const DenseTensor& dout,
                   float alpha,
                   DenseTensor* dx) {
  dev_ctx.template Alloc<T>(dx);
  std::vector<const DenseTensor*> ins = {&dout, &out};
  std::vector<DenseTensor*> outs = {dx};
  if (alpha > 0) {
    funcs::CudaELUGradFunctor<T> functor;
    functor.alpha = alpha;
    funcs::ElementwiseKernel<T>(dev_ctx, ins, &outs, functor);
  } else {
    funcs::CudaELUGradNegativeAlphaFunctor<T> functor;
    functor.alpha = alpha;
    ins.push_back(&x);
    funcs::ElementwiseKernel<T>(dev_ctx, ins, &outs, functor);
  }
}
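Note how the functor arity drives the input list here: CudaELUGradFunctor::operator() consumes (dout, out), so ins starts with two tensors, while the negative-alpha path appends &x to match the three-argument functor before handing both to the same ElementwiseKernel launcher.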
}  // namespace phi
#ifdef PADDLE_WITH_HIP
...
...
@@ -234,3 +264,9 @@ PD_REGISTER_ACTIVATION_GRAD_KERNEL(leaky_relu_double_grad,
                                    LeakyReluDoubleGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(thresholded_relu_grad,
                                   ThresholdedReluGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(soft_shrink_grad, SoftShrinkGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(hard_shrink_grad, HardShrinkGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(tanh_shrink_grad, TanhShrinkGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(silu_grad, SiluGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(elu_grad, EluGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(elu_double_grad, EluDoubleGradKernel)
paddle/phi/kernels/gpu/activation_kernel.cu
...
...
@@ -38,12 +38,13 @@ void ActivationGPUImpl(const Context& dev_ctx,
  funcs::ElementwiseKernel<T>(dev_ctx, ins, &outs, functor);
}
#define DEFINE_GPU_ACTIVATION_KERNEL(name, functor_class) \
template <typename T, typename Context> \
void name##Kernel( \
const Context& dev_ctx, const DenseTensor& x, DenseTensor* out) { \
functor_class functor; \
ActivationGPUImpl<T, Context, functor_class>(dev_ctx, x, out, functor); \
#define DEFINE_GPU_ACTIVATION_KERNEL(name, functor_class) \
template <typename T, typename Context> \
void name##Kernel( \
const Context& dev_ctx, const DenseTensor& x, DenseTensor* out) { \
funcs::functor_class<T> functor; \
ActivationGPUImpl<T, Context, funcs::functor_class<T>>( \
dev_ctx, x, out, functor); \
}
#define DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(name, functor_class, attr) \
...
...
@@ -75,24 +76,31 @@ void ActivationGPUImpl(const Context& dev_ctx,
dev_ctx, x, out, functor); \
}
DEFINE_GPU_ACTIVATION_KERNEL(Cos, funcs::CudaCosFunctor<T>)
DEFINE_GPU_ACTIVATION_KERNEL(Tan, funcs::CudaTanFunctor<T>)
DEFINE_GPU_ACTIVATION_KERNEL(Acos, funcs::CudaAcosFunctor<T>)
DEFINE_GPU_ACTIVATION_KERNEL(Sin, funcs::CudaSinFunctor<T>)
DEFINE_GPU_ACTIVATION_KERNEL(Asin, funcs::CudaAsinFunctor<T>)
DEFINE_GPU_ACTIVATION_KERNEL(Atan, funcs::CudaAtanFunctor<T>)
DEFINE_GPU_ACTIVATION_KERNEL(Sinh, funcs::CudaSinhFunctor<T>)
DEFINE_GPU_ACTIVATION_KERNEL(Cosh, funcs::CudaCoshFunctor<T>)
DEFINE_GPU_ACTIVATION_KERNEL(Asinh, funcs::CudaAsinhFunctor<T>)
DEFINE_GPU_ACTIVATION_KERNEL(Acosh, funcs::CudaAcoshFunctor<T>)
DEFINE_GPU_ACTIVATION_KERNEL(Atanh, funcs::CudaAtanhFunctor<T>)
DEFINE_GPU_ACTIVATION_KERNEL(Relu, funcs::CudaReluFunctor<T>)
DEFINE_GPU_ACTIVATION_KERNEL(Tanh, funcs::CudaTanhFunctor<T>)
DEFINE_GPU_ACTIVATION_KERNEL(Cos, CudaCosFunctor)
DEFINE_GPU_ACTIVATION_KERNEL(Tan, CudaTanFunctor)
DEFINE_GPU_ACTIVATION_KERNEL(Acos, CudaAcosFunctor)
DEFINE_GPU_ACTIVATION_KERNEL(Sin, CudaSinFunctor)
DEFINE_GPU_ACTIVATION_KERNEL(Asin, CudaAsinFunctor)
DEFINE_GPU_ACTIVATION_KERNEL(Atan, CudaAtanFunctor)
DEFINE_GPU_ACTIVATION_KERNEL(Sinh, CudaSinhFunctor)
DEFINE_GPU_ACTIVATION_KERNEL(Cosh, CudaCoshFunctor)
DEFINE_GPU_ACTIVATION_KERNEL(Asinh, CudaAsinhFunctor)
DEFINE_GPU_ACTIVATION_KERNEL(Acosh, CudaAcoshFunctor)
DEFINE_GPU_ACTIVATION_KERNEL(Atanh, CudaAtanhFunctor)
DEFINE_GPU_ACTIVATION_KERNEL(Relu, CudaReluFunctor)
DEFINE_GPU_ACTIVATION_KERNEL(Tanh, CudaTanhFunctor)
DEFINE_GPU_ACTIVATION_KERNEL(TanhShrink, CudaTanhShrinkFunctor)
DEFINE_GPU_ACTIVATION_KERNEL(Silu, CudaSiluFunctor)
DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(LeakyRelu, CudaLeakyReluFunctor, alpha)
DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(ThresholdedRelu,
                                     CudaThresholdedReluFunctor,
                                     threshold)
DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(HardShrink,
                                     CudaHardShrinkFunctor,
                                     threshold)
DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(SoftShrink, CudaSoftShrinkFunctor, lambda)
DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(Elu, CudaELUFunctor, alpha)
DEFINE_GPU_ACT_KERNEL_WITH_TWO_ATTRS(BRelu, CudaBReluFunctor, t_min, t_max)
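Because the reworked DEFINE_GPU_ACTIVATION_KERNEL macro shown above now supplies funcs:: and the <T> instantiation itself, the call sites drop both. For reference, hand-expanding one call site, DEFINE_GPU_ACTIVATION_KERNEL(Silu, CudaSiluFunctor), yields:

template <typename T, typename Context>
void SiluKernel(const Context& dev_ctx, const DenseTensor& x,
                DenseTensor* out) {
  funcs::CudaSiluFunctor<T> functor;
  ActivationGPUImpl<T, Context, funcs::CudaSiluFunctor<T>>(
      dev_ctx, x, out, functor);
}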
...
...
@@ -142,3 +150,8 @@ PD_REGISTER_ACTIVATION_KERNEL(tanh, TanhKernel)
PD_REGISTER_ACTIVATION_KERNEL(brelu, BReluKernel)
PD_REGISTER_ACTIVATION_KERNEL(thresholded_relu, ThresholdedReluKernel)
PD_REGISTER_ACTIVATION_KERNEL(leaky_relu, LeakyReluKernel)
PD_REGISTER_ACTIVATION_KERNEL(hard_shrink, HardShrinkKernel)
PD_REGISTER_ACTIVATION_KERNEL(soft_shrink, SoftShrinkKernel)
PD_REGISTER_ACTIVATION_KERNEL(tanh_shrink, TanhShrinkKernel)
PD_REGISTER_ACTIVATION_KERNEL(elu, EluKernel)
PD_REGISTER_ACTIVATION_KERNEL(silu, SiluKernel)
paddle/phi/kernels/impl/activation_grad_impl.h
...
...
@@ -202,4 +202,24 @@ void TanhTripleGradKernel(const Context& dev_ctx,
                      d_ddx);  // output
}
template <typename T, typename Context>
void EluDoubleGradKernel(const Context& dev_ctx,
                         const DenseTensor& x,
                         const DenseTensor& dout,
                         const DenseTensor& ddx,
                         float alpha,
                         DenseTensor* dx,
                         DenseTensor* ddout) {
  if (dx) {
    dx->Resize(x.dims());
    dev_ctx.template Alloc<T>(dx);
  }
  if (ddout) {
    dev_ctx.template Alloc<T>(ddout);
  }
  funcs::ELUGradGradFunctor<T> functor;
  functor.alpha = alpha;
  functor(dev_ctx, &x, &ddx, ddout, &dout, dx);
}
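Both output pointers are nullptr-guarded because a caller may request only DX or only DDOut; the ELUGradGradFunctor (shown earlier in activation_functor.h) applies matching guards before writing either tensor.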
}  // namespace phi
paddle/phi/ops/compat/activation_sig.cc
...
...
@@ -16,45 +16,49 @@ limitations under the License. */
namespace phi {
#define DefineActGradDepXOpArgMap(func_name, op_name, attrs) \
  KernelSignature func_name##GradOpArgumentMapping(          \
      const ArgumentMappingContext& ctx) {                   \
    return KernelSignature(op_name "_grad",                  \
                           {"X", GradVarName("Out")},        \
                           {attrs},                          \
                           {GradVarName("X")});              \
#define DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(func_name, op_name, attrs) \
  KernelSignature func_name##GradOpArgumentMapping(               \
      const ArgumentMappingContext& ctx) {                        \
    return KernelSignature(op_name "_grad",                       \
                           {"X", GradVarName("Out")},             \
                           {attrs},                               \
                           {GradVarName("X")});                   \
}
#define DefineActGradDepOutOpArgMap(func_name, op_name, attrs) \
  KernelSignature func_name##GradOpArgumentMapping(            \
      const ArgumentMappingContext& ctx) {                     \
    return KernelSignature(op_name "_grad",                    \
                           {"Out", GradVarName("Out")},        \
                           {attrs},                            \
                           {GradVarName("X")});                \
#define DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(func_name, op_name, attrs) \
  KernelSignature func_name##GradOpArgumentMapping(                 \
      const ArgumentMappingContext& ctx) {                          \
    return KernelSignature(op_name "_grad",                         \
                           {"Out", GradVarName("Out")},             \
                           {attrs},                                 \
                           {GradVarName("X")});                     \
}
#define comma ,
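The comma helper is the usual preprocessor trick for smuggling a comma through a single macro argument: in DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(BRelu, "brelu", "t_min" comma "t_max") below, the attrs parameter expands inside {attrs} to {"t_min", "t_max"} rather than being parsed as two separate macro arguments.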
DefineActGradDepXOpArgMap(Cos, "cos", );      // NOLINT
DefineActGradDepXOpArgMap(Tan, "tan", );      // NOLINT
DefineActGradDepXOpArgMap(Acos, "acos", );    // NOLINT
DefineActGradDepXOpArgMap(Sin, "sin", );      // NOLINT
DefineActGradDepXOpArgMap(Asin, "asin", );    // NOLINT
DefineActGradDepXOpArgMap(Atan, "atan", );    // NOLINT
DefineActGradDepXOpArgMap(Sinh, "sinh", );    // NOLINT
DefineActGradDepXOpArgMap(Cosh, "cosh", );    // NOLINT
DefineActGradDepXOpArgMap(Asinh, "asinh", );  // NOLINT
DefineActGradDepXOpArgMap(Acosh, "acosh", );  // NOLINT
DefineActGradDepXOpArgMap(Atanh, "atanh", );  // NOLINT
DefineActGradDepXOpArgMap(BRelu, "brelu", "t_min" comma "t_max");  // NOLINT
DefineActGradDepXOpArgMap(LeakyRelu, "leaky_relu", "alpha");       // NOLINT
DefineActGradDepXOpArgMap(ThresholdedRelu,
                          "thresholded_relu",
                          "threshold");       // NOLINT
DefineActGradDepOutOpArgMap(Relu, "relu", );  // NOLINT
DefineActGradDepOutOpArgMap(Tanh, "tanh", );  // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Cos, "cos", );      // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Tan, "tan", );      // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Acos, "acos", );    // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Sin, "sin", );      // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Asin, "asin", );    // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Atan, "atan", );    // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Sinh, "sinh", );    // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Cosh, "cosh", );    // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Asinh, "asinh", );  // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Acosh, "acosh", );  // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Atanh, "atanh", );  // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(BRelu, "brelu", "t_min" comma "t_max");
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(LeakyRelu, "leaky_relu", "alpha");
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(ThresholdedRelu, "thresholded_relu", "threshold");
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(SoftShrink, "soft_shrink", "lambda");
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(HardShrink, "hard_shrink", "threshold");
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(TanhShrink, "tanh_shrink", );  // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Silu, "silu", );               // NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Relu, "relu", );             // NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Tanh, "tanh", );             // NOLINT
KernelSignature ReluDoubleGradOpArgumentMapping(
    const ArgumentMappingContext& ctx) {
...
...
@@ -85,11 +89,31 @@ KernelSignature LeakyReluOpArgumentMapping(const ArgumentMappingContext& ctx) {
  return KernelSignature("leaky_relu", {"X"}, {"alpha"}, {"Out"});
}
KernelSignature EluOpArgumentMapping(const ArgumentMappingContext& ctx) {
  return KernelSignature("elu", {"X"}, {"alpha"}, {"Out"});
}
KernelSignature EluGradOpArgumentMapping(const ArgumentMappingContext& ctx) {
  return KernelSignature("elu_grad",
                         {"X", "Out", GradVarName("Out")},
                         {"alpha"},
                         {GradVarName("X")});
}
KernelSignature EluDoubleGradOpArgumentMapping(
    const ArgumentMappingContext& ctx) {
  return KernelSignature(
      "elu_double_grad", {"X", "DOut", "DDX"}, {"alpha"}, {"DX", "DDOut"});
}
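Each mapping ties the legacy operator interface to a phi kernel signature: EluGradOpArgumentMapping, for example, routes the op's X, Out, and Grad(Out) variables plus the alpha attribute into the "elu_grad" kernel and receives Grad(X) back, while the double-grad mapping renames the op's DOut/DDX inputs onto the kernel's {"X", "DOut", "DDX"} slots.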
}  // namespace phi
PD_REGISTER_BASE_KERNEL_NAME(relu_grad_grad, relu_double_grad);
PD_REGISTER_BASE_KERNEL_NAME(tanh_grad_grad, tanh_double_grad);
PD_REGISTER_BASE_KERNEL_NAME(leaky_relu_grad_grad, leaky_relu_double_grad);
PD_REGISTER_BASE_KERNEL_NAME(softshrink, soft_shrink);
PD_REGISTER_BASE_KERNEL_NAME(softshrink_grad, soft_shrink_grad);
PD_REGISTER_BASE_KERNEL_NAME(elu_grad_grad, elu_double_grad);
PD_REGISTER_ARG_MAPPING_FN(cos_grad, phi::CosGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(tan_grad, phi::TanGradOpArgumentMapping);
...
...
@@ -118,3 +142,13 @@ PD_REGISTER_ARG_MAPPING_FN(leaky_relu_grad_grad,
                           phi::LeakyReluDoubleGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(thresholded_relu_grad,
                           phi::ThresholdedReluGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(softshrink_grad,
                           phi::SoftShrinkGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(hard_shrink_grad,
                           phi::HardShrinkGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(tanh_shrink_grad,
                           phi::TanhShrinkGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(elu, phi::EluOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(elu_grad, phi::EluGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(elu_grad_grad, phi::EluDoubleGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(silu_grad, phi::SiluGradOpArgumentMapping);