Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
07473786
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
07473786
编写于
3月 29, 2022
作者:
H
hong
提交者:
GitHub
3月 29, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Revert "Revert "Move some activation to phi (#40727)" (#41056)"
This reverts commit
05f3d48e
.
上级
9bb3744f
变更
29
隐藏空白更改
内联
并排
Showing
29 changed file
with
1908 addition
and
931 deletion
+1908
-931
paddle/fluid/framework/new_executor/standalone_executor_test.cc
.../fluid/framework/new_executor/standalone_executor_test.cc
+2
-1
paddle/fluid/operators/activation_op.cc
paddle/fluid/operators/activation_op.cc
+9
-77
paddle/fluid/operators/activation_op.h
paddle/fluid/operators/activation_op.h
+85
-399
paddle/fluid/operators/activation_op.kps
paddle/fluid/operators/activation_op.kps
+23
-348
paddle/fluid/operators/math/CMakeLists.txt
paddle/fluid/operators/math/CMakeLists.txt
+2
-2
paddle/fluid/operators/math/selected_rows_functor.cc
paddle/fluid/operators/math/selected_rows_functor.cc
+1
-0
paddle/fluid/operators/temporal_shift_op.h
paddle/fluid/operators/temporal_shift_op.h
+1
-89
paddle/phi/kernels/activation_kernel.h
paddle/phi/kernels/activation_kernel.h
+22
-0
paddle/phi/kernels/cpu/activation_grad_kernel.cc
paddle/phi/kernels/cpu/activation_grad_kernel.cc
+54
-0
paddle/phi/kernels/cpu/activation_kernel.cc
paddle/phi/kernels/cpu/activation_kernel.cc
+31
-1
paddle/phi/kernels/cpu/temporal_shift_grad_kernel.cc
paddle/phi/kernels/cpu/temporal_shift_grad_kernel.cc
+136
-0
paddle/phi/kernels/cpu/temporal_shift_kernel.cc
paddle/phi/kernels/cpu/temporal_shift_kernel.cc
+131
-0
paddle/phi/kernels/funcs/activation_functor.h
paddle/phi/kernels/funcs/activation_functor.h
+589
-0
paddle/phi/kernels/gpu/activation_grad_kernel.cu
paddle/phi/kernels/gpu/activation_grad_kernel.cu
+65
-0
paddle/phi/kernels/gpu/activation_kernel.cu
paddle/phi/kernels/gpu/activation_kernel.cu
+54
-0
paddle/phi/kernels/gpu/temporal_shift_grad_kernel.cu
paddle/phi/kernels/gpu/temporal_shift_grad_kernel.cu
+149
-0
paddle/phi/kernels/gpu/temporal_shift_kernel.cu
paddle/phi/kernels/gpu/temporal_shift_kernel.cu
+148
-0
paddle/phi/kernels/impl/activation_grad_impl.h
paddle/phi/kernels/impl/activation_grad_impl.h
+18
-0
paddle/phi/kernels/impl/activation_impl.h
paddle/phi/kernels/impl/activation_impl.h
+16
-0
paddle/phi/kernels/selected_rows/activation_kernel.cc
paddle/phi/kernels/selected_rows/activation_kernel.cc
+68
-0
paddle/phi/kernels/selected_rows/activation_kernel.h
paddle/phi/kernels/selected_rows/activation_kernel.h
+34
-0
paddle/phi/kernels/temporal_shift_grad_kernel.h
paddle/phi/kernels/temporal_shift_grad_kernel.h
+29
-0
paddle/phi/kernels/temporal_shift_kernel.h
paddle/phi/kernels/temporal_shift_kernel.h
+29
-0
paddle/phi/ops/compat/activation_sig.cc
paddle/phi/ops/compat/activation_sig.cc
+68
-14
paddle/phi/ops/compat/temporal_shift_sig.cc
paddle/phi/ops/compat/temporal_shift_sig.cc
+39
-0
python/paddle/fluid/tests/unittests/test_activation_nn_grad.py
...n/paddle/fluid/tests/unittests/test_activation_nn_grad.py
+1
-0
python/paddle/fluid/tests/unittests/test_activation_sparse_op.py
...paddle/fluid/tests/unittests/test_activation_sparse_op.py
+101
-0
python/paddle/fluid/tests/unittests/test_clip_by_norm_op.py
python/paddle/fluid/tests/unittests/test_clip_by_norm_op.py
+2
-0
python/paddle/fluid/tests/unittests/test_temporal_shift_op.py
...on/paddle/fluid/tests/unittests/test_temporal_shift_op.py
+1
-0
未找到文件。
paddle/fluid/framework/new_executor/standalone_executor_test.cc
浏览文件 @
07473786
...
...
@@ -53,7 +53,7 @@ USE_OP_ITSELF(tanh_grad);
USE_OP
(
sum
);
USE_OP_ITSELF
(
slice_grad
);
USE_OP_ITSELF
(
lookup_table_grad
);
USE_OP
(
sqrt
);
USE_OP
_ITSELF
(
sqrt
);
USE_OP_ITSELF
(
elementwise_max
);
USE_OP_ITSELF
(
elementwise_div
);
USE_OP_ITSELF
(
sgd
);
...
...
@@ -83,6 +83,7 @@ PD_DECLARE_KERNEL(max_raw, GPU, ALL_LAYOUT);
PD_DECLARE_KERNEL
(
sgd
,
GPU
,
ALL_LAYOUT
);
PD_DECLARE_KERNEL
(
slice
,
GPU
,
ALL_LAYOUT
);
PD_DECLARE_KERNEL
(
slice_grad
,
GPU
,
ALL_LAYOUT
);
PD_DECLARE_KERNEL
(
sqrt
,
GPU
,
ALL_LAYOUT
);
DECLARE_double
(
eager_delete_tensor_gb
);
...
...
paddle/fluid/operators/activation_op.cc
浏览文件 @
07473786
...
...
@@ -1496,6 +1496,14 @@ REGISTER_ACTIVATION_OP(hard_sigmoid, HardSigmoid, HardSigmoidFunctor,
HardSigmoidGradFunctor
);
REGISTER_ACTIVATION_OP
(
logsigmoid
,
LogSigmoid
,
LogSigmoidFunctor
,
LogSigmoidGradFunctor
);
REGISTER_ACTIVATION_OP
(
expm1
,
Expm1
,
Expm1Functor
,
Expm1GradFunctor
);
REGISTER_ACTIVATION_OP
(
softplus
,
Softplus
,
SoftplusFunctor
,
SoftplusGradFunctor
);
REGISTER_ACTIVATION_OP
(
mish
,
Mish
,
MishFunctor
,
MishGradFunctor
);
REGISTER_ACTIVATION_OP
(
stanh
,
STanh
,
STanhFunctor
,
STanhGradFunctor
);
REGISTER_ACTIVATION_OP
(
reciprocal
,
Reciprocal
,
ReciprocalFunctor
,
ReciprocalGradFunctor
);
REGISTER_ACTIVATION_OP
(
log2
,
Log2
,
Log2Functor
,
Log2GradFunctor
);
REGISTER_ACTIVATION_OP
(
log10
,
Log10
,
Log10Functor
,
Log10GradFunctor
);
REGISTER_ACTIVATION_OP
(
log1p
,
Log1p
,
Log1pFunctor
,
Log1pGradFunctor
);
...
...
@@ -1630,12 +1638,7 @@ REGISTER_OPERATOR(logit, ops::LogitOp, ops::LogitOpMaker,
ops
::
LogitGradOpMaker
<
paddle
::
framework
::
OpDesc
>
,
ops
::
LogitGradOpMaker
<
paddle
::
imperative
::
OpBase
>
);
REGISTER_OPERATOR
(
logit_grad
,
ops
::
LogitGradOp
);
REGISTER_OP_CPU_KERNEL
(
logit
,
ops
::
LogitKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
ops
::
LogitKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
>
);
REGISTER_OP_CPU_KERNEL
(
logit_grad
,
ops
::
LogitGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
ops
::
LogitGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
>
);
/* ========================================================================== */
/* ======================== celu register ============================
...
...
@@ -1684,7 +1687,6 @@ REGISTER_OPERATOR(
ops
::
ActivationOpDoubleGrad
<
ops
::
SqrtGradGradFunctor
<
float
>::
FwdDeps
()
>
,
ops
::
ActivationDoubleGradOpInplaceInferer
);
REGISTER_ACTIVATION_CPU_KERNEL
(
sqrt
,
Sqrt
,
SqrtFunctor
,
SqrtGradFunctor
);
REGISTER_OP_CPU_KERNEL
(
sqrt_grad_grad
,
ops
::
SqrtDoubleGradKernel
<
plat
::
CPUDeviceContext
,
ops
::
SqrtGradGradFunctor
<
float
>>
,
...
...
@@ -1712,7 +1714,6 @@ REGISTER_OPERATOR(
ops
::
ActivationOpDoubleGrad
<
ops
::
RsqrtGradGradFunctor
<
float
>::
FwdDeps
()
>
,
ops
::
ActivationDoubleGradOpInplaceInferer
);
REGISTER_ACTIVATION_CPU_KERNEL
(
rsqrt
,
Rsqrt
,
RsqrtFunctor
,
RsqrtGradFunctor
);
REGISTER_OP_CPU_KERNEL
(
rsqrt_grad_grad
,
ops
::
RsqrtDoubleGradKernel
<
plat
::
CPUDeviceContext
,
...
...
@@ -1741,25 +1742,6 @@ REGISTER_OPERATOR(
ops
::
ActivationOpDoubleGrad
<
ops
::
SquareGradGradFunctor
<
float
>::
FwdDeps
()
>
,
ops
::
ActivationDoubleGradOpInplaceInferer
);
REGISTER_OP_CPU_KERNEL
(
square
,
ops
::
ActivationKernel
<
paddle
::
platform
::
CPUDeviceContext
,
ops
::
SquareFunctor
<
float
>>
,
ops
::
ActivationKernel
<
paddle
::
platform
::
CPUDeviceContext
,
ops
::
SquareFunctor
<
double
>>
,
ops
::
ActivationKernel
<
paddle
::
platform
::
CPUDeviceContext
,
ops
::
SquareFunctor
<
int
>>
,
ops
::
ActivationKernel
<
paddle
::
platform
::
CPUDeviceContext
,
ops
::
SquareFunctor
<
int64_t
>>
);
REGISTER_OP_CPU_KERNEL
(
square_grad
,
ops
::
ActivationGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
ops
::
SquareGradFunctor
<
float
>>
,
ops
::
ActivationGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
ops
::
SquareGradFunctor
<
double
>>
,
ops
::
ActivationGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
ops
::
SquareGradFunctor
<
int
>>
,
ops
::
ActivationGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
ops
::
SquareGradFunctor
<
int64_t
>>
);
REGISTER_OP_CPU_KERNEL
(
square_grad_grad
,
ops
::
SquareDoubleGradKernel
<
plat
::
CPUDeviceContext
,
...
...
@@ -1798,54 +1780,6 @@ REGISTER_OPERATOR(
REGISTER_OPERATOR
(
exp_grad
,
ops
::
ActivationOpGrad
,
ops
::
ActivationGradOpInplaceInferer
);
REGISTER_OP_CPU_KERNEL
(
exp
,
ops
::
ActivationKernel
<
paddle
::
platform
::
CPUDeviceContext
,
ops
::
ExpFunctor
<
float
>>
,
ops
::
ActivationKernel
<
paddle
::
platform
::
CPUDeviceContext
,
ops
::
ExpFunctor
<
double
>>
,
ops
::
ActivationKernel
<
paddle
::
platform
::
CPUDeviceContext
,
ops
::
ExpFunctor
<
int
>>
,
ops
::
ActivationKernel
<
paddle
::
platform
::
CPUDeviceContext
,
ops
::
ExpFunctor
<
int64_t
>>
);
REGISTER_OP_CPU_KERNEL
(
exp_grad
,
ops
::
ActivationGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
ops
::
ExpGradFunctor
<
float
>>
,
ops
::
ActivationGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
ops
::
ExpGradFunctor
<
double
>>
,
ops
::
ActivationGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
ops
::
ExpGradFunctor
<
int
>>
,
ops
::
ActivationGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
ops
::
ExpGradFunctor
<
int64_t
>>
);
/* ========================================================================== */
/* ========================== expm1 register ============================ */
REGISTER_OPERATOR
(
expm1
,
ops
::
ActivationOp
,
ops
::
Expm1OpMaker
,
ops
::
ActivationOpInferVarType
,
ops
::
ActivationGradOpMaker
<
ops
::
Expm1GradFunctor
<
float
>::
FwdDeps
(),
paddle
::
framework
::
OpDesc
>
,
ops
::
ActivationGradOpMaker
<
ops
::
Expm1GradFunctor
<
float
>::
FwdDeps
(),
paddle
::
imperative
::
OpBase
>
,
std
::
conditional
<
ops
::
CanInplaceAct
<
ops
::
Expm1GradFunctor
<
float
>>
(),
ops
::
ActFwdInplaceInferer
,
void
>::
type
);
REGISTER_OPERATOR
(
expm1_grad
,
ops
::
ActivationOpGrad
,
ops
::
ActivationGradOpInplaceInferer
);
REGISTER_OP_CPU_KERNEL
(
expm1
,
ops
::
ActivationKernel
<
paddle
::
platform
::
CPUDeviceContext
,
ops
::
Expm1Functor
<
float
>>
,
ops
::
ActivationKernel
<
paddle
::
platform
::
CPUDeviceContext
,
ops
::
Expm1Functor
<
double
>>
,
ops
::
ActivationKernel
<
paddle
::
platform
::
CPUDeviceContext
,
ops
::
Expm1Functor
<
plat
::
float16
>>
);
REGISTER_OP_CPU_KERNEL
(
expm1_grad
,
ops
::
ActivationGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
ops
::
Expm1GradFunctor
<
float
>>
,
ops
::
ActivationGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
ops
::
Expm1GradFunctor
<
double
>>
,
ops
::
ActivationGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
ops
::
Expm1GradFunctor
<
plat
::
float16
>>
);
/* ========================================================================== */
/* ========================== Log register ==================================*/
REGISTER_OPERATOR
(
log
,
ops
::
ActivationOp
,
ops
::
LogOpMaker
,
ops
::
ActivationOpInferVarType
,
...
...
@@ -1864,8 +1798,6 @@ REGISTER_OPERATOR(
ops
::
ActivationOpDoubleGrad
<
ops
::
LogGradGradFunctor
<
float
>::
FwdDeps
()
>
,
ops
::
ActivationDoubleGradOpInplaceInferer
);
/* ========================================================================== */
/* ========================== register checkpoint ===========================*/
REGISTER_OP_VERSION
(
leaky_relu
)
.
AddCheckpoint
(
...
...
paddle/fluid/operators/activation_op.h
浏览文件 @
07473786
...
...
@@ -264,6 +264,7 @@ USE_PHI_FUNCTOR(Asinh)
USE_PHI_FUNCTOR
(
Acosh
)
USE_PHI_FUNCTOR
(
Atanh
)
USE_PHI_FUNCTOR
(
Tanh
)
USE_PHI_FUNCTOR
(
Exp
)
USE_PHI_DOUBLE_GRAD_FUNCTOR
(
Tanh
)
USE_PHI_TRIPLE_GRAD_FUNCTOR
(
Tanh
)
USE_PHI_FUNCTOR
(
BRelu
)
...
...
@@ -289,6 +290,15 @@ USE_PHI_FUNCTOR(Log1p)
USE_PHI_FUNCTOR
(
Swish
)
USE_PHI_FUNCTOR
(
HardSwish
)
USE_PHI_FUNCTOR
(
Pow
)
USE_PHI_FUNCTOR
(
Exp
)
USE_PHI_FUNCTOR
(
Expm1
)
USE_PHI_FUNCTOR
(
Mish
)
USE_PHI_FUNCTOR
(
STanh
)
USE_PHI_FUNCTOR
(
Reciprocal
)
USE_PHI_FUNCTOR
(
Square
)
USE_PHI_FUNCTOR
(
Sqrt
)
USE_PHI_FUNCTOR
(
Rsqrt
)
USE_PHI_FUNCTOR
(
Softplus
)
template
<
typename
T
>
using
ELUGradNegativeAlphaFunctor
=
phi
::
funcs
::
ELUGradNegativeAlphaFunctor
<
T
>
;
...
...
@@ -305,49 +315,8 @@ using CeilFunctor = phi::funcs::CeilFunctor<T>;
template
<
typename
T
>
using
ZeroGradFunctor
=
phi
::
funcs
::
ZeroGradFunctor
<
T
>
;
// exp(x) = e^x
template
<
typename
T
>
struct
ExpFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Out
>
void
operator
()(
Device
d
,
X
x
,
Out
out
)
const
{
out
.
device
(
d
)
=
x
.
exp
();
}
};
template
<
typename
T
>
struct
ExpGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
dx
.
device
(
d
)
=
dout
*
out
;
}
static
constexpr
ActBwdOpFwdDeps
FwdDeps
()
{
return
ActBwdOpFwdDeps
::
kDepOut
;
}
};
// expm1(x) = e^x - 1
template
<
typename
T
>
struct
Expm1Functor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Out
>
void
operator
()(
Device
d
,
X
x
,
Out
out
)
const
{
out
.
device
(
d
)
=
x
.
expm1
();
}
};
template
<
typename
T
>
struct
Expm1GradFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
dx
.
device
(
d
)
=
dout
*
out
+
dout
;
}
static
constexpr
ActBwdOpFwdDeps
FwdDeps
()
{
return
ActBwdOpFwdDeps
::
kDepOut
;
}
};
using
ELUGradNegativeAlphaFunctor
=
phi
::
funcs
::
ELUGradNegativeAlphaFunctor
<
T
>
;
// relu(x) = max(x, 0)
...
...
@@ -362,92 +331,68 @@ using ReluGradGradFunctor = phi::funcs::ReluGradGradFunctor<T>;
template
<
typename
T
>
using
ReluCUDAFunctor
=
phi
::
funcs
::
ReluCUDAFunctor
<
T
>
;
// sqrt(x) = x^(1/2)
template
<
typename
T
>
struct
SqrtFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Out
>
void
operator
()(
Device
d
,
X
x
,
Out
out
)
const
{
out
.
device
(
d
)
=
x
.
sqrt
();
}
};
template
<
typename
T
>
struct
SqrtGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
dx
.
device
(
d
)
=
static_cast
<
T
>
(
0.5
)
*
dout
/
out
;
}
static
constexpr
ActBwdOpFwdDeps
FwdDeps
()
{
return
ActBwdOpFwdDeps
::
kDepOut
;
}
};
// rsqrt(x) = x^(-1/2)
template
<
typename
T
>
struct
RsqrtFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Out
>
void
operator
()(
Device
d
,
X
x
,
Out
out
)
const
{
out
.
device
(
d
)
=
x
.
rsqrt
();
}
};
template
<
typename
T
>
struct
RsqrtGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
dx
.
device
(
d
)
=
static_cast
<
T
>
(
-
0.5
)
*
dout
*
out
*
out
*
out
;
struct
SqrtGradGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
>
void
operator
()(
const
Device
&
dev
,
const
framework
::
Tensor
*
Out
,
const
framework
::
Tensor
*
ddX
,
framework
::
Tensor
*
ddOut
,
framework
::
Tensor
*
dOut
,
const
framework
::
Tensor
*
dX
)
const
{
auto
*
d
=
dev
.
eigen_device
();
auto
ddx
=
framework
::
EigenVector
<
T
>::
Flatten
(
GET_DATA_SAFELY
(
ddX
,
"Input"
,
"DDX"
,
"SqrtGradGrad"
));
auto
out
=
framework
::
EigenVector
<
T
>::
Flatten
(
GET_DATA_SAFELY
(
Out
,
"Output"
,
"Out"
,
"SqrtGradGrad"
));
// sqrt GradGrad: ddy = 0.5 * ddx / y, dy = -1 * dx * ddx
// calculate dy first, so ddy can inplace ddx
if
(
dOut
)
{
auto
dx
=
framework
::
EigenVector
<
T
>::
Flatten
(
GET_DATA_SAFELY
(
dX
,
"Output"
,
"DX"
,
"SqrtGradGrad"
));
auto
dout
=
framework
::
EigenVector
<
T
>::
Flatten
(
GET_DATA_SAFELY
(
dOut
,
"Output"
,
"DOut"
,
"SqrtGradGrad"
));
dout
.
device
(
*
d
)
=
dx
*
ddx
*
static_cast
<
T
>
(
-
1
)
/
out
;
}
if
(
ddOut
)
{
auto
ddout
=
framework
::
EigenVector
<
T
>::
Flatten
(
GET_DATA_SAFELY
(
ddOut
,
"Output"
,
"DDOut"
,
"SqrtGradGrad"
));
ddout
.
device
(
*
d
)
=
ddx
*
static_cast
<
T
>
(
0.5
)
/
out
;
}
}
static
constexpr
ActBwdOpFwdDeps
FwdDeps
()
{
return
ActBwdOpFwdDeps
::
kDepOut
;
}
};
// reciprocal(x) = 1 / x
template
<
typename
T
>
struct
ReciprocalFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Out
>
void
operator
()(
Device
d
,
X
x
,
Out
out
)
const
{
out
.
device
(
d
)
=
static_cast
<
T
>
(
1
)
/
x
;
}
};
struct
RsqrtGradGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
>
void
operator
()(
const
Device
&
dev
,
const
framework
::
Tensor
*
Out
,
const
framework
::
Tensor
*
ddX
,
framework
::
Tensor
*
ddOut
,
framework
::
Tensor
*
dOut
,
const
framework
::
Tensor
*
dX
)
const
{
auto
*
d
=
dev
.
eigen_device
();
auto
ddx
=
framework
::
EigenVector
<
T
>::
Flatten
(
GET_DATA_SAFELY
(
ddX
,
"Input"
,
"DDX"
,
"RsqrtGradGrad"
));
auto
out
=
framework
::
EigenVector
<
T
>::
Flatten
(
GET_DATA_SAFELY
(
Out
,
"Output"
,
"Out"
,
"RsqrtGradGrad"
));
template
<
typename
T
>
struct
ReciprocalGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
dx
.
device
(
d
)
=
dout
*
static_cast
<
T
>
(
-
1
)
*
out
*
out
;
// rsqrt GradGrad: ddy = -0.5 * ddx * y * y * y, dy = (3/y) * dx * ddx
if
(
dOut
)
{
auto
dx
=
framework
::
EigenVector
<
T
>::
Flatten
(
GET_DATA_SAFELY
(
dX
,
"Output"
,
"DX"
,
"RsqrtGradGrad"
));
auto
dout
=
framework
::
EigenVector
<
T
>::
Flatten
(
GET_DATA_SAFELY
(
dOut
,
"Output"
,
"DOut"
,
"RsqrtGradGrad"
));
dout
.
device
(
*
d
)
=
(
static_cast
<
T
>
(
3.0
)
/
out
)
*
dx
*
ddx
;
}
if
(
ddOut
)
{
auto
ddout
=
framework
::
EigenVector
<
T
>::
Flatten
(
GET_DATA_SAFELY
(
ddOut
,
"Output"
,
"DDOut"
,
"RsqrtGradGrad"
));
ddout
.
device
(
*
d
)
=
ddx
*
static_cast
<
T
>
(
-
0.5
)
*
out
*
out
*
out
;
}
}
static
constexpr
ActBwdOpFwdDeps
FwdDeps
()
{
return
ActBwdOpFwdDeps
::
kDepOut
;
}
};
// square(x) = x^2
template
<
typename
T
>
struct
SquareFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Out
>
void
operator
()(
Device
d
,
X
x
,
Out
out
)
const
{
out
.
device
(
d
)
=
x
.
square
();
}
};
template
<
typename
T
>
struct
SquareGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
dx
.
device
(
d
)
=
dout
*
static_cast
<
T
>
(
2
)
*
x
;
}
static
constexpr
ActBwdOpFwdDeps
FwdDeps
()
{
return
ActBwdOpFwdDeps
::
kDepX
;
}
};
// relu6(x) = min(max(0, x), 6)
template
<
typename
T
>
struct
Relu6Functor
:
public
BaseActivationFunctor
<
T
>
{
...
...
@@ -484,114 +429,6 @@ struct Relu6GradFunctor : public BaseActivationFunctor<T> {
}
};
// For numerical stability, using the following formula instead of softplus(x) =
// log(1 + exp(x))
// softplus(x) = log(1 + exp(beta * x)) / beta when beta * x <= threshold(beta =
// 1, threshold = 20 by default), otherwise x
template
<
typename
T
>
struct
SoftplusFunctor
:
public
BaseActivationFunctor
<
T
>
{
float
beta
;
float
threshold
;
typename
BaseActivationFunctor
<
T
>::
AttrPair
GetAttrs
()
{
return
{{
"beta"
,
&
beta
},
{
"threshold"
,
&
threshold
}};
}
template
<
typename
Device
,
typename
X
,
typename
Out
>
void
operator
()(
Device
d
,
X
x
,
Out
out
)
{
auto
x_beta
=
static_cast
<
T
>
(
beta
)
*
x
;
out
.
device
(
d
)
=
(
x_beta
>
static_cast
<
T
>
(
threshold
))
.
select
(
x
,
(
static_cast
<
T
>
(
1
)
+
x_beta
.
exp
()).
log
()
/
static_cast
<
T
>
(
beta
));
}
};
// For numerical stability, using the following formula instead of
// d(softplus(x))/dx = 1 / (1 + exp(-x))
// d(softplus(x))/dx = 1 / (1 + exp(-beta * x)) when beta * x <= threshold(beta
// = 1, threshold = 20 by default), otherwise x
template
<
typename
T
>
struct
SoftplusGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
float
beta
;
float
threshold
;
typename
BaseActivationFunctor
<
T
>::
AttrPair
GetAttrs
()
{
return
{{
"beta"
,
&
beta
},
{
"threshold"
,
&
threshold
}};
}
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
{
auto
x_beta
=
static_cast
<
T
>
(
beta
)
*
x
;
dx
.
device
(
d
)
=
(
x_beta
>
static_cast
<
T
>
(
threshold
))
.
select
(
dout
,
dout
/
(
static_cast
<
T
>
(
1
)
+
(
-
x_beta
).
exp
()));
}
static
constexpr
ActBwdOpFwdDeps
FwdDeps
()
{
return
ActBwdOpFwdDeps
::
kDepX
;
}
};
// mish(x) = x * tanh(softplus(x))
// softplus(x) = x, if x > threshold
// = ln(1 + exp(x)), otherwise
template
<
typename
T
>
struct
MishFunctor
:
public
BaseActivationFunctor
<
T
>
{
float
threshold
;
typename
BaseActivationFunctor
<
T
>::
AttrPair
GetAttrs
()
{
return
{{
"threshold"
,
&
threshold
}};
}
template
<
typename
Device
,
typename
X
,
typename
Out
>
void
operator
()(
Device
d
,
X
x
,
Out
out
)
{
auto
sp
=
(
x
>
static_cast
<
T
>
(
threshold
))
.
select
(
x
,
(
static_cast
<
T
>
(
1
)
+
x
.
exp
()).
log
());
out
.
device
(
d
)
=
x
*
sp
.
tanh
();
}
};
// dx = dout * (tanh(sp) + x * (1 - tanh(sp) ** 2) * (1 - exp(-sp)))
// sp = softplus(x)
template
<
typename
T
>
struct
MishGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
float
threshold
;
typename
BaseActivationFunctor
<
T
>::
AttrPair
GetAttrs
()
{
return
{{
"threshold"
,
&
threshold
}};
}
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
{
auto
sp
=
(
x
>
static_cast
<
T
>
(
threshold
))
.
select
(
x
,
(
static_cast
<
T
>
(
1
)
+
x
.
exp
()).
log
());
auto
gsp
=
static_cast
<
T
>
(
1
)
-
(
-
sp
).
exp
();
auto
tsp
=
sp
.
tanh
();
dx
.
device
(
d
)
=
dout
*
(
tsp
+
x
*
(
static_cast
<
T
>
(
1
)
-
tsp
*
tsp
)
*
gsp
);
}
static
constexpr
ActBwdOpFwdDeps
FwdDeps
()
{
return
ActBwdOpFwdDeps
::
kDepX
;
}
};
// softsign(x) = x / (1 + |x|)
template
<
typename
T
>
struct
SoftsignFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Out
>
void
operator
()(
Device
d
,
X
x
,
Out
out
)
{
out
.
device
(
d
)
=
x
/
(
static_cast
<
T
>
(
1
)
+
x
.
abs
());
}
};
// d(softsign(x))/dx = 1 / (1 + |x|)^2
// Taken from https://en.wikipedia.org/wiki/Activation_function
template
<
typename
T
>
struct
SoftsignGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
{
dx
.
device
(
d
)
=
dout
*
(
static_cast
<
T
>
(
1
)
/
(
static_cast
<
T
>
(
1
)
+
x
.
abs
()).
square
());
}
static
constexpr
ActBwdOpFwdDeps
FwdDeps
()
{
return
ActBwdOpFwdDeps
::
kDepX
;
}
};
template
<
typename
T
>
struct
SoftReluFunctor
:
public
BaseActivationFunctor
<
T
>
{
float
threshold
;
...
...
@@ -706,71 +543,6 @@ struct CELUGradFunctor : public BaseActivationFunctor<T> {
static
constexpr
ActBwdOpFwdDeps
FwdDeps
()
{
return
ActBwdOpFwdDeps
::
kDepX
;
}
};
template
<
typename
T
>
struct
LogitFunctor
{
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
P
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
P
p
,
float
eps
)
const
{
// logit(x) = ln(x/(1-x))
auto
tmp_x
=
(
x
.
cwiseMin
(
static_cast
<
T
>
(
1.0
-
eps
))).
cwiseMax
(
static_cast
<
T
>
(
eps
));
if
(
!
eps
)
{
out
.
device
(
d
)
=
(
x
<
static_cast
<
T
>
(
0.0
)
||
x
>
static_cast
<
T
>
(
1.0
))
.
select
(
p
.
constant
(
static_cast
<
T
>
(
NAN
)),
(
tmp_x
/
(
static_cast
<
T
>
(
1
)
-
tmp_x
)).
log
());
}
else
{
out
.
device
(
d
)
=
(
tmp_x
/
(
static_cast
<
T
>
(
1
)
-
tmp_x
)).
log
();
}
}
};
template
<
typename
T
>
struct
LogitGradFunctor
{
template
<
typename
Device
,
typename
X
,
typename
dOut
,
typename
dX
,
typename
P
>
void
operator
()(
Device
d
,
X
x
,
dOut
dout
,
dX
dx
,
P
p
,
float
eps
)
const
{
// logit(x)' = 1/(x*(1-x))
dx
.
device
(
d
)
=
(
x
<
static_cast
<
T
>
(
eps
)
||
x
>
static_cast
<
T
>
(
1.0
-
eps
))
.
select
(
p
.
constant
(
static_cast
<
T
>
(
0
)),
dout
*
(
static_cast
<
T
>
(
1
)
/
((
static_cast
<
T
>
(
1
)
-
x
)
*
x
)));
}
};
template
<
typename
T
>
struct
STanhFunctor
:
public
BaseActivationFunctor
<
T
>
{
float
scale_a
;
float
scale_b
;
typename
BaseActivationFunctor
<
T
>::
AttrPair
GetAttrs
()
{
return
{{
"scale_a"
,
&
scale_a
},
{
"scale_b"
,
&
scale_b
}};
}
template
<
typename
Device
,
typename
X
,
typename
Out
>
void
operator
()(
Device
d
,
X
x
,
Out
out
)
const
{
out
.
device
(
d
)
=
static_cast
<
T
>
(
scale_b
)
*
(
static_cast
<
T
>
(
scale_a
)
*
x
).
tanh
();
}
};
template
<
typename
T
>
struct
STanhGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
float
scale_a
;
float
scale_b
;
typename
BaseActivationFunctor
<
T
>::
AttrPair
GetAttrs
()
{
return
{{
"scale_a"
,
&
scale_a
},
{
"scale_b"
,
&
scale_b
}};
}
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
auto
a
=
static_cast
<
T
>
(
scale_a
);
auto
b
=
static_cast
<
T
>
(
scale_b
);
auto
temp
=
(
a
*
x
).
tanh
()
*
(
a
*
x
).
tanh
();
dx
.
device
(
d
)
=
dout
*
a
*
b
*
(
static_cast
<
T
>
(
1
)
-
temp
);
}
static
constexpr
ActBwdOpFwdDeps
FwdDeps
()
{
return
ActBwdOpFwdDeps
::
kDepX
;
}
};
template
<
typename
T
>
struct
AbsGradGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
>
...
...
@@ -831,68 +603,6 @@ struct CELUGradGradFunctor : public BaseActivationFunctor<T> {
static
constexpr
ActBwdOpFwdDeps
FwdDeps
()
{
return
ActBwdOpFwdDeps
::
kDepX
;
}
};
template
<
typename
T
>
struct
SqrtGradGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
>
void
operator
()(
const
Device
&
dev
,
const
framework
::
Tensor
*
Out
,
const
framework
::
Tensor
*
ddX
,
framework
::
Tensor
*
ddOut
,
framework
::
Tensor
*
dOut
,
const
framework
::
Tensor
*
dX
)
const
{
auto
*
d
=
dev
.
eigen_device
();
auto
ddx
=
framework
::
EigenVector
<
T
>::
Flatten
(
GET_DATA_SAFELY
(
ddX
,
"Input"
,
"DDX"
,
"SqrtGradGrad"
));
auto
out
=
framework
::
EigenVector
<
T
>::
Flatten
(
GET_DATA_SAFELY
(
Out
,
"Output"
,
"Out"
,
"SqrtGradGrad"
));
// sqrt GradGrad: ddy = 0.5 * ddx / y, dy = -1 * dx * ddx
// calculate dy first, so ddy can inplace ddx
if
(
dOut
)
{
auto
dx
=
framework
::
EigenVector
<
T
>::
Flatten
(
GET_DATA_SAFELY
(
dX
,
"Output"
,
"DX"
,
"SqrtGradGrad"
));
auto
dout
=
framework
::
EigenVector
<
T
>::
Flatten
(
GET_DATA_SAFELY
(
dOut
,
"Output"
,
"DOut"
,
"SqrtGradGrad"
));
dout
.
device
(
*
d
)
=
dx
*
ddx
*
static_cast
<
T
>
(
-
1
)
/
out
;
}
if
(
ddOut
)
{
auto
ddout
=
framework
::
EigenVector
<
T
>::
Flatten
(
GET_DATA_SAFELY
(
ddOut
,
"Output"
,
"DDOut"
,
"SqrtGradGrad"
));
ddout
.
device
(
*
d
)
=
ddx
*
static_cast
<
T
>
(
0.5
)
/
out
;
}
}
static
constexpr
ActBwdOpFwdDeps
FwdDeps
()
{
return
ActBwdOpFwdDeps
::
kDepOut
;
}
};
template
<
typename
T
>
struct
RsqrtGradGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
>
void
operator
()(
const
Device
&
dev
,
const
framework
::
Tensor
*
Out
,
const
framework
::
Tensor
*
ddX
,
framework
::
Tensor
*
ddOut
,
framework
::
Tensor
*
dOut
,
const
framework
::
Tensor
*
dX
)
const
{
auto
*
d
=
dev
.
eigen_device
();
auto
ddx
=
framework
::
EigenVector
<
T
>::
Flatten
(
GET_DATA_SAFELY
(
ddX
,
"Input"
,
"DDX"
,
"RsqrtGradGrad"
));
auto
out
=
framework
::
EigenVector
<
T
>::
Flatten
(
GET_DATA_SAFELY
(
Out
,
"Output"
,
"Out"
,
"RsqrtGradGrad"
));
// rsqrt GradGrad: ddy = -0.5 * ddx * y * y * y, dy = (3/y) * dx * ddx
if
(
dOut
)
{
auto
dx
=
framework
::
EigenVector
<
T
>::
Flatten
(
GET_DATA_SAFELY
(
dX
,
"Output"
,
"DX"
,
"RsqrtGradGrad"
));
auto
dout
=
framework
::
EigenVector
<
T
>::
Flatten
(
GET_DATA_SAFELY
(
dOut
,
"Output"
,
"DOut"
,
"RsqrtGradGrad"
));
dout
.
device
(
*
d
)
=
(
static_cast
<
T
>
(
3.0
)
/
out
)
*
dx
*
ddx
;
}
if
(
ddOut
)
{
auto
ddout
=
framework
::
EigenVector
<
T
>::
Flatten
(
GET_DATA_SAFELY
(
ddOut
,
"Output"
,
"DDOut"
,
"RsqrtGradGrad"
));
ddout
.
device
(
*
d
)
=
ddx
*
static_cast
<
T
>
(
-
0.5
)
*
out
*
out
*
out
;
}
}
static
constexpr
ActBwdOpFwdDeps
FwdDeps
()
{
return
ActBwdOpFwdDeps
::
kDepOut
;
}
};
template
<
typename
T
>
struct
SquareGradGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
>
...
...
@@ -988,6 +698,29 @@ class SquareDoubleGradKernel
}
};
template
<
typename
T
>
struct
SoftsignFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Out
>
void
operator
()(
Device
d
,
X
x
,
Out
out
)
const
{
out
.
device
(
d
)
=
x
/
(
static_cast
<
T
>
(
1
)
+
x
.
abs
());
}
};
// d(softsign(x))/dx = 1 / (1 + |x|)^2
// Taken from https://en.wikipedia.org/wiki/Activation_function
template
<
typename
T
>
struct
SoftsignGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
dx
.
device
(
d
)
=
dout
*
(
static_cast
<
T
>
(
1
)
/
(
static_cast
<
T
>
(
1
)
+
x
.
abs
()).
square
());
}
static
constexpr
ActBwdOpFwdDeps
FwdDeps
()
{
return
ActBwdOpFwdDeps
::
kDepX
;
}
};
template
<
typename
DeviceContext
,
typename
Functor
>
class
CELUDoubleGradKernel
:
public
framework
::
OpKernel
<
typename
Functor
::
ELEMENT_TYPE
>
{
...
...
@@ -1135,57 +868,10 @@ class RsqrtDoubleGradKernel
}
};
template
<
typename
DeviceContext
,
typename
T
>
class
LogitKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
auto
*
out
=
context
.
Output
<
framework
::
Tensor
>
(
"Out"
);
auto
*
in
=
context
.
Input
<
framework
::
Tensor
>
(
"X"
);
auto
eps
=
context
.
Attr
<
float
>
(
"eps"
);
out
->
mutable_data
<
T
>
(
in
->
place
());
auto
eigen_out
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
out
);
auto
eigen_in
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
in
);
auto
&
place
=
*
context
.
template
device_context
<
DeviceContext
>().
eigen_device
();
auto
eigen_p
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
out
);
LogitFunctor
<
T
>
functor
;
functor
(
place
,
eigen_in
,
eigen_out
,
eigen_p
,
eps
);
}
};
template
<
typename
DeviceContext
,
typename
T
>
class
LogitGradKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
auto
*
x
=
context
.
Input
<
framework
::
Tensor
>
(
"X"
);
auto
*
dout
=
context
.
Input
<
framework
::
Tensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
*
dx
=
context
.
Output
<
framework
::
Tensor
>
(
framework
::
GradVarName
(
"X"
));
auto
eps
=
context
.
Attr
<
float
>
(
"eps"
);
dx
->
mutable_data
<
T
>
(
dout
->
place
());
auto
eigen_x
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
x
);
auto
eigen_dout
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
dout
);
auto
eigen_dx
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
dx
);
auto
&
place
=
*
context
.
template
device_context
<
DeviceContext
>().
eigen_device
();
auto
eigen_p
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
x
);
LogitGradFunctor
<
T
>
functor
;
functor
(
place
,
eigen_x
,
eigen_dout
,
eigen_dx
,
eigen_p
,
eps
);
}
};
}
// namespace operators
}
// namespace paddle
#define FOR_EACH_ACTIVATION_OP(__macro) \
__macro(reciprocal, Reciprocal, ReciprocalFunctor, ReciprocalGradFunctor); \
__macro(soft_relu, SoftRelu, SoftReluFunctor, SoftReluGradFunctor); \
__macro(stanh, STanh, STanhFunctor, STanhGradFunctor); \
__macro(softplus, Softplus, SoftplusFunctor, SoftplusGradFunctor); \
__macro(softsign, Softsign, SoftsignFunctor, SoftsignGradFunctor); \
__macro(relu6, Relu6, Relu6Functor, Relu6GradFunctor); \
__macro(mish, Mish, MishFunctor, MishGradFunctor);
#define FOR_EACH_ACTIVATION_OP(__macro) \
__macro(soft_relu, SoftRelu, SoftReluFunctor, SoftReluGradFunctor); \
__macro(softsign, Softsign, SoftsignFunctor, SoftsignGradFunctor); \
__macro(relu6, Relu6, Relu6Functor, Relu6GradFunctor);
paddle/fluid/operators/activation_op.kps
浏览文件 @
07473786
...
...
@@ -20,140 +20,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
template <typename T>
struct CudaReciprocalFunctor : public BaseActivationFunctor<T> {
T one = static_cast<T>(1.0f);
// reciprocal(x) = 1 / x
__device__ __forceinline__ T operator()(const T x) const { return one / x; }
};
template <typename T>
struct CudaReciprocalGradFunctor : public BaseActivationFunctor<T> {
// dx = -dout * out^2
__device__ __forceinline__ T operator()(const T dout, const T out) const {
return -dout * out * out;
}
static constexpr ActBwdOpFwdDeps FwdDeps() {
return ActBwdOpFwdDeps::kDepOut;
}
};
template <typename T>
struct CudaExpFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type;
// exp(x) = exp(x)
__device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(exp(x));
}
};
template <typename T>
struct CudaExpGradFunctor : public BaseActivationFunctor<T> {
// dx = dout * out
__device__ __forceinline__ T operator()(const T dout, const T out) const {
return dout * out;
}
static constexpr ActBwdOpFwdDeps FwdDeps() {
return ActBwdOpFwdDeps::kDepOut;
}
};
template <typename T>
struct CudaExpm1Functor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type;
// expm1(x) = expm1(x)
__device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(expm1(x));
}
};
template <typename T>
struct CudaExpm1GradFunctor : public BaseActivationFunctor<T> {
// dx = dout * out
__device__ __forceinline__ T operator()(const T dout, const T out) const {
return dout * out + dout;
}
static constexpr ActBwdOpFwdDeps FwdDeps() {
return ActBwdOpFwdDeps::kDepOut;
}
};
template <typename T>
struct CudaSquareFunctor : public BaseActivationFunctor<T> {
// square(x) = x * x
__device__ __forceinline__ T operator()(const T x) const { return x * x; }
};
template <typename T>
struct CudaSquareGradFunctor : public BaseActivationFunctor<T> {
T two = static_cast<T>(2.0f);
// dx = dout * 2 * x
__device__ __forceinline__ T operator()(const T dout, const T x) const {
return dout * two * x;
}
static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};
template <typename T>
struct CudaSqrtFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type;
// sqrt(x) = sqrt(x)
__device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(sqrt(x));
}
};
template <typename T>
struct CudaSqrtGradFunctor : public BaseActivationFunctor<T> {
T one_half = static_cast<T>(0.5f);
// dx = dout * 0.5 / out
__device__ __forceinline__ T operator()(const T dout, const T out) const {
return one_half * dout / out;
}
static constexpr ActBwdOpFwdDeps FwdDeps() {
return ActBwdOpFwdDeps::kDepOut;
}
};
template <typename T>
struct CudaRsqrtFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type;
// rsqrt(x) = rsqrt(x)
__device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(rsqrt(x));
}
};
template <typename T>
struct CudaRsqrtGradFunctor : public BaseActivationFunctor<T> {
T minus_one_half = static_cast<T>(-0.5f);
// dx = -0.5 * dout * out^3
__device__ __forceinline__ T operator()(const T dout, const T out) const {
return minus_one_half * dout * out * out * out;
}
static constexpr ActBwdOpFwdDeps FwdDeps() {
return ActBwdOpFwdDeps::kDepOut;
}
};
template <typename T>
struct CudaSoftReluFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type;
...
...
@@ -201,119 +67,6 @@ struct CudaSoftReluGradFunctor : public BaseActivationFunctor<T> {
}
};
template <typename T>
struct CudaSTanhFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type;
float scale_a;
float scale_b;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"scale_a", &scale_a}, {"scale_b", &scale_b}};
}
// stanh(x) = b * tanh(a * x)
__device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x);
MPType a = static_cast<MPType>(scale_a);
MPType b = static_cast<MPType>(scale_b);
return static_cast<T>(b * tanh(a * x));
}
};
template <typename T>
struct CudaSTanhGradFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type;
MPType one = static_cast<MPType>(1.0f);
float scale_a;
float scale_b;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"scale_a", &scale_a}, {"scale_b", &scale_b}};
}
// dx = dout * a * b * (1 - tanh(a * x) * tanh(a * x))
__device__ __forceinline__ T operator()(const T arg_dout,
const T arg_x) const {
MPType dout = static_cast<MPType>(arg_dout);
MPType x = static_cast<MPType>(arg_x);
MPType a = static_cast<MPType>(scale_a);
MPType b = static_cast<MPType>(scale_b);
MPType temp = tanh(a * x);
return static_cast<T>(dout * a * b * (one - temp * temp));
}
static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};
template <typename T>
struct CudaSoftplusFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type;
MPType one = static_cast<MPType>(1.0f);
float beta;
float threshold;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"beta", &beta}, {"threshold", &threshold}};
}
// softplus(x) = beta * x > threshold ? x : log(1 + exp(beta * x)) / beta
__device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x);
MPType b = static_cast<MPType>(beta);
MPType t = static_cast<MPType>(threshold);
MPType x_beta = x * beta;
return static_cast<T>(x_beta > t ? x : log(one + exp(x_beta)) / b);
}
};
template <typename T>
struct CudaSoftplusGradFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type;
MPType one = static_cast<MPType>(1.0f);
float beta;
float threshold;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"beta", &beta}, {"threshold", &threshold}};
}
// dx = x * beta > threshold ? dout : dout / (1 + exp(-beta * x))
__device__ __forceinline__ T operator()(const T arg_dout,
const T arg_x) const {
MPType dout = static_cast<MPType>(arg_dout);
MPType x = static_cast<MPType>(arg_x);
MPType b = static_cast<MPType>(beta);
MPType t = static_cast<MPType>(threshold);
MPType x_beta = x * beta;
return x_beta > t ? arg_dout : static_cast<T>(dout / (one + exp(-x_beta)));
}
static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};
template <typename T>
struct CudaSoftsignFunctor : public BaseActivationFunctor<T> {
T one = static_cast<T>(1.0f);
// softsign(x) = x / (1 + abs(x))
__device__ __forceinline__ T operator()(const T x) const {
return x / (one + abs(x));
}
};
template <typename T>
struct CudaSoftsignGradFunctor : public BaseActivationFunctor<T> {
T one = static_cast<T>(1.0f);
// dx = dout / (1 + abs(x))^2
__device__ __forceinline__ T operator()(const T dout, const T x) const {
T temp = one + abs(x);
return dout / (temp * temp);
}
static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};
template <typename T>
struct CudaRelu6Functor : public BaseActivationFunctor<T> {
T zero = static_cast<T>(0.0f);
...
...
@@ -351,49 +104,23 @@ struct CudaRelu6GradFunctor : public BaseActivationFunctor<T> {
};
template <typename T>
struct CudaMishFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type;
MPType one = static_cast<MPType>(1.0f);
float threshold;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"threshold", &threshold}};
}
struct CudaSoftsignFunctor : public BaseActivationFunctor<T> {
T one = static_cast<T>(1.0f);
// mish(x) = x * tanh(softplus(x))
// softplus(x) = x, if x > threshold
// = ln(1 + exp(x)), otherwise
// Inputs: args[0], the input x
__device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x);
MPType sp = (x > static_cast<MPType>(threshold)) ? x : log(one + exp(x));
return static_cast<T>(x * tanh(sp));
// softsign(x) = x / (1 + abs(x))
__device__ __forceinline__ T operator()(const T x) const {
return x / (one + abs(x));
}
};
template <typename T>
struct CudaMishGradFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type;
MPType one = static_cast<MPType>(1.0f);
float threshold;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"threshold", &threshold}};
}
struct CudaSoftsignGradFunctor : public BaseActivationFunctor<T> {
T one = static_cast<T>(1.0f);
// dx = dout * (tanh(sp) + x * (1 - tanh(sp) ** 2) * (1 - exp(-sp)))
// sp = softplus(x)
// Inputs: args[0], the input dout
// args[1], the input x
__device__ __forceinline__ T operator()(const T arg_dout,
const T arg_x) const {
MPType dout = static_cast<MPType>(arg_dout);
MPType x = static_cast<MPType>(arg_x);
MPType sp = (x > static_cast<MPType>(threshold)) ? x : log(one + exp(x));
MPType gsp =
(x > static_cast<MPType>(threshold)) ? one : one / (one + exp(-x));
MPType tsp = tanh(sp);
return static_cast<T>(dout * (tsp + x * (one - tsp * tsp) * gsp));
// dx = dout / (1 + abs(x))^2
__device__ __forceinline__ T operator()(const T dout, const T x) const {
T temp = one + abs(x);
return dout / (temp * temp);
}
static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
...
...
@@ -558,6 +285,16 @@ using CudaCeilFunctor = phi::funcs::CudaCeilFunctor<T>;
template <typename T>
using CudaZeroGradFunctor = phi::funcs::CudaZeroGradFunctor<T>;
USE_PHI_FUNCTOR(CudaExp)
USE_PHI_FUNCTOR(CudaExpm1)
USE_PHI_FUNCTOR(CudaMish)
USE_PHI_FUNCTOR(CudaSTanh)
USE_PHI_FUNCTOR(CudaReciprocal)
USE_PHI_FUNCTOR(CudaSquare)
USE_PHI_FUNCTOR(CudaSqrt)
USE_PHI_FUNCTOR(CudaRsqrt)
USE_PHI_FUNCTOR(CudaSoftplus)
template <typename T>
using CudaELUGradNegativeAlphaFunctor =
phi::funcs::CudaELUGradNegativeAlphaFunctor<T>;
...
...
@@ -636,8 +373,6 @@ REGISTER_OP_CUDA_KERNEL(
/* ========================================================================== */
/* =========================== sqrt register ============================= */
REGISTER_ACTIVATION_CUDA_KERNEL(sqrt, Sqrt, CudaSqrtFunctor,
CudaSqrtGradFunctor);
REGISTER_OP_CUDA_KERNEL(
sqrt_grad_grad,
...
...
@@ -653,8 +388,6 @@ REGISTER_OP_CUDA_KERNEL(
/* =========================== rsqrt register =============================
*/
REGISTER_ACTIVATION_CUDA_KERNEL(rsqrt, Rsqrt, CudaRsqrtFunctor,
CudaRsqrtGradFunctor);
REGISTER_OP_CUDA_KERNEL(
rsqrt_grad_grad,
...
...
@@ -667,8 +400,6 @@ REGISTER_OP_CUDA_KERNEL(
/* ========================================================================== */
/* =========================== square register ============================ */
REGISTER_ACTIVATION_CUDA_KERNEL_INT(square, Square, CudaSquareFunctor,
CudaSquareGradFunctor);
REGISTER_OP_CUDA_KERNEL(
square_grad_grad,
...
...
@@ -688,75 +419,19 @@ REGISTER_OP_CUDA_KERNEL(
/* ========================== logit register ============================ */
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(
logit, ops::LogitKernel<paddle::platform::CUDADeviceContext, float>,
ops::LogitKernel<paddle::platform::CUDADeviceContext, double>,
ops::LogitKernel<paddle::platform::CUDADeviceContext,
paddle::platform::float16>);
REGISTER_OP_CUDA_KERNEL(
logit_grad,
ops::LogitGradKernel<paddle::platform::CUDADeviceContext, float>,
ops::LogitGradKernel<paddle::platform::CUDADeviceContext, double>,
ops::LogitGradKernel<paddle::platform::CUDADeviceContext,
paddle::platform::float16>);
/* ========================================================================== */
/* ========================== exp register ============================ */
REGISTER_OP_CUDA_KERNEL(
exp, ops::ActivationCudaKernel<plat::CUDADeviceContext,
ops::CudaExpFunctor<float>>,
ops::ActivationCudaKernel<plat::CUDADeviceContext,
ops::CudaExpFunctor<double>>,
ops::ActivationKernel<plat::CUDADeviceContext, ops::ExpFunctor<int>>,
ops::ActivationKernel<plat::CUDADeviceContext, ops::ExpFunctor<int64_t>>,
ops::ActivationCudaKernel<plat::CUDADeviceContext,
ops::CudaExpFunctor<plat::float16>>);
REGISTER_OP_CUDA_KERNEL(
exp_grad, ops::ActivationGradCudaKernel<plat::CUDADeviceContext,
ops::CudaExpGradFunctor<float>>,
ops::ActivationGradCudaKernel<plat::CUDADeviceContext,
ops::CudaExpGradFunctor<double>>,
ops::ActivationGradCudaKernel<plat::CUDADeviceContext,
ops::CudaExpGradFunctor<int>>,
ops::ActivationGradCudaKernel<plat::CUDADeviceContext,
ops::CudaExpGradFunctor<int64_t>>,
ops::ActivationGradCudaKernel<plat::CUDADeviceContext,
ops::CudaExpGradFunctor<plat::float16>>);
/* ========================================================================== */
/* ========================== expm1 register ============================ */
REGISTER_OP_CUDA_KERNEL(
expm1, ops::ActivationCudaKernel<plat::CUDADeviceContext,
ops::CudaExpm1Functor<float>>,
ops::ActivationCudaKernel<plat::CUDADeviceContext,
ops::CudaExpm1Functor<double>>,
ops::ActivationCudaKernel<plat::CUDADeviceContext,
ops::CudaExpm1Functor<plat::float16>>);
REGISTER_OP_CUDA_KERNEL(
expm1_grad, ops::ActivationGradCudaKernel<plat::CUDADeviceContext,
ops::CudaExpm1GradFunctor<float>>,
ops::ActivationGradCudaKernel<plat::CUDADeviceContext,
ops::CudaExpm1GradFunctor<double>>,
ops::ActivationGradCudaKernel<plat::CUDADeviceContext,
ops::CudaExpm1GradFunctor<plat::float16>>);
/* ========================================================================== */
#define FOR_EACH_ACTIVATION_CUDA_OP(__macro) \
__macro(softshrink, SoftShrink, CudaSoftShrinkFunctor, \
CudaSoftShrinkGradFunctor); \
__macro(reciprocal, Reciprocal, CudaReciprocalFunctor, \
CudaReciprocalGradFunctor); \
__macro(soft_relu, SoftRelu, CudaSoftReluFunctor, CudaSoftReluGradFunctor); \
__macro(stanh, STanh, CudaSTanhFunctor, CudaSTanhGradFunctor); \
__macro(softplus, Softplus, CudaSoftplusFunctor, CudaSoftplusGradFunctor); \
__macro(softsign, Softsign, CudaSoftsignFunctor, CudaSoftsignGradFunctor); \
__macro(relu6, Relu6, CudaRelu6Functor, CudaRelu6GradFunctor); \
__macro(tanh_shrink, TanhShrink, CudaTanhShrinkFunctor, \
CudaTanhShrinkGradFunctor); \
__macro(hard_shrink, HardShrink, CudaHardShrinkFunctor, \
CudaHardShrinkGradFunctor); \
__macro(mish, Mish, CudaMishFunctor, CudaMishGradFunctor);
__macro(softsign, Softsign, CudaSoftsignFunctor, CudaSoftsignGradFunctor);
FOR_EACH_ACTIVATION_CUDA_OP(REGISTER_ACTIVATION_CUDA_KERNEL)
#ifdef PADDLE_WITH_XPU_KP
...
...
paddle/fluid/operators/math/CMakeLists.txt
浏览文件 @
07473786
...
...
@@ -22,9 +22,9 @@ math_library(sampler DEPS generator)
math_library
(
maxouting
)
if
(
WITH_MKLDNN
)
math_library
(
selected_rows_functor DEPS selected_rows_utils math_function blas mkldnn_axpy_handler
)
math_library
(
selected_rows_functor DEPS selected_rows_utils math_function blas mkldnn_axpy_handler
mixed_vector
)
else
()
math_library
(
selected_rows_functor DEPS selected_rows_utils math_function blas
)
math_library
(
selected_rows_functor DEPS selected_rows_utils math_function blas
mixed_vector
)
endif
()
math_library
(
sequence_padding
)
...
...
paddle/fluid/operators/math/selected_rows_functor.cc
浏览文件 @
07473786
...
...
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/fluid/framework/mixed_vector.h"
#include "paddle/fluid/platform/device/device_wrapper.h"
#ifdef PADDLE_WITH_MKLDNN
...
...
paddle/fluid/operators/temporal_shift_op.h
浏览文件 @
07473786
...
...
@@ -19,56 +19,6 @@ namespace operators {
using
Tensor
=
framework
::
Tensor
;
using
DataLayout
=
framework
::
DataLayout
;
template
<
typename
T
>
void
TemporalShiftFwNCHW
(
const
T
*
input
,
T
*
output
,
const
int
ntchw
,
const
int
tchw
,
const
int
chw
,
const
int
hw
,
const
int
t
,
const
int
c1
,
const
int
c2
)
{
int
src_it
=
0
;
for
(
int
i
=
0
;
i
<
ntchw
;
i
++
)
{
int
it
=
(
i
%
tchw
)
/
chw
;
int
ic
=
(
i
%
chw
)
/
hw
;
if
(
ic
<
c1
)
{
src_it
=
it
-
1
;
}
else
if
(
ic
<
c2
)
{
src_it
=
it
+
1
;
}
else
{
src_it
=
it
;
}
if
(
src_it
<
0
||
src_it
>=
t
)
{
output
[
i
]
=
0
;
}
else
{
output
[
i
]
=
input
[
i
+
(
src_it
-
it
)
*
chw
];
}
}
}
template
<
typename
T
>
void
TemporalShiftFwNHWC
(
const
T
*
input
,
T
*
output
,
const
int
nthwc
,
const
int
thwc
,
const
int
hwc
,
const
int
t
,
const
int
c
,
const
int
c1
,
const
int
c2
)
{
int
src_it
=
0
;
for
(
int
i
=
0
;
i
<
nthwc
;
i
++
)
{
int
it
=
(
i
%
thwc
)
/
hwc
;
int
ic
=
i
%
c
;
if
(
ic
<
c1
)
{
src_it
=
it
-
1
;
}
else
if
(
ic
<
c2
)
{
src_it
=
it
+
1
;
}
else
{
src_it
=
it
;
}
if
(
src_it
<
0
||
src_it
>=
t
)
{
output
[
i
]
=
0
;
}
else
{
output
[
i
]
=
input
[
i
+
(
src_it
-
it
)
*
hwc
];
}
}
}
template
<
typename
T
>
void
TemporalShiftBwNCHW
(
const
T
*
output_grad
,
T
*
input_grad
,
const
int
ntchw
,
const
int
tchw
,
const
int
chw
,
const
int
hw
,
...
...
@@ -122,45 +72,7 @@ void TemporalShiftBwNHWC(const T* output_grad, T* input_grad, const int nthwc,
template
<
typename
T
>
class
TemporalShiftKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
input
=
ctx
.
Input
<
Tensor
>
(
"X"
);
auto
*
output
=
ctx
.
Output
<
Tensor
>
(
"Out"
);
int
t
=
ctx
.
Attr
<
int
>
(
"seg_num"
);
float
shift_ratio
=
ctx
.
Attr
<
float
>
(
"shift_ratio"
);
const
std
::
string
data_format_str
=
ctx
.
Attr
<
std
::
string
>
(
"data_format"
);
const
DataLayout
data_layout
=
framework
::
StringToDataLayout
(
data_format_str
);
const
int
nt
=
input
->
dims
()[
0
];
const
int
c
=
(
data_layout
==
DataLayout
::
kNCHW
?
input
->
dims
()[
1
]
:
input
->
dims
()[
3
]);
const
int
h
=
(
data_layout
==
DataLayout
::
kNCHW
?
input
->
dims
()[
2
]
:
input
->
dims
()[
1
]);
const
int
w
=
(
data_layout
==
DataLayout
::
kNCHW
?
input
->
dims
()[
3
]
:
input
->
dims
()[
2
]);
const
int
hw
=
h
*
w
;
const
int
chw
=
c
*
hw
;
const
int
tchw
=
t
*
chw
;
const
int
ntchw
=
nt
*
chw
;
const
int
c1
=
static_cast
<
int
>
(
c
*
shift_ratio
);
const
int
c2
=
static_cast
<
int
>
(
c
*
2
*
shift_ratio
);
framework
::
DDim
out_dims
=
(
data_layout
==
DataLayout
::
kNCHW
?
phi
::
make_ddim
({
nt
,
c
,
h
,
w
})
:
phi
::
make_ddim
({
nt
,
h
,
w
,
c
}));
const
T
*
input_data
=
input
->
data
<
T
>
();
T
*
output_data
=
output
->
mutable_data
<
T
>
(
out_dims
,
ctx
.
GetPlace
());
if
(
data_layout
==
DataLayout
::
kNCHW
)
{
TemporalShiftFwNCHW
<
T
>
(
input_data
,
output_data
,
ntchw
,
tchw
,
chw
,
hw
,
t
,
c1
,
c2
);
}
else
{
TemporalShiftFwNHWC
<
T
>
(
input_data
,
output_data
,
ntchw
,
tchw
,
chw
,
t
,
c
,
c1
,
c2
);
}
}
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{}
};
template
<
typename
T
>
...
...
paddle/phi/kernels/activation_kernel.h
浏览文件 @
07473786
...
...
@@ -53,6 +53,13 @@ DECLARE_ACTIVATION_KERNEL(Acosh)
DECLARE_ACTIVATION_KERNEL
(
Atanh
)
DECLARE_ACTIVATION_KERNEL
(
Relu
)
DECLARE_ACTIVATION_KERNEL
(
Tanh
)
DECLARE_ACTIVATION_KERNEL
(
Exp
)
DECLARE_ACTIVATION_KERNEL
(
Expm1
)
DECLARE_ACTIVATION_KERNEL
(
Reciprocal
)
DECLARE_ACTIVATION_KERNEL
(
Square
)
DECLARE_ACTIVATION_KERNEL
(
Sqrt
)
DECLARE_ACTIVATION_KERNEL
(
Rsqrt
)
DECLARE_ACTIVATION_KERNEL
(
TanhShrink
)
DECLARE_ACTIVATION_KERNEL
(
Silu
)
DECLARE_ACTIVATION_KERNEL
(
Sigmoid
)
...
...
@@ -73,8 +80,23 @@ DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(Elu, alpha)
DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS
(
Swish
,
beta
)
DECLARE_ACTIVATION_KERNEL_WITH_TWO_ATTRS
(
BRelu
,
t_min
,
t_max
)
DECLARE_ACTIVATION_KERNEL_WITH_TWO_ATTRS
(
STanh
,
scale_a
,
scale_b
)
DECLARE_ACTIVATION_KERNEL_WITH_TWO_ATTRS
(
HardSigmoid
,
slope
,
offset
)
DECLARE_ACTIVATION_KERNEL_WITH_TWO_ATTRS
(
Softplus
,
beta
,
threshold
)
template
<
typename
T
,
typename
Context
>
void
LogitKernel
(
const
Context
&
dev_ctx
,
const
DenseTensor
&
x
,
float
eps
,
DenseTensor
*
out
);
template
<
typename
T
,
typename
Context
>
void
MishKernel
(
const
Context
&
dev_ctx
,
const
DenseTensor
&
x
,
float
threshold
,
DenseTensor
*
out
);
template
<
typename
T
,
typename
Context
>
void
HardSwishKernel
(
const
Context
&
dev_ctx
,
const
DenseTensor
&
x
,
...
...
paddle/phi/kernels/cpu/activation_grad_kernel.cc
浏览文件 @
07473786
...
...
@@ -129,6 +129,13 @@ DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(Acosh, AcoshGradFunctor);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX
(
Atanh
,
AtanhGradFunctor
);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX
(
TanhShrink
,
TanhShrinkGradFunctor
);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX
(
Silu
,
SiluGradFunctor
);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX
(
Square
,
SquareGradFunctor
);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT
(
Exp
,
ExpGradFunctor
);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT
(
Expm1
,
Expm1GradFunctor
);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT
(
Reciprocal
,
ReciprocalGradFunctor
);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT
(
Sqrt
,
SqrtGradFunctor
);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT
(
Rsqrt
,
RsqrtGradFunctor
);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX
(
LogSigmoid
,
LogSigmoidGradFunctor
);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX
(
Log
,
LogGradFunctor
);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX
(
Log2
,
Log2GradFunctor
);
...
...
@@ -157,11 +164,24 @@ DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(HardShrink,
threshold
);
DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX
(
Swish
,
SwishGradFunctor
,
beta
);
DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX
(
Mish
,
MishGradFunctor
,
threshold
);
DEFINE_CPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX
(
BRelu
,
BReluGradFunctor
,
t_min
,
t_max
);
DEFINE_CPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX
(
STanh
,
STanhGradFunctor
,
scale_a
,
scale_b
);
DEFINE_CPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX
(
Softplus
,
SoftplusGradFunctor
,
beta
,
threshold
);
DEFINE_CPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPOUT
(
HardSigmoid
,
HardSigmoidGradFunctor
,
slope
,
...
...
@@ -247,6 +267,12 @@ PD_REGISTER_ACTIVATION_GRAD_KERNEL(hard_shrink_grad, HardShrinkGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL
(
tanh_shrink_grad
,
TanhShrinkGradKernel
)
PD_REGISTER_ACTIVATION_GRAD_KERNEL
(
elu_grad
,
EluGradKernel
)
PD_REGISTER_ACTIVATION_GRAD_KERNEL
(
silu_grad
,
SiluGradKernel
)
PD_REGISTER_ACTIVATION_GRAD_KERNEL
(
mish_grad
,
MishGradKernel
)
PD_REGISTER_ACTIVATION_GRAD_KERNEL
(
stanh_grad
,
STanhGradKernel
)
PD_REGISTER_ACTIVATION_GRAD_KERNEL
(
reciprocal_grad
,
ReciprocalGradKernel
)
PD_REGISTER_ACTIVATION_GRAD_KERNEL
(
sqrt_grad
,
SqrtGradKernel
)
PD_REGISTER_ACTIVATION_GRAD_KERNEL
(
rsqrt_grad
,
RsqrtGradKernel
)
PD_REGISTER_ACTIVATION_GRAD_KERNEL
(
softplus_grad
,
SoftplusGradKernel
)
PD_REGISTER_ACTIVATION_DOUBLE_GRAD_KERNEL
(
relu_double_grad
,
ReluDoubleGradKernel
)
...
...
@@ -263,6 +289,34 @@ PD_REGISTER_KERNEL(tanh_triple_grad,
float
,
double
,
phi
::
dtype
::
float16
)
{}
PD_REGISTER_KERNEL
(
exp_grad
,
CPU
,
ALL_LAYOUT
,
phi
::
ExpGradKernel
,
float
,
double
,
int
,
int64_t
)
{}
PD_REGISTER_KERNEL
(
expm1_grad
,
CPU
,
ALL_LAYOUT
,
phi
::
Expm1GradKernel
,
float
,
double
,
phi
::
dtype
::
float16
)
{}
PD_REGISTER_KERNEL
(
logit_grad
,
CPU
,
ALL_LAYOUT
,
phi
::
LogitGradKernel
,
float
,
double
)
{}
PD_REGISTER_KERNEL
(
square_grad
,
CPU
,
ALL_LAYOUT
,
phi
::
SquareGradKernel
,
float
,
double
,
int
,
int64_t
)
{}
PD_REGISTER_ACTIVATION_GRAD_KERNEL
(
sigmoid_grad
,
SigmoidGradKernel
)
PD_REGISTER_ACTIVATION_GRAD_KERNEL
(
sigmoid_double_grad
,
SigmoidDoubleGradKernel
)
PD_REGISTER_ACTIVATION_GRAD_KERNEL
(
sigmoid_triple_grad
,
SigmoidTripleGradKernel
)
...
...
paddle/phi/kernels/cpu/activation_kernel.cc
浏览文件 @
07473786
...
...
@@ -15,6 +15,7 @@ limitations under the License. */
#include "paddle/phi/kernels/activation_kernel.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/activation_functor.h"
#include "paddle/phi/kernels/impl/activation_impl.h"
namespace
phi
{
...
...
@@ -72,6 +73,12 @@ DEFINE_CPU_ACTIVATION_KERNEL(Relu, ReluCPUFunctor)
DEFINE_CPU_ACTIVATION_KERNEL
(
Tanh
,
TanhFunctor
)
DEFINE_CPU_ACTIVATION_KERNEL
(
TanhShrink
,
TanhShrinkFunctor
)
DEFINE_CPU_ACTIVATION_KERNEL
(
Silu
,
SiluFunctor
)
DEFINE_CPU_ACTIVATION_KERNEL
(
Exp
,
ExpFunctor
)
DEFINE_CPU_ACTIVATION_KERNEL
(
Expm1
,
Expm1Functor
)
DEFINE_CPU_ACTIVATION_KERNEL
(
Reciprocal
,
ReciprocalFunctor
)
DEFINE_CPU_ACTIVATION_KERNEL
(
Square
,
SquareFunctor
)
DEFINE_CPU_ACTIVATION_KERNEL
(
Sqrt
,
SqrtFunctor
)
DEFINE_CPU_ACTIVATION_KERNEL
(
Rsqrt
,
RsqrtFunctor
)
DEFINE_CPU_ACTIVATION_KERNEL
(
Sigmoid
,
SigmoidFunctor
)
DEFINE_CPU_ACTIVATION_KERNEL
(
LogSigmoid
,
LogSigmoidFunctor
)
DEFINE_CPU_ACTIVATION_KERNEL
(
Log
,
LogFunctor
)
...
...
@@ -83,15 +90,19 @@ DEFINE_CPU_ACTIVATION_KERNEL(Floor, FloorFunctor)
DEFINE_CPU_ACTIVATION_KERNEL
(
Ceil
,
CeilFunctor
)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS
(
LeakyRelu
,
LeakyReluFunctor
,
alpha
)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS
(
ThresholdedRelu
,
ThresholdedReluFunctor
,
threshold
)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS
(
Mish
,
MishFunctor
,
threshold
)
DEFINE_CPU_ACT_KERNEL_WITH_TWO_ATTRS
(
BRelu
,
BReluFunctor
,
t_min
,
t_max
)
DEFINE_CPU_ACT_KERNEL_WITH_TWO_ATTRS
(
STanh
,
STanhFunctor
,
scale_a
,
scale_b
)
DEFINE_CPU_ACT_KERNEL_WITH_TWO_ATTRS
(
Softplus
,
SoftplusFunctor
,
beta
,
threshold
)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS
(
HardShrink
,
HardShrinkFunctor
,
threshold
)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS
(
SoftShrink
,
SoftShrinkFunctor
,
lambda
)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS
(
Elu
,
ELUFunctor
,
alpha
)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS
(
Swish
,
SwishFunctor
,
beta
)
DEFINE_CPU_ACT_KERNEL_WITH_TWO_ATTRS
(
BRelu
,
BReluFunctor
,
t_min
,
t_max
)
DEFINE_CPU_ACT_KERNEL_WITH_TWO_ATTRS
(
HardSigmoid
,
HardSigmoidFunctor
,
slope
,
...
...
@@ -139,6 +150,25 @@ PD_REGISTER_ACTIVATION_KERNEL(soft_shrink, SoftShrinkKernel)
PD_REGISTER_ACTIVATION_KERNEL
(
tanh_shrink
,
TanhShrinkKernel
)
PD_REGISTER_ACTIVATION_KERNEL
(
elu
,
EluKernel
)
PD_REGISTER_ACTIVATION_KERNEL
(
silu
,
SiluKernel
)
PD_REGISTER_ACTIVATION_KERNEL
(
mish
,
MishKernel
)
PD_REGISTER_ACTIVATION_KERNEL
(
stanh
,
STanhKernel
)
PD_REGISTER_ACTIVATION_KERNEL
(
reciprocal
,
ReciprocalKernel
)
PD_REGISTER_ACTIVATION_KERNEL
(
sqrt
,
SqrtKernel
)
PD_REGISTER_ACTIVATION_KERNEL
(
rsqrt
,
RsqrtKernel
)
PD_REGISTER_ACTIVATION_KERNEL
(
softplus
,
SoftplusKernel
)
PD_REGISTER_KERNEL
(
exp
,
CPU
,
ALL_LAYOUT
,
phi
::
ExpKernel
,
float
,
double
,
int
,
int64_t
)
{}
PD_REGISTER_KERNEL
(
expm1
,
CPU
,
ALL_LAYOUT
,
phi
::
Expm1Kernel
,
float
,
double
,
phi
::
dtype
::
float16
)
{}
PD_REGISTER_KERNEL
(
logit
,
CPU
,
ALL_LAYOUT
,
phi
::
LogitKernel
,
float
,
double
)
{}
PD_REGISTER_KERNEL
(
square
,
CPU
,
ALL_LAYOUT
,
phi
::
SquareKernel
,
float
,
double
,
int
,
int64_t
)
{}
PD_REGISTER_ACTIVATION_KERNEL
(
sigmoid
,
SigmoidKernel
)
PD_REGISTER_ACTIVATION_KERNEL
(
logsigmoid
,
LogSigmoidKernel
)
PD_REGISTER_ACTIVATION_KERNEL
(
hard_sigmoid
,
HardSigmoidKernel
)
...
...
paddle/phi/kernels/cpu/temporal_shift_grad_kernel.cc
0 → 100644
浏览文件 @
07473786
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/temporal_shift_grad_kernel.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/common/layout.h"
#include "paddle/phi/core/kernel_registry.h"
namespace
phi
{
template
<
typename
T
>
void
TemporalShiftBwNCHW
(
const
T
*
output_grad
,
T
*
input_grad
,
const
int
ntchw
,
const
int
tchw
,
const
int
chw
,
const
int
hw
,
const
int
t
,
const
int
c1
,
const
int
c2
)
{
int
src_it
=
0
;
for
(
int
i
=
0
;
i
<
ntchw
;
i
++
)
{
int
it
=
(
i
%
tchw
)
/
chw
;
int
ic
=
(
i
%
chw
)
/
hw
;
if
(
ic
<
c1
)
{
src_it
=
it
+
1
;
}
else
if
(
ic
<
c2
)
{
src_it
=
it
-
1
;
}
else
{
src_it
=
it
;
}
if
(
src_it
>=
0
&&
src_it
<
t
)
{
input_grad
[
i
]
=
output_grad
[
i
+
(
src_it
-
it
)
*
chw
];
}
else
{
input_grad
[
i
]
=
0
;
}
}
}
template
<
typename
T
>
void
TemporalShiftBwNHWC
(
const
T
*
output_grad
,
T
*
input_grad
,
const
int
nthwc
,
const
int
thwc
,
const
int
hwc
,
const
int
t
,
const
int
c
,
const
int
c1
,
const
int
c2
)
{
int
src_it
=
0
;
for
(
int
i
=
0
;
i
<
nthwc
;
i
++
)
{
int
it
=
(
i
%
thwc
)
/
hwc
;
int
ic
=
i
%
c
;
if
(
ic
<
c1
)
{
src_it
=
it
+
1
;
}
else
if
(
ic
<
c2
)
{
src_it
=
it
-
1
;
}
else
{
src_it
=
it
;
}
if
(
src_it
>=
0
&&
src_it
<
t
)
{
input_grad
[
i
]
=
output_grad
[
i
+
(
src_it
-
it
)
*
hwc
];
}
else
{
input_grad
[
i
]
=
0
;
}
}
}
template
<
typename
T
,
typename
Context
>
void
TemporalShiftGradKernel
(
const
Context
&
dev_ctx
,
const
DenseTensor
&
out_grad
,
int
seg_num
,
float
shift_ratio
,
const
std
::
string
&
data_format_str
,
DenseTensor
*
x_grad
)
{
auto
*
input_grad
=
x_grad
;
auto
*
output_grad
=
&
out_grad
;
int
t
=
seg_num
;
const
DataLayout
data_layout
=
paddle
::
framework
::
StringToDataLayout
(
data_format_str
);
const
int
nt
=
output_grad
->
dims
()[
0
];
const
int
c
=
(
data_layout
==
DataLayout
::
kNCHW
?
output_grad
->
dims
()[
1
]
:
output_grad
->
dims
()[
3
]);
const
int
h
=
(
data_layout
==
DataLayout
::
kNCHW
?
output_grad
->
dims
()[
2
]
:
output_grad
->
dims
()[
1
]);
const
int
w
=
(
data_layout
==
DataLayout
::
kNCHW
?
output_grad
->
dims
()[
3
]
:
output_grad
->
dims
()[
2
]);
const
int
hw
=
h
*
w
;
const
int
chw
=
c
*
hw
;
const
int
tchw
=
t
*
chw
;
const
int
ntchw
=
nt
*
chw
;
const
int
c1
=
static_cast
<
int
>
(
c
*
shift_ratio
);
const
int
c2
=
static_cast
<
int
>
(
c
*
2
*
shift_ratio
);
DDim
in_grad_dims
=
(
data_layout
==
DataLayout
::
kNCHW
?
phi
::
make_ddim
({
nt
,
c
,
h
,
w
})
:
phi
::
make_ddim
({
nt
,
h
,
w
,
c
}));
const
T
*
output_grad_data
=
output_grad
->
data
<
T
>
();
T
*
input_grad_data
=
input_grad
->
mutable_data
<
T
>
(
in_grad_dims
,
dev_ctx
.
GetPlace
());
if
(
data_layout
==
DataLayout
::
kNCHW
)
{
TemporalShiftBwNCHW
<
T
>
(
output_grad_data
,
input_grad_data
,
ntchw
,
tchw
,
chw
,
hw
,
t
,
c1
,
c2
);
}
else
{
TemporalShiftBwNHWC
<
T
>
(
output_grad_data
,
input_grad_data
,
ntchw
,
tchw
,
chw
,
t
,
c
,
c1
,
c2
);
}
}
}
// namespace phi
PD_REGISTER_KERNEL
(
temporal_shift_grad
,
CPU
,
ALL_LAYOUT
,
phi
::
TemporalShiftGradKernel
,
float
,
double
)
{}
paddle/phi/kernels/cpu/temporal_shift_kernel.cc
0 → 100644
浏览文件 @
07473786
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/temporal_shift_kernel.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/common/layout.h"
#include "paddle/phi/core/kernel_registry.h"
namespace
phi
{
template
<
typename
T
>
void
TemporalShiftFwNCHW
(
const
T
*
input
,
T
*
output
,
const
int
ntchw
,
const
int
tchw
,
const
int
chw
,
const
int
hw
,
const
int
t
,
const
int
c1
,
const
int
c2
)
{
int
src_it
=
0
;
for
(
int
i
=
0
;
i
<
ntchw
;
i
++
)
{
int
it
=
(
i
%
tchw
)
/
chw
;
int
ic
=
(
i
%
chw
)
/
hw
;
if
(
ic
<
c1
)
{
src_it
=
it
-
1
;
}
else
if
(
ic
<
c2
)
{
src_it
=
it
+
1
;
}
else
{
src_it
=
it
;
}
if
(
src_it
<
0
||
src_it
>=
t
)
{
output
[
i
]
=
0
;
}
else
{
output
[
i
]
=
input
[
i
+
(
src_it
-
it
)
*
chw
];
}
}
}
template
<
typename
T
>
void
TemporalShiftFwNHWC
(
const
T
*
input
,
T
*
output
,
const
int
nthwc
,
const
int
thwc
,
const
int
hwc
,
const
int
t
,
const
int
c
,
const
int
c1
,
const
int
c2
)
{
int
src_it
=
0
;
for
(
int
i
=
0
;
i
<
nthwc
;
i
++
)
{
int
it
=
(
i
%
thwc
)
/
hwc
;
int
ic
=
i
%
c
;
if
(
ic
<
c1
)
{
src_it
=
it
-
1
;
}
else
if
(
ic
<
c2
)
{
src_it
=
it
+
1
;
}
else
{
src_it
=
it
;
}
if
(
src_it
<
0
||
src_it
>=
t
)
{
output
[
i
]
=
0
;
}
else
{
output
[
i
]
=
input
[
i
+
(
src_it
-
it
)
*
hwc
];
}
}
}
template
<
typename
T
,
typename
Context
>
void
TemporalShiftKernel
(
const
Context
&
dev_ctx
,
const
DenseTensor
&
x
,
int
seg_num
,
float
shift_ratio
,
const
std
::
string
&
data_format_str
,
DenseTensor
*
out
)
{
auto
*
input
=
&
x
;
auto
*
output
=
out
;
int
t
=
seg_num
;
const
DataLayout
data_layout
=
paddle
::
framework
::
StringToDataLayout
(
data_format_str
);
const
int
nt
=
input
->
dims
()[
0
];
const
int
c
=
(
data_layout
==
DataLayout
::
kNCHW
?
input
->
dims
()[
1
]
:
input
->
dims
()[
3
]);
const
int
h
=
(
data_layout
==
DataLayout
::
kNCHW
?
input
->
dims
()[
2
]
:
input
->
dims
()[
1
]);
const
int
w
=
(
data_layout
==
DataLayout
::
kNCHW
?
input
->
dims
()[
3
]
:
input
->
dims
()[
2
]);
const
int
hw
=
h
*
w
;
const
int
chw
=
c
*
hw
;
const
int
tchw
=
t
*
chw
;
const
int
ntchw
=
nt
*
chw
;
const
int
c1
=
static_cast
<
int
>
(
c
*
shift_ratio
);
const
int
c2
=
static_cast
<
int
>
(
c
*
2
*
shift_ratio
);
DDim
out_dims
=
(
data_layout
==
DataLayout
::
kNCHW
?
phi
::
make_ddim
({
nt
,
c
,
h
,
w
})
:
phi
::
make_ddim
({
nt
,
h
,
w
,
c
}));
const
T
*
input_data
=
input
->
data
<
T
>
();
T
*
output_data
=
output
->
mutable_data
<
T
>
(
out_dims
,
dev_ctx
.
GetPlace
());
if
(
data_layout
==
DataLayout
::
kNCHW
)
{
TemporalShiftFwNCHW
<
T
>
(
input_data
,
output_data
,
ntchw
,
tchw
,
chw
,
hw
,
t
,
c1
,
c2
);
}
else
{
TemporalShiftFwNHWC
<
T
>
(
input_data
,
output_data
,
ntchw
,
tchw
,
chw
,
t
,
c
,
c1
,
c2
);
}
}
}
// namespace phi
PD_REGISTER_KERNEL
(
temporal_shift
,
CPU
,
ALL_LAYOUT
,
phi
::
TemporalShiftKernel
,
float
,
double
)
{}
paddle/phi/kernels/funcs/activation_functor.h
浏览文件 @
07473786
...
...
@@ -106,6 +106,31 @@ struct SinFunctor : public BaseActivationFunctor<T> {
}
};
// reciprocal(x) = 1 / x
template
<
typename
T
>
struct
ReciprocalFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Out
>
void
operator
()(
Device
d
,
X
x
,
Out
out
)
const
{
out
.
device
(
d
)
=
static_cast
<
T
>
(
1
)
/
x
;
}
};
template
<
typename
T
>
struct
ReciprocalGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
dx
.
device
(
d
)
=
dout
*
static_cast
<
T
>
(
-
1
)
*
out
*
out
;
}
static
constexpr
ActBwdOpFwdDeps
FwdDeps
()
{
return
ActBwdOpFwdDeps
::
kDepOut
;
}
};
// cosine'(x) = -sin(x)
template
<
typename
T
>
struct
CosGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
...
...
@@ -130,6 +155,108 @@ struct CosFunctor : public BaseActivationFunctor<T> {
}
};
template
<
typename
T
>
struct
LogitFunctor
{
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
P
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
P
p
,
float
eps
)
const
{
// logit(x) = ln(x/(1-x))
auto
tmp_x
=
(
x
.
cwiseMin
(
static_cast
<
T
>
(
1.0
-
eps
))).
cwiseMax
(
static_cast
<
T
>
(
eps
));
if
(
!
eps
)
{
out
.
device
(
d
)
=
(
x
<
static_cast
<
T
>
(
0.0
)
||
x
>
static_cast
<
T
>
(
1.0
))
.
select
(
p
.
constant
(
static_cast
<
T
>
(
NAN
)),
(
tmp_x
/
(
static_cast
<
T
>
(
1
)
-
tmp_x
)).
log
());
}
else
{
out
.
device
(
d
)
=
(
tmp_x
/
(
static_cast
<
T
>
(
1
)
-
tmp_x
)).
log
();
}
}
};
// mish(x) = x * tanh(softplus(x))
// softplus(x) = x, if x > threshold
// = ln(1 + exp(x)), otherwise
template
<
typename
T
>
struct
MishFunctor
:
public
BaseActivationFunctor
<
T
>
{
float
threshold
;
typename
BaseActivationFunctor
<
T
>::
AttrPair
GetAttrs
()
{
return
{{
"threshold"
,
&
threshold
}};
}
template
<
typename
Device
,
typename
X
,
typename
Out
>
void
operator
()(
Device
d
,
X
x
,
Out
out
)
const
{
auto
sp
=
(
x
>
static_cast
<
T
>
(
threshold
))
.
select
(
x
,
(
static_cast
<
T
>
(
1
)
+
x
.
exp
()).
log
());
out
.
device
(
d
)
=
x
*
sp
.
tanh
();
}
};
// dx = dout * (tanh(sp) + x * (1 - tanh(sp) ** 2) * (1 - exp(-sp)))
// sp = softplus(x)
template
<
typename
T
>
struct
MishGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
float
threshold
;
typename
BaseActivationFunctor
<
T
>::
AttrPair
GetAttrs
()
{
return
{{
"threshold"
,
&
threshold
}};
}
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
auto
sp
=
(
x
>
static_cast
<
T
>
(
threshold
))
.
select
(
x
,
(
static_cast
<
T
>
(
1
)
+
x
.
exp
()).
log
());
auto
gsp
=
static_cast
<
T
>
(
1
)
-
(
-
sp
).
exp
();
auto
tsp
=
sp
.
tanh
();
dx
.
device
(
d
)
=
dout
*
(
tsp
+
x
*
(
static_cast
<
T
>
(
1
)
-
tsp
*
tsp
)
*
gsp
);
}
static
constexpr
ActBwdOpFwdDeps
FwdDeps
()
{
return
ActBwdOpFwdDeps
::
kDepX
;
}
};
template
<
typename
T
>
struct
STanhFunctor
:
public
BaseActivationFunctor
<
T
>
{
float
scale_a
;
float
scale_b
;
typename
BaseActivationFunctor
<
T
>::
AttrPair
GetAttrs
()
{
return
{{
"scale_a"
,
&
scale_a
},
{
"scale_b"
,
&
scale_b
}};
}
template
<
typename
Device
,
typename
X
,
typename
Out
>
void
operator
()(
Device
d
,
X
x
,
Out
out
)
const
{
out
.
device
(
d
)
=
static_cast
<
T
>
(
scale_b
)
*
(
static_cast
<
T
>
(
scale_a
)
*
x
).
tanh
();
}
};
template
<
typename
T
>
struct
STanhGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
float
scale_a
;
float
scale_b
;
typename
BaseActivationFunctor
<
T
>::
AttrPair
GetAttrs
()
{
return
{{
"scale_a"
,
&
scale_a
},
{
"scale_b"
,
&
scale_b
}};
}
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
auto
a
=
static_cast
<
T
>
(
scale_a
);
auto
b
=
static_cast
<
T
>
(
scale_b
);
auto
temp
=
(
a
*
x
).
tanh
()
*
(
a
*
x
).
tanh
();
dx
.
device
(
d
)
=
dout
*
a
*
b
*
(
static_cast
<
T
>
(
1
)
-
temp
);
}
static
constexpr
ActBwdOpFwdDeps
FwdDeps
()
{
return
ActBwdOpFwdDeps
::
kDepX
;
}
};
template
<
typename
T
>
struct
Tangent
{
HOSTDEVICE
T
operator
()(
const
T
&
val
)
const
{
return
tan
(
val
);
}
...
...
@@ -157,6 +284,132 @@ struct TanGradFunctor : public BaseActivationFunctor<T> {
static
constexpr
ActBwdOpFwdDeps
FwdDeps
()
{
return
kDepX
;
}
};
// square(x) = x^2
template
<
typename
T
>
struct
SquareFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Out
>
void
operator
()(
Device
d
,
X
x
,
Out
out
)
const
{
out
.
device
(
d
)
=
x
.
square
();
}
};
template
<
typename
T
>
struct
SquareGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
dx
.
device
(
d
)
=
dout
*
static_cast
<
T
>
(
2
)
*
x
;
}
static
constexpr
ActBwdOpFwdDeps
FwdDeps
()
{
return
ActBwdOpFwdDeps
::
kDepX
;
}
};
// sqrt(x) = x^(1/2)
template
<
typename
T
>
struct
SqrtFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Out
>
void
operator
()(
Device
d
,
X
x
,
Out
out
)
const
{
out
.
device
(
d
)
=
x
.
sqrt
();
}
};
template
<
typename
T
>
struct
SqrtGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
dx
.
device
(
d
)
=
static_cast
<
T
>
(
0.5
)
*
dout
/
out
;
}
static
constexpr
ActBwdOpFwdDeps
FwdDeps
()
{
return
ActBwdOpFwdDeps
::
kDepOut
;
}
};
// rsqrt(x) = x^(-1/2)
template
<
typename
T
>
struct
RsqrtFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Out
>
void
operator
()(
Device
d
,
X
x
,
Out
out
)
const
{
out
.
device
(
d
)
=
x
.
rsqrt
();
}
};
template
<
typename
T
>
struct
RsqrtGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
dx
.
device
(
d
)
=
static_cast
<
T
>
(
-
0.5
)
*
dout
*
out
*
out
*
out
;
}
static
constexpr
ActBwdOpFwdDeps
FwdDeps
()
{
return
ActBwdOpFwdDeps
::
kDepOut
;
}
};
// // For numerical stability, using the following formula instead of
// softplus(x) =
// // log(1 + exp(x))
// // softplus(x) = log(1 + exp(beta * x)) / beta when beta * x <=
// threshold(beta =
// // 1, threshold = 20 by default), otherwise x
template
<
typename
T
>
struct
SoftplusFunctor
:
public
BaseActivationFunctor
<
T
>
{
float
beta
;
float
threshold
;
typename
BaseActivationFunctor
<
T
>::
AttrPair
GetAttrs
()
{
return
{{
"beta"
,
&
beta
},
{
"threshold"
,
&
threshold
}};
}
template
<
typename
Device
,
typename
X
,
typename
Out
>
void
operator
()(
Device
d
,
X
x
,
Out
out
)
const
{
auto
x_beta
=
static_cast
<
T
>
(
beta
)
*
x
;
out
.
device
(
d
)
=
(
x_beta
>
static_cast
<
T
>
(
threshold
))
.
select
(
x
,
(
static_cast
<
T
>
(
1
)
+
x_beta
.
exp
()).
log
()
/
static_cast
<
T
>
(
beta
));
}
};
// For numerical stability, using the following formula instead of
// d(softplus(x))/dx = 1 / (1 + exp(-x))
// d(softplus(x))/dx = 1 / (1 + exp(-beta * x)) when beta * x <= threshold(beta
// = 1, threshold = 20 by default), otherwise x
template
<
typename
T
>
struct
SoftplusGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
float
beta
;
float
threshold
;
typename
BaseActivationFunctor
<
T
>::
AttrPair
GetAttrs
()
{
return
{{
"beta"
,
&
beta
},
{
"threshold"
,
&
threshold
}};
}
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
auto
x_beta
=
static_cast
<
T
>
(
beta
)
*
x
;
dx
.
device
(
d
)
=
(
x_beta
>
static_cast
<
T
>
(
threshold
))
.
select
(
dout
,
dout
/
(
static_cast
<
T
>
(
1
)
+
(
-
x_beta
).
exp
()));
}
static
constexpr
ActBwdOpFwdDeps
FwdDeps
()
{
return
ActBwdOpFwdDeps
::
kDepX
;
}
};
// Tangent(x) = tan(x)
template
<
typename
T
>
struct
TanFunctor
:
public
BaseActivationFunctor
<
T
>
{
...
...
@@ -348,6 +601,18 @@ struct AtanGradFunctor : public BaseActivationFunctor<T> {
static
constexpr
ActBwdOpFwdDeps
FwdDeps
()
{
return
kDepX
;
}
};
template
<
typename
T
>
struct
LogitGradFunctor
{
template
<
typename
Device
,
typename
X
,
typename
dOut
,
typename
dX
,
typename
P
>
void
operator
()(
Device
d
,
X
x
,
dOut
dout
,
dX
dx
,
P
p
,
float
eps
)
const
{
// logit(x)' = 1/(x*(1-x))
dx
.
device
(
d
)
=
(
x
<
static_cast
<
T
>
(
eps
)
||
x
>
static_cast
<
T
>
(
1.0
-
eps
))
.
select
(
p
.
constant
(
static_cast
<
T
>
(
0
)),
dout
*
(
static_cast
<
T
>
(
1
)
/
((
static_cast
<
T
>
(
1
)
-
x
)
*
x
)));
}
};
template
<
typename
T
>
struct
Acosh
{
HOSTDEVICE
T
operator
()(
const
T
&
val
)
const
{
return
acosh
(
val
);
}
...
...
@@ -458,6 +723,57 @@ struct AtanhGradFunctor : public BaseActivationFunctor<T> {
static
constexpr
ActBwdOpFwdDeps
FwdDeps
()
{
return
kDepX
;
}
};
// exp functor
// exp(x) = e^x
template
<
typename
T
>
struct
ExpFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Out
>
void
operator
()(
Device
d
,
X
x
,
Out
out
)
const
{
out
.
device
(
d
)
=
x
.
exp
();
}
};
template
<
typename
T
>
struct
ExpGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
dx
.
device
(
d
)
=
dout
*
out
;
}
static
constexpr
ActBwdOpFwdDeps
FwdDeps
()
{
return
ActBwdOpFwdDeps
::
kDepOut
;
}
};
// expm1(x) = e^x - 1
template
<
typename
T
>
struct
Expm1Functor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Out
>
void
operator
()(
Device
d
,
X
x
,
Out
out
)
const
{
out
.
device
(
d
)
=
x
.
expm1
();
}
};
template
<
typename
T
>
struct
Expm1GradFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
dx
.
device
(
d
)
=
dout
*
out
+
dout
;
}
static
constexpr
ActBwdOpFwdDeps
FwdDeps
()
{
return
ActBwdOpFwdDeps
::
kDepOut
;
}
};
// relu(x) = max(x, 0)
template
<
typename
T
>
struct
ReluCPUFunctor
:
public
BaseActivationFunctor
<
T
>
{
...
...
@@ -1560,6 +1876,90 @@ struct CudaCosGradFunctor : public BaseActivationFunctor<T> {
static
constexpr
ActBwdOpFwdDeps
FwdDeps
()
{
return
ActBwdOpFwdDeps
::
kDepX
;
}
};
template
<
typename
T
>
struct
CudaExpFunctor
:
public
BaseActivationFunctor
<
T
>
{
using
MPType
=
typename
phi
::
dtype
::
MPTypeTrait
<
T
>::
Type
;
// exp(x) = exp(x)
__device__
__forceinline__
T
operator
()(
const
T
arg_x
)
const
{
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
return
static_cast
<
T
>
(
exp
(
x
));
}
};
template
<
typename
T
>
struct
CudaSquareFunctor
:
public
BaseActivationFunctor
<
T
>
{
// square(x) = x * x
__device__
__forceinline__
T
operator
()(
const
T
x
)
const
{
return
x
*
x
;
}
};
template
<
typename
T
>
struct
CudaSquareGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
T
two
=
static_cast
<
T
>
(
2.0
f
);
// dx = dout * 2 * x
__device__
__forceinline__
T
operator
()(
const
T
dout
,
const
T
x
)
const
{
return
dout
*
two
*
x
;
}
static
constexpr
ActBwdOpFwdDeps
FwdDeps
()
{
return
ActBwdOpFwdDeps
::
kDepX
;
}
};
template
<
typename
T
>
struct
CudaExpGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
// dx = dout * out
__device__
__forceinline__
T
operator
()(
const
T
dout
,
const
T
out
)
const
{
return
dout
*
out
;
}
static
constexpr
ActBwdOpFwdDeps
FwdDeps
()
{
return
ActBwdOpFwdDeps
::
kDepOut
;
}
};
template
<
typename
T
>
struct
CudaReciprocalFunctor
:
public
BaseActivationFunctor
<
T
>
{
T
one
=
static_cast
<
T
>
(
1.0
f
);
// reciprocal(x) = 1 / x
__device__
__forceinline__
T
operator
()(
const
T
x
)
const
{
return
one
/
x
;
}
};
template
<
typename
T
>
struct
CudaReciprocalGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
// dx = -dout * out^2
__device__
__forceinline__
T
operator
()(
const
T
dout
,
const
T
out
)
const
{
return
-
dout
*
out
*
out
;
}
static
constexpr
ActBwdOpFwdDeps
FwdDeps
()
{
return
ActBwdOpFwdDeps
::
kDepOut
;
}
};
template
<
typename
T
>
struct
CudaExpm1Functor
:
public
BaseActivationFunctor
<
T
>
{
using
MPType
=
typename
phi
::
dtype
::
MPTypeTrait
<
T
>::
Type
;
// expm1(x) = expm1(x)
__device__
__forceinline__
T
operator
()(
const
T
arg_x
)
const
{
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
return
static_cast
<
T
>
(
expm1
(
x
));
}
};
template
<
typename
T
>
struct
CudaExpm1GradFunctor
:
public
BaseActivationFunctor
<
T
>
{
// dx = dout * out
__device__
__forceinline__
T
operator
()(
const
T
dout
,
const
T
out
)
const
{
return
dout
*
out
+
dout
;
}
static
constexpr
ActBwdOpFwdDeps
FwdDeps
()
{
return
ActBwdOpFwdDeps
::
kDepOut
;
}
};
template
<
typename
T
>
struct
CudaSinFunctor
:
public
BaseActivationFunctor
<
T
>
{
using
MPType
=
typename
phi
::
dtype
::
MPTypeTrait
<
T
>::
Type
;
...
...
@@ -1782,6 +2182,96 @@ struct CudaAtanhFunctor : public BaseActivationFunctor<T> {
}
};
template
<
typename
T
>
struct
CudaSTanhFunctor
:
public
BaseActivationFunctor
<
T
>
{
using
MPType
=
typename
phi
::
dtype
::
MPTypeTrait
<
T
>::
Type
;
float
scale_a
;
float
scale_b
;
typename
BaseActivationFunctor
<
T
>::
AttrPair
GetAttrs
()
{
return
{{
"scale_a"
,
&
scale_a
},
{
"scale_b"
,
&
scale_b
}};
}
// stanh(x) = b * tanh(a * x)
__device__
__forceinline__
T
operator
()(
const
T
arg_x
)
const
{
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
MPType
a
=
static_cast
<
MPType
>
(
scale_a
);
MPType
b
=
static_cast
<
MPType
>
(
scale_b
);
return
static_cast
<
T
>
(
b
*
tanh
(
a
*
x
));
}
};
template
<
typename
T
>
struct
CudaSTanhGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
using
MPType
=
typename
phi
::
dtype
::
MPTypeTrait
<
T
>::
Type
;
MPType
one
=
static_cast
<
MPType
>
(
1.0
f
);
float
scale_a
;
float
scale_b
;
typename
BaseActivationFunctor
<
T
>::
AttrPair
GetAttrs
()
{
return
{{
"scale_a"
,
&
scale_a
},
{
"scale_b"
,
&
scale_b
}};
}
// dx = dout * a * b * (1 - tanh(a * x) * tanh(a * x))
__device__
__forceinline__
T
operator
()(
const
T
arg_dout
,
const
T
arg_x
)
const
{
MPType
dout
=
static_cast
<
MPType
>
(
arg_dout
);
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
MPType
a
=
static_cast
<
MPType
>
(
scale_a
);
MPType
b
=
static_cast
<
MPType
>
(
scale_b
);
MPType
temp
=
tanh
(
a
*
x
);
return
static_cast
<
T
>
(
dout
*
a
*
b
*
(
one
-
temp
*
temp
));
}
static
constexpr
ActBwdOpFwdDeps
FwdDeps
()
{
return
ActBwdOpFwdDeps
::
kDepX
;
}
};
template
<
typename
T
>
struct
CudaSoftplusFunctor
:
public
BaseActivationFunctor
<
T
>
{
using
MPType
=
typename
phi
::
dtype
::
MPTypeTrait
<
T
>::
Type
;
MPType
one
=
static_cast
<
MPType
>
(
1.0
f
);
float
beta
;
float
threshold
;
typename
BaseActivationFunctor
<
T
>::
AttrPair
GetAttrs
()
{
return
{{
"beta"
,
&
beta
},
{
"threshold"
,
&
threshold
}};
}
// softplus(x) = beta * x > threshold ? x : log(1 + exp(beta * x)) / beta
__device__
__forceinline__
T
operator
()(
const
T
arg_x
)
const
{
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
MPType
b
=
static_cast
<
MPType
>
(
beta
);
MPType
t
=
static_cast
<
MPType
>
(
threshold
);
MPType
x_beta
=
x
*
beta
;
return
static_cast
<
T
>
(
x_beta
>
t
?
x
:
log
(
one
+
exp
(
x_beta
))
/
b
);
}
};
template
<
typename
T
>
struct
CudaSoftplusGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
using
MPType
=
typename
phi
::
dtype
::
MPTypeTrait
<
T
>::
Type
;
MPType
one
=
static_cast
<
MPType
>
(
1.0
f
);
float
beta
;
float
threshold
;
typename
BaseActivationFunctor
<
T
>::
AttrPair
GetAttrs
()
{
return
{{
"beta"
,
&
beta
},
{
"threshold"
,
&
threshold
}};
}
// dx = x * beta > threshold ? dout : dout / (1 + exp(-beta * x))
__device__
__forceinline__
T
operator
()(
const
T
arg_dout
,
const
T
arg_x
)
const
{
MPType
dout
=
static_cast
<
MPType
>
(
arg_dout
);
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
MPType
b
=
static_cast
<
MPType
>
(
beta
);
MPType
t
=
static_cast
<
MPType
>
(
threshold
);
MPType
x_beta
=
x
*
beta
;
return
x_beta
>
t
?
arg_dout
:
static_cast
<
T
>
(
dout
/
(
one
+
exp
(
-
x_beta
)));
}
static
constexpr
ActBwdOpFwdDeps
FwdDeps
()
{
return
ActBwdOpFwdDeps
::
kDepX
;
}
};
template
<
typename
T
>
struct
CudaAtanhGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
using
MPType
=
typename
phi
::
dtype
::
MPTypeTrait
<
T
>::
Type
;
...
...
@@ -1797,6 +2287,56 @@ struct CudaAtanhGradFunctor : public BaseActivationFunctor<T> {
static
constexpr
ActBwdOpFwdDeps
FwdDeps
()
{
return
ActBwdOpFwdDeps
::
kDepX
;
}
};
template
<
typename
T
>
struct
CudaSqrtFunctor
:
public
BaseActivationFunctor
<
T
>
{
using
MPType
=
typename
phi
::
dtype
::
MPTypeTrait
<
T
>::
Type
;
// sqrt(x) = sqrt(x)
__device__
__forceinline__
T
operator
()(
const
T
arg_x
)
const
{
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
return
static_cast
<
T
>
(
sqrt
(
x
));
}
};
template
<
typename
T
>
struct
CudaSqrtGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
T
one_half
=
static_cast
<
T
>
(
0.5
f
);
// dx = dout * 0.5 / out
__device__
__forceinline__
T
operator
()(
const
T
dout
,
const
T
out
)
const
{
return
one_half
*
dout
/
out
;
}
static
constexpr
ActBwdOpFwdDeps
FwdDeps
()
{
return
ActBwdOpFwdDeps
::
kDepOut
;
}
};
template
<
typename
T
>
struct
CudaRsqrtFunctor
:
public
BaseActivationFunctor
<
T
>
{
using
MPType
=
typename
phi
::
dtype
::
MPTypeTrait
<
T
>::
Type
;
// rsqrt(x) = rsqrt(x)
__device__
__forceinline__
T
operator
()(
const
T
arg_x
)
const
{
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
return
static_cast
<
T
>
(
rsqrt
(
x
));
}
};
template
<
typename
T
>
struct
CudaRsqrtGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
T
minus_one_half
=
static_cast
<
T
>
(
-
0.5
f
);
// dx = -0.5 * dout * out^3
__device__
__forceinline__
T
operator
()(
const
T
dout
,
const
T
out
)
const
{
return
minus_one_half
*
dout
*
out
*
out
*
out
;
}
static
constexpr
ActBwdOpFwdDeps
FwdDeps
()
{
return
ActBwdOpFwdDeps
::
kDepOut
;
}
};
template
<
typename
T
>
struct
CudaAtanFunctor
:
public
BaseActivationFunctor
<
T
>
{
using
MPType
=
typename
phi
::
dtype
::
MPTypeTrait
<
T
>::
Type
;
...
...
@@ -1864,6 +2404,55 @@ struct CudaBReluFunctor : public BaseActivationFunctor<T> {
}
};
template
<
typename
T
>
struct
CudaMishFunctor
:
public
BaseActivationFunctor
<
T
>
{
using
MPType
=
typename
phi
::
dtype
::
MPTypeTrait
<
T
>::
Type
;
MPType
one
=
static_cast
<
MPType
>
(
1.0
f
);
float
threshold
;
typename
BaseActivationFunctor
<
T
>::
AttrPair
GetAttrs
()
{
return
{{
"threshold"
,
&
threshold
}};
}
// mish(x) = x * tanh(softplus(x))
// softplus(x) = x, if x > threshold
// = ln(1 + exp(x)), otherwise
// Inputs: args[0], the input x
__device__
__forceinline__
T
operator
()(
const
T
arg_x
)
const
{
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
MPType
sp
=
(
x
>
static_cast
<
MPType
>
(
threshold
))
?
x
:
log
(
one
+
exp
(
x
));
return
static_cast
<
T
>
(
x
*
tanh
(
sp
));
}
};
template
<
typename
T
>
struct
CudaMishGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
using
MPType
=
typename
phi
::
dtype
::
MPTypeTrait
<
T
>::
Type
;
MPType
one
=
static_cast
<
MPType
>
(
1.0
f
);
float
threshold
;
typename
BaseActivationFunctor
<
T
>::
AttrPair
GetAttrs
()
{
return
{{
"threshold"
,
&
threshold
}};
}
// dx = dout * (tanh(sp) + x * (1 - tanh(sp) ** 2) * (1 - exp(-sp)))
// sp = softplus(x)
// Inputs: args[0], the input dout
// args[1], the input x
__device__
__forceinline__
T
operator
()(
const
T
arg_dout
,
const
T
arg_x
)
const
{
MPType
dout
=
static_cast
<
MPType
>
(
arg_dout
);
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
MPType
sp
=
(
x
>
static_cast
<
MPType
>
(
threshold
))
?
x
:
log
(
one
+
exp
(
x
));
MPType
gsp
=
(
x
>
static_cast
<
MPType
>
(
threshold
))
?
one
:
one
/
(
one
+
exp
(
-
x
));
MPType
tsp
=
tanh
(
sp
);
return
static_cast
<
T
>
(
dout
*
(
tsp
+
x
*
(
one
-
tsp
*
tsp
)
*
gsp
));
}
static
constexpr
ActBwdOpFwdDeps
FwdDeps
()
{
return
ActBwdOpFwdDeps
::
kDepX
;
}
};
template
<
typename
T
>
struct
CudaBReluGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
T
zero
=
static_cast
<
T
>
(
0.0
f
);
...
...
paddle/phi/kernels/gpu/activation_grad_kernel.cu
浏览文件 @
07473786
...
...
@@ -189,6 +189,13 @@ DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Acosh, CudaAcoshGradFunctor);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX
(
Atanh
,
CudaAtanhGradFunctor
);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX
(
TanhShrink
,
CudaTanhShrinkGradFunctor
);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX
(
Silu
,
CudaSiluGradFunctor
);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX
(
Square
,
CudaSquareGradFunctor
);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPOUT
(
Exp
,
CudaExpGradFunctor
);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPOUT
(
Expm1
,
CudaExpm1GradFunctor
);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPOUT
(
Reciprocal
,
CudaReciprocalGradFunctor
);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPOUT
(
Sqrt
,
CudaSqrtGradFunctor
);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPOUT
(
Rsqrt
,
CudaRsqrtGradFunctor
);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX
(
LogSigmoid
,
CudaLogSigmoidGradFunctor
);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX
(
Log
,
CudaLogGradFunctor
);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX
(
Log2
,
CudaLog2GradFunctor
);
...
...
@@ -211,11 +218,24 @@ DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(Swish,
CudaSwishGradFunctor
,
beta
);
DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX
(
Mish
,
CudaMishGradFunctor
,
threshold
);
DEFINE_GPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX
(
BRelu
,
CudaBReluGradFunctor
,
t_min
,
t_max
);
DEFINE_GPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX
(
STanh
,
CudaSTanhGradFunctor
,
scale_a
,
scale_b
);
DEFINE_GPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX
(
Softplus
,
CudaSoftplusGradFunctor
,
beta
,
threshold
);
DEFINE_GPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPOUT
(
HardSigmoid
,
CudaHardSigmoidGradFunctor
,
slope
,
...
...
@@ -326,12 +346,57 @@ PD_REGISTER_ACTIVATION_GRAD_KERNEL(leaky_relu_double_grad,
LeakyReluDoubleGradKernel
)
PD_REGISTER_ACTIVATION_GRAD_KERNEL
(
thresholded_relu_grad
,
ThresholdedReluGradKernel
)
PD_REGISTER_ACTIVATION_GRAD_KERNEL
(
mish_grad
,
MishGradKernel
)
PD_REGISTER_ACTIVATION_GRAD_KERNEL
(
stanh_grad
,
STanhGradKernel
)
PD_REGISTER_ACTIVATION_GRAD_KERNEL
(
reciprocal_grad
,
ReciprocalGradKernel
)
PD_REGISTER_ACTIVATION_GRAD_KERNEL
(
softplus_grad
,
SoftplusGradKernel
)
PD_REGISTER_ACTIVATION_GRAD_KERNEL
(
sqrt_grad
,
SqrtGradKernel
)
PD_REGISTER_ACTIVATION_GRAD_KERNEL
(
rsqrt_grad
,
RsqrtGradKernel
)
PD_REGISTER_KERNEL
(
exp_grad
,
GPU
,
ALL_LAYOUT
,
phi
::
ExpGradKernel
,
float
,
double
,
int
,
int64_t
,
phi
::
dtype
::
float16
)
{}
PD_REGISTER_ACTIVATION_GRAD_KERNEL
(
soft_shrink_grad
,
SoftShrinkGradKernel
)
PD_REGISTER_ACTIVATION_GRAD_KERNEL
(
hard_shrink_grad
,
HardShrinkGradKernel
)
PD_REGISTER_ACTIVATION_GRAD_KERNEL
(
tanh_shrink_grad
,
TanhShrinkGradKernel
)
PD_REGISTER_ACTIVATION_GRAD_KERNEL
(
silu_grad
,
SiluGradKernel
)
PD_REGISTER_ACTIVATION_GRAD_KERNEL
(
elu_grad
,
EluGradKernel
)
PD_REGISTER_ACTIVATION_GRAD_KERNEL
(
elu_double_grad
,
EluDoubleGradKernel
)
PD_REGISTER_KERNEL
(
expm1_grad
,
GPU
,
ALL_LAYOUT
,
phi
::
Expm1GradKernel
,
float
,
double
,
phi
::
dtype
::
float16
)
{}
PD_REGISTER_KERNEL
(
logit_grad
,
GPU
,
ALL_LAYOUT
,
phi
::
LogitGradKernel
,
float
,
double
,
phi
::
dtype
::
float16
)
{}
PD_REGISTER_KERNEL
(
square_grad
,
GPU
,
ALL_LAYOUT
,
phi
::
SquareGradKernel
,
float
,
double
,
int
,
int64_t
,
phi
::
dtype
::
float16
,
phi
::
dtype
::
bfloat16
)
{}
PD_REGISTER_ACTIVATION_GRAD_KERNEL
(
sigmoid_grad
,
SigmoidGradKernel
)
PD_REGISTER_ACTIVATION_GRAD_KERNEL
(
sigmoid_double_grad
,
SigmoidDoubleGradKernel
)
PD_REGISTER_ACTIVATION_GRAD_KERNEL
(
sigmoid_triple_grad
,
SigmoidTripleGradKernel
)
...
...
paddle/phi/kernels/gpu/activation_kernel.cu
浏览文件 @
07473786
...
...
@@ -19,6 +19,7 @@ limitations under the License. */
#include "paddle/phi/common/float16.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/elementwise_base.h"
#include "paddle/phi/kernels/impl/activation_grad_impl.h"
#include "paddle/phi/kernels/impl/activation_impl.h"
#include "paddle/fluid/platform/device/gpu/gpu_device_function.h"
...
...
@@ -91,6 +92,12 @@ DEFINE_GPU_ACTIVATION_KERNEL(Relu, CudaReluFunctor)
DEFINE_GPU_ACTIVATION_KERNEL
(
Tanh
,
CudaTanhFunctor
)
DEFINE_GPU_ACTIVATION_KERNEL
(
TanhShrink
,
CudaTanhShrinkFunctor
)
DEFINE_GPU_ACTIVATION_KERNEL
(
Silu
,
CudaSiluFunctor
)
DEFINE_GPU_ACTIVATION_KERNEL
(
Exp
,
CudaExpFunctor
)
DEFINE_GPU_ACTIVATION_KERNEL
(
Expm1
,
CudaExpm1Functor
)
DEFINE_GPU_ACTIVATION_KERNEL
(
Reciprocal
,
CudaReciprocalFunctor
)
DEFINE_GPU_ACTIVATION_KERNEL
(
Square
,
CudaSquareFunctor
)
DEFINE_GPU_ACTIVATION_KERNEL
(
Sqrt
,
CudaSqrtFunctor
)
DEFINE_GPU_ACTIVATION_KERNEL
(
Rsqrt
,
CudaRsqrtFunctor
)
DEFINE_GPU_ACTIVATION_KERNEL
(
Sigmoid
,
CudaSigmoidFunctor
)
DEFINE_GPU_ACTIVATION_KERNEL
(
LogSigmoid
,
CudaLogSigmoidFunctor
)
DEFINE_GPU_ACTIVATION_KERNEL
(
Log
,
CudaLogFunctor
)
...
...
@@ -112,7 +119,14 @@ DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(SoftShrink, CudaSoftShrinkFunctor, lambda)
DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS
(
Elu
,
CudaELUFunctor
,
alpha
)
DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS
(
Swish
,
CudaSwishFunctor
,
beta
)
DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS
(
Mish
,
CudaMishFunctor
,
threshold
)
DEFINE_GPU_ACT_KERNEL_WITH_TWO_ATTRS
(
BRelu
,
CudaBReluFunctor
,
t_min
,
t_max
)
DEFINE_GPU_ACT_KERNEL_WITH_TWO_ATTRS
(
Stanh
,
CudaSTanhFunctor
,
scale_a
,
scale_b
)
DEFINE_GPU_ACT_KERNEL_WITH_TWO_ATTRS
(
Softplus
,
CudaSoftplusFunctor
,
beta
,
threshold
)
DEFINE_GPU_ACT_KERNEL_WITH_TWO_ATTRS
(
HardSigmoid
,
CudaHardSigmoidFunctor
,
slope
,
...
...
@@ -180,6 +194,46 @@ PD_REGISTER_ACTIVATION_KERNEL(tanh, TanhKernel)
PD_REGISTER_ACTIVATION_KERNEL
(
brelu
,
BReluKernel
)
PD_REGISTER_ACTIVATION_KERNEL
(
thresholded_relu
,
ThresholdedReluKernel
)
PD_REGISTER_ACTIVATION_KERNEL
(
leaky_relu
,
LeakyReluKernel
)
PD_REGISTER_ACTIVATION_KERNEL
(
mish
,
MishKernel
)
PD_REGISTER_ACTIVATION_KERNEL
(
stanh
,
StanhKernel
)
PD_REGISTER_ACTIVATION_KERNEL
(
reciprocal
,
ReciprocalKernel
)
PD_REGISTER_ACTIVATION_KERNEL
(
sqrt
,
SqrtKernel
)
PD_REGISTER_ACTIVATION_KERNEL
(
rsqrt
,
RsqrtKernel
)
PD_REGISTER_ACTIVATION_KERNEL
(
softplus
,
SoftplusKernel
)
PD_REGISTER_KERNEL
(
exp
,
GPU
,
ALL_LAYOUT
,
phi
::
ExpKernel
,
float
,
double
,
int
,
int64_t
,
phi
::
dtype
::
float16
)
{}
PD_REGISTER_KERNEL
(
expm1
,
GPU
,
ALL_LAYOUT
,
phi
::
Expm1Kernel
,
float
,
double
,
phi
::
dtype
::
float16
)
{}
PD_REGISTER_KERNEL
(
logit
,
GPU
,
ALL_LAYOUT
,
phi
::
LogitKernel
,
float
,
double
,
phi
::
dtype
::
float16
)
{}
PD_REGISTER_KERNEL
(
square
,
GPU
,
ALL_LAYOUT
,
phi
::
SquareKernel
,
float
,
double
,
int
,
int64_t
,
phi
::
dtype
::
float16
,
phi
::
dtype
::
bfloat16
)
{}
PD_REGISTER_ACTIVATION_KERNEL
(
hard_shrink
,
HardShrinkKernel
)
PD_REGISTER_ACTIVATION_KERNEL
(
soft_shrink
,
SoftShrinkKernel
)
PD_REGISTER_ACTIVATION_KERNEL
(
tanh_shrink
,
TanhShrinkKernel
)
...
...
paddle/phi/kernels/gpu/temporal_shift_grad_kernel.cu
0 → 100644
浏览文件 @
07473786
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/common/layout.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/temporal_shift_grad_kernel.h"
namespace
phi
{
template
<
typename
T
>
__global__
void
KeTemporalShiftBwNCHW
(
const
T
*
output_grad
,
T
*
input_grad
,
const
int
ntchw
,
const
int
tchw
,
const
int
chw
,
const
int
hw
,
const
int
t
,
const
int
c1
,
const
int
c2
)
{
int
tid
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
stride
=
blockDim
.
x
*
gridDim
.
x
;
int
src_it
=
0
;
for
(;
tid
<
ntchw
;
tid
+=
stride
)
{
int
it
=
(
tid
%
tchw
)
/
chw
;
int
ic
=
(
tid
%
chw
)
/
hw
;
if
(
ic
<
c1
)
{
src_it
=
it
+
1
;
}
else
if
(
ic
<
c2
)
{
src_it
=
it
-
1
;
}
else
{
src_it
=
it
;
}
if
(
src_it
>=
0
&&
src_it
<
t
)
{
input_grad
[
tid
]
=
output_grad
[
tid
+
(
src_it
-
it
)
*
chw
];
}
else
{
input_grad
[
tid
]
=
0
;
}
}
}
template
<
typename
T
>
__global__
void
KeTemporalShiftBwNHWC
(
const
T
*
output_grad
,
T
*
input_grad
,
const
int
nthwc
,
const
int
thwc
,
const
int
hwc
,
const
int
t
,
const
int
c
,
const
int
c1
,
const
int
c2
)
{
int
tid
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
stride
=
blockDim
.
x
*
gridDim
.
x
;
int
src_it
=
0
;
for
(;
tid
<
nthwc
;
tid
+=
stride
)
{
int
it
=
(
tid
%
thwc
)
/
hwc
;
int
ic
=
tid
%
c
;
if
(
ic
<
c1
)
{
src_it
=
it
+
1
;
}
else
if
(
ic
<
c2
)
{
src_it
=
it
-
1
;
}
else
{
src_it
=
it
;
}
if
(
src_it
>=
0
&&
src_it
<
t
)
{
input_grad
[
tid
]
=
output_grad
[
tid
+
(
src_it
-
it
)
*
hwc
];
}
else
{
input_grad
[
tid
]
=
0
;
}
}
}
template
<
typename
T
,
typename
Context
>
void
TemporalShiftGradKernel
(
const
Context
&
dev_ctx
,
const
DenseTensor
&
out_grad
,
int
seg_num
,
float
shift_ratio
,
const
std
::
string
&
data_format_str
,
DenseTensor
*
x_grad
)
{
auto
*
input_grad
=
x_grad
;
auto
*
output_grad
=
&
out_grad
;
int
t
=
seg_num
;
const
DataLayout
data_layout
=
paddle
::
framework
::
StringToDataLayout
(
data_format_str
);
const
int
nt
=
output_grad
->
dims
()[
0
];
const
int
c
=
(
data_layout
==
DataLayout
::
kNCHW
?
output_grad
->
dims
()[
1
]
:
output_grad
->
dims
()[
3
]);
const
int
h
=
(
data_layout
==
DataLayout
::
kNCHW
?
output_grad
->
dims
()[
2
]
:
output_grad
->
dims
()[
1
]);
const
int
w
=
(
data_layout
==
DataLayout
::
kNCHW
?
output_grad
->
dims
()[
3
]
:
output_grad
->
dims
()[
2
]);
const
int
hw
=
h
*
w
;
const
int
chw
=
c
*
hw
;
const
int
tchw
=
t
*
chw
;
const
int
ntchw
=
nt
*
chw
;
const
int
c1
=
static_cast
<
int
>
(
c
*
shift_ratio
);
const
int
c2
=
static_cast
<
int
>
(
c
*
2
*
shift_ratio
);
DDim
in_grad_dims
=
(
data_layout
==
DataLayout
::
kNCHW
?
phi
::
make_ddim
({
nt
,
c
,
h
,
w
})
:
phi
::
make_ddim
({
nt
,
h
,
w
,
c
}));
const
T
*
output_grad_data
=
output_grad
->
data
<
T
>
();
T
*
input_grad_data
=
input_grad
->
mutable_data
<
T
>
(
in_grad_dims
,
dev_ctx
.
GetPlace
());
int
pixelNum
=
nt
*
chw
;
int
threads
=
1024
;
int
grid
=
(
pixelNum
+
threads
-
1
)
/
threads
;
int
blocks_per_sm
=
dev_ctx
.
GetMaxPhysicalThreadCount
()
/
threads
;
grid
=
std
::
min
(
dev_ctx
.
GetSMCount
()
*
blocks_per_sm
,
grid
);
if
(
data_layout
==
DataLayout
::
kNCHW
)
{
KeTemporalShiftBwNCHW
<
T
><<<
grid
,
threads
,
0
,
dev_ctx
.
stream
()
>>>
(
output_grad_data
,
input_grad_data
,
ntchw
,
tchw
,
chw
,
hw
,
t
,
c1
,
c2
);
}
else
{
KeTemporalShiftBwNHWC
<
T
><<<
grid
,
threads
,
0
,
dev_ctx
.
stream
()
>>>
(
output_grad_data
,
input_grad_data
,
ntchw
,
tchw
,
chw
,
t
,
c
,
c1
,
c2
);
}
}
}
// namespace phi
PD_REGISTER_KERNEL
(
temporal_shift_grad
,
GPU
,
ALL_LAYOUT
,
phi
::
TemporalShiftGradKernel
,
float
,
double
,
phi
::
dtype
::
float16
)
{}
paddle/phi/kernels/gpu/temporal_shift_kernel.cu
0 → 100644
浏览文件 @
07473786
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/common/layout.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/temporal_shift_kernel.h"
namespace
phi
{
template
<
typename
T
>
__global__
void
KeTemporalShiftFwNCHW
(
const
T
*
input
,
T
*
output
,
const
int
ntchw
,
const
int
tchw
,
const
int
chw
,
const
int
hw
,
const
int
t
,
const
int
c1
,
const
int
c2
)
{
int
tid
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
stride
=
blockDim
.
x
*
gridDim
.
x
;
int
src_it
=
0
;
for
(;
tid
<
ntchw
;
tid
+=
stride
)
{
int
it
=
(
tid
%
tchw
)
/
chw
;
int
ic
=
(
tid
%
chw
)
/
hw
;
if
(
ic
<
c1
)
{
src_it
=
it
-
1
;
}
else
if
(
ic
<
c2
)
{
src_it
=
it
+
1
;
}
else
{
src_it
=
it
;
}
if
(
src_it
<
0
||
src_it
>=
t
)
{
output
[
tid
]
=
0
;
}
else
{
output
[
tid
]
=
input
[
tid
+
(
src_it
-
it
)
*
chw
];
}
}
}
template
<
typename
T
>
__global__
void
KeTemporalShiftFwNHWC
(
const
T
*
input
,
T
*
output
,
const
int
nthwc
,
const
int
thwc
,
const
int
hwc
,
const
int
t
,
const
int
c
,
const
int
c1
,
const
int
c2
)
{
int
tid
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
stride
=
blockDim
.
x
*
gridDim
.
x
;
int
src_it
=
0
;
for
(;
tid
<
nthwc
;
tid
+=
stride
)
{
int
it
=
(
tid
%
thwc
)
/
hwc
;
int
ic
=
tid
%
c
;
if
(
ic
<
c1
)
{
src_it
=
it
-
1
;
}
else
if
(
ic
<
c2
)
{
src_it
=
it
+
1
;
}
else
{
src_it
=
it
;
}
if
(
src_it
<
0
||
src_it
>=
t
)
{
output
[
tid
]
=
0
;
}
else
{
output
[
tid
]
=
input
[
tid
+
(
src_it
-
it
)
*
hwc
];
}
}
}
template
<
typename
T
,
typename
Context
>
void
TemporalShiftKernel
(
const
Context
&
dev_ctx
,
const
DenseTensor
&
x
,
int
seg_num
,
float
shift_ratio
,
const
std
::
string
&
data_format_str
,
DenseTensor
*
out
)
{
auto
*
input
=
&
x
;
auto
*
output
=
out
;
int
t
=
seg_num
;
const
DataLayout
data_layout
=
paddle
::
framework
::
StringToDataLayout
(
data_format_str
);
const
int
nt
=
input
->
dims
()[
0
];
const
int
c
=
(
data_layout
==
DataLayout
::
kNCHW
?
input
->
dims
()[
1
]
:
input
->
dims
()[
3
]);
const
int
h
=
(
data_layout
==
DataLayout
::
kNCHW
?
input
->
dims
()[
2
]
:
input
->
dims
()[
1
]);
const
int
w
=
(
data_layout
==
DataLayout
::
kNCHW
?
input
->
dims
()[
3
]
:
input
->
dims
()[
2
]);
const
int
hw
=
h
*
w
;
const
int
chw
=
c
*
hw
;
const
int
tchw
=
t
*
chw
;
const
int
ntchw
=
nt
*
chw
;
const
int
c1
=
static_cast
<
int
>
(
c
*
shift_ratio
);
const
int
c2
=
static_cast
<
int
>
(
c
*
2
*
shift_ratio
);
DDim
out_dims
=
(
data_layout
==
DataLayout
::
kNCHW
?
phi
::
make_ddim
({
nt
,
c
,
h
,
w
})
:
phi
::
make_ddim
({
nt
,
h
,
w
,
c
}));
const
T
*
input_data
=
input
->
data
<
T
>
();
T
*
output_data
=
output
->
mutable_data
<
T
>
(
out_dims
,
dev_ctx
.
GetPlace
());
int
pixelNum
=
nt
*
chw
;
int
threads
=
1024
;
int
grid
=
(
pixelNum
+
threads
-
1
)
/
threads
;
int
blocks_per_sm
=
dev_ctx
.
GetMaxPhysicalThreadCount
()
/
threads
;
grid
=
std
::
min
(
dev_ctx
.
GetSMCount
()
*
blocks_per_sm
,
grid
);
if
(
data_layout
==
DataLayout
::
kNCHW
)
{
KeTemporalShiftFwNCHW
<
T
><<<
grid
,
threads
,
0
,
dev_ctx
.
stream
()
>>>
(
input_data
,
output_data
,
ntchw
,
tchw
,
chw
,
hw
,
t
,
c1
,
c2
);
}
else
{
KeTemporalShiftFwNHWC
<
T
><<<
grid
,
threads
,
0
,
dev_ctx
.
stream
()
>>>
(
input_data
,
output_data
,
ntchw
,
tchw
,
chw
,
t
,
c
,
c1
,
c2
);
}
}
}
// namespace phi
PD_REGISTER_KERNEL
(
temporal_shift
,
GPU
,
ALL_LAYOUT
,
phi
::
TemporalShiftKernel
,
float
,
double
,
phi
::
dtype
::
float16
)
{}
paddle/phi/kernels/impl/activation_grad_impl.h
浏览文件 @
07473786
...
...
@@ -222,6 +222,24 @@ void EluDoubleGradKernel(const Context& dev_ctx,
functor
(
dev_ctx
,
&
x
,
&
ddx
,
ddout
,
&
dout
,
dx
);
}
template
<
typename
T
,
typename
Context
>
void
LogitGradKernel
(
const
Context
&
dev_ctx
,
const
DenseTensor
&
x
,
const
DenseTensor
&
out_grad
,
float
eps
,
DenseTensor
*
x_grad
)
{
dev_ctx
.
template
Alloc
<
T
>(
x_grad
);
auto
eigen_x
=
EigenVector
<
T
>::
Flatten
(
x
);
auto
eigen_dout
=
EigenVector
<
T
>::
Flatten
(
out_grad
);
auto
eigen_dx
=
EigenVector
<
T
>::
Flatten
(
*
x_grad
);
auto
&
place
=
*
dev_ctx
.
eigen_device
();
auto
eigen_p
=
EigenVector
<
T
>::
Flatten
(
x
);
funcs
::
LogitGradFunctor
<
T
>
functor
;
functor
(
place
,
eigen_x
,
eigen_dout
,
eigen_dx
,
eigen_p
,
eps
);
}
template
<
typename
T
,
typename
Context
>
void
SigmoidDoubleGradKernel
(
const
Context
&
dev_ctx
,
const
DenseTensor
&
out
,
...
...
paddle/phi/kernels/impl/activation_impl.h
浏览文件 @
07473786
...
...
@@ -47,6 +47,22 @@ void ActivationImpl(const Context& dev_ctx,
}
}
template
<
typename
T
,
typename
Context
>
void
LogitKernel
(
const
Context
&
dev_ctx
,
const
DenseTensor
&
x
,
float
eps
,
DenseTensor
*
out
)
{
dev_ctx
.
template
Alloc
<
T
>(
out
);
auto
eigen_out
=
EigenVector
<
T
>::
Flatten
(
*
out
);
auto
eigen_in
=
EigenVector
<
T
>::
Flatten
(
x
);
auto
&
place
=
*
dev_ctx
.
eigen_device
();
auto
eigen_p
=
EigenVector
<
T
>::
Flatten
(
*
out
);
funcs
::
LogitFunctor
<
T
>
functor
;
functor
(
place
,
eigen_in
,
eigen_out
,
eigen_p
,
eps
);
}
template
<
typename
T
,
typename
Context
>
void
PowKernel
(
const
Context
&
dev_ctx
,
const
DenseTensor
&
x
,
...
...
paddle/phi/kernels/selected_rows/activation_kernel.cc
0 → 100644
浏览文件 @
07473786
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/selected_rows/activation_kernel.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/activation_kernel.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
namespace
phi
{
namespace
sr
{
template
<
typename
T
,
typename
Context
>
void
SquareKernel
(
const
Context
&
dev_ctx
,
const
SelectedRows
&
x
,
SelectedRows
*
out
)
{
out
->
set_rows
(
x
.
rows
());
out
->
set_height
(
x
.
height
());
phi
::
SquareKernel
<
T
,
Context
>
(
dev_ctx
,
x
.
value
(),
out
->
mutable_value
());
}
template
<
typename
T
,
typename
Context
>
void
SqrtKernel
(
const
Context
&
dev_ctx
,
const
SelectedRows
&
x
,
SelectedRows
*
out
)
{
out
->
set_rows
(
x
.
rows
());
out
->
set_height
(
x
.
height
());
phi
::
SqrtKernel
<
T
,
Context
>
(
dev_ctx
,
x
.
value
(),
out
->
mutable_value
());
}
}
// namespace sr
}
// namespace phi
PD_REGISTER_KERNEL
(
square_sr
,
CPU
,
ALL_LAYOUT
,
phi
::
sr
::
SquareKernel
,
float
,
double
)
{}
PD_REGISTER_KERNEL
(
sqrt_sr
,
CPU
,
ALL_LAYOUT
,
phi
::
sr
::
SqrtKernel
,
float
,
double
)
{}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PD_REGISTER_KERNEL
(
square_sr
,
GPU
,
ALL_LAYOUT
,
phi
::
sr
::
SquareKernel
,
float
,
double
,
int
,
int64_t
)
{}
PD_REGISTER_KERNEL
(
sqrt_sr
,
GPU
,
ALL_LAYOUT
,
phi
::
sr
::
SqrtKernel
,
float
,
double
)
{}
#endif
paddle/phi/kernels/selected_rows/activation_kernel.h
0 → 100644
浏览文件 @
07473786
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/selected_rows.h"
namespace
phi
{
namespace
sr
{
template
<
typename
T
,
typename
Context
>
void
SquareKernel
(
const
Context
&
dev_ctx
,
const
SelectedRows
&
x
,
SelectedRows
*
out
);
template
<
typename
T
,
typename
Context
>
void
SqrtKernel
(
const
Context
&
dev_ctx
,
const
SelectedRows
&
x
,
SelectedRows
*
out
);
}
// namespace sr
}
// namespace phi
paddle/phi/kernels/temporal_shift_grad_kernel.h
0 → 100644
浏览文件 @
07473786
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/core/dense_tensor.h"
namespace
phi
{
template
<
typename
T
,
typename
Context
>
void
TemporalShiftGradKernel
(
const
Context
&
ctx
,
const
DenseTensor
&
out_grad
,
int
seg_num
,
float
shift_ratio
,
const
std
::
string
&
data_format
,
DenseTensor
*
x_grad
);
}
// namespace phi
paddle/phi/kernels/temporal_shift_kernel.h
0 → 100644
浏览文件 @
07473786
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/core/dense_tensor.h"
namespace
phi
{
template
<
typename
T
,
typename
Context
>
void
TemporalShiftKernel
(
const
Context
&
ctx
,
const
DenseTensor
&
x
,
int
seg_num
,
float
shift_ratio
,
const
std
::
string
&
data_format
,
DenseTensor
*
out
);
}
// namespace phi
paddle/phi/ops/compat/activation_sig.cc
浏览文件 @
07473786
...
...
@@ -43,17 +43,19 @@ namespace phi {
#define comma ,
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Cos
,
"cos"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Tan
,
"tan"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Acos
,
"acos"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Sin
,
"sin"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Asin
,
"asin"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Atan
,
"atan"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Sinh
,
"sinh"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Cosh
,
"cosh"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Asinh
,
"asinh"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Acosh
,
"acosh"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Atanh
,
"atanh"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Cos
,
"cos"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Tan
,
"tan"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Acos
,
"acos"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Sin
,
"sin"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Asin
,
"asin"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Atan
,
"atan"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Sinh
,
"sinh"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Cosh
,
"cosh"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Asinh
,
"asinh"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Acosh
,
"acosh"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Atanh
,
"atanh"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Square
,
"square"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
BRelu
,
"brelu"
,
"t_min"
comma
"t_max"
);
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
LeakyRelu
,
"leaky_relu"
,
"alpha"
);
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
ThresholdedRelu
,
...
...
@@ -61,6 +63,7 @@ DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(ThresholdedRelu,
"threshold"
);
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
SoftShrink
,
"soft_shrink"
,
"lambda"
);
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
HardShrink
,
"hard_shrink"
,
"threshold"
);
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Mish
,
"mish"
,
"threshold"
);
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
TanhShrink
,
"tanh_shrink"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Silu
,
"silu"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
LogSigmoid
,
"logsigmoid"
,
);
// NOLINT
...
...
@@ -74,12 +77,41 @@ DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(HardSwish,
"offset"
);
// NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Swish
,
"swish"
,
"beta"
);
// NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP
(
Relu
,
"relu"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP
(
Tanh
,
"tanh"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP
(
Sigmoid
,
"sigmoid"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
STanh
,
"stanh"
,
"scale_a"
comma
"scale_b"
);
// NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Softplus
,
"softplus"
,
"beta"
comma
"threshold"
);
// NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP
(
Relu
,
"relu"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP
(
Tanh
,
"tanh"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP
(
Sigmoid
,
"sigmoid"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP
(
Exp
,
"exp"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP
(
Expm1
,
"expm1"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP
(
Reciprocal
,
"reciprocal"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP
(
Sqrt
,
"sqrt"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP
(
Rsqrt
,
"rsqrt"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP
(
HardSigmoid
,
"hard_sigmoid"
,
"slope"
comma
"offset"
);
// NOLINT
KernelSignature
SqrtActiOpArgumentMapping
(
const
ArgumentMappingContext
&
ctx
)
{
if
(
ctx
.
IsDenseTensorInput
(
"X"
))
{
return
KernelSignature
(
"sqrt"
,
{
"X"
},
{},
{
"Out"
});
}
else
{
return
KernelSignature
(
"sqrt_sr"
,
{
"X"
},
{},
{
"Out"
});
}
}
KernelSignature
SquareActiOpArgumentMapping
(
const
ArgumentMappingContext
&
ctx
)
{
if
(
ctx
.
IsDenseTensorInput
(
"X"
))
{
return
KernelSignature
(
"square"
,
{
"X"
},
{},
{
"Out"
});
}
else
{
return
KernelSignature
(
"square_sr"
,
{
"X"
},
{},
{
"Out"
});
}
}
DEFINE_ACT_GRAD_NODEP_OP_ARGMAP
(
Round
,
"round"
,
);
// NOLINT
DEFINE_ACT_GRAD_NODEP_OP_ARGMAP
(
Floor
,
"floor"
,
);
// NOLINT
...
...
@@ -132,6 +164,11 @@ KernelSignature EluOpArgumentMapping(const ArgumentMappingContext& ctx) {
return
KernelSignature
(
"elu"
,
{
"X"
},
{
"alpha"
},
{
"Out"
});
}
KernelSignature
LogitGradOpArgumentMapping
(
const
ArgumentMappingContext
&
ctx
)
{
return
KernelSignature
(
"logit_grad"
,
{
"X"
,
GradVarName
(
"Out"
)},
{
"eps"
},
{
GradVarName
(
"X"
)});
}
KernelSignature
EluGradOpArgumentMapping
(
const
ArgumentMappingContext
&
ctx
)
{
return
KernelSignature
(
"elu_grad"
,
{
"X"
,
"Out"
,
GradVarName
(
"Out"
)},
...
...
@@ -194,6 +231,18 @@ PD_REGISTER_ARG_MAPPING_FN(asinh_grad, phi::AsinhGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN
(
acosh_grad
,
phi
::
AcoshGradOpArgumentMapping
);
PD_REGISTER_ARG_MAPPING_FN
(
atanh_grad
,
phi
::
AtanhGradOpArgumentMapping
);
PD_REGISTER_ARG_MAPPING_FN
(
relu_grad
,
phi
::
ReluGradOpArgumentMapping
);
PD_REGISTER_ARG_MAPPING_FN
(
exp_grad
,
phi
::
ExpGradOpArgumentMapping
);
PD_REGISTER_ARG_MAPPING_FN
(
expm1_grad
,
phi
::
Expm1GradOpArgumentMapping
);
PD_REGISTER_ARG_MAPPING_FN
(
square_grad
,
phi
::
SquareGradOpArgumentMapping
);
PD_REGISTER_ARG_MAPPING_FN
(
reciprocal_grad
,
phi
::
ReciprocalGradOpArgumentMapping
);
PD_REGISTER_ARG_MAPPING_FN
(
sqrt_grad
,
phi
::
SqrtGradOpArgumentMapping
);
PD_REGISTER_ARG_MAPPING_FN
(
rsqrt_grad
,
phi
::
RsqrtGradOpArgumentMapping
);
PD_REGISTER_ARG_MAPPING_FN
(
mish_grad
,
phi
::
MishGradOpArgumentMapping
);
PD_REGISTER_ARG_MAPPING_FN
(
stanh_grad
,
phi
::
STanhGradOpArgumentMapping
);
PD_REGISTER_ARG_MAPPING_FN
(
softplus_grad
,
phi
::
SoftplusGradOpArgumentMapping
);
PD_REGISTER_ARG_MAPPING_FN
(
relu_grad_grad
,
phi
::
ReluDoubleGradOpArgumentMapping
);
PD_REGISTER_ARG_MAPPING_FN
(
tanh_grad
,
phi
::
TanhGradOpArgumentMapping
);
...
...
@@ -228,11 +277,16 @@ PD_REGISTER_ARG_MAPPING_FN(logsigmoid_grad,
phi
::
LogSigmoidGradOpArgumentMapping
);
PD_REGISTER_ARG_MAPPING_FN
(
hard_sigmoid_grad
,
phi
::
HardSigmoidGradOpArgumentMapping
);
PD_REGISTER_ARG_MAPPING_FN
(
logit_grad
,
phi
::
LogitGradOpArgumentMapping
);
PD_REGISTER_ARG_MAPPING_FN
(
log_grad
,
phi
::
LogGradOpArgumentMapping
);
PD_REGISTER_ARG_MAPPING_FN
(
log_grad_grad
,
phi
::
LogDoubleGradOpArgumentMapping
);
PD_REGISTER_ARG_MAPPING_FN
(
log2_grad
,
phi
::
Log2GradOpArgumentMapping
);
PD_REGISTER_ARG_MAPPING_FN
(
log10_grad
,
phi
::
Log10GradOpArgumentMapping
);
PD_REGISTER_ARG_MAPPING_FN
(
log1p_grad
,
phi
::
Log1pGradOpArgumentMapping
);
PD_REGISTER_ARG_MAPPING_FN
(
sqrt
,
phi
::
SqrtActiOpArgumentMapping
);
PD_REGISTER_ARG_MAPPING_FN
(
square
,
phi
::
SquareActiOpArgumentMapping
);
PD_REGISTER_ARG_MAPPING_FN
(
hard_swish_grad
,
phi
::
HardSwishGradOpArgumentMapping
);
PD_REGISTER_ARG_MAPPING_FN
(
swish_grad
,
phi
::
SwishGradOpArgumentMapping
);
...
...
paddle/phi/ops/compat/temporal_shift_sig.cc
0 → 100644
浏览文件 @
07473786
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/core/compat/op_utils.h"
namespace
phi
{
KernelSignature
TemporalShiftOpArgumentMapping
(
const
ArgumentMappingContext
&
ctx
)
{
return
KernelSignature
(
"temporal_shift"
,
{
"X"
},
{
"seg_num"
,
"shift_ratio"
,
"data_format"
},
{
"Out"
});
}
KernelSignature
TemporalShiftGradOpArgumentMapping
(
const
ArgumentMappingContext
&
ctx
)
{
return
KernelSignature
(
"temporal_shift_grad"
,
{
GradVarName
(
"Out"
)},
{
"seg_num"
,
"shift_ratio"
,
"data_format"
},
{
GradVarName
(
"X"
)});
}
}
// namespace phi
PD_REGISTER_ARG_MAPPING_FN
(
temporal_shift
,
phi
::
TemporalShiftOpArgumentMapping
);
PD_REGISTER_ARG_MAPPING_FN
(
temporal_shift_grad
,
phi
::
TemporalShiftGradOpArgumentMapping
);
python/paddle/fluid/tests/unittests/test_activation_nn_grad.py
浏览文件 @
07473786
...
...
@@ -342,4 +342,5 @@ class TestLogDoubleGradCheck(unittest.TestCase):
if
__name__
==
"__main__"
:
paddle
.
enable_static
()
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_activation_sparse_op.py
0 → 100644
浏览文件 @
07473786
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
unittest
import
numpy
as
np
import
paddle.fluid
as
fluid
import
paddle.fluid.core
as
core
from
paddle.fluid.op
import
Operator
from
op_test
import
OpTest
import
paddle
class
TestSparseSquareOp
(
unittest
.
TestCase
):
def
check_with_place
(
self
,
place
):
scope
=
core
.
Scope
()
# create and initialize Grad Variable
height
=
10
rows
=
[
0
,
4
,
7
]
self
.
row_numel
=
12
x_selected_rows
=
scope
.
var
(
'X'
).
get_selected_rows
()
x_selected_rows
.
set_height
(
height
)
x_selected_rows
.
set_rows
(
rows
)
np_array
=
np
.
ones
((
len
(
rows
),
self
.
row_numel
)).
astype
(
"float32"
)
np_array
[
0
,
0
]
=
2.0
np_array
[
2
,
8
]
=
4.0
x_tensor
=
x_selected_rows
.
get_tensor
()
x_tensor
.
set
(
np_array
,
place
)
out_selected_rows
=
scope
.
var
(
'Out'
).
get_selected_rows
()
# create and run sqrt operator
square_op
=
Operator
(
"square"
,
X
=
'X'
,
Out
=
'Out'
)
square_op
.
run
(
scope
,
place
)
# get and compare result
result_array
=
np
.
array
(
out_selected_rows
.
get_tensor
())
self
.
assertTrue
(
np
.
array_equal
(
result_array
,
np
.
square
(
np_array
)))
def
test_sparse_acti
(
self
):
places
=
[
core
.
CPUPlace
()]
if
core
.
is_compiled_with_cuda
():
places
.
append
(
core
.
CUDAPlace
(
0
))
for
place
in
places
:
self
.
check_with_place
(
place
)
class
TestSparseSqrtOp
(
unittest
.
TestCase
):
def
check_with_place
(
self
,
place
):
scope
=
core
.
Scope
()
# create and initialize Grad Variable
height
=
10
rows
=
[
0
,
4
,
7
]
self
.
row_numel
=
12
x_selected_rows
=
scope
.
var
(
'X1'
).
get_selected_rows
()
x_selected_rows
.
set_height
(
height
)
x_selected_rows
.
set_rows
(
rows
)
np_array
=
np
.
ones
((
len
(
rows
),
self
.
row_numel
)).
astype
(
"float32"
)
np_array
[
0
,
0
]
=
2.0
np_array
[
2
,
8
]
=
4.0
x_tensor
=
x_selected_rows
.
get_tensor
()
x_tensor
.
set
(
np_array
,
place
)
out_selected_rows
=
scope
.
var
(
'Out1'
).
get_selected_rows
()
# create and run sqrt operator
sqrt_op
=
Operator
(
"sqrt"
,
X
=
'X1'
,
Out
=
'Out1'
)
sqrt_op
.
run
(
scope
,
place
)
# get and compare result
result_array
=
np
.
array
(
out_selected_rows
.
get_tensor
())
self
.
assertTrue
(
np
.
allclose
(
result_array
,
np
.
sqrt
(
np_array
)))
def
test_sparse_acti
(
self
):
places
=
[
core
.
CPUPlace
()]
if
core
.
is_compiled_with_cuda
():
places
.
append
(
core
.
CUDAPlace
(
0
))
for
place
in
places
:
self
.
check_with_place
(
place
)
if
__name__
==
"__main__"
:
paddle
.
enable_static
()
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_clip_by_norm_op.py
浏览文件 @
07473786
...
...
@@ -16,6 +16,7 @@ from __future__ import print_function
import
unittest
import
numpy
as
np
import
paddle
from
op_test
import
OpTest
import
paddle.fluid
as
fluid
...
...
@@ -153,4 +154,5 @@ class TestClipByNormOpWithSelectedRows(unittest.TestCase):
if
__name__
==
'__main__'
:
paddle
.
enable_static
()
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_temporal_shift_op.py
浏览文件 @
07473786
...
...
@@ -143,4 +143,5 @@ class TestTemporalShiftAPI(unittest.TestCase):
if
__name__
==
"__main__"
:
paddle
.
enable_static
()
unittest
.
main
()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录