Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
91bb52cd
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2302
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
91bb52cd
编写于
3月 30, 2022
作者:
H
hong
提交者:
GitHub
3月 30, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Revert "Revert "Move some activation to phi (#40727)" (#41056)" (#41095)
This reverts commit
05f3d48e
.
上级
abd2df4c
变更
29
展开全部
隐藏空白更改
内联
并排
Showing
29 changed file
with
1908 addition
and
931 deletion
+1908
-931
paddle/fluid/framework/new_executor/standalone_executor_test.cc
.../fluid/framework/new_executor/standalone_executor_test.cc
+2
-1
paddle/fluid/operators/activation_op.cc
paddle/fluid/operators/activation_op.cc
+9
-77
paddle/fluid/operators/activation_op.h
paddle/fluid/operators/activation_op.h
+85
-399
paddle/fluid/operators/activation_op.kps
paddle/fluid/operators/activation_op.kps
+23
-348
paddle/fluid/operators/math/CMakeLists.txt
paddle/fluid/operators/math/CMakeLists.txt
+2
-2
paddle/fluid/operators/math/selected_rows_functor.cc
paddle/fluid/operators/math/selected_rows_functor.cc
+1
-0
paddle/fluid/operators/temporal_shift_op.h
paddle/fluid/operators/temporal_shift_op.h
+1
-89
paddle/phi/kernels/activation_kernel.h
paddle/phi/kernels/activation_kernel.h
+22
-0
paddle/phi/kernels/cpu/activation_grad_kernel.cc
paddle/phi/kernels/cpu/activation_grad_kernel.cc
+54
-0
paddle/phi/kernels/cpu/activation_kernel.cc
paddle/phi/kernels/cpu/activation_kernel.cc
+31
-1
paddle/phi/kernels/cpu/temporal_shift_grad_kernel.cc
paddle/phi/kernels/cpu/temporal_shift_grad_kernel.cc
+136
-0
paddle/phi/kernels/cpu/temporal_shift_kernel.cc
paddle/phi/kernels/cpu/temporal_shift_kernel.cc
+131
-0
paddle/phi/kernels/funcs/activation_functor.h
paddle/phi/kernels/funcs/activation_functor.h
+589
-0
paddle/phi/kernels/gpu/activation_grad_kernel.cu
paddle/phi/kernels/gpu/activation_grad_kernel.cu
+65
-0
paddle/phi/kernels/gpu/activation_kernel.cu
paddle/phi/kernels/gpu/activation_kernel.cu
+54
-0
paddle/phi/kernels/gpu/temporal_shift_grad_kernel.cu
paddle/phi/kernels/gpu/temporal_shift_grad_kernel.cu
+149
-0
paddle/phi/kernels/gpu/temporal_shift_kernel.cu
paddle/phi/kernels/gpu/temporal_shift_kernel.cu
+148
-0
paddle/phi/kernels/impl/activation_grad_impl.h
paddle/phi/kernels/impl/activation_grad_impl.h
+18
-0
paddle/phi/kernels/impl/activation_impl.h
paddle/phi/kernels/impl/activation_impl.h
+16
-0
paddle/phi/kernels/selected_rows/activation_kernel.cc
paddle/phi/kernels/selected_rows/activation_kernel.cc
+68
-0
paddle/phi/kernels/selected_rows/activation_kernel.h
paddle/phi/kernels/selected_rows/activation_kernel.h
+34
-0
paddle/phi/kernels/temporal_shift_grad_kernel.h
paddle/phi/kernels/temporal_shift_grad_kernel.h
+29
-0
paddle/phi/kernels/temporal_shift_kernel.h
paddle/phi/kernels/temporal_shift_kernel.h
+29
-0
paddle/phi/ops/compat/activation_sig.cc
paddle/phi/ops/compat/activation_sig.cc
+68
-14
paddle/phi/ops/compat/temporal_shift_sig.cc
paddle/phi/ops/compat/temporal_shift_sig.cc
+39
-0
python/paddle/fluid/tests/unittests/test_activation_nn_grad.py
...n/paddle/fluid/tests/unittests/test_activation_nn_grad.py
+1
-0
python/paddle/fluid/tests/unittests/test_activation_sparse_op.py
...paddle/fluid/tests/unittests/test_activation_sparse_op.py
+101
-0
python/paddle/fluid/tests/unittests/test_clip_by_norm_op.py
python/paddle/fluid/tests/unittests/test_clip_by_norm_op.py
+2
-0
python/paddle/fluid/tests/unittests/test_temporal_shift_op.py
...on/paddle/fluid/tests/unittests/test_temporal_shift_op.py
+1
-0
未找到文件。
paddle/fluid/framework/new_executor/standalone_executor_test.cc
浏览文件 @
91bb52cd
...
...
@@ -53,7 +53,7 @@ USE_OP_ITSELF(tanh_grad);
USE_OP
(
sum
);
USE_OP_ITSELF
(
slice_grad
);
USE_OP_ITSELF
(
lookup_table_grad
);
USE_OP
(
sqrt
);
USE_OP
_ITSELF
(
sqrt
);
USE_OP_ITSELF
(
elementwise_max
);
USE_OP_ITSELF
(
elementwise_div
);
USE_OP_ITSELF
(
sgd
);
...
...
@@ -83,6 +83,7 @@ PD_DECLARE_KERNEL(max_raw, GPU, ALL_LAYOUT);
PD_DECLARE_KERNEL
(
sgd
,
GPU
,
ALL_LAYOUT
);
PD_DECLARE_KERNEL
(
slice
,
GPU
,
ALL_LAYOUT
);
PD_DECLARE_KERNEL
(
slice_grad
,
GPU
,
ALL_LAYOUT
);
PD_DECLARE_KERNEL
(
sqrt
,
GPU
,
ALL_LAYOUT
);
DECLARE_double
(
eager_delete_tensor_gb
);
...
...
paddle/fluid/operators/activation_op.cc
浏览文件 @
91bb52cd
...
...
@@ -1496,6 +1496,14 @@ REGISTER_ACTIVATION_OP(hard_sigmoid, HardSigmoid, HardSigmoidFunctor,
HardSigmoidGradFunctor
);
REGISTER_ACTIVATION_OP
(
logsigmoid
,
LogSigmoid
,
LogSigmoidFunctor
,
LogSigmoidGradFunctor
);
REGISTER_ACTIVATION_OP
(
expm1
,
Expm1
,
Expm1Functor
,
Expm1GradFunctor
);
REGISTER_ACTIVATION_OP
(
softplus
,
Softplus
,
SoftplusFunctor
,
SoftplusGradFunctor
);
REGISTER_ACTIVATION_OP
(
mish
,
Mish
,
MishFunctor
,
MishGradFunctor
);
REGISTER_ACTIVATION_OP
(
stanh
,
STanh
,
STanhFunctor
,
STanhGradFunctor
);
REGISTER_ACTIVATION_OP
(
reciprocal
,
Reciprocal
,
ReciprocalFunctor
,
ReciprocalGradFunctor
);
REGISTER_ACTIVATION_OP
(
log2
,
Log2
,
Log2Functor
,
Log2GradFunctor
);
REGISTER_ACTIVATION_OP
(
log10
,
Log10
,
Log10Functor
,
Log10GradFunctor
);
REGISTER_ACTIVATION_OP
(
log1p
,
Log1p
,
Log1pFunctor
,
Log1pGradFunctor
);
...
...
@@ -1630,12 +1638,7 @@ REGISTER_OPERATOR(logit, ops::LogitOp, ops::LogitOpMaker,
ops
::
LogitGradOpMaker
<
paddle
::
framework
::
OpDesc
>
,
ops
::
LogitGradOpMaker
<
paddle
::
imperative
::
OpBase
>
);
REGISTER_OPERATOR
(
logit_grad
,
ops
::
LogitGradOp
);
REGISTER_OP_CPU_KERNEL
(
logit
,
ops
::
LogitKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
ops
::
LogitKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
>
);
REGISTER_OP_CPU_KERNEL
(
logit_grad
,
ops
::
LogitGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
ops
::
LogitGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
>
);
/* ========================================================================== */
/* ======================== celu register ============================
...
...
@@ -1684,7 +1687,6 @@ REGISTER_OPERATOR(
ops
::
ActivationOpDoubleGrad
<
ops
::
SqrtGradGradFunctor
<
float
>::
FwdDeps
()
>
,
ops
::
ActivationDoubleGradOpInplaceInferer
);
REGISTER_ACTIVATION_CPU_KERNEL
(
sqrt
,
Sqrt
,
SqrtFunctor
,
SqrtGradFunctor
);
REGISTER_OP_CPU_KERNEL
(
sqrt_grad_grad
,
ops
::
SqrtDoubleGradKernel
<
plat
::
CPUDeviceContext
,
ops
::
SqrtGradGradFunctor
<
float
>>
,
...
...
@@ -1712,7 +1714,6 @@ REGISTER_OPERATOR(
ops
::
ActivationOpDoubleGrad
<
ops
::
RsqrtGradGradFunctor
<
float
>::
FwdDeps
()
>
,
ops
::
ActivationDoubleGradOpInplaceInferer
);
REGISTER_ACTIVATION_CPU_KERNEL
(
rsqrt
,
Rsqrt
,
RsqrtFunctor
,
RsqrtGradFunctor
);
REGISTER_OP_CPU_KERNEL
(
rsqrt_grad_grad
,
ops
::
RsqrtDoubleGradKernel
<
plat
::
CPUDeviceContext
,
...
...
@@ -1741,25 +1742,6 @@ REGISTER_OPERATOR(
ops
::
ActivationOpDoubleGrad
<
ops
::
SquareGradGradFunctor
<
float
>::
FwdDeps
()
>
,
ops
::
ActivationDoubleGradOpInplaceInferer
);
REGISTER_OP_CPU_KERNEL
(
square
,
ops
::
ActivationKernel
<
paddle
::
platform
::
CPUDeviceContext
,
ops
::
SquareFunctor
<
float
>>
,
ops
::
ActivationKernel
<
paddle
::
platform
::
CPUDeviceContext
,
ops
::
SquareFunctor
<
double
>>
,
ops
::
ActivationKernel
<
paddle
::
platform
::
CPUDeviceContext
,
ops
::
SquareFunctor
<
int
>>
,
ops
::
ActivationKernel
<
paddle
::
platform
::
CPUDeviceContext
,
ops
::
SquareFunctor
<
int64_t
>>
);
REGISTER_OP_CPU_KERNEL
(
square_grad
,
ops
::
ActivationGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
ops
::
SquareGradFunctor
<
float
>>
,
ops
::
ActivationGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
ops
::
SquareGradFunctor
<
double
>>
,
ops
::
ActivationGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
ops
::
SquareGradFunctor
<
int
>>
,
ops
::
ActivationGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
ops
::
SquareGradFunctor
<
int64_t
>>
);
REGISTER_OP_CPU_KERNEL
(
square_grad_grad
,
ops
::
SquareDoubleGradKernel
<
plat
::
CPUDeviceContext
,
...
...
@@ -1798,54 +1780,6 @@ REGISTER_OPERATOR(
REGISTER_OPERATOR
(
exp_grad
,
ops
::
ActivationOpGrad
,
ops
::
ActivationGradOpInplaceInferer
);
REGISTER_OP_CPU_KERNEL
(
exp
,
ops
::
ActivationKernel
<
paddle
::
platform
::
CPUDeviceContext
,
ops
::
ExpFunctor
<
float
>>
,
ops
::
ActivationKernel
<
paddle
::
platform
::
CPUDeviceContext
,
ops
::
ExpFunctor
<
double
>>
,
ops
::
ActivationKernel
<
paddle
::
platform
::
CPUDeviceContext
,
ops
::
ExpFunctor
<
int
>>
,
ops
::
ActivationKernel
<
paddle
::
platform
::
CPUDeviceContext
,
ops
::
ExpFunctor
<
int64_t
>>
);
REGISTER_OP_CPU_KERNEL
(
exp_grad
,
ops
::
ActivationGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
ops
::
ExpGradFunctor
<
float
>>
,
ops
::
ActivationGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
ops
::
ExpGradFunctor
<
double
>>
,
ops
::
ActivationGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
ops
::
ExpGradFunctor
<
int
>>
,
ops
::
ActivationGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
ops
::
ExpGradFunctor
<
int64_t
>>
);
/* ========================================================================== */
/* ========================== expm1 register ============================ */
REGISTER_OPERATOR
(
expm1
,
ops
::
ActivationOp
,
ops
::
Expm1OpMaker
,
ops
::
ActivationOpInferVarType
,
ops
::
ActivationGradOpMaker
<
ops
::
Expm1GradFunctor
<
float
>::
FwdDeps
(),
paddle
::
framework
::
OpDesc
>
,
ops
::
ActivationGradOpMaker
<
ops
::
Expm1GradFunctor
<
float
>::
FwdDeps
(),
paddle
::
imperative
::
OpBase
>
,
std
::
conditional
<
ops
::
CanInplaceAct
<
ops
::
Expm1GradFunctor
<
float
>>
(),
ops
::
ActFwdInplaceInferer
,
void
>::
type
);
REGISTER_OPERATOR
(
expm1_grad
,
ops
::
ActivationOpGrad
,
ops
::
ActivationGradOpInplaceInferer
);
REGISTER_OP_CPU_KERNEL
(
expm1
,
ops
::
ActivationKernel
<
paddle
::
platform
::
CPUDeviceContext
,
ops
::
Expm1Functor
<
float
>>
,
ops
::
ActivationKernel
<
paddle
::
platform
::
CPUDeviceContext
,
ops
::
Expm1Functor
<
double
>>
,
ops
::
ActivationKernel
<
paddle
::
platform
::
CPUDeviceContext
,
ops
::
Expm1Functor
<
plat
::
float16
>>
);
REGISTER_OP_CPU_KERNEL
(
expm1_grad
,
ops
::
ActivationGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
ops
::
Expm1GradFunctor
<
float
>>
,
ops
::
ActivationGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
ops
::
Expm1GradFunctor
<
double
>>
,
ops
::
ActivationGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
ops
::
Expm1GradFunctor
<
plat
::
float16
>>
);
/* ========================================================================== */
/* ========================== Log register ==================================*/
REGISTER_OPERATOR
(
log
,
ops
::
ActivationOp
,
ops
::
LogOpMaker
,
ops
::
ActivationOpInferVarType
,
...
...
@@ -1864,8 +1798,6 @@ REGISTER_OPERATOR(
ops
::
ActivationOpDoubleGrad
<
ops
::
LogGradGradFunctor
<
float
>::
FwdDeps
()
>
,
ops
::
ActivationDoubleGradOpInplaceInferer
);
/* ========================================================================== */
/* ========================== register checkpoint ===========================*/
REGISTER_OP_VERSION
(
leaky_relu
)
.
AddCheckpoint
(
...
...
paddle/fluid/operators/activation_op.h
浏览文件 @
91bb52cd
此差异已折叠。
点击以展开。
paddle/fluid/operators/activation_op.kps
浏览文件 @
91bb52cd
...
...
@@ -20,140 +20,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
template <typename T>
struct CudaReciprocalFunctor : public BaseActivationFunctor<T> {
T one = static_cast<T>(1.0f);
// reciprocal(x) = 1 / x
__device__ __forceinline__ T operator()(const T x) const { return one / x; }
};
template <typename T>
struct CudaReciprocalGradFunctor : public BaseActivationFunctor<T> {
// dx = -dout * out^2
__device__ __forceinline__ T operator()(const T dout, const T out) const {
return -dout * out * out;
}
static constexpr ActBwdOpFwdDeps FwdDeps() {
return ActBwdOpFwdDeps::kDepOut;
}
};
template <typename T>
struct CudaExpFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type;
// exp(x) = exp(x)
__device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(exp(x));
}
};
template <typename T>
struct CudaExpGradFunctor : public BaseActivationFunctor<T> {
// dx = dout * out
__device__ __forceinline__ T operator()(const T dout, const T out) const {
return dout * out;
}
static constexpr ActBwdOpFwdDeps FwdDeps() {
return ActBwdOpFwdDeps::kDepOut;
}
};
template <typename T>
struct CudaExpm1Functor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type;
// expm1(x) = expm1(x)
__device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(expm1(x));
}
};
template <typename T>
struct CudaExpm1GradFunctor : public BaseActivationFunctor<T> {
// dx = dout * out
__device__ __forceinline__ T operator()(const T dout, const T out) const {
return dout * out + dout;
}
static constexpr ActBwdOpFwdDeps FwdDeps() {
return ActBwdOpFwdDeps::kDepOut;
}
};
template <typename T>
struct CudaSquareFunctor : public BaseActivationFunctor<T> {
// square(x) = x * x
__device__ __forceinline__ T operator()(const T x) const { return x * x; }
};
template <typename T>
struct CudaSquareGradFunctor : public BaseActivationFunctor<T> {
T two = static_cast<T>(2.0f);
// dx = dout * 2 * x
__device__ __forceinline__ T operator()(const T dout, const T x) const {
return dout * two * x;
}
static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};
template <typename T>
struct CudaSqrtFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type;
// sqrt(x) = sqrt(x)
__device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(sqrt(x));
}
};
template <typename T>
struct CudaSqrtGradFunctor : public BaseActivationFunctor<T> {
T one_half = static_cast<T>(0.5f);
// dx = dout * 0.5 / out
__device__ __forceinline__ T operator()(const T dout, const T out) const {
return one_half * dout / out;
}
static constexpr ActBwdOpFwdDeps FwdDeps() {
return ActBwdOpFwdDeps::kDepOut;
}
};
template <typename T>
struct CudaRsqrtFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type;
// rsqrt(x) = rsqrt(x)
__device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(rsqrt(x));
}
};
template <typename T>
struct CudaRsqrtGradFunctor : public BaseActivationFunctor<T> {
T minus_one_half = static_cast<T>(-0.5f);
// dx = -0.5 * dout * out^3
__device__ __forceinline__ T operator()(const T dout, const T out) const {
return minus_one_half * dout * out * out * out;
}
static constexpr ActBwdOpFwdDeps FwdDeps() {
return ActBwdOpFwdDeps::kDepOut;
}
};
template <typename T>
struct CudaSoftReluFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type;
...
...
@@ -201,119 +67,6 @@ struct CudaSoftReluGradFunctor : public BaseActivationFunctor<T> {
}
};
template <typename T>
struct CudaSTanhFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type;
float scale_a;
float scale_b;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"scale_a", &scale_a}, {"scale_b", &scale_b}};
}
// stanh(x) = b * tanh(a * x)
__device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x);
MPType a = static_cast<MPType>(scale_a);
MPType b = static_cast<MPType>(scale_b);
return static_cast<T>(b * tanh(a * x));
}
};
template <typename T>
struct CudaSTanhGradFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type;
MPType one = static_cast<MPType>(1.0f);
float scale_a;
float scale_b;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"scale_a", &scale_a}, {"scale_b", &scale_b}};
}
// dx = dout * a * b * (1 - tanh(a * x) * tanh(a * x))
__device__ __forceinline__ T operator()(const T arg_dout,
const T arg_x) const {
MPType dout = static_cast<MPType>(arg_dout);
MPType x = static_cast<MPType>(arg_x);
MPType a = static_cast<MPType>(scale_a);
MPType b = static_cast<MPType>(scale_b);
MPType temp = tanh(a * x);
return static_cast<T>(dout * a * b * (one - temp * temp));
}
static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};
template <typename T>
struct CudaSoftplusFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type;
MPType one = static_cast<MPType>(1.0f);
float beta;
float threshold;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"beta", &beta}, {"threshold", &threshold}};
}
// softplus(x) = beta * x > threshold ? x : log(1 + exp(beta * x)) / beta
__device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x);
MPType b = static_cast<MPType>(beta);
MPType t = static_cast<MPType>(threshold);
MPType x_beta = x * beta;
return static_cast<T>(x_beta > t ? x : log(one + exp(x_beta)) / b);
}
};
template <typename T>
struct CudaSoftplusGradFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type;
MPType one = static_cast<MPType>(1.0f);
float beta;
float threshold;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"beta", &beta}, {"threshold", &threshold}};
}
// dx = x * beta > threshold ? dout : dout / (1 + exp(-beta * x))
__device__ __forceinline__ T operator()(const T arg_dout,
const T arg_x) const {
MPType dout = static_cast<MPType>(arg_dout);
MPType x = static_cast<MPType>(arg_x);
MPType b = static_cast<MPType>(beta);
MPType t = static_cast<MPType>(threshold);
MPType x_beta = x * beta;
return x_beta > t ? arg_dout : static_cast<T>(dout / (one + exp(-x_beta)));
}
static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};
template <typename T>
struct CudaSoftsignFunctor : public BaseActivationFunctor<T> {
T one = static_cast<T>(1.0f);
// softsign(x) = x / (1 + abs(x))
__device__ __forceinline__ T operator()(const T x) const {
return x / (one + abs(x));
}
};
template <typename T>
struct CudaSoftsignGradFunctor : public BaseActivationFunctor<T> {
T one = static_cast<T>(1.0f);
// dx = dout / (1 + abs(x))^2
__device__ __forceinline__ T operator()(const T dout, const T x) const {
T temp = one + abs(x);
return dout / (temp * temp);
}
static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};
template <typename T>
struct CudaRelu6Functor : public BaseActivationFunctor<T> {
T zero = static_cast<T>(0.0f);
...
...
@@ -351,49 +104,23 @@ struct CudaRelu6GradFunctor : public BaseActivationFunctor<T> {
};
template <typename T>
struct CudaMishFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type;
MPType one = static_cast<MPType>(1.0f);
float threshold;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"threshold", &threshold}};
}
struct CudaSoftsignFunctor : public BaseActivationFunctor<T> {
T one = static_cast<T>(1.0f);
// mish(x) = x * tanh(softplus(x))
// softplus(x) = x, if x > threshold
// = ln(1 + exp(x)), otherwise
// Inputs: args[0], the input x
__device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x);
MPType sp = (x > static_cast<MPType>(threshold)) ? x : log(one + exp(x));
return static_cast<T>(x * tanh(sp));
// softsign(x) = x / (1 + abs(x))
__device__ __forceinline__ T operator()(const T x) const {
return x / (one + abs(x));
}
};
template <typename T>
struct CudaMishGradFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type;
MPType one = static_cast<MPType>(1.0f);
float threshold;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"threshold", &threshold}};
}
struct CudaSoftsignGradFunctor : public BaseActivationFunctor<T> {
T one = static_cast<T>(1.0f);
// dx = dout * (tanh(sp) + x * (1 - tanh(sp) ** 2) * (1 - exp(-sp)))
// sp = softplus(x)
// Inputs: args[0], the input dout
// args[1], the input x
__device__ __forceinline__ T operator()(const T arg_dout,
const T arg_x) const {
MPType dout = static_cast<MPType>(arg_dout);
MPType x = static_cast<MPType>(arg_x);
MPType sp = (x > static_cast<MPType>(threshold)) ? x : log(one + exp(x));
MPType gsp =
(x > static_cast<MPType>(threshold)) ? one : one / (one + exp(-x));
MPType tsp = tanh(sp);
return static_cast<T>(dout * (tsp + x * (one - tsp * tsp) * gsp));
// dx = dout / (1 + abs(x))^2
__device__ __forceinline__ T operator()(const T dout, const T x) const {
T temp = one + abs(x);
return dout / (temp * temp);
}
static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
...
...
@@ -558,6 +285,16 @@ using CudaCeilFunctor = phi::funcs::CudaCeilFunctor<T>;
template <typename T>
using CudaZeroGradFunctor = phi::funcs::CudaZeroGradFunctor<T>;
USE_PHI_FUNCTOR(CudaExp)
USE_PHI_FUNCTOR(CudaExpm1)
USE_PHI_FUNCTOR(CudaMish)
USE_PHI_FUNCTOR(CudaSTanh)
USE_PHI_FUNCTOR(CudaReciprocal)
USE_PHI_FUNCTOR(CudaSquare)
USE_PHI_FUNCTOR(CudaSqrt)
USE_PHI_FUNCTOR(CudaRsqrt)
USE_PHI_FUNCTOR(CudaSoftplus)
template <typename T>
using CudaELUGradNegativeAlphaFunctor =
phi::funcs::CudaELUGradNegativeAlphaFunctor<T>;
...
...
@@ -636,8 +373,6 @@ REGISTER_OP_CUDA_KERNEL(
/* ========================================================================== */
/* =========================== sqrt register ============================= */
REGISTER_ACTIVATION_CUDA_KERNEL(sqrt, Sqrt, CudaSqrtFunctor,
CudaSqrtGradFunctor);
REGISTER_OP_CUDA_KERNEL(
sqrt_grad_grad,
...
...
@@ -653,8 +388,6 @@ REGISTER_OP_CUDA_KERNEL(
/* =========================== rsqrt register =============================
*/
REGISTER_ACTIVATION_CUDA_KERNEL(rsqrt, Rsqrt, CudaRsqrtFunctor,
CudaRsqrtGradFunctor);
REGISTER_OP_CUDA_KERNEL(
rsqrt_grad_grad,
...
...
@@ -667,8 +400,6 @@ REGISTER_OP_CUDA_KERNEL(
/* ========================================================================== */
/* =========================== square register ============================ */
REGISTER_ACTIVATION_CUDA_KERNEL_INT(square, Square, CudaSquareFunctor,
CudaSquareGradFunctor);
REGISTER_OP_CUDA_KERNEL(
square_grad_grad,
...
...
@@ -688,75 +419,19 @@ REGISTER_OP_CUDA_KERNEL(
/* ========================== logit register ============================ */
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(
logit, ops::LogitKernel<paddle::platform::CUDADeviceContext, float>,
ops::LogitKernel<paddle::platform::CUDADeviceContext, double>,
ops::LogitKernel<paddle::platform::CUDADeviceContext,
paddle::platform::float16>);
REGISTER_OP_CUDA_KERNEL(
logit_grad,
ops::LogitGradKernel<paddle::platform::CUDADeviceContext, float>,
ops::LogitGradKernel<paddle::platform::CUDADeviceContext, double>,
ops::LogitGradKernel<paddle::platform::CUDADeviceContext,
paddle::platform::float16>);
/* ========================================================================== */
/* ========================== exp register ============================ */
REGISTER_OP_CUDA_KERNEL(
exp, ops::ActivationCudaKernel<plat::CUDADeviceContext,
ops::CudaExpFunctor<float>>,
ops::ActivationCudaKernel<plat::CUDADeviceContext,
ops::CudaExpFunctor<double>>,
ops::ActivationKernel<plat::CUDADeviceContext, ops::ExpFunctor<int>>,
ops::ActivationKernel<plat::CUDADeviceContext, ops::ExpFunctor<int64_t>>,
ops::ActivationCudaKernel<plat::CUDADeviceContext,
ops::CudaExpFunctor<plat::float16>>);
REGISTER_OP_CUDA_KERNEL(
exp_grad, ops::ActivationGradCudaKernel<plat::CUDADeviceContext,
ops::CudaExpGradFunctor<float>>,
ops::ActivationGradCudaKernel<plat::CUDADeviceContext,
ops::CudaExpGradFunctor<double>>,
ops::ActivationGradCudaKernel<plat::CUDADeviceContext,
ops::CudaExpGradFunctor<int>>,
ops::ActivationGradCudaKernel<plat::CUDADeviceContext,
ops::CudaExpGradFunctor<int64_t>>,
ops::ActivationGradCudaKernel<plat::CUDADeviceContext,
ops::CudaExpGradFunctor<plat::float16>>);
/* ========================================================================== */
/* ========================== expm1 register ============================ */
REGISTER_OP_CUDA_KERNEL(
expm1, ops::ActivationCudaKernel<plat::CUDADeviceContext,
ops::CudaExpm1Functor<float>>,
ops::ActivationCudaKernel<plat::CUDADeviceContext,
ops::CudaExpm1Functor<double>>,
ops::ActivationCudaKernel<plat::CUDADeviceContext,
ops::CudaExpm1Functor<plat::float16>>);
REGISTER_OP_CUDA_KERNEL(
expm1_grad, ops::ActivationGradCudaKernel<plat::CUDADeviceContext,
ops::CudaExpm1GradFunctor<float>>,
ops::ActivationGradCudaKernel<plat::CUDADeviceContext,
ops::CudaExpm1GradFunctor<double>>,
ops::ActivationGradCudaKernel<plat::CUDADeviceContext,
ops::CudaExpm1GradFunctor<plat::float16>>);
/* ========================================================================== */
#define FOR_EACH_ACTIVATION_CUDA_OP(__macro) \
__macro(softshrink, SoftShrink, CudaSoftShrinkFunctor, \
CudaSoftShrinkGradFunctor); \
__macro(reciprocal, Reciprocal, CudaReciprocalFunctor, \
CudaReciprocalGradFunctor); \
__macro(soft_relu, SoftRelu, CudaSoftReluFunctor, CudaSoftReluGradFunctor); \
__macro(stanh, STanh, CudaSTanhFunctor, CudaSTanhGradFunctor); \
__macro(softplus, Softplus, CudaSoftplusFunctor, CudaSoftplusGradFunctor); \
__macro(softsign, Softsign, CudaSoftsignFunctor, CudaSoftsignGradFunctor); \
__macro(relu6, Relu6, CudaRelu6Functor, CudaRelu6GradFunctor); \
__macro(tanh_shrink, TanhShrink, CudaTanhShrinkFunctor, \
CudaTanhShrinkGradFunctor); \
__macro(hard_shrink, HardShrink, CudaHardShrinkFunctor, \
CudaHardShrinkGradFunctor); \
__macro(mish, Mish, CudaMishFunctor, CudaMishGradFunctor);
__macro(softsign, Softsign, CudaSoftsignFunctor, CudaSoftsignGradFunctor);
FOR_EACH_ACTIVATION_CUDA_OP(REGISTER_ACTIVATION_CUDA_KERNEL)
#ifdef PADDLE_WITH_XPU_KP
...
...
paddle/fluid/operators/math/CMakeLists.txt
浏览文件 @
91bb52cd
...
...
@@ -22,9 +22,9 @@ math_library(sampler DEPS generator)
math_library
(
maxouting
)
if
(
WITH_MKLDNN
)
math_library
(
selected_rows_functor DEPS selected_rows_utils math_function blas mkldnn_axpy_handler
)
math_library
(
selected_rows_functor DEPS selected_rows_utils math_function blas mkldnn_axpy_handler
mixed_vector
)
else
()
math_library
(
selected_rows_functor DEPS selected_rows_utils math_function blas
)
math_library
(
selected_rows_functor DEPS selected_rows_utils math_function blas
mixed_vector
)
endif
()
math_library
(
sequence_padding
)
...
...
paddle/fluid/operators/math/selected_rows_functor.cc
浏览文件 @
91bb52cd
...
...
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/fluid/framework/mixed_vector.h"
#include "paddle/fluid/platform/device/device_wrapper.h"
#ifdef PADDLE_WITH_MKLDNN
...
...
paddle/fluid/operators/temporal_shift_op.h
浏览文件 @
91bb52cd
...
...
@@ -19,56 +19,6 @@ namespace operators {
using
Tensor
=
framework
::
Tensor
;
using
DataLayout
=
framework
::
DataLayout
;
template
<
typename
T
>
void
TemporalShiftFwNCHW
(
const
T
*
input
,
T
*
output
,
const
int
ntchw
,
const
int
tchw
,
const
int
chw
,
const
int
hw
,
const
int
t
,
const
int
c1
,
const
int
c2
)
{
int
src_it
=
0
;
for
(
int
i
=
0
;
i
<
ntchw
;
i
++
)
{
int
it
=
(
i
%
tchw
)
/
chw
;
int
ic
=
(
i
%
chw
)
/
hw
;
if
(
ic
<
c1
)
{
src_it
=
it
-
1
;
}
else
if
(
ic
<
c2
)
{
src_it
=
it
+
1
;
}
else
{
src_it
=
it
;
}
if
(
src_it
<
0
||
src_it
>=
t
)
{
output
[
i
]
=
0
;
}
else
{
output
[
i
]
=
input
[
i
+
(
src_it
-
it
)
*
chw
];
}
}
}
template
<
typename
T
>
void
TemporalShiftFwNHWC
(
const
T
*
input
,
T
*
output
,
const
int
nthwc
,
const
int
thwc
,
const
int
hwc
,
const
int
t
,
const
int
c
,
const
int
c1
,
const
int
c2
)
{
int
src_it
=
0
;
for
(
int
i
=
0
;
i
<
nthwc
;
i
++
)
{
int
it
=
(
i
%
thwc
)
/
hwc
;
int
ic
=
i
%
c
;
if
(
ic
<
c1
)
{
src_it
=
it
-
1
;
}
else
if
(
ic
<
c2
)
{
src_it
=
it
+
1
;
}
else
{
src_it
=
it
;
}
if
(
src_it
<
0
||
src_it
>=
t
)
{
output
[
i
]
=
0
;
}
else
{
output
[
i
]
=
input
[
i
+
(
src_it
-
it
)
*
hwc
];
}
}
}
template
<
typename
T
>
void
TemporalShiftBwNCHW
(
const
T
*
output_grad
,
T
*
input_grad
,
const
int
ntchw
,
const
int
tchw
,
const
int
chw
,
const
int
hw
,
...
...
@@ -122,45 +72,7 @@ void TemporalShiftBwNHWC(const T* output_grad, T* input_grad, const int nthwc,
template
<
typename
T
>
class
TemporalShiftKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
input
=
ctx
.
Input
<
Tensor
>
(
"X"
);
auto
*
output
=
ctx
.
Output
<
Tensor
>
(
"Out"
);
int
t
=
ctx
.
Attr
<
int
>
(
"seg_num"
);
float
shift_ratio
=
ctx
.
Attr
<
float
>
(
"shift_ratio"
);
const
std
::
string
data_format_str
=
ctx
.
Attr
<
std
::
string
>
(
"data_format"
);
const
DataLayout
data_layout
=
framework
::
StringToDataLayout
(
data_format_str
);
const
int
nt
=
input
->
dims
()[
0
];
const
int
c
=
(
data_layout
==
DataLayout
::
kNCHW
?
input
->
dims
()[
1
]
:
input
->
dims
()[
3
]);
const
int
h
=
(
data_layout
==
DataLayout
::
kNCHW
?
input
->
dims
()[
2
]
:
input
->
dims
()[
1
]);
const
int
w
=
(
data_layout
==
DataLayout
::
kNCHW
?
input
->
dims
()[
3
]
:
input
->
dims
()[
2
]);
const
int
hw
=
h
*
w
;
const
int
chw
=
c
*
hw
;
const
int
tchw
=
t
*
chw
;
const
int
ntchw
=
nt
*
chw
;
const
int
c1
=
static_cast
<
int
>
(
c
*
shift_ratio
);
const
int
c2
=
static_cast
<
int
>
(
c
*
2
*
shift_ratio
);
framework
::
DDim
out_dims
=
(
data_layout
==
DataLayout
::
kNCHW
?
phi
::
make_ddim
({
nt
,
c
,
h
,
w
})
:
phi
::
make_ddim
({
nt
,
h
,
w
,
c
}));
const
T
*
input_data
=
input
->
data
<
T
>
();
T
*
output_data
=
output
->
mutable_data
<
T
>
(
out_dims
,
ctx
.
GetPlace
());
if
(
data_layout
==
DataLayout
::
kNCHW
)
{
TemporalShiftFwNCHW
<
T
>
(
input_data
,
output_data
,
ntchw
,
tchw
,
chw
,
hw
,
t
,
c1
,
c2
);
}
else
{
TemporalShiftFwNHWC
<
T
>
(
input_data
,
output_data
,
ntchw
,
tchw
,
chw
,
t
,
c
,
c1
,
c2
);
}
}
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{}
};
template
<
typename
T
>
...
...
paddle/phi/kernels/activation_kernel.h
浏览文件 @
91bb52cd
...
...
@@ -53,6 +53,13 @@ DECLARE_ACTIVATION_KERNEL(Acosh)
DECLARE_ACTIVATION_KERNEL
(
Atanh
)
DECLARE_ACTIVATION_KERNEL
(
Relu
)
DECLARE_ACTIVATION_KERNEL
(
Tanh
)
DECLARE_ACTIVATION_KERNEL
(
Exp
)
DECLARE_ACTIVATION_KERNEL
(
Expm1
)
DECLARE_ACTIVATION_KERNEL
(
Reciprocal
)
DECLARE_ACTIVATION_KERNEL
(
Square
)
DECLARE_ACTIVATION_KERNEL
(
Sqrt
)
DECLARE_ACTIVATION_KERNEL
(
Rsqrt
)
DECLARE_ACTIVATION_KERNEL
(
TanhShrink
)
DECLARE_ACTIVATION_KERNEL
(
Silu
)
DECLARE_ACTIVATION_KERNEL
(
Sigmoid
)
...
...
@@ -73,8 +80,23 @@ DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(Elu, alpha)
DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS
(
Swish
,
beta
)
DECLARE_ACTIVATION_KERNEL_WITH_TWO_ATTRS
(
BRelu
,
t_min
,
t_max
)
DECLARE_ACTIVATION_KERNEL_WITH_TWO_ATTRS
(
STanh
,
scale_a
,
scale_b
)
DECLARE_ACTIVATION_KERNEL_WITH_TWO_ATTRS
(
HardSigmoid
,
slope
,
offset
)
DECLARE_ACTIVATION_KERNEL_WITH_TWO_ATTRS
(
Softplus
,
beta
,
threshold
)
template
<
typename
T
,
typename
Context
>
void
LogitKernel
(
const
Context
&
dev_ctx
,
const
DenseTensor
&
x
,
float
eps
,
DenseTensor
*
out
);
template
<
typename
T
,
typename
Context
>
void
MishKernel
(
const
Context
&
dev_ctx
,
const
DenseTensor
&
x
,
float
threshold
,
DenseTensor
*
out
);
template
<
typename
T
,
typename
Context
>
void
HardSwishKernel
(
const
Context
&
dev_ctx
,
const
DenseTensor
&
x
,
...
...
paddle/phi/kernels/cpu/activation_grad_kernel.cc
浏览文件 @
91bb52cd
...
...
@@ -129,6 +129,13 @@ DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(Acosh, AcoshGradFunctor);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX
(
Atanh
,
AtanhGradFunctor
);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX
(
TanhShrink
,
TanhShrinkGradFunctor
);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX
(
Silu
,
SiluGradFunctor
);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX
(
Square
,
SquareGradFunctor
);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT
(
Exp
,
ExpGradFunctor
);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT
(
Expm1
,
Expm1GradFunctor
);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT
(
Reciprocal
,
ReciprocalGradFunctor
);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT
(
Sqrt
,
SqrtGradFunctor
);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT
(
Rsqrt
,
RsqrtGradFunctor
);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX
(
LogSigmoid
,
LogSigmoidGradFunctor
);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX
(
Log
,
LogGradFunctor
);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX
(
Log2
,
Log2GradFunctor
);
...
...
@@ -157,11 +164,24 @@ DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(HardShrink,
threshold
);
DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX
(
Swish
,
SwishGradFunctor
,
beta
);
DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX
(
Mish
,
MishGradFunctor
,
threshold
);
DEFINE_CPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX
(
BRelu
,
BReluGradFunctor
,
t_min
,
t_max
);
DEFINE_CPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX
(
STanh
,
STanhGradFunctor
,
scale_a
,
scale_b
);
DEFINE_CPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX
(
Softplus
,
SoftplusGradFunctor
,
beta
,
threshold
);
DEFINE_CPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPOUT
(
HardSigmoid
,
HardSigmoidGradFunctor
,
slope
,
...
...
@@ -247,6 +267,12 @@ PD_REGISTER_ACTIVATION_GRAD_KERNEL(hard_shrink_grad, HardShrinkGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL
(
tanh_shrink_grad
,
TanhShrinkGradKernel
)
PD_REGISTER_ACTIVATION_GRAD_KERNEL
(
elu_grad
,
EluGradKernel
)
PD_REGISTER_ACTIVATION_GRAD_KERNEL
(
silu_grad
,
SiluGradKernel
)
PD_REGISTER_ACTIVATION_GRAD_KERNEL
(
mish_grad
,
MishGradKernel
)
PD_REGISTER_ACTIVATION_GRAD_KERNEL
(
stanh_grad
,
STanhGradKernel
)
PD_REGISTER_ACTIVATION_GRAD_KERNEL
(
reciprocal_grad
,
ReciprocalGradKernel
)
PD_REGISTER_ACTIVATION_GRAD_KERNEL
(
sqrt_grad
,
SqrtGradKernel
)
PD_REGISTER_ACTIVATION_GRAD_KERNEL
(
rsqrt_grad
,
RsqrtGradKernel
)
PD_REGISTER_ACTIVATION_GRAD_KERNEL
(
softplus_grad
,
SoftplusGradKernel
)
PD_REGISTER_ACTIVATION_DOUBLE_GRAD_KERNEL
(
relu_double_grad
,
ReluDoubleGradKernel
)
...
...
@@ -263,6 +289,34 @@ PD_REGISTER_KERNEL(tanh_triple_grad,
float
,
double
,
phi
::
dtype
::
float16
)
{}
PD_REGISTER_KERNEL
(
exp_grad
,
CPU
,
ALL_LAYOUT
,
phi
::
ExpGradKernel
,
float
,
double
,
int
,
int64_t
)
{}
PD_REGISTER_KERNEL
(
expm1_grad
,
CPU
,
ALL_LAYOUT
,
phi
::
Expm1GradKernel
,
float
,
double
,
phi
::
dtype
::
float16
)
{}
PD_REGISTER_KERNEL
(
logit_grad
,
CPU
,
ALL_LAYOUT
,
phi
::
LogitGradKernel
,
float
,
double
)
{}
PD_REGISTER_KERNEL
(
square_grad
,
CPU
,
ALL_LAYOUT
,
phi
::
SquareGradKernel
,
float
,
double
,
int
,
int64_t
)
{}
PD_REGISTER_ACTIVATION_GRAD_KERNEL
(
sigmoid_grad
,
SigmoidGradKernel
)
PD_REGISTER_ACTIVATION_GRAD_KERNEL
(
sigmoid_double_grad
,
SigmoidDoubleGradKernel
)
PD_REGISTER_ACTIVATION_GRAD_KERNEL
(
sigmoid_triple_grad
,
SigmoidTripleGradKernel
)
...
...
paddle/phi/kernels/cpu/activation_kernel.cc
浏览文件 @
91bb52cd
...
...
@@ -15,6 +15,7 @@ limitations under the License. */
#include "paddle/phi/kernels/activation_kernel.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/activation_functor.h"
#include "paddle/phi/kernels/impl/activation_impl.h"
namespace
phi
{
...
...
@@ -72,6 +73,12 @@ DEFINE_CPU_ACTIVATION_KERNEL(Relu, ReluCPUFunctor)
DEFINE_CPU_ACTIVATION_KERNEL
(
Tanh
,
TanhFunctor
)
DEFINE_CPU_ACTIVATION_KERNEL
(
TanhShrink
,
TanhShrinkFunctor
)
DEFINE_CPU_ACTIVATION_KERNEL
(
Silu
,
SiluFunctor
)
DEFINE_CPU_ACTIVATION_KERNEL
(
Exp
,
ExpFunctor
)
DEFINE_CPU_ACTIVATION_KERNEL
(
Expm1
,
Expm1Functor
)
DEFINE_CPU_ACTIVATION_KERNEL
(
Reciprocal
,
ReciprocalFunctor
)
DEFINE_CPU_ACTIVATION_KERNEL
(
Square
,
SquareFunctor
)
DEFINE_CPU_ACTIVATION_KERNEL
(
Sqrt
,
SqrtFunctor
)
DEFINE_CPU_ACTIVATION_KERNEL
(
Rsqrt
,
RsqrtFunctor
)
DEFINE_CPU_ACTIVATION_KERNEL
(
Sigmoid
,
SigmoidFunctor
)
DEFINE_CPU_ACTIVATION_KERNEL
(
LogSigmoid
,
LogSigmoidFunctor
)
DEFINE_CPU_ACTIVATION_KERNEL
(
Log
,
LogFunctor
)
...
...
@@ -83,15 +90,19 @@ DEFINE_CPU_ACTIVATION_KERNEL(Floor, FloorFunctor)
DEFINE_CPU_ACTIVATION_KERNEL
(
Ceil
,
CeilFunctor
)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS
(
LeakyRelu
,
LeakyReluFunctor
,
alpha
)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS
(
ThresholdedRelu
,
ThresholdedReluFunctor
,
threshold
)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS
(
Mish
,
MishFunctor
,
threshold
)
DEFINE_CPU_ACT_KERNEL_WITH_TWO_ATTRS
(
BRelu
,
BReluFunctor
,
t_min
,
t_max
)
DEFINE_CPU_ACT_KERNEL_WITH_TWO_ATTRS
(
STanh
,
STanhFunctor
,
scale_a
,
scale_b
)
DEFINE_CPU_ACT_KERNEL_WITH_TWO_ATTRS
(
Softplus
,
SoftplusFunctor
,
beta
,
threshold
)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS
(
HardShrink
,
HardShrinkFunctor
,
threshold
)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS
(
SoftShrink
,
SoftShrinkFunctor
,
lambda
)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS
(
Elu
,
ELUFunctor
,
alpha
)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS
(
Swish
,
SwishFunctor
,
beta
)
DEFINE_CPU_ACT_KERNEL_WITH_TWO_ATTRS
(
BRelu
,
BReluFunctor
,
t_min
,
t_max
)
DEFINE_CPU_ACT_KERNEL_WITH_TWO_ATTRS
(
HardSigmoid
,
HardSigmoidFunctor
,
slope
,
...
...
@@ -139,6 +150,25 @@ PD_REGISTER_ACTIVATION_KERNEL(soft_shrink, SoftShrinkKernel)
PD_REGISTER_ACTIVATION_KERNEL
(
tanh_shrink
,
TanhShrinkKernel
)
PD_REGISTER_ACTIVATION_KERNEL
(
elu
,
EluKernel
)
PD_REGISTER_ACTIVATION_KERNEL
(
silu
,
SiluKernel
)
PD_REGISTER_ACTIVATION_KERNEL
(
mish
,
MishKernel
)
PD_REGISTER_ACTIVATION_KERNEL
(
stanh
,
STanhKernel
)
PD_REGISTER_ACTIVATION_KERNEL
(
reciprocal
,
ReciprocalKernel
)
PD_REGISTER_ACTIVATION_KERNEL
(
sqrt
,
SqrtKernel
)
PD_REGISTER_ACTIVATION_KERNEL
(
rsqrt
,
RsqrtKernel
)
PD_REGISTER_ACTIVATION_KERNEL
(
softplus
,
SoftplusKernel
)
PD_REGISTER_KERNEL
(
exp
,
CPU
,
ALL_LAYOUT
,
phi
::
ExpKernel
,
float
,
double
,
int
,
int64_t
)
{}
PD_REGISTER_KERNEL
(
expm1
,
CPU
,
ALL_LAYOUT
,
phi
::
Expm1Kernel
,
float
,
double
,
phi
::
dtype
::
float16
)
{}
PD_REGISTER_KERNEL
(
logit
,
CPU
,
ALL_LAYOUT
,
phi
::
LogitKernel
,
float
,
double
)
{}
PD_REGISTER_KERNEL
(
square
,
CPU
,
ALL_LAYOUT
,
phi
::
SquareKernel
,
float
,
double
,
int
,
int64_t
)
{}
PD_REGISTER_ACTIVATION_KERNEL
(
sigmoid
,
SigmoidKernel
)
PD_REGISTER_ACTIVATION_KERNEL
(
logsigmoid
,
LogSigmoidKernel
)
PD_REGISTER_ACTIVATION_KERNEL
(
hard_sigmoid
,
HardSigmoidKernel
)
...
...
paddle/phi/kernels/cpu/temporal_shift_grad_kernel.cc
0 → 100644
浏览文件 @
91bb52cd
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/temporal_shift_grad_kernel.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/common/layout.h"
#include "paddle/phi/core/kernel_registry.h"
namespace
phi
{
template
<
typename
T
>
void
TemporalShiftBwNCHW
(
const
T
*
output_grad
,
T
*
input_grad
,
const
int
ntchw
,
const
int
tchw
,
const
int
chw
,
const
int
hw
,
const
int
t
,
const
int
c1
,
const
int
c2
)
{
int
src_it
=
0
;
for
(
int
i
=
0
;
i
<
ntchw
;
i
++
)
{
int
it
=
(
i
%
tchw
)
/
chw
;
int
ic
=
(
i
%
chw
)
/
hw
;
if
(
ic
<
c1
)
{
src_it
=
it
+
1
;
}
else
if
(
ic
<
c2
)
{
src_it
=
it
-
1
;
}
else
{
src_it
=
it
;
}
if
(
src_it
>=
0
&&
src_it
<
t
)
{
input_grad
[
i
]
=
output_grad
[
i
+
(
src_it
-
it
)
*
chw
];
}
else
{
input_grad
[
i
]
=
0
;
}
}
}
template
<
typename
T
>
void
TemporalShiftBwNHWC
(
const
T
*
output_grad
,
T
*
input_grad
,
const
int
nthwc
,
const
int
thwc
,
const
int
hwc
,
const
int
t
,
const
int
c
,
const
int
c1
,
const
int
c2
)
{
int
src_it
=
0
;
for
(
int
i
=
0
;
i
<
nthwc
;
i
++
)
{
int
it
=
(
i
%
thwc
)
/
hwc
;
int
ic
=
i
%
c
;
if
(
ic
<
c1
)
{
src_it
=
it
+
1
;
}
else
if
(
ic
<
c2
)
{
src_it
=
it
-
1
;
}
else
{
src_it
=
it
;
}
if
(
src_it
>=
0
&&
src_it
<
t
)
{
input_grad
[
i
]
=
output_grad
[
i
+
(
src_it
-
it
)
*
hwc
];
}
else
{
input_grad
[
i
]
=
0
;
}
}
}
template
<
typename
T
,
typename
Context
>
void
TemporalShiftGradKernel
(
const
Context
&
dev_ctx
,
const
DenseTensor
&
out_grad
,
int
seg_num
,
float
shift_ratio
,
const
std
::
string
&
data_format_str
,
DenseTensor
*
x_grad
)
{
auto
*
input_grad
=
x_grad
;
auto
*
output_grad
=
&
out_grad
;
int
t
=
seg_num
;
const
DataLayout
data_layout
=
paddle
::
framework
::
StringToDataLayout
(
data_format_str
);
const
int
nt
=
output_grad
->
dims
()[
0
];
const
int
c
=
(
data_layout
==
DataLayout
::
kNCHW
?
output_grad
->
dims
()[
1
]
:
output_grad
->
dims
()[
3
]);
const
int
h
=
(
data_layout
==
DataLayout
::
kNCHW
?
output_grad
->
dims
()[
2
]
:
output_grad
->
dims
()[
1
]);
const
int
w
=
(
data_layout
==
DataLayout
::
kNCHW
?
output_grad
->
dims
()[
3
]
:
output_grad
->
dims
()[
2
]);
const
int
hw
=
h
*
w
;
const
int
chw
=
c
*
hw
;
const
int
tchw
=
t
*
chw
;
const
int
ntchw
=
nt
*
chw
;
const
int
c1
=
static_cast
<
int
>
(
c
*
shift_ratio
);
const
int
c2
=
static_cast
<
int
>
(
c
*
2
*
shift_ratio
);
DDim
in_grad_dims
=
(
data_layout
==
DataLayout
::
kNCHW
?
phi
::
make_ddim
({
nt
,
c
,
h
,
w
})
:
phi
::
make_ddim
({
nt
,
h
,
w
,
c
}));
const
T
*
output_grad_data
=
output_grad
->
data
<
T
>
();
T
*
input_grad_data
=
input_grad
->
mutable_data
<
T
>
(
in_grad_dims
,
dev_ctx
.
GetPlace
());
if
(
data_layout
==
DataLayout
::
kNCHW
)
{
TemporalShiftBwNCHW
<
T
>
(
output_grad_data
,
input_grad_data
,
ntchw
,
tchw
,
chw
,
hw
,
t
,
c1
,
c2
);
}
else
{
TemporalShiftBwNHWC
<
T
>
(
output_grad_data
,
input_grad_data
,
ntchw
,
tchw
,
chw
,
t
,
c
,
c1
,
c2
);
}
}
}
// namespace phi
PD_REGISTER_KERNEL
(
temporal_shift_grad
,
CPU
,
ALL_LAYOUT
,
phi
::
TemporalShiftGradKernel
,
float
,
double
)
{}
paddle/phi/kernels/cpu/temporal_shift_kernel.cc
0 → 100644
浏览文件 @
91bb52cd
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/temporal_shift_kernel.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/common/layout.h"
#include "paddle/phi/core/kernel_registry.h"
namespace
phi
{
template
<
typename
T
>
void
TemporalShiftFwNCHW
(
const
T
*
input
,
T
*
output
,
const
int
ntchw
,
const
int
tchw
,
const
int
chw
,
const
int
hw
,
const
int
t
,
const
int
c1
,
const
int
c2
)
{
int
src_it
=
0
;
for
(
int
i
=
0
;
i
<
ntchw
;
i
++
)
{
int
it
=
(
i
%
tchw
)
/
chw
;
int
ic
=
(
i
%
chw
)
/
hw
;
if
(
ic
<
c1
)
{
src_it
=
it
-
1
;
}
else
if
(
ic
<
c2
)
{
src_it
=
it
+
1
;
}
else
{
src_it
=
it
;
}
if
(
src_it
<
0
||
src_it
>=
t
)
{
output
[
i
]
=
0
;
}
else
{
output
[
i
]
=
input
[
i
+
(
src_it
-
it
)
*
chw
];
}
}
}
template
<
typename
T
>
void
TemporalShiftFwNHWC
(
const
T
*
input
,
T
*
output
,
const
int
nthwc
,
const
int
thwc
,
const
int
hwc
,
const
int
t
,
const
int
c
,
const
int
c1
,
const
int
c2
)
{
int
src_it
=
0
;
for
(
int
i
=
0
;
i
<
nthwc
;
i
++
)
{
int
it
=
(
i
%
thwc
)
/
hwc
;
int
ic
=
i
%
c
;
if
(
ic
<
c1
)
{
src_it
=
it
-
1
;
}
else
if
(
ic
<
c2
)
{
src_it
=
it
+
1
;
}
else
{
src_it
=
it
;
}
if
(
src_it
<
0
||
src_it
>=
t
)
{
output
[
i
]
=
0
;
}
else
{
output
[
i
]
=
input
[
i
+
(
src_it
-
it
)
*
hwc
];
}
}
}
template
<
typename
T
,
typename
Context
>
void
TemporalShiftKernel
(
const
Context
&
dev_ctx
,
const
DenseTensor
&
x
,
int
seg_num
,
float
shift_ratio
,
const
std
::
string
&
data_format_str
,
DenseTensor
*
out
)
{
auto
*
input
=
&
x
;
auto
*
output
=
out
;
int
t
=
seg_num
;
const
DataLayout
data_layout
=
paddle
::
framework
::
StringToDataLayout
(
data_format_str
);
const
int
nt
=
input
->
dims
()[
0
];
const
int
c
=
(
data_layout
==
DataLayout
::
kNCHW
?
input
->
dims
()[
1
]
:
input
->
dims
()[
3
]);
const
int
h
=
(
data_layout
==
DataLayout
::
kNCHW
?
input
->
dims
()[
2
]
:
input
->
dims
()[
1
]);
const
int
w
=
(
data_layout
==
DataLayout
::
kNCHW
?
input
->
dims
()[
3
]
:
input
->
dims
()[
2
]);
const
int
hw
=
h
*
w
;
const
int
chw
=
c
*
hw
;
const
int
tchw
=
t
*
chw
;
const
int
ntchw
=
nt
*
chw
;
const
int
c1
=
static_cast
<
int
>
(
c
*
shift_ratio
);
const
int
c2
=
static_cast
<
int
>
(
c
*
2
*
shift_ratio
);
DDim
out_dims
=
(
data_layout
==
DataLayout
::
kNCHW
?
phi
::
make_ddim
({
nt
,
c
,
h
,
w
})
:
phi
::
make_ddim
({
nt
,
h
,
w
,
c
}));
const
T
*
input_data
=
input
->
data
<
T
>
();
T
*
output_data
=
output
->
mutable_data
<
T
>
(
out_dims
,
dev_ctx
.
GetPlace
());
if
(
data_layout
==
DataLayout
::
kNCHW
)
{
TemporalShiftFwNCHW
<
T
>
(
input_data
,
output_data
,
ntchw
,
tchw
,
chw
,
hw
,
t
,
c1
,
c2
);
}
else
{
TemporalShiftFwNHWC
<
T
>
(
input_data
,
output_data
,
ntchw
,
tchw
,
chw
,
t
,
c
,
c1
,
c2
);
}
}
}
// namespace phi
PD_REGISTER_KERNEL
(
temporal_shift
,
CPU
,
ALL_LAYOUT
,
phi
::
TemporalShiftKernel
,
float
,
double
)
{}
paddle/phi/kernels/funcs/activation_functor.h
浏览文件 @
91bb52cd
此差异已折叠。
点击以展开。
paddle/phi/kernels/gpu/activation_grad_kernel.cu
浏览文件 @
91bb52cd
...
...
@@ -189,6 +189,13 @@ DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Acosh, CudaAcoshGradFunctor);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX
(
Atanh
,
CudaAtanhGradFunctor
);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX
(
TanhShrink
,
CudaTanhShrinkGradFunctor
);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX
(
Silu
,
CudaSiluGradFunctor
);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX
(
Square
,
CudaSquareGradFunctor
);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPOUT
(
Exp
,
CudaExpGradFunctor
);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPOUT
(
Expm1
,
CudaExpm1GradFunctor
);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPOUT
(
Reciprocal
,
CudaReciprocalGradFunctor
);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPOUT
(
Sqrt
,
CudaSqrtGradFunctor
);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPOUT
(
Rsqrt
,
CudaRsqrtGradFunctor
);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX
(
LogSigmoid
,
CudaLogSigmoidGradFunctor
);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX
(
Log
,
CudaLogGradFunctor
);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX
(
Log2
,
CudaLog2GradFunctor
);
...
...
@@ -211,11 +218,24 @@ DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(Swish,
CudaSwishGradFunctor
,
beta
);
DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX
(
Mish
,
CudaMishGradFunctor
,
threshold
);
DEFINE_GPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX
(
BRelu
,
CudaBReluGradFunctor
,
t_min
,
t_max
);
DEFINE_GPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX
(
STanh
,
CudaSTanhGradFunctor
,
scale_a
,
scale_b
);
DEFINE_GPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX
(
Softplus
,
CudaSoftplusGradFunctor
,
beta
,
threshold
);
DEFINE_GPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPOUT
(
HardSigmoid
,
CudaHardSigmoidGradFunctor
,
slope
,
...
...
@@ -326,12 +346,57 @@ PD_REGISTER_ACTIVATION_GRAD_KERNEL(leaky_relu_double_grad,
LeakyReluDoubleGradKernel
)
PD_REGISTER_ACTIVATION_GRAD_KERNEL
(
thresholded_relu_grad
,
ThresholdedReluGradKernel
)
PD_REGISTER_ACTIVATION_GRAD_KERNEL
(
mish_grad
,
MishGradKernel
)
PD_REGISTER_ACTIVATION_GRAD_KERNEL
(
stanh_grad
,
STanhGradKernel
)
PD_REGISTER_ACTIVATION_GRAD_KERNEL
(
reciprocal_grad
,
ReciprocalGradKernel
)
PD_REGISTER_ACTIVATION_GRAD_KERNEL
(
softplus_grad
,
SoftplusGradKernel
)
PD_REGISTER_ACTIVATION_GRAD_KERNEL
(
sqrt_grad
,
SqrtGradKernel
)
PD_REGISTER_ACTIVATION_GRAD_KERNEL
(
rsqrt_grad
,
RsqrtGradKernel
)
PD_REGISTER_KERNEL
(
exp_grad
,
GPU
,
ALL_LAYOUT
,
phi
::
ExpGradKernel
,
float
,
double
,
int
,
int64_t
,
phi
::
dtype
::
float16
)
{}
PD_REGISTER_ACTIVATION_GRAD_KERNEL
(
soft_shrink_grad
,
SoftShrinkGradKernel
)
PD_REGISTER_ACTIVATION_GRAD_KERNEL
(
hard_shrink_grad
,
HardShrinkGradKernel
)
PD_REGISTER_ACTIVATION_GRAD_KERNEL
(
tanh_shrink_grad
,
TanhShrinkGradKernel
)
PD_REGISTER_ACTIVATION_GRAD_KERNEL
(
silu_grad
,
SiluGradKernel
)
PD_REGISTER_ACTIVATION_GRAD_KERNEL
(
elu_grad
,
EluGradKernel
)
PD_REGISTER_ACTIVATION_GRAD_KERNEL
(
elu_double_grad
,
EluDoubleGradKernel
)
PD_REGISTER_KERNEL
(
expm1_grad
,
GPU
,
ALL_LAYOUT
,
phi
::
Expm1GradKernel
,
float
,
double
,
phi
::
dtype
::
float16
)
{}
PD_REGISTER_KERNEL
(
logit_grad
,
GPU
,
ALL_LAYOUT
,
phi
::
LogitGradKernel
,
float
,
double
,
phi
::
dtype
::
float16
)
{}
PD_REGISTER_KERNEL
(
square_grad
,
GPU
,
ALL_LAYOUT
,
phi
::
SquareGradKernel
,
float
,
double
,
int
,
int64_t
,
phi
::
dtype
::
float16
,
phi
::
dtype
::
bfloat16
)
{}
PD_REGISTER_ACTIVATION_GRAD_KERNEL
(
sigmoid_grad
,
SigmoidGradKernel
)
PD_REGISTER_ACTIVATION_GRAD_KERNEL
(
sigmoid_double_grad
,
SigmoidDoubleGradKernel
)
PD_REGISTER_ACTIVATION_GRAD_KERNEL
(
sigmoid_triple_grad
,
SigmoidTripleGradKernel
)
...
...
paddle/phi/kernels/gpu/activation_kernel.cu
浏览文件 @
91bb52cd
...
...
@@ -19,6 +19,7 @@ limitations under the License. */
#include "paddle/phi/common/float16.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/elementwise_base.h"
#include "paddle/phi/kernels/impl/activation_grad_impl.h"
#include "paddle/phi/kernels/impl/activation_impl.h"
#include "paddle/fluid/platform/device/gpu/gpu_device_function.h"
...
...
@@ -91,6 +92,12 @@ DEFINE_GPU_ACTIVATION_KERNEL(Relu, CudaReluFunctor)
DEFINE_GPU_ACTIVATION_KERNEL
(
Tanh
,
CudaTanhFunctor
)
DEFINE_GPU_ACTIVATION_KERNEL
(
TanhShrink
,
CudaTanhShrinkFunctor
)
DEFINE_GPU_ACTIVATION_KERNEL
(
Silu
,
CudaSiluFunctor
)
DEFINE_GPU_ACTIVATION_KERNEL
(
Exp
,
CudaExpFunctor
)
DEFINE_GPU_ACTIVATION_KERNEL
(
Expm1
,
CudaExpm1Functor
)
DEFINE_GPU_ACTIVATION_KERNEL
(
Reciprocal
,
CudaReciprocalFunctor
)
DEFINE_GPU_ACTIVATION_KERNEL
(
Square
,
CudaSquareFunctor
)
DEFINE_GPU_ACTIVATION_KERNEL
(
Sqrt
,
CudaSqrtFunctor
)
DEFINE_GPU_ACTIVATION_KERNEL
(
Rsqrt
,
CudaRsqrtFunctor
)
DEFINE_GPU_ACTIVATION_KERNEL
(
Sigmoid
,
CudaSigmoidFunctor
)
DEFINE_GPU_ACTIVATION_KERNEL
(
LogSigmoid
,
CudaLogSigmoidFunctor
)
DEFINE_GPU_ACTIVATION_KERNEL
(
Log
,
CudaLogFunctor
)
...
...
@@ -112,7 +119,14 @@ DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(SoftShrink, CudaSoftShrinkFunctor, lambda)
DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS
(
Elu
,
CudaELUFunctor
,
alpha
)
DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS
(
Swish
,
CudaSwishFunctor
,
beta
)
DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS
(
Mish
,
CudaMishFunctor
,
threshold
)
DEFINE_GPU_ACT_KERNEL_WITH_TWO_ATTRS
(
BRelu
,
CudaBReluFunctor
,
t_min
,
t_max
)
DEFINE_GPU_ACT_KERNEL_WITH_TWO_ATTRS
(
Stanh
,
CudaSTanhFunctor
,
scale_a
,
scale_b
)
DEFINE_GPU_ACT_KERNEL_WITH_TWO_ATTRS
(
Softplus
,
CudaSoftplusFunctor
,
beta
,
threshold
)
DEFINE_GPU_ACT_KERNEL_WITH_TWO_ATTRS
(
HardSigmoid
,
CudaHardSigmoidFunctor
,
slope
,
...
...
@@ -180,6 +194,46 @@ PD_REGISTER_ACTIVATION_KERNEL(tanh, TanhKernel)
PD_REGISTER_ACTIVATION_KERNEL
(
brelu
,
BReluKernel
)
PD_REGISTER_ACTIVATION_KERNEL
(
thresholded_relu
,
ThresholdedReluKernel
)
PD_REGISTER_ACTIVATION_KERNEL
(
leaky_relu
,
LeakyReluKernel
)
PD_REGISTER_ACTIVATION_KERNEL
(
mish
,
MishKernel
)
PD_REGISTER_ACTIVATION_KERNEL
(
stanh
,
StanhKernel
)
PD_REGISTER_ACTIVATION_KERNEL
(
reciprocal
,
ReciprocalKernel
)
PD_REGISTER_ACTIVATION_KERNEL
(
sqrt
,
SqrtKernel
)
PD_REGISTER_ACTIVATION_KERNEL
(
rsqrt
,
RsqrtKernel
)
PD_REGISTER_ACTIVATION_KERNEL
(
softplus
,
SoftplusKernel
)
PD_REGISTER_KERNEL
(
exp
,
GPU
,
ALL_LAYOUT
,
phi
::
ExpKernel
,
float
,
double
,
int
,
int64_t
,
phi
::
dtype
::
float16
)
{}
PD_REGISTER_KERNEL
(
expm1
,
GPU
,
ALL_LAYOUT
,
phi
::
Expm1Kernel
,
float
,
double
,
phi
::
dtype
::
float16
)
{}
PD_REGISTER_KERNEL
(
logit
,
GPU
,
ALL_LAYOUT
,
phi
::
LogitKernel
,
float
,
double
,
phi
::
dtype
::
float16
)
{}
PD_REGISTER_KERNEL
(
square
,
GPU
,
ALL_LAYOUT
,
phi
::
SquareKernel
,
float
,
double
,
int
,
int64_t
,
phi
::
dtype
::
float16
,
phi
::
dtype
::
bfloat16
)
{}
PD_REGISTER_ACTIVATION_KERNEL
(
hard_shrink
,
HardShrinkKernel
)
PD_REGISTER_ACTIVATION_KERNEL
(
soft_shrink
,
SoftShrinkKernel
)
PD_REGISTER_ACTIVATION_KERNEL
(
tanh_shrink
,
TanhShrinkKernel
)
...
...
paddle/phi/kernels/gpu/temporal_shift_grad_kernel.cu
0 → 100644
浏览文件 @
91bb52cd
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/common/layout.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/temporal_shift_grad_kernel.h"
namespace
phi
{
template
<
typename
T
>
__global__
void
KeTemporalShiftBwNCHW
(
const
T
*
output_grad
,
T
*
input_grad
,
const
int
ntchw
,
const
int
tchw
,
const
int
chw
,
const
int
hw
,
const
int
t
,
const
int
c1
,
const
int
c2
)
{
int
tid
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
stride
=
blockDim
.
x
*
gridDim
.
x
;
int
src_it
=
0
;
for
(;
tid
<
ntchw
;
tid
+=
stride
)
{
int
it
=
(
tid
%
tchw
)
/
chw
;
int
ic
=
(
tid
%
chw
)
/
hw
;
if
(
ic
<
c1
)
{
src_it
=
it
+
1
;
}
else
if
(
ic
<
c2
)
{
src_it
=
it
-
1
;
}
else
{
src_it
=
it
;
}
if
(
src_it
>=
0
&&
src_it
<
t
)
{
input_grad
[
tid
]
=
output_grad
[
tid
+
(
src_it
-
it
)
*
chw
];
}
else
{
input_grad
[
tid
]
=
0
;
}
}
}
template
<
typename
T
>
__global__
void
KeTemporalShiftBwNHWC
(
const
T
*
output_grad
,
T
*
input_grad
,
const
int
nthwc
,
const
int
thwc
,
const
int
hwc
,
const
int
t
,
const
int
c
,
const
int
c1
,
const
int
c2
)
{
int
tid
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
stride
=
blockDim
.
x
*
gridDim
.
x
;
int
src_it
=
0
;
for
(;
tid
<
nthwc
;
tid
+=
stride
)
{
int
it
=
(
tid
%
thwc
)
/
hwc
;
int
ic
=
tid
%
c
;
if
(
ic
<
c1
)
{
src_it
=
it
+
1
;
}
else
if
(
ic
<
c2
)
{
src_it
=
it
-
1
;
}
else
{
src_it
=
it
;
}
if
(
src_it
>=
0
&&
src_it
<
t
)
{
input_grad
[
tid
]
=
output_grad
[
tid
+
(
src_it
-
it
)
*
hwc
];
}
else
{
input_grad
[
tid
]
=
0
;
}
}
}
template
<
typename
T
,
typename
Context
>
void
TemporalShiftGradKernel
(
const
Context
&
dev_ctx
,
const
DenseTensor
&
out_grad
,
int
seg_num
,
float
shift_ratio
,
const
std
::
string
&
data_format_str
,
DenseTensor
*
x_grad
)
{
auto
*
input_grad
=
x_grad
;
auto
*
output_grad
=
&
out_grad
;
int
t
=
seg_num
;
const
DataLayout
data_layout
=
paddle
::
framework
::
StringToDataLayout
(
data_format_str
);
const
int
nt
=
output_grad
->
dims
()[
0
];
const
int
c
=
(
data_layout
==
DataLayout
::
kNCHW
?
output_grad
->
dims
()[
1
]
:
output_grad
->
dims
()[
3
]);
const
int
h
=
(
data_layout
==
DataLayout
::
kNCHW
?
output_grad
->
dims
()[
2
]
:
output_grad
->
dims
()[
1
]);
const
int
w
=
(
data_layout
==
DataLayout
::
kNCHW
?
output_grad
->
dims
()[
3
]
:
output_grad
->
dims
()[
2
]);
const
int
hw
=
h
*
w
;
const
int
chw
=
c
*
hw
;
const
int
tchw
=
t
*
chw
;
const
int
ntchw
=
nt
*
chw
;
const
int
c1
=
static_cast
<
int
>
(
c
*
shift_ratio
);
const
int
c2
=
static_cast
<
int
>
(
c
*
2
*
shift_ratio
);
DDim
in_grad_dims
=
(
data_layout
==
DataLayout
::
kNCHW
?
phi
::
make_ddim
({
nt
,
c
,
h
,
w
})
:
phi
::
make_ddim
({
nt
,
h
,
w
,
c
}));
const
T
*
output_grad_data
=
output_grad
->
data
<
T
>
();
T
*
input_grad_data
=
input_grad
->
mutable_data
<
T
>
(
in_grad_dims
,
dev_ctx
.
GetPlace
());
int
pixelNum
=
nt
*
chw
;
int
threads
=
1024
;
int
grid
=
(
pixelNum
+
threads
-
1
)
/
threads
;
int
blocks_per_sm
=
dev_ctx
.
GetMaxPhysicalThreadCount
()
/
threads
;
grid
=
std
::
min
(
dev_ctx
.
GetSMCount
()
*
blocks_per_sm
,
grid
);
if
(
data_layout
==
DataLayout
::
kNCHW
)
{
KeTemporalShiftBwNCHW
<
T
><<<
grid
,
threads
,
0
,
dev_ctx
.
stream
()
>>>
(
output_grad_data
,
input_grad_data
,
ntchw
,
tchw
,
chw
,
hw
,
t
,
c1
,
c2
);
}
else
{
KeTemporalShiftBwNHWC
<
T
><<<
grid
,
threads
,
0
,
dev_ctx
.
stream
()
>>>
(
output_grad_data
,
input_grad_data
,
ntchw
,
tchw
,
chw
,
t
,
c
,
c1
,
c2
);
}
}
}
// namespace phi
PD_REGISTER_KERNEL
(
temporal_shift_grad
,
GPU
,
ALL_LAYOUT
,
phi
::
TemporalShiftGradKernel
,
float
,
double
,
phi
::
dtype
::
float16
)
{}
paddle/phi/kernels/gpu/temporal_shift_kernel.cu
0 → 100644
浏览文件 @
91bb52cd
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/common/layout.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/temporal_shift_kernel.h"
namespace
phi
{
template
<
typename
T
>
__global__
void
KeTemporalShiftFwNCHW
(
const
T
*
input
,
T
*
output
,
const
int
ntchw
,
const
int
tchw
,
const
int
chw
,
const
int
hw
,
const
int
t
,
const
int
c1
,
const
int
c2
)
{
int
tid
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
stride
=
blockDim
.
x
*
gridDim
.
x
;
int
src_it
=
0
;
for
(;
tid
<
ntchw
;
tid
+=
stride
)
{
int
it
=
(
tid
%
tchw
)
/
chw
;
int
ic
=
(
tid
%
chw
)
/
hw
;
if
(
ic
<
c1
)
{
src_it
=
it
-
1
;
}
else
if
(
ic
<
c2
)
{
src_it
=
it
+
1
;
}
else
{
src_it
=
it
;
}
if
(
src_it
<
0
||
src_it
>=
t
)
{
output
[
tid
]
=
0
;
}
else
{
output
[
tid
]
=
input
[
tid
+
(
src_it
-
it
)
*
chw
];
}
}
}
template
<
typename
T
>
__global__
void
KeTemporalShiftFwNHWC
(
const
T
*
input
,
T
*
output
,
const
int
nthwc
,
const
int
thwc
,
const
int
hwc
,
const
int
t
,
const
int
c
,
const
int
c1
,
const
int
c2
)
{
int
tid
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
stride
=
blockDim
.
x
*
gridDim
.
x
;
int
src_it
=
0
;
for
(;
tid
<
nthwc
;
tid
+=
stride
)
{
int
it
=
(
tid
%
thwc
)
/
hwc
;
int
ic
=
tid
%
c
;
if
(
ic
<
c1
)
{
src_it
=
it
-
1
;
}
else
if
(
ic
<
c2
)
{
src_it
=
it
+
1
;
}
else
{
src_it
=
it
;
}
if
(
src_it
<
0
||
src_it
>=
t
)
{
output
[
tid
]
=
0
;
}
else
{
output
[
tid
]
=
input
[
tid
+
(
src_it
-
it
)
*
hwc
];
}
}
}
template
<
typename
T
,
typename
Context
>
void
TemporalShiftKernel
(
const
Context
&
dev_ctx
,
const
DenseTensor
&
x
,
int
seg_num
,
float
shift_ratio
,
const
std
::
string
&
data_format_str
,
DenseTensor
*
out
)
{
auto
*
input
=
&
x
;
auto
*
output
=
out
;
int
t
=
seg_num
;
const
DataLayout
data_layout
=
paddle
::
framework
::
StringToDataLayout
(
data_format_str
);
const
int
nt
=
input
->
dims
()[
0
];
const
int
c
=
(
data_layout
==
DataLayout
::
kNCHW
?
input
->
dims
()[
1
]
:
input
->
dims
()[
3
]);
const
int
h
=
(
data_layout
==
DataLayout
::
kNCHW
?
input
->
dims
()[
2
]
:
input
->
dims
()[
1
]);
const
int
w
=
(
data_layout
==
DataLayout
::
kNCHW
?
input
->
dims
()[
3
]
:
input
->
dims
()[
2
]);
const
int
hw
=
h
*
w
;
const
int
chw
=
c
*
hw
;
const
int
tchw
=
t
*
chw
;
const
int
ntchw
=
nt
*
chw
;
const
int
c1
=
static_cast
<
int
>
(
c
*
shift_ratio
);
const
int
c2
=
static_cast
<
int
>
(
c
*
2
*
shift_ratio
);
DDim
out_dims
=
(
data_layout
==
DataLayout
::
kNCHW
?
phi
::
make_ddim
({
nt
,
c
,
h
,
w
})
:
phi
::
make_ddim
({
nt
,
h
,
w
,
c
}));
const
T
*
input_data
=
input
->
data
<
T
>
();
T
*
output_data
=
output
->
mutable_data
<
T
>
(
out_dims
,
dev_ctx
.
GetPlace
());
int
pixelNum
=
nt
*
chw
;
int
threads
=
1024
;
int
grid
=
(
pixelNum
+
threads
-
1
)
/
threads
;
int
blocks_per_sm
=
dev_ctx
.
GetMaxPhysicalThreadCount
()
/
threads
;
grid
=
std
::
min
(
dev_ctx
.
GetSMCount
()
*
blocks_per_sm
,
grid
);
if
(
data_layout
==
DataLayout
::
kNCHW
)
{
KeTemporalShiftFwNCHW
<
T
><<<
grid
,
threads
,
0
,
dev_ctx
.
stream
()
>>>
(
input_data
,
output_data
,
ntchw
,
tchw
,
chw
,
hw
,
t
,
c1
,
c2
);
}
else
{
KeTemporalShiftFwNHWC
<
T
><<<
grid
,
threads
,
0
,
dev_ctx
.
stream
()
>>>
(
input_data
,
output_data
,
ntchw
,
tchw
,
chw
,
t
,
c
,
c1
,
c2
);
}
}
}
// namespace phi
PD_REGISTER_KERNEL
(
temporal_shift
,
GPU
,
ALL_LAYOUT
,
phi
::
TemporalShiftKernel
,
float
,
double
,
phi
::
dtype
::
float16
)
{}
paddle/phi/kernels/impl/activation_grad_impl.h
浏览文件 @
91bb52cd
...
...
@@ -222,6 +222,24 @@ void EluDoubleGradKernel(const Context& dev_ctx,
functor
(
dev_ctx
,
&
x
,
&
ddx
,
ddout
,
&
dout
,
dx
);
}
template
<
typename
T
,
typename
Context
>
void
LogitGradKernel
(
const
Context
&
dev_ctx
,
const
DenseTensor
&
x
,
const
DenseTensor
&
out_grad
,
float
eps
,
DenseTensor
*
x_grad
)
{
dev_ctx
.
template
Alloc
<
T
>(
x_grad
);
auto
eigen_x
=
EigenVector
<
T
>::
Flatten
(
x
);
auto
eigen_dout
=
EigenVector
<
T
>::
Flatten
(
out_grad
);
auto
eigen_dx
=
EigenVector
<
T
>::
Flatten
(
*
x_grad
);
auto
&
place
=
*
dev_ctx
.
eigen_device
();
auto
eigen_p
=
EigenVector
<
T
>::
Flatten
(
x
);
funcs
::
LogitGradFunctor
<
T
>
functor
;
functor
(
place
,
eigen_x
,
eigen_dout
,
eigen_dx
,
eigen_p
,
eps
);
}
template
<
typename
T
,
typename
Context
>
void
SigmoidDoubleGradKernel
(
const
Context
&
dev_ctx
,
const
DenseTensor
&
out
,
...
...
paddle/phi/kernels/impl/activation_impl.h
浏览文件 @
91bb52cd
...
...
@@ -47,6 +47,22 @@ void ActivationImpl(const Context& dev_ctx,
}
}
template
<
typename
T
,
typename
Context
>
void
LogitKernel
(
const
Context
&
dev_ctx
,
const
DenseTensor
&
x
,
float
eps
,
DenseTensor
*
out
)
{
dev_ctx
.
template
Alloc
<
T
>(
out
);
auto
eigen_out
=
EigenVector
<
T
>::
Flatten
(
*
out
);
auto
eigen_in
=
EigenVector
<
T
>::
Flatten
(
x
);
auto
&
place
=
*
dev_ctx
.
eigen_device
();
auto
eigen_p
=
EigenVector
<
T
>::
Flatten
(
*
out
);
funcs
::
LogitFunctor
<
T
>
functor
;
functor
(
place
,
eigen_in
,
eigen_out
,
eigen_p
,
eps
);
}
template
<
typename
T
,
typename
Context
>
void
PowKernel
(
const
Context
&
dev_ctx
,
const
DenseTensor
&
x
,
...
...
paddle/phi/kernels/selected_rows/activation_kernel.cc
0 → 100644
浏览文件 @
91bb52cd
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/selected_rows/activation_kernel.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/activation_kernel.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
namespace
phi
{
namespace
sr
{
template
<
typename
T
,
typename
Context
>
void
SquareKernel
(
const
Context
&
dev_ctx
,
const
SelectedRows
&
x
,
SelectedRows
*
out
)
{
out
->
set_rows
(
x
.
rows
());
out
->
set_height
(
x
.
height
());
phi
::
SquareKernel
<
T
,
Context
>
(
dev_ctx
,
x
.
value
(),
out
->
mutable_value
());
}
template
<
typename
T
,
typename
Context
>
void
SqrtKernel
(
const
Context
&
dev_ctx
,
const
SelectedRows
&
x
,
SelectedRows
*
out
)
{
out
->
set_rows
(
x
.
rows
());
out
->
set_height
(
x
.
height
());
phi
::
SqrtKernel
<
T
,
Context
>
(
dev_ctx
,
x
.
value
(),
out
->
mutable_value
());
}
}
// namespace sr
}
// namespace phi
PD_REGISTER_KERNEL
(
square_sr
,
CPU
,
ALL_LAYOUT
,
phi
::
sr
::
SquareKernel
,
float
,
double
)
{}
PD_REGISTER_KERNEL
(
sqrt_sr
,
CPU
,
ALL_LAYOUT
,
phi
::
sr
::
SqrtKernel
,
float
,
double
)
{}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PD_REGISTER_KERNEL
(
square_sr
,
GPU
,
ALL_LAYOUT
,
phi
::
sr
::
SquareKernel
,
float
,
double
,
int
,
int64_t
)
{}
PD_REGISTER_KERNEL
(
sqrt_sr
,
GPU
,
ALL_LAYOUT
,
phi
::
sr
::
SqrtKernel
,
float
,
double
)
{}
#endif
paddle/phi/kernels/selected_rows/activation_kernel.h
0 → 100644
浏览文件 @
91bb52cd
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/selected_rows.h"
namespace
phi
{
namespace
sr
{
template
<
typename
T
,
typename
Context
>
void
SquareKernel
(
const
Context
&
dev_ctx
,
const
SelectedRows
&
x
,
SelectedRows
*
out
);
template
<
typename
T
,
typename
Context
>
void
SqrtKernel
(
const
Context
&
dev_ctx
,
const
SelectedRows
&
x
,
SelectedRows
*
out
);
}
// namespace sr
}
// namespace phi
paddle/phi/kernels/temporal_shift_grad_kernel.h
0 → 100644
浏览文件 @
91bb52cd
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/core/dense_tensor.h"
namespace
phi
{
template
<
typename
T
,
typename
Context
>
void
TemporalShiftGradKernel
(
const
Context
&
ctx
,
const
DenseTensor
&
out_grad
,
int
seg_num
,
float
shift_ratio
,
const
std
::
string
&
data_format
,
DenseTensor
*
x_grad
);
}
// namespace phi
paddle/phi/kernels/temporal_shift_kernel.h
0 → 100644
浏览文件 @
91bb52cd
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/core/dense_tensor.h"
namespace
phi
{
template
<
typename
T
,
typename
Context
>
void
TemporalShiftKernel
(
const
Context
&
ctx
,
const
DenseTensor
&
x
,
int
seg_num
,
float
shift_ratio
,
const
std
::
string
&
data_format
,
DenseTensor
*
out
);
}
// namespace phi
paddle/phi/ops/compat/activation_sig.cc
浏览文件 @
91bb52cd
...
...
@@ -43,17 +43,19 @@ namespace phi {
#define comma ,
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Cos
,
"cos"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Tan
,
"tan"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Acos
,
"acos"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Sin
,
"sin"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Asin
,
"asin"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Atan
,
"atan"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Sinh
,
"sinh"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Cosh
,
"cosh"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Asinh
,
"asinh"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Acosh
,
"acosh"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Atanh
,
"atanh"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Cos
,
"cos"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Tan
,
"tan"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Acos
,
"acos"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Sin
,
"sin"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Asin
,
"asin"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Atan
,
"atan"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Sinh
,
"sinh"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Cosh
,
"cosh"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Asinh
,
"asinh"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Acosh
,
"acosh"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Atanh
,
"atanh"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Square
,
"square"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
BRelu
,
"brelu"
,
"t_min"
comma
"t_max"
);
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
LeakyRelu
,
"leaky_relu"
,
"alpha"
);
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
ThresholdedRelu
,
...
...
@@ -61,6 +63,7 @@ DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(ThresholdedRelu,
"threshold"
);
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
SoftShrink
,
"soft_shrink"
,
"lambda"
);
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
HardShrink
,
"hard_shrink"
,
"threshold"
);
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Mish
,
"mish"
,
"threshold"
);
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
TanhShrink
,
"tanh_shrink"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Silu
,
"silu"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
LogSigmoid
,
"logsigmoid"
,
);
// NOLINT
...
...
@@ -74,12 +77,41 @@ DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(HardSwish,
"offset"
);
// NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Swish
,
"swish"
,
"beta"
);
// NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP
(
Relu
,
"relu"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP
(
Tanh
,
"tanh"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP
(
Sigmoid
,
"sigmoid"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
STanh
,
"stanh"
,
"scale_a"
comma
"scale_b"
);
// NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Softplus
,
"softplus"
,
"beta"
comma
"threshold"
);
// NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP
(
Relu
,
"relu"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP
(
Tanh
,
"tanh"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP
(
Sigmoid
,
"sigmoid"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP
(
Exp
,
"exp"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP
(
Expm1
,
"expm1"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP
(
Reciprocal
,
"reciprocal"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP
(
Sqrt
,
"sqrt"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP
(
Rsqrt
,
"rsqrt"
,
);
// NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP
(
HardSigmoid
,
"hard_sigmoid"
,
"slope"
comma
"offset"
);
// NOLINT
KernelSignature
SqrtActiOpArgumentMapping
(
const
ArgumentMappingContext
&
ctx
)
{
if
(
ctx
.
IsDenseTensorInput
(
"X"
))
{
return
KernelSignature
(
"sqrt"
,
{
"X"
},
{},
{
"Out"
});
}
else
{
return
KernelSignature
(
"sqrt_sr"
,
{
"X"
},
{},
{
"Out"
});
}
}
KernelSignature
SquareActiOpArgumentMapping
(
const
ArgumentMappingContext
&
ctx
)
{
if
(
ctx
.
IsDenseTensorInput
(
"X"
))
{
return
KernelSignature
(
"square"
,
{
"X"
},
{},
{
"Out"
});
}
else
{
return
KernelSignature
(
"square_sr"
,
{
"X"
},
{},
{
"Out"
});
}
}
DEFINE_ACT_GRAD_NODEP_OP_ARGMAP
(
Round
,
"round"
,
);
// NOLINT
DEFINE_ACT_GRAD_NODEP_OP_ARGMAP
(
Floor
,
"floor"
,
);
// NOLINT
...
...
@@ -132,6 +164,11 @@ KernelSignature EluOpArgumentMapping(const ArgumentMappingContext& ctx) {
return
KernelSignature
(
"elu"
,
{
"X"
},
{
"alpha"
},
{
"Out"
});
}
KernelSignature
LogitGradOpArgumentMapping
(
const
ArgumentMappingContext
&
ctx
)
{
return
KernelSignature
(
"logit_grad"
,
{
"X"
,
GradVarName
(
"Out"
)},
{
"eps"
},
{
GradVarName
(
"X"
)});
}
KernelSignature
EluGradOpArgumentMapping
(
const
ArgumentMappingContext
&
ctx
)
{
return
KernelSignature
(
"elu_grad"
,
{
"X"
,
"Out"
,
GradVarName
(
"Out"
)},
...
...
@@ -194,6 +231,18 @@ PD_REGISTER_ARG_MAPPING_FN(asinh_grad, phi::AsinhGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN
(
acosh_grad
,
phi
::
AcoshGradOpArgumentMapping
);
PD_REGISTER_ARG_MAPPING_FN
(
atanh_grad
,
phi
::
AtanhGradOpArgumentMapping
);
PD_REGISTER_ARG_MAPPING_FN
(
relu_grad
,
phi
::
ReluGradOpArgumentMapping
);
PD_REGISTER_ARG_MAPPING_FN
(
exp_grad
,
phi
::
ExpGradOpArgumentMapping
);
PD_REGISTER_ARG_MAPPING_FN
(
expm1_grad
,
phi
::
Expm1GradOpArgumentMapping
);
PD_REGISTER_ARG_MAPPING_FN
(
square_grad
,
phi
::
SquareGradOpArgumentMapping
);
PD_REGISTER_ARG_MAPPING_FN
(
reciprocal_grad
,
phi
::
ReciprocalGradOpArgumentMapping
);
PD_REGISTER_ARG_MAPPING_FN
(
sqrt_grad
,
phi
::
SqrtGradOpArgumentMapping
);
PD_REGISTER_ARG_MAPPING_FN
(
rsqrt_grad
,
phi
::
RsqrtGradOpArgumentMapping
);
PD_REGISTER_ARG_MAPPING_FN
(
mish_grad
,
phi
::
MishGradOpArgumentMapping
);
PD_REGISTER_ARG_MAPPING_FN
(
stanh_grad
,
phi
::
STanhGradOpArgumentMapping
);
PD_REGISTER_ARG_MAPPING_FN
(
softplus_grad
,
phi
::
SoftplusGradOpArgumentMapping
);
PD_REGISTER_ARG_MAPPING_FN
(
relu_grad_grad
,
phi
::
ReluDoubleGradOpArgumentMapping
);
PD_REGISTER_ARG_MAPPING_FN
(
tanh_grad
,
phi
::
TanhGradOpArgumentMapping
);
...
...
@@ -228,11 +277,16 @@ PD_REGISTER_ARG_MAPPING_FN(logsigmoid_grad,
phi
::
LogSigmoidGradOpArgumentMapping
);
PD_REGISTER_ARG_MAPPING_FN
(
hard_sigmoid_grad
,
phi
::
HardSigmoidGradOpArgumentMapping
);
PD_REGISTER_ARG_MAPPING_FN
(
logit_grad
,
phi
::
LogitGradOpArgumentMapping
);
PD_REGISTER_ARG_MAPPING_FN
(
log_grad
,
phi
::
LogGradOpArgumentMapping
);
PD_REGISTER_ARG_MAPPING_FN
(
log_grad_grad
,
phi
::
LogDoubleGradOpArgumentMapping
);
PD_REGISTER_ARG_MAPPING_FN
(
log2_grad
,
phi
::
Log2GradOpArgumentMapping
);
PD_REGISTER_ARG_MAPPING_FN
(
log10_grad
,
phi
::
Log10GradOpArgumentMapping
);
PD_REGISTER_ARG_MAPPING_FN
(
log1p_grad
,
phi
::
Log1pGradOpArgumentMapping
);
PD_REGISTER_ARG_MAPPING_FN
(
sqrt
,
phi
::
SqrtActiOpArgumentMapping
);
PD_REGISTER_ARG_MAPPING_FN
(
square
,
phi
::
SquareActiOpArgumentMapping
);
PD_REGISTER_ARG_MAPPING_FN
(
hard_swish_grad
,
phi
::
HardSwishGradOpArgumentMapping
);
PD_REGISTER_ARG_MAPPING_FN
(
swish_grad
,
phi
::
SwishGradOpArgumentMapping
);
...
...
paddle/phi/ops/compat/temporal_shift_sig.cc
0 → 100644
浏览文件 @
91bb52cd
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/core/compat/op_utils.h"
namespace
phi
{
KernelSignature
TemporalShiftOpArgumentMapping
(
const
ArgumentMappingContext
&
ctx
)
{
return
KernelSignature
(
"temporal_shift"
,
{
"X"
},
{
"seg_num"
,
"shift_ratio"
,
"data_format"
},
{
"Out"
});
}
KernelSignature
TemporalShiftGradOpArgumentMapping
(
const
ArgumentMappingContext
&
ctx
)
{
return
KernelSignature
(
"temporal_shift_grad"
,
{
GradVarName
(
"Out"
)},
{
"seg_num"
,
"shift_ratio"
,
"data_format"
},
{
GradVarName
(
"X"
)});
}
}
// namespace phi
PD_REGISTER_ARG_MAPPING_FN
(
temporal_shift
,
phi
::
TemporalShiftOpArgumentMapping
);
PD_REGISTER_ARG_MAPPING_FN
(
temporal_shift_grad
,
phi
::
TemporalShiftGradOpArgumentMapping
);
python/paddle/fluid/tests/unittests/test_activation_nn_grad.py
浏览文件 @
91bb52cd
...
...
@@ -342,4 +342,5 @@ class TestLogDoubleGradCheck(unittest.TestCase):
if
__name__
==
"__main__"
:
paddle
.
enable_static
()
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_activation_sparse_op.py
0 → 100644
浏览文件 @
91bb52cd
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
unittest
import
numpy
as
np
import
paddle.fluid
as
fluid
import
paddle.fluid.core
as
core
from
paddle.fluid.op
import
Operator
from
op_test
import
OpTest
import
paddle
class
TestSparseSquareOp
(
unittest
.
TestCase
):
def
check_with_place
(
self
,
place
):
scope
=
core
.
Scope
()
# create and initialize Grad Variable
height
=
10
rows
=
[
0
,
4
,
7
]
self
.
row_numel
=
12
x_selected_rows
=
scope
.
var
(
'X'
).
get_selected_rows
()
x_selected_rows
.
set_height
(
height
)
x_selected_rows
.
set_rows
(
rows
)
np_array
=
np
.
ones
((
len
(
rows
),
self
.
row_numel
)).
astype
(
"float32"
)
np_array
[
0
,
0
]
=
2.0
np_array
[
2
,
8
]
=
4.0
x_tensor
=
x_selected_rows
.
get_tensor
()
x_tensor
.
set
(
np_array
,
place
)
out_selected_rows
=
scope
.
var
(
'Out'
).
get_selected_rows
()
# create and run sqrt operator
square_op
=
Operator
(
"square"
,
X
=
'X'
,
Out
=
'Out'
)
square_op
.
run
(
scope
,
place
)
# get and compare result
result_array
=
np
.
array
(
out_selected_rows
.
get_tensor
())
self
.
assertTrue
(
np
.
array_equal
(
result_array
,
np
.
square
(
np_array
)))
def
test_sparse_acti
(
self
):
places
=
[
core
.
CPUPlace
()]
if
core
.
is_compiled_with_cuda
():
places
.
append
(
core
.
CUDAPlace
(
0
))
for
place
in
places
:
self
.
check_with_place
(
place
)
class
TestSparseSqrtOp
(
unittest
.
TestCase
):
def
check_with_place
(
self
,
place
):
scope
=
core
.
Scope
()
# create and initialize Grad Variable
height
=
10
rows
=
[
0
,
4
,
7
]
self
.
row_numel
=
12
x_selected_rows
=
scope
.
var
(
'X1'
).
get_selected_rows
()
x_selected_rows
.
set_height
(
height
)
x_selected_rows
.
set_rows
(
rows
)
np_array
=
np
.
ones
((
len
(
rows
),
self
.
row_numel
)).
astype
(
"float32"
)
np_array
[
0
,
0
]
=
2.0
np_array
[
2
,
8
]
=
4.0
x_tensor
=
x_selected_rows
.
get_tensor
()
x_tensor
.
set
(
np_array
,
place
)
out_selected_rows
=
scope
.
var
(
'Out1'
).
get_selected_rows
()
# create and run sqrt operator
sqrt_op
=
Operator
(
"sqrt"
,
X
=
'X1'
,
Out
=
'Out1'
)
sqrt_op
.
run
(
scope
,
place
)
# get and compare result
result_array
=
np
.
array
(
out_selected_rows
.
get_tensor
())
self
.
assertTrue
(
np
.
allclose
(
result_array
,
np
.
sqrt
(
np_array
)))
def
test_sparse_acti
(
self
):
places
=
[
core
.
CPUPlace
()]
if
core
.
is_compiled_with_cuda
():
places
.
append
(
core
.
CUDAPlace
(
0
))
for
place
in
places
:
self
.
check_with_place
(
place
)
if
__name__
==
"__main__"
:
paddle
.
enable_static
()
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_clip_by_norm_op.py
浏览文件 @
91bb52cd
...
...
@@ -16,6 +16,7 @@ from __future__ import print_function
import
unittest
import
numpy
as
np
import
paddle
from
op_test
import
OpTest
import
paddle.fluid
as
fluid
...
...
@@ -153,4 +154,5 @@ class TestClipByNormOpWithSelectedRows(unittest.TestCase):
if
__name__
==
'__main__'
:
paddle
.
enable_static
()
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_temporal_shift_op.py
浏览文件 @
91bb52cd
...
...
@@ -143,4 +143,5 @@ class TestTemporalShiftAPI(unittest.TestCase):
if
__name__
==
"__main__"
:
paddle
.
enable_static
()
unittest
.
main
()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录