未验证 提交 91bb52cd 编写于 作者: H hong 提交者: GitHub

Revert "Revert "Move some activation to phi (#40727)" (#41056)" (#41095)

This reverts commit 05f3d48e.
上级 abd2df4c
......@@ -53,7 +53,7 @@ USE_OP_ITSELF(tanh_grad);
USE_OP(sum);
USE_OP_ITSELF(slice_grad);
USE_OP_ITSELF(lookup_table_grad);
USE_OP(sqrt);
USE_OP_ITSELF(sqrt);
USE_OP_ITSELF(elementwise_max);
USE_OP_ITSELF(elementwise_div);
USE_OP_ITSELF(sgd);
......@@ -83,6 +83,7 @@ PD_DECLARE_KERNEL(max_raw, GPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(sgd, GPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(slice, GPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(slice_grad, GPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(sqrt, GPU, ALL_LAYOUT);
DECLARE_double(eager_delete_tensor_gb);
......
......@@ -1496,6 +1496,14 @@ REGISTER_ACTIVATION_OP(hard_sigmoid, HardSigmoid, HardSigmoidFunctor,
HardSigmoidGradFunctor);
REGISTER_ACTIVATION_OP(logsigmoid, LogSigmoid, LogSigmoidFunctor,
LogSigmoidGradFunctor);
REGISTER_ACTIVATION_OP(expm1, Expm1, Expm1Functor, Expm1GradFunctor);
REGISTER_ACTIVATION_OP(softplus, Softplus, SoftplusFunctor,
SoftplusGradFunctor);
REGISTER_ACTIVATION_OP(mish, Mish, MishFunctor, MishGradFunctor);
REGISTER_ACTIVATION_OP(stanh, STanh, STanhFunctor, STanhGradFunctor);
REGISTER_ACTIVATION_OP(reciprocal, Reciprocal, ReciprocalFunctor,
ReciprocalGradFunctor);
REGISTER_ACTIVATION_OP(log2, Log2, Log2Functor, Log2GradFunctor);
REGISTER_ACTIVATION_OP(log10, Log10, Log10Functor, Log10GradFunctor);
REGISTER_ACTIVATION_OP(log1p, Log1p, Log1pFunctor, Log1pGradFunctor);
......@@ -1630,12 +1638,7 @@ REGISTER_OPERATOR(logit, ops::LogitOp, ops::LogitOpMaker,
ops::LogitGradOpMaker<paddle::framework::OpDesc>,
ops::LogitGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(logit_grad, ops::LogitGradOp);
REGISTER_OP_CPU_KERNEL(
logit, ops::LogitKernel<paddle::platform::CPUDeviceContext, float>,
ops::LogitKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
logit_grad, ops::LogitGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::LogitGradKernel<paddle::platform::CPUDeviceContext, double>);
/* ========================================================================== */
/* ======================== celu register ============================
......@@ -1684,7 +1687,6 @@ REGISTER_OPERATOR(
ops::ActivationOpDoubleGrad<ops::SqrtGradGradFunctor<float>::FwdDeps()>,
ops::ActivationDoubleGradOpInplaceInferer);
REGISTER_ACTIVATION_CPU_KERNEL(sqrt, Sqrt, SqrtFunctor, SqrtGradFunctor);
REGISTER_OP_CPU_KERNEL(
sqrt_grad_grad, ops::SqrtDoubleGradKernel<plat::CPUDeviceContext,
ops::SqrtGradGradFunctor<float>>,
......@@ -1712,7 +1714,6 @@ REGISTER_OPERATOR(
ops::ActivationOpDoubleGrad<ops::RsqrtGradGradFunctor<float>::FwdDeps()>,
ops::ActivationDoubleGradOpInplaceInferer);
REGISTER_ACTIVATION_CPU_KERNEL(rsqrt, Rsqrt, RsqrtFunctor, RsqrtGradFunctor);
REGISTER_OP_CPU_KERNEL(
rsqrt_grad_grad,
ops::RsqrtDoubleGradKernel<plat::CPUDeviceContext,
......@@ -1741,25 +1742,6 @@ REGISTER_OPERATOR(
ops::ActivationOpDoubleGrad<ops::SquareGradGradFunctor<float>::FwdDeps()>,
ops::ActivationDoubleGradOpInplaceInferer);
REGISTER_OP_CPU_KERNEL(square,
ops::ActivationKernel<paddle::platform::CPUDeviceContext,
ops::SquareFunctor<float>>,
ops::ActivationKernel<paddle::platform::CPUDeviceContext,
ops::SquareFunctor<double>>,
ops::ActivationKernel<paddle::platform::CPUDeviceContext,
ops::SquareFunctor<int>>,
ops::ActivationKernel<paddle::platform::CPUDeviceContext,
ops::SquareFunctor<int64_t>>);
REGISTER_OP_CPU_KERNEL(
square_grad, ops::ActivationGradKernel<paddle::platform::CPUDeviceContext,
ops::SquareGradFunctor<float>>,
ops::ActivationGradKernel<paddle::platform::CPUDeviceContext,
ops::SquareGradFunctor<double>>,
ops::ActivationGradKernel<paddle::platform::CPUDeviceContext,
ops::SquareGradFunctor<int>>,
ops::ActivationGradKernel<paddle::platform::CPUDeviceContext,
ops::SquareGradFunctor<int64_t>>);
REGISTER_OP_CPU_KERNEL(
square_grad_grad,
ops::SquareDoubleGradKernel<plat::CPUDeviceContext,
......@@ -1798,54 +1780,6 @@ REGISTER_OPERATOR(
REGISTER_OPERATOR(exp_grad, ops::ActivationOpGrad,
ops::ActivationGradOpInplaceInferer);
REGISTER_OP_CPU_KERNEL(exp,
ops::ActivationKernel<paddle::platform::CPUDeviceContext,
ops::ExpFunctor<float>>,
ops::ActivationKernel<paddle::platform::CPUDeviceContext,
ops::ExpFunctor<double>>,
ops::ActivationKernel<paddle::platform::CPUDeviceContext,
ops::ExpFunctor<int>>,
ops::ActivationKernel<paddle::platform::CPUDeviceContext,
ops::ExpFunctor<int64_t>>);
REGISTER_OP_CPU_KERNEL(
exp_grad, ops::ActivationGradKernel<paddle::platform::CPUDeviceContext,
ops::ExpGradFunctor<float>>,
ops::ActivationGradKernel<paddle::platform::CPUDeviceContext,
ops::ExpGradFunctor<double>>,
ops::ActivationGradKernel<paddle::platform::CPUDeviceContext,
ops::ExpGradFunctor<int>>,
ops::ActivationGradKernel<paddle::platform::CPUDeviceContext,
ops::ExpGradFunctor<int64_t>>);
/* ========================================================================== */
/* ========================== expm1 register ============================ */
REGISTER_OPERATOR(
expm1, ops::ActivationOp, ops::Expm1OpMaker, ops::ActivationOpInferVarType,
ops::ActivationGradOpMaker<ops::Expm1GradFunctor<float>::FwdDeps(),
paddle::framework::OpDesc>,
ops::ActivationGradOpMaker<ops::Expm1GradFunctor<float>::FwdDeps(),
paddle::imperative::OpBase>,
std::conditional<ops::CanInplaceAct<ops::Expm1GradFunctor<float>>(),
ops::ActFwdInplaceInferer, void>::type);
REGISTER_OPERATOR(expm1_grad, ops::ActivationOpGrad,
ops::ActivationGradOpInplaceInferer);
REGISTER_OP_CPU_KERNEL(expm1,
ops::ActivationKernel<paddle::platform::CPUDeviceContext,
ops::Expm1Functor<float>>,
ops::ActivationKernel<paddle::platform::CPUDeviceContext,
ops::Expm1Functor<double>>,
ops::ActivationKernel<paddle::platform::CPUDeviceContext,
ops::Expm1Functor<plat::float16>>);
REGISTER_OP_CPU_KERNEL(
expm1_grad, ops::ActivationGradKernel<paddle::platform::CPUDeviceContext,
ops::Expm1GradFunctor<float>>,
ops::ActivationGradKernel<paddle::platform::CPUDeviceContext,
ops::Expm1GradFunctor<double>>,
ops::ActivationGradKernel<paddle::platform::CPUDeviceContext,
ops::Expm1GradFunctor<plat::float16>>);
/* ========================================================================== */
/* ========================== Log register ==================================*/
REGISTER_OPERATOR(
log, ops::ActivationOp, ops::LogOpMaker, ops::ActivationOpInferVarType,
......@@ -1864,8 +1798,6 @@ REGISTER_OPERATOR(
ops::ActivationOpDoubleGrad<ops::LogGradGradFunctor<float>::FwdDeps()>,
ops::ActivationDoubleGradOpInplaceInferer);
/* ========================================================================== */
/* ========================== register checkpoint ===========================*/
REGISTER_OP_VERSION(leaky_relu)
.AddCheckpoint(
......
......@@ -264,6 +264,7 @@ USE_PHI_FUNCTOR(Asinh)
USE_PHI_FUNCTOR(Acosh)
USE_PHI_FUNCTOR(Atanh)
USE_PHI_FUNCTOR(Tanh)
USE_PHI_FUNCTOR(Exp)
USE_PHI_DOUBLE_GRAD_FUNCTOR(Tanh)
USE_PHI_TRIPLE_GRAD_FUNCTOR(Tanh)
USE_PHI_FUNCTOR(BRelu)
......@@ -289,6 +290,15 @@ USE_PHI_FUNCTOR(Log1p)
USE_PHI_FUNCTOR(Swish)
USE_PHI_FUNCTOR(HardSwish)
USE_PHI_FUNCTOR(Pow)
USE_PHI_FUNCTOR(Exp)
USE_PHI_FUNCTOR(Expm1)
USE_PHI_FUNCTOR(Mish)
USE_PHI_FUNCTOR(STanh)
USE_PHI_FUNCTOR(Reciprocal)
USE_PHI_FUNCTOR(Square)
USE_PHI_FUNCTOR(Sqrt)
USE_PHI_FUNCTOR(Rsqrt)
USE_PHI_FUNCTOR(Softplus)
template <typename T>
using ELUGradNegativeAlphaFunctor = phi::funcs::ELUGradNegativeAlphaFunctor<T>;
......@@ -305,49 +315,8 @@ using CeilFunctor = phi::funcs::CeilFunctor<T>;
template <typename T>
using ZeroGradFunctor = phi::funcs::ZeroGradFunctor<T>;
// exp(x) = e^x
template <typename T>
struct ExpFunctor : public BaseActivationFunctor<T> {
template <typename Device, typename X, typename Out>
void operator()(Device d, X x, Out out) const {
out.device(d) = x.exp();
}
};
template <typename T>
struct ExpGradFunctor : public BaseActivationFunctor<T> {
template <typename Device, typename X, typename Out, typename dOut,
typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
dx.device(d) = dout * out;
}
static constexpr ActBwdOpFwdDeps FwdDeps() {
return ActBwdOpFwdDeps::kDepOut;
}
};
// expm1(x) = e^x - 1
template <typename T>
struct Expm1Functor : public BaseActivationFunctor<T> {
template <typename Device, typename X, typename Out>
void operator()(Device d, X x, Out out) const {
out.device(d) = x.expm1();
}
};
template <typename T>
struct Expm1GradFunctor : public BaseActivationFunctor<T> {
template <typename Device, typename X, typename Out, typename dOut,
typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
dx.device(d) = dout * out + dout;
}
static constexpr ActBwdOpFwdDeps FwdDeps() {
return ActBwdOpFwdDeps::kDepOut;
}
};
using ELUGradNegativeAlphaFunctor = phi::funcs::ELUGradNegativeAlphaFunctor<T>;
// relu(x) = max(x, 0)
......@@ -362,92 +331,68 @@ using ReluGradGradFunctor = phi::funcs::ReluGradGradFunctor<T>;
template <typename T>
using ReluCUDAFunctor = phi::funcs::ReluCUDAFunctor<T>;
// sqrt(x) = x^(1/2)
template <typename T>
struct SqrtFunctor : public BaseActivationFunctor<T> {
template <typename Device, typename X, typename Out>
void operator()(Device d, X x, Out out) const {
out.device(d) = x.sqrt();
}
};
template <typename T>
struct SqrtGradFunctor : public BaseActivationFunctor<T> {
template <typename Device, typename X, typename Out, typename dOut,
typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
dx.device(d) = static_cast<T>(0.5) * dout / out;
}
static constexpr ActBwdOpFwdDeps FwdDeps() {
return ActBwdOpFwdDeps::kDepOut;
}
};
// rsqrt(x) = x^(-1/2)
template <typename T>
struct RsqrtFunctor : public BaseActivationFunctor<T> {
template <typename Device, typename X, typename Out>
void operator()(Device d, X x, Out out) const {
out.device(d) = x.rsqrt();
}
};
template <typename T>
struct RsqrtGradFunctor : public BaseActivationFunctor<T> {
template <typename Device, typename X, typename Out, typename dOut,
typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
dx.device(d) = static_cast<T>(-0.5) * dout * out * out * out;
struct SqrtGradGradFunctor : public BaseActivationFunctor<T> {
template <typename Device>
void operator()(const Device& dev, const framework::Tensor* Out,
const framework::Tensor* ddX, framework::Tensor* ddOut,
framework::Tensor* dOut, const framework::Tensor* dX) const {
auto* d = dev.eigen_device();
auto ddx = framework::EigenVector<T>::Flatten(
GET_DATA_SAFELY(ddX, "Input", "DDX", "SqrtGradGrad"));
auto out = framework::EigenVector<T>::Flatten(
GET_DATA_SAFELY(Out, "Output", "Out", "SqrtGradGrad"));
// sqrt GradGrad: ddy = 0.5 * ddx / y, dy = -1 * dx * ddx
// calculate dy first, so ddy can inplace ddx
if (dOut) {
auto dx = framework::EigenVector<T>::Flatten(
GET_DATA_SAFELY(dX, "Output", "DX", "SqrtGradGrad"));
auto dout = framework::EigenVector<T>::Flatten(
GET_DATA_SAFELY(dOut, "Output", "DOut", "SqrtGradGrad"));
dout.device(*d) = dx * ddx * static_cast<T>(-1) / out;
}
if (ddOut) {
auto ddout = framework::EigenVector<T>::Flatten(
GET_DATA_SAFELY(ddOut, "Output", "DDOut", "SqrtGradGrad"));
ddout.device(*d) = ddx * static_cast<T>(0.5) / out;
}
}
static constexpr ActBwdOpFwdDeps FwdDeps() {
return ActBwdOpFwdDeps::kDepOut;
}
};
// reciprocal(x) = 1 / x
template <typename T>
struct ReciprocalFunctor : public BaseActivationFunctor<T> {
template <typename Device, typename X, typename Out>
void operator()(Device d, X x, Out out) const {
out.device(d) = static_cast<T>(1) / x;
}
};
struct RsqrtGradGradFunctor : public BaseActivationFunctor<T> {
template <typename Device>
void operator()(const Device& dev, const framework::Tensor* Out,
const framework::Tensor* ddX, framework::Tensor* ddOut,
framework::Tensor* dOut, const framework::Tensor* dX) const {
auto* d = dev.eigen_device();
auto ddx = framework::EigenVector<T>::Flatten(
GET_DATA_SAFELY(ddX, "Input", "DDX", "RsqrtGradGrad"));
auto out = framework::EigenVector<T>::Flatten(
GET_DATA_SAFELY(Out, "Output", "Out", "RsqrtGradGrad"));
template <typename T>
struct ReciprocalGradFunctor : public BaseActivationFunctor<T> {
template <typename Device, typename X, typename Out, typename dOut,
typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
dx.device(d) = dout * static_cast<T>(-1) * out * out;
// rsqrt GradGrad: ddy = -0.5 * ddx * y * y * y, dy = (3/y) * dx * ddx
if (dOut) {
auto dx = framework::EigenVector<T>::Flatten(
GET_DATA_SAFELY(dX, "Output", "DX", "RsqrtGradGrad"));
auto dout = framework::EigenVector<T>::Flatten(
GET_DATA_SAFELY(dOut, "Output", "DOut", "RsqrtGradGrad"));
dout.device(*d) = (static_cast<T>(3.0) / out) * dx * ddx;
}
if (ddOut) {
auto ddout = framework::EigenVector<T>::Flatten(
GET_DATA_SAFELY(ddOut, "Output", "DDOut", "RsqrtGradGrad"));
ddout.device(*d) = ddx * static_cast<T>(-0.5) * out * out * out;
}
}
static constexpr ActBwdOpFwdDeps FwdDeps() {
return ActBwdOpFwdDeps::kDepOut;
}
};
// square(x) = x^2
template <typename T>
struct SquareFunctor : public BaseActivationFunctor<T> {
template <typename Device, typename X, typename Out>
void operator()(Device d, X x, Out out) const {
out.device(d) = x.square();
}
};
template <typename T>
struct SquareGradFunctor : public BaseActivationFunctor<T> {
template <typename Device, typename X, typename Out, typename dOut,
typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
dx.device(d) = dout * static_cast<T>(2) * x;
}
static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};
// relu6(x) = min(max(0, x), 6)
template <typename T>
struct Relu6Functor : public BaseActivationFunctor<T> {
......@@ -484,114 +429,6 @@ struct Relu6GradFunctor : public BaseActivationFunctor<T> {
}
};
// For numerical stability, using the following formula instead of softplus(x) =
// log(1 + exp(x))
// softplus(x) = log(1 + exp(beta * x)) / beta when beta * x <= threshold(beta =
// 1, threshold = 20 by default), otherwise x
template <typename T>
struct SoftplusFunctor : public BaseActivationFunctor<T> {
float beta;
float threshold;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"beta", &beta}, {"threshold", &threshold}};
}
template <typename Device, typename X, typename Out>
void operator()(Device d, X x, Out out) {
auto x_beta = static_cast<T>(beta) * x;
out.device(d) = (x_beta > static_cast<T>(threshold))
.select(x, (static_cast<T>(1) + x_beta.exp()).log() /
static_cast<T>(beta));
}
};
// For numerical stability, using the following formula instead of
// d(softplus(x))/dx = 1 / (1 + exp(-x))
// d(softplus(x))/dx = 1 / (1 + exp(-beta * x)) when beta * x <= threshold(beta
// = 1, threshold = 20 by default), otherwise x
template <typename T>
struct SoftplusGradFunctor : public BaseActivationFunctor<T> {
float beta;
float threshold;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"beta", &beta}, {"threshold", &threshold}};
}
template <typename Device, typename X, typename Out, typename dOut,
typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) {
auto x_beta = static_cast<T>(beta) * x;
dx.device(d) =
(x_beta > static_cast<T>(threshold))
.select(dout, dout / (static_cast<T>(1) + (-x_beta).exp()));
}
static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};
// mish(x) = x * tanh(softplus(x))
// softplus(x) = x, if x > threshold
// = ln(1 + exp(x)), otherwise
template <typename T>
struct MishFunctor : public BaseActivationFunctor<T> {
float threshold;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"threshold", &threshold}};
}
template <typename Device, typename X, typename Out>
void operator()(Device d, X x, Out out) {
auto sp = (x > static_cast<T>(threshold))
.select(x, (static_cast<T>(1) + x.exp()).log());
out.device(d) = x * sp.tanh();
}
};
// dx = dout * (tanh(sp) + x * (1 - tanh(sp) ** 2) * (1 - exp(-sp)))
// sp = softplus(x)
template <typename T>
struct MishGradFunctor : public BaseActivationFunctor<T> {
float threshold;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"threshold", &threshold}};
}
template <typename Device, typename X, typename Out, typename dOut,
typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) {
auto sp = (x > static_cast<T>(threshold))
.select(x, (static_cast<T>(1) + x.exp()).log());
auto gsp = static_cast<T>(1) - (-sp).exp();
auto tsp = sp.tanh();
dx.device(d) = dout * (tsp + x * (static_cast<T>(1) - tsp * tsp) * gsp);
}
static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};
// softsign(x) = x / (1 + |x|)
template <typename T>
struct SoftsignFunctor : public BaseActivationFunctor<T> {
template <typename Device, typename X, typename Out>
void operator()(Device d, X x, Out out) {
out.device(d) = x / (static_cast<T>(1) + x.abs());
}
};
// d(softsign(x))/dx = 1 / (1 + |x|)^2
// Taken from https://en.wikipedia.org/wiki/Activation_function
template <typename T>
struct SoftsignGradFunctor : public BaseActivationFunctor<T> {
template <typename Device, typename X, typename Out, typename dOut,
typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) {
dx.device(d) =
dout * (static_cast<T>(1) / (static_cast<T>(1) + x.abs()).square());
}
static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};
template <typename T>
struct SoftReluFunctor : public BaseActivationFunctor<T> {
float threshold;
......@@ -706,71 +543,6 @@ struct CELUGradFunctor : public BaseActivationFunctor<T> {
static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};
template <typename T>
struct LogitFunctor {
template <typename Device, typename X, typename Out, typename P>
void operator()(Device d, X x, Out out, P p, float eps) const {
// logit(x) = ln(x/(1-x))
auto tmp_x =
(x.cwiseMin(static_cast<T>(1.0 - eps))).cwiseMax(static_cast<T>(eps));
if (!eps) {
out.device(d) = (x < static_cast<T>(0.0) || x > static_cast<T>(1.0))
.select(p.constant(static_cast<T>(NAN)),
(tmp_x / (static_cast<T>(1) - tmp_x)).log());
} else {
out.device(d) = (tmp_x / (static_cast<T>(1) - tmp_x)).log();
}
}
};
template <typename T>
struct LogitGradFunctor {
template <typename Device, typename X, typename dOut, typename dX, typename P>
void operator()(Device d, X x, dOut dout, dX dx, P p, float eps) const {
// logit(x)' = 1/(x*(1-x))
dx.device(d) =
(x < static_cast<T>(eps) || x > static_cast<T>(1.0 - eps))
.select(p.constant(static_cast<T>(0)),
dout * (static_cast<T>(1) / ((static_cast<T>(1) - x) * x)));
}
};
template <typename T>
struct STanhFunctor : public BaseActivationFunctor<T> {
float scale_a;
float scale_b;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"scale_a", &scale_a}, {"scale_b", &scale_b}};
}
template <typename Device, typename X, typename Out>
void operator()(Device d, X x, Out out) const {
out.device(d) =
static_cast<T>(scale_b) * (static_cast<T>(scale_a) * x).tanh();
}
};
template <typename T>
struct STanhGradFunctor : public BaseActivationFunctor<T> {
float scale_a;
float scale_b;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"scale_a", &scale_a}, {"scale_b", &scale_b}};
}
template <typename Device, typename X, typename Out, typename dOut,
typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
auto a = static_cast<T>(scale_a);
auto b = static_cast<T>(scale_b);
auto temp = (a * x).tanh() * (a * x).tanh();
dx.device(d) = dout * a * b * (static_cast<T>(1) - temp);
}
static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};
template <typename T>
struct AbsGradGradFunctor : public BaseActivationFunctor<T> {
template <typename Device>
......@@ -831,68 +603,6 @@ struct CELUGradGradFunctor : public BaseActivationFunctor<T> {
static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};
template <typename T>
struct SqrtGradGradFunctor : public BaseActivationFunctor<T> {
template <typename Device>
void operator()(const Device& dev, const framework::Tensor* Out,
const framework::Tensor* ddX, framework::Tensor* ddOut,
framework::Tensor* dOut, const framework::Tensor* dX) const {
auto* d = dev.eigen_device();
auto ddx = framework::EigenVector<T>::Flatten(
GET_DATA_SAFELY(ddX, "Input", "DDX", "SqrtGradGrad"));
auto out = framework::EigenVector<T>::Flatten(
GET_DATA_SAFELY(Out, "Output", "Out", "SqrtGradGrad"));
// sqrt GradGrad: ddy = 0.5 * ddx / y, dy = -1 * dx * ddx
// calculate dy first, so ddy can inplace ddx
if (dOut) {
auto dx = framework::EigenVector<T>::Flatten(
GET_DATA_SAFELY(dX, "Output", "DX", "SqrtGradGrad"));
auto dout = framework::EigenVector<T>::Flatten(
GET_DATA_SAFELY(dOut, "Output", "DOut", "SqrtGradGrad"));
dout.device(*d) = dx * ddx * static_cast<T>(-1) / out;
}
if (ddOut) {
auto ddout = framework::EigenVector<T>::Flatten(
GET_DATA_SAFELY(ddOut, "Output", "DDOut", "SqrtGradGrad"));
ddout.device(*d) = ddx * static_cast<T>(0.5) / out;
}
}
static constexpr ActBwdOpFwdDeps FwdDeps() {
return ActBwdOpFwdDeps::kDepOut;
}
};
template <typename T>
struct RsqrtGradGradFunctor : public BaseActivationFunctor<T> {
template <typename Device>
void operator()(const Device& dev, const framework::Tensor* Out,
const framework::Tensor* ddX, framework::Tensor* ddOut,
framework::Tensor* dOut, const framework::Tensor* dX) const {
auto* d = dev.eigen_device();
auto ddx = framework::EigenVector<T>::Flatten(
GET_DATA_SAFELY(ddX, "Input", "DDX", "RsqrtGradGrad"));
auto out = framework::EigenVector<T>::Flatten(
GET_DATA_SAFELY(Out, "Output", "Out", "RsqrtGradGrad"));
// rsqrt GradGrad: ddy = -0.5 * ddx * y * y * y, dy = (3/y) * dx * ddx
if (dOut) {
auto dx = framework::EigenVector<T>::Flatten(
GET_DATA_SAFELY(dX, "Output", "DX", "RsqrtGradGrad"));
auto dout = framework::EigenVector<T>::Flatten(
GET_DATA_SAFELY(dOut, "Output", "DOut", "RsqrtGradGrad"));
dout.device(*d) = (static_cast<T>(3.0) / out) * dx * ddx;
}
if (ddOut) {
auto ddout = framework::EigenVector<T>::Flatten(
GET_DATA_SAFELY(ddOut, "Output", "DDOut", "RsqrtGradGrad"));
ddout.device(*d) = ddx * static_cast<T>(-0.5) * out * out * out;
}
}
static constexpr ActBwdOpFwdDeps FwdDeps() {
return ActBwdOpFwdDeps::kDepOut;
}
};
template <typename T>
struct SquareGradGradFunctor : public BaseActivationFunctor<T> {
template <typename Device>
......@@ -988,6 +698,29 @@ class SquareDoubleGradKernel
}
};
template <typename T>
struct SoftsignFunctor : public BaseActivationFunctor<T> {
template <typename Device, typename X, typename Out>
void operator()(Device d, X x, Out out) const {
out.device(d) = x / (static_cast<T>(1) + x.abs());
}
};
// d(softsign(x))/dx = 1 / (1 + |x|)^2
// Taken from https://en.wikipedia.org/wiki/Activation_function
template <typename T>
struct SoftsignGradFunctor : public BaseActivationFunctor<T> {
template <typename Device, typename X, typename Out, typename dOut,
typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
dx.device(d) =
dout * (static_cast<T>(1) / (static_cast<T>(1) + x.abs()).square());
}
static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};
template <typename DeviceContext, typename Functor>
class CELUDoubleGradKernel
: public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
......@@ -1135,57 +868,10 @@ class RsqrtDoubleGradKernel
}
};
template <typename DeviceContext, typename T>
class LogitKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto* out = context.Output<framework::Tensor>("Out");
auto* in = context.Input<framework::Tensor>("X");
auto eps = context.Attr<float>("eps");
out->mutable_data<T>(in->place());
auto eigen_out = framework::EigenVector<T>::Flatten(*out);
auto eigen_in = framework::EigenVector<T>::Flatten(*in);
auto& place =
*context.template device_context<DeviceContext>().eigen_device();
auto eigen_p = framework::EigenVector<T>::Flatten(*out);
LogitFunctor<T> functor;
functor(place, eigen_in, eigen_out, eigen_p, eps);
}
};
template <typename DeviceContext, typename T>
class LogitGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto* x = context.Input<framework::Tensor>("X");
auto* dout =
context.Input<framework::Tensor>(framework::GradVarName("Out"));
auto* dx = context.Output<framework::Tensor>(framework::GradVarName("X"));
auto eps = context.Attr<float>("eps");
dx->mutable_data<T>(dout->place());
auto eigen_x = framework::EigenVector<T>::Flatten(*x);
auto eigen_dout = framework::EigenVector<T>::Flatten(*dout);
auto eigen_dx = framework::EigenVector<T>::Flatten(*dx);
auto& place =
*context.template device_context<DeviceContext>().eigen_device();
auto eigen_p = framework::EigenVector<T>::Flatten(*x);
LogitGradFunctor<T> functor;
functor(place, eigen_x, eigen_dout, eigen_dx, eigen_p, eps);
}
};
} // namespace operators
} // namespace paddle
#define FOR_EACH_ACTIVATION_OP(__macro) \
__macro(reciprocal, Reciprocal, ReciprocalFunctor, ReciprocalGradFunctor); \
__macro(soft_relu, SoftRelu, SoftReluFunctor, SoftReluGradFunctor); \
__macro(stanh, STanh, STanhFunctor, STanhGradFunctor); \
__macro(softplus, Softplus, SoftplusFunctor, SoftplusGradFunctor); \
__macro(softsign, Softsign, SoftsignFunctor, SoftsignGradFunctor); \
__macro(relu6, Relu6, Relu6Functor, Relu6GradFunctor); \
__macro(mish, Mish, MishFunctor, MishGradFunctor);
#define FOR_EACH_ACTIVATION_OP(__macro) \
__macro(soft_relu, SoftRelu, SoftReluFunctor, SoftReluGradFunctor); \
__macro(softsign, Softsign, SoftsignFunctor, SoftsignGradFunctor); \
__macro(relu6, Relu6, Relu6Functor, Relu6GradFunctor);
......@@ -20,140 +20,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
template <typename T>
struct CudaReciprocalFunctor : public BaseActivationFunctor<T> {
T one = static_cast<T>(1.0f);
// reciprocal(x) = 1 / x
__device__ __forceinline__ T operator()(const T x) const { return one / x; }
};
template <typename T>
struct CudaReciprocalGradFunctor : public BaseActivationFunctor<T> {
// dx = -dout * out^2
__device__ __forceinline__ T operator()(const T dout, const T out) const {
return -dout * out * out;
}
static constexpr ActBwdOpFwdDeps FwdDeps() {
return ActBwdOpFwdDeps::kDepOut;
}
};
template <typename T>
struct CudaExpFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type;
// exp(x) = exp(x)
__device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(exp(x));
}
};
template <typename T>
struct CudaExpGradFunctor : public BaseActivationFunctor<T> {
// dx = dout * out
__device__ __forceinline__ T operator()(const T dout, const T out) const {
return dout * out;
}
static constexpr ActBwdOpFwdDeps FwdDeps() {
return ActBwdOpFwdDeps::kDepOut;
}
};
template <typename T>
struct CudaExpm1Functor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type;
// expm1(x) = expm1(x)
__device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(expm1(x));
}
};
template <typename T>
struct CudaExpm1GradFunctor : public BaseActivationFunctor<T> {
// dx = dout * out
__device__ __forceinline__ T operator()(const T dout, const T out) const {
return dout * out + dout;
}
static constexpr ActBwdOpFwdDeps FwdDeps() {
return ActBwdOpFwdDeps::kDepOut;
}
};
template <typename T>
struct CudaSquareFunctor : public BaseActivationFunctor<T> {
// square(x) = x * x
__device__ __forceinline__ T operator()(const T x) const { return x * x; }
};
template <typename T>
struct CudaSquareGradFunctor : public BaseActivationFunctor<T> {
T two = static_cast<T>(2.0f);
// dx = dout * 2 * x
__device__ __forceinline__ T operator()(const T dout, const T x) const {
return dout * two * x;
}
static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};
template <typename T>
struct CudaSqrtFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type;
// sqrt(x) = sqrt(x)
__device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(sqrt(x));
}
};
template <typename T>
struct CudaSqrtGradFunctor : public BaseActivationFunctor<T> {
T one_half = static_cast<T>(0.5f);
// dx = dout * 0.5 / out
__device__ __forceinline__ T operator()(const T dout, const T out) const {
return one_half * dout / out;
}
static constexpr ActBwdOpFwdDeps FwdDeps() {
return ActBwdOpFwdDeps::kDepOut;
}
};
template <typename T>
struct CudaRsqrtFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type;
// rsqrt(x) = rsqrt(x)
__device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(rsqrt(x));
}
};
template <typename T>
struct CudaRsqrtGradFunctor : public BaseActivationFunctor<T> {
T minus_one_half = static_cast<T>(-0.5f);
// dx = -0.5 * dout * out^3
__device__ __forceinline__ T operator()(const T dout, const T out) const {
return minus_one_half * dout * out * out * out;
}
static constexpr ActBwdOpFwdDeps FwdDeps() {
return ActBwdOpFwdDeps::kDepOut;
}
};
template <typename T>
struct CudaSoftReluFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type;
......@@ -201,119 +67,6 @@ struct CudaSoftReluGradFunctor : public BaseActivationFunctor<T> {
}
};
template <typename T>
struct CudaSTanhFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type;
float scale_a;
float scale_b;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"scale_a", &scale_a}, {"scale_b", &scale_b}};
}
// stanh(x) = b * tanh(a * x)
__device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x);
MPType a = static_cast<MPType>(scale_a);
MPType b = static_cast<MPType>(scale_b);
return static_cast<T>(b * tanh(a * x));
}
};
template <typename T>
struct CudaSTanhGradFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type;
MPType one = static_cast<MPType>(1.0f);
float scale_a;
float scale_b;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"scale_a", &scale_a}, {"scale_b", &scale_b}};
}
// dx = dout * a * b * (1 - tanh(a * x) * tanh(a * x))
__device__ __forceinline__ T operator()(const T arg_dout,
const T arg_x) const {
MPType dout = static_cast<MPType>(arg_dout);
MPType x = static_cast<MPType>(arg_x);
MPType a = static_cast<MPType>(scale_a);
MPType b = static_cast<MPType>(scale_b);
MPType temp = tanh(a * x);
return static_cast<T>(dout * a * b * (one - temp * temp));
}
static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};
template <typename T>
struct CudaSoftplusFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type;
MPType one = static_cast<MPType>(1.0f);
float beta;
float threshold;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"beta", &beta}, {"threshold", &threshold}};
}
// softplus(x) = beta * x > threshold ? x : log(1 + exp(beta * x)) / beta
__device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x);
MPType b = static_cast<MPType>(beta);
MPType t = static_cast<MPType>(threshold);
MPType x_beta = x * beta;
return static_cast<T>(x_beta > t ? x : log(one + exp(x_beta)) / b);
}
};
template <typename T>
struct CudaSoftplusGradFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type;
MPType one = static_cast<MPType>(1.0f);
float beta;
float threshold;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"beta", &beta}, {"threshold", &threshold}};
}
// dx = x * beta > threshold ? dout : dout / (1 + exp(-beta * x))
__device__ __forceinline__ T operator()(const T arg_dout,
const T arg_x) const {
MPType dout = static_cast<MPType>(arg_dout);
MPType x = static_cast<MPType>(arg_x);
MPType b = static_cast<MPType>(beta);
MPType t = static_cast<MPType>(threshold);
MPType x_beta = x * beta;
return x_beta > t ? arg_dout : static_cast<T>(dout / (one + exp(-x_beta)));
}
static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};
template <typename T>
struct CudaSoftsignFunctor : public BaseActivationFunctor<T> {
T one = static_cast<T>(1.0f);
// softsign(x) = x / (1 + abs(x))
__device__ __forceinline__ T operator()(const T x) const {
return x / (one + abs(x));
}
};
template <typename T>
struct CudaSoftsignGradFunctor : public BaseActivationFunctor<T> {
T one = static_cast<T>(1.0f);
// dx = dout / (1 + abs(x))^2
__device__ __forceinline__ T operator()(const T dout, const T x) const {
T temp = one + abs(x);
return dout / (temp * temp);
}
static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};
template <typename T>
struct CudaRelu6Functor : public BaseActivationFunctor<T> {
T zero = static_cast<T>(0.0f);
......@@ -351,49 +104,23 @@ struct CudaRelu6GradFunctor : public BaseActivationFunctor<T> {
};
template <typename T>
struct CudaMishFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type;
MPType one = static_cast<MPType>(1.0f);
float threshold;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"threshold", &threshold}};
}
struct CudaSoftsignFunctor : public BaseActivationFunctor<T> {
T one = static_cast<T>(1.0f);
// mish(x) = x * tanh(softplus(x))
// softplus(x) = x, if x > threshold
// = ln(1 + exp(x)), otherwise
// Inputs: args[0], the input x
__device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x);
MPType sp = (x > static_cast<MPType>(threshold)) ? x : log(one + exp(x));
return static_cast<T>(x * tanh(sp));
// softsign(x) = x / (1 + abs(x))
__device__ __forceinline__ T operator()(const T x) const {
return x / (one + abs(x));
}
};
template <typename T>
struct CudaMishGradFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type;
MPType one = static_cast<MPType>(1.0f);
float threshold;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"threshold", &threshold}};
}
struct CudaSoftsignGradFunctor : public BaseActivationFunctor<T> {
T one = static_cast<T>(1.0f);
// dx = dout * (tanh(sp) + x * (1 - tanh(sp) ** 2) * (1 - exp(-sp)))
// sp = softplus(x)
// Inputs: args[0], the input dout
// args[1], the input x
__device__ __forceinline__ T operator()(const T arg_dout,
const T arg_x) const {
MPType dout = static_cast<MPType>(arg_dout);
MPType x = static_cast<MPType>(arg_x);
MPType sp = (x > static_cast<MPType>(threshold)) ? x : log(one + exp(x));
MPType gsp =
(x > static_cast<MPType>(threshold)) ? one : one / (one + exp(-x));
MPType tsp = tanh(sp);
return static_cast<T>(dout * (tsp + x * (one - tsp * tsp) * gsp));
// dx = dout / (1 + abs(x))^2
__device__ __forceinline__ T operator()(const T dout, const T x) const {
T temp = one + abs(x);
return dout / (temp * temp);
}
static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
......@@ -558,6 +285,16 @@ using CudaCeilFunctor = phi::funcs::CudaCeilFunctor<T>;
template <typename T>
using CudaZeroGradFunctor = phi::funcs::CudaZeroGradFunctor<T>;
USE_PHI_FUNCTOR(CudaExp)
USE_PHI_FUNCTOR(CudaExpm1)
USE_PHI_FUNCTOR(CudaMish)
USE_PHI_FUNCTOR(CudaSTanh)
USE_PHI_FUNCTOR(CudaReciprocal)
USE_PHI_FUNCTOR(CudaSquare)
USE_PHI_FUNCTOR(CudaSqrt)
USE_PHI_FUNCTOR(CudaRsqrt)
USE_PHI_FUNCTOR(CudaSoftplus)
template <typename T>
using CudaELUGradNegativeAlphaFunctor =
phi::funcs::CudaELUGradNegativeAlphaFunctor<T>;
......@@ -636,8 +373,6 @@ REGISTER_OP_CUDA_KERNEL(
/* ========================================================================== */
/* =========================== sqrt register ============================= */
REGISTER_ACTIVATION_CUDA_KERNEL(sqrt, Sqrt, CudaSqrtFunctor,
CudaSqrtGradFunctor);
REGISTER_OP_CUDA_KERNEL(
sqrt_grad_grad,
......@@ -653,8 +388,6 @@ REGISTER_OP_CUDA_KERNEL(
/* =========================== rsqrt register =============================
*/
REGISTER_ACTIVATION_CUDA_KERNEL(rsqrt, Rsqrt, CudaRsqrtFunctor,
CudaRsqrtGradFunctor);
REGISTER_OP_CUDA_KERNEL(
rsqrt_grad_grad,
......@@ -667,8 +400,6 @@ REGISTER_OP_CUDA_KERNEL(
/* ========================================================================== */
/* =========================== square register ============================ */
REGISTER_ACTIVATION_CUDA_KERNEL_INT(square, Square, CudaSquareFunctor,
CudaSquareGradFunctor);
REGISTER_OP_CUDA_KERNEL(
square_grad_grad,
......@@ -688,75 +419,19 @@ REGISTER_OP_CUDA_KERNEL(
/* ========================== logit register ============================ */
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(
logit, ops::LogitKernel<paddle::platform::CUDADeviceContext, float>,
ops::LogitKernel<paddle::platform::CUDADeviceContext, double>,
ops::LogitKernel<paddle::platform::CUDADeviceContext,
paddle::platform::float16>);
REGISTER_OP_CUDA_KERNEL(
logit_grad,
ops::LogitGradKernel<paddle::platform::CUDADeviceContext, float>,
ops::LogitGradKernel<paddle::platform::CUDADeviceContext, double>,
ops::LogitGradKernel<paddle::platform::CUDADeviceContext,
paddle::platform::float16>);
/* ========================================================================== */
/* ========================== exp register ============================ */
REGISTER_OP_CUDA_KERNEL(
exp, ops::ActivationCudaKernel<plat::CUDADeviceContext,
ops::CudaExpFunctor<float>>,
ops::ActivationCudaKernel<plat::CUDADeviceContext,
ops::CudaExpFunctor<double>>,
ops::ActivationKernel<plat::CUDADeviceContext, ops::ExpFunctor<int>>,
ops::ActivationKernel<plat::CUDADeviceContext, ops::ExpFunctor<int64_t>>,
ops::ActivationCudaKernel<plat::CUDADeviceContext,
ops::CudaExpFunctor<plat::float16>>);
REGISTER_OP_CUDA_KERNEL(
exp_grad, ops::ActivationGradCudaKernel<plat::CUDADeviceContext,
ops::CudaExpGradFunctor<float>>,
ops::ActivationGradCudaKernel<plat::CUDADeviceContext,
ops::CudaExpGradFunctor<double>>,
ops::ActivationGradCudaKernel<plat::CUDADeviceContext,
ops::CudaExpGradFunctor<int>>,
ops::ActivationGradCudaKernel<plat::CUDADeviceContext,
ops::CudaExpGradFunctor<int64_t>>,
ops::ActivationGradCudaKernel<plat::CUDADeviceContext,
ops::CudaExpGradFunctor<plat::float16>>);
/* ========================================================================== */
/* ========================== expm1 register ============================ */
REGISTER_OP_CUDA_KERNEL(
expm1, ops::ActivationCudaKernel<plat::CUDADeviceContext,
ops::CudaExpm1Functor<float>>,
ops::ActivationCudaKernel<plat::CUDADeviceContext,
ops::CudaExpm1Functor<double>>,
ops::ActivationCudaKernel<plat::CUDADeviceContext,
ops::CudaExpm1Functor<plat::float16>>);
REGISTER_OP_CUDA_KERNEL(
expm1_grad, ops::ActivationGradCudaKernel<plat::CUDADeviceContext,
ops::CudaExpm1GradFunctor<float>>,
ops::ActivationGradCudaKernel<plat::CUDADeviceContext,
ops::CudaExpm1GradFunctor<double>>,
ops::ActivationGradCudaKernel<plat::CUDADeviceContext,
ops::CudaExpm1GradFunctor<plat::float16>>);
/* ========================================================================== */
#define FOR_EACH_ACTIVATION_CUDA_OP(__macro) \
__macro(softshrink, SoftShrink, CudaSoftShrinkFunctor, \
CudaSoftShrinkGradFunctor); \
__macro(reciprocal, Reciprocal, CudaReciprocalFunctor, \
CudaReciprocalGradFunctor); \
__macro(soft_relu, SoftRelu, CudaSoftReluFunctor, CudaSoftReluGradFunctor); \
__macro(stanh, STanh, CudaSTanhFunctor, CudaSTanhGradFunctor); \
__macro(softplus, Softplus, CudaSoftplusFunctor, CudaSoftplusGradFunctor); \
__macro(softsign, Softsign, CudaSoftsignFunctor, CudaSoftsignGradFunctor); \
__macro(relu6, Relu6, CudaRelu6Functor, CudaRelu6GradFunctor); \
__macro(tanh_shrink, TanhShrink, CudaTanhShrinkFunctor, \
CudaTanhShrinkGradFunctor); \
__macro(hard_shrink, HardShrink, CudaHardShrinkFunctor, \
CudaHardShrinkGradFunctor); \
__macro(mish, Mish, CudaMishFunctor, CudaMishGradFunctor);
__macro(softsign, Softsign, CudaSoftsignFunctor, CudaSoftsignGradFunctor);
FOR_EACH_ACTIVATION_CUDA_OP(REGISTER_ACTIVATION_CUDA_KERNEL)
#ifdef PADDLE_WITH_XPU_KP
......
......@@ -22,9 +22,9 @@ math_library(sampler DEPS generator)
math_library(maxouting)
if(WITH_MKLDNN)
math_library(selected_rows_functor DEPS selected_rows_utils math_function blas mkldnn_axpy_handler)
math_library(selected_rows_functor DEPS selected_rows_utils math_function blas mkldnn_axpy_handler mixed_vector)
else()
math_library(selected_rows_functor DEPS selected_rows_utils math_function blas)
math_library(selected_rows_functor DEPS selected_rows_utils math_function blas mixed_vector)
endif()
math_library(sequence_padding)
......
......@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/fluid/framework/mixed_vector.h"
#include "paddle/fluid/platform/device/device_wrapper.h"
#ifdef PADDLE_WITH_MKLDNN
......
......@@ -19,56 +19,6 @@ namespace operators {
using Tensor = framework::Tensor;
using DataLayout = framework::DataLayout;
template <typename T>
void TemporalShiftFwNCHW(const T* input, T* output, const int ntchw,
const int tchw, const int chw, const int hw,
const int t, const int c1, const int c2) {
int src_it = 0;
for (int i = 0; i < ntchw; i++) {
int it = (i % tchw) / chw;
int ic = (i % chw) / hw;
if (ic < c1) {
src_it = it - 1;
} else if (ic < c2) {
src_it = it + 1;
} else {
src_it = it;
}
if (src_it < 0 || src_it >= t) {
output[i] = 0;
} else {
output[i] = input[i + (src_it - it) * chw];
}
}
}
template <typename T>
void TemporalShiftFwNHWC(const T* input, T* output, const int nthwc,
const int thwc, const int hwc, const int t,
const int c, const int c1, const int c2) {
int src_it = 0;
for (int i = 0; i < nthwc; i++) {
int it = (i % thwc) / hwc;
int ic = i % c;
if (ic < c1) {
src_it = it - 1;
} else if (ic < c2) {
src_it = it + 1;
} else {
src_it = it;
}
if (src_it < 0 || src_it >= t) {
output[i] = 0;
} else {
output[i] = input[i + (src_it - it) * hwc];
}
}
}
template <typename T>
void TemporalShiftBwNCHW(const T* output_grad, T* input_grad, const int ntchw,
const int tchw, const int chw, const int hw,
......@@ -122,45 +72,7 @@ void TemporalShiftBwNHWC(const T* output_grad, T* input_grad, const int nthwc,
template <typename T>
class TemporalShiftKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* input = ctx.Input<Tensor>("X");
auto* output = ctx.Output<Tensor>("Out");
int t = ctx.Attr<int>("seg_num");
float shift_ratio = ctx.Attr<float>("shift_ratio");
const std::string data_format_str = ctx.Attr<std::string>("data_format");
const DataLayout data_layout =
framework::StringToDataLayout(data_format_str);
const int nt = input->dims()[0];
const int c = (data_layout == DataLayout::kNCHW ? input->dims()[1]
: input->dims()[3]);
const int h = (data_layout == DataLayout::kNCHW ? input->dims()[2]
: input->dims()[1]);
const int w = (data_layout == DataLayout::kNCHW ? input->dims()[3]
: input->dims()[2]);
const int hw = h * w;
const int chw = c * hw;
const int tchw = t * chw;
const int ntchw = nt * chw;
const int c1 = static_cast<int>(c * shift_ratio);
const int c2 = static_cast<int>(c * 2 * shift_ratio);
framework::DDim out_dims =
(data_layout == DataLayout::kNCHW ? phi::make_ddim({nt, c, h, w})
: phi::make_ddim({nt, h, w, c}));
const T* input_data = input->data<T>();
T* output_data = output->mutable_data<T>(out_dims, ctx.GetPlace());
if (data_layout == DataLayout::kNCHW) {
TemporalShiftFwNCHW<T>(input_data, output_data, ntchw, tchw, chw, hw, t,
c1, c2);
} else {
TemporalShiftFwNHWC<T>(input_data, output_data, ntchw, tchw, chw, t, c,
c1, c2);
}
}
void Compute(const framework::ExecutionContext& ctx) const override {}
};
template <typename T>
......
......@@ -53,6 +53,13 @@ DECLARE_ACTIVATION_KERNEL(Acosh)
DECLARE_ACTIVATION_KERNEL(Atanh)
DECLARE_ACTIVATION_KERNEL(Relu)
DECLARE_ACTIVATION_KERNEL(Tanh)
DECLARE_ACTIVATION_KERNEL(Exp)
DECLARE_ACTIVATION_KERNEL(Expm1)
DECLARE_ACTIVATION_KERNEL(Reciprocal)
DECLARE_ACTIVATION_KERNEL(Square)
DECLARE_ACTIVATION_KERNEL(Sqrt)
DECLARE_ACTIVATION_KERNEL(Rsqrt)
DECLARE_ACTIVATION_KERNEL(TanhShrink)
DECLARE_ACTIVATION_KERNEL(Silu)
DECLARE_ACTIVATION_KERNEL(Sigmoid)
......@@ -73,8 +80,23 @@ DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(Elu, alpha)
DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(Swish, beta)
DECLARE_ACTIVATION_KERNEL_WITH_TWO_ATTRS(BRelu, t_min, t_max)
DECLARE_ACTIVATION_KERNEL_WITH_TWO_ATTRS(STanh, scale_a, scale_b)
DECLARE_ACTIVATION_KERNEL_WITH_TWO_ATTRS(HardSigmoid, slope, offset)
DECLARE_ACTIVATION_KERNEL_WITH_TWO_ATTRS(Softplus, beta, threshold)
template <typename T, typename Context>
void LogitKernel(const Context& dev_ctx,
const DenseTensor& x,
float eps,
DenseTensor* out);
template <typename T, typename Context>
void MishKernel(const Context& dev_ctx,
const DenseTensor& x,
float threshold,
DenseTensor* out);
template <typename T, typename Context>
void HardSwishKernel(const Context& dev_ctx,
const DenseTensor& x,
......
......@@ -129,6 +129,13 @@ DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(Acosh, AcoshGradFunctor);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(Atanh, AtanhGradFunctor);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(TanhShrink, TanhShrinkGradFunctor);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(Silu, SiluGradFunctor);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(Square, SquareGradFunctor);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Exp, ExpGradFunctor);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Expm1, Expm1GradFunctor);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Reciprocal, ReciprocalGradFunctor);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Sqrt, SqrtGradFunctor);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Rsqrt, RsqrtGradFunctor);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(LogSigmoid, LogSigmoidGradFunctor);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(Log, LogGradFunctor);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(Log2, Log2GradFunctor);
......@@ -157,11 +164,24 @@ DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(HardShrink,
threshold);
DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(Swish, SwishGradFunctor, beta);
DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(Mish,
MishGradFunctor,
threshold);
DEFINE_CPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX(BRelu,
BReluGradFunctor,
t_min,
t_max);
DEFINE_CPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX(STanh,
STanhGradFunctor,
scale_a,
scale_b);
DEFINE_CPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX(Softplus,
SoftplusGradFunctor,
beta,
threshold);
DEFINE_CPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPOUT(HardSigmoid,
HardSigmoidGradFunctor,
slope,
......@@ -247,6 +267,12 @@ PD_REGISTER_ACTIVATION_GRAD_KERNEL(hard_shrink_grad, HardShrinkGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(tanh_shrink_grad, TanhShrinkGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(elu_grad, EluGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(silu_grad, SiluGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(mish_grad, MishGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(stanh_grad, STanhGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(reciprocal_grad, ReciprocalGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(sqrt_grad, SqrtGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(rsqrt_grad, RsqrtGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(softplus_grad, SoftplusGradKernel)
PD_REGISTER_ACTIVATION_DOUBLE_GRAD_KERNEL(relu_double_grad,
ReluDoubleGradKernel)
......@@ -263,6 +289,34 @@ PD_REGISTER_KERNEL(tanh_triple_grad,
float,
double,
phi::dtype::float16) {}
PD_REGISTER_KERNEL(exp_grad,
CPU,
ALL_LAYOUT,
phi::ExpGradKernel,
float,
double,
int,
int64_t) {}
PD_REGISTER_KERNEL(expm1_grad,
CPU,
ALL_LAYOUT,
phi::Expm1GradKernel,
float,
double,
phi::dtype::float16) {}
PD_REGISTER_KERNEL(
logit_grad, CPU, ALL_LAYOUT, phi::LogitGradKernel, float, double) {}
PD_REGISTER_KERNEL(square_grad,
CPU,
ALL_LAYOUT,
phi::SquareGradKernel,
float,
double,
int,
int64_t) {}
PD_REGISTER_ACTIVATION_GRAD_KERNEL(sigmoid_grad, SigmoidGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(sigmoid_double_grad, SigmoidDoubleGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(sigmoid_triple_grad, SigmoidTripleGradKernel)
......
......@@ -15,6 +15,7 @@ limitations under the License. */
#include "paddle/phi/kernels/activation_kernel.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/activation_functor.h"
#include "paddle/phi/kernels/impl/activation_impl.h"
namespace phi {
......@@ -72,6 +73,12 @@ DEFINE_CPU_ACTIVATION_KERNEL(Relu, ReluCPUFunctor)
DEFINE_CPU_ACTIVATION_KERNEL(Tanh, TanhFunctor)
DEFINE_CPU_ACTIVATION_KERNEL(TanhShrink, TanhShrinkFunctor)
DEFINE_CPU_ACTIVATION_KERNEL(Silu, SiluFunctor)
DEFINE_CPU_ACTIVATION_KERNEL(Exp, ExpFunctor)
DEFINE_CPU_ACTIVATION_KERNEL(Expm1, Expm1Functor)
DEFINE_CPU_ACTIVATION_KERNEL(Reciprocal, ReciprocalFunctor)
DEFINE_CPU_ACTIVATION_KERNEL(Square, SquareFunctor)
DEFINE_CPU_ACTIVATION_KERNEL(Sqrt, SqrtFunctor)
DEFINE_CPU_ACTIVATION_KERNEL(Rsqrt, RsqrtFunctor)
DEFINE_CPU_ACTIVATION_KERNEL(Sigmoid, SigmoidFunctor)
DEFINE_CPU_ACTIVATION_KERNEL(LogSigmoid, LogSigmoidFunctor)
DEFINE_CPU_ACTIVATION_KERNEL(Log, LogFunctor)
......@@ -83,15 +90,19 @@ DEFINE_CPU_ACTIVATION_KERNEL(Floor, FloorFunctor)
DEFINE_CPU_ACTIVATION_KERNEL(Ceil, CeilFunctor)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(LeakyRelu, LeakyReluFunctor, alpha)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(ThresholdedRelu,
ThresholdedReluFunctor,
threshold)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(Mish, MishFunctor, threshold)
DEFINE_CPU_ACT_KERNEL_WITH_TWO_ATTRS(BRelu, BReluFunctor, t_min, t_max)
DEFINE_CPU_ACT_KERNEL_WITH_TWO_ATTRS(STanh, STanhFunctor, scale_a, scale_b)
DEFINE_CPU_ACT_KERNEL_WITH_TWO_ATTRS(Softplus, SoftplusFunctor, beta, threshold)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(HardShrink, HardShrinkFunctor, threshold)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(SoftShrink, SoftShrinkFunctor, lambda)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(Elu, ELUFunctor, alpha)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(Swish, SwishFunctor, beta)
DEFINE_CPU_ACT_KERNEL_WITH_TWO_ATTRS(BRelu, BReluFunctor, t_min, t_max)
DEFINE_CPU_ACT_KERNEL_WITH_TWO_ATTRS(HardSigmoid,
HardSigmoidFunctor,
slope,
......@@ -139,6 +150,25 @@ PD_REGISTER_ACTIVATION_KERNEL(soft_shrink, SoftShrinkKernel)
PD_REGISTER_ACTIVATION_KERNEL(tanh_shrink, TanhShrinkKernel)
PD_REGISTER_ACTIVATION_KERNEL(elu, EluKernel)
PD_REGISTER_ACTIVATION_KERNEL(silu, SiluKernel)
PD_REGISTER_ACTIVATION_KERNEL(mish, MishKernel)
PD_REGISTER_ACTIVATION_KERNEL(stanh, STanhKernel)
PD_REGISTER_ACTIVATION_KERNEL(reciprocal, ReciprocalKernel)
PD_REGISTER_ACTIVATION_KERNEL(sqrt, SqrtKernel)
PD_REGISTER_ACTIVATION_KERNEL(rsqrt, RsqrtKernel)
PD_REGISTER_ACTIVATION_KERNEL(softplus, SoftplusKernel)
PD_REGISTER_KERNEL(
exp, CPU, ALL_LAYOUT, phi::ExpKernel, float, double, int, int64_t) {}
PD_REGISTER_KERNEL(expm1,
CPU,
ALL_LAYOUT,
phi::Expm1Kernel,
float,
double,
phi::dtype::float16) {}
PD_REGISTER_KERNEL(logit, CPU, ALL_LAYOUT, phi::LogitKernel, float, double) {}
PD_REGISTER_KERNEL(
square, CPU, ALL_LAYOUT, phi::SquareKernel, float, double, int, int64_t) {}
PD_REGISTER_ACTIVATION_KERNEL(sigmoid, SigmoidKernel)
PD_REGISTER_ACTIVATION_KERNEL(logsigmoid, LogSigmoidKernel)
PD_REGISTER_ACTIVATION_KERNEL(hard_sigmoid, HardSigmoidKernel)
......
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/temporal_shift_grad_kernel.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/common/layout.h"
#include "paddle/phi/core/kernel_registry.h"
namespace phi {
template <typename T>
void TemporalShiftBwNCHW(const T* output_grad,
T* input_grad,
const int ntchw,
const int tchw,
const int chw,
const int hw,
const int t,
const int c1,
const int c2) {
int src_it = 0;
for (int i = 0; i < ntchw; i++) {
int it = (i % tchw) / chw;
int ic = (i % chw) / hw;
if (ic < c1) {
src_it = it + 1;
} else if (ic < c2) {
src_it = it - 1;
} else {
src_it = it;
}
if (src_it >= 0 && src_it < t) {
input_grad[i] = output_grad[i + (src_it - it) * chw];
} else {
input_grad[i] = 0;
}
}
}
template <typename T>
void TemporalShiftBwNHWC(const T* output_grad,
T* input_grad,
const int nthwc,
const int thwc,
const int hwc,
const int t,
const int c,
const int c1,
const int c2) {
int src_it = 0;
for (int i = 0; i < nthwc; i++) {
int it = (i % thwc) / hwc;
int ic = i % c;
if (ic < c1) {
src_it = it + 1;
} else if (ic < c2) {
src_it = it - 1;
} else {
src_it = it;
}
if (src_it >= 0 && src_it < t) {
input_grad[i] = output_grad[i + (src_it - it) * hwc];
} else {
input_grad[i] = 0;
}
}
}
template <typename T, typename Context>
void TemporalShiftGradKernel(const Context& dev_ctx,
const DenseTensor& out_grad,
int seg_num,
float shift_ratio,
const std::string& data_format_str,
DenseTensor* x_grad) {
auto* input_grad = x_grad;
auto* output_grad = &out_grad;
int t = seg_num;
const DataLayout data_layout =
paddle::framework::StringToDataLayout(data_format_str);
const int nt = output_grad->dims()[0];
const int c = (data_layout == DataLayout::kNCHW ? output_grad->dims()[1]
: output_grad->dims()[3]);
const int h = (data_layout == DataLayout::kNCHW ? output_grad->dims()[2]
: output_grad->dims()[1]);
const int w = (data_layout == DataLayout::kNCHW ? output_grad->dims()[3]
: output_grad->dims()[2]);
const int hw = h * w;
const int chw = c * hw;
const int tchw = t * chw;
const int ntchw = nt * chw;
const int c1 = static_cast<int>(c * shift_ratio);
const int c2 = static_cast<int>(c * 2 * shift_ratio);
DDim in_grad_dims =
(data_layout == DataLayout::kNCHW ? phi::make_ddim({nt, c, h, w})
: phi::make_ddim({nt, h, w, c}));
const T* output_grad_data = output_grad->data<T>();
T* input_grad_data =
input_grad->mutable_data<T>(in_grad_dims, dev_ctx.GetPlace());
if (data_layout == DataLayout::kNCHW) {
TemporalShiftBwNCHW<T>(
output_grad_data, input_grad_data, ntchw, tchw, chw, hw, t, c1, c2);
} else {
TemporalShiftBwNHWC<T>(
output_grad_data, input_grad_data, ntchw, tchw, chw, t, c, c1, c2);
}
}
} // namespace phi
PD_REGISTER_KERNEL(temporal_shift_grad,
CPU,
ALL_LAYOUT,
phi::TemporalShiftGradKernel,
float,
double) {}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/temporal_shift_kernel.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/common/layout.h"
#include "paddle/phi/core/kernel_registry.h"
namespace phi {
template <typename T>
void TemporalShiftFwNCHW(const T* input,
T* output,
const int ntchw,
const int tchw,
const int chw,
const int hw,
const int t,
const int c1,
const int c2) {
int src_it = 0;
for (int i = 0; i < ntchw; i++) {
int it = (i % tchw) / chw;
int ic = (i % chw) / hw;
if (ic < c1) {
src_it = it - 1;
} else if (ic < c2) {
src_it = it + 1;
} else {
src_it = it;
}
if (src_it < 0 || src_it >= t) {
output[i] = 0;
} else {
output[i] = input[i + (src_it - it) * chw];
}
}
}
template <typename T>
void TemporalShiftFwNHWC(const T* input,
T* output,
const int nthwc,
const int thwc,
const int hwc,
const int t,
const int c,
const int c1,
const int c2) {
int src_it = 0;
for (int i = 0; i < nthwc; i++) {
int it = (i % thwc) / hwc;
int ic = i % c;
if (ic < c1) {
src_it = it - 1;
} else if (ic < c2) {
src_it = it + 1;
} else {
src_it = it;
}
if (src_it < 0 || src_it >= t) {
output[i] = 0;
} else {
output[i] = input[i + (src_it - it) * hwc];
}
}
}
template <typename T, typename Context>
void TemporalShiftKernel(const Context& dev_ctx,
const DenseTensor& x,
int seg_num,
float shift_ratio,
const std::string& data_format_str,
DenseTensor* out) {
auto* input = &x;
auto* output = out;
int t = seg_num;
const DataLayout data_layout =
paddle::framework::StringToDataLayout(data_format_str);
const int nt = input->dims()[0];
const int c =
(data_layout == DataLayout::kNCHW ? input->dims()[1] : input->dims()[3]);
const int h =
(data_layout == DataLayout::kNCHW ? input->dims()[2] : input->dims()[1]);
const int w =
(data_layout == DataLayout::kNCHW ? input->dims()[3] : input->dims()[2]);
const int hw = h * w;
const int chw = c * hw;
const int tchw = t * chw;
const int ntchw = nt * chw;
const int c1 = static_cast<int>(c * shift_ratio);
const int c2 = static_cast<int>(c * 2 * shift_ratio);
DDim out_dims =
(data_layout == DataLayout::kNCHW ? phi::make_ddim({nt, c, h, w})
: phi::make_ddim({nt, h, w, c}));
const T* input_data = input->data<T>();
T* output_data = output->mutable_data<T>(out_dims, dev_ctx.GetPlace());
if (data_layout == DataLayout::kNCHW) {
TemporalShiftFwNCHW<T>(
input_data, output_data, ntchw, tchw, chw, hw, t, c1, c2);
} else {
TemporalShiftFwNHWC<T>(
input_data, output_data, ntchw, tchw, chw, t, c, c1, c2);
}
}
} // namespace phi
PD_REGISTER_KERNEL(
temporal_shift, CPU, ALL_LAYOUT, phi::TemporalShiftKernel, float, double) {}
......@@ -106,6 +106,31 @@ struct SinFunctor : public BaseActivationFunctor<T> {
}
};
// reciprocal(x) = 1 / x
template <typename T>
struct ReciprocalFunctor : public BaseActivationFunctor<T> {
template <typename Device, typename X, typename Out>
void operator()(Device d, X x, Out out) const {
out.device(d) = static_cast<T>(1) / x;
}
};
template <typename T>
struct ReciprocalGradFunctor : public BaseActivationFunctor<T> {
template <typename Device,
typename X,
typename Out,
typename dOut,
typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
dx.device(d) = dout * static_cast<T>(-1) * out * out;
}
static constexpr ActBwdOpFwdDeps FwdDeps() {
return ActBwdOpFwdDeps::kDepOut;
}
};
// cosine'(x) = -sin(x)
template <typename T>
struct CosGradFunctor : public BaseActivationFunctor<T> {
......@@ -130,6 +155,108 @@ struct CosFunctor : public BaseActivationFunctor<T> {
}
};
template <typename T>
struct LogitFunctor {
template <typename Device, typename X, typename Out, typename P>
void operator()(Device d, X x, Out out, P p, float eps) const {
// logit(x) = ln(x/(1-x))
auto tmp_x =
(x.cwiseMin(static_cast<T>(1.0 - eps))).cwiseMax(static_cast<T>(eps));
if (!eps) {
out.device(d) = (x < static_cast<T>(0.0) || x > static_cast<T>(1.0))
.select(p.constant(static_cast<T>(NAN)),
(tmp_x / (static_cast<T>(1) - tmp_x)).log());
} else {
out.device(d) = (tmp_x / (static_cast<T>(1) - tmp_x)).log();
}
}
};
// mish(x) = x * tanh(softplus(x))
// softplus(x) = x, if x > threshold
// = ln(1 + exp(x)), otherwise
template <typename T>
struct MishFunctor : public BaseActivationFunctor<T> {
float threshold;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"threshold", &threshold}};
}
template <typename Device, typename X, typename Out>
void operator()(Device d, X x, Out out) const {
auto sp = (x > static_cast<T>(threshold))
.select(x, (static_cast<T>(1) + x.exp()).log());
out.device(d) = x * sp.tanh();
}
};
// dx = dout * (tanh(sp) + x * (1 - tanh(sp) ** 2) * (1 - exp(-sp)))
// sp = softplus(x)
template <typename T>
struct MishGradFunctor : public BaseActivationFunctor<T> {
float threshold;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"threshold", &threshold}};
}
template <typename Device,
typename X,
typename Out,
typename dOut,
typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
auto sp = (x > static_cast<T>(threshold))
.select(x, (static_cast<T>(1) + x.exp()).log());
auto gsp = static_cast<T>(1) - (-sp).exp();
auto tsp = sp.tanh();
dx.device(d) = dout * (tsp + x * (static_cast<T>(1) - tsp * tsp) * gsp);
}
static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};
template <typename T>
struct STanhFunctor : public BaseActivationFunctor<T> {
float scale_a;
float scale_b;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"scale_a", &scale_a}, {"scale_b", &scale_b}};
}
template <typename Device, typename X, typename Out>
void operator()(Device d, X x, Out out) const {
out.device(d) =
static_cast<T>(scale_b) * (static_cast<T>(scale_a) * x).tanh();
}
};
template <typename T>
struct STanhGradFunctor : public BaseActivationFunctor<T> {
float scale_a;
float scale_b;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"scale_a", &scale_a}, {"scale_b", &scale_b}};
}
template <typename Device,
typename X,
typename Out,
typename dOut,
typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
auto a = static_cast<T>(scale_a);
auto b = static_cast<T>(scale_b);
auto temp = (a * x).tanh() * (a * x).tanh();
dx.device(d) = dout * a * b * (static_cast<T>(1) - temp);
}
static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};
template <typename T>
struct Tangent {
HOSTDEVICE T operator()(const T& val) const { return tan(val); }
......@@ -157,6 +284,132 @@ struct TanGradFunctor : public BaseActivationFunctor<T> {
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; }
};
// square(x) = x^2
template <typename T>
struct SquareFunctor : public BaseActivationFunctor<T> {
template <typename Device, typename X, typename Out>
void operator()(Device d, X x, Out out) const {
out.device(d) = x.square();
}
};
template <typename T>
struct SquareGradFunctor : public BaseActivationFunctor<T> {
template <typename Device,
typename X,
typename Out,
typename dOut,
typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
dx.device(d) = dout * static_cast<T>(2) * x;
}
static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};
// sqrt(x) = x^(1/2)
template <typename T>
struct SqrtFunctor : public BaseActivationFunctor<T> {
template <typename Device, typename X, typename Out>
void operator()(Device d, X x, Out out) const {
out.device(d) = x.sqrt();
}
};
template <typename T>
struct SqrtGradFunctor : public BaseActivationFunctor<T> {
template <typename Device,
typename X,
typename Out,
typename dOut,
typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
dx.device(d) = static_cast<T>(0.5) * dout / out;
}
static constexpr ActBwdOpFwdDeps FwdDeps() {
return ActBwdOpFwdDeps::kDepOut;
}
};
// rsqrt(x) = x^(-1/2)
template <typename T>
struct RsqrtFunctor : public BaseActivationFunctor<T> {
template <typename Device, typename X, typename Out>
void operator()(Device d, X x, Out out) const {
out.device(d) = x.rsqrt();
}
};
template <typename T>
struct RsqrtGradFunctor : public BaseActivationFunctor<T> {
template <typename Device,
typename X,
typename Out,
typename dOut,
typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
dx.device(d) = static_cast<T>(-0.5) * dout * out * out * out;
}
static constexpr ActBwdOpFwdDeps FwdDeps() {
return ActBwdOpFwdDeps::kDepOut;
}
};
// // For numerical stability, using the following formula instead of
// softplus(x) =
// // log(1 + exp(x))
// // softplus(x) = log(1 + exp(beta * x)) / beta when beta * x <=
// threshold(beta =
// // 1, threshold = 20 by default), otherwise x
template <typename T>
struct SoftplusFunctor : public BaseActivationFunctor<T> {
float beta;
float threshold;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"beta", &beta}, {"threshold", &threshold}};
}
template <typename Device, typename X, typename Out>
void operator()(Device d, X x, Out out) const {
auto x_beta = static_cast<T>(beta) * x;
out.device(d) = (x_beta > static_cast<T>(threshold))
.select(x,
(static_cast<T>(1) + x_beta.exp()).log() /
static_cast<T>(beta));
}
};
// For numerical stability, using the following formula instead of
// d(softplus(x))/dx = 1 / (1 + exp(-x))
// d(softplus(x))/dx = 1 / (1 + exp(-beta * x)) when beta * x <= threshold(beta
// = 1, threshold = 20 by default), otherwise x
template <typename T>
struct SoftplusGradFunctor : public BaseActivationFunctor<T> {
float beta;
float threshold;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"beta", &beta}, {"threshold", &threshold}};
}
template <typename Device,
typename X,
typename Out,
typename dOut,
typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
auto x_beta = static_cast<T>(beta) * x;
dx.device(d) =
(x_beta > static_cast<T>(threshold))
.select(dout, dout / (static_cast<T>(1) + (-x_beta).exp()));
}
static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};
// Tangent(x) = tan(x)
template <typename T>
struct TanFunctor : public BaseActivationFunctor<T> {
......@@ -348,6 +601,18 @@ struct AtanGradFunctor : public BaseActivationFunctor<T> {
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; }
};
template <typename T>
struct LogitGradFunctor {
template <typename Device, typename X, typename dOut, typename dX, typename P>
void operator()(Device d, X x, dOut dout, dX dx, P p, float eps) const {
// logit(x)' = 1/(x*(1-x))
dx.device(d) =
(x < static_cast<T>(eps) || x > static_cast<T>(1.0 - eps))
.select(p.constant(static_cast<T>(0)),
dout * (static_cast<T>(1) / ((static_cast<T>(1) - x) * x)));
}
};
template <typename T>
struct Acosh {
HOSTDEVICE T operator()(const T& val) const { return acosh(val); }
......@@ -458,6 +723,57 @@ struct AtanhGradFunctor : public BaseActivationFunctor<T> {
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; }
};
// exp functor
// exp(x) = e^x
template <typename T>
struct ExpFunctor : public BaseActivationFunctor<T> {
template <typename Device, typename X, typename Out>
void operator()(Device d, X x, Out out) const {
out.device(d) = x.exp();
}
};
template <typename T>
struct ExpGradFunctor : public BaseActivationFunctor<T> {
template <typename Device,
typename X,
typename Out,
typename dOut,
typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
dx.device(d) = dout * out;
}
static constexpr ActBwdOpFwdDeps FwdDeps() {
return ActBwdOpFwdDeps::kDepOut;
}
};
// expm1(x) = e^x - 1
template <typename T>
struct Expm1Functor : public BaseActivationFunctor<T> {
template <typename Device, typename X, typename Out>
void operator()(Device d, X x, Out out) const {
out.device(d) = x.expm1();
}
};
template <typename T>
struct Expm1GradFunctor : public BaseActivationFunctor<T> {
template <typename Device,
typename X,
typename Out,
typename dOut,
typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
dx.device(d) = dout * out + dout;
}
static constexpr ActBwdOpFwdDeps FwdDeps() {
return ActBwdOpFwdDeps::kDepOut;
}
};
// relu(x) = max(x, 0)
template <typename T>
struct ReluCPUFunctor : public BaseActivationFunctor<T> {
......@@ -1560,6 +1876,90 @@ struct CudaCosGradFunctor : public BaseActivationFunctor<T> {
static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};
template <typename T>
struct CudaExpFunctor : public BaseActivationFunctor<T> {
using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
// exp(x) = exp(x)
__device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(exp(x));
}
};
template <typename T>
struct CudaSquareFunctor : public BaseActivationFunctor<T> {
// square(x) = x * x
__device__ __forceinline__ T operator()(const T x) const { return x * x; }
};
template <typename T>
struct CudaSquareGradFunctor : public BaseActivationFunctor<T> {
T two = static_cast<T>(2.0f);
// dx = dout * 2 * x
__device__ __forceinline__ T operator()(const T dout, const T x) const {
return dout * two * x;
}
static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};
template <typename T>
struct CudaExpGradFunctor : public BaseActivationFunctor<T> {
// dx = dout * out
__device__ __forceinline__ T operator()(const T dout, const T out) const {
return dout * out;
}
static constexpr ActBwdOpFwdDeps FwdDeps() {
return ActBwdOpFwdDeps::kDepOut;
}
};
template <typename T>
struct CudaReciprocalFunctor : public BaseActivationFunctor<T> {
T one = static_cast<T>(1.0f);
// reciprocal(x) = 1 / x
__device__ __forceinline__ T operator()(const T x) const { return one / x; }
};
template <typename T>
struct CudaReciprocalGradFunctor : public BaseActivationFunctor<T> {
// dx = -dout * out^2
__device__ __forceinline__ T operator()(const T dout, const T out) const {
return -dout * out * out;
}
static constexpr ActBwdOpFwdDeps FwdDeps() {
return ActBwdOpFwdDeps::kDepOut;
}
};
template <typename T>
struct CudaExpm1Functor : public BaseActivationFunctor<T> {
using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
// expm1(x) = expm1(x)
__device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(expm1(x));
}
};
template <typename T>
struct CudaExpm1GradFunctor : public BaseActivationFunctor<T> {
// dx = dout * out
__device__ __forceinline__ T operator()(const T dout, const T out) const {
return dout * out + dout;
}
static constexpr ActBwdOpFwdDeps FwdDeps() {
return ActBwdOpFwdDeps::kDepOut;
}
};
template <typename T>
struct CudaSinFunctor : public BaseActivationFunctor<T> {
using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
......@@ -1782,6 +2182,96 @@ struct CudaAtanhFunctor : public BaseActivationFunctor<T> {
}
};
template <typename T>
struct CudaSTanhFunctor : public BaseActivationFunctor<T> {
using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
float scale_a;
float scale_b;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"scale_a", &scale_a}, {"scale_b", &scale_b}};
}
// stanh(x) = b * tanh(a * x)
__device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x);
MPType a = static_cast<MPType>(scale_a);
MPType b = static_cast<MPType>(scale_b);
return static_cast<T>(b * tanh(a * x));
}
};
template <typename T>
struct CudaSTanhGradFunctor : public BaseActivationFunctor<T> {
using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
MPType one = static_cast<MPType>(1.0f);
float scale_a;
float scale_b;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"scale_a", &scale_a}, {"scale_b", &scale_b}};
}
// dx = dout * a * b * (1 - tanh(a * x) * tanh(a * x))
__device__ __forceinline__ T operator()(const T arg_dout,
const T arg_x) const {
MPType dout = static_cast<MPType>(arg_dout);
MPType x = static_cast<MPType>(arg_x);
MPType a = static_cast<MPType>(scale_a);
MPType b = static_cast<MPType>(scale_b);
MPType temp = tanh(a * x);
return static_cast<T>(dout * a * b * (one - temp * temp));
}
static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};
template <typename T>
struct CudaSoftplusFunctor : public BaseActivationFunctor<T> {
using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
MPType one = static_cast<MPType>(1.0f);
float beta;
float threshold;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"beta", &beta}, {"threshold", &threshold}};
}
// softplus(x) = beta * x > threshold ? x : log(1 + exp(beta * x)) / beta
__device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x);
MPType b = static_cast<MPType>(beta);
MPType t = static_cast<MPType>(threshold);
MPType x_beta = x * beta;
return static_cast<T>(x_beta > t ? x : log(one + exp(x_beta)) / b);
}
};
template <typename T>
struct CudaSoftplusGradFunctor : public BaseActivationFunctor<T> {
using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
MPType one = static_cast<MPType>(1.0f);
float beta;
float threshold;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"beta", &beta}, {"threshold", &threshold}};
}
// dx = x * beta > threshold ? dout : dout / (1 + exp(-beta * x))
__device__ __forceinline__ T operator()(const T arg_dout,
const T arg_x) const {
MPType dout = static_cast<MPType>(arg_dout);
MPType x = static_cast<MPType>(arg_x);
MPType b = static_cast<MPType>(beta);
MPType t = static_cast<MPType>(threshold);
MPType x_beta = x * beta;
return x_beta > t ? arg_dout : static_cast<T>(dout / (one + exp(-x_beta)));
}
static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};
template <typename T>
struct CudaAtanhGradFunctor : public BaseActivationFunctor<T> {
using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
......@@ -1797,6 +2287,56 @@ struct CudaAtanhGradFunctor : public BaseActivationFunctor<T> {
static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};
template <typename T>
struct CudaSqrtFunctor : public BaseActivationFunctor<T> {
using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
// sqrt(x) = sqrt(x)
__device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(sqrt(x));
}
};
template <typename T>
struct CudaSqrtGradFunctor : public BaseActivationFunctor<T> {
T one_half = static_cast<T>(0.5f);
// dx = dout * 0.5 / out
__device__ __forceinline__ T operator()(const T dout, const T out) const {
return one_half * dout / out;
}
static constexpr ActBwdOpFwdDeps FwdDeps() {
return ActBwdOpFwdDeps::kDepOut;
}
};
template <typename T>
struct CudaRsqrtFunctor : public BaseActivationFunctor<T> {
using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
// rsqrt(x) = rsqrt(x)
__device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(rsqrt(x));
}
};
template <typename T>
struct CudaRsqrtGradFunctor : public BaseActivationFunctor<T> {
T minus_one_half = static_cast<T>(-0.5f);
// dx = -0.5 * dout * out^3
__device__ __forceinline__ T operator()(const T dout, const T out) const {
return minus_one_half * dout * out * out * out;
}
static constexpr ActBwdOpFwdDeps FwdDeps() {
return ActBwdOpFwdDeps::kDepOut;
}
};
template <typename T>
struct CudaAtanFunctor : public BaseActivationFunctor<T> {
using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
......@@ -1864,6 +2404,55 @@ struct CudaBReluFunctor : public BaseActivationFunctor<T> {
}
};
template <typename T>
struct CudaMishFunctor : public BaseActivationFunctor<T> {
using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
MPType one = static_cast<MPType>(1.0f);
float threshold;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"threshold", &threshold}};
}
// mish(x) = x * tanh(softplus(x))
// softplus(x) = x, if x > threshold
// = ln(1 + exp(x)), otherwise
// Inputs: args[0], the input x
__device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x);
MPType sp = (x > static_cast<MPType>(threshold)) ? x : log(one + exp(x));
return static_cast<T>(x * tanh(sp));
}
};
template <typename T>
struct CudaMishGradFunctor : public BaseActivationFunctor<T> {
using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
MPType one = static_cast<MPType>(1.0f);
float threshold;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"threshold", &threshold}};
}
// dx = dout * (tanh(sp) + x * (1 - tanh(sp) ** 2) * (1 - exp(-sp)))
// sp = softplus(x)
// Inputs: args[0], the input dout
// args[1], the input x
__device__ __forceinline__ T operator()(const T arg_dout,
const T arg_x) const {
MPType dout = static_cast<MPType>(arg_dout);
MPType x = static_cast<MPType>(arg_x);
MPType sp = (x > static_cast<MPType>(threshold)) ? x : log(one + exp(x));
MPType gsp =
(x > static_cast<MPType>(threshold)) ? one : one / (one + exp(-x));
MPType tsp = tanh(sp);
return static_cast<T>(dout * (tsp + x * (one - tsp * tsp) * gsp));
}
static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};
template <typename T>
struct CudaBReluGradFunctor : public BaseActivationFunctor<T> {
T zero = static_cast<T>(0.0f);
......
......@@ -189,6 +189,13 @@ DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Acosh, CudaAcoshGradFunctor);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Atanh, CudaAtanhGradFunctor);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(TanhShrink, CudaTanhShrinkGradFunctor);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Silu, CudaSiluGradFunctor);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Square, CudaSquareGradFunctor);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Exp, CudaExpGradFunctor);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Expm1, CudaExpm1GradFunctor);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Reciprocal, CudaReciprocalGradFunctor);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Sqrt, CudaSqrtGradFunctor);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Rsqrt, CudaRsqrtGradFunctor);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(LogSigmoid, CudaLogSigmoidGradFunctor);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Log, CudaLogGradFunctor);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Log2, CudaLog2GradFunctor);
......@@ -211,11 +218,24 @@ DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(Swish,
CudaSwishGradFunctor,
beta);
DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(Mish,
CudaMishGradFunctor,
threshold);
DEFINE_GPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX(BRelu,
CudaBReluGradFunctor,
t_min,
t_max);
DEFINE_GPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX(STanh,
CudaSTanhGradFunctor,
scale_a,
scale_b);
DEFINE_GPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX(Softplus,
CudaSoftplusGradFunctor,
beta,
threshold);
DEFINE_GPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPOUT(HardSigmoid,
CudaHardSigmoidGradFunctor,
slope,
......@@ -326,12 +346,57 @@ PD_REGISTER_ACTIVATION_GRAD_KERNEL(leaky_relu_double_grad,
LeakyReluDoubleGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(thresholded_relu_grad,
ThresholdedReluGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(mish_grad, MishGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(stanh_grad, STanhGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(reciprocal_grad, ReciprocalGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(softplus_grad, SoftplusGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(sqrt_grad, SqrtGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(rsqrt_grad, RsqrtGradKernel)
PD_REGISTER_KERNEL(exp_grad,
GPU,
ALL_LAYOUT,
phi::ExpGradKernel,
float,
double,
int,
int64_t,
phi::dtype::float16) {}
PD_REGISTER_ACTIVATION_GRAD_KERNEL(soft_shrink_grad, SoftShrinkGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(hard_shrink_grad, HardShrinkGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(tanh_shrink_grad, TanhShrinkGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(silu_grad, SiluGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(elu_grad, EluGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(elu_double_grad, EluDoubleGradKernel)
PD_REGISTER_KERNEL(expm1_grad,
GPU,
ALL_LAYOUT,
phi::Expm1GradKernel,
float,
double,
phi::dtype::float16) {}
PD_REGISTER_KERNEL(logit_grad,
GPU,
ALL_LAYOUT,
phi::LogitGradKernel,
float,
double,
phi::dtype::float16) {}
PD_REGISTER_KERNEL(square_grad,
GPU,
ALL_LAYOUT,
phi::SquareGradKernel,
float,
double,
int,
int64_t,
phi::dtype::float16,
phi::dtype::bfloat16) {}
PD_REGISTER_ACTIVATION_GRAD_KERNEL(sigmoid_grad, SigmoidGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(sigmoid_double_grad, SigmoidDoubleGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(sigmoid_triple_grad, SigmoidTripleGradKernel)
......
......@@ -19,6 +19,7 @@ limitations under the License. */
#include "paddle/phi/common/float16.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/elementwise_base.h"
#include "paddle/phi/kernels/impl/activation_grad_impl.h"
#include "paddle/phi/kernels/impl/activation_impl.h"
#include "paddle/fluid/platform/device/gpu/gpu_device_function.h"
......@@ -91,6 +92,12 @@ DEFINE_GPU_ACTIVATION_KERNEL(Relu, CudaReluFunctor)
DEFINE_GPU_ACTIVATION_KERNEL(Tanh, CudaTanhFunctor)
DEFINE_GPU_ACTIVATION_KERNEL(TanhShrink, CudaTanhShrinkFunctor)
DEFINE_GPU_ACTIVATION_KERNEL(Silu, CudaSiluFunctor)
DEFINE_GPU_ACTIVATION_KERNEL(Exp, CudaExpFunctor)
DEFINE_GPU_ACTIVATION_KERNEL(Expm1, CudaExpm1Functor)
DEFINE_GPU_ACTIVATION_KERNEL(Reciprocal, CudaReciprocalFunctor)
DEFINE_GPU_ACTIVATION_KERNEL(Square, CudaSquareFunctor)
DEFINE_GPU_ACTIVATION_KERNEL(Sqrt, CudaSqrtFunctor)
DEFINE_GPU_ACTIVATION_KERNEL(Rsqrt, CudaRsqrtFunctor)
DEFINE_GPU_ACTIVATION_KERNEL(Sigmoid, CudaSigmoidFunctor)
DEFINE_GPU_ACTIVATION_KERNEL(LogSigmoid, CudaLogSigmoidFunctor)
DEFINE_GPU_ACTIVATION_KERNEL(Log, CudaLogFunctor)
......@@ -112,7 +119,14 @@ DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(SoftShrink, CudaSoftShrinkFunctor, lambda)
DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(Elu, CudaELUFunctor, alpha)
DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(Swish, CudaSwishFunctor, beta)
DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(Mish, CudaMishFunctor, threshold)
DEFINE_GPU_ACT_KERNEL_WITH_TWO_ATTRS(BRelu, CudaBReluFunctor, t_min, t_max)
DEFINE_GPU_ACT_KERNEL_WITH_TWO_ATTRS(Stanh, CudaSTanhFunctor, scale_a, scale_b)
DEFINE_GPU_ACT_KERNEL_WITH_TWO_ATTRS(Softplus,
CudaSoftplusFunctor,
beta,
threshold)
DEFINE_GPU_ACT_KERNEL_WITH_TWO_ATTRS(HardSigmoid,
CudaHardSigmoidFunctor,
slope,
......@@ -180,6 +194,46 @@ PD_REGISTER_ACTIVATION_KERNEL(tanh, TanhKernel)
PD_REGISTER_ACTIVATION_KERNEL(brelu, BReluKernel)
PD_REGISTER_ACTIVATION_KERNEL(thresholded_relu, ThresholdedReluKernel)
PD_REGISTER_ACTIVATION_KERNEL(leaky_relu, LeakyReluKernel)
PD_REGISTER_ACTIVATION_KERNEL(mish, MishKernel)
PD_REGISTER_ACTIVATION_KERNEL(stanh, StanhKernel)
PD_REGISTER_ACTIVATION_KERNEL(reciprocal, ReciprocalKernel)
PD_REGISTER_ACTIVATION_KERNEL(sqrt, SqrtKernel)
PD_REGISTER_ACTIVATION_KERNEL(rsqrt, RsqrtKernel)
PD_REGISTER_ACTIVATION_KERNEL(softplus, SoftplusKernel)
PD_REGISTER_KERNEL(exp,
GPU,
ALL_LAYOUT,
phi::ExpKernel,
float,
double,
int,
int64_t,
phi::dtype::float16) {}
PD_REGISTER_KERNEL(expm1,
GPU,
ALL_LAYOUT,
phi::Expm1Kernel,
float,
double,
phi::dtype::float16) {}
PD_REGISTER_KERNEL(logit,
GPU,
ALL_LAYOUT,
phi::LogitKernel,
float,
double,
phi::dtype::float16) {}
PD_REGISTER_KERNEL(square,
GPU,
ALL_LAYOUT,
phi::SquareKernel,
float,
double,
int,
int64_t,
phi::dtype::float16,
phi::dtype::bfloat16) {}
PD_REGISTER_ACTIVATION_KERNEL(hard_shrink, HardShrinkKernel)
PD_REGISTER_ACTIVATION_KERNEL(soft_shrink, SoftShrinkKernel)
PD_REGISTER_ACTIVATION_KERNEL(tanh_shrink, TanhShrinkKernel)
......
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/common/layout.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/temporal_shift_grad_kernel.h"
namespace phi {
template <typename T>
__global__ void KeTemporalShiftBwNCHW(const T* output_grad,
T* input_grad,
const int ntchw,
const int tchw,
const int chw,
const int hw,
const int t,
const int c1,
const int c2) {
int tid = blockIdx.x * blockDim.x + threadIdx.x;
int stride = blockDim.x * gridDim.x;
int src_it = 0;
for (; tid < ntchw; tid += stride) {
int it = (tid % tchw) / chw;
int ic = (tid % chw) / hw;
if (ic < c1) {
src_it = it + 1;
} else if (ic < c2) {
src_it = it - 1;
} else {
src_it = it;
}
if (src_it >= 0 && src_it < t) {
input_grad[tid] = output_grad[tid + (src_it - it) * chw];
} else {
input_grad[tid] = 0;
}
}
}
template <typename T>
__global__ void KeTemporalShiftBwNHWC(const T* output_grad,
T* input_grad,
const int nthwc,
const int thwc,
const int hwc,
const int t,
const int c,
const int c1,
const int c2) {
int tid = blockIdx.x * blockDim.x + threadIdx.x;
int stride = blockDim.x * gridDim.x;
int src_it = 0;
for (; tid < nthwc; tid += stride) {
int it = (tid % thwc) / hwc;
int ic = tid % c;
if (ic < c1) {
src_it = it + 1;
} else if (ic < c2) {
src_it = it - 1;
} else {
src_it = it;
}
if (src_it >= 0 && src_it < t) {
input_grad[tid] = output_grad[tid + (src_it - it) * hwc];
} else {
input_grad[tid] = 0;
}
}
}
template <typename T, typename Context>
void TemporalShiftGradKernel(const Context& dev_ctx,
const DenseTensor& out_grad,
int seg_num,
float shift_ratio,
const std::string& data_format_str,
DenseTensor* x_grad) {
auto* input_grad = x_grad;
auto* output_grad = &out_grad;
int t = seg_num;
const DataLayout data_layout =
paddle::framework::StringToDataLayout(data_format_str);
const int nt = output_grad->dims()[0];
const int c = (data_layout == DataLayout::kNCHW ? output_grad->dims()[1]
: output_grad->dims()[3]);
const int h = (data_layout == DataLayout::kNCHW ? output_grad->dims()[2]
: output_grad->dims()[1]);
const int w = (data_layout == DataLayout::kNCHW ? output_grad->dims()[3]
: output_grad->dims()[2]);
const int hw = h * w;
const int chw = c * hw;
const int tchw = t * chw;
const int ntchw = nt * chw;
const int c1 = static_cast<int>(c * shift_ratio);
const int c2 = static_cast<int>(c * 2 * shift_ratio);
DDim in_grad_dims =
(data_layout == DataLayout::kNCHW ? phi::make_ddim({nt, c, h, w})
: phi::make_ddim({nt, h, w, c}));
const T* output_grad_data = output_grad->data<T>();
T* input_grad_data =
input_grad->mutable_data<T>(in_grad_dims, dev_ctx.GetPlace());
int pixelNum = nt * chw;
int threads = 1024;
int grid = (pixelNum + threads - 1) / threads;
int blocks_per_sm = dev_ctx.GetMaxPhysicalThreadCount() / threads;
grid = std::min(dev_ctx.GetSMCount() * blocks_per_sm, grid);
if (data_layout == DataLayout::kNCHW) {
KeTemporalShiftBwNCHW<T><<<grid, threads, 0, dev_ctx.stream()>>>(
output_grad_data, input_grad_data, ntchw, tchw, chw, hw, t, c1, c2);
} else {
KeTemporalShiftBwNHWC<T><<<grid, threads, 0, dev_ctx.stream()>>>(
output_grad_data, input_grad_data, ntchw, tchw, chw, t, c, c1, c2);
}
}
} // namespace phi
PD_REGISTER_KERNEL(temporal_shift_grad,
GPU,
ALL_LAYOUT,
phi::TemporalShiftGradKernel,
float,
double,
phi::dtype::float16) {}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/common/layout.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/temporal_shift_kernel.h"
namespace phi {
template <typename T>
__global__ void KeTemporalShiftFwNCHW(const T* input,
T* output,
const int ntchw,
const int tchw,
const int chw,
const int hw,
const int t,
const int c1,
const int c2) {
int tid = blockIdx.x * blockDim.x + threadIdx.x;
int stride = blockDim.x * gridDim.x;
int src_it = 0;
for (; tid < ntchw; tid += stride) {
int it = (tid % tchw) / chw;
int ic = (tid % chw) / hw;
if (ic < c1) {
src_it = it - 1;
} else if (ic < c2) {
src_it = it + 1;
} else {
src_it = it;
}
if (src_it < 0 || src_it >= t) {
output[tid] = 0;
} else {
output[tid] = input[tid + (src_it - it) * chw];
}
}
}
template <typename T>
__global__ void KeTemporalShiftFwNHWC(const T* input,
T* output,
const int nthwc,
const int thwc,
const int hwc,
const int t,
const int c,
const int c1,
const int c2) {
int tid = blockIdx.x * blockDim.x + threadIdx.x;
int stride = blockDim.x * gridDim.x;
int src_it = 0;
for (; tid < nthwc; tid += stride) {
int it = (tid % thwc) / hwc;
int ic = tid % c;
if (ic < c1) {
src_it = it - 1;
} else if (ic < c2) {
src_it = it + 1;
} else {
src_it = it;
}
if (src_it < 0 || src_it >= t) {
output[tid] = 0;
} else {
output[tid] = input[tid + (src_it - it) * hwc];
}
}
}
template <typename T, typename Context>
void TemporalShiftKernel(const Context& dev_ctx,
const DenseTensor& x,
int seg_num,
float shift_ratio,
const std::string& data_format_str,
DenseTensor* out) {
auto* input = &x;
auto* output = out;
int t = seg_num;
const DataLayout data_layout =
paddle::framework::StringToDataLayout(data_format_str);
const int nt = input->dims()[0];
const int c =
(data_layout == DataLayout::kNCHW ? input->dims()[1] : input->dims()[3]);
const int h =
(data_layout == DataLayout::kNCHW ? input->dims()[2] : input->dims()[1]);
const int w =
(data_layout == DataLayout::kNCHW ? input->dims()[3] : input->dims()[2]);
const int hw = h * w;
const int chw = c * hw;
const int tchw = t * chw;
const int ntchw = nt * chw;
const int c1 = static_cast<int>(c * shift_ratio);
const int c2 = static_cast<int>(c * 2 * shift_ratio);
DDim out_dims =
(data_layout == DataLayout::kNCHW ? phi::make_ddim({nt, c, h, w})
: phi::make_ddim({nt, h, w, c}));
const T* input_data = input->data<T>();
T* output_data = output->mutable_data<T>(out_dims, dev_ctx.GetPlace());
int pixelNum = nt * chw;
int threads = 1024;
int grid = (pixelNum + threads - 1) / threads;
int blocks_per_sm = dev_ctx.GetMaxPhysicalThreadCount() / threads;
grid = std::min(dev_ctx.GetSMCount() * blocks_per_sm, grid);
if (data_layout == DataLayout::kNCHW) {
KeTemporalShiftFwNCHW<T><<<grid, threads, 0, dev_ctx.stream()>>>(
input_data, output_data, ntchw, tchw, chw, hw, t, c1, c2);
} else {
KeTemporalShiftFwNHWC<T><<<grid, threads, 0, dev_ctx.stream()>>>(
input_data, output_data, ntchw, tchw, chw, t, c, c1, c2);
}
}
} // namespace phi
PD_REGISTER_KERNEL(temporal_shift,
GPU,
ALL_LAYOUT,
phi::TemporalShiftKernel,
float,
double,
phi::dtype::float16) {}
......@@ -222,6 +222,24 @@ void EluDoubleGradKernel(const Context& dev_ctx,
functor(dev_ctx, &x, &ddx, ddout, &dout, dx);
}
template <typename T, typename Context>
void LogitGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& out_grad,
float eps,
DenseTensor* x_grad) {
dev_ctx.template Alloc<T>(x_grad);
auto eigen_x = EigenVector<T>::Flatten(x);
auto eigen_dout = EigenVector<T>::Flatten(out_grad);
auto eigen_dx = EigenVector<T>::Flatten(*x_grad);
auto& place = *dev_ctx.eigen_device();
auto eigen_p = EigenVector<T>::Flatten(x);
funcs::LogitGradFunctor<T> functor;
functor(place, eigen_x, eigen_dout, eigen_dx, eigen_p, eps);
}
template <typename T, typename Context>
void SigmoidDoubleGradKernel(const Context& dev_ctx,
const DenseTensor& out,
......
......@@ -47,6 +47,22 @@ void ActivationImpl(const Context& dev_ctx,
}
}
template <typename T, typename Context>
void LogitKernel(const Context& dev_ctx,
const DenseTensor& x,
float eps,
DenseTensor* out) {
dev_ctx.template Alloc<T>(out);
auto eigen_out = EigenVector<T>::Flatten(*out);
auto eigen_in = EigenVector<T>::Flatten(x);
auto& place = *dev_ctx.eigen_device();
auto eigen_p = EigenVector<T>::Flatten(*out);
funcs::LogitFunctor<T> functor;
functor(place, eigen_in, eigen_out, eigen_p, eps);
}
template <typename T, typename Context>
void PowKernel(const Context& dev_ctx,
const DenseTensor& x,
......
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/selected_rows/activation_kernel.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/activation_kernel.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
namespace phi {
namespace sr {
template <typename T, typename Context>
void SquareKernel(const Context& dev_ctx,
const SelectedRows& x,
SelectedRows* out) {
out->set_rows(x.rows());
out->set_height(x.height());
phi::SquareKernel<T, Context>(dev_ctx, x.value(), out->mutable_value());
}
template <typename T, typename Context>
void SqrtKernel(const Context& dev_ctx,
const SelectedRows& x,
SelectedRows* out) {
out->set_rows(x.rows());
out->set_height(x.height());
phi::SqrtKernel<T, Context>(dev_ctx, x.value(), out->mutable_value());
}
} // namespace sr
} // namespace phi
PD_REGISTER_KERNEL(
square_sr, CPU, ALL_LAYOUT, phi::sr::SquareKernel, float, double) {}
PD_REGISTER_KERNEL(
sqrt_sr, CPU, ALL_LAYOUT, phi::sr::SqrtKernel, float, double) {}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PD_REGISTER_KERNEL(square_sr,
GPU,
ALL_LAYOUT,
phi::sr::SquareKernel,
float,
double,
int,
int64_t) {}
PD_REGISTER_KERNEL(
sqrt_sr, GPU, ALL_LAYOUT, phi::sr::SqrtKernel, float, double) {}
#endif
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/selected_rows.h"
namespace phi {
namespace sr {
template <typename T, typename Context>
void SquareKernel(const Context& dev_ctx,
const SelectedRows& x,
SelectedRows* out);
template <typename T, typename Context>
void SqrtKernel(const Context& dev_ctx,
const SelectedRows& x,
SelectedRows* out);
} // namespace sr
} // namespace phi
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/core/dense_tensor.h"
namespace phi {
template <typename T, typename Context>
void TemporalShiftGradKernel(const Context& ctx,
const DenseTensor& out_grad,
int seg_num,
float shift_ratio,
const std::string& data_format,
DenseTensor* x_grad);
} // namespace phi
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/core/dense_tensor.h"
namespace phi {
template <typename T, typename Context>
void TemporalShiftKernel(const Context& ctx,
const DenseTensor& x,
int seg_num,
float shift_ratio,
const std::string& data_format,
DenseTensor* out);
} // namespace phi
......@@ -43,17 +43,19 @@ namespace phi {
#define comma ,
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Cos, "cos", ); // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Tan, "tan", ); // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Acos, "acos", ); // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Sin, "sin", ); // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Asin, "asin", ); // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Atan, "atan", ); // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Sinh, "sinh", ); // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Cosh, "cosh", ); // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Asinh, "asinh", ); // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Acosh, "acosh", ); // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Atanh, "atanh", ); // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Cos, "cos", ); // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Tan, "tan", ); // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Acos, "acos", ); // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Sin, "sin", ); // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Asin, "asin", ); // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Atan, "atan", ); // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Sinh, "sinh", ); // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Cosh, "cosh", ); // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Asinh, "asinh", ); // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Acosh, "acosh", ); // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Atanh, "atanh", ); // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Square, "square", ); // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(BRelu, "brelu", "t_min" comma "t_max");
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(LeakyRelu, "leaky_relu", "alpha");
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(ThresholdedRelu,
......@@ -61,6 +63,7 @@ DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(ThresholdedRelu,
"threshold");
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(SoftShrink, "soft_shrink", "lambda");
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(HardShrink, "hard_shrink", "threshold");
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Mish, "mish", "threshold");
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(TanhShrink, "tanh_shrink", ); // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Silu, "silu", ); // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(LogSigmoid, "logsigmoid", ); // NOLINT
......@@ -74,12 +77,41 @@ DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(HardSwish,
"offset"); // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Swish, "swish", "beta"); // NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Relu, "relu", ); // NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Tanh, "tanh", ); // NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Sigmoid, "sigmoid", ); // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(STanh,
"stanh",
"scale_a" comma "scale_b"); // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Softplus,
"softplus",
"beta" comma "threshold"); // NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Relu, "relu", ); // NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Tanh, "tanh", ); // NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Sigmoid, "sigmoid", ); // NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Exp, "exp", ); // NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Expm1, "expm1", ); // NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Reciprocal, "reciprocal", ); // NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Sqrt, "sqrt", ); // NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Rsqrt, "rsqrt", ); // NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(HardSigmoid,
"hard_sigmoid",
"slope" comma "offset"); // NOLINT
KernelSignature SqrtActiOpArgumentMapping(const ArgumentMappingContext& ctx) {
if (ctx.IsDenseTensorInput("X")) {
return KernelSignature("sqrt", {"X"}, {}, {"Out"});
} else {
return KernelSignature("sqrt_sr", {"X"}, {}, {"Out"});
}
}
KernelSignature SquareActiOpArgumentMapping(const ArgumentMappingContext& ctx) {
if (ctx.IsDenseTensorInput("X")) {
return KernelSignature("square", {"X"}, {}, {"Out"});
} else {
return KernelSignature("square_sr", {"X"}, {}, {"Out"});
}
}
DEFINE_ACT_GRAD_NODEP_OP_ARGMAP(Round, "round", ); // NOLINT
DEFINE_ACT_GRAD_NODEP_OP_ARGMAP(Floor, "floor", ); // NOLINT
......@@ -132,6 +164,11 @@ KernelSignature EluOpArgumentMapping(const ArgumentMappingContext& ctx) {
return KernelSignature("elu", {"X"}, {"alpha"}, {"Out"});
}
KernelSignature LogitGradOpArgumentMapping(const ArgumentMappingContext& ctx) {
return KernelSignature(
"logit_grad", {"X", GradVarName("Out")}, {"eps"}, {GradVarName("X")});
}
KernelSignature EluGradOpArgumentMapping(const ArgumentMappingContext& ctx) {
return KernelSignature("elu_grad",
{"X", "Out", GradVarName("Out")},
......@@ -194,6 +231,18 @@ PD_REGISTER_ARG_MAPPING_FN(asinh_grad, phi::AsinhGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(acosh_grad, phi::AcoshGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(atanh_grad, phi::AtanhGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(relu_grad, phi::ReluGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(exp_grad, phi::ExpGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(expm1_grad, phi::Expm1GradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(square_grad, phi::SquareGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(reciprocal_grad,
phi::ReciprocalGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(sqrt_grad, phi::SqrtGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(rsqrt_grad, phi::RsqrtGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(mish_grad, phi::MishGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(stanh_grad, phi::STanhGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(softplus_grad, phi::SoftplusGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(relu_grad_grad,
phi::ReluDoubleGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(tanh_grad, phi::TanhGradOpArgumentMapping);
......@@ -228,11 +277,16 @@ PD_REGISTER_ARG_MAPPING_FN(logsigmoid_grad,
phi::LogSigmoidGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(hard_sigmoid_grad,
phi::HardSigmoidGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(logit_grad, phi::LogitGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(log_grad, phi::LogGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(log_grad_grad, phi::LogDoubleGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(log2_grad, phi::Log2GradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(log10_grad, phi::Log10GradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(log1p_grad, phi::Log1pGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(sqrt, phi::SqrtActiOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(square, phi::SquareActiOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(hard_swish_grad,
phi::HardSwishGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(swish_grad, phi::SwishGradOpArgumentMapping);
......
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/core/compat/op_utils.h"
namespace phi {
KernelSignature TemporalShiftOpArgumentMapping(
const ArgumentMappingContext& ctx) {
return KernelSignature("temporal_shift",
{"X"},
{"seg_num", "shift_ratio", "data_format"},
{"Out"});
}
KernelSignature TemporalShiftGradOpArgumentMapping(
const ArgumentMappingContext& ctx) {
return KernelSignature("temporal_shift_grad",
{GradVarName("Out")},
{"seg_num", "shift_ratio", "data_format"},
{GradVarName("X")});
}
} // namespace phi
PD_REGISTER_ARG_MAPPING_FN(temporal_shift, phi::TemporalShiftOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(temporal_shift_grad,
phi::TemporalShiftGradOpArgumentMapping);
......@@ -342,4 +342,5 @@ class TestLogDoubleGradCheck(unittest.TestCase):
if __name__ == "__main__":
paddle.enable_static()
unittest.main()
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import numpy as np
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.op import Operator
from op_test import OpTest
import paddle
class TestSparseSquareOp(unittest.TestCase):
def check_with_place(self, place):
scope = core.Scope()
# create and initialize Grad Variable
height = 10
rows = [0, 4, 7]
self.row_numel = 12
x_selected_rows = scope.var('X').get_selected_rows()
x_selected_rows.set_height(height)
x_selected_rows.set_rows(rows)
np_array = np.ones((len(rows), self.row_numel)).astype("float32")
np_array[0, 0] = 2.0
np_array[2, 8] = 4.0
x_tensor = x_selected_rows.get_tensor()
x_tensor.set(np_array, place)
out_selected_rows = scope.var('Out').get_selected_rows()
# create and run sqrt operator
square_op = Operator("square", X='X', Out='Out')
square_op.run(scope, place)
# get and compare result
result_array = np.array(out_selected_rows.get_tensor())
self.assertTrue(np.array_equal(result_array, np.square(np_array)))
def test_sparse_acti(self):
places = [core.CPUPlace()]
if core.is_compiled_with_cuda():
places.append(core.CUDAPlace(0))
for place in places:
self.check_with_place(place)
class TestSparseSqrtOp(unittest.TestCase):
def check_with_place(self, place):
scope = core.Scope()
# create and initialize Grad Variable
height = 10
rows = [0, 4, 7]
self.row_numel = 12
x_selected_rows = scope.var('X1').get_selected_rows()
x_selected_rows.set_height(height)
x_selected_rows.set_rows(rows)
np_array = np.ones((len(rows), self.row_numel)).astype("float32")
np_array[0, 0] = 2.0
np_array[2, 8] = 4.0
x_tensor = x_selected_rows.get_tensor()
x_tensor.set(np_array, place)
out_selected_rows = scope.var('Out1').get_selected_rows()
# create and run sqrt operator
sqrt_op = Operator("sqrt", X='X1', Out='Out1')
sqrt_op.run(scope, place)
# get and compare result
result_array = np.array(out_selected_rows.get_tensor())
self.assertTrue(np.allclose(result_array, np.sqrt(np_array)))
def test_sparse_acti(self):
places = [core.CPUPlace()]
if core.is_compiled_with_cuda():
places.append(core.CUDAPlace(0))
for place in places:
self.check_with_place(place)
if __name__ == "__main__":
paddle.enable_static()
unittest.main()
......@@ -16,6 +16,7 @@ from __future__ import print_function
import unittest
import numpy as np
import paddle
from op_test import OpTest
import paddle.fluid as fluid
......@@ -153,4 +154,5 @@ class TestClipByNormOpWithSelectedRows(unittest.TestCase):
if __name__ == '__main__':
paddle.enable_static()
unittest.main()
......@@ -143,4 +143,5 @@ class TestTemporalShiftAPI(unittest.TestCase):
if __name__ == "__main__":
paddle.enable_static()
unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册