From 05f3d48ed3b2f7b5eb0a135a4e07aa160cfa5d6d Mon Sep 17 00:00:00 2001
From: tianshuo78520a <707759223@qq.com>
Date: Tue, 29 Mar 2022 11:47:13 +0800
Subject: [PATCH] Revert "Move some activation to phi (#40727)" (#41056)

This reverts commit e77a947ec2018f3e2a97537f1d97d62f9611d13b.
---
 .../new_executor/standalone_executor_test.cc  |   3 +-
 paddle/fluid/operators/activation_op.cc       |  86 ++-
 paddle/fluid/operators/activation_op.h        | 484 +++++++++++---
 paddle/fluid/operators/activation_op.kps      | 371 ++++++++++-
 paddle/fluid/operators/math/CMakeLists.txt    |   4 +-
 .../operators/math/selected_rows_functor.cc   |   1 -
 paddle/fluid/operators/temporal_shift_op.h    |  90 ++-
 paddle/phi/kernels/activation_kernel.h        |  22 -
 .../phi/kernels/cpu/activation_grad_kernel.cc |  54 --
 paddle/phi/kernels/cpu/activation_kernel.cc   |  32 +-
 .../kernels/cpu/temporal_shift_grad_kernel.cc | 136 ----
 .../phi/kernels/cpu/temporal_shift_kernel.cc  | 131 ----
 paddle/phi/kernels/funcs/activation_functor.h | 589 ------------------
 .../phi/kernels/gpu/activation_grad_kernel.cu |  65 --
 paddle/phi/kernels/gpu/activation_kernel.cu   |  54 --
 .../kernels/gpu/temporal_shift_grad_kernel.cu | 149 -----
 .../phi/kernels/gpu/temporal_shift_kernel.cu  | 148 -----
 .../phi/kernels/impl/activation_grad_impl.h   |  18 -
 paddle/phi/kernels/impl/activation_impl.h     |  16 -
 .../selected_rows/activation_kernel.cc        |  68 --
 .../kernels/selected_rows/activation_kernel.h |  34 -
 .../phi/kernels/temporal_shift_grad_kernel.h  |  29 -
 paddle/phi/kernels/temporal_shift_kernel.h    |  29 -
 paddle/phi/ops/compat/activation_sig.cc       |  82 +--
 paddle/phi/ops/compat/temporal_shift_sig.cc   |  39 --
 .../unittests/test_activation_nn_grad.py      |   1 -
 .../unittests/test_activation_sparse_op.py    | 101 ---
 .../tests/unittests/test_clip_by_norm_op.py   |   2 -
 .../tests/unittests/test_temporal_shift_op.py |   1 -
 29 files changed, 931 insertions(+), 1908 deletions(-)
 delete mode 100644 paddle/phi/kernels/cpu/temporal_shift_grad_kernel.cc
 delete mode 100644 paddle/phi/kernels/cpu/temporal_shift_kernel.cc
 delete mode 100644 paddle/phi/kernels/gpu/temporal_shift_grad_kernel.cu
 delete mode 100644 paddle/phi/kernels/gpu/temporal_shift_kernel.cu
 delete mode 100644 paddle/phi/kernels/selected_rows/activation_kernel.cc
 delete mode 100644 paddle/phi/kernels/selected_rows/activation_kernel.h
 delete mode 100644 paddle/phi/kernels/temporal_shift_grad_kernel.h
 delete mode 100644 paddle/phi/kernels/temporal_shift_kernel.h
 delete mode 100644 paddle/phi/ops/compat/temporal_shift_sig.cc
 delete mode 100644 python/paddle/fluid/tests/unittests/test_activation_sparse_op.py

diff --git a/paddle/fluid/framework/new_executor/standalone_executor_test.cc b/paddle/fluid/framework/new_executor/standalone_executor_test.cc
index 8a3b40bbd7..d3adccff73 100644
--- a/paddle/fluid/framework/new_executor/standalone_executor_test.cc
+++ b/paddle/fluid/framework/new_executor/standalone_executor_test.cc
@@ -53,7 +53,7 @@ USE_OP_ITSELF(tanh_grad);
 USE_OP(sum);
 USE_OP_ITSELF(slice_grad);
 USE_OP_ITSELF(lookup_table_grad);
-USE_OP_ITSELF(sqrt);
+USE_OP(sqrt);
 USE_OP_ITSELF(elementwise_max);
 USE_OP_ITSELF(elementwise_div);
 USE_OP_ITSELF(sgd);
@@ -83,7 +83,6 @@ PD_DECLARE_KERNEL(max_raw, GPU, ALL_LAYOUT);
 PD_DECLARE_KERNEL(sgd, GPU, ALL_LAYOUT);
 PD_DECLARE_KERNEL(slice, GPU, ALL_LAYOUT);
 PD_DECLARE_KERNEL(slice_grad, GPU, ALL_LAYOUT);
-PD_DECLARE_KERNEL(sqrt, GPU, ALL_LAYOUT);

 DECLARE_double(eager_delete_tensor_gb);

diff --git a/paddle/fluid/operators/activation_op.cc b/paddle/fluid/operators/activation_op.cc
index 6be872b028..632ea92746 100644
---
a/paddle/fluid/operators/activation_op.cc +++ b/paddle/fluid/operators/activation_op.cc @@ -1496,14 +1496,6 @@ REGISTER_ACTIVATION_OP(hard_sigmoid, HardSigmoid, HardSigmoidFunctor, HardSigmoidGradFunctor); REGISTER_ACTIVATION_OP(logsigmoid, LogSigmoid, LogSigmoidFunctor, LogSigmoidGradFunctor); -REGISTER_ACTIVATION_OP(expm1, Expm1, Expm1Functor, Expm1GradFunctor); -REGISTER_ACTIVATION_OP(softplus, Softplus, SoftplusFunctor, - SoftplusGradFunctor); -REGISTER_ACTIVATION_OP(mish, Mish, MishFunctor, MishGradFunctor); -REGISTER_ACTIVATION_OP(stanh, STanh, STanhFunctor, STanhGradFunctor); -REGISTER_ACTIVATION_OP(reciprocal, Reciprocal, ReciprocalFunctor, - ReciprocalGradFunctor); - REGISTER_ACTIVATION_OP(log2, Log2, Log2Functor, Log2GradFunctor); REGISTER_ACTIVATION_OP(log10, Log10, Log10Functor, Log10GradFunctor); REGISTER_ACTIVATION_OP(log1p, Log1p, Log1pFunctor, Log1pGradFunctor); @@ -1638,7 +1630,12 @@ REGISTER_OPERATOR(logit, ops::LogitOp, ops::LogitOpMaker, ops::LogitGradOpMaker, ops::LogitGradOpMaker); REGISTER_OPERATOR(logit_grad, ops::LogitGradOp); - +REGISTER_OP_CPU_KERNEL( + logit, ops::LogitKernel, + ops::LogitKernel); +REGISTER_OP_CPU_KERNEL( + logit_grad, ops::LogitGradKernel, + ops::LogitGradKernel); /* ========================================================================== */ /* ======================== celu register ============================ @@ -1687,6 +1684,7 @@ REGISTER_OPERATOR( ops::ActivationOpDoubleGrad::FwdDeps()>, ops::ActivationDoubleGradOpInplaceInferer); +REGISTER_ACTIVATION_CPU_KERNEL(sqrt, Sqrt, SqrtFunctor, SqrtGradFunctor); REGISTER_OP_CPU_KERNEL( sqrt_grad_grad, ops::SqrtDoubleGradKernel>, @@ -1714,6 +1712,7 @@ REGISTER_OPERATOR( ops::ActivationOpDoubleGrad::FwdDeps()>, ops::ActivationDoubleGradOpInplaceInferer); +REGISTER_ACTIVATION_CPU_KERNEL(rsqrt, Rsqrt, RsqrtFunctor, RsqrtGradFunctor); REGISTER_OP_CPU_KERNEL( rsqrt_grad_grad, ops::RsqrtDoubleGradKernel::FwdDeps()>, ops::ActivationDoubleGradOpInplaceInferer); +REGISTER_OP_CPU_KERNEL(square, + ops::ActivationKernel>, + ops::ActivationKernel>, + ops::ActivationKernel>, + ops::ActivationKernel>); +REGISTER_OP_CPU_KERNEL( + square_grad, ops::ActivationGradKernel>, + ops::ActivationGradKernel>, + ops::ActivationGradKernel>, + ops::ActivationGradKernel>); + REGISTER_OP_CPU_KERNEL( square_grad_grad, ops::SquareDoubleGradKernel>, + ops::ActivationKernel>, + ops::ActivationKernel>, + ops::ActivationKernel>); +REGISTER_OP_CPU_KERNEL( + exp_grad, ops::ActivationGradKernel>, + ops::ActivationGradKernel>, + ops::ActivationGradKernel>, + ops::ActivationGradKernel>); +/* ========================================================================== */ + +/* ========================== expm1 register ============================ */ +REGISTER_OPERATOR( + expm1, ops::ActivationOp, ops::Expm1OpMaker, ops::ActivationOpInferVarType, + ops::ActivationGradOpMaker::FwdDeps(), + paddle::framework::OpDesc>, + ops::ActivationGradOpMaker::FwdDeps(), + paddle::imperative::OpBase>, + std::conditional>(), + ops::ActFwdInplaceInferer, void>::type); +REGISTER_OPERATOR(expm1_grad, ops::ActivationOpGrad, + ops::ActivationGradOpInplaceInferer); + +REGISTER_OP_CPU_KERNEL(expm1, + ops::ActivationKernel>, + ops::ActivationKernel>, + ops::ActivationKernel>); +REGISTER_OP_CPU_KERNEL( + expm1_grad, ops::ActivationGradKernel>, + ops::ActivationGradKernel>, + ops::ActivationGradKernel>); +/* ========================================================================== */ + /* ========================== Log register ==================================*/ 
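Note on the exp and expm1 kernels registered above: both gradient functors reuse the forward output (FwdDeps() is kDepOut), with dx = dout * out for exp and dx = dout * out + dout for expm1. The short standalone program below is only an illustration of those two identities checked against finite differences; it is not code from this patch, and all names in it are local to the example.

// Illustrative sketch, not part of the reverted patch.
#include <cassert>
#include <cmath>
#include <cstdio>

int main() {
  const double x = 0.7, dout = 1.3, h = 1e-6;

  // exp: y = e^x, dy/dx = y, so dx = dout * out (backward needs only Out).
  double out_exp = std::exp(x);
  double dx_exp = dout * out_exp;
  double fd_exp = dout * (std::exp(x + h) - std::exp(x - h)) / (2.0 * h);
  assert(std::fabs(dx_exp - fd_exp) < 1e-4);

  // expm1: y = e^x - 1, dy/dx = y + 1, so dx = dout * out + dout.
  double out_expm1 = std::expm1(x);
  double dx_expm1 = dout * out_expm1 + dout;
  double fd_expm1 = dout * (std::expm1(x + h) - std::expm1(x - h)) / (2.0 * h);
  assert(std::fabs(dx_expm1 - fd_expm1) < 1e-4);

  std::printf("exp grad: %.6f  expm1 grad: %.6f\n", dx_exp, dx_expm1);
  return 0;
}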
REGISTER_OPERATOR( log, ops::ActivationOp, ops::LogOpMaker, ops::ActivationOpInferVarType, @@ -1798,6 +1864,8 @@ REGISTER_OPERATOR( ops::ActivationOpDoubleGrad::FwdDeps()>, ops::ActivationDoubleGradOpInplaceInferer); +/* ========================================================================== */ + /* ========================== register checkpoint ===========================*/ REGISTER_OP_VERSION(leaky_relu) .AddCheckpoint( diff --git a/paddle/fluid/operators/activation_op.h b/paddle/fluid/operators/activation_op.h index 5448ed2a4b..5a72f2086c 100644 --- a/paddle/fluid/operators/activation_op.h +++ b/paddle/fluid/operators/activation_op.h @@ -264,7 +264,6 @@ USE_PHI_FUNCTOR(Asinh) USE_PHI_FUNCTOR(Acosh) USE_PHI_FUNCTOR(Atanh) USE_PHI_FUNCTOR(Tanh) -USE_PHI_FUNCTOR(Exp) USE_PHI_DOUBLE_GRAD_FUNCTOR(Tanh) USE_PHI_TRIPLE_GRAD_FUNCTOR(Tanh) USE_PHI_FUNCTOR(BRelu) @@ -290,15 +289,6 @@ USE_PHI_FUNCTOR(Log1p) USE_PHI_FUNCTOR(Swish) USE_PHI_FUNCTOR(HardSwish) USE_PHI_FUNCTOR(Pow) -USE_PHI_FUNCTOR(Exp) -USE_PHI_FUNCTOR(Expm1) -USE_PHI_FUNCTOR(Mish) -USE_PHI_FUNCTOR(STanh) -USE_PHI_FUNCTOR(Reciprocal) -USE_PHI_FUNCTOR(Square) -USE_PHI_FUNCTOR(Sqrt) -USE_PHI_FUNCTOR(Rsqrt) -USE_PHI_FUNCTOR(Softplus) template using ELUGradNegativeAlphaFunctor = phi::funcs::ELUGradNegativeAlphaFunctor; @@ -315,8 +305,49 @@ using CeilFunctor = phi::funcs::CeilFunctor; template using ZeroGradFunctor = phi::funcs::ZeroGradFunctor; +// exp(x) = e^x template -using ELUGradNegativeAlphaFunctor = phi::funcs::ELUGradNegativeAlphaFunctor; +struct ExpFunctor : public BaseActivationFunctor { + template + void operator()(Device d, X x, Out out) const { + out.device(d) = x.exp(); + } +}; + +template +struct ExpGradFunctor : public BaseActivationFunctor { + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { + dx.device(d) = dout * out; + } + + static constexpr ActBwdOpFwdDeps FwdDeps() { + return ActBwdOpFwdDeps::kDepOut; + } +}; + +// expm1(x) = e^x - 1 +template +struct Expm1Functor : public BaseActivationFunctor { + template + void operator()(Device d, X x, Out out) const { + out.device(d) = x.expm1(); + } +}; + +template +struct Expm1GradFunctor : public BaseActivationFunctor { + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { + dx.device(d) = dout * out + dout; + } + + static constexpr ActBwdOpFwdDeps FwdDeps() { + return ActBwdOpFwdDeps::kDepOut; + } +}; // relu(x) = max(x, 0) @@ -331,68 +362,92 @@ using ReluGradGradFunctor = phi::funcs::ReluGradGradFunctor; template using ReluCUDAFunctor = phi::funcs::ReluCUDAFunctor; +// sqrt(x) = x^(1/2) template -struct SqrtGradGradFunctor : public BaseActivationFunctor { - template - void operator()(const Device& dev, const framework::Tensor* Out, - const framework::Tensor* ddX, framework::Tensor* ddOut, - framework::Tensor* dOut, const framework::Tensor* dX) const { - auto* d = dev.eigen_device(); - auto ddx = framework::EigenVector::Flatten( - GET_DATA_SAFELY(ddX, "Input", "DDX", "SqrtGradGrad")); - auto out = framework::EigenVector::Flatten( - GET_DATA_SAFELY(Out, "Output", "Out", "SqrtGradGrad")); - // sqrt GradGrad: ddy = 0.5 * ddx / y, dy = -1 * dx * ddx - // calculate dy first, so ddy can inplace ddx - if (dOut) { - auto dx = framework::EigenVector::Flatten( - GET_DATA_SAFELY(dX, "Output", "DX", "SqrtGradGrad")); - auto dout = framework::EigenVector::Flatten( - GET_DATA_SAFELY(dOut, "Output", "DOut", "SqrtGradGrad")); - dout.device(*d) = dx * ddx * static_cast(-1) / out; - } - if (ddOut) { - auto ddout = 
framework::EigenVector::Flatten( - GET_DATA_SAFELY(ddOut, "Output", "DDOut", "SqrtGradGrad")); - ddout.device(*d) = ddx * static_cast(0.5) / out; - } +struct SqrtFunctor : public BaseActivationFunctor { + template + void operator()(Device d, X x, Out out) const { + out.device(d) = x.sqrt(); } +}; + +template +struct SqrtGradFunctor : public BaseActivationFunctor { + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { + dx.device(d) = static_cast(0.5) * dout / out; + } + static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepOut; } }; +// rsqrt(x) = x^(-1/2) template -struct RsqrtGradGradFunctor : public BaseActivationFunctor { - template - void operator()(const Device& dev, const framework::Tensor* Out, - const framework::Tensor* ddX, framework::Tensor* ddOut, - framework::Tensor* dOut, const framework::Tensor* dX) const { - auto* d = dev.eigen_device(); - auto ddx = framework::EigenVector::Flatten( - GET_DATA_SAFELY(ddX, "Input", "DDX", "RsqrtGradGrad")); - auto out = framework::EigenVector::Flatten( - GET_DATA_SAFELY(Out, "Output", "Out", "RsqrtGradGrad")); +struct RsqrtFunctor : public BaseActivationFunctor { + template + void operator()(Device d, X x, Out out) const { + out.device(d) = x.rsqrt(); + } +}; - // rsqrt GradGrad: ddy = -0.5 * ddx * y * y * y, dy = (3/y) * dx * ddx - if (dOut) { - auto dx = framework::EigenVector::Flatten( - GET_DATA_SAFELY(dX, "Output", "DX", "RsqrtGradGrad")); - auto dout = framework::EigenVector::Flatten( - GET_DATA_SAFELY(dOut, "Output", "DOut", "RsqrtGradGrad")); - dout.device(*d) = (static_cast(3.0) / out) * dx * ddx; - } - if (ddOut) { - auto ddout = framework::EigenVector::Flatten( - GET_DATA_SAFELY(ddOut, "Output", "DDOut", "RsqrtGradGrad")); - ddout.device(*d) = ddx * static_cast(-0.5) * out * out * out; - } +template +struct RsqrtGradFunctor : public BaseActivationFunctor { + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { + dx.device(d) = static_cast(-0.5) * dout * out * out * out; } + static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepOut; } }; +// reciprocal(x) = 1 / x +template +struct ReciprocalFunctor : public BaseActivationFunctor { + template + void operator()(Device d, X x, Out out) const { + out.device(d) = static_cast(1) / x; + } +}; + +template +struct ReciprocalGradFunctor : public BaseActivationFunctor { + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { + dx.device(d) = dout * static_cast(-1) * out * out; + } + + static constexpr ActBwdOpFwdDeps FwdDeps() { + return ActBwdOpFwdDeps::kDepOut; + } +}; + +// square(x) = x^2 +template +struct SquareFunctor : public BaseActivationFunctor { + template + void operator()(Device d, X x, Out out) const { + out.device(d) = x.square(); + } +}; + +template +struct SquareGradFunctor : public BaseActivationFunctor { + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { + dx.device(d) = dout * static_cast(2) * x; + } + + static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; } +}; + // relu6(x) = min(max(0, x), 6) template struct Relu6Functor : public BaseActivationFunctor { @@ -429,6 +484,114 @@ struct Relu6GradFunctor : public BaseActivationFunctor { } }; +// For numerical stability, using the following formula instead of softplus(x) = +// log(1 + exp(x)) +// softplus(x) = log(1 + exp(beta * x)) / beta when beta * x <= threshold(beta = +// 1, threshold = 20 by default), otherwise x +template +struct SoftplusFunctor : public 
BaseActivationFunctor { + float beta; + float threshold; + typename BaseActivationFunctor::AttrPair GetAttrs() { + return {{"beta", &beta}, {"threshold", &threshold}}; + } + + template + void operator()(Device d, X x, Out out) { + auto x_beta = static_cast(beta) * x; + out.device(d) = (x_beta > static_cast(threshold)) + .select(x, (static_cast(1) + x_beta.exp()).log() / + static_cast(beta)); + } +}; + +// For numerical stability, using the following formula instead of +// d(softplus(x))/dx = 1 / (1 + exp(-x)) +// d(softplus(x))/dx = 1 / (1 + exp(-beta * x)) when beta * x <= threshold(beta +// = 1, threshold = 20 by default), otherwise x +template +struct SoftplusGradFunctor : public BaseActivationFunctor { + float beta; + float threshold; + typename BaseActivationFunctor::AttrPair GetAttrs() { + return {{"beta", &beta}, {"threshold", &threshold}}; + } + + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) { + auto x_beta = static_cast(beta) * x; + dx.device(d) = + (x_beta > static_cast(threshold)) + .select(dout, dout / (static_cast(1) + (-x_beta).exp())); + } + + static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; } +}; + +// mish(x) = x * tanh(softplus(x)) +// softplus(x) = x, if x > threshold +// = ln(1 + exp(x)), otherwise +template +struct MishFunctor : public BaseActivationFunctor { + float threshold; + typename BaseActivationFunctor::AttrPair GetAttrs() { + return {{"threshold", &threshold}}; + } + + template + void operator()(Device d, X x, Out out) { + auto sp = (x > static_cast(threshold)) + .select(x, (static_cast(1) + x.exp()).log()); + out.device(d) = x * sp.tanh(); + } +}; + +// dx = dout * (tanh(sp) + x * (1 - tanh(sp) ** 2) * (1 - exp(-sp))) +// sp = softplus(x) +template +struct MishGradFunctor : public BaseActivationFunctor { + float threshold; + typename BaseActivationFunctor::AttrPair GetAttrs() { + return {{"threshold", &threshold}}; + } + + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) { + auto sp = (x > static_cast(threshold)) + .select(x, (static_cast(1) + x.exp()).log()); + auto gsp = static_cast(1) - (-sp).exp(); + auto tsp = sp.tanh(); + dx.device(d) = dout * (tsp + x * (static_cast(1) - tsp * tsp) * gsp); + } + + static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; } +}; + +// softsign(x) = x / (1 + |x|) +template +struct SoftsignFunctor : public BaseActivationFunctor { + template + void operator()(Device d, X x, Out out) { + out.device(d) = x / (static_cast(1) + x.abs()); + } +}; + +// d(softsign(x))/dx = 1 / (1 + |x|)^2 +// Taken from https://en.wikipedia.org/wiki/Activation_function +template +struct SoftsignGradFunctor : public BaseActivationFunctor { + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) { + dx.device(d) = + dout * (static_cast(1) / (static_cast(1) + x.abs()).square()); + } + + static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; } +}; + template struct SoftReluFunctor : public BaseActivationFunctor { float threshold; @@ -543,6 +706,71 @@ struct CELUGradFunctor : public BaseActivationFunctor { static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; } }; +template +struct LogitFunctor { + template + void operator()(Device d, X x, Out out, P p, float eps) const { + // logit(x) = ln(x/(1-x)) + auto tmp_x = + (x.cwiseMin(static_cast(1.0 - eps))).cwiseMax(static_cast(eps)); + + if (!eps) { + out.device(d) = (x < static_cast(0.0) || x > static_cast(1.0)) + .select(p.constant(static_cast(NAN)), + (tmp_x 
/ (static_cast(1) - tmp_x)).log()); + } else { + out.device(d) = (tmp_x / (static_cast(1) - tmp_x)).log(); + } + } +}; + +template +struct LogitGradFunctor { + template + void operator()(Device d, X x, dOut dout, dX dx, P p, float eps) const { + // logit(x)' = 1/(x*(1-x)) + dx.device(d) = + (x < static_cast(eps) || x > static_cast(1.0 - eps)) + .select(p.constant(static_cast(0)), + dout * (static_cast(1) / ((static_cast(1) - x) * x))); + } +}; + +template +struct STanhFunctor : public BaseActivationFunctor { + float scale_a; + float scale_b; + typename BaseActivationFunctor::AttrPair GetAttrs() { + return {{"scale_a", &scale_a}, {"scale_b", &scale_b}}; + } + + template + void operator()(Device d, X x, Out out) const { + out.device(d) = + static_cast(scale_b) * (static_cast(scale_a) * x).tanh(); + } +}; + +template +struct STanhGradFunctor : public BaseActivationFunctor { + float scale_a; + float scale_b; + typename BaseActivationFunctor::AttrPair GetAttrs() { + return {{"scale_a", &scale_a}, {"scale_b", &scale_b}}; + } + + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { + auto a = static_cast(scale_a); + auto b = static_cast(scale_b); + auto temp = (a * x).tanh() * (a * x).tanh(); + dx.device(d) = dout * a * b * (static_cast(1) - temp); + } + + static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; } +}; + template struct AbsGradGradFunctor : public BaseActivationFunctor { template @@ -603,6 +831,68 @@ struct CELUGradGradFunctor : public BaseActivationFunctor { static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; } }; +template +struct SqrtGradGradFunctor : public BaseActivationFunctor { + template + void operator()(const Device& dev, const framework::Tensor* Out, + const framework::Tensor* ddX, framework::Tensor* ddOut, + framework::Tensor* dOut, const framework::Tensor* dX) const { + auto* d = dev.eigen_device(); + auto ddx = framework::EigenVector::Flatten( + GET_DATA_SAFELY(ddX, "Input", "DDX", "SqrtGradGrad")); + auto out = framework::EigenVector::Flatten( + GET_DATA_SAFELY(Out, "Output", "Out", "SqrtGradGrad")); + // sqrt GradGrad: ddy = 0.5 * ddx / y, dy = -1 * dx * ddx + // calculate dy first, so ddy can inplace ddx + if (dOut) { + auto dx = framework::EigenVector::Flatten( + GET_DATA_SAFELY(dX, "Output", "DX", "SqrtGradGrad")); + auto dout = framework::EigenVector::Flatten( + GET_DATA_SAFELY(dOut, "Output", "DOut", "SqrtGradGrad")); + dout.device(*d) = dx * ddx * static_cast(-1) / out; + } + if (ddOut) { + auto ddout = framework::EigenVector::Flatten( + GET_DATA_SAFELY(ddOut, "Output", "DDOut", "SqrtGradGrad")); + ddout.device(*d) = ddx * static_cast(0.5) / out; + } + } + static constexpr ActBwdOpFwdDeps FwdDeps() { + return ActBwdOpFwdDeps::kDepOut; + } +}; + +template +struct RsqrtGradGradFunctor : public BaseActivationFunctor { + template + void operator()(const Device& dev, const framework::Tensor* Out, + const framework::Tensor* ddX, framework::Tensor* ddOut, + framework::Tensor* dOut, const framework::Tensor* dX) const { + auto* d = dev.eigen_device(); + auto ddx = framework::EigenVector::Flatten( + GET_DATA_SAFELY(ddX, "Input", "DDX", "RsqrtGradGrad")); + auto out = framework::EigenVector::Flatten( + GET_DATA_SAFELY(Out, "Output", "Out", "RsqrtGradGrad")); + + // rsqrt GradGrad: ddy = -0.5 * ddx * y * y * y, dy = (3/y) * dx * ddx + if (dOut) { + auto dx = framework::EigenVector::Flatten( + GET_DATA_SAFELY(dX, "Output", "DX", "RsqrtGradGrad")); + auto dout = framework::EigenVector::Flatten( + 
GET_DATA_SAFELY(dOut, "Output", "DOut", "RsqrtGradGrad")); + dout.device(*d) = (static_cast(3.0) / out) * dx * ddx; + } + if (ddOut) { + auto ddout = framework::EigenVector::Flatten( + GET_DATA_SAFELY(ddOut, "Output", "DDOut", "RsqrtGradGrad")); + ddout.device(*d) = ddx * static_cast(-0.5) * out * out * out; + } + } + static constexpr ActBwdOpFwdDeps FwdDeps() { + return ActBwdOpFwdDeps::kDepOut; + } +}; + template struct SquareGradGradFunctor : public BaseActivationFunctor { template @@ -698,29 +988,6 @@ class SquareDoubleGradKernel } }; -template -struct SoftsignFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Out out) const { - out.device(d) = x / (static_cast(1) + x.abs()); - } -}; - -// d(softsign(x))/dx = 1 / (1 + |x|)^2 -// Taken from https://en.wikipedia.org/wiki/Activation_function - -template -struct SoftsignGradFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Out out, dOut dout, dX dx) const { - dx.device(d) = - dout * (static_cast(1) / (static_cast(1) + x.abs()).square()); - } - - static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; } -}; - template class CELUDoubleGradKernel : public framework::OpKernel { @@ -868,10 +1135,57 @@ class RsqrtDoubleGradKernel } }; +template +class LogitKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto* out = context.Output("Out"); + auto* in = context.Input("X"); + auto eps = context.Attr("eps"); + out->mutable_data(in->place()); + + auto eigen_out = framework::EigenVector::Flatten(*out); + auto eigen_in = framework::EigenVector::Flatten(*in); + auto& place = + *context.template device_context().eigen_device(); + auto eigen_p = framework::EigenVector::Flatten(*out); + + LogitFunctor functor; + functor(place, eigen_in, eigen_out, eigen_p, eps); + } +}; + +template +class LogitGradKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto* x = context.Input("X"); + auto* dout = + context.Input(framework::GradVarName("Out")); + auto* dx = context.Output(framework::GradVarName("X")); + auto eps = context.Attr("eps"); + dx->mutable_data(dout->place()); + + auto eigen_x = framework::EigenVector::Flatten(*x); + auto eigen_dout = framework::EigenVector::Flatten(*dout); + auto eigen_dx = framework::EigenVector::Flatten(*dx); + auto& place = + *context.template device_context().eigen_device(); + auto eigen_p = framework::EigenVector::Flatten(*x); + + LogitGradFunctor functor; + functor(place, eigen_x, eigen_dout, eigen_dx, eigen_p, eps); + } +}; + } // namespace operators } // namespace paddle -#define FOR_EACH_ACTIVATION_OP(__macro) \ - __macro(soft_relu, SoftRelu, SoftReluFunctor, SoftReluGradFunctor); \ - __macro(softsign, Softsign, SoftsignFunctor, SoftsignGradFunctor); \ - __macro(relu6, Relu6, Relu6Functor, Relu6GradFunctor); +#define FOR_EACH_ACTIVATION_OP(__macro) \ + __macro(reciprocal, Reciprocal, ReciprocalFunctor, ReciprocalGradFunctor); \ + __macro(soft_relu, SoftRelu, SoftReluFunctor, SoftReluGradFunctor); \ + __macro(stanh, STanh, STanhFunctor, STanhGradFunctor); \ + __macro(softplus, Softplus, SoftplusFunctor, SoftplusGradFunctor); \ + __macro(softsign, Softsign, SoftsignFunctor, SoftsignGradFunctor); \ + __macro(relu6, Relu6, Relu6Functor, Relu6GradFunctor); \ + __macro(mish, Mish, MishFunctor, MishGradFunctor); diff --git a/paddle/fluid/operators/activation_op.kps 
b/paddle/fluid/operators/activation_op.kps index e33351520e..5118302f77 100644 --- a/paddle/fluid/operators/activation_op.kps +++ b/paddle/fluid/operators/activation_op.kps @@ -20,6 +20,140 @@ limitations under the License. */ namespace paddle { namespace operators { +template +struct CudaReciprocalFunctor : public BaseActivationFunctor { + T one = static_cast(1.0f); + + // reciprocal(x) = 1 / x + __device__ __forceinline__ T operator()(const T x) const { return one / x; } +}; + +template +struct CudaReciprocalGradFunctor : public BaseActivationFunctor { + // dx = -dout * out^2 + __device__ __forceinline__ T operator()(const T dout, const T out) const { + return -dout * out * out; + } + + static constexpr ActBwdOpFwdDeps FwdDeps() { + return ActBwdOpFwdDeps::kDepOut; + } +}; + +template +struct CudaExpFunctor : public BaseActivationFunctor { + using MPType = typename details::MPTypeTrait::Type; + + // exp(x) = exp(x) + __device__ __forceinline__ T operator()(const T arg_x) const { + MPType x = static_cast(arg_x); + return static_cast(exp(x)); + } +}; + +template +struct CudaExpGradFunctor : public BaseActivationFunctor { + // dx = dout * out + __device__ __forceinline__ T operator()(const T dout, const T out) const { + return dout * out; + } + + static constexpr ActBwdOpFwdDeps FwdDeps() { + return ActBwdOpFwdDeps::kDepOut; + } +}; + +template +struct CudaExpm1Functor : public BaseActivationFunctor { + using MPType = typename details::MPTypeTrait::Type; + + // expm1(x) = expm1(x) + __device__ __forceinline__ T operator()(const T arg_x) const { + MPType x = static_cast(arg_x); + return static_cast(expm1(x)); + } +}; + +template +struct CudaExpm1GradFunctor : public BaseActivationFunctor { + // dx = dout * out + __device__ __forceinline__ T operator()(const T dout, const T out) const { + return dout * out + dout; + } + + static constexpr ActBwdOpFwdDeps FwdDeps() { + return ActBwdOpFwdDeps::kDepOut; + } +}; + +template +struct CudaSquareFunctor : public BaseActivationFunctor { + // square(x) = x * x + __device__ __forceinline__ T operator()(const T x) const { return x * x; } +}; + +template +struct CudaSquareGradFunctor : public BaseActivationFunctor { + T two = static_cast(2.0f); + + // dx = dout * 2 * x + __device__ __forceinline__ T operator()(const T dout, const T x) const { + return dout * two * x; + } + + static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; } +}; + +template +struct CudaSqrtFunctor : public BaseActivationFunctor { + using MPType = typename details::MPTypeTrait::Type; + + // sqrt(x) = sqrt(x) + __device__ __forceinline__ T operator()(const T arg_x) const { + MPType x = static_cast(arg_x); + return static_cast(sqrt(x)); + } +}; + +template +struct CudaSqrtGradFunctor : public BaseActivationFunctor { + T one_half = static_cast(0.5f); + + // dx = dout * 0.5 / out + __device__ __forceinline__ T operator()(const T dout, const T out) const { + return one_half * dout / out; + } + + static constexpr ActBwdOpFwdDeps FwdDeps() { + return ActBwdOpFwdDeps::kDepOut; + } +}; + +template +struct CudaRsqrtFunctor : public BaseActivationFunctor { + using MPType = typename details::MPTypeTrait::Type; + + // rsqrt(x) = rsqrt(x) + __device__ __forceinline__ T operator()(const T arg_x) const { + MPType x = static_cast(arg_x); + return static_cast(rsqrt(x)); + } +}; + +template +struct CudaRsqrtGradFunctor : public BaseActivationFunctor { + T minus_one_half = static_cast(-0.5f); + + // dx = -0.5 * dout * out^3 + __device__ __forceinline__ T operator()(const T dout, 
const T out) const { + return minus_one_half * dout * out * out * out; + } + + static constexpr ActBwdOpFwdDeps FwdDeps() { + return ActBwdOpFwdDeps::kDepOut; + } +}; + template struct CudaSoftReluFunctor : public BaseActivationFunctor { using MPType = typename details::MPTypeTrait::Type; @@ -67,6 +201,119 @@ struct CudaSoftReluGradFunctor : public BaseActivationFunctor { } }; +template +struct CudaSTanhFunctor : public BaseActivationFunctor { + using MPType = typename details::MPTypeTrait::Type; + float scale_a; + float scale_b; + + typename BaseActivationFunctor::AttrPair GetAttrs() { + return {{"scale_a", &scale_a}, {"scale_b", &scale_b}}; + } + + // stanh(x) = b * tanh(a * x) + __device__ __forceinline__ T operator()(const T arg_x) const { + MPType x = static_cast(arg_x); + MPType a = static_cast(scale_a); + MPType b = static_cast(scale_b); + return static_cast(b * tanh(a * x)); + } +}; + +template +struct CudaSTanhGradFunctor : public BaseActivationFunctor { + using MPType = typename details::MPTypeTrait::Type; + MPType one = static_cast(1.0f); + float scale_a; + float scale_b; + + typename BaseActivationFunctor::AttrPair GetAttrs() { + return {{"scale_a", &scale_a}, {"scale_b", &scale_b}}; + } + + // dx = dout * a * b * (1 - tanh(a * x) * tanh(a * x)) + __device__ __forceinline__ T operator()(const T arg_dout, + const T arg_x) const { + MPType dout = static_cast(arg_dout); + MPType x = static_cast(arg_x); + MPType a = static_cast(scale_a); + MPType b = static_cast(scale_b); + MPType temp = tanh(a * x); + return static_cast(dout * a * b * (one - temp * temp)); + } + + static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; } +}; + +template +struct CudaSoftplusFunctor : public BaseActivationFunctor { + using MPType = typename details::MPTypeTrait::Type; + MPType one = static_cast(1.0f); + float beta; + float threshold; + + typename BaseActivationFunctor::AttrPair GetAttrs() { + return {{"beta", &beta}, {"threshold", &threshold}}; + } + + // softplus(x) = beta * x > threshold ? x : log(1 + exp(beta * x)) / beta + __device__ __forceinline__ T operator()(const T arg_x) const { + MPType x = static_cast(arg_x); + MPType b = static_cast(beta); + MPType t = static_cast(threshold); + MPType x_beta = x * beta; + return static_cast(x_beta > t ? x : log(one + exp(x_beta)) / b); + } +}; + +template +struct CudaSoftplusGradFunctor : public BaseActivationFunctor { + using MPType = typename details::MPTypeTrait::Type; + MPType one = static_cast(1.0f); + float beta; + float threshold; + + typename BaseActivationFunctor::AttrPair GetAttrs() { + return {{"beta", &beta}, {"threshold", &threshold}}; + } + + // dx = x * beta > threshold ? dout : dout / (1 + exp(-beta * x)) + __device__ __forceinline__ T operator()(const T arg_dout, + const T arg_x) const { + MPType dout = static_cast(arg_dout); + MPType x = static_cast(arg_x); + MPType b = static_cast(beta); + MPType t = static_cast(threshold); + MPType x_beta = x * beta; + return x_beta > t ? 
arg_dout : static_cast(dout / (one + exp(-x_beta))); + } + + static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; } +}; + +template +struct CudaSoftsignFunctor : public BaseActivationFunctor { + T one = static_cast(1.0f); + + // softsign(x) = x / (1 + abs(x)) + __device__ __forceinline__ T operator()(const T x) const { + return x / (one + abs(x)); + } +}; + +template +struct CudaSoftsignGradFunctor : public BaseActivationFunctor { + T one = static_cast(1.0f); + + // dx = dout / (1 + abs(x))^2 + __device__ __forceinline__ T operator()(const T dout, const T x) const { + T temp = one + abs(x); + return dout / (temp * temp); + } + + static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; } +}; + template struct CudaRelu6Functor : public BaseActivationFunctor { T zero = static_cast(0.0f); @@ -104,23 +351,49 @@ struct CudaRelu6GradFunctor : public BaseActivationFunctor { }; template -struct CudaSoftsignFunctor : public BaseActivationFunctor { - T one = static_cast(1.0f); +struct CudaMishFunctor : public BaseActivationFunctor { + using MPType = typename details::MPTypeTrait::Type; + MPType one = static_cast(1.0f); + float threshold; - // softsign(x) = x / (1 + abs(x)) - __device__ __forceinline__ T operator()(const T x) const { - return x / (one + abs(x)); + typename BaseActivationFunctor::AttrPair GetAttrs() { + return {{"threshold", &threshold}}; + } + + // mish(x) = x * tanh(softplus(x)) + // softplus(x) = x, if x > threshold + // = ln(1 + exp(x)), otherwise + // Inputs: args[0], the input x + __device__ __forceinline__ T operator()(const T arg_x) const { + MPType x = static_cast(arg_x); + MPType sp = (x > static_cast(threshold)) ? x : log(one + exp(x)); + return static_cast(x * tanh(sp)); } }; template -struct CudaSoftsignGradFunctor : public BaseActivationFunctor { - T one = static_cast(1.0f); +struct CudaMishGradFunctor : public BaseActivationFunctor { + using MPType = typename details::MPTypeTrait::Type; + MPType one = static_cast(1.0f); + float threshold; - // dx = dout / (1 + abs(x))^2 - __device__ __forceinline__ T operator()(const T dout, const T x) const { - T temp = one + abs(x); - return dout / (temp * temp); + typename BaseActivationFunctor::AttrPair GetAttrs() { + return {{"threshold", &threshold}}; + } + + // dx = dout * (tanh(sp) + x * (1 - tanh(sp) ** 2) * (1 - exp(-sp))) + // sp = softplus(x) + // Inputs: args[0], the input dout + // args[1], the input x + __device__ __forceinline__ T operator()(const T arg_dout, + const T arg_x) const { + MPType dout = static_cast(arg_dout); + MPType x = static_cast(arg_x); + MPType sp = (x > static_cast(threshold)) ? x : log(one + exp(x)); + MPType gsp = + (x > static_cast(threshold)) ? 
one : one / (one + exp(-x)); + MPType tsp = tanh(sp); + return static_cast(dout * (tsp + x * (one - tsp * tsp) * gsp)); } static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; } @@ -285,16 +558,6 @@ using CudaCeilFunctor = phi::funcs::CudaCeilFunctor; template using CudaZeroGradFunctor = phi::funcs::CudaZeroGradFunctor; -USE_PHI_FUNCTOR(CudaExp) -USE_PHI_FUNCTOR(CudaExpm1) -USE_PHI_FUNCTOR(CudaMish) -USE_PHI_FUNCTOR(CudaSTanh) -USE_PHI_FUNCTOR(CudaReciprocal) -USE_PHI_FUNCTOR(CudaSquare) -USE_PHI_FUNCTOR(CudaSqrt) -USE_PHI_FUNCTOR(CudaRsqrt) -USE_PHI_FUNCTOR(CudaSoftplus) - template using CudaELUGradNegativeAlphaFunctor = phi::funcs::CudaELUGradNegativeAlphaFunctor; @@ -373,6 +636,8 @@ REGISTER_OP_CUDA_KERNEL( /* ========================================================================== */ /* =========================== sqrt register ============================= */ +REGISTER_ACTIVATION_CUDA_KERNEL(sqrt, Sqrt, CudaSqrtFunctor, + CudaSqrtGradFunctor); REGISTER_OP_CUDA_KERNEL( sqrt_grad_grad, @@ -388,6 +653,8 @@ REGISTER_OP_CUDA_KERNEL( /* =========================== rsqrt register ============================= */ +REGISTER_ACTIVATION_CUDA_KERNEL(rsqrt, Rsqrt, CudaRsqrtFunctor, + CudaRsqrtGradFunctor); REGISTER_OP_CUDA_KERNEL( rsqrt_grad_grad, @@ -400,6 +667,8 @@ REGISTER_OP_CUDA_KERNEL( /* ========================================================================== */ /* =========================== square register ============================ */ +REGISTER_ACTIVATION_CUDA_KERNEL_INT(square, Square, CudaSquareFunctor, + CudaSquareGradFunctor); REGISTER_OP_CUDA_KERNEL( square_grad_grad, @@ -419,19 +688,75 @@ REGISTER_OP_CUDA_KERNEL( /* ========================== logit register ============================ */ namespace ops = paddle::operators; +REGISTER_OP_CUDA_KERNEL( + logit, ops::LogitKernel, + ops::LogitKernel, + ops::LogitKernel); +REGISTER_OP_CUDA_KERNEL( + logit_grad, + ops::LogitGradKernel, + ops::LogitGradKernel, + ops::LogitGradKernel); /* ========================================================================== */ /* ========================== exp register ============================ */ +REGISTER_OP_CUDA_KERNEL( + exp, ops::ActivationCudaKernel>, + ops::ActivationCudaKernel>, + ops::ActivationKernel>, + ops::ActivationKernel>, + ops::ActivationCudaKernel>); +REGISTER_OP_CUDA_KERNEL( + exp_grad, ops::ActivationGradCudaKernel>, + ops::ActivationGradCudaKernel>, + ops::ActivationGradCudaKernel>, + ops::ActivationGradCudaKernel>, + ops::ActivationGradCudaKernel>); /* ========================================================================== */ /* ========================== expm1 register ============================ */ + +REGISTER_OP_CUDA_KERNEL( + expm1, ops::ActivationCudaKernel>, + ops::ActivationCudaKernel>, + ops::ActivationCudaKernel>); +REGISTER_OP_CUDA_KERNEL( + expm1_grad, ops::ActivationGradCudaKernel>, + ops::ActivationGradCudaKernel>, + ops::ActivationGradCudaKernel>); /* ========================================================================== */ #define FOR_EACH_ACTIVATION_CUDA_OP(__macro) \ + __macro(softshrink, SoftShrink, CudaSoftShrinkFunctor, \ + CudaSoftShrinkGradFunctor); \ + __macro(reciprocal, Reciprocal, CudaReciprocalFunctor, \ + CudaReciprocalGradFunctor); \ __macro(soft_relu, SoftRelu, CudaSoftReluFunctor, CudaSoftReluGradFunctor); \ + __macro(stanh, STanh, CudaSTanhFunctor, CudaSTanhGradFunctor); \ + __macro(softplus, Softplus, CudaSoftplusFunctor, CudaSoftplusGradFunctor); \ + __macro(softsign, Softsign, CudaSoftsignFunctor, 
CudaSoftsignGradFunctor); \ __macro(relu6, Relu6, CudaRelu6Functor, CudaRelu6GradFunctor); \ - __macro(softsign, Softsign, CudaSoftsignFunctor, CudaSoftsignGradFunctor); - + __macro(tanh_shrink, TanhShrink, CudaTanhShrinkFunctor, \ + CudaTanhShrinkGradFunctor); \ + __macro(hard_shrink, HardShrink, CudaHardShrinkFunctor, \ + CudaHardShrinkGradFunctor); \ + __macro(mish, Mish, CudaMishFunctor, CudaMishGradFunctor); FOR_EACH_ACTIVATION_CUDA_OP(REGISTER_ACTIVATION_CUDA_KERNEL) #ifdef PADDLE_WITH_XPU_KP diff --git a/paddle/fluid/operators/math/CMakeLists.txt b/paddle/fluid/operators/math/CMakeLists.txt index df8150b192..af1069cb86 100644 --- a/paddle/fluid/operators/math/CMakeLists.txt +++ b/paddle/fluid/operators/math/CMakeLists.txt @@ -22,9 +22,9 @@ math_library(sampler DEPS generator) math_library(maxouting) if(WITH_MKLDNN) - math_library(selected_rows_functor DEPS selected_rows_utils math_function blas mkldnn_axpy_handler mixed_vector) + math_library(selected_rows_functor DEPS selected_rows_utils math_function blas mkldnn_axpy_handler) else() - math_library(selected_rows_functor DEPS selected_rows_utils math_function blas mixed_vector) + math_library(selected_rows_functor DEPS selected_rows_utils math_function blas) endif() math_library(sequence_padding) diff --git a/paddle/fluid/operators/math/selected_rows_functor.cc b/paddle/fluid/operators/math/selected_rows_functor.cc index f77287826f..0ca2529f13 100644 --- a/paddle/fluid/operators/math/selected_rows_functor.cc +++ b/paddle/fluid/operators/math/selected_rows_functor.cc @@ -13,7 +13,6 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/math/selected_rows_functor.h" -#include "paddle/fluid/framework/mixed_vector.h" #include "paddle/fluid/platform/device/device_wrapper.h" #ifdef PADDLE_WITH_MKLDNN diff --git a/paddle/fluid/operators/temporal_shift_op.h b/paddle/fluid/operators/temporal_shift_op.h index 141f2127da..ec43ed88cb 100644 --- a/paddle/fluid/operators/temporal_shift_op.h +++ b/paddle/fluid/operators/temporal_shift_op.h @@ -19,6 +19,56 @@ namespace operators { using Tensor = framework::Tensor; using DataLayout = framework::DataLayout; +template +void TemporalShiftFwNCHW(const T* input, T* output, const int ntchw, + const int tchw, const int chw, const int hw, + const int t, const int c1, const int c2) { + int src_it = 0; + for (int i = 0; i < ntchw; i++) { + int it = (i % tchw) / chw; + int ic = (i % chw) / hw; + + if (ic < c1) { + src_it = it - 1; + } else if (ic < c2) { + src_it = it + 1; + } else { + src_it = it; + } + + if (src_it < 0 || src_it >= t) { + output[i] = 0; + } else { + output[i] = input[i + (src_it - it) * chw]; + } + } +} + +template +void TemporalShiftFwNHWC(const T* input, T* output, const int nthwc, + const int thwc, const int hwc, const int t, + const int c, const int c1, const int c2) { + int src_it = 0; + for (int i = 0; i < nthwc; i++) { + int it = (i % thwc) / hwc; + int ic = i % c; + + if (ic < c1) { + src_it = it - 1; + } else if (ic < c2) { + src_it = it + 1; + } else { + src_it = it; + } + + if (src_it < 0 || src_it >= t) { + output[i] = 0; + } else { + output[i] = input[i + (src_it - it) * hwc]; + } + } +} + template void TemporalShiftBwNCHW(const T* output_grad, T* input_grad, const int ntchw, const int tchw, const int chw, const int hw, @@ -72,7 +122,45 @@ void TemporalShiftBwNHWC(const T* output_grad, T* input_grad, const int nthwc, template class TemporalShiftKernel : public framework::OpKernel { public: - void 
Compute(const framework::ExecutionContext& ctx) const override {} + void Compute(const framework::ExecutionContext& ctx) const override { + auto* input = ctx.Input("X"); + auto* output = ctx.Output("Out"); + int t = ctx.Attr("seg_num"); + float shift_ratio = ctx.Attr("shift_ratio"); + const std::string data_format_str = ctx.Attr("data_format"); + const DataLayout data_layout = + framework::StringToDataLayout(data_format_str); + + const int nt = input->dims()[0]; + const int c = (data_layout == DataLayout::kNCHW ? input->dims()[1] + : input->dims()[3]); + const int h = (data_layout == DataLayout::kNCHW ? input->dims()[2] + : input->dims()[1]); + const int w = (data_layout == DataLayout::kNCHW ? input->dims()[3] + : input->dims()[2]); + + const int hw = h * w; + const int chw = c * hw; + const int tchw = t * chw; + const int ntchw = nt * chw; + + const int c1 = static_cast(c * shift_ratio); + const int c2 = static_cast(c * 2 * shift_ratio); + + framework::DDim out_dims = + (data_layout == DataLayout::kNCHW ? phi::make_ddim({nt, c, h, w}) + : phi::make_ddim({nt, h, w, c})); + const T* input_data = input->data(); + T* output_data = output->mutable_data(out_dims, ctx.GetPlace()); + + if (data_layout == DataLayout::kNCHW) { + TemporalShiftFwNCHW(input_data, output_data, ntchw, tchw, chw, hw, t, + c1, c2); + } else { + TemporalShiftFwNHWC(input_data, output_data, ntchw, tchw, chw, t, c, + c1, c2); + } + } }; template diff --git a/paddle/phi/kernels/activation_kernel.h b/paddle/phi/kernels/activation_kernel.h index 8a40bacd39..84c46870e0 100644 --- a/paddle/phi/kernels/activation_kernel.h +++ b/paddle/phi/kernels/activation_kernel.h @@ -53,13 +53,6 @@ DECLARE_ACTIVATION_KERNEL(Acosh) DECLARE_ACTIVATION_KERNEL(Atanh) DECLARE_ACTIVATION_KERNEL(Relu) DECLARE_ACTIVATION_KERNEL(Tanh) -DECLARE_ACTIVATION_KERNEL(Exp) -DECLARE_ACTIVATION_KERNEL(Expm1) -DECLARE_ACTIVATION_KERNEL(Reciprocal) -DECLARE_ACTIVATION_KERNEL(Square) -DECLARE_ACTIVATION_KERNEL(Sqrt) -DECLARE_ACTIVATION_KERNEL(Rsqrt) - DECLARE_ACTIVATION_KERNEL(TanhShrink) DECLARE_ACTIVATION_KERNEL(Silu) DECLARE_ACTIVATION_KERNEL(Sigmoid) @@ -80,23 +73,8 @@ DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(Elu, alpha) DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(Swish, beta) DECLARE_ACTIVATION_KERNEL_WITH_TWO_ATTRS(BRelu, t_min, t_max) -DECLARE_ACTIVATION_KERNEL_WITH_TWO_ATTRS(STanh, scale_a, scale_b) DECLARE_ACTIVATION_KERNEL_WITH_TWO_ATTRS(HardSigmoid, slope, offset) -DECLARE_ACTIVATION_KERNEL_WITH_TWO_ATTRS(Softplus, beta, threshold) - -template -void LogitKernel(const Context& dev_ctx, - const DenseTensor& x, - float eps, - DenseTensor* out); - -template -void MishKernel(const Context& dev_ctx, - const DenseTensor& x, - float threshold, - DenseTensor* out); - template void HardSwishKernel(const Context& dev_ctx, const DenseTensor& x, diff --git a/paddle/phi/kernels/cpu/activation_grad_kernel.cc b/paddle/phi/kernels/cpu/activation_grad_kernel.cc index d2b816de8f..be0d02e2a1 100644 --- a/paddle/phi/kernels/cpu/activation_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/activation_grad_kernel.cc @@ -129,13 +129,6 @@ DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(Acosh, AcoshGradFunctor); DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(Atanh, AtanhGradFunctor); DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(TanhShrink, TanhShrinkGradFunctor); DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(Silu, SiluGradFunctor); -DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(Square, SquareGradFunctor); - -DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Exp, ExpGradFunctor); -DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Expm1, 
Expm1GradFunctor); -DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Reciprocal, ReciprocalGradFunctor); -DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Sqrt, SqrtGradFunctor); -DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Rsqrt, RsqrtGradFunctor); DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(LogSigmoid, LogSigmoidGradFunctor); DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(Log, LogGradFunctor); DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(Log2, Log2GradFunctor); @@ -164,24 +157,11 @@ DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(HardShrink, threshold); DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(Swish, SwishGradFunctor, beta); -DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(Mish, - MishGradFunctor, - threshold); - DEFINE_CPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX(BRelu, BReluGradFunctor, t_min, t_max); -DEFINE_CPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX(STanh, - STanhGradFunctor, - scale_a, - scale_b); - -DEFINE_CPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX(Softplus, - SoftplusGradFunctor, - beta, - threshold); DEFINE_CPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPOUT(HardSigmoid, HardSigmoidGradFunctor, slope, @@ -267,12 +247,6 @@ PD_REGISTER_ACTIVATION_GRAD_KERNEL(hard_shrink_grad, HardShrinkGradKernel) PD_REGISTER_ACTIVATION_GRAD_KERNEL(tanh_shrink_grad, TanhShrinkGradKernel) PD_REGISTER_ACTIVATION_GRAD_KERNEL(elu_grad, EluGradKernel) PD_REGISTER_ACTIVATION_GRAD_KERNEL(silu_grad, SiluGradKernel) -PD_REGISTER_ACTIVATION_GRAD_KERNEL(mish_grad, MishGradKernel) -PD_REGISTER_ACTIVATION_GRAD_KERNEL(stanh_grad, STanhGradKernel) -PD_REGISTER_ACTIVATION_GRAD_KERNEL(reciprocal_grad, ReciprocalGradKernel) -PD_REGISTER_ACTIVATION_GRAD_KERNEL(sqrt_grad, SqrtGradKernel) -PD_REGISTER_ACTIVATION_GRAD_KERNEL(rsqrt_grad, RsqrtGradKernel) -PD_REGISTER_ACTIVATION_GRAD_KERNEL(softplus_grad, SoftplusGradKernel) PD_REGISTER_ACTIVATION_DOUBLE_GRAD_KERNEL(relu_double_grad, ReluDoubleGradKernel) @@ -289,34 +263,6 @@ PD_REGISTER_KERNEL(tanh_triple_grad, float, double, phi::dtype::float16) {} - -PD_REGISTER_KERNEL(exp_grad, - CPU, - ALL_LAYOUT, - phi::ExpGradKernel, - float, - double, - int, - int64_t) {} - -PD_REGISTER_KERNEL(expm1_grad, - CPU, - ALL_LAYOUT, - phi::Expm1GradKernel, - float, - double, - phi::dtype::float16) {} - -PD_REGISTER_KERNEL( - logit_grad, CPU, ALL_LAYOUT, phi::LogitGradKernel, float, double) {} -PD_REGISTER_KERNEL(square_grad, - CPU, - ALL_LAYOUT, - phi::SquareGradKernel, - float, - double, - int, - int64_t) {} PD_REGISTER_ACTIVATION_GRAD_KERNEL(sigmoid_grad, SigmoidGradKernel) PD_REGISTER_ACTIVATION_GRAD_KERNEL(sigmoid_double_grad, SigmoidDoubleGradKernel) PD_REGISTER_ACTIVATION_GRAD_KERNEL(sigmoid_triple_grad, SigmoidTripleGradKernel) diff --git a/paddle/phi/kernels/cpu/activation_kernel.cc b/paddle/phi/kernels/cpu/activation_kernel.cc index fe0643286c..d55d4cfd0f 100644 --- a/paddle/phi/kernels/cpu/activation_kernel.cc +++ b/paddle/phi/kernels/cpu/activation_kernel.cc @@ -15,7 +15,6 @@ limitations under the License. 
*/ #include "paddle/phi/kernels/activation_kernel.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/funcs/activation_functor.h" #include "paddle/phi/kernels/impl/activation_impl.h" namespace phi { @@ -73,12 +72,6 @@ DEFINE_CPU_ACTIVATION_KERNEL(Relu, ReluCPUFunctor) DEFINE_CPU_ACTIVATION_KERNEL(Tanh, TanhFunctor) DEFINE_CPU_ACTIVATION_KERNEL(TanhShrink, TanhShrinkFunctor) DEFINE_CPU_ACTIVATION_KERNEL(Silu, SiluFunctor) -DEFINE_CPU_ACTIVATION_KERNEL(Exp, ExpFunctor) -DEFINE_CPU_ACTIVATION_KERNEL(Expm1, Expm1Functor) -DEFINE_CPU_ACTIVATION_KERNEL(Reciprocal, ReciprocalFunctor) -DEFINE_CPU_ACTIVATION_KERNEL(Square, SquareFunctor) -DEFINE_CPU_ACTIVATION_KERNEL(Sqrt, SqrtFunctor) -DEFINE_CPU_ACTIVATION_KERNEL(Rsqrt, RsqrtFunctor) DEFINE_CPU_ACTIVATION_KERNEL(Sigmoid, SigmoidFunctor) DEFINE_CPU_ACTIVATION_KERNEL(LogSigmoid, LogSigmoidFunctor) DEFINE_CPU_ACTIVATION_KERNEL(Log, LogFunctor) @@ -90,19 +83,15 @@ DEFINE_CPU_ACTIVATION_KERNEL(Floor, FloorFunctor) DEFINE_CPU_ACTIVATION_KERNEL(Ceil, CeilFunctor) DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(LeakyRelu, LeakyReluFunctor, alpha) - DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(ThresholdedRelu, ThresholdedReluFunctor, threshold) -DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(Mish, MishFunctor, threshold) -DEFINE_CPU_ACT_KERNEL_WITH_TWO_ATTRS(BRelu, BReluFunctor, t_min, t_max) -DEFINE_CPU_ACT_KERNEL_WITH_TWO_ATTRS(STanh, STanhFunctor, scale_a, scale_b) -DEFINE_CPU_ACT_KERNEL_WITH_TWO_ATTRS(Softplus, SoftplusFunctor, beta, threshold) DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(HardShrink, HardShrinkFunctor, threshold) DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(SoftShrink, SoftShrinkFunctor, lambda) DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(Elu, ELUFunctor, alpha) DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(Swish, SwishFunctor, beta) +DEFINE_CPU_ACT_KERNEL_WITH_TWO_ATTRS(BRelu, BReluFunctor, t_min, t_max) DEFINE_CPU_ACT_KERNEL_WITH_TWO_ATTRS(HardSigmoid, HardSigmoidFunctor, slope, @@ -150,25 +139,6 @@ PD_REGISTER_ACTIVATION_KERNEL(soft_shrink, SoftShrinkKernel) PD_REGISTER_ACTIVATION_KERNEL(tanh_shrink, TanhShrinkKernel) PD_REGISTER_ACTIVATION_KERNEL(elu, EluKernel) PD_REGISTER_ACTIVATION_KERNEL(silu, SiluKernel) -PD_REGISTER_ACTIVATION_KERNEL(mish, MishKernel) -PD_REGISTER_ACTIVATION_KERNEL(stanh, STanhKernel) -PD_REGISTER_ACTIVATION_KERNEL(reciprocal, ReciprocalKernel) -PD_REGISTER_ACTIVATION_KERNEL(sqrt, SqrtKernel) -PD_REGISTER_ACTIVATION_KERNEL(rsqrt, RsqrtKernel) -PD_REGISTER_ACTIVATION_KERNEL(softplus, SoftplusKernel) - -PD_REGISTER_KERNEL( - exp, CPU, ALL_LAYOUT, phi::ExpKernel, float, double, int, int64_t) {} -PD_REGISTER_KERNEL(expm1, - CPU, - ALL_LAYOUT, - phi::Expm1Kernel, - float, - double, - phi::dtype::float16) {} -PD_REGISTER_KERNEL(logit, CPU, ALL_LAYOUT, phi::LogitKernel, float, double) {} -PD_REGISTER_KERNEL( - square, CPU, ALL_LAYOUT, phi::SquareKernel, float, double, int, int64_t) {} PD_REGISTER_ACTIVATION_KERNEL(sigmoid, SigmoidKernel) PD_REGISTER_ACTIVATION_KERNEL(logsigmoid, LogSigmoidKernel) PD_REGISTER_ACTIVATION_KERNEL(hard_sigmoid, HardSigmoidKernel) diff --git a/paddle/phi/kernels/cpu/temporal_shift_grad_kernel.cc b/paddle/phi/kernels/cpu/temporal_shift_grad_kernel.cc deleted file mode 100644 index 400f7e8783..0000000000 --- a/paddle/phi/kernels/cpu/temporal_shift_grad_kernel.cc +++ /dev/null @@ -1,136 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/phi/kernels/temporal_shift_grad_kernel.h" -#include "paddle/phi/backends/cpu/cpu_context.h" -#include "paddle/phi/common/layout.h" -#include "paddle/phi/core/kernel_registry.h" - -namespace phi { - -template -void TemporalShiftBwNCHW(const T* output_grad, - T* input_grad, - const int ntchw, - const int tchw, - const int chw, - const int hw, - const int t, - const int c1, - const int c2) { - int src_it = 0; - for (int i = 0; i < ntchw; i++) { - int it = (i % tchw) / chw; - int ic = (i % chw) / hw; - - if (ic < c1) { - src_it = it + 1; - } else if (ic < c2) { - src_it = it - 1; - } else { - src_it = it; - } - - if (src_it >= 0 && src_it < t) { - input_grad[i] = output_grad[i + (src_it - it) * chw]; - } else { - input_grad[i] = 0; - } - } -} - -template -void TemporalShiftBwNHWC(const T* output_grad, - T* input_grad, - const int nthwc, - const int thwc, - const int hwc, - const int t, - const int c, - const int c1, - const int c2) { - int src_it = 0; - for (int i = 0; i < nthwc; i++) { - int it = (i % thwc) / hwc; - int ic = i % c; - - if (ic < c1) { - src_it = it + 1; - } else if (ic < c2) { - src_it = it - 1; - } else { - src_it = it; - } - - if (src_it >= 0 && src_it < t) { - input_grad[i] = output_grad[i + (src_it - it) * hwc]; - } else { - input_grad[i] = 0; - } - } -} - -template -void TemporalShiftGradKernel(const Context& dev_ctx, - const DenseTensor& out_grad, - int seg_num, - float shift_ratio, - const std::string& data_format_str, - DenseTensor* x_grad) { - auto* input_grad = x_grad; - auto* output_grad = &out_grad; - int t = seg_num; - const DataLayout data_layout = - paddle::framework::StringToDataLayout(data_format_str); - - const int nt = output_grad->dims()[0]; - const int c = (data_layout == DataLayout::kNCHW ? output_grad->dims()[1] - : output_grad->dims()[3]); - const int h = (data_layout == DataLayout::kNCHW ? output_grad->dims()[2] - : output_grad->dims()[1]); - const int w = (data_layout == DataLayout::kNCHW ? output_grad->dims()[3] - : output_grad->dims()[2]); - - const int hw = h * w; - const int chw = c * hw; - const int tchw = t * chw; - const int ntchw = nt * chw; - - const int c1 = static_cast(c * shift_ratio); - const int c2 = static_cast(c * 2 * shift_ratio); - - DDim in_grad_dims = - (data_layout == DataLayout::kNCHW ? 
phi::make_ddim({nt, c, h, w}) - : phi::make_ddim({nt, h, w, c})); - const T* output_grad_data = output_grad->data(); - T* input_grad_data = - input_grad->mutable_data(in_grad_dims, dev_ctx.GetPlace()); - - if (data_layout == DataLayout::kNCHW) { - TemporalShiftBwNCHW( - output_grad_data, input_grad_data, ntchw, tchw, chw, hw, t, c1, c2); - } else { - TemporalShiftBwNHWC( - output_grad_data, input_grad_data, ntchw, tchw, chw, t, c, c1, c2); - } -} - -} // namespace phi - -PD_REGISTER_KERNEL(temporal_shift_grad, - CPU, - ALL_LAYOUT, - phi::TemporalShiftGradKernel, - float, - double) {} diff --git a/paddle/phi/kernels/cpu/temporal_shift_kernel.cc b/paddle/phi/kernels/cpu/temporal_shift_kernel.cc deleted file mode 100644 index 6721117992..0000000000 --- a/paddle/phi/kernels/cpu/temporal_shift_kernel.cc +++ /dev/null @@ -1,131 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/phi/kernels/temporal_shift_kernel.h" -#include "paddle/phi/backends/cpu/cpu_context.h" -#include "paddle/phi/common/layout.h" -#include "paddle/phi/core/kernel_registry.h" - -namespace phi { - -template -void TemporalShiftFwNCHW(const T* input, - T* output, - const int ntchw, - const int tchw, - const int chw, - const int hw, - const int t, - const int c1, - const int c2) { - int src_it = 0; - for (int i = 0; i < ntchw; i++) { - int it = (i % tchw) / chw; - int ic = (i % chw) / hw; - - if (ic < c1) { - src_it = it - 1; - } else if (ic < c2) { - src_it = it + 1; - } else { - src_it = it; - } - - if (src_it < 0 || src_it >= t) { - output[i] = 0; - } else { - output[i] = input[i + (src_it - it) * chw]; - } - } -} - -template -void TemporalShiftFwNHWC(const T* input, - T* output, - const int nthwc, - const int thwc, - const int hwc, - const int t, - const int c, - const int c1, - const int c2) { - int src_it = 0; - for (int i = 0; i < nthwc; i++) { - int it = (i % thwc) / hwc; - int ic = i % c; - - if (ic < c1) { - src_it = it - 1; - } else if (ic < c2) { - src_it = it + 1; - } else { - src_it = it; - } - - if (src_it < 0 || src_it >= t) { - output[i] = 0; - } else { - output[i] = input[i + (src_it - it) * hwc]; - } - } -} - -template -void TemporalShiftKernel(const Context& dev_ctx, - const DenseTensor& x, - int seg_num, - float shift_ratio, - const std::string& data_format_str, - DenseTensor* out) { - auto* input = &x; - auto* output = out; - int t = seg_num; - const DataLayout data_layout = - paddle::framework::StringToDataLayout(data_format_str); - - const int nt = input->dims()[0]; - const int c = - (data_layout == DataLayout::kNCHW ? input->dims()[1] : input->dims()[3]); - const int h = - (data_layout == DataLayout::kNCHW ? input->dims()[2] : input->dims()[1]); - const int w = - (data_layout == DataLayout::kNCHW ? 
input->dims()[3] : input->dims()[2]); - - const int hw = h * w; - const int chw = c * hw; - const int tchw = t * chw; - const int ntchw = nt * chw; - - const int c1 = static_cast(c * shift_ratio); - const int c2 = static_cast(c * 2 * shift_ratio); - - DDim out_dims = - (data_layout == DataLayout::kNCHW ? phi::make_ddim({nt, c, h, w}) - : phi::make_ddim({nt, h, w, c})); - const T* input_data = input->data(); - T* output_data = output->mutable_data(out_dims, dev_ctx.GetPlace()); - - if (data_layout == DataLayout::kNCHW) { - TemporalShiftFwNCHW( - input_data, output_data, ntchw, tchw, chw, hw, t, c1, c2); - } else { - TemporalShiftFwNHWC( - input_data, output_data, ntchw, tchw, chw, t, c, c1, c2); - } -} - -} // namespace phi - -PD_REGISTER_KERNEL( - temporal_shift, CPU, ALL_LAYOUT, phi::TemporalShiftKernel, float, double) {} diff --git a/paddle/phi/kernels/funcs/activation_functor.h b/paddle/phi/kernels/funcs/activation_functor.h index eee6cf5640..bcadc59126 100644 --- a/paddle/phi/kernels/funcs/activation_functor.h +++ b/paddle/phi/kernels/funcs/activation_functor.h @@ -106,31 +106,6 @@ struct SinFunctor : public BaseActivationFunctor { } }; -// reciprocal(x) = 1 / x -template -struct ReciprocalFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Out out) const { - out.device(d) = static_cast(1) / x; - } -}; - -template -struct ReciprocalGradFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Out out, dOut dout, dX dx) const { - dx.device(d) = dout * static_cast(-1) * out * out; - } - - static constexpr ActBwdOpFwdDeps FwdDeps() { - return ActBwdOpFwdDeps::kDepOut; - } -}; - // cosine'(x) = -sin(x) template struct CosGradFunctor : public BaseActivationFunctor { @@ -155,108 +130,6 @@ struct CosFunctor : public BaseActivationFunctor { } }; -template -struct LogitFunctor { - template - void operator()(Device d, X x, Out out, P p, float eps) const { - // logit(x) = ln(x/(1-x)) - auto tmp_x = - (x.cwiseMin(static_cast(1.0 - eps))).cwiseMax(static_cast(eps)); - - if (!eps) { - out.device(d) = (x < static_cast(0.0) || x > static_cast(1.0)) - .select(p.constant(static_cast(NAN)), - (tmp_x / (static_cast(1) - tmp_x)).log()); - } else { - out.device(d) = (tmp_x / (static_cast(1) - tmp_x)).log(); - } - } -}; - -// mish(x) = x * tanh(softplus(x)) -// softplus(x) = x, if x > threshold -// = ln(1 + exp(x)), otherwise - -template -struct MishFunctor : public BaseActivationFunctor { - float threshold; - typename BaseActivationFunctor::AttrPair GetAttrs() { - return {{"threshold", &threshold}}; - } - - template - void operator()(Device d, X x, Out out) const { - auto sp = (x > static_cast(threshold)) - .select(x, (static_cast(1) + x.exp()).log()); - out.device(d) = x * sp.tanh(); - } -}; - -// dx = dout * (tanh(sp) + x * (1 - tanh(sp) ** 2) * (1 - exp(-sp))) -// sp = softplus(x) - -template -struct MishGradFunctor : public BaseActivationFunctor { - float threshold; - - typename BaseActivationFunctor::AttrPair GetAttrs() { - return {{"threshold", &threshold}}; - } - - template - void operator()(Device d, X x, Out out, dOut dout, dX dx) const { - auto sp = (x > static_cast(threshold)) - .select(x, (static_cast(1) + x.exp()).log()); - auto gsp = static_cast(1) - (-sp).exp(); - auto tsp = sp.tanh(); - dx.device(d) = dout * (tsp + x * (static_cast(1) - tsp * tsp) * gsp); - } - - static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; } -}; - -template -struct STanhFunctor : public BaseActivationFunctor { - float scale_a; - float 
scale_b; - typename BaseActivationFunctor::AttrPair GetAttrs() { - return {{"scale_a", &scale_a}, {"scale_b", &scale_b}}; - } - - template - void operator()(Device d, X x, Out out) const { - out.device(d) = - static_cast(scale_b) * (static_cast(scale_a) * x).tanh(); - } -}; - -template -struct STanhGradFunctor : public BaseActivationFunctor { - float scale_a; - float scale_b; - typename BaseActivationFunctor::AttrPair GetAttrs() { - return {{"scale_a", &scale_a}, {"scale_b", &scale_b}}; - } - - template - void operator()(Device d, X x, Out out, dOut dout, dX dx) const { - auto a = static_cast(scale_a); - auto b = static_cast(scale_b); - auto temp = (a * x).tanh() * (a * x).tanh(); - dx.device(d) = dout * a * b * (static_cast(1) - temp); - } - - static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; } -}; - template struct Tangent { HOSTDEVICE T operator()(const T& val) const { return tan(val); } @@ -284,132 +157,6 @@ struct TanGradFunctor : public BaseActivationFunctor { static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; } }; -// square(x) = x^2 -template -struct SquareFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Out out) const { - out.device(d) = x.square(); - } -}; - -template -struct SquareGradFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Out out, dOut dout, dX dx) const { - dx.device(d) = dout * static_cast(2) * x; - } - - static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; } -}; - -// sqrt(x) = x^(1/2) -template -struct SqrtFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Out out) const { - out.device(d) = x.sqrt(); - } -}; - -template -struct SqrtGradFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Out out, dOut dout, dX dx) const { - dx.device(d) = static_cast(0.5) * dout / out; - } - - static constexpr ActBwdOpFwdDeps FwdDeps() { - return ActBwdOpFwdDeps::kDepOut; - } -}; - -// rsqrt(x) = x^(-1/2) -template -struct RsqrtFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Out out) const { - out.device(d) = x.rsqrt(); - } -}; - -template -struct RsqrtGradFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Out out, dOut dout, dX dx) const { - dx.device(d) = static_cast(-0.5) * dout * out * out * out; - } - - static constexpr ActBwdOpFwdDeps FwdDeps() { - return ActBwdOpFwdDeps::kDepOut; - } -}; - -// // For numerical stability, using the following formula instead of -// softplus(x) = -// // log(1 + exp(x)) -// // softplus(x) = log(1 + exp(beta * x)) / beta when beta * x <= -// threshold(beta = -// // 1, threshold = 20 by default), otherwise x - -template -struct SoftplusFunctor : public BaseActivationFunctor { - float beta; - float threshold; - - typename BaseActivationFunctor::AttrPair GetAttrs() { - return {{"beta", &beta}, {"threshold", &threshold}}; - } - - template - void operator()(Device d, X x, Out out) const { - auto x_beta = static_cast(beta) * x; - out.device(d) = (x_beta > static_cast(threshold)) - .select(x, - (static_cast(1) + x_beta.exp()).log() / - static_cast(beta)); - } -}; - -// For numerical stability, using the following formula instead of -// d(softplus(x))/dx = 1 / (1 + exp(-x)) -// d(softplus(x))/dx = 1 / (1 + exp(-beta * x)) when beta * x <= threshold(beta -// = 1, threshold = 20 by default), otherwise x - -template -struct SoftplusGradFunctor : public BaseActivationFunctor { - float beta; - 
float threshold; - typename BaseActivationFunctor::AttrPair GetAttrs() { - return {{"beta", &beta}, {"threshold", &threshold}}; - } - template - void operator()(Device d, X x, Out out, dOut dout, dX dx) const { - auto x_beta = static_cast(beta) * x; - dx.device(d) = - (x_beta > static_cast(threshold)) - .select(dout, dout / (static_cast(1) + (-x_beta).exp())); - } - - static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; } -}; - // Tangent(x) = tan(x) template struct TanFunctor : public BaseActivationFunctor { @@ -601,18 +348,6 @@ struct AtanGradFunctor : public BaseActivationFunctor { static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; } }; -template -struct LogitGradFunctor { - template - void operator()(Device d, X x, dOut dout, dX dx, P p, float eps) const { - // logit(x)' = 1/(x*(1-x)) - dx.device(d) = - (x < static_cast(eps) || x > static_cast(1.0 - eps)) - .select(p.constant(static_cast(0)), - dout * (static_cast(1) / ((static_cast(1) - x) * x))); - } -}; - template struct Acosh { HOSTDEVICE T operator()(const T& val) const { return acosh(val); } @@ -723,57 +458,6 @@ struct AtanhGradFunctor : public BaseActivationFunctor { static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; } }; -// exp functor -// exp(x) = e^x -template -struct ExpFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Out out) const { - out.device(d) = x.exp(); - } -}; - -template -struct ExpGradFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Out out, dOut dout, dX dx) const { - dx.device(d) = dout * out; - } - - static constexpr ActBwdOpFwdDeps FwdDeps() { - return ActBwdOpFwdDeps::kDepOut; - } -}; - -// expm1(x) = e^x - 1 -template -struct Expm1Functor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Out out) const { - out.device(d) = x.expm1(); - } -}; - -template -struct Expm1GradFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Out out, dOut dout, dX dx) const { - dx.device(d) = dout * out + dout; - } - - static constexpr ActBwdOpFwdDeps FwdDeps() { - return ActBwdOpFwdDeps::kDepOut; - } -}; - // relu(x) = max(x, 0) template struct ReluCPUFunctor : public BaseActivationFunctor { @@ -1876,90 +1560,6 @@ struct CudaCosGradFunctor : public BaseActivationFunctor { static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; } }; -template -struct CudaExpFunctor : public BaseActivationFunctor { - using MPType = typename phi::dtype::MPTypeTrait::Type; - - // exp(x) = exp(x) - __device__ __forceinline__ T operator()(const T arg_x) const { - MPType x = static_cast(arg_x); - return static_cast(exp(x)); - } -}; - -template -struct CudaSquareFunctor : public BaseActivationFunctor { - // square(x) = x * x - __device__ __forceinline__ T operator()(const T x) const { return x * x; } -}; - -template -struct CudaSquareGradFunctor : public BaseActivationFunctor { - T two = static_cast(2.0f); - - // dx = dout * 2 * x - __device__ __forceinline__ T operator()(const T dout, const T x) const { - return dout * two * x; - } - - static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; } -}; - -template -struct CudaExpGradFunctor : public BaseActivationFunctor { - // dx = dout * out - __device__ __forceinline__ T operator()(const T dout, const T out) const { - return dout * out; - } - - static constexpr ActBwdOpFwdDeps FwdDeps() { - return ActBwdOpFwdDeps::kDepOut; - } -}; - -template -struct CudaReciprocalFunctor : public 
BaseActivationFunctor { - T one = static_cast(1.0f); - - // reciprocal(x) = 1 / x - __device__ __forceinline__ T operator()(const T x) const { return one / x; } -}; - -template -struct CudaReciprocalGradFunctor : public BaseActivationFunctor { - // dx = -dout * out^2 - __device__ __forceinline__ T operator()(const T dout, const T out) const { - return -dout * out * out; - } - - static constexpr ActBwdOpFwdDeps FwdDeps() { - return ActBwdOpFwdDeps::kDepOut; - } -}; - -template -struct CudaExpm1Functor : public BaseActivationFunctor { - using MPType = typename phi::dtype::MPTypeTrait::Type; - - // expm1(x) = expm1(x) - __device__ __forceinline__ T operator()(const T arg_x) const { - MPType x = static_cast(arg_x); - return static_cast(expm1(x)); - } -}; - -template -struct CudaExpm1GradFunctor : public BaseActivationFunctor { - // dx = dout * out - __device__ __forceinline__ T operator()(const T dout, const T out) const { - return dout * out + dout; - } - - static constexpr ActBwdOpFwdDeps FwdDeps() { - return ActBwdOpFwdDeps::kDepOut; - } -}; - template struct CudaSinFunctor : public BaseActivationFunctor { using MPType = typename phi::dtype::MPTypeTrait::Type; @@ -2182,96 +1782,6 @@ struct CudaAtanhFunctor : public BaseActivationFunctor { } }; -template -struct CudaSTanhFunctor : public BaseActivationFunctor { - using MPType = typename phi::dtype::MPTypeTrait::Type; - float scale_a; - float scale_b; - - typename BaseActivationFunctor::AttrPair GetAttrs() { - return {{"scale_a", &scale_a}, {"scale_b", &scale_b}}; - } - - // stanh(x) = b * tanh(a * x) - __device__ __forceinline__ T operator()(const T arg_x) const { - MPType x = static_cast(arg_x); - MPType a = static_cast(scale_a); - MPType b = static_cast(scale_b); - return static_cast(b * tanh(a * x)); - } -}; - -template -struct CudaSTanhGradFunctor : public BaseActivationFunctor { - using MPType = typename phi::dtype::MPTypeTrait::Type; - MPType one = static_cast(1.0f); - float scale_a; - float scale_b; - - typename BaseActivationFunctor::AttrPair GetAttrs() { - return {{"scale_a", &scale_a}, {"scale_b", &scale_b}}; - } - - // dx = dout * a * b * (1 - tanh(a * x) * tanh(a * x)) - __device__ __forceinline__ T operator()(const T arg_dout, - const T arg_x) const { - MPType dout = static_cast(arg_dout); - MPType x = static_cast(arg_x); - MPType a = static_cast(scale_a); - MPType b = static_cast(scale_b); - MPType temp = tanh(a * x); - return static_cast(dout * a * b * (one - temp * temp)); - } - - static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; } -}; - -template -struct CudaSoftplusFunctor : public BaseActivationFunctor { - using MPType = typename phi::dtype::MPTypeTrait::Type; - MPType one = static_cast(1.0f); - float beta; - float threshold; - - typename BaseActivationFunctor::AttrPair GetAttrs() { - return {{"beta", &beta}, {"threshold", &threshold}}; - } - - // softplus(x) = beta * x > threshold ? x : log(1 + exp(beta * x)) / beta - __device__ __forceinline__ T operator()(const T arg_x) const { - MPType x = static_cast(arg_x); - MPType b = static_cast(beta); - MPType t = static_cast(threshold); - MPType x_beta = x * beta; - return static_cast(x_beta > t ? 
x : log(one + exp(x_beta)) / b); - } -}; - -template -struct CudaSoftplusGradFunctor : public BaseActivationFunctor { - using MPType = typename phi::dtype::MPTypeTrait::Type; - MPType one = static_cast(1.0f); - float beta; - float threshold; - - typename BaseActivationFunctor::AttrPair GetAttrs() { - return {{"beta", &beta}, {"threshold", &threshold}}; - } - - // dx = x * beta > threshold ? dout : dout / (1 + exp(-beta * x)) - __device__ __forceinline__ T operator()(const T arg_dout, - const T arg_x) const { - MPType dout = static_cast(arg_dout); - MPType x = static_cast(arg_x); - MPType b = static_cast(beta); - MPType t = static_cast(threshold); - MPType x_beta = x * beta; - return x_beta > t ? arg_dout : static_cast(dout / (one + exp(-x_beta))); - } - - static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; } -}; - template struct CudaAtanhGradFunctor : public BaseActivationFunctor { using MPType = typename phi::dtype::MPTypeTrait::Type; @@ -2287,56 +1797,6 @@ struct CudaAtanhGradFunctor : public BaseActivationFunctor { static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; } }; -template -struct CudaSqrtFunctor : public BaseActivationFunctor { - using MPType = typename phi::dtype::MPTypeTrait::Type; - - // sqrt(x) = sqrt(x) - __device__ __forceinline__ T operator()(const T arg_x) const { - MPType x = static_cast(arg_x); - return static_cast(sqrt(x)); - } -}; - -template -struct CudaSqrtGradFunctor : public BaseActivationFunctor { - T one_half = static_cast(0.5f); - - // dx = dout * 0.5 / out - __device__ __forceinline__ T operator()(const T dout, const T out) const { - return one_half * dout / out; - } - - static constexpr ActBwdOpFwdDeps FwdDeps() { - return ActBwdOpFwdDeps::kDepOut; - } -}; - -template -struct CudaRsqrtFunctor : public BaseActivationFunctor { - using MPType = typename phi::dtype::MPTypeTrait::Type; - - // rsqrt(x) = rsqrt(x) - __device__ __forceinline__ T operator()(const T arg_x) const { - MPType x = static_cast(arg_x); - return static_cast(rsqrt(x)); - } -}; - -template -struct CudaRsqrtGradFunctor : public BaseActivationFunctor { - T minus_one_half = static_cast(-0.5f); - - // dx = -0.5 * dout * out^3 - __device__ __forceinline__ T operator()(const T dout, const T out) const { - return minus_one_half * dout * out * out * out; - } - - static constexpr ActBwdOpFwdDeps FwdDeps() { - return ActBwdOpFwdDeps::kDepOut; - } -}; - template struct CudaAtanFunctor : public BaseActivationFunctor { using MPType = typename phi::dtype::MPTypeTrait::Type; @@ -2404,55 +1864,6 @@ struct CudaBReluFunctor : public BaseActivationFunctor { } }; -template -struct CudaMishFunctor : public BaseActivationFunctor { - using MPType = typename phi::dtype::MPTypeTrait::Type; - MPType one = static_cast(1.0f); - float threshold; - - typename BaseActivationFunctor::AttrPair GetAttrs() { - return {{"threshold", &threshold}}; - } - - // mish(x) = x * tanh(softplus(x)) - // softplus(x) = x, if x > threshold - // = ln(1 + exp(x)), otherwise - // Inputs: args[0], the input x - __device__ __forceinline__ T operator()(const T arg_x) const { - MPType x = static_cast(arg_x); - MPType sp = (x > static_cast(threshold)) ? 
x : log(one + exp(x)); - return static_cast(x * tanh(sp)); - } -}; - -template -struct CudaMishGradFunctor : public BaseActivationFunctor { - using MPType = typename phi::dtype::MPTypeTrait::Type; - MPType one = static_cast(1.0f); - float threshold; - - typename BaseActivationFunctor::AttrPair GetAttrs() { - return {{"threshold", &threshold}}; - } - - // dx = dout * (tanh(sp) + x * (1 - tanh(sp) ** 2) * (1 - exp(-sp))) - // sp = softplus(x) - // Inputs: args[0], the input dout - // args[1], the input x - __device__ __forceinline__ T operator()(const T arg_dout, - const T arg_x) const { - MPType dout = static_cast(arg_dout); - MPType x = static_cast(arg_x); - MPType sp = (x > static_cast(threshold)) ? x : log(one + exp(x)); - MPType gsp = - (x > static_cast(threshold)) ? one : one / (one + exp(-x)); - MPType tsp = tanh(sp); - return static_cast(dout * (tsp + x * (one - tsp * tsp) * gsp)); - } - - static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; } -}; - template struct CudaBReluGradFunctor : public BaseActivationFunctor { T zero = static_cast(0.0f); diff --git a/paddle/phi/kernels/gpu/activation_grad_kernel.cu b/paddle/phi/kernels/gpu/activation_grad_kernel.cu index 944cd2ea10..3c8b338d86 100644 --- a/paddle/phi/kernels/gpu/activation_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/activation_grad_kernel.cu @@ -189,13 +189,6 @@ DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Acosh, CudaAcoshGradFunctor); DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Atanh, CudaAtanhGradFunctor); DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(TanhShrink, CudaTanhShrinkGradFunctor); DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Silu, CudaSiluGradFunctor); -DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Square, CudaSquareGradFunctor); - -DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Exp, CudaExpGradFunctor); -DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Expm1, CudaExpm1GradFunctor); -DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Reciprocal, CudaReciprocalGradFunctor); -DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Sqrt, CudaSqrtGradFunctor); -DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Rsqrt, CudaRsqrtGradFunctor); DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(LogSigmoid, CudaLogSigmoidGradFunctor); DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Log, CudaLogGradFunctor); DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Log2, CudaLog2GradFunctor); @@ -218,24 +211,11 @@ DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(Swish, CudaSwishGradFunctor, beta); -DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(Mish, - CudaMishGradFunctor, - threshold); - DEFINE_GPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX(BRelu, CudaBReluGradFunctor, t_min, t_max); -DEFINE_GPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX(STanh, - CudaSTanhGradFunctor, - scale_a, - scale_b); - -DEFINE_GPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX(Softplus, - CudaSoftplusGradFunctor, - beta, - threshold); DEFINE_GPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPOUT(HardSigmoid, CudaHardSigmoidGradFunctor, slope, @@ -346,57 +326,12 @@ PD_REGISTER_ACTIVATION_GRAD_KERNEL(leaky_relu_double_grad, LeakyReluDoubleGradKernel) PD_REGISTER_ACTIVATION_GRAD_KERNEL(thresholded_relu_grad, ThresholdedReluGradKernel) -PD_REGISTER_ACTIVATION_GRAD_KERNEL(mish_grad, MishGradKernel) -PD_REGISTER_ACTIVATION_GRAD_KERNEL(stanh_grad, STanhGradKernel) -PD_REGISTER_ACTIVATION_GRAD_KERNEL(reciprocal_grad, ReciprocalGradKernel) -PD_REGISTER_ACTIVATION_GRAD_KERNEL(softplus_grad, SoftplusGradKernel) -PD_REGISTER_ACTIVATION_GRAD_KERNEL(sqrt_grad, SqrtGradKernel) -PD_REGISTER_ACTIVATION_GRAD_KERNEL(rsqrt_grad, RsqrtGradKernel) - -PD_REGISTER_KERNEL(exp_grad, - GPU, - ALL_LAYOUT, - 
phi::ExpGradKernel, - float, - double, - int, - int64_t, - phi::dtype::float16) {} - PD_REGISTER_ACTIVATION_GRAD_KERNEL(soft_shrink_grad, SoftShrinkGradKernel) PD_REGISTER_ACTIVATION_GRAD_KERNEL(hard_shrink_grad, HardShrinkGradKernel) PD_REGISTER_ACTIVATION_GRAD_KERNEL(tanh_shrink_grad, TanhShrinkGradKernel) PD_REGISTER_ACTIVATION_GRAD_KERNEL(silu_grad, SiluGradKernel) PD_REGISTER_ACTIVATION_GRAD_KERNEL(elu_grad, EluGradKernel) PD_REGISTER_ACTIVATION_GRAD_KERNEL(elu_double_grad, EluDoubleGradKernel) - -PD_REGISTER_KERNEL(expm1_grad, - GPU, - ALL_LAYOUT, - phi::Expm1GradKernel, - float, - double, - phi::dtype::float16) {} - -PD_REGISTER_KERNEL(logit_grad, - GPU, - ALL_LAYOUT, - phi::LogitGradKernel, - float, - double, - phi::dtype::float16) {} - -PD_REGISTER_KERNEL(square_grad, - GPU, - ALL_LAYOUT, - phi::SquareGradKernel, - float, - double, - int, - int64_t, - phi::dtype::float16, - phi::dtype::bfloat16) {} - PD_REGISTER_ACTIVATION_GRAD_KERNEL(sigmoid_grad, SigmoidGradKernel) PD_REGISTER_ACTIVATION_GRAD_KERNEL(sigmoid_double_grad, SigmoidDoubleGradKernel) PD_REGISTER_ACTIVATION_GRAD_KERNEL(sigmoid_triple_grad, SigmoidTripleGradKernel) diff --git a/paddle/phi/kernels/gpu/activation_kernel.cu b/paddle/phi/kernels/gpu/activation_kernel.cu index 8cc546ba73..75003cf342 100644 --- a/paddle/phi/kernels/gpu/activation_kernel.cu +++ b/paddle/phi/kernels/gpu/activation_kernel.cu @@ -19,7 +19,6 @@ limitations under the License. */ #include "paddle/phi/common/float16.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/elementwise_base.h" -#include "paddle/phi/kernels/impl/activation_grad_impl.h" #include "paddle/phi/kernels/impl/activation_impl.h" #include "paddle/fluid/platform/device/gpu/gpu_device_function.h" @@ -92,12 +91,6 @@ DEFINE_GPU_ACTIVATION_KERNEL(Relu, CudaReluFunctor) DEFINE_GPU_ACTIVATION_KERNEL(Tanh, CudaTanhFunctor) DEFINE_GPU_ACTIVATION_KERNEL(TanhShrink, CudaTanhShrinkFunctor) DEFINE_GPU_ACTIVATION_KERNEL(Silu, CudaSiluFunctor) -DEFINE_GPU_ACTIVATION_KERNEL(Exp, CudaExpFunctor) -DEFINE_GPU_ACTIVATION_KERNEL(Expm1, CudaExpm1Functor) -DEFINE_GPU_ACTIVATION_KERNEL(Reciprocal, CudaReciprocalFunctor) -DEFINE_GPU_ACTIVATION_KERNEL(Square, CudaSquareFunctor) -DEFINE_GPU_ACTIVATION_KERNEL(Sqrt, CudaSqrtFunctor) -DEFINE_GPU_ACTIVATION_KERNEL(Rsqrt, CudaRsqrtFunctor) DEFINE_GPU_ACTIVATION_KERNEL(Sigmoid, CudaSigmoidFunctor) DEFINE_GPU_ACTIVATION_KERNEL(LogSigmoid, CudaLogSigmoidFunctor) DEFINE_GPU_ACTIVATION_KERNEL(Log, CudaLogFunctor) @@ -119,14 +112,7 @@ DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(SoftShrink, CudaSoftShrinkFunctor, lambda) DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(Elu, CudaELUFunctor, alpha) DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(Swish, CudaSwishFunctor, beta) -DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(Mish, CudaMishFunctor, threshold) - DEFINE_GPU_ACT_KERNEL_WITH_TWO_ATTRS(BRelu, CudaBReluFunctor, t_min, t_max) -DEFINE_GPU_ACT_KERNEL_WITH_TWO_ATTRS(Stanh, CudaSTanhFunctor, scale_a, scale_b) -DEFINE_GPU_ACT_KERNEL_WITH_TWO_ATTRS(Softplus, - CudaSoftplusFunctor, - beta, - threshold) DEFINE_GPU_ACT_KERNEL_WITH_TWO_ATTRS(HardSigmoid, CudaHardSigmoidFunctor, slope, @@ -194,46 +180,6 @@ PD_REGISTER_ACTIVATION_KERNEL(tanh, TanhKernel) PD_REGISTER_ACTIVATION_KERNEL(brelu, BReluKernel) PD_REGISTER_ACTIVATION_KERNEL(thresholded_relu, ThresholdedReluKernel) PD_REGISTER_ACTIVATION_KERNEL(leaky_relu, LeakyReluKernel) -PD_REGISTER_ACTIVATION_KERNEL(mish, MishKernel) -PD_REGISTER_ACTIVATION_KERNEL(stanh, StanhKernel) -PD_REGISTER_ACTIVATION_KERNEL(reciprocal, 
ReciprocalKernel) -PD_REGISTER_ACTIVATION_KERNEL(sqrt, SqrtKernel) -PD_REGISTER_ACTIVATION_KERNEL(rsqrt, RsqrtKernel) -PD_REGISTER_ACTIVATION_KERNEL(softplus, SoftplusKernel) - -PD_REGISTER_KERNEL(exp, - GPU, - ALL_LAYOUT, - phi::ExpKernel, - float, - double, - int, - int64_t, - phi::dtype::float16) {} -PD_REGISTER_KERNEL(expm1, - GPU, - ALL_LAYOUT, - phi::Expm1Kernel, - float, - double, - phi::dtype::float16) {} -PD_REGISTER_KERNEL(logit, - GPU, - ALL_LAYOUT, - phi::LogitKernel, - float, - double, - phi::dtype::float16) {} -PD_REGISTER_KERNEL(square, - GPU, - ALL_LAYOUT, - phi::SquareKernel, - float, - double, - int, - int64_t, - phi::dtype::float16, - phi::dtype::bfloat16) {} PD_REGISTER_ACTIVATION_KERNEL(hard_shrink, HardShrinkKernel) PD_REGISTER_ACTIVATION_KERNEL(soft_shrink, SoftShrinkKernel) PD_REGISTER_ACTIVATION_KERNEL(tanh_shrink, TanhShrinkKernel) diff --git a/paddle/phi/kernels/gpu/temporal_shift_grad_kernel.cu b/paddle/phi/kernels/gpu/temporal_shift_grad_kernel.cu deleted file mode 100644 index 065b1726dc..0000000000 --- a/paddle/phi/kernels/gpu/temporal_shift_grad_kernel.cu +++ /dev/null @@ -1,149 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/phi/backends/gpu/gpu_context.h" -#include "paddle/phi/common/layout.h" -#include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/temporal_shift_grad_kernel.h" - -namespace phi { - -template -__global__ void KeTemporalShiftBwNCHW(const T* output_grad, - T* input_grad, - const int ntchw, - const int tchw, - const int chw, - const int hw, - const int t, - const int c1, - const int c2) { - int tid = blockIdx.x * blockDim.x + threadIdx.x; - int stride = blockDim.x * gridDim.x; - int src_it = 0; - - for (; tid < ntchw; tid += stride) { - int it = (tid % tchw) / chw; - int ic = (tid % chw) / hw; - - if (ic < c1) { - src_it = it + 1; - } else if (ic < c2) { - src_it = it - 1; - } else { - src_it = it; - } - - if (src_it >= 0 && src_it < t) { - input_grad[tid] = output_grad[tid + (src_it - it) * chw]; - } else { - input_grad[tid] = 0; - } - } -} - -template -__global__ void KeTemporalShiftBwNHWC(const T* output_grad, - T* input_grad, - const int nthwc, - const int thwc, - const int hwc, - const int t, - const int c, - const int c1, - const int c2) { - int tid = blockIdx.x * blockDim.x + threadIdx.x; - int stride = blockDim.x * gridDim.x; - int src_it = 0; - - for (; tid < nthwc; tid += stride) { - int it = (tid % thwc) / hwc; - int ic = tid % c; - - if (ic < c1) { - src_it = it + 1; - } else if (ic < c2) { - src_it = it - 1; - } else { - src_it = it; - } - - if (src_it >= 0 && src_it < t) { - input_grad[tid] = output_grad[tid + (src_it - it) * hwc]; - } else { - input_grad[tid] = 0; - } - } -} - -template -void TemporalShiftGradKernel(const Context& dev_ctx, - const DenseTensor& out_grad, - int seg_num, - float shift_ratio, - const std::string& data_format_str, - DenseTensor* x_grad) { - auto* input_grad = x_grad; - auto* 
output_grad = &out_grad; - int t = seg_num; - const DataLayout data_layout = - paddle::framework::StringToDataLayout(data_format_str); - - const int nt = output_grad->dims()[0]; - const int c = (data_layout == DataLayout::kNCHW ? output_grad->dims()[1] - : output_grad->dims()[3]); - const int h = (data_layout == DataLayout::kNCHW ? output_grad->dims()[2] - : output_grad->dims()[1]); - const int w = (data_layout == DataLayout::kNCHW ? output_grad->dims()[3] - : output_grad->dims()[2]); - - const int hw = h * w; - const int chw = c * hw; - const int tchw = t * chw; - const int ntchw = nt * chw; - - const int c1 = static_cast(c * shift_ratio); - const int c2 = static_cast(c * 2 * shift_ratio); - - DDim in_grad_dims = - (data_layout == DataLayout::kNCHW ? phi::make_ddim({nt, c, h, w}) - : phi::make_ddim({nt, h, w, c})); - const T* output_grad_data = output_grad->data(); - T* input_grad_data = - input_grad->mutable_data(in_grad_dims, dev_ctx.GetPlace()); - - int pixelNum = nt * chw; - int threads = 1024; - int grid = (pixelNum + threads - 1) / threads; - int blocks_per_sm = dev_ctx.GetMaxPhysicalThreadCount() / threads; - grid = std::min(dev_ctx.GetSMCount() * blocks_per_sm, grid); - - if (data_layout == DataLayout::kNCHW) { - KeTemporalShiftBwNCHW<<>>( - output_grad_data, input_grad_data, ntchw, tchw, chw, hw, t, c1, c2); - } else { - KeTemporalShiftBwNHWC<<>>( - output_grad_data, input_grad_data, ntchw, tchw, chw, t, c, c1, c2); - } -} - -} // namespace phi - -PD_REGISTER_KERNEL(temporal_shift_grad, - GPU, - ALL_LAYOUT, - phi::TemporalShiftGradKernel, - float, - double, - phi::dtype::float16) {} diff --git a/paddle/phi/kernels/gpu/temporal_shift_kernel.cu b/paddle/phi/kernels/gpu/temporal_shift_kernel.cu deleted file mode 100644 index 34d80a1bc8..0000000000 --- a/paddle/phi/kernels/gpu/temporal_shift_kernel.cu +++ /dev/null @@ -1,148 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
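The CPU and GPU temporal-shift kernels deleted in this patch all share the same index arithmetic: a linear index i is decomposed into a frame index it = (i % tchw) / chw and a channel index ic = (i % chw) / hw; channels in [0, c1) with c1 = c * shift_ratio read from frame it - 1, channels in [c1, c2) with c2 = 2 * c * shift_ratio read from frame it + 1, the remaining channels are copied through, and out-of-range frames are zero-padded. A minimal standalone sketch of the forward NCHW mapping (plain C++, no Paddle types; the tiny test case in main is an illustration, not taken from the patch):

#include <cstdio>
#include <vector>

// Forward temporal shift for NCHW data, mirroring TemporalShiftFwNCHW above.
// Channels [0, c1) read from the previous frame, [c1, c2) from the next
// frame, and the remaining channels are passed through unchanged.
std::vector<float> TemporalShiftRefNCHW(const std::vector<float>& x,
                                        int n, int t, int c, int h, int w,
                                        float shift_ratio) {
  const int hw = h * w, chw = c * hw, tchw = t * chw, ntchw = n * tchw;
  const int c1 = static_cast<int>(c * shift_ratio);
  const int c2 = static_cast<int>(c * 2 * shift_ratio);
  std::vector<float> out(ntchw, 0.0f);  // out-of-range frames stay zero
  for (int i = 0; i < ntchw; ++i) {
    const int it = (i % tchw) / chw;  // frame index within the segment
    const int ic = (i % chw) / hw;    // channel index
    const int src_it = (ic < c1) ? it - 1 : (ic < c2) ? it + 1 : it;
    if (src_it >= 0 && src_it < t) {
      out[i] = x[i + (src_it - it) * chw];  // same (n, c, h, w), shifted frame
    }
  }
  return out;
}

int main() {
  // 1 batch, 4 frames, 4 channels, 1x1 spatial, shift_ratio = 0.25,
  // so c1 = 1 and c2 = 2: channel 0 reads frame it - 1, channel 1 frame it + 1.
  std::vector<float> x(16);
  for (int i = 0; i < 16; ++i) x[i] = static_cast<float>(i);
  for (float v : TemporalShiftRefNCHW(x, 1, 4, 4, 1, 1, 0.25f)) {
    std::printf("%g ", v);
  }
  std::printf("\n");
  return 0;
}

The backward kernels invert this mapping, which is why the TemporalShiftBwNCHW/NHWC bodies above use src_it = it + 1 for the first channel group and src_it = it - 1 for the second.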
- -#include "paddle/phi/backends/gpu/gpu_context.h" -#include "paddle/phi/common/layout.h" -#include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/temporal_shift_kernel.h" - -namespace phi { - -template -__global__ void KeTemporalShiftFwNCHW(const T* input, - T* output, - const int ntchw, - const int tchw, - const int chw, - const int hw, - const int t, - const int c1, - const int c2) { - int tid = blockIdx.x * blockDim.x + threadIdx.x; - int stride = blockDim.x * gridDim.x; - int src_it = 0; - - for (; tid < ntchw; tid += stride) { - int it = (tid % tchw) / chw; - int ic = (tid % chw) / hw; - - if (ic < c1) { - src_it = it - 1; - } else if (ic < c2) { - src_it = it + 1; - } else { - src_it = it; - } - - if (src_it < 0 || src_it >= t) { - output[tid] = 0; - } else { - output[tid] = input[tid + (src_it - it) * chw]; - } - } -} - -template -__global__ void KeTemporalShiftFwNHWC(const T* input, - T* output, - const int nthwc, - const int thwc, - const int hwc, - const int t, - const int c, - const int c1, - const int c2) { - int tid = blockIdx.x * blockDim.x + threadIdx.x; - int stride = blockDim.x * gridDim.x; - int src_it = 0; - - for (; tid < nthwc; tid += stride) { - int it = (tid % thwc) / hwc; - int ic = tid % c; - - if (ic < c1) { - src_it = it - 1; - } else if (ic < c2) { - src_it = it + 1; - } else { - src_it = it; - } - - if (src_it < 0 || src_it >= t) { - output[tid] = 0; - } else { - output[tid] = input[tid + (src_it - it) * hwc]; - } - } -} - -template -void TemporalShiftKernel(const Context& dev_ctx, - const DenseTensor& x, - int seg_num, - float shift_ratio, - const std::string& data_format_str, - DenseTensor* out) { - auto* input = &x; - auto* output = out; - int t = seg_num; - const DataLayout data_layout = - paddle::framework::StringToDataLayout(data_format_str); - - const int nt = input->dims()[0]; - const int c = - (data_layout == DataLayout::kNCHW ? input->dims()[1] : input->dims()[3]); - const int h = - (data_layout == DataLayout::kNCHW ? input->dims()[2] : input->dims()[1]); - const int w = - (data_layout == DataLayout::kNCHW ? input->dims()[3] : input->dims()[2]); - - const int hw = h * w; - const int chw = c * hw; - const int tchw = t * chw; - const int ntchw = nt * chw; - - const int c1 = static_cast(c * shift_ratio); - const int c2 = static_cast(c * 2 * shift_ratio); - - DDim out_dims = - (data_layout == DataLayout::kNCHW ? 
phi::make_ddim({nt, c, h, w}) - : phi::make_ddim({nt, h, w, c})); - const T* input_data = input->data(); - T* output_data = output->mutable_data(out_dims, dev_ctx.GetPlace()); - - int pixelNum = nt * chw; - int threads = 1024; - int grid = (pixelNum + threads - 1) / threads; - int blocks_per_sm = dev_ctx.GetMaxPhysicalThreadCount() / threads; - grid = std::min(dev_ctx.GetSMCount() * blocks_per_sm, grid); - - if (data_layout == DataLayout::kNCHW) { - KeTemporalShiftFwNCHW<<>>( - input_data, output_data, ntchw, tchw, chw, hw, t, c1, c2); - } else { - KeTemporalShiftFwNHWC<<>>( - input_data, output_data, ntchw, tchw, chw, t, c, c1, c2); - } -} - -} // namespace phi - -PD_REGISTER_KERNEL(temporal_shift, - GPU, - ALL_LAYOUT, - phi::TemporalShiftKernel, - float, - double, - phi::dtype::float16) {} diff --git a/paddle/phi/kernels/impl/activation_grad_impl.h b/paddle/phi/kernels/impl/activation_grad_impl.h index 37273b7944..7924276414 100644 --- a/paddle/phi/kernels/impl/activation_grad_impl.h +++ b/paddle/phi/kernels/impl/activation_grad_impl.h @@ -222,24 +222,6 @@ void EluDoubleGradKernel(const Context& dev_ctx, functor(dev_ctx, &x, &ddx, ddout, &dout, dx); } -template -void LogitGradKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& out_grad, - float eps, - DenseTensor* x_grad) { - dev_ctx.template Alloc(x_grad); - - auto eigen_x = EigenVector::Flatten(x); - auto eigen_dout = EigenVector::Flatten(out_grad); - auto eigen_dx = EigenVector::Flatten(*x_grad); - auto& place = *dev_ctx.eigen_device(); - auto eigen_p = EigenVector::Flatten(x); - - funcs::LogitGradFunctor functor; - functor(place, eigen_x, eigen_dout, eigen_dx, eigen_p, eps); -} - template void SigmoidDoubleGradKernel(const Context& dev_ctx, const DenseTensor& out, diff --git a/paddle/phi/kernels/impl/activation_impl.h b/paddle/phi/kernels/impl/activation_impl.h index 1a62c4e06b..c2d160caf7 100644 --- a/paddle/phi/kernels/impl/activation_impl.h +++ b/paddle/phi/kernels/impl/activation_impl.h @@ -47,22 +47,6 @@ void ActivationImpl(const Context& dev_ctx, } } -template -void LogitKernel(const Context& dev_ctx, - const DenseTensor& x, - float eps, - DenseTensor* out) { - dev_ctx.template Alloc(out); - - auto eigen_out = EigenVector::Flatten(*out); - auto eigen_in = EigenVector::Flatten(x); - auto& place = *dev_ctx.eigen_device(); - auto eigen_p = EigenVector::Flatten(*out); - - funcs::LogitFunctor functor; - functor(place, eigen_in, eigen_out, eigen_p, eps); -} - template void PowKernel(const Context& dev_ctx, const DenseTensor& x, diff --git a/paddle/phi/kernels/selected_rows/activation_kernel.cc b/paddle/phi/kernels/selected_rows/activation_kernel.cc deleted file mode 100644 index 438a080a63..0000000000 --- a/paddle/phi/kernels/selected_rows/activation_kernel.cc +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
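The LogitKernel / LogitGradKernel bodies removed from activation_impl.h and activation_grad_impl.h above are thin wrappers around the LogitFunctor / LogitGradFunctor deleted earlier in this patch: logit(x) = ln(x / (1 - x)) with the input clamped to [eps, 1 - eps], and d logit(x)/dx = 1 / (x * (1 - x)) zeroed wherever the input fell outside that interval. A scalar reference sketch (plain C++; function names here are illustrative, not Paddle API):

#include <cmath>
#include <cstdio>
#include <limits>

// logit(x) = ln(x / (1 - x)); the input is clamped to [eps, 1 - eps] first.
// With eps == 0 the functor instead returns NaN outside [0, 1].
float LogitRef(float x, float eps) {
  if (eps == 0.0f && (x < 0.0f || x > 1.0f)) {
    return std::numeric_limits<float>::quiet_NaN();
  }
  const float t = std::fmax(std::fmin(x, 1.0f - eps), eps);
  return std::log(t / (1.0f - t));
}

// d logit(x)/dx = 1 / (x * (1 - x)); the gradient is zeroed where x was
// clamped, i.e. outside [eps, 1 - eps].
float LogitGradRef(float dout, float x, float eps) {
  if (x < eps || x > 1.0f - eps) return 0.0f;
  return dout / (x * (1.0f - x));
}

int main() {
  std::printf("%g %g\n", LogitRef(0.25f, 1e-6f),
              LogitGradRef(1.0f, 0.25f, 1e-6f));  // ~ -1.0986  5.3333
  return 0;
}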
- -#include "paddle/phi/kernels/selected_rows/activation_kernel.h" - -#include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/activation_kernel.h" - -#include "paddle/phi/backends/cpu/cpu_context.h" - -#include "paddle/phi/backends/gpu/gpu_context.h" - -namespace phi { -namespace sr { - -template -void SquareKernel(const Context& dev_ctx, - const SelectedRows& x, - SelectedRows* out) { - out->set_rows(x.rows()); - out->set_height(x.height()); - phi::SquareKernel(dev_ctx, x.value(), out->mutable_value()); -} - -template -void SqrtKernel(const Context& dev_ctx, - const SelectedRows& x, - SelectedRows* out) { - out->set_rows(x.rows()); - out->set_height(x.height()); - phi::SqrtKernel(dev_ctx, x.value(), out->mutable_value()); -} - -} // namespace sr -} // namespace phi - -PD_REGISTER_KERNEL( - square_sr, CPU, ALL_LAYOUT, phi::sr::SquareKernel, float, double) {} - -PD_REGISTER_KERNEL( - sqrt_sr, CPU, ALL_LAYOUT, phi::sr::SqrtKernel, float, double) {} - -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - -PD_REGISTER_KERNEL(square_sr, - GPU, - ALL_LAYOUT, - phi::sr::SquareKernel, - float, - double, - int, - int64_t) {} - -PD_REGISTER_KERNEL( - sqrt_sr, GPU, ALL_LAYOUT, phi::sr::SqrtKernel, float, double) {} - -#endif diff --git a/paddle/phi/kernels/selected_rows/activation_kernel.h b/paddle/phi/kernels/selected_rows/activation_kernel.h deleted file mode 100644 index 6518f95539..0000000000 --- a/paddle/phi/kernels/selected_rows/activation_kernel.h +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/selected_rows.h" - -namespace phi { -namespace sr { - -template -void SquareKernel(const Context& dev_ctx, - const SelectedRows& x, - SelectedRows* out); - -template -void SqrtKernel(const Context& dev_ctx, - const SelectedRows& x, - SelectedRows* out); - -} // namespace sr -} // namespace phi diff --git a/paddle/phi/kernels/temporal_shift_grad_kernel.h b/paddle/phi/kernels/temporal_shift_grad_kernel.h deleted file mode 100644 index 1bcd3d61c2..0000000000 --- a/paddle/phi/kernels/temporal_shift_grad_kernel.h +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
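The deleted phi::sr::SquareKernel / SqrtKernel above show the general shape of a SelectedRows activation: the sparsity metadata (rows, height) is copied through untouched and the dense kernel is applied only to the value tensor. A self-contained sketch of that delegation pattern with stand-in types (these structs are illustrative, not the phi classes):

#include <cstdint>
#include <cstdio>
#include <vector>

// Stand-in for a SelectedRows tensor: a tall, mostly-empty matrix stored as
// the list of materialized row indices plus a dense block holding those rows.
struct DenseBlock {
  std::vector<float> data;
};
struct SelectedRowsLike {
  int64_t height;               // logical number of rows
  std::vector<int64_t> rows;    // which logical rows are materialized
  DenseBlock value;             // the materialized rows, densely packed
};

// Dense element-wise kernel that the sparse wrapper delegates to.
void SquareDense(const DenseBlock& x, DenseBlock* out) {
  out->data = x.data;
  for (float& v : out->data) v = v * v;
}

// Mirrors the deleted phi::sr::SquareKernel: copy metadata, square the values.
void SquareSelectedRows(const SelectedRowsLike& x, SelectedRowsLike* out) {
  out->height = x.height;
  out->rows = x.rows;
  SquareDense(x.value, &out->value);
}

int main() {
  // Height 10, rows {0, 4, 7} materialized, two columns per row.
  SelectedRowsLike x{10, {0, 4, 7}, {{2.0f, 1.0f, 1.0f, 4.0f, 1.0f, 3.0f}}};
  SelectedRowsLike y{};
  SquareSelectedRows(x, &y);
  for (float v : y.value.data) std::printf("%g ", v);  // 4 1 1 16 1 9
  std::printf("\n");
  return 0;
}

The same pattern appears for sqrt_sr, and the hand-written SqrtActiOpArgumentMapping / SquareActiOpArgumentMapping removed later in this patch are what routed a SelectedRows input to the *_sr kernel instead of the dense one.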
- -#pragma once - -#include "paddle/phi/core/dense_tensor.h" - -namespace phi { - -template -void TemporalShiftGradKernel(const Context& ctx, - const DenseTensor& out_grad, - int seg_num, - float shift_ratio, - const std::string& data_format, - DenseTensor* x_grad); - -} // namespace phi diff --git a/paddle/phi/kernels/temporal_shift_kernel.h b/paddle/phi/kernels/temporal_shift_kernel.h deleted file mode 100644 index a927d7fb23..0000000000 --- a/paddle/phi/kernels/temporal_shift_kernel.h +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include "paddle/phi/core/dense_tensor.h" - -namespace phi { - -template -void TemporalShiftKernel(const Context& ctx, - const DenseTensor& x, - int seg_num, - float shift_ratio, - const std::string& data_format, - DenseTensor* out); - -} // namespace phi diff --git a/paddle/phi/ops/compat/activation_sig.cc b/paddle/phi/ops/compat/activation_sig.cc index 34f830abe7..7919769ec8 100644 --- a/paddle/phi/ops/compat/activation_sig.cc +++ b/paddle/phi/ops/compat/activation_sig.cc @@ -43,19 +43,17 @@ namespace phi { #define comma , -DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Cos, "cos", ); // NOLINT -DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Tan, "tan", ); // NOLINT -DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Acos, "acos", ); // NOLINT -DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Sin, "sin", ); // NOLINT -DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Asin, "asin", ); // NOLINT -DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Atan, "atan", ); // NOLINT -DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Sinh, "sinh", ); // NOLINT -DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Cosh, "cosh", ); // NOLINT -DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Asinh, "asinh", ); // NOLINT -DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Acosh, "acosh", ); // NOLINT -DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Atanh, "atanh", ); // NOLINT -DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Square, "square", ); // NOLINT - +DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Cos, "cos", ); // NOLINT +DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Tan, "tan", ); // NOLINT +DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Acos, "acos", ); // NOLINT +DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Sin, "sin", ); // NOLINT +DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Asin, "asin", ); // NOLINT +DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Atan, "atan", ); // NOLINT +DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Sinh, "sinh", ); // NOLINT +DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Cosh, "cosh", ); // NOLINT +DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Asinh, "asinh", ); // NOLINT +DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Acosh, "acosh", ); // NOLINT +DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Atanh, "atanh", ); // NOLINT DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(BRelu, "brelu", "t_min" comma "t_max"); DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(LeakyRelu, "leaky_relu", "alpha"); DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(ThresholdedRelu, @@ -63,7 +61,6 @@ DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(ThresholdedRelu, "threshold"); DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(SoftShrink, "soft_shrink", "lambda"); DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(HardShrink, "hard_shrink", "threshold"); -DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Mish, "mish", "threshold"); 
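The mish entry above maps onto the MishFunctor / MishGradFunctor and the numerically-stable SoftplusFunctor deleted earlier in this patch: softplus(x) = log(1 + exp(beta * x)) / beta unless beta * x exceeds the threshold, in which case the function is already linear to float precision and x is returned directly, and mish(x) = x * tanh(softplus(x)). A scalar reference sketch (plain C++; the names and the use of log1p are choices made here, not code from the patch):

#include <cmath>
#include <cstdio>

// Numerically-stable softplus, as in the deleted SoftplusFunctor
// (beta = 1, threshold = 20 are the defaults noted in its comment).
float SoftplusRef(float x, float beta, float threshold) {
  const float x_beta = beta * x;
  return x_beta > threshold ? x : std::log1p(std::exp(x_beta)) / beta;
}

// mish(x) = x * tanh(softplus(x)), with the same linear shortcut for large x.
float MishRef(float x, float threshold) {
  const float sp = x > threshold ? x : std::log1p(std::exp(x));
  return x * std::tanh(sp);
}

// dx = dout * (tanh(sp) + x * (1 - tanh(sp)^2) * d softplus(x)/dx) with
// sp = softplus(x), matching MishGradFunctor / CudaMishGradFunctor above.
float MishGradRef(float dout, float x, float threshold) {
  const float sp = x > threshold ? x : std::log1p(std::exp(x));
  const float dsp = x > threshold ? 1.0f : 1.0f / (1.0f + std::exp(-x));
  const float tsp = std::tanh(sp);
  return dout * (tsp + x * (1.0f - tsp * tsp) * dsp);
}

int main() {
  std::printf("%g %g %g\n", SoftplusRef(1.0f, 1.0f, 20.0f),
              MishRef(1.0f, 20.0f), MishGradRef(1.0f, 1.0f, 20.0f));
  return 0;
}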
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(TanhShrink, "tanh_shrink", ); // NOLINT DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Silu, "silu", ); // NOLINT DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(LogSigmoid, "logsigmoid", ); // NOLINT @@ -77,41 +74,12 @@ DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(HardSwish, "offset"); // NOLINT DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Swish, "swish", "beta"); // NOLINT -DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(STanh, - "stanh", - "scale_a" comma "scale_b"); // NOLINT - -DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Softplus, - "softplus", - "beta" comma "threshold"); // NOLINT - -DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Relu, "relu", ); // NOLINT -DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Tanh, "tanh", ); // NOLINT -DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Sigmoid, "sigmoid", ); // NOLINT -DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Exp, "exp", ); // NOLINT -DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Expm1, "expm1", ); // NOLINT -DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Reciprocal, "reciprocal", ); // NOLINT -DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Sqrt, "sqrt", ); // NOLINT -DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Rsqrt, "rsqrt", ); // NOLINT - +DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Relu, "relu", ); // NOLINT +DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Tanh, "tanh", ); // NOLINT +DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Sigmoid, "sigmoid", ); // NOLINT DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(HardSigmoid, "hard_sigmoid", "slope" comma "offset"); // NOLINT -KernelSignature SqrtActiOpArgumentMapping(const ArgumentMappingContext& ctx) { - if (ctx.IsDenseTensorInput("X")) { - return KernelSignature("sqrt", {"X"}, {}, {"Out"}); - } else { - return KernelSignature("sqrt_sr", {"X"}, {}, {"Out"}); - } -} - -KernelSignature SquareActiOpArgumentMapping(const ArgumentMappingContext& ctx) { - if (ctx.IsDenseTensorInput("X")) { - return KernelSignature("square", {"X"}, {}, {"Out"}); - } else { - return KernelSignature("square_sr", {"X"}, {}, {"Out"}); - } -} DEFINE_ACT_GRAD_NODEP_OP_ARGMAP(Round, "round", ); // NOLINT DEFINE_ACT_GRAD_NODEP_OP_ARGMAP(Floor, "floor", ); // NOLINT @@ -164,11 +132,6 @@ KernelSignature EluOpArgumentMapping(const ArgumentMappingContext& ctx) { return KernelSignature("elu", {"X"}, {"alpha"}, {"Out"}); } -KernelSignature LogitGradOpArgumentMapping(const ArgumentMappingContext& ctx) { - return KernelSignature( - "logit_grad", {"X", GradVarName("Out")}, {"eps"}, {GradVarName("X")}); -} - KernelSignature EluGradOpArgumentMapping(const ArgumentMappingContext& ctx) { return KernelSignature("elu_grad", {"X", "Out", GradVarName("Out")}, @@ -231,18 +194,6 @@ PD_REGISTER_ARG_MAPPING_FN(asinh_grad, phi::AsinhGradOpArgumentMapping); PD_REGISTER_ARG_MAPPING_FN(acosh_grad, phi::AcoshGradOpArgumentMapping); PD_REGISTER_ARG_MAPPING_FN(atanh_grad, phi::AtanhGradOpArgumentMapping); PD_REGISTER_ARG_MAPPING_FN(relu_grad, phi::ReluGradOpArgumentMapping); - -PD_REGISTER_ARG_MAPPING_FN(exp_grad, phi::ExpGradOpArgumentMapping); -PD_REGISTER_ARG_MAPPING_FN(expm1_grad, phi::Expm1GradOpArgumentMapping); -PD_REGISTER_ARG_MAPPING_FN(square_grad, phi::SquareGradOpArgumentMapping); -PD_REGISTER_ARG_MAPPING_FN(reciprocal_grad, - phi::ReciprocalGradOpArgumentMapping); -PD_REGISTER_ARG_MAPPING_FN(sqrt_grad, phi::SqrtGradOpArgumentMapping); -PD_REGISTER_ARG_MAPPING_FN(rsqrt_grad, phi::RsqrtGradOpArgumentMapping); -PD_REGISTER_ARG_MAPPING_FN(mish_grad, phi::MishGradOpArgumentMapping); -PD_REGISTER_ARG_MAPPING_FN(stanh_grad, phi::STanhGradOpArgumentMapping); -PD_REGISTER_ARG_MAPPING_FN(softplus_grad, phi::SoftplusGradOpArgumentMapping); - PD_REGISTER_ARG_MAPPING_FN(relu_grad_grad, phi::ReluDoubleGradOpArgumentMapping); PD_REGISTER_ARG_MAPPING_FN(tanh_grad, 
phi::TanhGradOpArgumentMapping); @@ -277,16 +228,11 @@ PD_REGISTER_ARG_MAPPING_FN(logsigmoid_grad, phi::LogSigmoidGradOpArgumentMapping); PD_REGISTER_ARG_MAPPING_FN(hard_sigmoid_grad, phi::HardSigmoidGradOpArgumentMapping); - -PD_REGISTER_ARG_MAPPING_FN(logit_grad, phi::LogitGradOpArgumentMapping); PD_REGISTER_ARG_MAPPING_FN(log_grad, phi::LogGradOpArgumentMapping); PD_REGISTER_ARG_MAPPING_FN(log_grad_grad, phi::LogDoubleGradOpArgumentMapping); PD_REGISTER_ARG_MAPPING_FN(log2_grad, phi::Log2GradOpArgumentMapping); PD_REGISTER_ARG_MAPPING_FN(log10_grad, phi::Log10GradOpArgumentMapping); PD_REGISTER_ARG_MAPPING_FN(log1p_grad, phi::Log1pGradOpArgumentMapping); - -PD_REGISTER_ARG_MAPPING_FN(sqrt, phi::SqrtActiOpArgumentMapping); -PD_REGISTER_ARG_MAPPING_FN(square, phi::SquareActiOpArgumentMapping); PD_REGISTER_ARG_MAPPING_FN(hard_swish_grad, phi::HardSwishGradOpArgumentMapping); PD_REGISTER_ARG_MAPPING_FN(swish_grad, phi::SwishGradOpArgumentMapping); diff --git a/paddle/phi/ops/compat/temporal_shift_sig.cc b/paddle/phi/ops/compat/temporal_shift_sig.cc deleted file mode 100644 index a686c37ff7..0000000000 --- a/paddle/phi/ops/compat/temporal_shift_sig.cc +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/phi/core/compat/op_utils.h" - -namespace phi { - -KernelSignature TemporalShiftOpArgumentMapping( - const ArgumentMappingContext& ctx) { - return KernelSignature("temporal_shift", - {"X"}, - {"seg_num", "shift_ratio", "data_format"}, - {"Out"}); -} - -KernelSignature TemporalShiftGradOpArgumentMapping( - const ArgumentMappingContext& ctx) { - return KernelSignature("temporal_shift_grad", - {GradVarName("Out")}, - {"seg_num", "shift_ratio", "data_format"}, - {GradVarName("X")}); -} - -} // namespace phi - -PD_REGISTER_ARG_MAPPING_FN(temporal_shift, phi::TemporalShiftOpArgumentMapping); -PD_REGISTER_ARG_MAPPING_FN(temporal_shift_grad, - phi::TemporalShiftGradOpArgumentMapping); diff --git a/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py b/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py index eb4243ef1c..825d74388b 100644 --- a/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py +++ b/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py @@ -342,5 +342,4 @@ class TestLogDoubleGradCheck(unittest.TestCase): if __name__ == "__main__": - paddle.enable_static() unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_activation_sparse_op.py b/python/paddle/fluid/tests/unittests/test_activation_sparse_op.py deleted file mode 100644 index 5c07a544ca..0000000000 --- a/python/paddle/fluid/tests/unittests/test_activation_sparse_op.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
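The TemporalShift(Grad)OpArgumentMapping functions above only build a KernelSignature: the kernel name plus the op's input, attribute, and output names to bind to the phi kernel's parameters, with gradient variables referred to through GradVarName. A toy model of what such a signature carries (the ToySignature struct and the "@GRAD" suffix convention are illustrative background assumptions, not lines from this patch):

#include <cstdio>
#include <string>
#include <vector>

// Toy stand-in for phi::KernelSignature: the four pieces the compat mapping
// supplies (kernel name, input names, attribute names, output names).
struct ToySignature {
  std::string kernel;
  std::vector<std::string> inputs;
  std::vector<std::string> attrs;
  std::vector<std::string> outputs;
};

// Fluid gradient variables conventionally carry a "@GRAD" suffix, so
// GradVarName("Out") names the gradient of "Out" (assumed here for
// illustration).
std::string GradVarNameRef(const std::string& name) { return name + "@GRAD"; }

int main() {
  const ToySignature sig{"temporal_shift_grad",
                         {GradVarNameRef("Out")},
                         {"seg_num", "shift_ratio", "data_format"},
                         {GradVarNameRef("X")}};
  std::printf("%s: %s -> %s (attrs: %s, %s, %s)\n", sig.kernel.c_str(),
              sig.inputs[0].c_str(), sig.outputs[0].c_str(),
              sig.attrs[0].c_str(), sig.attrs[1].c_str(), sig.attrs[2].c_str());
  return 0;
}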
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function - -import unittest -import numpy as np -import paddle.fluid as fluid -import paddle.fluid.core as core -from paddle.fluid.op import Operator -from op_test import OpTest -import paddle - - -class TestSparseSquareOp(unittest.TestCase): - def check_with_place(self, place): - scope = core.Scope() - - # create and initialize Grad Variable - height = 10 - rows = [0, 4, 7] - self.row_numel = 12 - - x_selected_rows = scope.var('X').get_selected_rows() - x_selected_rows.set_height(height) - x_selected_rows.set_rows(rows) - np_array = np.ones((len(rows), self.row_numel)).astype("float32") - np_array[0, 0] = 2.0 - np_array[2, 8] = 4.0 - - x_tensor = x_selected_rows.get_tensor() - x_tensor.set(np_array, place) - - out_selected_rows = scope.var('Out').get_selected_rows() - # create and run sqrt operator - square_op = Operator("square", X='X', Out='Out') - square_op.run(scope, place) - - # get and compare result - result_array = np.array(out_selected_rows.get_tensor()) - - self.assertTrue(np.array_equal(result_array, np.square(np_array))) - - def test_sparse_acti(self): - places = [core.CPUPlace()] - if core.is_compiled_with_cuda(): - places.append(core.CUDAPlace(0)) - for place in places: - self.check_with_place(place) - - -class TestSparseSqrtOp(unittest.TestCase): - def check_with_place(self, place): - scope = core.Scope() - - # create and initialize Grad Variable - height = 10 - rows = [0, 4, 7] - self.row_numel = 12 - - x_selected_rows = scope.var('X1').get_selected_rows() - x_selected_rows.set_height(height) - x_selected_rows.set_rows(rows) - np_array = np.ones((len(rows), self.row_numel)).astype("float32") - np_array[0, 0] = 2.0 - np_array[2, 8] = 4.0 - - x_tensor = x_selected_rows.get_tensor() - x_tensor.set(np_array, place) - - out_selected_rows = scope.var('Out1').get_selected_rows() - # create and run sqrt operator - sqrt_op = Operator("sqrt", X='X1', Out='Out1') - sqrt_op.run(scope, place) - - # get and compare result - result_array = np.array(out_selected_rows.get_tensor()) - self.assertTrue(np.allclose(result_array, np.sqrt(np_array))) - - def test_sparse_acti(self): - places = [core.CPUPlace()] - if core.is_compiled_with_cuda(): - places.append(core.CUDAPlace(0)) - for place in places: - self.check_with_place(place) - - -if __name__ == "__main__": - paddle.enable_static() - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_clip_by_norm_op.py b/python/paddle/fluid/tests/unittests/test_clip_by_norm_op.py index 7f137cf137..de7aaf9479 100644 --- a/python/paddle/fluid/tests/unittests/test_clip_by_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_clip_by_norm_op.py @@ -16,7 +16,6 @@ from __future__ import print_function import unittest import numpy as np -import paddle from op_test import OpTest import paddle.fluid as fluid @@ -154,5 +153,4 @@ class TestClipByNormOpWithSelectedRows(unittest.TestCase): if __name__ == '__main__': - paddle.enable_static() unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_temporal_shift_op.py b/python/paddle/fluid/tests/unittests/test_temporal_shift_op.py index 
407a252e1a..5bab4a52bf 100644 --- a/python/paddle/fluid/tests/unittests/test_temporal_shift_op.py +++ b/python/paddle/fluid/tests/unittests/test_temporal_shift_op.py @@ -143,5 +143,4 @@ class TestTemporalShiftAPI(unittest.TestCase): if __name__ == "__main__": - paddle.enable_static() unittest.main() -- GitLab
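A closing note on the activation gradients this revert moves back into fluid: several of the deleted functors are tagged kDepOut, meaning the backward pass needs only the forward output y = f(x) rather than the input x. Their math, collected as a scalar reference (a sketch; the names are illustrative):

#include <cstdio>

// Gradients of the kDepOut activations seen in this patch, written in terms
// of the forward output y = f(x).
float ExpGradRef(float dout, float y) { return dout * y; }                  // y = e^x
float Expm1GradRef(float dout, float y) { return dout * y + dout; }         // y = e^x - 1
float ReciprocalGradRef(float dout, float y) { return -dout * y * y; }      // y = 1 / x
float SqrtGradRef(float dout, float y) { return 0.5f * dout / y; }          // y = sqrt(x)
float RsqrtGradRef(float dout, float y) { return -0.5f * dout * y * y * y; }  // y = 1 / sqrt(x)

int main() {
  const float y = 2.0f;  // forward output of sqrt at x = 4
  std::printf("%g\n", SqrtGradRef(1.0f, y));  // 0.25 = d sqrt(x)/dx at x = 4
  return 0;
}

Because only out is needed, these grad kernels are declared through the DEPOUT variants of the registration macros above, which avoids keeping the forward input alive for the backward pass.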