Unverified commit a9bba5ba, authored by chentianyu03 and committed by GitHub

[phi] add relu6 kernel and yaml (#43549)

* add relu6 kernel and yaml

* format files

* format code and fix bug

* fix build failure
Parent: 2616d51a
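For reference, the kernels moved in this commit implement relu6(x) = min(max(0, x), threshold), with the Python API fixing threshold at 6.0, and the backward pass propagates the incoming gradient only where 0 < out < threshold (only the forward output is needed, hence kDepOut). Below is a minimal NumPy sketch of these semantics; it is an illustrative reference only, not the actual phi kernel code, and the helper names are made up for this example.

import numpy as np

def relu6_ref(x, threshold=6.0):
    # relu6(x) = min(max(0, x), threshold); the Python API uses threshold = 6.0
    return np.minimum(np.maximum(x, 0.0), threshold)

def relu6_grad_ref(dout, out, threshold=6.0):
    # dx = dout where 0 < out < threshold, else 0, matching Relu6GradFunctor
    # and CudaRelu6GradFunctor (kDepOut: only the forward output is needed)
    return dout * ((out > 0.0) & (out < threshold)).astype(dout.dtype)

x = np.array([-1.0, 0.3, 6.5])
out = relu6_ref(x)                          # [0.0, 0.3, 6.0]
dx = relu6_grad_ref(np.ones_like(x), out)   # [0.0, 1.0, 0.0]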
@@ -56,4 +56,4 @@ TEST(Relu6OpConverter, main) { test_activation("relu6"); }
USE_OP_ITSELF(relu);
USE_OP_ITSELF(sigmoid);
USE_OP_ITSELF(tanh);
USE_OP(relu6);
USE_OP_ITSELF(relu6);
@@ -1503,6 +1503,7 @@ REGISTER_ACTIVATION_OP(thresholded_relu,
ThresholdedRelu,
ThresholdedReluFunctor,
ThresholdedReluGradFunctor);
REGISTER_ACTIVATION_OP(relu6, Relu6, Relu6Functor, Relu6GradFunctor);
REGISTER_ACTIVATION_OP(hard_shrink,
HardShrink,
HardShrinkFunctor,
......
@@ -281,6 +281,7 @@ USE_PHI_DOUBLE_GRAD_FUNCTOR(Tanh)
USE_PHI_TRIPLE_GRAD_FUNCTOR(Tanh)
USE_PHI_FUNCTOR(BRelu)
USE_PHI_FUNCTOR(ThresholdedRelu)
USE_PHI_FUNCTOR(Relu6)
USE_PHI_FUNCTOR(LeakyRelu)
USE_PHI_DOUBLE_GRAD_FUNCTOR(LeakyRelu)
USE_PHI_FUNCTOR(HardShrink)
@@ -348,44 +349,6 @@ using ReluGradGradFunctor = phi::funcs::ReluGradGradFunctor<T>;
template <typename T>
using ReluCUDAFunctor = phi::funcs::ReluCUDAFunctor<T>;
// relu6(x) = min(max(0, x), 6)
template <typename T>
struct Relu6Functor : public BaseActivationFunctor<T> {
float threshold;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"threshold", &threshold}};
}
template <typename Device, typename X, typename Out>
void operator()(Device d, X x, Out out) const {
out.device(d) =
x.cwiseMax(static_cast<T>(0)).cwiseMin(static_cast<T>(threshold));
}
};
template <typename T>
struct Relu6GradFunctor : public BaseActivationFunctor<T> {
float threshold;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"threshold", &threshold}};
}
template <typename Device,
typename X,
typename Out,
typename dOut,
typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
dx.device(d) =
dout * ((out > static_cast<T>(0)) * (out < static_cast<T>(threshold)))
.template cast<T>();
}
static constexpr ActBwdOpFwdDeps FwdDeps() {
return ActBwdOpFwdDeps::kDepOut;
}
};
template <typename T>
struct SoftReluFunctor : public BaseActivationFunctor<T> {
float threshold;
@@ -561,5 +524,4 @@ struct SoftsignGradFunctor : public BaseActivationFunctor<T> {
#define FOR_EACH_ACTIVATION_OP(__macro) \
__macro(soft_relu, SoftRelu, SoftReluFunctor, SoftReluGradFunctor); \
__macro(softsign, Softsign, SoftsignFunctor, SoftsignGradFunctor); \
__macro(relu6, Relu6, Relu6Functor, Relu6GradFunctor);
__macro(softsign, Softsign, SoftsignFunctor, SoftsignGradFunctor);
@@ -14,7 +14,6 @@ limitations under the License. */
#include "paddle/fluid/operators/elementwise/elementwise_op_impl.cu.h"
#include "paddle/fluid/platform/bfloat16.h"
#include "paddle/fluid/platform/device/gpu/gpu_device_function.h"
#include "paddle/phi/kernels/funcs/activation_functor.h"
namespace paddle {
@@ -67,42 +66,6 @@ struct CudaSoftReluGradFunctor : public BaseActivationFunctor<T> {
}
};
template <typename T>
struct CudaRelu6Functor : public BaseActivationFunctor<T> {
T zero = static_cast<T>(0.0f);
float threshold;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"threshold", &threshold}};
}
// relu6(x) = min(max(0, x), 6)
__device__ __forceinline__ T operator()(const T x) const {
T t = static_cast<T>(threshold);
return x <= zero ? zero : (x < t ? x : t);
}
};
template <typename T>
struct CudaRelu6GradFunctor : public BaseActivationFunctor<T> {
T zero = static_cast<T>(0.0f);
float threshold;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"threshold", &threshold}};
}
// dx = (out > 0 && out < t) ? dout : 0
__device__ __forceinline__ T operator()(const T dout, const T out) const {
T t = static_cast<T>(threshold);
return (out > zero && out < t) ? dout : zero;
}
static constexpr ActBwdOpFwdDeps FwdDeps() {
return ActBwdOpFwdDeps::kDepOut;
}
};
template <typename T>
struct CudaSoftsignFunctor : public BaseActivationFunctor<T> {
T one = static_cast<T>(1.0f);
@@ -144,8 +107,8 @@ class ActivationCudaKernel
for (auto& attr : attrs) {
*attr.second = ctx.Attr<float>(attr.first);
}
paddle::operators::LaunchSameDimsElementwiseCudaKernel<T>(dev_ctx, ins,
&outs, functor);
paddle::operators::LaunchSameDimsElementwiseCudaKernel<T>(
dev_ctx, ins, &outs, functor);
}
};
@@ -158,8 +121,8 @@ class ActivationGradCudaKernel
const framework::Tensor *x, *out, *d_out;
framework::Tensor* d_x = nullptr;
x = out = d_out = nullptr;
ExtractActivationGradTensor<Functor::FwdDeps()>(ctx, &x, &out, &d_out,
&d_x);
ExtractActivationGradTensor<Functor::FwdDeps()>(
ctx, &x, &out, &d_out, &d_x);
d_x->mutable_data<T>(ctx.GetPlace());
auto& dev_ctx = ctx.template device_context<DeviceContext>();
auto functor = Functor();
@@ -175,17 +138,17 @@ class ActivationGradCudaKernel
static_cast<int>(ActBwdOpFwdDeps::kDepOut)) {
// Only need forward output Out
ins.push_back(out);
paddle::operators::LaunchSameDimsElementwiseCudaKernel<T>(dev_ctx, ins,
&outs, functor);
paddle::operators::LaunchSameDimsElementwiseCudaKernel<T>(
dev_ctx, ins, &outs, functor);
} else if (static_cast<int>(Functor::FwdDeps()) ==
static_cast<int>(ActBwdOpFwdDeps::kDepX)) {
// Only need forward input X
ins.push_back(x);
paddle::operators::LaunchSameDimsElementwiseCudaKernel<T>(dev_ctx, ins,
&outs, functor);
paddle::operators::LaunchSameDimsElementwiseCudaKernel<T>(
dev_ctx, ins, &outs, functor);
} else {
paddle::operators::LaunchSameDimsElementwiseCudaKernel<T>(dev_ctx, ins,
&outs, functor);
paddle::operators::LaunchSameDimsElementwiseCudaKernel<T>(
dev_ctx, ins, &outs, functor);
}
}
};
@@ -205,6 +168,7 @@ USE_PHI_FUNCTOR(CudaTanh)
USE_PHI_FUNCTOR(CudaBRelu)
USE_PHI_FUNCTOR(CudaLeakyRelu)
USE_PHI_FUNCTOR(CudaThresholdedRelu)
USE_PHI_FUNCTOR(CudaRelu6)
USE_PHI_FUNCTOR(CudaHardShrink)
USE_PHI_FUNCTOR(CudaSoftShrink)
USE_PHI_FUNCTOR(CudaTanhShrink)
@@ -252,10 +216,11 @@ using CudaELUGradNegativeAlphaFunctor =
namespace ops = paddle::operators;
namespace plat = paddle::platform;
#define REGISTER_ACTIVATION_CUDA_KERNEL(act_type, op_name, functor, \
grad_functor) \
#define REGISTER_ACTIVATION_CUDA_KERNEL( \
act_type, op_name, functor, grad_functor) \
REGISTER_OP_CUDA_KERNEL( \
act_type, ops::ActivationCudaKernel<paddle::platform::CUDADeviceContext, \
act_type, \
ops::ActivationCudaKernel<paddle::platform::CUDADeviceContext, \
ops::functor<float>>, \
ops::ActivationCudaKernel<paddle::platform::CUDADeviceContext, \
ops::functor<double>>, \
@@ -274,10 +239,11 @@ namespace plat = paddle::platform;
ops::ActivationGradCudaKernel<plat::CUDADeviceContext, \
ops::grad_functor<plat::bfloat16>>);
#define REGISTER_ACTIVATION_CUDA_KERNEL_INT(act_type, op_name, functor, \
grad_functor) \
#define REGISTER_ACTIVATION_CUDA_KERNEL_INT( \
act_type, op_name, functor, grad_functor) \
REGISTER_OP_CUDA_KERNEL( \
act_type, ops::ActivationCudaKernel<paddle::platform::CUDADeviceContext, \
act_type, \
ops::ActivationCudaKernel<paddle::platform::CUDADeviceContext, \
ops::functor<float>>, \
ops::ActivationCudaKernel<paddle::platform::CUDADeviceContext, \
ops::functor<double>>, \
@@ -305,7 +271,8 @@ namespace plat = paddle::platform;
ops::grad_functor<plat::bfloat16>>);
REGISTER_OP_CUDA_KERNEL(
relu6, ops::ActivationCudaKernel<paddle::platform::CUDADeviceContext,
relu6,
ops::ActivationCudaKernel<paddle::platform::CUDADeviceContext,
ops::CudaRelu6Functor<float>>,
ops::ActivationCudaKernel<paddle::platform::CUDADeviceContext,
ops::CudaRelu6Functor<double>>,
@@ -318,7 +285,8 @@ REGISTER_OP_CUDA_KERNEL(
ops::ActivationCudaKernel<plat::CUDADeviceContext,
ops::CudaRelu6Functor<plat::bfloat16>>);
REGISTER_OP_CUDA_KERNEL(
relu6_grad, ops::ActivationGradCudaKernel<plat::CUDADeviceContext,
relu6_grad,
ops::ActivationGradCudaKernel<plat::CUDADeviceContext,
ops::CudaRelu6GradFunctor<float>>,
ops::ActivationGradCudaKernel<plat::CUDADeviceContext,
ops::CudaRelu6GradFunctor<double>>,
@@ -339,220 +307,324 @@ FOR_EACH_ACTIVATION_CUDA_OP(REGISTER_ACTIVATION_CUDA_KERNEL)
#ifdef PADDLE_WITH_XPU_KP
REGISTER_OP_KERNEL(
brelu, KP, plat::XPUPlace,
brelu,
KP,
plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
phi::funcs::CudaBReluFunctor<float>>);
REGISTER_OP_KERNEL(
brelu_grad, KP, plat::XPUPlace,
brelu_grad,
KP,
plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
phi::funcs::CudaBReluGradFunctor<float>>);
REGISTER_OP_KERNEL(ceil, KP, plat::XPUPlace,
REGISTER_OP_KERNEL(ceil,
KP,
plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaCeilFunctor<float>>);
REGISTER_OP_KERNEL(
ceil_grad, KP, plat::XPUPlace,
ceil_grad,
KP,
plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaZeroGradFunctor<float>>);
REGISTER_OP_KERNEL(
celu, KP, plat::XPUPlace,
celu,
KP,
plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
phi::funcs::CudaCELUFunctor<float>>);
REGISTER_OP_KERNEL(
celu_grad, KP, plat::XPUPlace,
celu_grad,
KP,
plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
phi::funcs::CudaCELUGradFunctor<float>>);
REGISTER_OP_KERNEL(elu, KP, plat::XPUPlace,
REGISTER_OP_KERNEL(elu,
KP,
plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaELUFunctor<float>>);
REGISTER_OP_KERNEL(
elu_grad, KP, plat::XPUPlace,
elu_grad,
KP,
plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaELUGradFunctor<float>>);
REGISTER_OP_KERNEL(exp, KP, plat::XPUPlace,
REGISTER_OP_KERNEL(exp,
KP,
plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaExpFunctor<float>>);
REGISTER_OP_KERNEL(
exp_grad, KP, plat::XPUPlace,
exp_grad,
KP,
plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaExpGradFunctor<float>>);
REGISTER_OP_KERNEL(floor, KP, plat::XPUPlace,
REGISTER_OP_KERNEL(floor,
KP,
plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaFloorFunctor<float>>);
REGISTER_OP_KERNEL(
floor_grad, KP, plat::XPUPlace,
floor_grad,
KP,
plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaZeroGradFunctor<float>>);
REGISTER_OP_KERNEL(
hard_shrink, KP, plat::XPUPlace,
hard_shrink,
KP,
plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaHardShrinkFunctor<float>>);
REGISTER_OP_KERNEL(
hard_shrink_grad, KP, plat::XPUPlace,
hard_shrink_grad,
KP,
plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaHardShrinkGradFunctor<float>>);
REGISTER_OP_KERNEL(
hard_sigmoid, KP, plat::XPUPlace,
hard_sigmoid,
KP,
plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaHardSigmoidFunctor<float>>);
REGISTER_OP_KERNEL(
hard_sigmoid_grad, KP, plat::XPUPlace,
hard_sigmoid_grad,
KP,
plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaHardSigmoidGradFunctor<float>>);
REGISTER_OP_KERNEL(hard_swish, KP, plat::XPUPlace,
REGISTER_OP_KERNEL(hard_swish,
KP,
plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaHardSwishFunctor<float>>);
REGISTER_OP_KERNEL(
hard_swish_grad, KP, plat::XPUPlace,
hard_swish_grad,
KP,
plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaHardSwishGradFunctor<float>>);
REGISTER_OP_KERNEL(
leaky_relu, KP, plat::XPUPlace,
leaky_relu,
KP,
plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
phi::funcs::CudaLeakyReluFunctor<float>>);
REGISTER_OP_KERNEL(
leaky_relu_grad, KP, plat::XPUPlace,
leaky_relu_grad,
KP,
plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
phi::funcs::CudaLeakyReluGradFunctor<float>>);
REGISTER_OP_KERNEL(log, KP, plat::XPUPlace,
REGISTER_OP_KERNEL(log,
KP,
plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaLogFunctor<float>>);
REGISTER_OP_KERNEL(
log_grad, KP, plat::XPUPlace,
log_grad,
KP,
plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaLogGradFunctor<float>>);
REGISTER_OP_KERNEL(log1p, KP, plat::XPUPlace,
REGISTER_OP_KERNEL(log1p,
KP,
plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaLog1pFunctor<float>>);
REGISTER_OP_KERNEL(
log1p_grad, KP, plat::XPUPlace,
log1p_grad,
KP,
plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaLog1pGradFunctor<float>>);
REGISTER_OP_KERNEL(
logsigmoid, KP, plat::XPUPlace,
logsigmoid,
KP,
plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaLogSigmoidFunctor<float>>);
REGISTER_OP_KERNEL(
logsigmoid_grad, KP, plat::XPUPlace,
logsigmoid_grad,
KP,
plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaLogSigmoidGradFunctor<float>>);
REGISTER_OP_KERNEL(
reciprocal, KP, plat::XPUPlace,
reciprocal,
KP,
plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaReciprocalFunctor<float>>);
REGISTER_OP_KERNEL(
reciprocal_grad, KP, plat::XPUPlace,
reciprocal_grad,
KP,
plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaReciprocalGradFunctor<float>>);
REGISTER_OP_KERNEL(
relu, KP, plat::XPUPlace,
relu,
KP,
plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
phi::funcs::CudaReluFunctor<float>>);
REGISTER_OP_KERNEL(
relu_grad, KP, plat::XPUPlace,
relu_grad,
KP,
plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
phi::funcs::CudaReluGradFunctor<float>>);
REGISTER_OP_KERNEL(relu6, KP, plat::XPUPlace,
REGISTER_OP_KERNEL(relu6,
KP,
plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaRelu6Functor<float>>);
REGISTER_OP_KERNEL(
relu6_grad, KP, plat::XPUPlace,
relu6_grad,
KP,
plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaRelu6GradFunctor<float>>);
REGISTER_OP_KERNEL(sigmoid, KP, plat::XPUPlace,
REGISTER_OP_KERNEL(sigmoid,
KP,
plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaSigmoidFunctor<float>>);
REGISTER_OP_KERNEL(
sigmoid_grad, KP, plat::XPUPlace,
sigmoid_grad,
KP,
plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaSigmoidGradFunctor<float>>);
REGISTER_OP_KERNEL(silu, KP, plat::XPUPlace,
REGISTER_OP_KERNEL(silu,
KP,
plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaSiluFunctor<float>>);
REGISTER_OP_KERNEL(
silu_grad, KP, plat::XPUPlace,
silu_grad,
KP,
plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaSiluGradFunctor<float>>);
REGISTER_OP_KERNEL(soft_relu, KP, plat::XPUPlace,
REGISTER_OP_KERNEL(soft_relu,
KP,
plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaSoftReluFunctor<float>>);
REGISTER_OP_KERNEL(
soft_relu_grad, KP, plat::XPUPlace,
soft_relu_grad,
KP,
plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaSoftReluGradFunctor<float>>);
REGISTER_OP_KERNEL(softplus, KP, plat::XPUPlace,
REGISTER_OP_KERNEL(softplus,
KP,
plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaSoftplusFunctor<float>>);
REGISTER_OP_KERNEL(
softplus_grad, KP, plat::XPUPlace,
softplus_grad,
KP,
plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaSoftplusGradFunctor<float>>);
REGISTER_OP_KERNEL(
softshrink, KP, plat::XPUPlace,
softshrink,
KP,
plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaSoftShrinkFunctor<float>>);
REGISTER_OP_KERNEL(
softshrink_grad, KP, plat::XPUPlace,
softshrink_grad,
KP,
plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaSoftShrinkGradFunctor<float>>);
REGISTER_OP_KERNEL(softsign, KP, plat::XPUPlace,
REGISTER_OP_KERNEL(softsign,
KP,
plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaSoftsignFunctor<float>>);
REGISTER_OP_KERNEL(
softsign_grad, KP, plat::XPUPlace,
softsign_grad,
KP,
plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaSoftsignGradFunctor<float>>);
REGISTER_OP_KERNEL(sqrt, KP, plat::XPUPlace,
REGISTER_OP_KERNEL(sqrt,
KP,
plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaSqrtFunctor<float>>);
REGISTER_OP_KERNEL(
sqrt_grad, KP, plat::XPUPlace,
sqrt_grad,
KP,
plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaSqrtGradFunctor<float>>);
REGISTER_OP_KERNEL(square, KP, plat::XPUPlace,
REGISTER_OP_KERNEL(square,
KP,
plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaSquareFunctor<float>>);
REGISTER_OP_KERNEL(
square_grad, KP, plat::XPUPlace,
square_grad,
KP,
plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaSquareGradFunctor<float>>);
REGISTER_OP_KERNEL(swish, KP, plat::XPUPlace,
REGISTER_OP_KERNEL(swish,
KP,
plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaSwishFunctor<float>>);
REGISTER_OP_KERNEL(
swish_grad, KP, plat::XPUPlace,
swish_grad,
KP,
plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaSwishGradFunctor<float>>);
REGISTER_OP_KERNEL(
thresholded_relu, KP, plat::XPUPlace,
thresholded_relu,
KP,
plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaThresholdedReluFunctor<float>>);
REGISTER_OP_KERNEL(
thresholded_relu_grad, KP, plat::XPUPlace,
thresholded_relu_grad,
KP,
plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaThresholdedReluGradFunctor<float>>);
......
@@ -75,6 +75,7 @@ DECLARE_ACTIVATION_KERNEL(Negative)
DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(LeakyRelu, alpha)
DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(ThresholdedRelu, threshold)
DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(Relu6, threshold)
DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(SoftShrink, lambda)
DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(HardShrink, threshold)
DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(Elu, alpha)
......
@@ -156,6 +156,9 @@ DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(LeakyRelu,
DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(ThresholdedRelu,
ThresholdedReluGradFunctor,
threshold);
DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPOUT(Relu6,
Relu6GradFunctor,
threshold);
DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(SoftShrink,
SoftShrinkGradFunctor,
lambda);
@@ -263,6 +266,7 @@ PD_REGISTER_ACTIVATION_GRAD_KERNEL(brelu_grad, BReluGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(leaky_relu_grad, LeakyReluGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(thresholded_relu_grad,
ThresholdedReluGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(relu6_grad, Relu6GradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(soft_shrink_grad, SoftShrinkGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(hard_shrink_grad, HardShrinkGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(tanh_shrink_grad, TanhShrinkGradKernel)
......
@@ -95,6 +95,7 @@ DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(LeakyRelu, LeakyReluFunctor, alpha)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(ThresholdedRelu,
ThresholdedReluFunctor,
threshold)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(Relu6, Relu6Functor, threshold)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(Mish, MishFunctor, threshold)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(HardShrink, HardShrinkFunctor, threshold)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(SoftShrink, SoftShrinkFunctor, lambda)
@@ -147,6 +148,7 @@ PD_REGISTER_ACTIVATION_KERNEL(tanh, TanhKernel)
PD_REGISTER_ACTIVATION_KERNEL(brelu, BReluKernel)
PD_REGISTER_ACTIVATION_KERNEL(leaky_relu, LeakyReluKernel)
PD_REGISTER_ACTIVATION_KERNEL(thresholded_relu, ThresholdedReluKernel)
PD_REGISTER_ACTIVATION_KERNEL(relu6, Relu6Kernel)
PD_REGISTER_ACTIVATION_KERNEL(hard_shrink, HardShrinkKernel)
PD_REGISTER_ACTIVATION_KERNEL(soft_shrink, SoftShrinkKernel)
PD_REGISTER_ACTIVATION_KERNEL(tanh_shrink, TanhShrinkKernel)
......
@@ -1097,6 +1097,44 @@ struct ThresholdedReluGradFunctor : public BaseActivationFunctor<T> {
static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};
// relu6(x) = min(max(0, x), 6)
template <typename T>
struct Relu6Functor : public BaseActivationFunctor<T> {
float threshold;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"threshold", &threshold}};
}
template <typename Device, typename X, typename Out>
void operator()(Device d, X x, Out out) const {
out.device(d) =
x.cwiseMax(static_cast<T>(0)).cwiseMin(static_cast<T>(threshold));
}
};
template <typename T>
struct Relu6GradFunctor : public BaseActivationFunctor<T> {
float threshold;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"threshold", &threshold}};
}
template <typename Device,
typename X,
typename Out,
typename dOut,
typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
dx.device(d) =
dout * ((out > static_cast<T>(0)) * (out < static_cast<T>(threshold)))
.template cast<T>();
}
static constexpr ActBwdOpFwdDeps FwdDeps() {
return ActBwdOpFwdDeps::kDepOut;
}
};
// tanhshrink(x) = x - tanh(x)
// where tanh(x) = (exp(x) - exp(-x)) / (exp(x) + exp(-x))
template <typename T>
@@ -2712,6 +2750,41 @@ struct CudaThresholdedReluGradFunctor : public BaseActivationFunctor<T> {
static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};
template <typename T>
struct CudaRelu6Functor : public BaseActivationFunctor<T> {
T zero = static_cast<T>(0.0f);
float threshold;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"threshold", &threshold}};
}
// relu6(x) = min(max(0, x), 6)
__device__ __forceinline__ T operator()(const T x) const {
T t = static_cast<T>(threshold);
return x <= zero ? zero : (x < t ? x : t);
}
};
template <typename T>
struct CudaRelu6GradFunctor : public BaseActivationFunctor<T> {
T zero = static_cast<T>(0.0f);
float threshold;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"threshold", &threshold}};
}
// dx = (out > 0 && out < t) ? dout : 0
__device__ __forceinline__ T operator()(const T dout, const T out) const {
T t = static_cast<T>(threshold);
return (out > zero && out < t) ? dout : zero;
}
static constexpr ActBwdOpFwdDeps FwdDeps() {
return ActBwdOpFwdDeps::kDepOut;
}
};
template <typename T>
struct CudaLeakyReluFunctor : public BaseActivationFunctor<T> {
T zero = static_cast<T>(0.0f);
......
@@ -223,6 +223,9 @@ DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(Mish,
DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(Celu,
CudaCELUGradFunctor,
alpha);
DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPOUT(Relu6,
CudaRelu6GradFunctor,
threshold);
DEFINE_GPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX(BRelu,
CudaBReluGradFunctor,
@@ -348,6 +351,7 @@ PD_REGISTER_ACTIVATION_GRAD_KERNEL(leaky_relu_double_grad,
LeakyReluDoubleGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(thresholded_relu_grad,
ThresholdedReluGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(relu6_grad, Relu6GradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(mish_grad, MishGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(stanh_grad, STanhGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(reciprocal_grad, ReciprocalGradKernel)
......
@@ -111,6 +111,7 @@ DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(LeakyRelu, CudaLeakyReluFunctor, alpha)
DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(ThresholdedRelu,
CudaThresholdedReluFunctor,
threshold)
DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(Relu6, CudaRelu6Functor, threshold)
DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(HardShrink,
CudaHardShrinkFunctor,
threshold)
@@ -192,6 +193,7 @@ PD_REGISTER_ACTIVATION_KERNEL(atanh, AtanhKernel)
PD_REGISTER_ACTIVATION_KERNEL(tanh, TanhKernel)
PD_REGISTER_ACTIVATION_KERNEL(brelu, BReluKernel)
PD_REGISTER_ACTIVATION_KERNEL(thresholded_relu, ThresholdedReluKernel)
PD_REGISTER_ACTIVATION_KERNEL(relu6, Relu6Kernel)
PD_REGISTER_ACTIVATION_KERNEL(leaky_relu, LeakyReluKernel)
PD_REGISTER_ACTIVATION_KERNEL(mish, MishKernel)
PD_REGISTER_ACTIVATION_KERNEL(stanh, StanhKernel)
......
@@ -90,6 +90,7 @@ DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Expm1, "expm1", ); // NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Reciprocal, "reciprocal", ); // NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Sqrt, "sqrt", ); // NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Rsqrt, "rsqrt", ); // NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Relu6, "relu6", "threshold"); // NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(HardSigmoid,
"hard_sigmoid",
@@ -282,6 +283,7 @@ PD_REGISTER_ARG_MAPPING_FN(leaky_relu_grad_grad,
phi::LeakyReluDoubleGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(thresholded_relu_grad,
phi::ThresholdedReluGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(relu6_grad, phi::Relu6GradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(softshrink_grad,
phi::SoftShrinkGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(hard_shrink_grad,
......
@@ -1737,6 +1737,7 @@ class TestRelu6(TestActivation):
def setUp(self):
self.op_type = "relu6"
self.init_dtype()
self.python_api = paddle.nn.functional.relu6
np.random.seed(1024)
x = np.random.uniform(-1, 10, [10, 12]).astype(self.dtype)
@@ -1750,7 +1751,7 @@ class TestRelu6(TestActivation):
def test_check_grad(self):
if self.dtype == np.float16:
return
self.check_grad(['X'], 'Out')
self.check_grad(['X'], 'Out', check_eager=True)
class TestRelu6API(unittest.TestCase):
......
@@ -860,6 +860,8 @@ def relu6(x, name=None):
out = F.relu6(x) # [0, 0.3, 6]
"""
threshold = 6.0
if in_dygraph_mode():
return _C_ops.final_state_relu6(x, threshold)
if in_dynamic_mode():
return _C_ops.relu6(x, 'threshold', threshold)
......
@@ -1713,6 +1713,16 @@
inplace : (x -> out)
backward : relu_grad
- api : relu6
args : (Tensor x, float threshold)
output : Tensor
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : relu6
backward : relu6_grad
- api : reshape
args : (Tensor x, IntArray shape)
output : Tensor(out), Tensor(xshape)
......
@@ -1565,6 +1565,17 @@
kernel :
func : prod_grad
- backward_api : relu6_grad
forward : relu6 (Tensor x, float threshold) -> Tensor(out)
args : (Tensor out, Tensor out_grad, float threshold)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [out]
kernel :
func : relu6_grad
inplace : (out_grad -> x_grad)
- backward_api : relu_double_grad
forward : relu_grad (Tensor out, Tensor grad_out) -> Tensor(grad_x)
args : (Tensor out, Tensor grad_x_grad)
......
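With the forward relu6 entry and the relu6_grad backward entry added to the api yaml files, and paddle.nn.functional.relu6 now dispatching to _C_ops.final_state_relu6 under in_dygraph_mode, the op can be exercised end to end from Python. A small usage sketch, assuming Paddle's default dygraph mode; the expected forward values follow the docstring example above, and the gradient follows the relu6_grad rule:

import paddle
import paddle.nn.functional as F

x = paddle.to_tensor([-1.0, 0.3, 6.5], dtype='float32', stop_gradient=False)
y = F.relu6(x)        # [0.0, 0.3, 6.0], as in the docstring example
y.sum().backward()
print(x.grad)         # gradient passes only where 0 < y < 6, i.e. [0.0, 1.0, 0.0]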