未验证 提交 a9bba5ba 编写于 作者: C chentianyu03 提交者: GitHub

[phi]add relu6 kernel and yaml (#43549)

* add relu6 kernel and yaml

* format files

* format code and fix bug

* fix build failed
上级 2616d51a
...@@ -56,4 +56,4 @@ TEST(Relu6OpConverter, main) { test_activation("relu6"); } ...@@ -56,4 +56,4 @@ TEST(Relu6OpConverter, main) { test_activation("relu6"); }
USE_OP_ITSELF(relu); USE_OP_ITSELF(relu);
USE_OP_ITSELF(sigmoid); USE_OP_ITSELF(sigmoid);
USE_OP_ITSELF(tanh); USE_OP_ITSELF(tanh);
USE_OP(relu6); USE_OP_ITSELF(relu6);
...@@ -1503,6 +1503,7 @@ REGISTER_ACTIVATION_OP(thresholded_relu, ...@@ -1503,6 +1503,7 @@ REGISTER_ACTIVATION_OP(thresholded_relu,
ThresholdedRelu, ThresholdedRelu,
ThresholdedReluFunctor, ThresholdedReluFunctor,
ThresholdedReluGradFunctor); ThresholdedReluGradFunctor);
REGISTER_ACTIVATION_OP(relu6, Relu6, Relu6Functor, Relu6GradFunctor);
REGISTER_ACTIVATION_OP(hard_shrink, REGISTER_ACTIVATION_OP(hard_shrink,
HardShrink, HardShrink,
HardShrinkFunctor, HardShrinkFunctor,
......
...@@ -281,6 +281,7 @@ USE_PHI_DOUBLE_GRAD_FUNCTOR(Tanh) ...@@ -281,6 +281,7 @@ USE_PHI_DOUBLE_GRAD_FUNCTOR(Tanh)
USE_PHI_TRIPLE_GRAD_FUNCTOR(Tanh) USE_PHI_TRIPLE_GRAD_FUNCTOR(Tanh)
USE_PHI_FUNCTOR(BRelu) USE_PHI_FUNCTOR(BRelu)
USE_PHI_FUNCTOR(ThresholdedRelu) USE_PHI_FUNCTOR(ThresholdedRelu)
USE_PHI_FUNCTOR(Relu6)
USE_PHI_FUNCTOR(LeakyRelu) USE_PHI_FUNCTOR(LeakyRelu)
USE_PHI_DOUBLE_GRAD_FUNCTOR(LeakyRelu) USE_PHI_DOUBLE_GRAD_FUNCTOR(LeakyRelu)
USE_PHI_FUNCTOR(HardShrink) USE_PHI_FUNCTOR(HardShrink)
...@@ -348,44 +349,6 @@ using ReluGradGradFunctor = phi::funcs::ReluGradGradFunctor<T>; ...@@ -348,44 +349,6 @@ using ReluGradGradFunctor = phi::funcs::ReluGradGradFunctor<T>;
template <typename T> template <typename T>
using ReluCUDAFunctor = phi::funcs::ReluCUDAFunctor<T>; using ReluCUDAFunctor = phi::funcs::ReluCUDAFunctor<T>;
// relu6(x) = min(max(0, x), 6)
template <typename T>
struct Relu6Functor : public BaseActivationFunctor<T> {
float threshold;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"threshold", &threshold}};
}
template <typename Device, typename X, typename Out>
void operator()(Device d, X x, Out out) const {
out.device(d) =
x.cwiseMax(static_cast<T>(0)).cwiseMin(static_cast<T>(threshold));
}
};
template <typename T>
struct Relu6GradFunctor : public BaseActivationFunctor<T> {
float threshold;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"threshold", &threshold}};
}
template <typename Device,
typename X,
typename Out,
typename dOut,
typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
dx.device(d) =
dout * ((out > static_cast<T>(0)) * (out < static_cast<T>(threshold)))
.template cast<T>();
}
static constexpr ActBwdOpFwdDeps FwdDeps() {
return ActBwdOpFwdDeps::kDepOut;
}
};
template <typename T> template <typename T>
struct SoftReluFunctor : public BaseActivationFunctor<T> { struct SoftReluFunctor : public BaseActivationFunctor<T> {
float threshold; float threshold;
...@@ -561,5 +524,4 @@ struct SoftsignGradFunctor : public BaseActivationFunctor<T> { ...@@ -561,5 +524,4 @@ struct SoftsignGradFunctor : public BaseActivationFunctor<T> {
#define FOR_EACH_ACTIVATION_OP(__macro) \ #define FOR_EACH_ACTIVATION_OP(__macro) \
__macro(soft_relu, SoftRelu, SoftReluFunctor, SoftReluGradFunctor); \ __macro(soft_relu, SoftRelu, SoftReluFunctor, SoftReluGradFunctor); \
__macro(softsign, Softsign, SoftsignFunctor, SoftsignGradFunctor); \ __macro(softsign, Softsign, SoftsignFunctor, SoftsignGradFunctor);
__macro(relu6, Relu6, Relu6Functor, Relu6GradFunctor);
...@@ -14,7 +14,6 @@ limitations under the License. */ ...@@ -14,7 +14,6 @@ limitations under the License. */
#include "paddle/fluid/operators/elementwise/elementwise_op_impl.cu.h" #include "paddle/fluid/operators/elementwise/elementwise_op_impl.cu.h"
#include "paddle/fluid/platform/bfloat16.h" #include "paddle/fluid/platform/bfloat16.h"
#include "paddle/fluid/platform/device/gpu/gpu_device_function.h" #include "paddle/fluid/platform/device/gpu/gpu_device_function.h"
#include "paddle/phi/kernels/funcs/activation_functor.h" #include "paddle/phi/kernels/funcs/activation_functor.h"
namespace paddle { namespace paddle {
...@@ -67,42 +66,6 @@ struct CudaSoftReluGradFunctor : public BaseActivationFunctor<T> { ...@@ -67,42 +66,6 @@ struct CudaSoftReluGradFunctor : public BaseActivationFunctor<T> {
} }
}; };
template <typename T>
struct CudaRelu6Functor : public BaseActivationFunctor<T> {
T zero = static_cast<T>(0.0f);
float threshold;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"threshold", &threshold}};
}
// relu6(x) = min(max(0, x), 6)
__device__ __forceinline__ T operator()(const T x) const {
T t = static_cast<T>(threshold);
return x <= zero ? zero : (x < t ? x : t);
}
};
template <typename T>
struct CudaRelu6GradFunctor : public BaseActivationFunctor<T> {
T zero = static_cast<T>(0.0f);
float threshold;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"threshold", &threshold}};
}
// dx = (out > 0 && out < t) ? dout : 0
__device__ __forceinline__ T operator()(const T dout, const T out) const {
T t = static_cast<T>(threshold);
return (out > zero && out < t) ? dout : zero;
}
static constexpr ActBwdOpFwdDeps FwdDeps() {
return ActBwdOpFwdDeps::kDepOut;
}
};
template <typename T> template <typename T>
struct CudaSoftsignFunctor : public BaseActivationFunctor<T> { struct CudaSoftsignFunctor : public BaseActivationFunctor<T> {
T one = static_cast<T>(1.0f); T one = static_cast<T>(1.0f);
...@@ -144,8 +107,8 @@ class ActivationCudaKernel ...@@ -144,8 +107,8 @@ class ActivationCudaKernel
for (auto& attr : attrs) { for (auto& attr : attrs) {
*attr.second = ctx.Attr<float>(attr.first); *attr.second = ctx.Attr<float>(attr.first);
} }
paddle::operators::LaunchSameDimsElementwiseCudaKernel<T>(dev_ctx, ins, paddle::operators::LaunchSameDimsElementwiseCudaKernel<T>(
&outs, functor); dev_ctx, ins, &outs, functor);
} }
}; };
...@@ -158,8 +121,8 @@ class ActivationGradCudaKernel ...@@ -158,8 +121,8 @@ class ActivationGradCudaKernel
const framework::Tensor *x, *out, *d_out; const framework::Tensor *x, *out, *d_out;
framework::Tensor* d_x = nullptr; framework::Tensor* d_x = nullptr;
x = out = d_out = nullptr; x = out = d_out = nullptr;
ExtractActivationGradTensor<Functor::FwdDeps()>(ctx, &x, &out, &d_out, ExtractActivationGradTensor<Functor::FwdDeps()>(
&d_x); ctx, &x, &out, &d_out, &d_x);
d_x->mutable_data<T>(ctx.GetPlace()); d_x->mutable_data<T>(ctx.GetPlace());
auto& dev_ctx = ctx.template device_context<DeviceContext>(); auto& dev_ctx = ctx.template device_context<DeviceContext>();
auto functor = Functor(); auto functor = Functor();
...@@ -175,17 +138,17 @@ class ActivationGradCudaKernel ...@@ -175,17 +138,17 @@ class ActivationGradCudaKernel
static_cast<int>(ActBwdOpFwdDeps::kDepOut)) { static_cast<int>(ActBwdOpFwdDeps::kDepOut)) {
// Only need forward output Out // Only need forward output Out
ins.push_back(out); ins.push_back(out);
paddle::operators::LaunchSameDimsElementwiseCudaKernel<T>(dev_ctx, ins, paddle::operators::LaunchSameDimsElementwiseCudaKernel<T>(
&outs, functor); dev_ctx, ins, &outs, functor);
} else if (static_cast<int>(Functor::FwdDeps()) == } else if (static_cast<int>(Functor::FwdDeps()) ==
static_cast<int>(ActBwdOpFwdDeps::kDepX)) { static_cast<int>(ActBwdOpFwdDeps::kDepX)) {
// Only need forward input X // Only need forward input X
ins.push_back(x); ins.push_back(x);
paddle::operators::LaunchSameDimsElementwiseCudaKernel<T>(dev_ctx, ins, paddle::operators::LaunchSameDimsElementwiseCudaKernel<T>(
&outs, functor); dev_ctx, ins, &outs, functor);
} else { } else {
paddle::operators::LaunchSameDimsElementwiseCudaKernel<T>(dev_ctx, ins, paddle::operators::LaunchSameDimsElementwiseCudaKernel<T>(
&outs, functor); dev_ctx, ins, &outs, functor);
} }
} }
}; };
...@@ -205,6 +168,7 @@ USE_PHI_FUNCTOR(CudaTanh) ...@@ -205,6 +168,7 @@ USE_PHI_FUNCTOR(CudaTanh)
USE_PHI_FUNCTOR(CudaBRelu) USE_PHI_FUNCTOR(CudaBRelu)
USE_PHI_FUNCTOR(CudaLeakyRelu) USE_PHI_FUNCTOR(CudaLeakyRelu)
USE_PHI_FUNCTOR(CudaThresholdedRelu) USE_PHI_FUNCTOR(CudaThresholdedRelu)
USE_PHI_FUNCTOR(CudaRelu6)
USE_PHI_FUNCTOR(CudaHardShrink) USE_PHI_FUNCTOR(CudaHardShrink)
USE_PHI_FUNCTOR(CudaSoftShrink) USE_PHI_FUNCTOR(CudaSoftShrink)
USE_PHI_FUNCTOR(CudaTanhShrink) USE_PHI_FUNCTOR(CudaTanhShrink)
...@@ -252,61 +216,64 @@ using CudaELUGradNegativeAlphaFunctor = ...@@ -252,61 +216,64 @@ using CudaELUGradNegativeAlphaFunctor =
namespace ops = paddle::operators; namespace ops = paddle::operators;
namespace plat = paddle::platform; namespace plat = paddle::platform;
#define REGISTER_ACTIVATION_CUDA_KERNEL(act_type, op_name, functor, \ #define REGISTER_ACTIVATION_CUDA_KERNEL( \
grad_functor) \ act_type, op_name, functor, grad_functor) \
REGISTER_OP_CUDA_KERNEL( \ REGISTER_OP_CUDA_KERNEL( \
act_type, ops::ActivationCudaKernel<paddle::platform::CUDADeviceContext, \ act_type, \
ops::functor<float>>, \ ops::ActivationCudaKernel<paddle::platform::CUDADeviceContext, \
ops::ActivationCudaKernel<paddle::platform::CUDADeviceContext, \ ops::functor<float>>, \
ops::functor<double>>, \ ops::ActivationCudaKernel<paddle::platform::CUDADeviceContext, \
ops::ActivationCudaKernel<plat::CUDADeviceContext, \ ops::functor<double>>, \
ops::functor<plat::float16>>, \ ops::ActivationCudaKernel<plat::CUDADeviceContext, \
ops::ActivationCudaKernel<plat::CUDADeviceContext, \ ops::functor<plat::float16>>, \
ops::functor<plat::bfloat16>>); \ ops::ActivationCudaKernel<plat::CUDADeviceContext, \
REGISTER_OP_CUDA_KERNEL( \ ops::functor<plat::bfloat16>>); \
act_type##_grad, \ REGISTER_OP_CUDA_KERNEL( \
ops::ActivationGradCudaKernel<plat::CUDADeviceContext, \ act_type##_grad, \
ops::grad_functor<float>>, \ ops::ActivationGradCudaKernel<plat::CUDADeviceContext, \
ops::ActivationGradCudaKernel<plat::CUDADeviceContext, \ ops::grad_functor<float>>, \
ops::grad_functor<double>>, \ ops::ActivationGradCudaKernel<plat::CUDADeviceContext, \
ops::ActivationGradCudaKernel<plat::CUDADeviceContext, \ ops::grad_functor<double>>, \
ops::grad_functor<plat::float16>>, \ ops::ActivationGradCudaKernel<plat::CUDADeviceContext, \
ops::ActivationGradCudaKernel<plat::CUDADeviceContext, \ ops::grad_functor<plat::float16>>, \
ops::ActivationGradCudaKernel<plat::CUDADeviceContext, \
ops::grad_functor<plat::bfloat16>>); ops::grad_functor<plat::bfloat16>>);
#define REGISTER_ACTIVATION_CUDA_KERNEL_INT(act_type, op_name, functor, \ #define REGISTER_ACTIVATION_CUDA_KERNEL_INT( \
grad_functor) \ act_type, op_name, functor, grad_functor) \
REGISTER_OP_CUDA_KERNEL( \ REGISTER_OP_CUDA_KERNEL( \
act_type, ops::ActivationCudaKernel<paddle::platform::CUDADeviceContext, \ act_type, \
ops::functor<float>>, \ ops::ActivationCudaKernel<paddle::platform::CUDADeviceContext, \
ops::ActivationCudaKernel<paddle::platform::CUDADeviceContext, \ ops::functor<float>>, \
ops::functor<double>>, \ ops::ActivationCudaKernel<paddle::platform::CUDADeviceContext, \
ops::ActivationCudaKernel<paddle::platform::CUDADeviceContext, \ ops::functor<double>>, \
ops::functor<int>>, \ ops::ActivationCudaKernel<paddle::platform::CUDADeviceContext, \
ops::ActivationCudaKernel<paddle::platform::CUDADeviceContext, \ ops::functor<int>>, \
ops::functor<int64_t>>, \ ops::ActivationCudaKernel<paddle::platform::CUDADeviceContext, \
ops::ActivationCudaKernel<plat::CUDADeviceContext, \ ops::functor<int64_t>>, \
ops::functor<plat::float16>>, \ ops::ActivationCudaKernel<plat::CUDADeviceContext, \
ops::ActivationCudaKernel<plat::CUDADeviceContext, \ ops::functor<plat::float16>>, \
ops::functor<plat::bfloat16>>); \ ops::ActivationCudaKernel<plat::CUDADeviceContext, \
REGISTER_OP_CUDA_KERNEL( \ ops::functor<plat::bfloat16>>); \
act_type##_grad, \ REGISTER_OP_CUDA_KERNEL( \
ops::ActivationGradCudaKernel<plat::CUDADeviceContext, \ act_type##_grad, \
ops::grad_functor<float>>, \ ops::ActivationGradCudaKernel<plat::CUDADeviceContext, \
ops::ActivationGradCudaKernel<plat::CUDADeviceContext, \ ops::grad_functor<float>>, \
ops::grad_functor<double>>, \ ops::ActivationGradCudaKernel<plat::CUDADeviceContext, \
ops::ActivationGradCudaKernel<plat::CUDADeviceContext, \ ops::grad_functor<double>>, \
ops::grad_functor<int>>, \ ops::ActivationGradCudaKernel<plat::CUDADeviceContext, \
ops::ActivationGradCudaKernel<plat::CUDADeviceContext, \ ops::grad_functor<int>>, \
ops::grad_functor<int64_t>>, \ ops::ActivationGradCudaKernel<plat::CUDADeviceContext, \
ops::ActivationGradCudaKernel<plat::CUDADeviceContext, \ ops::grad_functor<int64_t>>, \
ops::grad_functor<plat::float16>>, \ ops::ActivationGradCudaKernel<plat::CUDADeviceContext, \
ops::ActivationGradCudaKernel<plat::CUDADeviceContext, \ ops::grad_functor<plat::float16>>, \
ops::ActivationGradCudaKernel<plat::CUDADeviceContext, \
ops::grad_functor<plat::bfloat16>>); ops::grad_functor<plat::bfloat16>>);
REGISTER_OP_CUDA_KERNEL( REGISTER_OP_CUDA_KERNEL(
relu6, ops::ActivationCudaKernel<paddle::platform::CUDADeviceContext, relu6,
ops::CudaRelu6Functor<float>>, ops::ActivationCudaKernel<paddle::platform::CUDADeviceContext,
ops::CudaRelu6Functor<float>>,
ops::ActivationCudaKernel<paddle::platform::CUDADeviceContext, ops::ActivationCudaKernel<paddle::platform::CUDADeviceContext,
ops::CudaRelu6Functor<double>>, ops::CudaRelu6Functor<double>>,
ops::ActivationCudaKernel<paddle::platform::CUDADeviceContext, ops::ActivationCudaKernel<paddle::platform::CUDADeviceContext,
...@@ -318,8 +285,9 @@ REGISTER_OP_CUDA_KERNEL( ...@@ -318,8 +285,9 @@ REGISTER_OP_CUDA_KERNEL(
ops::ActivationCudaKernel<plat::CUDADeviceContext, ops::ActivationCudaKernel<plat::CUDADeviceContext,
ops::CudaRelu6Functor<plat::bfloat16>>); ops::CudaRelu6Functor<plat::bfloat16>>);
REGISTER_OP_CUDA_KERNEL( REGISTER_OP_CUDA_KERNEL(
relu6_grad, ops::ActivationGradCudaKernel<plat::CUDADeviceContext, relu6_grad,
ops::CudaRelu6GradFunctor<float>>, ops::ActivationGradCudaKernel<plat::CUDADeviceContext,
ops::CudaRelu6GradFunctor<float>>,
ops::ActivationGradCudaKernel<plat::CUDADeviceContext, ops::ActivationGradCudaKernel<plat::CUDADeviceContext,
ops::CudaRelu6GradFunctor<double>>, ops::CudaRelu6GradFunctor<double>>,
ops::ActivationGradCudaKernel<plat::CUDADeviceContext, ops::ActivationGradCudaKernel<plat::CUDADeviceContext,
...@@ -339,220 +307,324 @@ FOR_EACH_ACTIVATION_CUDA_OP(REGISTER_ACTIVATION_CUDA_KERNEL) ...@@ -339,220 +307,324 @@ FOR_EACH_ACTIVATION_CUDA_OP(REGISTER_ACTIVATION_CUDA_KERNEL)
#ifdef PADDLE_WITH_XPU_KP #ifdef PADDLE_WITH_XPU_KP
REGISTER_OP_KERNEL( REGISTER_OP_KERNEL(
brelu, KP, plat::XPUPlace, brelu,
KP,
plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext, ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
phi::funcs::CudaBReluFunctor<float>>); phi::funcs::CudaBReluFunctor<float>>);
REGISTER_OP_KERNEL( REGISTER_OP_KERNEL(
brelu_grad, KP, plat::XPUPlace, brelu_grad,
KP,
plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext, ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
phi::funcs::CudaBReluGradFunctor<float>>); phi::funcs::CudaBReluGradFunctor<float>>);
REGISTER_OP_KERNEL(ceil, KP, plat::XPUPlace, REGISTER_OP_KERNEL(ceil,
KP,
plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext, ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaCeilFunctor<float>>); ops::CudaCeilFunctor<float>>);
REGISTER_OP_KERNEL( REGISTER_OP_KERNEL(
ceil_grad, KP, plat::XPUPlace, ceil_grad,
KP,
plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext, ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaZeroGradFunctor<float>>); ops::CudaZeroGradFunctor<float>>);
REGISTER_OP_KERNEL( REGISTER_OP_KERNEL(
celu, KP, plat::XPUPlace, celu,
KP,
plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext, ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
phi::funcs::CudaCELUFunctor<float>>); phi::funcs::CudaCELUFunctor<float>>);
REGISTER_OP_KERNEL( REGISTER_OP_KERNEL(
celu_grad, KP, plat::XPUPlace, celu_grad,
KP,
plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext, ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
phi::funcs::CudaCELUGradFunctor<float>>); phi::funcs::CudaCELUGradFunctor<float>>);
REGISTER_OP_KERNEL(elu, KP, plat::XPUPlace, REGISTER_OP_KERNEL(elu,
KP,
plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext, ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaELUFunctor<float>>); ops::CudaELUFunctor<float>>);
REGISTER_OP_KERNEL( REGISTER_OP_KERNEL(
elu_grad, KP, plat::XPUPlace, elu_grad,
KP,
plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext, ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaELUGradFunctor<float>>); ops::CudaELUGradFunctor<float>>);
REGISTER_OP_KERNEL(exp, KP, plat::XPUPlace, REGISTER_OP_KERNEL(exp,
KP,
plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext, ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaExpFunctor<float>>); ops::CudaExpFunctor<float>>);
REGISTER_OP_KERNEL( REGISTER_OP_KERNEL(
exp_grad, KP, plat::XPUPlace, exp_grad,
KP,
plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext, ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaExpGradFunctor<float>>); ops::CudaExpGradFunctor<float>>);
REGISTER_OP_KERNEL(floor, KP, plat::XPUPlace, REGISTER_OP_KERNEL(floor,
KP,
plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext, ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaFloorFunctor<float>>); ops::CudaFloorFunctor<float>>);
REGISTER_OP_KERNEL( REGISTER_OP_KERNEL(
floor_grad, KP, plat::XPUPlace, floor_grad,
KP,
plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext, ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaZeroGradFunctor<float>>); ops::CudaZeroGradFunctor<float>>);
REGISTER_OP_KERNEL( REGISTER_OP_KERNEL(
hard_shrink, KP, plat::XPUPlace, hard_shrink,
KP,
plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext, ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaHardShrinkFunctor<float>>); ops::CudaHardShrinkFunctor<float>>);
REGISTER_OP_KERNEL( REGISTER_OP_KERNEL(
hard_shrink_grad, KP, plat::XPUPlace, hard_shrink_grad,
KP,
plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext, ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaHardShrinkGradFunctor<float>>); ops::CudaHardShrinkGradFunctor<float>>);
REGISTER_OP_KERNEL( REGISTER_OP_KERNEL(
hard_sigmoid, KP, plat::XPUPlace, hard_sigmoid,
KP,
plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext, ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaHardSigmoidFunctor<float>>); ops::CudaHardSigmoidFunctor<float>>);
REGISTER_OP_KERNEL( REGISTER_OP_KERNEL(
hard_sigmoid_grad, KP, plat::XPUPlace, hard_sigmoid_grad,
KP,
plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext, ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaHardSigmoidGradFunctor<float>>); ops::CudaHardSigmoidGradFunctor<float>>);
REGISTER_OP_KERNEL(hard_swish, KP, plat::XPUPlace, REGISTER_OP_KERNEL(hard_swish,
KP,
plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext, ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaHardSwishFunctor<float>>); ops::CudaHardSwishFunctor<float>>);
REGISTER_OP_KERNEL( REGISTER_OP_KERNEL(
hard_swish_grad, KP, plat::XPUPlace, hard_swish_grad,
KP,
plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext, ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaHardSwishGradFunctor<float>>); ops::CudaHardSwishGradFunctor<float>>);
REGISTER_OP_KERNEL( REGISTER_OP_KERNEL(
leaky_relu, KP, plat::XPUPlace, leaky_relu,
KP,
plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext, ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
phi::funcs::CudaLeakyReluFunctor<float>>); phi::funcs::CudaLeakyReluFunctor<float>>);
REGISTER_OP_KERNEL( REGISTER_OP_KERNEL(
leaky_relu_grad, KP, plat::XPUPlace, leaky_relu_grad,
KP,
plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext, ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
phi::funcs::CudaLeakyReluGradFunctor<float>>); phi::funcs::CudaLeakyReluGradFunctor<float>>);
REGISTER_OP_KERNEL(log, KP, plat::XPUPlace, REGISTER_OP_KERNEL(log,
KP,
plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext, ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaLogFunctor<float>>); ops::CudaLogFunctor<float>>);
REGISTER_OP_KERNEL( REGISTER_OP_KERNEL(
log_grad, KP, plat::XPUPlace, log_grad,
KP,
plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext, ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaLogGradFunctor<float>>); ops::CudaLogGradFunctor<float>>);
REGISTER_OP_KERNEL(log1p, KP, plat::XPUPlace, REGISTER_OP_KERNEL(log1p,
KP,
plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext, ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaLog1pFunctor<float>>); ops::CudaLog1pFunctor<float>>);
REGISTER_OP_KERNEL( REGISTER_OP_KERNEL(
log1p_grad, KP, plat::XPUPlace, log1p_grad,
KP,
plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext, ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaLog1pGradFunctor<float>>); ops::CudaLog1pGradFunctor<float>>);
REGISTER_OP_KERNEL( REGISTER_OP_KERNEL(
logsigmoid, KP, plat::XPUPlace, logsigmoid,
KP,
plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext, ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaLogSigmoidFunctor<float>>); ops::CudaLogSigmoidFunctor<float>>);
REGISTER_OP_KERNEL( REGISTER_OP_KERNEL(
logsigmoid_grad, KP, plat::XPUPlace, logsigmoid_grad,
KP,
plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext, ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaLogSigmoidGradFunctor<float>>); ops::CudaLogSigmoidGradFunctor<float>>);
REGISTER_OP_KERNEL( REGISTER_OP_KERNEL(
reciprocal, KP, plat::XPUPlace, reciprocal,
KP,
plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext, ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaReciprocalFunctor<float>>); ops::CudaReciprocalFunctor<float>>);
REGISTER_OP_KERNEL( REGISTER_OP_KERNEL(
reciprocal_grad, KP, plat::XPUPlace, reciprocal_grad,
KP,
plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext, ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaReciprocalGradFunctor<float>>); ops::CudaReciprocalGradFunctor<float>>);
REGISTER_OP_KERNEL( REGISTER_OP_KERNEL(
relu, KP, plat::XPUPlace, relu,
KP,
plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext, ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
phi::funcs::CudaReluFunctor<float>>); phi::funcs::CudaReluFunctor<float>>);
REGISTER_OP_KERNEL( REGISTER_OP_KERNEL(
relu_grad, KP, plat::XPUPlace, relu_grad,
KP,
plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext, ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
phi::funcs::CudaReluGradFunctor<float>>); phi::funcs::CudaReluGradFunctor<float>>);
REGISTER_OP_KERNEL(relu6, KP, plat::XPUPlace, REGISTER_OP_KERNEL(relu6,
KP,
plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext, ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaRelu6Functor<float>>); ops::CudaRelu6Functor<float>>);
REGISTER_OP_KERNEL( REGISTER_OP_KERNEL(
relu6_grad, KP, plat::XPUPlace, relu6_grad,
KP,
plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext, ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaRelu6GradFunctor<float>>); ops::CudaRelu6GradFunctor<float>>);
REGISTER_OP_KERNEL(sigmoid, KP, plat::XPUPlace, REGISTER_OP_KERNEL(sigmoid,
KP,
plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext, ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaSigmoidFunctor<float>>); ops::CudaSigmoidFunctor<float>>);
REGISTER_OP_KERNEL( REGISTER_OP_KERNEL(
sigmoid_grad, KP, plat::XPUPlace, sigmoid_grad,
KP,
plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext, ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaSigmoidGradFunctor<float>>); ops::CudaSigmoidGradFunctor<float>>);
REGISTER_OP_KERNEL(silu, KP, plat::XPUPlace, REGISTER_OP_KERNEL(silu,
KP,
plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext, ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaSiluFunctor<float>>); ops::CudaSiluFunctor<float>>);
REGISTER_OP_KERNEL( REGISTER_OP_KERNEL(
silu_grad, KP, plat::XPUPlace, silu_grad,
KP,
plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext, ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaSiluGradFunctor<float>>); ops::CudaSiluGradFunctor<float>>);
REGISTER_OP_KERNEL(soft_relu, KP, plat::XPUPlace, REGISTER_OP_KERNEL(soft_relu,
KP,
plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext, ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaSoftReluFunctor<float>>); ops::CudaSoftReluFunctor<float>>);
REGISTER_OP_KERNEL( REGISTER_OP_KERNEL(
soft_relu_grad, KP, plat::XPUPlace, soft_relu_grad,
KP,
plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext, ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaSoftReluGradFunctor<float>>); ops::CudaSoftReluGradFunctor<float>>);
REGISTER_OP_KERNEL(softplus, KP, plat::XPUPlace, REGISTER_OP_KERNEL(softplus,
KP,
plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext, ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaSoftplusFunctor<float>>); ops::CudaSoftplusFunctor<float>>);
REGISTER_OP_KERNEL( REGISTER_OP_KERNEL(
softplus_grad, KP, plat::XPUPlace, softplus_grad,
KP,
plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext, ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaSoftplusGradFunctor<float>>); ops::CudaSoftplusGradFunctor<float>>);
REGISTER_OP_KERNEL( REGISTER_OP_KERNEL(
softshrink, KP, plat::XPUPlace, softshrink,
KP,
plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext, ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaSoftShrinkFunctor<float>>); ops::CudaSoftShrinkFunctor<float>>);
REGISTER_OP_KERNEL( REGISTER_OP_KERNEL(
softshrink_grad, KP, plat::XPUPlace, softshrink_grad,
KP,
plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext, ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaSoftShrinkGradFunctor<float>>); ops::CudaSoftShrinkGradFunctor<float>>);
REGISTER_OP_KERNEL(softsign, KP, plat::XPUPlace, REGISTER_OP_KERNEL(softsign,
KP,
plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext, ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaSoftsignFunctor<float>>); ops::CudaSoftsignFunctor<float>>);
REGISTER_OP_KERNEL( REGISTER_OP_KERNEL(
softsign_grad, KP, plat::XPUPlace, softsign_grad,
KP,
plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext, ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaSoftsignGradFunctor<float>>); ops::CudaSoftsignGradFunctor<float>>);
REGISTER_OP_KERNEL(sqrt, KP, plat::XPUPlace, REGISTER_OP_KERNEL(sqrt,
KP,
plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext, ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaSqrtFunctor<float>>); ops::CudaSqrtFunctor<float>>);
REGISTER_OP_KERNEL( REGISTER_OP_KERNEL(
sqrt_grad, KP, plat::XPUPlace, sqrt_grad,
KP,
plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext, ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaSqrtGradFunctor<float>>); ops::CudaSqrtGradFunctor<float>>);
REGISTER_OP_KERNEL(square, KP, plat::XPUPlace, REGISTER_OP_KERNEL(square,
KP,
plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext, ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaSquareFunctor<float>>); ops::CudaSquareFunctor<float>>);
REGISTER_OP_KERNEL( REGISTER_OP_KERNEL(
square_grad, KP, plat::XPUPlace, square_grad,
KP,
plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext, ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaSquareGradFunctor<float>>); ops::CudaSquareGradFunctor<float>>);
REGISTER_OP_KERNEL(swish, KP, plat::XPUPlace, REGISTER_OP_KERNEL(swish,
KP,
plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext, ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaSwishFunctor<float>>); ops::CudaSwishFunctor<float>>);
REGISTER_OP_KERNEL( REGISTER_OP_KERNEL(
swish_grad, KP, plat::XPUPlace, swish_grad,
KP,
plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext, ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaSwishGradFunctor<float>>); ops::CudaSwishGradFunctor<float>>);
REGISTER_OP_KERNEL( REGISTER_OP_KERNEL(
thresholded_relu, KP, plat::XPUPlace, thresholded_relu,
KP,
plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext, ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaThresholdedReluFunctor<float>>); ops::CudaThresholdedReluFunctor<float>>);
REGISTER_OP_KERNEL( REGISTER_OP_KERNEL(
thresholded_relu_grad, KP, plat::XPUPlace, thresholded_relu_grad,
KP,
plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext, ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaThresholdedReluGradFunctor<float>>); ops::CudaThresholdedReluGradFunctor<float>>);
......
...@@ -75,6 +75,7 @@ DECLARE_ACTIVATION_KERNEL(Negative) ...@@ -75,6 +75,7 @@ DECLARE_ACTIVATION_KERNEL(Negative)
DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(LeakyRelu, alpha) DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(LeakyRelu, alpha)
DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(ThresholdedRelu, threshold) DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(ThresholdedRelu, threshold)
DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(Relu6, threshold)
DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(SoftShrink, lambda) DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(SoftShrink, lambda)
DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(HardShrink, threshold) DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(HardShrink, threshold)
DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(Elu, alpha) DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(Elu, alpha)
......
...@@ -156,6 +156,9 @@ DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(LeakyRelu, ...@@ -156,6 +156,9 @@ DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(LeakyRelu,
DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(ThresholdedRelu, DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(ThresholdedRelu,
ThresholdedReluGradFunctor, ThresholdedReluGradFunctor,
threshold); threshold);
DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPOUT(Relu6,
Relu6GradFunctor,
threshold);
DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(SoftShrink, DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(SoftShrink,
SoftShrinkGradFunctor, SoftShrinkGradFunctor,
lambda); lambda);
...@@ -263,6 +266,7 @@ PD_REGISTER_ACTIVATION_GRAD_KERNEL(brelu_grad, BReluGradKernel) ...@@ -263,6 +266,7 @@ PD_REGISTER_ACTIVATION_GRAD_KERNEL(brelu_grad, BReluGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(leaky_relu_grad, LeakyReluGradKernel) PD_REGISTER_ACTIVATION_GRAD_KERNEL(leaky_relu_grad, LeakyReluGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(thresholded_relu_grad, PD_REGISTER_ACTIVATION_GRAD_KERNEL(thresholded_relu_grad,
ThresholdedReluGradKernel) ThresholdedReluGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(relu6_grad, Relu6GradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(soft_shrink_grad, SoftShrinkGradKernel) PD_REGISTER_ACTIVATION_GRAD_KERNEL(soft_shrink_grad, SoftShrinkGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(hard_shrink_grad, HardShrinkGradKernel) PD_REGISTER_ACTIVATION_GRAD_KERNEL(hard_shrink_grad, HardShrinkGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(tanh_shrink_grad, TanhShrinkGradKernel) PD_REGISTER_ACTIVATION_GRAD_KERNEL(tanh_shrink_grad, TanhShrinkGradKernel)
......
...@@ -95,6 +95,7 @@ DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(LeakyRelu, LeakyReluFunctor, alpha) ...@@ -95,6 +95,7 @@ DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(LeakyRelu, LeakyReluFunctor, alpha)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(ThresholdedRelu, DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(ThresholdedRelu,
ThresholdedReluFunctor, ThresholdedReluFunctor,
threshold) threshold)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(Relu6, Relu6Functor, threshold)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(Mish, MishFunctor, threshold) DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(Mish, MishFunctor, threshold)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(HardShrink, HardShrinkFunctor, threshold) DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(HardShrink, HardShrinkFunctor, threshold)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(SoftShrink, SoftShrinkFunctor, lambda) DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(SoftShrink, SoftShrinkFunctor, lambda)
...@@ -147,6 +148,7 @@ PD_REGISTER_ACTIVATION_KERNEL(tanh, TanhKernel) ...@@ -147,6 +148,7 @@ PD_REGISTER_ACTIVATION_KERNEL(tanh, TanhKernel)
PD_REGISTER_ACTIVATION_KERNEL(brelu, BReluKernel) PD_REGISTER_ACTIVATION_KERNEL(brelu, BReluKernel)
PD_REGISTER_ACTIVATION_KERNEL(leaky_relu, LeakyReluKernel) PD_REGISTER_ACTIVATION_KERNEL(leaky_relu, LeakyReluKernel)
PD_REGISTER_ACTIVATION_KERNEL(thresholded_relu, ThresholdedReluKernel) PD_REGISTER_ACTIVATION_KERNEL(thresholded_relu, ThresholdedReluKernel)
PD_REGISTER_ACTIVATION_KERNEL(relu6, Relu6Kernel)
PD_REGISTER_ACTIVATION_KERNEL(hard_shrink, HardShrinkKernel) PD_REGISTER_ACTIVATION_KERNEL(hard_shrink, HardShrinkKernel)
PD_REGISTER_ACTIVATION_KERNEL(soft_shrink, SoftShrinkKernel) PD_REGISTER_ACTIVATION_KERNEL(soft_shrink, SoftShrinkKernel)
PD_REGISTER_ACTIVATION_KERNEL(tanh_shrink, TanhShrinkKernel) PD_REGISTER_ACTIVATION_KERNEL(tanh_shrink, TanhShrinkKernel)
......
...@@ -1097,6 +1097,44 @@ struct ThresholdedReluGradFunctor : public BaseActivationFunctor<T> { ...@@ -1097,6 +1097,44 @@ struct ThresholdedReluGradFunctor : public BaseActivationFunctor<T> {
static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; } static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
}; };
// relu6(x) = min(max(0, x), 6)
template <typename T>
struct Relu6Functor : public BaseActivationFunctor<T> {
float threshold;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"threshold", &threshold}};
}
template <typename Device, typename X, typename Out>
void operator()(Device d, X x, Out out) const {
out.device(d) =
x.cwiseMax(static_cast<T>(0)).cwiseMin(static_cast<T>(threshold));
}
};
template <typename T>
struct Relu6GradFunctor : public BaseActivationFunctor<T> {
float threshold;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"threshold", &threshold}};
}
template <typename Device,
typename X,
typename Out,
typename dOut,
typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
dx.device(d) =
dout * ((out > static_cast<T>(0)) * (out < static_cast<T>(threshold)))
.template cast<T>();
}
static constexpr ActBwdOpFwdDeps FwdDeps() {
return ActBwdOpFwdDeps::kDepOut;
}
};
// tanhshrink(x) = x - tanh(x) // tanhshrink(x) = x - tanh(x)
// where tanh(x) = (exp(x) - exp(-x)) / (exp(x) + exp(-x)) // where tanh(x) = (exp(x) - exp(-x)) / (exp(x) + exp(-x))
template <typename T> template <typename T>
...@@ -2712,6 +2750,41 @@ struct CudaThresholdedReluGradFunctor : public BaseActivationFunctor<T> { ...@@ -2712,6 +2750,41 @@ struct CudaThresholdedReluGradFunctor : public BaseActivationFunctor<T> {
static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; } static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
}; };
template <typename T>
struct CudaRelu6Functor : public BaseActivationFunctor<T> {
T zero = static_cast<T>(0.0f);
float threshold;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"threshold", &threshold}};
}
// relu6(x) = min(max(0, x), 6)
__device__ __forceinline__ T operator()(const T x) const {
T t = static_cast<T>(threshold);
return x <= zero ? zero : (x < t ? x : t);
}
};
template <typename T>
struct CudaRelu6GradFunctor : public BaseActivationFunctor<T> {
T zero = static_cast<T>(0.0f);
float threshold;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"threshold", &threshold}};
}
// dx = (out > 0 && out < t) ? dout : 0
__device__ __forceinline__ T operator()(const T dout, const T out) const {
T t = static_cast<T>(threshold);
return (out > zero && out < t) ? dout : zero;
}
static constexpr ActBwdOpFwdDeps FwdDeps() {
return ActBwdOpFwdDeps::kDepOut;
}
};
template <typename T> template <typename T>
struct CudaLeakyReluFunctor : public BaseActivationFunctor<T> { struct CudaLeakyReluFunctor : public BaseActivationFunctor<T> {
T zero = static_cast<T>(0.0f); T zero = static_cast<T>(0.0f);
......
...@@ -223,6 +223,9 @@ DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(Mish, ...@@ -223,6 +223,9 @@ DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(Mish,
DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(Celu, DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(Celu,
CudaCELUGradFunctor, CudaCELUGradFunctor,
alpha); alpha);
DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPOUT(Relu6,
CudaRelu6GradFunctor,
threshold);
DEFINE_GPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX(BRelu, DEFINE_GPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX(BRelu,
CudaBReluGradFunctor, CudaBReluGradFunctor,
...@@ -348,6 +351,7 @@ PD_REGISTER_ACTIVATION_GRAD_KERNEL(leaky_relu_double_grad, ...@@ -348,6 +351,7 @@ PD_REGISTER_ACTIVATION_GRAD_KERNEL(leaky_relu_double_grad,
LeakyReluDoubleGradKernel) LeakyReluDoubleGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(thresholded_relu_grad, PD_REGISTER_ACTIVATION_GRAD_KERNEL(thresholded_relu_grad,
ThresholdedReluGradKernel) ThresholdedReluGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(relu6_grad, Relu6GradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(mish_grad, MishGradKernel) PD_REGISTER_ACTIVATION_GRAD_KERNEL(mish_grad, MishGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(stanh_grad, STanhGradKernel) PD_REGISTER_ACTIVATION_GRAD_KERNEL(stanh_grad, STanhGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(reciprocal_grad, ReciprocalGradKernel) PD_REGISTER_ACTIVATION_GRAD_KERNEL(reciprocal_grad, ReciprocalGradKernel)
......
...@@ -111,6 +111,7 @@ DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(LeakyRelu, CudaLeakyReluFunctor, alpha) ...@@ -111,6 +111,7 @@ DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(LeakyRelu, CudaLeakyReluFunctor, alpha)
DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(ThresholdedRelu, DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(ThresholdedRelu,
CudaThresholdedReluFunctor, CudaThresholdedReluFunctor,
threshold) threshold)
DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(Relu6, CudaRelu6Functor, threshold)
DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(HardShrink, DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(HardShrink,
CudaHardShrinkFunctor, CudaHardShrinkFunctor,
threshold) threshold)
...@@ -192,6 +193,7 @@ PD_REGISTER_ACTIVATION_KERNEL(atanh, AtanhKernel) ...@@ -192,6 +193,7 @@ PD_REGISTER_ACTIVATION_KERNEL(atanh, AtanhKernel)
PD_REGISTER_ACTIVATION_KERNEL(tanh, TanhKernel) PD_REGISTER_ACTIVATION_KERNEL(tanh, TanhKernel)
PD_REGISTER_ACTIVATION_KERNEL(brelu, BReluKernel) PD_REGISTER_ACTIVATION_KERNEL(brelu, BReluKernel)
PD_REGISTER_ACTIVATION_KERNEL(thresholded_relu, ThresholdedReluKernel) PD_REGISTER_ACTIVATION_KERNEL(thresholded_relu, ThresholdedReluKernel)
PD_REGISTER_ACTIVATION_KERNEL(relu6, Relu6Kernel)
PD_REGISTER_ACTIVATION_KERNEL(leaky_relu, LeakyReluKernel) PD_REGISTER_ACTIVATION_KERNEL(leaky_relu, LeakyReluKernel)
PD_REGISTER_ACTIVATION_KERNEL(mish, MishKernel) PD_REGISTER_ACTIVATION_KERNEL(mish, MishKernel)
PD_REGISTER_ACTIVATION_KERNEL(stanh, StanhKernel) PD_REGISTER_ACTIVATION_KERNEL(stanh, StanhKernel)
......
...@@ -82,14 +82,15 @@ DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Softplus, ...@@ -82,14 +82,15 @@ DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Softplus,
"softplus", "softplus",
"beta" comma "threshold"); // NOLINT "beta" comma "threshold"); // NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Relu, "relu", ); // NOLINT DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Relu, "relu", ); // NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Tanh, "tanh", ); // NOLINT DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Tanh, "tanh", ); // NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Sigmoid, "sigmoid", ); // NOLINT DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Sigmoid, "sigmoid", ); // NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Exp, "exp", ); // NOLINT DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Exp, "exp", ); // NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Expm1, "expm1", ); // NOLINT DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Expm1, "expm1", ); // NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Reciprocal, "reciprocal", ); // NOLINT DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Reciprocal, "reciprocal", ); // NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Sqrt, "sqrt", ); // NOLINT DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Sqrt, "sqrt", ); // NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Rsqrt, "rsqrt", ); // NOLINT DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Rsqrt, "rsqrt", ); // NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Relu6, "relu6", "threshold"); // NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(HardSigmoid, DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(HardSigmoid,
"hard_sigmoid", "hard_sigmoid",
...@@ -282,6 +283,7 @@ PD_REGISTER_ARG_MAPPING_FN(leaky_relu_grad_grad, ...@@ -282,6 +283,7 @@ PD_REGISTER_ARG_MAPPING_FN(leaky_relu_grad_grad,
phi::LeakyReluDoubleGradOpArgumentMapping); phi::LeakyReluDoubleGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(thresholded_relu_grad, PD_REGISTER_ARG_MAPPING_FN(thresholded_relu_grad,
phi::ThresholdedReluGradOpArgumentMapping); phi::ThresholdedReluGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(relu6_grad, phi::Relu6GradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(softshrink_grad, PD_REGISTER_ARG_MAPPING_FN(softshrink_grad,
phi::SoftShrinkGradOpArgumentMapping); phi::SoftShrinkGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(hard_shrink_grad, PD_REGISTER_ARG_MAPPING_FN(hard_shrink_grad,
......
...@@ -1737,6 +1737,7 @@ class TestRelu6(TestActivation): ...@@ -1737,6 +1737,7 @@ class TestRelu6(TestActivation):
def setUp(self): def setUp(self):
self.op_type = "relu6" self.op_type = "relu6"
self.init_dtype() self.init_dtype()
self.python_api = paddle.nn.functional.relu6
np.random.seed(1024) np.random.seed(1024)
x = np.random.uniform(-1, 10, [10, 12]).astype(self.dtype) x = np.random.uniform(-1, 10, [10, 12]).astype(self.dtype)
...@@ -1750,7 +1751,7 @@ class TestRelu6(TestActivation): ...@@ -1750,7 +1751,7 @@ class TestRelu6(TestActivation):
def test_check_grad(self): def test_check_grad(self):
if self.dtype == np.float16: if self.dtype == np.float16:
return return
self.check_grad(['X'], 'Out') self.check_grad(['X'], 'Out', check_eager=True)
class TestRelu6API(unittest.TestCase): class TestRelu6API(unittest.TestCase):
......
...@@ -860,6 +860,8 @@ def relu6(x, name=None): ...@@ -860,6 +860,8 @@ def relu6(x, name=None):
out = F.relu6(x) # [0, 0.3, 6] out = F.relu6(x) # [0, 0.3, 6]
""" """
threshold = 6.0 threshold = 6.0
if in_dygraph_mode():
return _C_ops.final_state_relu6(x, threshold)
if in_dynamic_mode(): if in_dynamic_mode():
return _C_ops.relu6(x, 'threshold', threshold) return _C_ops.relu6(x, 'threshold', threshold)
......
...@@ -1713,6 +1713,16 @@ ...@@ -1713,6 +1713,16 @@
inplace : (x -> out) inplace : (x -> out)
backward : relu_grad backward : relu_grad
- api : relu6
args : (Tensor x, float threshold)
output : Tensor
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : relu6
backward : relu6_grad
- api : reshape - api : reshape
args : (Tensor x, IntArray shape) args : (Tensor x, IntArray shape)
output : Tensor(out), Tensor(xshape) output : Tensor(out), Tensor(xshape)
......
...@@ -1565,6 +1565,17 @@ ...@@ -1565,6 +1565,17 @@
kernel : kernel :
func : prod_grad func : prod_grad
- backward_api : relu6_grad
forward : relu6 (Tensor x, float threshold) -> Tensor(out)
args : (Tensor out, Tensor out_grad, float threshold)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [out]
kernel :
func : relu6_grad
inplace : (out_grad -> x_grad)
- backward_api : relu_double_grad - backward_api : relu_double_grad
forward : relu_grad (Tensor out, Tensor grad_out) -> Tensor(grad_x) forward : relu_grad (Tensor out, Tensor grad_out) -> Tensor(grad_x)
args : (Tensor out, Tensor grad_x_grad) args : (Tensor out, Tensor grad_x_grad)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册