Unverified commit d2f87d96, authored by zhangyikun02, committed by GitHub

add exp_grad, hard_sigmoid and hard_sigmoid_grad for xpu, test=kunlun (#48307)

Parent 22555e96
@@ -220,6 +220,7 @@ XPUOpMap& get_kl2_ops() {
      XPUKernelSet({pOpKernelType(vartype::INT64, XPUPlace()),
                    pOpKernelType(vartype::INT32, XPUPlace()),
                    pOpKernelType(vartype::FP32, XPUPlace())})},
+    {"exp_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
     {"exp", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
     {"expand_as_v2",
      XPUKernelSet({pOpKernelType(vartype::INT32, XPUPlace()),
@@ -314,6 +315,10 @@ XPUOpMap& get_kl2_ops() {
      pOpKernelType(vartype::FP32, XPUPlace())})},
     {"grid_sampler",
      XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+    {"hard_sigmoid_grad",
+     XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+    {"hard_sigmoid",
+     XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
     {"hard_swish_grad",
      XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
                    pOpKernelType(vartype::FP16, XPUPlace())})},
......
@@ -160,6 +160,21 @@ int xpu_activation_backward(const Context& dev_ctx,
   return r;
 }
 
+template <typename T>
+struct XPUExpGradFunctor : public funcs::BaseActivationFunctor<T> {
+  using XPUType = typename XPUTypeTrait<T>::Type;
+  template <typename Context>
+  void operator()(const Context& dev_ctx,
+                  const DenseTensor* x,
+                  const DenseTensor* out,
+                  const DenseTensor* dout,
+                  DenseTensor* dx) const {
+    int r = xpu_activation_backward<Context, T, XPUType>(
+        dev_ctx, x, out, dout, dx, xpu::exp_grad<XPUType>);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "exp_grad");
+  }
+};
+
 template <typename T>
 struct XPULogGradFunctor : public funcs::BaseActivationFunctor<T> {
   template <typename Context>
@@ -238,6 +253,39 @@ struct XPULeakyReluGradFunctor : public funcs::BaseActivationFunctor<T> {
   }
 };
 
+template <typename T>
+struct XPUHardSigmoidGradFunctor : public funcs::BaseActivationFunctor<T> {
+  using XPUType = typename XPUTypeTrait<T>::Type;
+  float slope;
+  float offset;
+  typename funcs::BaseActivationFunctor<T>::AttrPair GetAttrs() {
+    return {{"slope", &slope}, {"offset", &offset}};
+  }
+
+  template <typename Context>
+  void operator()(const Context& dev_ctx,
+                  const DenseTensor* x,
+                  const DenseTensor* out,
+                  const DenseTensor* dout,
+                  DenseTensor* dx) const {
+    const T* y_data = out->data<T>();
+    const T* y_grad = dout->data<T>();
+    T* x_grad = dx->data<T>();
+
+    auto xpu_context = dev_ctx.x_context();
+    int r = xpu::hard_sigmoid_grad(
+        xpu_context,
+        reinterpret_cast<const XPUType*>(
+            y_data),  // hard_sigmoid_grad do not need x_data
+        reinterpret_cast<const XPUType*>(y_data),
+        reinterpret_cast<const XPUType*>(y_grad),
+        reinterpret_cast<XPUType*>(x_grad),
+        dx->numel(),
+        slope);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "hard_sigmoid_grad");
+  }
+};
+
 template <typename T>
 struct XPUHardSwishGradFunctor : public funcs::BaseActivationFunctor<T> {
   float threshold;
@@ -497,6 +545,7 @@ struct XPUSoftPlusGradFunctor : public funcs::BaseActivationFunctor<T> {
   }
 };
 
+DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Exp, XPUExpGradFunctor);
 DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Reciprocal, XPUReciprocalGradFunctor);
 DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Sigmoid, XPUSigmoidGradFunctor);
 DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Sqrt, XPUSqrtGradFunctor);
@@ -524,6 +573,10 @@ DEFINE_XPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX(Softplus,
                                                XPUSoftPlusGradFunctor,
                                                beta,
                                                threshold)
+DEFINE_XPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPOUT(HardSigmoid,
+                                                  XPUHardSigmoidGradFunctor,
+                                                  slope,
+                                                  offset)
 
 template <typename T, typename Context>
 void HardSwishGradKernel(const Context& dev_ctx,
@@ -560,8 +613,10 @@ PD_REGISTER_KERNEL(tanh_grad,
                    phi::TanhGradKernel,
                    float,
                    phi::dtype::float16) {}
+PD_REGISTER_ACTIVATION_GRAD_KERNEL(exp_grad, ExpGradKernel)
 PD_REGISTER_ACTIVATION_GRAD_KERNEL(log_grad, LogGradKernel)
 PD_REGISTER_ACTIVATION_GRAD_KERNEL(leaky_relu_grad, LeakyReluGradKernel)
+PD_REGISTER_ACTIVATION_GRAD_KERNEL(hard_sigmoid_grad, HardSigmoidGradKernel)
 PD_REGISTER_ACTIVATION_GRAD_KERNEL(hard_swish_grad, HardSwishGradKernel)
 PD_REGISTER_ACTIVATION_GRAD_KERNEL(reciprocal_grad, ReciprocalGradKernel)
 PD_REGISTER_ACTIVATION_GRAD_KERNEL(relu6_grad, Relu6GradKernel)
......
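
For reference, both new backward functors are wired up through the DEPOUT macros because their gradients can be computed from the forward output alone. Assuming Paddle's usual definitions of these activations, the math delegated to xpu::exp_grad and xpu::hard_sigmoid_grad is:

    y = exp(x)                           =>  dx = dout * y
    y = clip(slope * x + offset, 0, 1)   =>  dx = slope * dout  if 0 < y < 1,  else 0

This is also why XPUHardSigmoidGradFunctor passes y_data in the slot where x would normally go: the input values are never needed, as the inline comment in the functor notes.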
@@ -226,6 +226,25 @@ void PowKernel(const Context& dev_ctx,
   PADDLE_ENFORCE_XDNN_SUCCESS(r, "broadcast_pow");
 }
 
+template <typename T>
+struct XPUHardSigmoidFunctor : public funcs::BaseActivationFunctor<T> {
+  float slope;
+  float offset;
+  typename funcs::BaseActivationFunctor<T>::AttrPair GetAttrs() {
+    return {{"slope", &slope}, {"offset", &offset}};
+  }
+
+  template <typename Context>
+  void operator()(const Context& dev_ctx,
+                  const DenseTensor& x,
+                  DenseTensor* out) const {
+    using XPUType = typename XPUTypeTrait<T>::Type;
+    int r = xpu_activation_1attr_func<Context, T, XPUType>(
+        dev_ctx, x, out, slope, xpu::hard_sigmoid<XPUType>);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "hard_sigmoid");
+  }
+};
+
 template <typename T>
 struct XPUHardSwishFunctor : public funcs::BaseActivationFunctor<T> {
   float threshold;
@@ -428,6 +447,10 @@ DEFINE_XPU_ACTIVATION_KERNEL_WITH_TWO_ATTRS(Softplus,
                                             XPUSoftplusFunctor,
                                             beta,
                                             threshold)
+DEFINE_XPU_ACTIVATION_KERNEL_WITH_TWO_ATTRS(HardSigmoid,
+                                            XPUHardSigmoidFunctor,
+                                            slope,
+                                            offset)
 
 template <typename T, typename Context>
 void HardSwishRawKernel(const Context& dev_ctx,
@@ -459,6 +482,7 @@ PD_REGISTER_KERNEL(
 PD_REGISTER_ACTIVATION_KERNEL(exp, ExpKernel)  // no grad
 PD_REGISTER_ACTIVATION_KERNEL(log, LogKernel)
 PD_REGISTER_ACTIVATION_KERNEL(leaky_relu, LeakyReluKernel)
+PD_REGISTER_ACTIVATION_KERNEL(hard_sigmoid, HardSigmoidKernel)
 PD_REGISTER_ACTIVATION_KERNEL(hard_swish_raw, HardSwishRawKernel)
 PD_REGISTER_ACTIVATION_KERNEL(mish, MishKernel)
 PD_REGISTER_ACTIVATION_KERNEL(pow, PowKernel)
......
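
For intuition only, here is a minimal standalone C++ sketch of the semantics the new hard_sigmoid kernels are expected to provide. It mirrors the formulas noted above on plain std::vector data rather than calling the XDNN routines; the helper names (hard_sigmoid_ref, hard_sigmoid_grad_ref) are illustrative and not Paddle or XDNN APIs, and the slope = 1/6, offset = 0.5 values are the framework's usual hardsigmoid defaults.

#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <vector>

// Reference semantics (not the XDNN API):
// forward:  y  = clip(slope * x + offset, 0, 1)
// backward: dx = dout * slope where 0 < y < 1, otherwise 0
//           (depends only on y, matching the DEPOUT registration above)
void hard_sigmoid_ref(const std::vector<float>& x,
                      std::vector<float>* y,
                      float slope,
                      float offset) {
  y->resize(x.size());
  for (std::size_t i = 0; i < x.size(); ++i) {
    (*y)[i] = std::min(1.0f, std::max(0.0f, slope * x[i] + offset));
  }
}

void hard_sigmoid_grad_ref(const std::vector<float>& y,
                           const std::vector<float>& dout,
                           std::vector<float>* dx,
                           float slope) {
  dx->resize(y.size());
  for (std::size_t i = 0; i < y.size(); ++i) {
    (*dx)[i] = (y[i] > 0.0f && y[i] < 1.0f) ? dout[i] * slope : 0.0f;
  }
}

int main() {
  const float slope = 1.0f / 6.0f, offset = 0.5f;
  std::vector<float> x = {-4.0f, -1.0f, 0.0f, 1.0f, 4.0f}, y, dx;
  std::vector<float> dout(x.size(), 1.0f);  // upstream gradient of all ones
  hard_sigmoid_ref(x, &y, slope, offset);
  hard_sigmoid_grad_ref(y, dout, &dx, slope);
  for (std::size_t i = 0; i < x.size(); ++i) {
    std::printf("x=%5.2f  y=%5.3f  dx=%6.3f\n", x[i], y[i], dx[i]);
  }
  return 0;
}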