Unverified Commit 6c5f9aa8 authored by ykkk2333 and committed by GitHub

migrate xpu activation/activation_grad/transpose/transpose_grad/tril_triu/tril_triu_grad kernel to PHI, test=kunlun (#45554)
Parent 530f6b79
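For orientation, here is a condensed before/after sketch of the migration pattern, assembled from code that appears verbatim in the files below; it is illustrative only, not an additional change in this diff:

// Fluid-style XPU activation kernel (first file below):
//   struct XPUAbsFunctor : public BaseActivationFunctor<T> { ... };
//   REGISTER_OP_XPU_KERNEL(abs,
//                          ops::XPUActivationKernel<ops::XPUAbsFunctor<float>>);
//
// Equivalent PHI kernel and registration (abs kernel file below):
namespace phi {
template <typename T, typename Context>
void AbsKernel(const Context& ctx, const DenseTensor& x, DenseTensor* out) {
  ctx.template Alloc<T>(out);  // allocate the output on the XPU place
  int r = xpu::abs(ctx.x_context(), x.data<T>(), out->data<T>(), x.numel());
  PADDLE_ENFORCE_XDNN_SUCCESS(r, "abs");
}
}  // namespace phi
PD_REGISTER_KERNEL(abs, XPU, ALL_LAYOUT, phi::AbsKernel, float) {}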
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_XPU
#include <string>
#include "paddle/fluid/operators/activation_op.h"
#include "paddle/fluid/platform/device/device_wrapper.h"
#include "paddle/fluid/platform/device/xpu/xpu_header.h"
namespace paddle {
namespace operators {
using paddle::framework::Tensor;
template <typename Functor>
class XPUActivationKernel
: public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
public:
void Compute(const framework::ExecutionContext &context) const override {
Functor functor;
auto attrs = functor.GetAttrs();
for (auto &attr : attrs) {
*attr.second = context.Attr<float>(attr.first);
}
functor(context);
}
};
template <typename Functor>
class XPUActivationGradKernel
: public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
public:
void Compute(const framework::ExecutionContext &context) const override {
Functor functor;
auto attrs = functor.GetAttrs();
for (auto &attr : attrs) {
*attr.second = context.Attr<float>(attr.first);
}
functor(context);
}
};
template <typename DeviceContext, typename T, typename XPUT>
void xpu_activation_forward(
const framework::ExecutionContext &ctx,
std::function<int(xpu::Context *, const XPUT *, XPUT *, int)> func) {
const auto *x = ctx.Input<Tensor>("X");
auto *y = ctx.Output<Tensor>("Out");
const XPUT *x_data = reinterpret_cast<const XPUT *>(x->data<T>());
XPUT *y_data = reinterpret_cast<XPUT *>(y->mutable_data<T>(ctx.GetPlace()));
auto xpu_context = ctx.device_context<DeviceContext>().x_context();
int r = func(xpu_context, x_data, y_data, x->numel());
PADDLE_ENFORCE_EQ(
r,
xpu::Error_t::SUCCESS,
platform::errors::External("XPU activation op return wrong value[%d %s].",
r,
XPUAPIErrorMsg[r]));
}
template <typename DeviceContext, typename T, typename XPUT>
void xpu_activation_backward(
const framework::ExecutionContext &ctx,
std::function<int(
xpu::Context *, const XPUT *, const XPUT *, const XPUT *, XPUT *, int)>
func) {
/* TODO: relu, tanh and sigmoid are in-place */
const auto *x = ctx.Input<Tensor>("X");
auto *y = ctx.Input<Tensor>("Out");
auto *dOut = ctx.Input<framework::Tensor>(framework::GradVarName("Out"));
auto *dX = ctx.Output<framework::Tensor>(framework::GradVarName("X"));
const XPUT *x_data = nullptr;
const XPUT *y_data = nullptr;
const XPUT *y_grad = nullptr;
if (x != nullptr) x_data = reinterpret_cast<const XPUT *>(x->data<T>());
if (y != nullptr) y_data = reinterpret_cast<const XPUT *>(y->data<T>());
if (dOut != nullptr) y_grad = reinterpret_cast<const XPUT *>(dOut->data<T>());
XPUT *x_grad = reinterpret_cast<XPUT *>(dX->mutable_data<T>(ctx.GetPlace()));
auto xpu_context = ctx.device_context<DeviceContext>().x_context();
int r = func(xpu_context, x_data, y_data, y_grad, x_grad, dX->numel());
PADDLE_ENFORCE_EQ(r,
xpu::Error_t::SUCCESS,
platform::errors::External(
"XPU activation grad op return wrong value[%d %s].",
r,
XPUAPIErrorMsg[r]));
}
template <typename T>
struct XPUAbsFunctor : public BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
void operator()(const framework::ExecutionContext &ctx) const {
xpu_activation_forward<paddle::platform::XPUDeviceContext, T, XPUType>(
ctx, xpu::abs<XPUType>);
}
};
template <typename T>
struct XPUAbsGradFunctor : public BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
void operator()(const framework::ExecutionContext &ctx) const {
xpu_activation_backward<paddle::platform::XPUDeviceContext, T, XPUType>(
ctx, xpu::abs_grad<XPUType>);
}
};
template <typename T>
struct XPUExpFunctor : public BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
void operator()(const framework::ExecutionContext &ctx) const {
xpu_activation_forward<paddle::platform::XPUDeviceContext, T, XPUType>(
ctx, xpu::exp<XPUType>);
}
};
template <typename T>
struct XPULogFunctor : public BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
void operator()(const framework::ExecutionContext &ctx) const {
xpu_activation_forward<paddle::platform::XPUDeviceContext, T, XPUType>(
ctx, xpu::log<XPUType>);
}
};
template <typename T>
struct XPUReciprocalFunctor : public BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
void operator()(const framework::ExecutionContext &ctx) const {
xpu_activation_forward<paddle::platform::XPUDeviceContext, T, XPUType>(
ctx, xpu::reciprocal<XPUType>);
}
};
template <typename T>
struct XPUReciprocalGradFunctor : public BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
void operator()(const framework::ExecutionContext &ctx) const {
xpu_activation_backward<paddle::platform::XPUDeviceContext, T, XPUType>(
ctx, xpu::reciprocal_grad<XPUType>);
}
};
template <typename T>
struct XPUReluGradFunctor : public BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
void operator()(const framework::ExecutionContext &ctx) const {
xpu_activation_backward<paddle::platform::XPUDeviceContext, T, XPUType>(
ctx, xpu::relu_grad<XPUType>);
}
};
template <typename T>
struct XPURelu6Functor : public BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
void operator()(const framework::ExecutionContext &ctx) const {
xpu_activation_forward<paddle::platform::XPUDeviceContext, T, XPUType>(
ctx, xpu::relu6<XPUType>);
}
};
template <typename T>
struct XPURelu6GradFunctor : public BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
void operator()(const framework::ExecutionContext &ctx) const {
xpu_activation_backward<paddle::platform::XPUDeviceContext, T, XPUType>(
ctx, xpu::relu6_grad<XPUType>);
}
};
template <typename T>
struct XPUSigmoidFunctor : public BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
void operator()(const framework::ExecutionContext &ctx) const {
xpu_activation_forward<paddle::platform::XPUDeviceContext, T, XPUType>(
ctx, xpu::sigmoid<XPUType>);
}
};
template <typename T>
struct XPUSigmoidGradFunctor : public BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
void operator()(const framework::ExecutionContext &ctx) const {
xpu_activation_backward<paddle::platform::XPUDeviceContext, T, XPUType>(
ctx, xpu::sigmoid_grad<XPUType>);
}
};
template <typename T>
struct XPUSqrtFunctor : public BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
void operator()(const framework::ExecutionContext &ctx) const {
xpu_activation_forward<paddle::platform::XPUDeviceContext, T, XPUType>(
ctx, xpu::sqrt<XPUType>);
}
};
template <typename T>
struct XPUSqrtGradFunctor : public BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
void operator()(const framework::ExecutionContext &ctx) const {
xpu_activation_backward<paddle::platform::XPUDeviceContext, T, XPUType>(
ctx, xpu::sqrt_grad<XPUType>);
}
};
template <typename T>
struct XPUSquareFunctor : public BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
void operator()(const framework::ExecutionContext &ctx) const {
xpu_activation_forward<paddle::platform::XPUDeviceContext, T, XPUType>(
ctx, xpu::square<XPUType>);
}
};
template <typename T>
struct XPUSquareGradFunctor : public BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
void operator()(const framework::ExecutionContext &ctx) const {
xpu_activation_backward<paddle::platform::XPUDeviceContext, T, XPUType>(
ctx, xpu::square_grad<XPUType>);
}
};
template <typename T>
struct XPUTanhFunctor : public BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
void operator()(const framework::ExecutionContext &ctx) const {
xpu_activation_forward<paddle::platform::XPUDeviceContext, T, XPUType>(
ctx, xpu::tanh<XPUType>);
}
};
template <typename T>
struct XPUTanhGradFunctor : public BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
void operator()(const framework::ExecutionContext &ctx) const {
xpu_activation_backward<paddle::platform::XPUDeviceContext, T, XPUType>(
ctx, xpu::tanh_grad<XPUType>);
}
};
template <typename T>
struct XPUHardSwishFunctor : public BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
void operator()(const framework::ExecutionContext &ctx) const {
float threshold = ctx.Attr<float>("threshold");
float scale = ctx.Attr<float>("scale");
float offset = ctx.Attr<float>("offset");
PADDLE_ENFORCE_EQ(threshold,
6.0f,
platform::errors::External(
"Not support threshold [%f] in XPU", threshold));
PADDLE_ENFORCE_EQ(
scale,
6.0f,
platform::errors::External("Not support scale [%f] in XPU", scale));
PADDLE_ENFORCE_EQ(
offset,
3.0f,
platform::errors::External("Not support offset [%f] in XPU", offset));
xpu_activation_forward<paddle::platform::XPUDeviceContext, T, XPUType>(
ctx, xpu::hard_swish<XPUType>);
}
};
template <typename T>
struct XPUHardSwishGradFunctor : public BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
void operator()(const framework::ExecutionContext &ctx) const {
float threshold = ctx.Attr<float>("threshold");
float scale = ctx.Attr<float>("scale");
float offset = ctx.Attr<float>("offset");
PADDLE_ENFORCE_EQ(threshold,
6.0f,
platform::errors::External(
"Not support threshold [%f] in XPU", threshold));
PADDLE_ENFORCE_EQ(
scale,
6.0f,
platform::errors::External("Not support scale [%f] in XPU", scale));
PADDLE_ENFORCE_EQ(
offset,
3.0f,
platform::errors::External("Not support offset [%f] in XPU", offset));
xpu_activation_backward<paddle::platform::XPUDeviceContext, T, XPUType>(
ctx, xpu::hard_swish_grad<XPUType>);
}
};
template <typename T>
struct XPULeakyReluFunctor : public BaseActivationFunctor<T> {
void operator()(const framework::ExecutionContext &ctx) const {
const auto *x = ctx.Input<Tensor>("X");
auto *y = ctx.Output<Tensor>("Out");
float alpha = ctx.Attr<float>("alpha");
const T *x_data = x->data<T>();
T *y_data = y->mutable_data<T>(ctx.GetPlace());
auto xpu_context =
ctx.device_context<paddle::platform::XPUDeviceContext>().x_context();
int r = xpu::leaky_relu(xpu_context, x_data, y_data, x->numel(), alpha);
PADDLE_ENFORCE_EQ(
r,
xpu::Error_t::SUCCESS,
platform::errors::External(
"XPU leaky_relu return wrong value[%d %s].", r, XPUAPIErrorMsg[r]));
}
};
template <typename T>
struct XPULeakyReluGradFunctor : public BaseActivationFunctor<T> {
void operator()(const framework::ExecutionContext &ctx) const {
const auto *x = ctx.Input<Tensor>("X");
auto *dOut = ctx.Input<framework::Tensor>(framework::GradVarName("Out"));
auto *dX = ctx.Output<framework::Tensor>(framework::GradVarName("X"));
float alpha = ctx.Attr<float>("alpha");
const T *x_data = nullptr;
const T *y_grad = nullptr;
if (x != nullptr) x_data = x->data<T>();
if (dOut != nullptr) y_grad = dOut->data<T>();
T *x_grad = dX->mutable_data<T>(ctx.GetPlace());
auto xpu_context =
ctx.device_context<paddle::platform::XPUDeviceContext>().x_context();
// x and y have the same sign for leaky_relu, and y == nullptr here,
// so we pass x twice to the API.
int r = xpu::leaky_relu_grad(xpu_context,
reinterpret_cast<const float *>(x_data),
reinterpret_cast<const float *>(x_data),
reinterpret_cast<const float *>(y_grad),
reinterpret_cast<float *>(x_grad),
dX->numel(),
alpha);
PADDLE_ENFORCE_EQ(r,
xpu::Error_t::SUCCESS,
platform::errors::External(
"XPU leaky_relu_grad return wrong value[%d %s].",
r,
XPUAPIErrorMsg[r]));
}
};
template <typename T>
struct XPULogGradFunctor : public BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
void operator()(const framework::ExecutionContext &ctx) const {
const auto *x = ctx.Input<Tensor>("X");
auto *dOut = ctx.Input<framework::Tensor>(framework::GradVarName("Out"));
auto *dX = ctx.Output<framework::Tensor>(framework::GradVarName("X"));
const T *x_data = nullptr;
const T *y_grad = nullptr;
if (x != nullptr) x_data = x->data<T>();
if (dOut != nullptr) y_grad = dOut->data<T>();
T *x_grad = dX->mutable_data<T>(ctx.GetPlace());
auto dev_ctx =
ctx.device_context<paddle::platform::XPUDeviceContext>().x_context();
const auto x_dims = x->dims();
auto xshape = phi::vectorize<int>(x_dims);
int len = x->dims()[x_dims.size() - 1];
std::vector<int> yshape(1, len);
xpu::ctx_guard RAII_GUARD(dev_ctx);
T *y_data = RAII_GUARD.alloc_l3_or_gm<T>(len);
PADDLE_ENFORCE_XDNN_NOT_NULL(y_data);
T *tmp_grad = RAII_GUARD.alloc_l3_or_gm<T>(x->numel());
PADDLE_ENFORCE_XDNN_NOT_NULL(tmp_grad);
int r = xpu::constant<T>(dev_ctx, y_data, len, static_cast<T>(1.0));
PADDLE_ENFORCE_XDNN_SUCCESS(r, "constant");
// dx.device(d) = dout * (static_cast<T>(1) / x);
r = xpu::broadcast_div(dev_ctx,
reinterpret_cast<const float *>(y_data),
reinterpret_cast<const float *>(x_data),
reinterpret_cast<float *>(tmp_grad),
yshape,
xshape);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "broadcast_div");
r = xpu::broadcast_mul(dev_ctx,
reinterpret_cast<const float *>(y_grad),
reinterpret_cast<const float *>(tmp_grad),
reinterpret_cast<float *>(x_grad),
xshape,
xshape);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "broadcast_mul");
}
};
template <typename T>
struct XPUMishFunctor : public BaseActivationFunctor<T> {
void operator()(const framework::ExecutionContext &ctx) const {
const auto *x = ctx.Input<Tensor>("X");
auto *y = ctx.Output<Tensor>("Out");
const T *x_data = x->data<T>();
T *y_data = y->mutable_data<T>(ctx.GetPlace());
float threshold = ctx.Attr<float>("threshold");
auto xpu_context =
ctx.device_context<paddle::platform::XPUDeviceContext>().x_context();
int r = xpu::mish(xpu_context, x_data, y_data, x->numel(), threshold);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "mish");
}
};
template <typename T>
struct XPUMishGradFunctor : public BaseActivationFunctor<T> {
void operator()(const framework::ExecutionContext &ctx) const {
const auto *x = ctx.Input<Tensor>("X");
auto *dOut = ctx.Input<framework::Tensor>(framework::GradVarName("Out"));
auto *dX = ctx.Output<framework::Tensor>(framework::GradVarName("X"));
const T *x_data = x->data<T>();
const T *y_grad = dOut->data<T>();
T *x_grad = dX->mutable_data<T>(ctx.GetPlace());
float threshold = ctx.Attr<float>("threshold");
auto xpu_context =
ctx.device_context<paddle::platform::XPUDeviceContext>().x_context();
int r = xpu::mish_grad(xpu_context,
reinterpret_cast<const float *>(x_data),
reinterpret_cast<const float *>(
x_data), // mish_grad does not need y_data
reinterpret_cast<const float *>(y_grad),
reinterpret_cast<float *>(x_grad),
dX->numel(),
threshold);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "mish_grad");
}
};
template <typename T>
struct XPUPowFunctor : public BaseActivationFunctor<T> {
void operator()(const framework::ExecutionContext &ctx) const {
const auto *x = ctx.Input<Tensor>("X");
auto *y = ctx.Output<Tensor>("Out");
auto pow_factor = ctx.Attr<float>("factor");
const T *x_data = x->data<T>();
T *y_data = y->mutable_data<T>(ctx.GetPlace());
// allocate temp memory for factor on xpu
auto xpu_context =
ctx.device_context<paddle::platform::XPUDeviceContext>().x_context();
xpu::ctx_guard RAII_GUARD(xpu_context);
T *factor_data = RAII_GUARD.alloc_l3_or_gm<T>(1);
PADDLE_ENFORCE_NOT_NULL(
factor_data,
platform::errors::External("XPU alloc_l3_or_gm returns nullptr"));
memory::Copy(ctx.GetPlace(),
static_cast<void *>(factor_data),
platform::CPUPlace(),
static_cast<void *>(&pow_factor),
sizeof(T));
// broadcast_pow(Context* ctx, const T* x, const T* y, T* z, const
// std::vector<int>& xshape, const std::vector<int>& yshape);
auto x_dims = phi::vectorize<int>(x->dims());
int r = xpu::broadcast_pow(
xpu_context, x_data, factor_data, y_data, x_dims, {1});
PADDLE_ENFORCE_XDNN_SUCCESS(r, "broadcast_pow");
}
};
template <typename T>
struct XPUPowGradFunctor : public BaseActivationFunctor<T> {
void operator()(const framework::ExecutionContext &ctx) const {
const auto *x = ctx.Input<Tensor>("X");
auto *dOut = ctx.Input<framework::Tensor>(framework::GradVarName("Out"));
auto *dX = ctx.Output<framework::Tensor>(framework::GradVarName("X"));
const T *x_data = x->data<T>();
const T *y_grad = dOut->data<T>();
T *x_grad = dX->mutable_data<T>(ctx.GetPlace());
// check dims: all dims should be equal
auto x_dims = phi::vectorize<int>(x->dims());
auto dy_dims = phi::vectorize<int>(dOut->dims());
auto dx_dims = phi::vectorize<int>(dX->dims());
PADDLE_ENFORCE_EQ(
x_dims,
dy_dims,
platform::errors::PreconditionNotMet("x_dims should match dy_dims."));
PADDLE_ENFORCE_EQ(
x_dims,
dx_dims,
platform::errors::PreconditionNotMet("x_dims should match dx_dims."));
float pow_factor = ctx.Attr<float>("factor");
auto xpu_context =
ctx.device_context<paddle::platform::XPUDeviceContext>().x_context();
// int pow_grad(Context* ctx, const T* x, const T* dy, T* dx, int len, float
// factor);
int r = xpu::pow_grad(
xpu_context, x_data, y_grad, x_grad, x->numel(), pow_factor);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "pow_grad");
}
};
template <typename T>
struct XPUReluFunctor : public BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
void operator()(const framework::ExecutionContext &ctx) const {
const auto *x = ctx.Input<Tensor>("X");
auto *y = ctx.Output<Tensor>("Out");
const XPUType *x_data = reinterpret_cast<const XPUType *>(x->data<T>());
XPUType *y_data =
reinterpret_cast<XPUType *>(y->mutable_data<T>(ctx.GetPlace()));
auto xpu_context =
ctx.device_context<paddle::platform::XPUDeviceContext>().x_context();
int r =
xpu::relu(xpu_context, x_data, y_data, x->numel(), nullptr, nullptr);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "relu");
}
};
template <typename T>
struct XPUSoftPlusFunctor : public BaseActivationFunctor<T> {
void operator()(const framework::ExecutionContext &ctx) const {
const auto *x = ctx.Input<Tensor>("X");
auto *y = ctx.Output<Tensor>("Out");
const T *x_data = x->data<T>();
T *y_data = y->mutable_data<T>(ctx.GetPlace());
float beta = ctx.Attr<float>("beta");
float threshold = ctx.Attr<float>("threshold");
auto xpu_context =
ctx.device_context<paddle::platform::XPUDeviceContext>().x_context();
int r =
xpu::softplus(xpu_context, x_data, y_data, x->numel(), beta, threshold);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "softplus");
}
};
template <typename T>
struct XPUSoftPlusGradFunctor : public BaseActivationFunctor<T> {
void operator()(const framework::ExecutionContext &ctx) const {
const auto *x = ctx.Input<Tensor>("X");
auto *dOut = ctx.Input<framework::Tensor>(framework::GradVarName("Out"));
auto *dX = ctx.Output<framework::Tensor>(framework::GradVarName("X"));
const T *x_data = x->data<T>();
const T *y_grad = dOut->data<T>();
T *x_grad = dX->mutable_data<T>(ctx.GetPlace());
float beta = ctx.Attr<float>("beta");
float threshold = ctx.Attr<float>("threshold");
auto xpu_context =
ctx.device_context<paddle::platform::XPUDeviceContext>().x_context();
int r = xpu::softplus_grad(xpu_context,
reinterpret_cast<const float *>(x_data),
reinterpret_cast<const float *>(
x_data), // softplus_grad does not need y_data
reinterpret_cast<const float *>(y_grad),
reinterpret_cast<float *>(x_grad),
dX->numel(),
beta,
threshold);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "softplus_grad");
}
};
template <typename T>
struct XPUSwishFunctor : public BaseActivationFunctor<T> {
void operator()(const framework::ExecutionContext &ctx) const {
const auto *x = ctx.Input<Tensor>("X");
auto *y = ctx.Output<Tensor>("Out");
const T *x_data = x->data<T>();
T *y_data = y->mutable_data<T>(ctx.GetPlace());
auto xpu_context =
ctx.device_context<paddle::platform::XPUDeviceContext>().x_context();
// int swish(Context* ctx, const T* x, T* y, int len);
int r = xpu::swish(xpu_context, x_data, y_data, x->numel());
PADDLE_ENFORCE_XDNN_SUCCESS(r, "swish");
}
};
template <typename T>
struct XPUSwishGradFunctor : public BaseActivationFunctor<T> {
void operator()(const framework::ExecutionContext &ctx) const {
const auto *x = ctx.Input<Tensor>("X");
auto *dOut = ctx.Input<framework::Tensor>(framework::GradVarName("Out"));
auto *dX = ctx.Output<framework::Tensor>(framework::GradVarName("X"));
const T *x_data = x->data<T>();
const T *y_grad = dOut->data<T>();
T *x_grad = dX->mutable_data<T>(ctx.GetPlace());
auto xpu_context =
ctx.device_context<paddle::platform::XPUDeviceContext>().x_context();
// int swish_grad(Context* ctx, const T* x, const T* dy, T* dx, int len);
int r = xpu::swish_grad(xpu_context, x_data, y_grad, x_grad, dX->numel());
PADDLE_ENFORCE_XDNN_SUCCESS(r, "swish_grad");
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
#define REGISTER_ACTIVATION_XPU_KERNEL(act_type, functor, grad_functor) \
REGISTER_OP_XPU_KERNEL(act_type, \
ops::XPUActivationKernel<ops::functor<float>>); \
REGISTER_OP_XPU_KERNEL( \
act_type##_grad, \
ops::XPUActivationGradKernel<ops::grad_functor<float>>);
REGISTER_ACTIVATION_XPU_KERNEL(abs, XPUAbsFunctor, XPUAbsGradFunctor)
REGISTER_ACTIVATION_XPU_KERNEL(hard_swish,
XPUHardSwishFunctor,
XPUHardSwishGradFunctor)
REGISTER_ACTIVATION_XPU_KERNEL(leaky_relu,
XPULeakyReluFunctor,
XPULeakyReluGradFunctor)
REGISTER_ACTIVATION_XPU_KERNEL(mish, XPUMishFunctor, XPUMishGradFunctor)
REGISTER_ACTIVATION_XPU_KERNEL(reciprocal,
XPUReciprocalFunctor,
XPUReciprocalGradFunctor)
REGISTER_ACTIVATION_XPU_KERNEL(sigmoid,
XPUSigmoidFunctor,
XPUSigmoidGradFunctor)
REGISTER_ACTIVATION_XPU_KERNEL(sqrt, XPUSqrtFunctor, XPUSqrtGradFunctor)
REGISTER_ACTIVATION_XPU_KERNEL(square, XPUSquareFunctor, XPUSquareGradFunctor)
REGISTER_ACTIVATION_XPU_KERNEL(softplus,
XPUSoftPlusFunctor,
XPUSoftPlusGradFunctor)
REGISTER_ACTIVATION_XPU_KERNEL(swish, XPUSwishFunctor, XPUSwishGradFunctor)
REGISTER_ACTIVATION_XPU_KERNEL(pow, XPUPowFunctor, XPUPowGradFunctor)
REGISTER_OP_XPU_KERNEL(
relu,
ops::XPUActivationKernel<ops::XPUReluFunctor<float>>,
ops::XPUActivationKernel<ops::XPUReluFunctor<paddle::platform::float16>>);
REGISTER_OP_XPU_KERNEL(
relu_grad,
ops::XPUActivationGradKernel<ops::XPUReluGradFunctor<float>>,
ops::XPUActivationGradKernel<
ops::XPUReluGradFunctor<paddle::platform::float16>>);
REGISTER_OP_XPU_KERNEL(relu6,
ops::XPUActivationKernel<ops::XPURelu6Functor<float>>);
REGISTER_OP_XPU_KERNEL(
relu6_grad, ops::XPUActivationGradKernel<ops::XPURelu6GradFunctor<float>>);
REGISTER_OP_XPU_KERNEL(
tanh,
ops::XPUActivationKernel<ops::XPUTanhFunctor<float>>,
ops::XPUActivationKernel<ops::XPUTanhFunctor<paddle::platform::float16>>);
REGISTER_OP_XPU_KERNEL(
tanh_grad,
ops::XPUActivationGradKernel<ops::XPUTanhGradFunctor<float>>,
ops::XPUActivationGradKernel<
ops::XPUTanhGradFunctor<paddle::platform::float16>>);
REGISTER_OP_XPU_KERNEL(exp,
ops::XPUActivationKernel<ops::XPUExpFunctor<float>>);
REGISTER_OP_XPU_KERNEL(log,
ops::XPUActivationKernel<ops::XPULogFunctor<float>>);
REGISTER_OP_XPU_KERNEL(
log_grad, ops::XPUActivationGradKernel<ops::XPULogGradFunctor<float>>);
#endif // PADDLE_WITH_XPU
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_XPU
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/device/device_wrapper.h"
#include "paddle/fluid/platform/device/xpu/xpu_header.h"
#include "paddle/phi/kernels/instance_norm_grad_kernel.h"
#include "paddle/phi/kernels/instance_norm_kernel.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
template <typename DeviceContext, typename T>
class InstanceNormXPUKernel : public framework::OpKernel<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
public:
void Compute(const framework::ExecutionContext& ctx) const override {
const auto epsilon = ctx.Attr<float>("epsilon");
const auto* x = ctx.Input<Tensor>("X");
const auto* scale = ctx.Input<Tensor>("Scale");
const auto* bias = ctx.Input<Tensor>("Bias");
auto* y = ctx.Output<Tensor>("Y");
auto* mean = ctx.Output<Tensor>("SavedMean");
auto* variance = ctx.Output<Tensor>("SavedVariance");
auto& dev_ctx = ctx.template device_context<DeviceContext>();
// call phi kernel
phi::InstanceNormKernel<T>(
static_cast<const typename paddle::framework::ConvertToPhiContext<
DeviceContext>::TYPE&>(dev_ctx),
*x,
*scale,
*bias,
epsilon,
y,
mean,
variance);
}
};
template <typename DeviceContext, typename T>
class InstanceNormGradXPUKernel : public framework::OpKernel<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
public:
void Compute(const framework::ExecutionContext& ctx) const override {
const auto epsilon = ctx.Attr<float>("epsilon");
const auto* x = ctx.Input<Tensor>("X");
const auto* mean = ctx.Input<Tensor>("SavedMean");
const auto* variance = ctx.Input<Tensor>("SavedVariance");
const auto* scale = ctx.Input<Tensor>("Scale");
const auto* dy = ctx.Input<Tensor>(framework::GradVarName("Y"));
auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
auto* dscale = ctx.Output<Tensor>(framework::GradVarName("Scale"));
auto* dbias = ctx.Output<Tensor>(framework::GradVarName("Bias"));
auto& dev_ctx = ctx.template device_context<DeviceContext>();
// call phi kernel
phi::InstanceNormGradKernel<T>(
static_cast<const typename paddle::framework::ConvertToPhiContext<
DeviceContext>::TYPE&>(dev_ctx),
*x,
*dy,
*scale,
*mean,
*variance,
epsilon,
dx,
dbias,
dscale);
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_XPU_KERNEL(
instance_norm,
ops::InstanceNormXPUKernel<paddle::platform::XPUDeviceContext, float>);
REGISTER_OP_XPU_KERNEL(
instance_norm_grad,
ops::InstanceNormGradXPUKernel<paddle::platform::XPUDeviceContext, float>);
#endif // PADDLE_WITH_XPU
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_XPU
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/operators/transpose_op.h"
#include "paddle/fluid/platform/device/xpu/xpu_header.h"
namespace paddle {
namespace operators {
using framework::Tensor;
template <typename DeviceContext, typename T>
class TransposeXPUKernel : public framework::OpKernel<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
public:
void Compute(const framework::ExecutionContext& context) const override {
auto x = context.Input<framework::Tensor>("X");
auto out = context.Output<framework::Tensor>("Out");
// axis holds the permutation
auto axis = context.Attr<std::vector<int>>("axis");
int ndims = axis.size();
const auto x_dims = x->dims();
const T* x_data = x->data<T>();
T* y_data = out->mutable_data<T>(context.GetPlace());
if (out->numel() == 0) {
return;
}
std::vector<int> x_shape_host(ndims, 0);
for (int i = 0; i < ndims; ++i) {
x_shape_host[i] = x_dims[i];
}
auto& dev_ctx = context.template device_context<DeviceContext>();
int r = xpu::transpose<XPUType>(dev_ctx.x_context(),
reinterpret_cast<const XPUType*>(x_data),
reinterpret_cast<XPUType*>(y_data),
x_shape_host,
axis);
PADDLE_ENFORCE_EQ(
r,
xpu::Error_t::SUCCESS,
platform::errors::External("XPU kernel error! error code=%d", r));
}
};
template <typename DeviceContext, typename T>
class TransposeGradXPUKernel : public framework::OpKernel<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
public:
void Compute(const framework::ExecutionContext& context) const override {
auto* out_grad =
context.Input<framework::Tensor>(framework::GradVarName("Out"));
auto* x_grad =
context.Output<framework::Tensor>(framework::GradVarName("X"));
if (!x_grad) return;
x_grad->mutable_data<T>(context.GetPlace());
std::vector<int> axis = context.Attr<std::vector<int>>("axis");
std::vector<int> reversed_axis(axis);
for (size_t i = 0; i < axis.size(); i++) {
reversed_axis[axis[i]] = i;
}
int ndims = axis.size();
std::vector<int> out_shape_host(ndims, 0);
for (int i = 0; i < ndims; ++i) {
out_shape_host[i] = out_grad->dims()[i];
}
auto& dev_ctx = context.template device_context<DeviceContext>();
int r = xpu::transpose<XPUType>(
dev_ctx.x_context(),
reinterpret_cast<const XPUType*>(out_grad->data<T>()),
reinterpret_cast<XPUType*>(x_grad->data<T>()),
out_shape_host,
reversed_axis);
PADDLE_ENFORCE_EQ(
r,
xpu::Error_t::SUCCESS,
platform::errors::External("XPU kernel error! error code=%d", r));
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_XPU_KERNEL(
transpose,
ops::TransposeXPUKernel<paddle::platform::XPUDeviceContext, float>,
ops::TransposeXPUKernel<paddle::platform::XPUDeviceContext,
paddle::platform::float16>);
REGISTER_OP_XPU_KERNEL(
transpose_grad,
ops::TransposeGradXPUKernel<paddle::platform::XPUDeviceContext, float>,
ops::TransposeGradXPUKernel<paddle::platform::XPUDeviceContext,
paddle::platform::float16>);
REGISTER_OP_XPU_KERNEL(
transpose2,
ops::TransposeXPUKernel<paddle::platform::XPUDeviceContext, float>,
ops::TransposeXPUKernel<paddle::platform::XPUDeviceContext,
paddle::platform::float16>);
REGISTER_OP_XPU_KERNEL(
transpose2_grad,
ops::TransposeGradXPUKernel<paddle::platform::XPUDeviceContext, float>,
ops::TransposeGradXPUKernel<paddle::platform::XPUDeviceContext,
paddle::platform::float16>);
#endif // PADDLE_WITH_XPU
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_XPU
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/device/device_wrapper.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
template <typename DeviceContext, typename T>
class TrilTriuXPUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
const auto* x = context.Input<framework::Tensor>("X");
const auto* x_data = x->data<T>();
auto* out = context.Output<framework::Tensor>("Out");
auto* out_data = out->mutable_data<T>(context.GetPlace());
const int diagonal = context.Attr<int>("diagonal");
const bool lower = context.Attr<bool>("lower");
auto xshape = phi::vectorize<int>(x->dims());
auto& dev_ctx = context.template device_context<DeviceContext>();
int r = 0;
if (lower) {
r = xpu::tril(dev_ctx.x_context(), x_data, out_data, xshape, diagonal);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "tril_op");
} else {
r = xpu::triu(dev_ctx.x_context(), x_data, out_data, xshape, diagonal);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "triu_op");
}
}
};
template <typename DeviceContext, typename T>
class TrilTriuGradXPUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
const auto* d_out =
context.Input<framework::Tensor>(framework::GradVarName("Out"));
const auto* dout_data = d_out->data<T>();
auto* d_x = context.Output<framework::Tensor>(framework::GradVarName("X"));
auto* dx_data = d_x->mutable_data<T>(context.GetPlace());
const int diagonal = context.Attr<int>("diagonal");
const bool lower = context.Attr<bool>("lower");
auto dy_shape = phi::vectorize<int>(d_out->dims());
auto& dev_ctx = context.template device_context<DeviceContext>();
int r = 0;
if (lower) {
r = xpu::tril(
dev_ctx.x_context(), dout_data, dx_data, dy_shape, diagonal);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "tril_op");
} else {
r = xpu::triu(
dev_ctx.x_context(), dout_data, dx_data, dy_shape, diagonal);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "triu_op");
}
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_XPU_KERNEL(
tril_triu,
ops::TrilTriuXPUKernel<paddle::platform::XPUDeviceContext, int>,
ops::TrilTriuXPUKernel<paddle::platform::XPUDeviceContext, float>);
REGISTER_OP_XPU_KERNEL(
tril_triu_grad,
ops::TrilTriuGradXPUKernel<paddle::platform::XPUDeviceContext, int>,
ops::TrilTriuGradXPUKernel<paddle::platform::XPUDeviceContext, float>);
#endif
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/abs_grad_kernel.h"
#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/core/kernel_registry.h"
namespace phi {
template <typename T, typename Context>
void AbsGradKernel(const Context& ctx,
const DenseTensor& x,
const DenseTensor& dout,
DenseTensor* dx) {
ctx.template Alloc<T>(dx);
int r = xpu::abs_grad(ctx.x_context(),
x.data<T>(),
dout.data<T>(),
dout.data<T>(),
dx->data<T>(),
x.numel());
PADDLE_ENFORCE_XDNN_SUCCESS(r, "abs_grad");
}
} // namespace phi
PD_REGISTER_KERNEL(abs_grad, XPU, ALL_LAYOUT, phi::AbsGradKernel, float) {}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/abs_kernel.h"
#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/core/kernel_registry.h"
namespace phi {
template <typename T, typename Context>
void AbsKernel(const Context& ctx, const DenseTensor& x, DenseTensor* out) {
ctx.template Alloc<T>(out);
int r = xpu::abs(ctx.x_context(), x.data<T>(), out->data<T>(), x.numel());
PADDLE_ENFORCE_XDNN_SUCCESS(r, "abs");
}
} // namespace phi
PD_REGISTER_KERNEL(abs, XPU, ALL_LAYOUT, phi::AbsKernel, float) {}
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/phi/kernels/activation_grad_kernel.h"
#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/activation_functor.h"
namespace phi {
template <typename T, typename Context, typename Functor>
void ActivationGradXPUImpl(const Context& dev_ctx,
const DenseTensor* x,
const DenseTensor* out,
const DenseTensor* d_out,
DenseTensor* d_x,
const Functor& functor) {
PADDLE_ENFORCE_NOT_NULL(
d_out, errors::NotFound("The input DenseTensor dOut can not be nullptr"));
PADDLE_ENFORCE_NOT_NULL(
d_x, errors::NotFound("The output DenseTensor dX can not be nullptr"));
if (!out) {
out = d_out; // fake out
}
dev_ctx.template Alloc<T>(d_x);
functor(dev_ctx, x, out, d_out, d_x);
}
#define DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPX(name, functor_class) \
template <typename T, typename Context> \
void name##GradKernel(const Context& dev_ctx, \
const DenseTensor& x, \
const DenseTensor& dout, \
DenseTensor* dx) { \
functor_class<T> functor; \
ActivationGradXPUImpl<T, Context, functor_class<T>>( \
dev_ctx, &x, nullptr, &dout, dx, functor); \
}
#define DEFINE_XPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX( \
name, functor_class, attr) \
template <typename T, typename Context> \
void name##GradKernel(const Context& dev_ctx, \
const DenseTensor& x, \
const DenseTensor& dout, \
float attr, \
DenseTensor* dx) { \
functor_class<T> functor; \
auto attrs = functor.GetAttrs(); \
*(attrs[0].second) = attr; \
ActivationGradXPUImpl<T, Context, functor_class<T>>( \
dev_ctx, &x, nullptr, &dout, dx, functor); \
}
#define DEFINE_XPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX( \
name, functor_class, attr1, attr2) \
template <typename T, typename Context> \
void name##GradKernel(const Context& dev_ctx, \
const DenseTensor& x, \
const DenseTensor& dout, \
float attr1, \
float attr2, \
DenseTensor* dx) { \
functor_class<T> functor; \
auto attrs = functor.GetAttrs(); \
*(attrs[0].second) = attr1; \
*(attrs[1].second) = attr2; \
ActivationGradXPUImpl<T, Context, functor_class<T>>( \
dev_ctx, &x, nullptr, &dout, dx, functor); \
}
#define DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPOUT(name, functor_class) \
template <typename T, typename Context> \
void name##GradKernel(const Context& dev_ctx, \
const DenseTensor& out, \
const DenseTensor& dout, \
DenseTensor* dx) { \
functor_class<T> functor; \
ActivationGradXPUImpl<T, Context, functor_class<T>>( \
dev_ctx, nullptr, &out, &dout, dx, functor); \
}
#define DEFINE_XPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPOUT( \
name, functor_class, attr) \
template <typename T, typename Context> \
void name##GradKernel(const Context& dev_ctx, \
const DenseTensor& out, \
const DenseTensor& dout, \
float attr, \
DenseTensor* dx) { \
functor_class<T> functor; \
auto attrs = functor.GetAttrs(); \
*(attrs[0].second) = attr; \
ActivationGradXPUImpl<T, Context, functor_class<T>>( \
dev_ctx, nullptr, &out, &dout, dx, functor); \
}
#define DEFINE_XPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPOUT( \
name, functor_class, attr1, attr2) \
template <typename T, typename Context> \
void name##GradKernel(const Context& dev_ctx, \
const DenseTensor& out, \
const DenseTensor& dout, \
float attr1, \
float attr2, \
DenseTensor* dx) { \
functor_class<T> functor; \
auto attrs = functor.GetAttrs(); \
*(attrs[0].second) = attr1; \
*(attrs[1].second) = attr2; \
ActivationGradXPUImpl<T, Context, functor_class<T>>( \
dev_ctx, nullptr, &out, &dout, dx, functor); \
}
#define DEFINE_XPU_ACTIVATION_GRAD_KERNEL_NODEP(name, functor_class) \
template <typename T, typename Context> \
void name##GradKernel( \
const Context& dev_ctx, const DenseTensor& dout, DenseTensor* dx) { \
functor_class<T> functor; \
ActivationGradXPUImpl<T, Context, functor_class<T>>( \
dev_ctx, nullptr, nullptr, &dout, dx, functor); \
}
template <typename Context, typename T, typename XPUType>
int xpu_activation_backward(const Context& dev_ctx,
const DenseTensor* x,
const DenseTensor* out,
const DenseTensor* dout,
DenseTensor* dx,
std::function<int(xpu::Context*,
const XPUType*,
const XPUType*,
const XPUType*,
XPUType*,
int)> func) {
/* TODO: relu, tanh and sigmoid are in-place */
const XPUType* x_data = nullptr;
const XPUType* y_data = nullptr;
const XPUType* y_grad = nullptr;
if (x != nullptr) x_data = reinterpret_cast<const XPUType*>(x->data<T>());
if (out != nullptr) y_data = reinterpret_cast<const XPUType*>(out->data<T>());
if (dout != nullptr)
y_grad = reinterpret_cast<const XPUType*>(dout->data<T>());
XPUType* x_grad = reinterpret_cast<XPUType*>(dx->data<T>());
int r =
func(dev_ctx.x_context(), x_data, y_data, y_grad, x_grad, dx->numel());
return r;
}
template <typename T>
struct XPULogGradFunctor : public funcs::BaseActivationFunctor<T> {
template <typename Context>
void operator()(const Context& dev_ctx,
const DenseTensor* x,
const DenseTensor* out,
const DenseTensor* dOut,
DenseTensor* dX) const {
const T* x_data = nullptr;
const T* y_grad = nullptr;
if (x != nullptr) x_data = x->data<T>();
if (dOut != nullptr) y_grad = dOut->data<T>();
T* x_grad = dX->data<T>();
const auto x_dims = x->dims();
auto xshape = vectorize<int>(x_dims);
int len = x->dims()[x_dims.size() - 1];
std::vector<int> yshape(1, len);
xpu::ctx_guard RAII_GUARD(dev_ctx.x_context());
T* y_data = RAII_GUARD.alloc_l3_or_gm<T>(len);
PADDLE_ENFORCE_XDNN_NOT_NULL(y_data);
T* tmp_grad = RAII_GUARD.alloc_l3_or_gm<T>(x->numel());
PADDLE_ENFORCE_XDNN_NOT_NULL(tmp_grad);
int r =
xpu::constant<T>(dev_ctx.x_context(), y_data, len, static_cast<T>(1.0));
PADDLE_ENFORCE_XDNN_SUCCESS(r, "constant");
// dx.device(d) = dout * (static_cast<T>(1) / x);
r = xpu::broadcast_div(dev_ctx.x_context(),
reinterpret_cast<const float*>(y_data),
reinterpret_cast<const float*>(x_data),
reinterpret_cast<float*>(tmp_grad),
yshape,
xshape);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "broadcast_div");
r = xpu::broadcast_mul(dev_ctx.x_context(),
reinterpret_cast<const float*>(y_grad),
reinterpret_cast<const float*>(tmp_grad),
reinterpret_cast<float*>(x_grad),
xshape,
xshape);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "broadcast_mul");
}
};
template <typename T>
struct XPULeakyReluGradFunctor : public funcs::BaseActivationFunctor<T> {
float alpha;
typename funcs::BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"alpha", &alpha}};
}
template <typename Context>
void operator()(const Context& dev_ctx,
const DenseTensor* x,
const DenseTensor* out,
const DenseTensor* dout,
DenseTensor* dx) const {
const T* x_data = nullptr;
const T* y_grad = nullptr;
if (x != nullptr) x_data = x->data<T>();
if (dout != nullptr) y_grad = dout->data<T>();
T* x_grad = dx->data<T>();
auto xpu_context = dev_ctx.x_context();
// x and y have the same sign for leaky_relu, and y == nullptr here,
// so we pass x twice to the API.
int r = xpu::leaky_relu_grad(xpu_context,
reinterpret_cast<const float*>(x_data),
reinterpret_cast<const float*>(x_data),
reinterpret_cast<const float*>(y_grad),
reinterpret_cast<float*>(x_grad),
dx->numel(),
alpha);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "leaky_relu_grad");
}
};
template <typename T>
struct XPUHardSwishGradFunctor : public funcs::BaseActivationFunctor<T> {
float threshold;
float scale;
float offset;
typename funcs::BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"threshold", &threshold}, {"scale", &scale}, {"offset", &offset}};
}
template <typename Context>
void operator()(const Context& dev_ctx,
const DenseTensor* x,
const DenseTensor* out,
const DenseTensor* dout,
DenseTensor* dx) const {
using XPUType = typename XPUTypeTrait<T>::Type;
PADDLE_ENFORCE_EQ(
threshold,
6.0f,
errors::External("Not support threshold [%f] in XPU", threshold));
PADDLE_ENFORCE_EQ(
scale, 6.0f, errors::External("Not support scale [%f] in XPU", scale));
PADDLE_ENFORCE_EQ(
offset,
3.0f,
errors::External("Not support offset [%f] in XPU", offset));
int r = xpu_activation_backward<Context, T, XPUType>(
dev_ctx, x, out, dout, dx, xpu::hard_swish_grad<XPUType>);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "hard_swish_grad");
}
};
template <typename T>
struct XPUReciprocalGradFunctor : public funcs::BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
template <typename Context>
void operator()(const Context& dev_ctx,
const DenseTensor* x,
const DenseTensor* out,
const DenseTensor* dout,
DenseTensor* dx) const {
int r = xpu_activation_backward<Context, T, XPUType>(
dev_ctx, x, out, dout, dx, xpu::reciprocal_grad<XPUType>);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "reciprocal_grad");
}
};
template <typename T>
struct XPUReluGradFunctor : public funcs::BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
template <typename Context>
void operator()(const Context& dev_ctx,
const DenseTensor* x,
const DenseTensor* out,
const DenseTensor* dout,
DenseTensor* dx) const {
int r = xpu_activation_backward<Context, T, XPUType>(
dev_ctx, x, out, dout, dx, xpu::relu_grad<XPUType>);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "relu_grad");
}
};
template <typename T>
struct XPURelu6GradFunctor : public funcs::BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
float threshold;
typename funcs::BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"threshold", &threshold}};
}
template <typename Context>
void operator()(const Context& dev_ctx,
const DenseTensor* x,
const DenseTensor* out,
const DenseTensor* dout,
DenseTensor* dx) const {
int r = xpu_activation_backward<Context, T, XPUType>(
dev_ctx, x, out, dout, dx, xpu::relu6_grad<XPUType>);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "relu6_grad");
}
};
template <typename T>
struct XPUSigmoidGradFunctor : public funcs::BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
template <typename Context>
void operator()(const Context& dev_ctx,
const DenseTensor* x,
const DenseTensor* out,
const DenseTensor* dout,
DenseTensor* dx) const {
int r = xpu_activation_backward<Context, T, XPUType>(
dev_ctx, x, out, dout, dx, xpu::sigmoid_grad<XPUType>);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "sigmoid_grad");
}
};
template <typename T>
struct XPUTanhGradFunctor : public funcs::BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
template <typename Context>
void operator()(const Context& dev_ctx,
const DenseTensor* x,
const DenseTensor* out,
const DenseTensor* dout,
DenseTensor* dx) const {
int r = xpu_activation_backward<Context, T, XPUType>(
dev_ctx, x, out, dout, dx, xpu::tanh_grad<XPUType>);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "tanh_grad");
}
};
template <typename T>
struct XPUSquareGradFunctor : public funcs::BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
template <typename Context>
void operator()(const Context& dev_ctx,
const DenseTensor* x,
const DenseTensor* out,
const DenseTensor* dout,
DenseTensor* dx) const {
int r = xpu_activation_backward<Context, T, XPUType>(
dev_ctx, x, out, dout, dx, xpu::square_grad<XPUType>);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "square_grad");
}
};
template <typename T>
struct XPUSqrtGradFunctor : public funcs::BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
template <typename Context>
void operator()(const Context& dev_ctx,
const DenseTensor* x,
const DenseTensor* out,
const DenseTensor* dout,
DenseTensor* dx) const {
int r = xpu_activation_backward<Context, T, XPUType>(
dev_ctx, x, out, dout, dx, xpu::sqrt_grad<XPUType>);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "sqrt_grad");
}
};
template <typename T, typename Context>
void PowGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& dout,
const Scalar& factor,
DenseTensor* dx) {
dev_ctx.template Alloc<T>(dx);
const T* x_data = x.data<T>();
const T* y_grad = dout.data<T>();
T* x_grad = dx->data<T>();
// check dims: all dims should be equal
auto x_dims = vectorize<int>(x.dims());
auto dy_dims = vectorize<int>(dout.dims());
auto dx_dims = vectorize<int>(dx->dims());
PADDLE_ENFORCE_EQ(x_dims,
dy_dims,
errors::PreconditionNotMet("x_dims should match dy_dims."));
PADDLE_ENFORCE_EQ(x_dims,
dx_dims,
errors::PreconditionNotMet("x_dims should match dx_dims."));
float pow_factor = factor.to<float>();
auto xpu_context = dev_ctx.x_context();
// int pow_grad(Context* ctx, const T* x, const T* dy, T* dx, int len, float
// factor);
int r =
xpu::pow_grad(xpu_context, x_data, y_grad, x_grad, x.numel(), pow_factor);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "pow_grad");
}
template <typename T>
struct XPUSwishGradFunctor : public funcs::BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
float beta;
typename funcs::BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"beta", &beta}};
}
template <typename Context>
void operator()(const Context& dev_ctx,
const DenseTensor* x,
const DenseTensor* out,
const DenseTensor* dout,
DenseTensor* dx) const {
const XPUType* x_data = reinterpret_cast<const XPUType*>(x->data<T>());
const XPUType* y_grad = reinterpret_cast<const XPUType*>(dout->data<T>());
XPUType* x_grad = reinterpret_cast<XPUType*>(dx->data<T>());
auto xpu_context = dev_ctx.x_context();
int r = xpu::swish_grad(xpu_context, x_data, y_grad, x_grad, dx->numel());
PADDLE_ENFORCE_XDNN_SUCCESS(r, "swish_grad");
}
};
template <typename T>
struct XPUMishGradFunctor : public funcs::BaseActivationFunctor<T> {
float threshold;
typename funcs::BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"threshold", &threshold}};
}
template <typename Context>
void operator()(const Context& dev_ctx,
const DenseTensor* x,
const DenseTensor* out,
const DenseTensor* dout,
DenseTensor* dx) const {
const T* x_data = x->data<T>();
const T* y_grad = dout->data<T>();
T* x_grad = dx->data<T>();
auto xpu_context = dev_ctx.x_context();
int r = xpu::mish_grad(
xpu_context,
reinterpret_cast<const float*>(x_data),
reinterpret_cast<const float*>(x_data), // mish_grad does not need y_data
reinterpret_cast<const float*>(y_grad),
reinterpret_cast<float*>(x_grad),
dx->numel(),
threshold);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "mish_grad");
}
};
template <typename T>
struct XPUSoftPlusGradFunctor : public funcs::BaseActivationFunctor<T> {
float beta;
float threshold;
typename funcs::BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"beta", &beta}, {"threshold", &threshold}};
}
template <typename Context>
void operator()(const Context& dev_ctx,
const DenseTensor* x,
const DenseTensor* out,
const DenseTensor* dOut,
DenseTensor* dX) const {
const T* x_data = x->data<T>();
const T* y_grad = dOut->data<T>();
T* x_grad = dX->data<T>();
auto xpu_context = dev_ctx.x_context();
int r = xpu::softplus_grad(xpu_context,
reinterpret_cast<const float*>(x_data),
reinterpret_cast<const float*>(
x_data), // softplus_grad does not need y_data
reinterpret_cast<const float*>(y_grad),
reinterpret_cast<float*>(x_grad),
dX->numel(),
beta,
threshold);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "softplus_grad");
}
};
DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Reciprocal, XPUReciprocalGradFunctor);
DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Sigmoid, XPUSigmoidGradFunctor);
DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Sqrt, XPUSqrtGradFunctor);
DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Tanh, XPUTanhGradFunctor);
DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Relu, XPUReluGradFunctor);
DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPX(Log, XPULogGradFunctor);
DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPX(Square, XPUSquareGradFunctor);
DEFINE_XPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(Swish,
XPUSwishGradFunctor,
beta);
DEFINE_XPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(Mish,
XPUMishGradFunctor,
threshold);
DEFINE_XPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(LeakyRelu,
XPULeakyReluGradFunctor,
alpha);
DEFINE_XPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPOUT(Relu6,
XPURelu6GradFunctor,
threshold);
DEFINE_XPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX(Softplus,
XPUSoftPlusGradFunctor,
beta,
threshold)
template <typename T, typename Context>
void HardSwishGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& dout,
float threshold,
float scale,
float offset,
DenseTensor* dx) {
XPUHardSwishGradFunctor<T> functor;
auto attrs = functor.GetAttrs();
*(attrs[0].second) = threshold;
*(attrs[1].second) = scale;
*(attrs[2].second) = offset;
ActivationGradXPUImpl<T, Context, XPUHardSwishGradFunctor<T>>(
dev_ctx, &x, nullptr, &dout, dx, functor);
}
} // namespace phi
PD_REGISTER_KERNEL(relu_grad,
XPU,
ALL_LAYOUT,
phi::ReluGradKernel,
float,
phi::dtype::float16) {}
#define PD_REGISTER_ACTIVATION_GRAD_KERNEL(name, func) \
PD_REGISTER_KERNEL(name, XPU, ALL_LAYOUT, phi::func, float) {}
PD_REGISTER_KERNEL(tanh_grad,
XPU,
ALL_LAYOUT,
phi::TanhGradKernel,
float,
phi::dtype::float16) {}
PD_REGISTER_ACTIVATION_GRAD_KERNEL(log_grad, LogGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(leaky_relu_grad, LeakyReluGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(hard_swish_grad, HardSwishGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(reciprocal_grad, ReciprocalGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(relu6_grad, Relu6GradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(sigmoid_grad, SigmoidGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(sqrt_grad, SqrtGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(mish_grad, MishGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(swish_grad, SwishGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(softplus_grad, SoftplusGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(square_grad, SquareGradKernel)
PD_REGISTER_KERNEL(pow_grad, XPU, ALL_LAYOUT, phi::PowGradKernel, float) {}
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/phi/kernels/activation_kernel.h"
#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/activation_functor.h"
#include "paddle/fluid/memory/memory.h"
namespace phi {
template <typename T, typename Context, typename Functor>
void ActivationXPUImpl(const Context& dev_ctx,
const DenseTensor& x,
DenseTensor* out,
const Functor& functor) {
PADDLE_ENFORCE_NOT_NULL(out,
errors::NotFound("Output Out should not be nullptr"));
dev_ctx.template Alloc<T>(out);
functor(dev_ctx, x, out);
}
#define DEFINE_XPU_ACTIVATION_KERNEL(name, functor_class) \
template <typename T, typename Context> \
void name##Kernel( \
const Context& dev_ctx, const DenseTensor& x, DenseTensor* out) { \
functor_class<T> functor; \
ActivationXPUImpl<T, Context, functor_class<T>>(dev_ctx, x, out, functor); \
}
#define DEFINE_XPU_ACTIVATION_KERNEL_WITH_ONE_ATTRS(name, functor_class, attr) \
template <typename T, typename Context> \
void name##Kernel(const Context& dev_ctx, \
const DenseTensor& x, \
float attr, \
DenseTensor* out) { \
functor_class<T> functor; \
auto attrs = functor.GetAttrs(); \
*(attrs[0].second) = attr; \
ActivationXPUImpl<T, Context, functor_class<T>>(dev_ctx, x, out, functor); \
}
#define DEFINE_XPU_ACTIVATION_KERNEL_WITH_TWO_ATTRS( \
name, functor_class, attr1, attr2) \
template <typename T, typename Context> \
void name##Kernel(const Context& dev_ctx, \
const DenseTensor& x, \
float attr1, \
float attr2, \
DenseTensor* out) { \
functor_class<T> functor; \
auto attrs = functor.GetAttrs(); \
*(attrs[0].second) = attr1; \
*(attrs[1].second) = attr2; \
ActivationXPUImpl<T, Context, functor_class<T>>(dev_ctx, x, out, functor); \
}
template <typename Context, typename T, typename XPUType>
int xpu_activation_func(
const Context& dev_ctx,
const DenseTensor& x,
DenseTensor* out,
std::function<int(xpu::Context*, const XPUType*, XPUType*, int)> func) {
int r = func(dev_ctx.x_context(),
reinterpret_cast<const XPUType*>(x.data<T>()),
reinterpret_cast<XPUType*>(out->data<T>()),
x.numel());
return r;
}
template <typename Context, typename T, typename XPUType>
int xpu_activation_1attr_func(
const Context& dev_ctx,
const DenseTensor& x,
DenseTensor* out,
float attr,
std::function<int(xpu::Context*, const XPUType*, XPUType*, int, float)>
func) {
int r = func(dev_ctx.x_context(),
reinterpret_cast<const XPUType*>(x.data<T>()),
reinterpret_cast<XPUType*>(out->data<T>()),
x.numel(),
attr);
return r;
}
template <typename Context, typename T, typename XPUType>
int xpu_activation_2attr_func(
const Context& dev_ctx,
const DenseTensor& x,
DenseTensor* out,
float attr1,
float attr2,
std::function<
int(xpu::Context*, const XPUType*, XPUType*, int, float, float)> func) {
int r = func(dev_ctx.x_context(),
reinterpret_cast<const XPUType*>(x.data<T>()),
reinterpret_cast<XPUType*>(out->data<T>()),
x.numel(),
attr1,
attr2);
return r;
}
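// These three helpers differ only in how many trailing float attributes they
// forward to the underlying xdnn call; each returns the raw xdnn status code,
// which the functors below check with PADDLE_ENFORCE_XDNN_SUCCESS.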
template <typename T>
struct XPUExpFunctor : public funcs::BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
template <typename Context>
void operator()(const Context& dev_ctx,
const DenseTensor& x,
DenseTensor* out) const {
int r = xpu_activation_func<Context, T, XPUType>(
dev_ctx, x, out, xpu::exp<XPUType>);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "exp");
}
};
template <typename T>
struct XPULogFunctor : public funcs::BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
template <typename Context>
void operator()(const Context& dev_ctx,
const DenseTensor& x,
DenseTensor* out) const {
int r = xpu_activation_func<Context, T, XPUType>(
dev_ctx, x, out, xpu::log<XPUType>);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "log");
}
};
template <typename T>
struct XPULeakyReluFunctor : public funcs::BaseActivationFunctor<T> {
float alpha;
typename funcs::BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"alpha", &alpha}};
}
template <typename Context>
void operator()(const Context& dev_ctx,
const DenseTensor& x,
DenseTensor* out) const {
using XPUType = typename XPUTypeTrait<T>::Type;
int r = xpu_activation_1attr_func<Context, T, XPUType>(
dev_ctx, x, out, alpha, xpu::leaky_relu<XPUType>);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "leaky_relu");
}
};
template <typename T, typename Context>
void PowKernel(const Context& dev_ctx,
const DenseTensor& x,
const Scalar& factor,
DenseTensor* out) {
dev_ctx.template Alloc<T>(out);
float pow_factor = factor.to<float>();
const T* x_data = x.data<T>();
T* y_data = out->data<T>();
auto xpu_context = dev_ctx.x_context();
// allocate temp memory for factor on xpu
xpu::ctx_guard RAII_GUARD(xpu_context);
T* factor_data = RAII_GUARD.alloc_l3_or_gm<T>(1);
PADDLE_ENFORCE_NOT_NULL(
factor_data, errors::External("XPU alloc_l3_or_gm returns nullptr"));
paddle::memory::Copy(dev_ctx.GetPlace(),
static_cast<void*>(factor_data),
phi::CPUPlace(),
static_cast<void*>(&pow_factor),
sizeof(T));
// broadcast_pow(Context* ctx, const T* x, const T* y, T* z, const
// std::vector<int>& xshape, const std::vector<int>& yshape);
auto x_dims = vectorize<int>(x.dims());
int r =
xpu::broadcast_pow(xpu_context, x_data, factor_data, y_data, x_dims, {1});
PADDLE_ENFORCE_XDNN_SUCCESS(r, "broadcast_pow");
}
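// Note: pow is registered below for float only, so copying sizeof(T) bytes of
// the float factor into factor_data is consistent. Conceptually the call above
// computes out[i] = pow(x[i], factor), with the scalar factor broadcast from a
// length-1 device buffer (shape {1}) against x's shape.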
template <typename T>
struct XPUHardSwishFunctor : public funcs::BaseActivationFunctor<T> {
float threshold;
float scale;
float offset;
typename funcs::BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"threshold", &threshold}, {"scale", &scale}, {"offset", &offset}};
}
template <typename Context>
void operator()(const Context& dev_ctx,
const DenseTensor& x,
DenseTensor* out) const {
using XPUType = typename XPUTypeTrait<T>::Type;
    PADDLE_ENFORCE_EQ(
        threshold,
        6.0f,
        errors::External("Unsupported threshold [%f] on XPU", threshold));
    PADDLE_ENFORCE_EQ(
        scale, 6.0f, errors::External("Unsupported scale [%f] on XPU", scale));
    PADDLE_ENFORCE_EQ(
        offset,
        3.0f,
        errors::External("Unsupported offset [%f] on XPU", offset));
int r = xpu_activation_func<Context, T, XPUType>(
dev_ctx, x, out, xpu::hard_swish<XPUType>);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "hard_swish");
}
};
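// With the constants enforced above (threshold = 6, scale = 6, offset = 3) this
// is the canonical hard_swish, i.e. out = x * clip(x + 3, 0, 6) / 6, which is
// what xpu::hard_swish is assumed to compute here.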
template <typename T>
struct XPUReciprocalFunctor : public funcs::BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
template <typename Context>
void operator()(const Context& dev_ctx,
const DenseTensor& x,
DenseTensor* out) const {
int r = xpu_activation_func<Context, T, XPUType>(
dev_ctx, x, out, xpu::reciprocal<XPUType>);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "reciprocal");
}
};
template <typename T>
struct XPUReluFunctor : public funcs::BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
template <typename Context>
void operator()(const Context& dev_ctx,
const DenseTensor& x,
DenseTensor* out) const {
const XPUType* x_data = reinterpret_cast<const XPUType*>(x.data<T>());
XPUType* y_data = reinterpret_cast<XPUType*>(out->data<T>());
auto xpu_context = dev_ctx.x_context();
int r = xpu::relu(xpu_context, x_data, y_data, x.numel(), nullptr, nullptr);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "relu");
}
};
template <typename T>
struct XPURelu6Functor : public funcs::BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
float threshold;
typename funcs::BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"threshold", &threshold}};
}
template <typename Context>
void operator()(const Context& dev_ctx,
const DenseTensor& x,
DenseTensor* out) const {
int r = xpu_activation_func<Context, T, XPUType>(
dev_ctx, x, out, xpu::relu6<XPUType>);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "relu6");
}
};
template <typename T>
struct XPUSigmoidFunctor : public funcs::BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
template <typename Context>
void operator()(const Context& dev_ctx,
const DenseTensor& x,
DenseTensor* out) const {
int r = xpu_activation_func<Context, T, XPUType>(
dev_ctx, x, out, xpu::sigmoid<XPUType>);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "sigmoid");
}
};
template <typename T>
struct XPUSquareFunctor : public funcs::BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
template <typename Context>
void operator()(const Context& dev_ctx,
const DenseTensor& x,
DenseTensor* out) const {
int r = xpu_activation_func<Context, T, XPUType>(
dev_ctx, x, out, xpu::square<XPUType>);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "square");
}
};
template <typename T>
struct XPUSqrtFunctor : public funcs::BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
template <typename Context>
void operator()(const Context& dev_ctx,
const DenseTensor& x,
DenseTensor* out) const {
int r = xpu_activation_func<Context, T, XPUType>(
dev_ctx, x, out, xpu::sqrt<XPUType>);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "sqrt");
}
};
template <typename T>
struct XPUMishFunctor : public funcs::BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
float threshold;
typename funcs::BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"threshold", &threshold}};
}
template <typename Context>
void operator()(const Context& dev_ctx,
const DenseTensor& x,
DenseTensor* out) const {
int r = xpu_activation_1attr_func<Context, T, XPUType>(
dev_ctx, x, out, threshold, xpu::mish<XPUType>);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "mish");
}
};
template <typename T, typename Context>
void SwishKernel(const Context& dev_ctx,
const DenseTensor& x,
float beta,
DenseTensor* out) {
using XPUType = typename XPUTypeTrait<T>::Type;
dev_ctx.template Alloc<T>(out);
int r = xpu::swish(dev_ctx.x_context(),
reinterpret_cast<const XPUType*>(x.data<T>()),
reinterpret_cast<XPUType*>(out->data<T>()),
x.numel());
PADDLE_ENFORCE_XDNN_SUCCESS(r, "swish");
}
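// Note: beta is accepted to match the op signature but is not forwarded; the
// xpu::swish call above is used as a plain (ctx, x, y, len) element-wise op.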
template <typename T>
struct XPUSoftplusFunctor : public funcs::BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
float beta;
float threshold;
typename funcs::BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"beta", &beta}, {"threshold", &threshold}};
}
template <typename Context>
void operator()(const Context& dev_ctx,
const DenseTensor& x,
DenseTensor* out) const {
int r = xpu_activation_2attr_func<Context, T, XPUType>(
dev_ctx, x, out, beta, threshold, xpu::softplus<XPUType>);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "softplus");
}
};
template <typename T>
struct XPUTanhFunctor : public funcs::BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
template <typename Context>
void operator()(const Context& dev_ctx,
const DenseTensor& x,
DenseTensor* out) const {
int r = xpu_activation_func<Context, T, XPUType>(
dev_ctx, x, out, xpu::tanh<XPUType>);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "tanh");
}
};
DEFINE_XPU_ACTIVATION_KERNEL(Exp, XPUExpFunctor)
DEFINE_XPU_ACTIVATION_KERNEL(Log, XPULogFunctor)
DEFINE_XPU_ACTIVATION_KERNEL(Reciprocal, XPUReciprocalFunctor)
DEFINE_XPU_ACTIVATION_KERNEL(Relu, XPUReluFunctor)
DEFINE_XPU_ACTIVATION_KERNEL(Sigmoid, XPUSigmoidFunctor)
DEFINE_XPU_ACTIVATION_KERNEL(Square, XPUSquareFunctor)
DEFINE_XPU_ACTIVATION_KERNEL(Sqrt, XPUSqrtFunctor)
DEFINE_XPU_ACTIVATION_KERNEL(Tanh, XPUTanhFunctor)
DEFINE_XPU_ACTIVATION_KERNEL_WITH_ONE_ATTRS(Mish, XPUMishFunctor, threshold)
DEFINE_XPU_ACTIVATION_KERNEL_WITH_ONE_ATTRS(LeakyRelu,
XPULeakyReluFunctor,
alpha)
DEFINE_XPU_ACTIVATION_KERNEL_WITH_ONE_ATTRS(Relu6, XPURelu6Functor, threshold)
DEFINE_XPU_ACTIVATION_KERNEL_WITH_TWO_ATTRS(Softplus,
XPUSoftplusFunctor,
beta,
threshold)
template <typename T, typename Context>
void HardSwishKernel(const Context& dev_ctx,
const DenseTensor& x,
float threshold,
float scale,
float offset,
DenseTensor* out) {
XPUHardSwishFunctor<T> functor;
auto attrs = functor.GetAttrs();
*(attrs[0].second) = threshold;
*(attrs[1].second) = scale;
*(attrs[2].second) = offset;
ActivationXPUImpl<T, Context, XPUHardSwishFunctor<T>>(
dev_ctx, x, out, functor);
}
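// HardSwish carries three attributes, so it is not covered by the one/two-attr
// macros above; the attribute pointers from GetAttrs() are wired up by hand
// before dispatching through ActivationXPUImpl.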
} // namespace phi
PD_REGISTER_KERNEL(
relu, XPU, ALL_LAYOUT, phi::ReluKernel, float, phi::dtype::float16) {}
#define PD_REGISTER_ACTIVATION_KERNEL(name, func) \
PD_REGISTER_KERNEL(name, XPU, ALL_LAYOUT, phi::func, float) {}
PD_REGISTER_KERNEL(
tanh, XPU, ALL_LAYOUT, phi::TanhKernel, float, phi::dtype::float16) {}
PD_REGISTER_ACTIVATION_KERNEL(exp, ExpKernel) // no grad
PD_REGISTER_ACTIVATION_KERNEL(log, LogKernel)
PD_REGISTER_ACTIVATION_KERNEL(leaky_relu, LeakyReluKernel)
PD_REGISTER_ACTIVATION_KERNEL(hard_swish, HardSwishKernel)
PD_REGISTER_ACTIVATION_KERNEL(mish, MishKernel)
PD_REGISTER_ACTIVATION_KERNEL(pow, PowKernel)
PD_REGISTER_ACTIVATION_KERNEL(reciprocal, ReciprocalKernel)
PD_REGISTER_ACTIVATION_KERNEL(relu6, Relu6Kernel)
PD_REGISTER_ACTIVATION_KERNEL(sigmoid, SigmoidKernel)
PD_REGISTER_ACTIVATION_KERNEL(sqrt, SqrtKernel)
PD_REGISTER_ACTIVATION_KERNEL(swish, SwishKernel)
PD_REGISTER_ACTIVATION_KERNEL(softplus, SoftplusKernel)
PD_REGISTER_ACTIVATION_KERNEL(square, SquareKernel)
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/transpose_grad_kernel.h"
#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/core/kernel_registry.h"
namespace phi {
template <typename T, typename Context>
void TransposeGradKernel(const Context& dev_ctx,
const DenseTensor& out_grad,
const std::vector<int>& axis,
DenseTensor* x_grad) {
using XPUType = typename XPUTypeTrait<T>::Type;
dev_ctx.template Alloc<T>(x_grad);
std::vector<int> reversed_axis(axis);
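  // Build the inverse permutation: the forward transpose maps x[i] to
  // out[axis[i]], so reversed_axis[axis[i]] = i transposes out_grad back.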
for (size_t i = 0; i < axis.size(); i++) {
reversed_axis[axis[i]] = i;
}
int ndims = axis.size();
std::vector<int> out_shape_host(ndims, 0);
for (int i = 0; i < ndims; ++i) {
out_shape_host[i] = out_grad.dims()[i];
}
int r = xpu::transpose<XPUType>(
dev_ctx.x_context(),
reinterpret_cast<const XPUType*>(out_grad.data<T>()),
reinterpret_cast<XPUType*>(x_grad->data<T>()),
out_shape_host,
reversed_axis);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "transpose_grad");
}
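// Worked example (sketch): for a forward axis of {1, 2, 0} the loop above gives
// reversed_axis = {2, 0, 1}; applying xpu::transpose with that permutation to
// out_grad restores the original layout of x for x_grad.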
} // namespace phi
PD_REGISTER_KERNEL(transpose_grad,
XPU,
ALL_LAYOUT,
phi::TransposeGradKernel,
float,
phi::dtype::float16) {}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/transpose_kernel.h"
#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/core/kernel_registry.h"
namespace phi {
template <typename T, typename Context>
void TransposeKernel(const Context& dev_ctx,
const DenseTensor& x,
const std::vector<int>& axis,
DenseTensor* out) {
using XPUType = typename XPUTypeTrait<T>::Type;
if (out->numel() == 0) {
return;
}
dev_ctx.template Alloc<T>(out);
int ndims = axis.size();
std::vector<int> x_shape_host(ndims, 0);
for (int i = 0; i < ndims; ++i) {
x_shape_host[i] = x.dims()[i];
}
int r = xpu::transpose<XPUType>(dev_ctx.x_context(),
reinterpret_cast<const XPUType*>(x.data<T>()),
reinterpret_cast<XPUType*>(out->data<T>()),
x_shape_host,
axis);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "transpose");
}
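// Sketch of the behaviour: a 2 x 3 input with axis = {1, 0} yields a 3 x 2
// output with out(j, i) = x(i, j); empty outputs return early above before any
// device work is issued.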
} // namespace phi
PD_REGISTER_KERNEL(transpose,
XPU,
ALL_LAYOUT,
phi::TransposeKernel,
float,
phi::dtype::float16) {}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/tril_triu_grad_kernel.h"
#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/core/kernel_registry.h"
namespace phi {
template <typename T, typename Context>
void TrilTriuGradKernel(const Context& ctx,
const DenseTensor& out_grad,
int diagonal,
bool lower,
DenseTensor* x_grad) {
using XPUType = typename XPUTypeTrait<T>::Type;
ctx.template Alloc<T>(x_grad);
auto dy_shape = vectorize<int>(out_grad.dims());
int r = 0;
if (lower) {
r = xpu::tril(ctx.x_context(),
reinterpret_cast<const XPUType*>(out_grad.data<T>()),
reinterpret_cast<XPUType*>(x_grad->data<T>()),
dy_shape,
diagonal);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "tril_op");
} else {
r = xpu::triu(ctx.x_context(),
reinterpret_cast<const XPUType*>(out_grad.data<T>()),
reinterpret_cast<XPUType*>(x_grad->data<T>()),
dy_shape,
diagonal);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "triu_op");
}
}
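// Since tril/triu merely selects entries, the gradient is the same selection
// applied to out_grad: entries masked out in the forward pass receive a zero
// gradient, which the xpu::tril / xpu::triu calls above produce by zeroing them.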
} // namespace phi
PD_REGISTER_KERNEL(
tril_triu_grad, XPU, ALL_LAYOUT, phi::TrilTriuGradKernel, int, float) {}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/tril_triu_kernel.h"
#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/core/kernel_registry.h"
namespace phi {
template <typename T, typename Context>
void TrilTriuKernel(const Context& ctx,
const DenseTensor& x,
int diagonal,
bool lower,
DenseTensor* out) {
using XPUType = typename XPUTypeTrait<T>::Type;
ctx.template Alloc<T>(out);
auto xshape = vectorize<int>(x.dims());
int r = 0;
if (lower) {
r = xpu::tril(ctx.x_context(),
reinterpret_cast<const XPUType*>(x.data<T>()),
reinterpret_cast<XPUType*>(out->data<T>()),
xshape,
diagonal);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "tril_op");
} else {
r = xpu::triu(ctx.x_context(),
reinterpret_cast<const XPUType*>(x.data<T>()),
reinterpret_cast<XPUType*>(out->data<T>()),
xshape,
diagonal);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "triu_op");
}
}
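// Semantics (sketch): with lower = true the output keeps x on and below the
// `diagonal`-th diagonal and zeroes the rest (diagonal = 0 is the main
// diagonal, > 0 moves above it, < 0 below it); lower = false keeps the upper
// triangle instead.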
} // namespace phi
PD_REGISTER_KERNEL(
tril_triu, XPU, ALL_LAYOUT, phi::TrilTriuKernel, int, float) {}