/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

// NOTE(review): the reviewed copy of this file had its line breaks collapsed
// and every `<...>` list (include target, template parameters/arguments, cast
// targets) stripped. The layout and the angle-bracket lists below were
// restored from how each name is used in this file; confirm them against
// activation_op.h and the XPU API headers before merging.

#ifdef PADDLE_WITH_XPU

#include "paddle/fluid/operators/activation_op.h"

#include <functional>
#include <memory>
#include <string>

#include "paddle/fluid/platform/xpu_header.h"

namespace paddle {
namespace operators {

using paddle::framework::Tensor;

/**
 * Forward kernel adapter for XPU activation operators.
 *
 * Compute() default-constructs the functor, copies every attribute the
 * functor exposes through GetAttrs() from the execution context into the
 * functor, and then invokes the functor with the context.
 */
template <typename Functor>
class XPUActivationKernel
    : public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
 public:
  void Compute(const framework::ExecutionContext &context) const override {
    Functor functor;

    auto attrs = functor.GetAttrs();
    for (auto &attr : attrs) {
      *attr.second = context.Attr<float>(attr.first);
    }
    functor(context);
  }
};

/**
 * Backward kernel adapter for XPU activation operators.
 *
 * Same flow as XPUActivationKernel: populate the functor's attributes from
 * the execution context, then invoke the functor with the context.
 */
template <typename Functor>
class XPUActivationGradKernel
    : public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
 public:
  void Compute(const framework::ExecutionContext &context) const override {
    Functor functor;

    auto attrs = functor.GetAttrs();
    for (auto &attr : attrs) {
      *attr.second = context.Attr<float>(attr.first);
    }
    functor(context);
  }
};

/**
 * Runs an element-wise forward activation.
 *
 * Reads input "X" and output "Out" from @p ctx, allocates "Out" on
 * ctx.GetPlace(), and calls func(xpu_context, x_data, y_data, x->numel()).
 * The call's return code is enforced to equal xpu::Error_t::SUCCESS.
 *
 * @param ctx   Execution context providing the tensors and device context.
 * @param func  XPU API entry point implementing the activation.
 */
template <typename DeviceContext, typename T>
void xpu_activation_forward(
    const framework::ExecutionContext &ctx,
    std::function<int(xpu::Context *, const T *, T *, int)> func) {
  const auto *x = ctx.Input<Tensor>("X");
  auto *y = ctx.Output<Tensor>("Out");
  const T *x_data = x->data<T>();
  T *y_data = y->mutable_data<T>(ctx.GetPlace());

  auto xpu_context = ctx.device_context<DeviceContext>().x_context();
  int r = func(xpu_context, x_data, y_data, x->numel());
  PADDLE_ENFORCE_EQ(
      r, xpu::Error_t::SUCCESS,
      platform::errors::External("XPU activation op return wrong value[%d %s].",
                                 r, XPUAPIErrorMsg[r]));
}

/**
 * Runs an element-wise backward activation.
 *
 * "X", "Out" and the gradient of "Out" are looked up as inputs; each one that
 * is absent is passed to @p func as nullptr. The gradient of "X" is allocated
 * on ctx.GetPlace() and receives the result. The call's return code is
 * enforced to equal xpu::Error_t::SUCCESS.
 *
 * @param ctx   Execution context providing the tensors and device context.
 * @param func  XPU API entry point taking (context, x, y, dy, dx, length).
 */
template <typename DeviceContext, typename T>
void xpu_activation_backward(
    const framework::ExecutionContext &ctx,
    std::function<int(xpu::Context *, const T *, const T *, const T *, T *,
                      int)>
        func) {
  /* TODO: relu tanh sigmoid are inplace */
  const auto *x = ctx.Input<Tensor>("X");
  auto *y = ctx.Input<Tensor>("Out");
  auto *dOut = ctx.Input<Tensor>(framework::GradVarName("Out"));
  auto *dX = ctx.Output<Tensor>(framework::GradVarName("X"));
  const T *x_data = nullptr;
  const T *y_data = nullptr;
  const T *y_grad = nullptr;
  if (x != nullptr) x_data = x->data<T>();
  if (y != nullptr) y_data = y->data<T>();
  if (dOut != nullptr) y_grad = dOut->data<T>();
  T *x_grad = dX->mutable_data<T>(ctx.GetPlace());
  auto xpu_context = ctx.device_context<DeviceContext>().x_context();

  int r = func(xpu_context, x_data, y_data, y_grad, x_grad, dX->numel());
  PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
                    platform::errors::External(
                        "XPU activation grad op return wrong value[%d %s].", r,
                        XPUAPIErrorMsg[r]));
}

/** Forward relu: dispatches to xpu::relu through xpu_activation_forward. */
template <typename T>
struct XPUReluFunctor : public BaseActivationFunctor<T> {
  void operator()(const framework::ExecutionContext &ctx) const {
    xpu_activation_forward<paddle::platform::XPUDeviceContext, T>(
        ctx, xpu::relu<T>);
  }
};

/** Forward sigmoid: dispatches to xpu::sigmoid. */
template <typename T>
struct XPUSigmoidFunctor : public BaseActivationFunctor<T> {
  void operator()(const framework::ExecutionContext &ctx) const {
    xpu_activation_forward<paddle::platform::XPUDeviceContext, T>(
        ctx, xpu::sigmoid<T>);
  }
};

/** Forward tanh: dispatches to xpu::tanh. */
template <typename T>
struct XPUTanhFunctor : public BaseActivationFunctor<T> {
  void operator()(const framework::ExecutionContext &ctx) const {
    xpu_activation_forward<paddle::platform::XPUDeviceContext, T>(
        ctx, xpu::tanh<T>);
  }
};

/** Forward gelu: dispatches to xpu::gelu. */
template <typename T>
struct XPUGeluFunctor : public BaseActivationFunctor<T> {
  void operator()(const framework::ExecutionContext &ctx) const {
    xpu_activation_forward<paddle::platform::XPUDeviceContext, T>(
        ctx, xpu::gelu<T>);
  }
};

/** Forward log: dispatches to xpu::log. */
template <typename T>
struct XPULogFunctor : public BaseActivationFunctor<T> {
  void operator()(const framework::ExecutionContext &ctx) const {
    xpu_activation_forward<paddle::platform::XPUDeviceContext, T>(
        ctx, xpu::log<T>);
  }
};

/** Forward square: dispatches to xpu::square. */
template <typename T>
struct XPUSquareFunctor : public BaseActivationFunctor<T> {
  void operator()(const framework::ExecutionContext &ctx) const {
    xpu_activation_forward<paddle::platform::XPUDeviceContext, T>(
        ctx, xpu::square<T>);
  }
};

/** Forward sqrt: dispatches to xpu::sqrt. */
template <typename T>
struct XPUSqrtFunctor : public BaseActivationFunctor<T> {
  void operator()(const framework::ExecutionContext &ctx) const {
    xpu_activation_forward<paddle::platform::XPUDeviceContext, T>(
        ctx, xpu::sqrt<T>);
  }
};

/** Forward abs: dispatches to xpu::abs. */
template <typename T>
struct XPUAbsFunctor : public BaseActivationFunctor<T> {
  void operator()(const framework::ExecutionContext &ctx) const {
    xpu_activation_forward<paddle::platform::XPUDeviceContext, T>(
        ctx, xpu::abs<T>);
  }
};

/**
 * Forward pow: Out = pow(X, factor) with the scalar attribute "factor".
 *
 * xpu::pow is called with a device buffer of exponents, so a scratch buffer
 * with X's element count is allocated and filled with the factor first.
 */
template <typename T>
struct XPUPowFunctor : public BaseActivationFunctor<T> {
  void operator()(const framework::ExecutionContext &ctx) const {
    const auto *x = ctx.Input<Tensor>("X");
    auto *y = ctx.Output<Tensor>("Out");
    auto pow_factor = ctx.Attr<float>("factor");
    const T *x_data = x->data<T>();
    T *y_data = y->mutable_data<T>(ctx.GetPlace());
    T *factor_data = nullptr;

    auto xpu_context =
        ctx.device_context<paddle::platform::XPUDeviceContext>().x_context();
    PADDLE_ENFORCE_EQ(xpu_malloc(reinterpret_cast<void **>(&factor_data),
                                 x->numel() * sizeof(T)),
                      XPU_SUCCESS, platform::errors::ResourceExhausted(
                                       "XPU has no enough memory"));

    // Tie the scratch buffer to this scope so that it is released on every
    // exit path, including when one of the PADDLE_ENFORCE_EQ checks below
    // fails. The deleter waits on the stream before freeing, matching the
    // wait-then-free order of the success path.
    auto release_factor = [xpu_context](T *buffer) {
      if (xpu_context->xpu_stream != nullptr) {
        xpu_wait(xpu_context->xpu_stream);
      }
      xpu_free(buffer);
    };
    std::unique_ptr<T, decltype(release_factor)> factor_guard(factor_data,
                                                              release_factor);

    int r =
        xpu::constant<T>(xpu_context, factor_data, x->numel(), pow_factor);
    PADDLE_ENFORCE_EQ(
        r, xpu::Error_t::SUCCESS,
        platform::errors::External("XPU constant op return"
                                   " wrong value[%d %s] in pow op.",
                                   r, XPUAPIErrorMsg[r]));
    r = xpu::pow(xpu_context, x_data, factor_data, y_data, x->numel());
    PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
                      platform::errors::External("XPU pow op return"
                                                 " wrong value[%d %s].",
                                                 r, XPUAPIErrorMsg[r]));
  }
};

/**
 * Forward hard_swish. Only threshold == 6, scale == 6 and offset == 3 are
 * accepted; any other attribute value is rejected before dispatching.
 */
template <typename T>
struct XPUHardSwishFunctor : public BaseActivationFunctor<T> {
  void operator()(const framework::ExecutionContext &ctx) const {
    float threshold = ctx.Attr<float>("threshold");
    float scale = ctx.Attr<float>("scale");
    float offset = ctx.Attr<float>("offset");
    PADDLE_ENFORCE_EQ(threshold, 6.0f,
                      platform::errors::External(
                          "Not support threshold [%f] in XPU", threshold));
    PADDLE_ENFORCE_EQ(scale, 6.0f, platform::errors::External(
                                       "Not support scale [%f] in XPU", scale));
    PADDLE_ENFORCE_EQ(
        offset, 3.0f,
        platform::errors::External("Not support offset [%f] in XPU", offset));
    xpu_activation_forward<paddle::platform::XPUDeviceContext, T>(
        ctx, xpu::hard_swish<T>);
  }
};

/** Backward relu: dispatches to xpu::relu_grad. */
template <typename T>
struct XPUReluGradFunctor : public BaseActivationFunctor<T> {
  void operator()(const framework::ExecutionContext &ctx) const {
    xpu_activation_backward<paddle::platform::XPUDeviceContext, T>(
        ctx, xpu::relu_grad<T>);
  }
};

/** Backward tanh: dispatches to xpu::tanh_grad. */
template <typename T>
struct XPUTanhGradFunctor : public BaseActivationFunctor<T> {
  void operator()(const framework::ExecutionContext &ctx) const {
    xpu_activation_backward<paddle::platform::XPUDeviceContext, T>(
        ctx, xpu::tanh_grad<T>);
  }
};

/** Backward sigmoid: dispatches to xpu::sigmoid_grad. */
template <typename T>
struct XPUSigmoidGradFunctor : public BaseActivationFunctor<T> {
  void operator()(const framework::ExecutionContext &ctx) const {
    xpu_activation_backward<paddle::platform::XPUDeviceContext, T>(
        ctx, xpu::sigmoid_grad<T>);
  }
};

/** Backward gelu: dispatches to xpu::gelu_grad. */
template <typename T>
struct XPUGeluGradFunctor : public BaseActivationFunctor<T> {
  void operator()(const framework::ExecutionContext &ctx) const {
    xpu_activation_backward<paddle::platform::XPUDeviceContext, T>(
        ctx, xpu::gelu_grad<T>);
  }
};

/** Backward sqrt: dispatches to xpu::sqrt_grad. */
template <typename T>
struct XPUSqrtGradFunctor : public BaseActivationFunctor<T> {
  void operator()(const framework::ExecutionContext &ctx) const {
    xpu_activation_backward<paddle::platform::XPUDeviceContext, T>(
        ctx, xpu::sqrt_grad<T>);
  }
};

/** Backward square: dispatches to xpu::square_grad. */
template <typename T>
struct XPUSquareGradFunctor : public BaseActivationFunctor<T> {
  void operator()(const framework::ExecutionContext &ctx) const {
    xpu_activation_backward<paddle::platform::XPUDeviceContext, T>(
        ctx, xpu::square_grad<T>);
  }
};

/**
 * Backward hard_swish. Applies the same attribute restrictions as the forward
 * functor (threshold == 6, scale == 6, offset == 3) before dispatching.
 */
template <typename T>
struct XPUHardSwishGradFunctor : public BaseActivationFunctor<T> {
  void operator()(const framework::ExecutionContext &ctx) const {
    float threshold = ctx.Attr<float>("threshold");
    float scale = ctx.Attr<float>("scale");
    float offset = ctx.Attr<float>("offset");
    PADDLE_ENFORCE_EQ(threshold, 6.0f,
                      platform::errors::External(
                          "Not support threshold [%f] in XPU", threshold));
    PADDLE_ENFORCE_EQ(scale, 6.0f, platform::errors::External(
                                       "Not support scale [%f] in XPU", scale));
    PADDLE_ENFORCE_EQ(
        offset, 3.0f,
        platform::errors::External("Not support offset [%f] in XPU", offset));
    xpu_activation_backward<paddle::platform::XPUDeviceContext, T>(
        ctx, xpu::hard_swish_grad<T>);
  }
};

/** Forward leaky_relu with the slope taken from the "alpha" attribute. */
template <typename T>
struct XPULeakyReluFunctor : public BaseActivationFunctor<T> {
  void operator()(const framework::ExecutionContext &ctx) const {
    const auto *x = ctx.Input<Tensor>("X");
    auto *y = ctx.Output<Tensor>("Out");
    float alpha = ctx.Attr<float>("alpha");
    const T *x_data = x->data<T>();
    T *y_data = y->mutable_data<T>(ctx.GetPlace());

    auto xpu_context =
        ctx.device_context<paddle::platform::XPUDeviceContext>().x_context();
    int r = xpu::leaky_relu(xpu_context, x_data, y_data, x->numel(), alpha);
    PADDLE_ENFORCE_EQ(
        r, xpu::Error_t::SUCCESS,
        platform::errors::External("XPU leaky_relu return wrong value[%d %s].",
                                   r, XPUAPIErrorMsg[r]));
  }
};

/**
 * Backward leaky_relu with the slope taken from the "alpha" attribute.
 *
 * "Out" is not read here, so X is passed in both the x and y positions of
 * xpu::leaky_relu_grad (see the comment at the call).
 */
template <typename T>
struct XPULeakyReluGradFunctor : public BaseActivationFunctor<T> {
  void operator()(const framework::ExecutionContext &ctx) const {
    const auto *x = ctx.Input<Tensor>("X");
    auto *dOut = ctx.Input<Tensor>(framework::GradVarName("Out"));
    auto *dX = ctx.Output<Tensor>(framework::GradVarName("X"));
    float alpha = ctx.Attr<float>("alpha");
    const T *x_data = nullptr;
    const T *y_grad = nullptr;
    if (x != nullptr) x_data = x->data<T>();
    if (dOut != nullptr) y_grad = dOut->data<T>();
    T *x_grad = dX->mutable_data<T>(ctx.GetPlace());
    auto xpu_context =
        ctx.device_context<paddle::platform::XPUDeviceContext>().x_context();

    // The signs of x and y are the same,
    // y == nullptr here,
    // so we give 2 x to the api
    int r = xpu::leaky_relu_grad(
        xpu_context, reinterpret_cast<const float *>(x_data),
        reinterpret_cast<const float *>(x_data),
        reinterpret_cast<const float *>(y_grad),
        reinterpret_cast<float *>(x_grad), dX->numel(), alpha);
    PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
                      platform::errors::External(
                          "XPU leaky_relu_grad return wrong value[%d %s].", r,
                          XPUAPIErrorMsg[r]));
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;

// Registers the float forward kernel for `act_type` and the float backward
// kernel for `act_type`_grad in one step.
#define REGISTER_ACTIVATION_XPU_KERNEL(act_type, functor, grad_functor)  \
  REGISTER_OP_XPU_KERNEL(act_type,                                       \
                         ops::XPUActivationKernel<ops::functor<float>>); \
  REGISTER_OP_XPU_KERNEL(                                                \
      act_type##_grad,                                                   \
      ops::XPUActivationGradKernel<ops::grad_functor<float>>);

REGISTER_ACTIVATION_XPU_KERNEL(relu, XPUReluFunctor, XPUReluGradFunctor)
REGISTER_ACTIVATION_XPU_KERNEL(tanh, XPUTanhFunctor, XPUTanhGradFunctor)
REGISTER_ACTIVATION_XPU_KERNEL(sigmoid, XPUSigmoidFunctor,
                               XPUSigmoidGradFunctor)
REGISTER_ACTIVATION_XPU_KERNEL(gelu, XPUGeluFunctor, XPUGeluGradFunctor)
REGISTER_ACTIVATION_XPU_KERNEL(sqrt, XPUSqrtFunctor, XPUSqrtGradFunctor)
REGISTER_ACTIVATION_XPU_KERNEL(square, XPUSquareFunctor, XPUSquareGradFunctor)
REGISTER_ACTIVATION_XPU_KERNEL(hard_swish, XPUHardSwishFunctor,
                               XPUHardSwishGradFunctor)
REGISTER_ACTIVATION_XPU_KERNEL(leaky_relu, XPULeakyReluFunctor,
                               XPULeakyReluGradFunctor)

// Forward-only registrations: no *_grad kernel is registered for these here.
REGISTER_OP_XPU_KERNEL(log,
                       ops::XPUActivationKernel<ops::XPULogFunctor<float>>);
REGISTER_OP_XPU_KERNEL(pow,
                       ops::XPUActivationKernel<ops::XPUPowFunctor<float>>);
REGISTER_OP_XPU_KERNEL(abs,
                       ops::XPUActivationKernel<ops::XPUAbsFunctor<float>>);

#endif  // PADDLE_WITH_XPU