/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/phi/kernels/activation_kernel.h"

#include <cstdlib>
#include <functional>
#include <vector>

#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/common/memory_utils.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/activation_functor.h"

namespace phi {

// Shared entry point for functor-based XPU activation kernels.
//
// Checks that `out` is non-null, allocates its storage for element type T on
// the device context, then lets `functor` compute `out` from `x`.
template <typename T, typename Context, typename Functor>
void ActivationXPUImpl(const Context& dev_ctx,
                       const DenseTensor& x,
                       DenseTensor* out,
                       const Functor& functor) {
  PADDLE_ENFORCE_NOT_NULL(out,
                          errors::NotFound("Output Out should not be nullptr"));
  dev_ctx.template Alloc<T>(out);
  functor(dev_ctx, x, out);
}

// Defines `<name>Kernel(dev_ctx, x, out)` for an attribute-free activation by
// default-constructing `functor_class<T>` and running it through
// ActivationXPUImpl.
#define DEFINE_XPU_ACTIVATION_KERNEL(name, functor_class)               \
  template <typename T, typename Context>                               \
  void name##Kernel(                                                    \
      const Context& dev_ctx, const DenseTensor& x, DenseTensor* out) { \
    functor_class<T> functor;                                           \
    ActivationXPUImpl<T, Context, functor_class<T>>(                    \
        dev_ctx, x, out, functor);                                      \
  }

// Defines `<name>Kernel(dev_ctx, x, attr, out)` for an activation with one
// float attribute. The value is written through the first pointer returned by
// the functor's GetAttrs() before the functor runs.
#define DEFINE_XPU_ACTIVATION_KERNEL_WITH_ONE_ATTRS(name, functor_class, attr) \
  template <typename T, typename Context>                                      \
  void name##Kernel(const Context& dev_ctx,                                    \
                    const DenseTensor& x,                                      \
                    float attr,                                                \
                    DenseTensor* out) {                                        \
    functor_class<T> functor;                                                  \
    auto attrs = functor.GetAttrs();                                           \
    *(attrs[0].second) = attr;                                                 \
    ActivationXPUImpl<T, Context, functor_class<T>>(                           \
        dev_ctx, x, out, functor);                                             \
  }

// Defines `<name>Kernel(dev_ctx, x, attr1, attr2, out)` for an activation with
// two float attributes. The values are written through the first and second
// pointers returned by the functor's GetAttrs(), in that order.
#define DEFINE_XPU_ACTIVATION_KERNEL_WITH_TWO_ATTRS(       \
    name, functor_class, attr1, attr2)                     \
  template <typename T, typename Context>                  \
  void name##Kernel(const Context& dev_ctx,                \
                    const DenseTensor& x,                  \
                    float attr1,                           \
                    float attr2,                           \
                    DenseTensor* out) {                    \
    functor_class<T> functor;                              \
    auto attrs = functor.GetAttrs();                       \
    *(attrs[0].second) = attr1;                            \
    *(attrs[1].second) = attr2;                            \
    ActivationXPUImpl<T, Context, functor_class<T>>(       \
        dev_ctx, x, out, functor);                         \
  }

// Invokes `func(ctx, x_ptr, out_ptr, numel)` with the tensors' data pointers
// reinterpreted from T to XPUType, and returns func's status code unchanged.
// `out` must already be allocated.
template <typename Context, typename T, typename XPUType>
int xpu_activation_func(
    const Context& dev_ctx,
    const DenseTensor& x,
    DenseTensor* out,
    std::function<int(xpu::Context*, const XPUType*, XPUType*, int)> func) {
  int r = func(dev_ctx.x_context(),
               reinterpret_cast<const XPUType*>(x.data<T>()),
               reinterpret_cast<XPUType*>(out->data<T>()),
               x.numel());
  return r;
}

// Same as xpu_activation_func, for routines whose signature ends with
// `const float* max_x, float* max_y`; both are always passed as nullptr.
template <typename Context, typename T, typename XPUType>
int xpu_activation_func_with_max_x_y(
    const Context& dev_ctx,
    const DenseTensor& x,
    DenseTensor* out,
    std::function<
        int(xpu::Context*, const XPUType*, XPUType*, int, const float*, float*)>
        func) {
  // does not support "const float* max_x, float* max_y" now
  int r = func(dev_ctx.x_context(),
               reinterpret_cast<const XPUType*>(x.data<T>()),
               reinterpret_cast<XPUType*>(out->data<T>()),
               x.numel(),
               nullptr,
               nullptr);
  return r;
}

// Variant for routines that take one float attribute between the element count
// and the (always nullptr) max_x / max_y pointers.
template <typename Context, typename T, typename XPUType>
int xpu_activation_1attr_func(const Context& dev_ctx,
                              const DenseTensor& x,
                              DenseTensor* out,
                              float attr,
                              std::function<int(xpu::Context*,
                                                const XPUType*,
                                                XPUType*,
                                                int,
                                                float,
                                                const float*,
                                                float*)> func) {
  // does not support "const float* max_x, float* max_y" now
  int r = func(dev_ctx.x_context(),
               reinterpret_cast<const XPUType*>(x.data<T>()),
               reinterpret_cast<XPUType*>(out->data<T>()),
               x.numel(),
               attr,
               nullptr,
               nullptr);
  return r;
}

// Variant for routines that take two float attributes between the element
// count and the (always nullptr) max_x / max_y pointers.
template <typename Context, typename T, typename XPUType>
int xpu_activation_2attr_func(const Context& dev_ctx,
                              const DenseTensor& x,
                              DenseTensor* out,
                              float attr1,
                              float attr2,
                              std::function<int(xpu::Context*,
                                                const XPUType*,
                                                XPUType*,
                                                int,
                                                float,
                                                float,
                                                const float*,
                                                float*)> func) {
  // does not support "const float* max_x, float* max_y" now
  int r = func(dev_ctx.x_context(),
               reinterpret_cast<const XPUType*>(x.data<T>()),
               reinterpret_cast<XPUType*>(out->data<T>()),
               x.numel(),
               attr1,
               attr2,
               nullptr,
               nullptr);
  return r;
}

// Activation functor that forwards to xpu::exp.
template <typename T>
struct XPUExpFunctor : public funcs::BaseActivationFunctor<T> {
  using XPUType = typename XPUTypeTrait<T>::Type;
  template <typename Context>
  void operator()(const Context& dev_ctx,
                  const DenseTensor& x,
                  DenseTensor* out) const {
    int r = xpu_activation_func<Context, T, XPUType>(
        dev_ctx, x, out, xpu::exp<XPUType>);
    PADDLE_ENFORCE_XDNN_SUCCESS(r, "exp");
  }
};

// Activation functor that forwards to xpu::log.
template <typename T>
struct XPULogFunctor : public funcs::BaseActivationFunctor<T> {
  using XPUType = typename XPUTypeTrait<T>::Type;
  template <typename Context>
  void operator()(const Context& dev_ctx,
                  const DenseTensor& x,
                  DenseTensor* out) const {
    int r = xpu_activation_func<Context, T, XPUType>(
        dev_ctx, x, out, xpu::log<XPUType>);
    PADDLE_ENFORCE_XDNN_SUCCESS(r, "log");
  }
};

// Activation functor that forwards to xpu::leaky_relu with attribute `alpha`.
template <typename T>
struct XPULeakyReluFunctor : public funcs::BaseActivationFunctor<T> {
  float alpha;
  typename funcs::BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"alpha", &alpha}};
  }

  template <typename Context>
  void operator()(const Context& dev_ctx,
                  const DenseTensor& x,
                  DenseTensor* out) const {
    using XPUType = typename XPUTypeTrait<T>::Type;
    int r = xpu_activation_1attr_func<Context, T, XPUType>(
        dev_ctx, x, out, alpha, xpu::leaky_relu<XPUType>);
    PADDLE_ENFORCE_XDNN_SUCCESS(r, "leaky_relu");
  }
};

// Raises every element of `x` to the scalar power `factor` and stores the
// result in `out`.
//
// The exponent is copied into a one-element device buffer and applied through
// xpu::broadcast_pow with the exponent shape given as {1}.
template <typename T, typename Context>
void PowKernel(const Context& dev_ctx,
               const DenseTensor& x,
               const Scalar& factor,
               DenseTensor* out) {
  dev_ctx.template Alloc<T>(out);
  // Stage the exponent on the host as T: the copy below transfers sizeof(T)
  // bytes into a T-typed device buffer, so the host value must have that exact
  // representation (a float staging variable is only correct when T is float).
  T pow_factor = factor.to<T>();
  const T* x_data = x.data<T>();
  T* y_data = out->data<T>();

  auto xpu_context = dev_ctx.x_context();
  // allocate temp memory for factor on xpu
  xpu::ctx_guard RAII_GUARD(xpu_context);
  T* factor_data = RAII_GUARD.alloc_l3_or_gm<T>(1);
  PADDLE_ENFORCE_NOT_NULL(
      factor_data, errors::External("XPU alloc_l3_or_gm returns nullptr"));
  memory_utils::Copy(dev_ctx.GetPlace(),
                     static_cast<void*>(factor_data),
                     phi::CPUPlace(),
                     static_cast<const void*>(&pow_factor),
                     sizeof(T));

  auto x_dims = vectorize<int>(x.dims());
  // use [1] to replace [], because xpu not support []
  if (x_dims.size() == 0) {
    x_dims = std::vector<int>({1});
  }

  // broadcast_pow(Context* ctx, const T* x, const T* y, T* z, const
  // std::vector<int>& xshape, const std::vector<int>& yshape);
  int r =
      xpu::broadcast_pow(xpu_context, x_data, factor_data, y_data, x_dims, {1});
  PADDLE_ENFORCE_XDNN_SUCCESS(r, "broadcast_pow");
}

// Activation functor that forwards to xpu::hard_sigmoid with attribute
// `slope`.
// NOTE(review): `offset` is stored via GetAttrs() but never forwarded to
// xpu::hard_sigmoid; confirm the device routine's built-in offset matches
// what callers pass.
template <typename T>
struct XPUHardSigmoidFunctor : public funcs::BaseActivationFunctor<T> {
  float slope;
  float offset;
  typename funcs::BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"slope", &slope}, {"offset", &offset}};
  }

  template <typename Context>
  void operator()(const Context& dev_ctx,
                  const DenseTensor& x,
                  DenseTensor* out) const {
    using XPUType = typename XPUTypeTrait<T>::Type;
    int r = xpu_activation_1attr_func<Context, T, XPUType>(
        dev_ctx, x, out, slope, xpu::hard_sigmoid<XPUType>);
    PADDLE_ENFORCE_XDNN_SUCCESS(r, "hard_sigmoid");
  }
};

// Activation functor that forwards to xpu::hard_swish.
//
// The attributes are not forwarded to the device routine; instead the functor
// rejects anything other than threshold == 6, scale == 6 and offset == 3.
template <typename T>
struct XPUHardSwishFunctor : public funcs::BaseActivationFunctor<T> {
  float threshold;
  float scale;
  float offset;
  typename funcs::BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"threshold", &threshold}, {"scale", &scale}, {"offset", &offset}};
  }

  template <typename Context>
  void operator()(const Context& dev_ctx,
                  const DenseTensor& x,
                  DenseTensor* out) const {
    using XPUType = typename XPUTypeTrait<T>::Type;
    PADDLE_ENFORCE_EQ(
        threshold,
        6.0f,
        errors::External("Not support threshold [%f] in XPU", threshold));
    PADDLE_ENFORCE_EQ(
        scale, 6.0f, errors::External("Not support scale [%f] in XPU", scale));
    PADDLE_ENFORCE_EQ(
        offset,
        3.0f,
        errors::External("Not support offset [%f] in XPU", offset));
    int r = xpu_activation_func_with_max_x_y<Context, T, XPUType>(
        dev_ctx, x, out, xpu::hard_swish<XPUType>);
    PADDLE_ENFORCE_XDNN_SUCCESS(r, "hard_swish");
  }
};

// Activation functor that forwards to xpu::reciprocal.
template <typename T>
struct XPUReciprocalFunctor : public funcs::BaseActivationFunctor<T> {
  using XPUType = typename XPUTypeTrait<T>::Type;
  template <typename Context>
  void operator()(const Context& dev_ctx,
                  const DenseTensor& x,
                  DenseTensor* out) const {
    int r = xpu_activation_func<Context, T, XPUType>(
        dev_ctx, x, out, xpu::reciprocal<XPUType>);
    PADDLE_ENFORCE_XDNN_SUCCESS(r, "reciprocal");
  }
};

// Activation functor that calls xpu::relu directly, passing nullptr for the
// two trailing pointer arguments.
template <typename T>
struct XPUReluFunctor : public funcs::BaseActivationFunctor<T> {
  using XPUType = typename XPUTypeTrait<T>::Type;
  template <typename Context>
  void operator()(const Context& dev_ctx,
                  const DenseTensor& x,
                  DenseTensor* out) const {
    const XPUType* x_data = reinterpret_cast<const XPUType*>(x.data<T>());
    XPUType* y_data = reinterpret_cast<XPUType*>(out->data<T>());

    auto xpu_context = dev_ctx.x_context();
    int r =
        xpu::relu<XPUType>(xpu_context, x_data, y_data, x.numel(), nullptr, nullptr);
    PADDLE_ENFORCE_XDNN_SUCCESS(r, "relu");
  }
};

// Activation functor that forwards to xpu::relu6.
// NOTE(review): `threshold` is stored via GetAttrs() but never forwarded to
// xpu::relu6; confirm the device routine's clamp value matches what callers
// pass.
template <typename T>
struct XPURelu6Functor : public funcs::BaseActivationFunctor<T> {
  using XPUType = typename XPUTypeTrait<T>::Type;
  float threshold;
  typename funcs::BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"threshold", &threshold}};
  }
  template <typename Context>
  void operator()(const Context& dev_ctx,
                  const DenseTensor& x,
                  DenseTensor* out) const {
    int r = xpu_activation_func_with_max_x_y<Context, T, XPUType>(
        dev_ctx, x, out, xpu::relu6<XPUType>);
    PADDLE_ENFORCE_XDNN_SUCCESS(r, "relu6");
  }
};

// Activation functor for SiLU.
//
// Calls xpu::fast_swish when the environment variable XPU_PADDLE_ACT_LUT is
// set (to any value) and xpu::swish otherwise.
template <typename T>
struct XPUSiluFunctor : public funcs::BaseActivationFunctor<T> {
  using XPUType = typename XPUTypeTrait<T>::Type;
  template <typename Context>
  void operator()(const Context& dev_ctx,
                  const DenseTensor& x,
                  DenseTensor* out) const {
    // ActivationXPUImpl also allocates `out` before invoking the functor; this
    // repeated call is kept from the original implementation.
    dev_ctx.template Alloc<T>(out);
    const XPUType* x_data = reinterpret_cast<const XPUType*>(x.data<T>());
    XPUType* y_data = reinterpret_cast<XPUType*>(out->data<T>());

    auto xpu_context = dev_ctx.x_context();
    if (std::getenv("XPU_PADDLE_ACT_LUT") != nullptr) {
      int r = xpu::fast_swish<XPUType>(
          xpu_context, x_data, y_data, x.numel(), nullptr, nullptr);
      PADDLE_ENFORCE_XDNN_SUCCESS(r, "fast_swish");
    } else {
      int r = xpu::swish<XPUType>(
          xpu_context, x_data, y_data, x.numel(), nullptr, nullptr);
      PADDLE_ENFORCE_XDNN_SUCCESS(r, "swish");
    }
  }
};

// Activation functor that forwards to xpu::sigmoid.
template <typename T>
struct XPUSigmoidFunctor : public funcs::BaseActivationFunctor<T> {
  using XPUType = typename XPUTypeTrait<T>::Type;
  template <typename Context>
  void operator()(const Context& dev_ctx,
                  const DenseTensor& x,
                  DenseTensor* out) const {
    int r = xpu_activation_func_with_max_x_y<Context, T, XPUType>(
        dev_ctx, x, out, xpu::sigmoid<XPUType>);
    PADDLE_ENFORCE_XDNN_SUCCESS(r, "sigmoid");
  }
};

// Activation functor that forwards to xpu::square.
template <typename T>
struct XPUSquareFunctor : public funcs::BaseActivationFunctor<T> {
  using XPUType = typename XPUTypeTrait<T>::Type;
  template <typename Context>
  void operator()(const Context& dev_ctx,
                  const DenseTensor& x,
                  DenseTensor* out) const {
    int r = xpu_activation_func<Context, T, XPUType>(
        dev_ctx, x, out, xpu::square<XPUType>);
    PADDLE_ENFORCE_XDNN_SUCCESS(r, "square");
  }
};

// Activation functor that forwards to xpu::sqrt.
template <typename T>
struct XPUSqrtFunctor : public funcs::BaseActivationFunctor<T> {
  using XPUType = typename XPUTypeTrait<T>::Type;
  template <typename Context>
  void operator()(const Context& dev_ctx,
                  const DenseTensor& x,
                  DenseTensor* out) const {
    int r = xpu_activation_func<Context, T, XPUType>(
        dev_ctx, x, out, xpu::sqrt<XPUType>);
    PADDLE_ENFORCE_XDNN_SUCCESS(r, "sqrt");
  }
};

// Activation functor that forwards to xpu::mish with attribute `threshold`.
template <typename T>
struct XPUMishFunctor : public funcs::BaseActivationFunctor<T> {
  using XPUType = typename XPUTypeTrait<T>::Type;
  float threshold;
  typename funcs::BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"threshold", &threshold}};
  }

  template <typename Context>
  void operator()(const Context& dev_ctx,
                  const DenseTensor& x,
                  DenseTensor* out) const {
    int r = xpu_activation_1attr_func<Context, T, XPUType>(
        dev_ctx, x, out, threshold, xpu::mish<XPUType>);
    PADDLE_ENFORCE_XDNN_SUCCESS(r, "mish");
  }
};

// Swish kernel: allocates `out` and applies xpu::swish to `x`.
// NOTE(review): `beta` is accepted but not used by this implementation;
// confirm callers only rely on the routine's built-in behaviour.
template <typename T, typename Context>
void SwishRawKernel(const Context& dev_ctx,
                    const DenseTensor& x,
                    float beta,
                    DenseTensor* out) {
  using XPUType = typename XPUTypeTrait<T>::Type;
  dev_ctx.template Alloc<T>(out);
  int r = xpu::swish(dev_ctx.x_context(),
                     reinterpret_cast<const XPUType*>(x.data<T>()),
                     reinterpret_cast<XPUType*>(out->data<T>()),
                     x.numel());
  PADDLE_ENFORCE_XDNN_SUCCESS(r, "swish");
}

// Activation functor that forwards to xpu::softplus with attributes `beta`
// and `threshold`, in that order.
template <typename T>
struct XPUSoftplusFunctor : public funcs::BaseActivationFunctor<T> {
  using XPUType = typename XPUTypeTrait<T>::Type;
  float beta;
  float threshold;

  typename funcs::BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"beta", &beta}, {"threshold", &threshold}};
  }

  template <typename Context>
  void operator()(const Context& dev_ctx,
                  const DenseTensor& x,
                  DenseTensor* out) const {
    int r = xpu_activation_2attr_func<Context, T, XPUType>(
        dev_ctx, x, out, beta, threshold, xpu::softplus<XPUType>);
    PADDLE_ENFORCE_XDNN_SUCCESS(r, "softplus");
  }
};

// Activation functor that forwards to xpu::tanh.
template <typename T>
struct XPUTanhFunctor : public funcs::BaseActivationFunctor<T> {
  using XPUType = typename XPUTypeTrait<T>::Type;
  template <typename Context>
  void operator()(const Context& dev_ctx,
                  const DenseTensor& x,
                  DenseTensor* out) const {
    int r = xpu_activation_func_with_max_x_y<Context, T, XPUType>(
        dev_ctx, x, out, xpu::tanh<XPUType>);
    PADDLE_ENFORCE_XDNN_SUCCESS(r, "tanh");
  }
};

// Activation functor that forwards to xpu::floor.
template <typename T>
struct XPUFloorFunctor : public funcs::BaseActivationFunctor<T> {
  using XPUType = typename XPUTypeTrait<T>::Type;
  template <typename Context>
  void operator()(const Context& dev_ctx,
                  const DenseTensor& x,
                  DenseTensor* out) const {
    int r = xpu_activation_func<Context, T, XPUType>(
        dev_ctx, x, out, xpu::floor<XPUType>);
    PADDLE_ENFORCE_XDNN_SUCCESS(r, "floor");
  }
};

// Activation functor that forwards to xpu::sin.
template <typename T>
struct XPUSinFunctor : public funcs::BaseActivationFunctor<T> {
  using XPUType = typename XPUTypeTrait<T>::Type;
  template <typename Context>
  void operator()(const Context& dev_ctx,
                  const DenseTensor& x,
                  DenseTensor* out) const {
    int r = xpu_activation_func<Context, T, XPUType>(
        dev_ctx, x, out, xpu::sin<XPUType>);
    PADDLE_ENFORCE_XDNN_SUCCESS(r, "sin");
  }
};

// Activation functor that forwards to xpu::cos.
template <typename T>
struct XPUCosFunctor : public funcs::BaseActivationFunctor<T> {
  using XPUType = typename XPUTypeTrait<T>::Type;
  template <typename Context>
  void operator()(const Context& dev_ctx,
                  const DenseTensor& x,
                  DenseTensor* out) const {
    int r = xpu_activation_func<Context, T, XPUType>(
        dev_ctx, x, out, xpu::cos<XPUType>);
    PADDLE_ENFORCE_XDNN_SUCCESS(r, "cos");
  }
};

// Kernel entry points generated from the functors above.
DEFINE_XPU_ACTIVATION_KERNEL(Exp, XPUExpFunctor)
DEFINE_XPU_ACTIVATION_KERNEL(Floor, XPUFloorFunctor)
DEFINE_XPU_ACTIVATION_KERNEL(Log, XPULogFunctor)
DEFINE_XPU_ACTIVATION_KERNEL(Reciprocal, XPUReciprocalFunctor)
DEFINE_XPU_ACTIVATION_KERNEL(Relu, XPUReluFunctor)
DEFINE_XPU_ACTIVATION_KERNEL(Sigmoid, XPUSigmoidFunctor)
DEFINE_XPU_ACTIVATION_KERNEL(Square, XPUSquareFunctor)
DEFINE_XPU_ACTIVATION_KERNEL(Sqrt, XPUSqrtFunctor)
DEFINE_XPU_ACTIVATION_KERNEL(Tanh, XPUTanhFunctor)
DEFINE_XPU_ACTIVATION_KERNEL(Silu, XPUSiluFunctor)
DEFINE_XPU_ACTIVATION_KERNEL(Sin, XPUSinFunctor)
DEFINE_XPU_ACTIVATION_KERNEL(Cos, XPUCosFunctor)

DEFINE_XPU_ACTIVATION_KERNEL_WITH_ONE_ATTRS(Mish, XPUMishFunctor, threshold)
DEFINE_XPU_ACTIVATION_KERNEL_WITH_ONE_ATTRS(LeakyRelu,
                                            XPULeakyReluFunctor,
                                            alpha)
DEFINE_XPU_ACTIVATION_KERNEL_WITH_ONE_ATTRS(Relu6Raw,
                                            XPURelu6Functor,
                                            threshold)

DEFINE_XPU_ACTIVATION_KERNEL_WITH_TWO_ATTRS(Softplus,
                                            XPUSoftplusFunctor,
                                            beta,
                                            threshold)
DEFINE_XPU_ACTIVATION_KERNEL_WITH_TWO_ATTRS(HardSigmoid,
                                            XPUHardSigmoidFunctor,
                                            slope,
                                            offset)

// HardSwish kernel: takes no attributes from the caller and always configures
// the functor with threshold = 6, scale = 6, offset = 3 — the only values
// XPUHardSwishFunctor accepts.
template <typename T, typename Context>
void HardSwishKernel(const Context& dev_ctx,
                     const DenseTensor& x,
                     DenseTensor* out) {
  XPUHardSwishFunctor<T> functor;
  float threshold = 6;
  float scale = 6;
  float offset = 3;
  auto attrs = functor.GetAttrs();
  *(attrs[0].second) = threshold;
  *(attrs[1].second) = scale;
  *(attrs[2].second) = offset;
  ActivationXPUImpl<T, Context, XPUHardSwishFunctor<T>>(
      dev_ctx, x, out, functor);
}

}  // namespace phi

// Kernels registered for both float and float16.
PD_REGISTER_KERNEL(
    relu, XPU, ALL_LAYOUT, phi::ReluKernel, float, phi::dtype::float16) {}
PD_REGISTER_KERNEL(
    silu, XPU, ALL_LAYOUT, phi::SiluKernel, float, phi::dtype::float16) {}
PD_REGISTER_KERNEL(
    sigmoid, XPU, ALL_LAYOUT, phi::SigmoidKernel, float, phi::dtype::float16) {
}
PD_REGISTER_KERNEL(swish_raw,
                   XPU,
                   ALL_LAYOUT,
                   phi::SwishRawKernel,
                   float,
                   phi::dtype::float16) {}
PD_REGISTER_KERNEL(hard_sigmoid,
                   XPU,
                   ALL_LAYOUT,
                   phi::HardSigmoidKernel,
                   float,
                   phi::dtype::float16) {}
PD_REGISTER_KERNEL(leaky_relu,
                   XPU,
                   ALL_LAYOUT,
                   phi::LeakyReluKernel,
                   float,
                   phi::dtype::float16) {}
PD_REGISTER_KERNEL(
    sqrt, XPU, ALL_LAYOUT, phi::SqrtKernel, float, phi::dtype::float16) {}
PD_REGISTER_KERNEL(
    tanh, XPU, ALL_LAYOUT, phi::TanhKernel, float, phi::dtype::float16) {}
PD_REGISTER_KERNEL(
    square, XPU, ALL_LAYOUT, phi::SquareKernel, float, phi::dtype::float16) {}
PD_REGISTER_KERNEL(
    log, XPU, ALL_LAYOUT, phi::LogKernel, float, phi::dtype::float16) {}
PD_REGISTER_KERNEL(relu6_raw,
                   XPU,
                   ALL_LAYOUT,
                   phi::Relu6RawKernel,
                   float,
                   phi::dtype::float16) {}

// Kernels registered for float only.
#define PD_REGISTER_ACTIVATION_KERNEL(name, func) \
  PD_REGISTER_KERNEL(name, XPU, ALL_LAYOUT, phi::func, float) {}

PD_REGISTER_ACTIVATION_KERNEL(exp, ExpKernel)  // no grad
PD_REGISTER_ACTIVATION_KERNEL(floor, FloorKernel)
PD_REGISTER_ACTIVATION_KERNEL(hardswish, HardSwishKernel)
PD_REGISTER_ACTIVATION_KERNEL(mish, MishKernel)
PD_REGISTER_ACTIVATION_KERNEL(pow, PowKernel)
PD_REGISTER_ACTIVATION_KERNEL(reciprocal, ReciprocalKernel)
PD_REGISTER_ACTIVATION_KERNEL(softplus, SoftplusKernel)
PD_REGISTER_ACTIVATION_KERNEL(sin, SinKernel)
PD_REGISTER_ACTIVATION_KERNEL(cos, CosKernel)