From 9f99b591f45f66806cd4f1ada87ff1cefce4f2a2 Mon Sep 17 00:00:00 2001 From: houj04 <35131887+houj04@users.noreply.github.com> Date: Thu, 17 Feb 2022 17:17:14 +0800 Subject: [PATCH] add softplus op for kunlun2. test=kunlun (#39555) * add softplus op for kunlun2. test=kunlun * add softplus op for kunlun2. test=kunlun * fix code style. test=kunlun * fix code style. test=kunlun * add more test cases. test=kunlun --- cmake/external/xpu.cmake | 2 +- paddle/fluid/operators/activation_op_xpu.cc | 50 ++++++++++++++++- .../fluid/platform/device/xpu/xpu2_op_list.h | 3 ++ .../unittests/xpu/test_activation_op_xpu.py | 53 +++++++++++++++++++ 4 files changed, 106 insertions(+), 2 deletions(-) diff --git a/cmake/external/xpu.cmake b/cmake/external/xpu.cmake index c86748dc5e..5e60f1f2b9 100644 --- a/cmake/external/xpu.cmake +++ b/cmake/external/xpu.cmake @@ -36,7 +36,7 @@ ENDIF() if(NOT DEFINED XPU_BASE_URL) SET(XPU_BASE_URL_WITHOUT_DATE "https://baidu-kunlun-product.cdn.bcebos.com/KL-SDK/klsdk-dev") - SET(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20220119") + SET(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20220215") else() SET(XPU_BASE_URL "${XPU_BASE_URL}") endif() diff --git a/paddle/fluid/operators/activation_op_xpu.cc b/paddle/fluid/operators/activation_op_xpu.cc index 60188ee53e..62fb98b63a 100644 --- a/paddle/fluid/operators/activation_op_xpu.cc +++ b/paddle/fluid/operators/activation_op_xpu.cc @@ -14,8 +14,10 @@ limitations under the License. */ #ifdef PADDLE_WITH_XPU -#include "paddle/fluid/operators/activation_op.h" #include + +#include "paddle/fluid/operators/activation_op.h" +#include "paddle/fluid/platform/device/device_wrapper.h" #include "paddle/fluid/platform/device/xpu/xpu_header.h" namespace paddle { @@ -364,6 +366,50 @@ struct XPUPowFunctor : public BaseActivationFunctor { } }; +template +struct XPUSoftPlusFunctor : public BaseActivationFunctor { + void operator()(const framework::ExecutionContext &ctx) const { + const auto *x = ctx.Input("X"); + auto *y = ctx.Output("Out"); + const T *x_data = x->data(); + T *y_data = y->mutable_data(ctx.GetPlace()); + + float beta = ctx.Attr("beta"); + float threshold = ctx.Attr("threshold"); + + auto xpu_context = + ctx.device_context().x_context(); + int r = + xpu::softplus(xpu_context, x_data, y_data, x->numel(), beta, threshold); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "softplus"); + } +}; + +template +struct XPUSoftPlusGradFunctor : public BaseActivationFunctor { + void operator()(const framework::ExecutionContext &ctx) const { + const auto *x = ctx.Input("X"); + auto *dOut = ctx.Input(framework::GradVarName("Out")); + auto *dX = ctx.Output(framework::GradVarName("X")); + const T *x_data = x->data(); + const T *y_grad = dOut->data(); + T *x_grad = dX->mutable_data(ctx.GetPlace()); + + float beta = ctx.Attr("beta"); + float threshold = ctx.Attr("threshold"); + + auto xpu_context = + ctx.device_context().x_context(); + int r = xpu::softplus_grad( + xpu_context, reinterpret_cast(x_data), + reinterpret_cast( + x_data), // softplus_grad do not need y_data + reinterpret_cast(y_grad), + reinterpret_cast(x_grad), dX->numel(), beta, threshold); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "softplus_grad"); + } +}; + } // namespace operators } // namespace paddle @@ -388,6 +434,8 @@ REGISTER_ACTIVATION_XPU_KERNEL(sigmoid, XPUSigmoidFunctor, XPUSigmoidGradFunctor) REGISTER_ACTIVATION_XPU_KERNEL(sqrt, XPUSqrtFunctor, XPUSqrtGradFunctor) REGISTER_ACTIVATION_XPU_KERNEL(square, XPUSquareFunctor, XPUSquareGradFunctor) +REGISTER_ACTIVATION_XPU_KERNEL(softplus, XPUSoftPlusFunctor, + XPUSoftPlusGradFunctor) REGISTER_OP_XPU_KERNEL( tanh, ops::XPUActivationKernel>, diff --git a/paddle/fluid/platform/device/xpu/xpu2_op_list.h b/paddle/fluid/platform/device/xpu/xpu2_op_list.h index d73d6f0b81..6e7c98dd71 100644 --- a/paddle/fluid/platform/device/xpu/xpu2_op_list.h +++ b/paddle/fluid/platform/device/xpu/xpu2_op_list.h @@ -317,6 +317,9 @@ XPUOpMap& get_kl2_ops() { pOpKernelType(vartype::FP16, XPUPlace())})}, {"softmax_with_cross_entropy", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"softplus", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"softplus_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, {"split", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()), pOpKernelType(vartype::INT32, XPUPlace())})}, {"squeeze2_grad", diff --git a/python/paddle/fluid/tests/unittests/xpu/test_activation_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_activation_op_xpu.py index ce82b20eca..57af5739f5 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_activation_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_activation_op_xpu.py @@ -358,6 +358,59 @@ class TestXPUReciprocal(TestXPUActivation): self.check_grad_with_place(place, ['X'], 'Out') +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") +class TestXPUSoftPlus(TestXPUActivation): + def setUp(self): + self.op_type = "softplus" + self.init_dtype() + self.init_config() + + beta = np.random.uniform(0, 1) + threshold = np.random.uniform(0, 1) + out = ref_softplus(self.x, beta, threshold) + + self.inputs = {'X': self.x} + self.outputs = {'Out': out} + self.attrs = {'use_xpu': True, 'beta': beta, 'threshold': threshold} + + def init_config(self): + self.x = np.random.uniform(-1, 1, [11, 17]).astype(self.dtype) + + def test_check_grad(self): + if paddle.is_compiled_with_xpu(): + place = paddle.XPUPlace(0) + self.check_grad_with_place(place, ['X'], 'Out') + + +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") +class TestXPUSoftPlus2(TestXPUSoftPlus): + def init_config(self): + self.x = np.random.uniform(-2, 2, [1024, 8]).astype(self.dtype) + + +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") +class TestXPUSoftPlus3(TestXPUSoftPlus): + def init_config(self): + self.x = np.random.uniform(-2, 2, [4, 512, 15, 15]).astype(self.dtype) + + +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") +class TestXPUSoftPlus4(TestXPUSoftPlus): + def init_config(self): + self.x = np.random.uniform(-2, 2, [4, 256, 22, 22]).astype(self.dtype) + + +def ref_softplus(x, beta=1, threshold=20): + x_beta = beta * x + out = np.select([x_beta <= threshold, x_beta > threshold], + [np.log(1 + np.exp(x_beta)) / beta, x]) + return out + + if __name__ == "__main__": paddle.enable_static() unittest.main() -- GitLab