Unverified commit 9f99b591, authored by houj04, committed by GitHub

add softplus op for kunlun2. test=kunlun (#39555)

* add softplus op for kunlun2. test=kunlun

* add softplus op for kunlun2. test=kunlun

* fix code style. test=kunlun

* fix code style. test=kunlun

* add more test cases. test=kunlun
Parent c1c5c1fc
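For context, Paddle's softplus computes out = log(1 + exp(beta * x)) / beta and falls back to the identity once beta * x exceeds threshold (avoiding overflow); this is the behaviour the new XPU kernels below are checked against via the ref_softplus helper added in the test file. A minimal NumPy sketch of that definition, for illustration only (not part of the patch):

```python
import numpy as np

def softplus_reference(x, beta=1.0, threshold=20.0):
    """Reference softplus: log(1 + exp(beta * x)) / beta, linear past threshold."""
    x_beta = beta * x
    # np.where evaluates both branches, so clip the exp argument to keep the
    # discarded branch from overflowing for large inputs.
    safe = np.minimum(x_beta, threshold)
    return np.where(x_beta <= threshold, np.log1p(np.exp(safe)) / beta, x)
```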
......@@ -36,7 +36,7 @@ ENDIF()
if(NOT DEFINED XPU_BASE_URL)
SET(XPU_BASE_URL_WITHOUT_DATE "https://baidu-kunlun-product.cdn.bcebos.com/KL-SDK/klsdk-dev")
SET(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20220119")
SET(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20220215")
else()
SET(XPU_BASE_URL "${XPU_BASE_URL}")
endif()
......
......@@ -14,8 +14,10 @@ limitations under the License. */
#ifdef PADDLE_WITH_XPU
#include "paddle/fluid/operators/activation_op.h"
#include <string>
#include "paddle/fluid/operators/activation_op.h"
#include "paddle/fluid/platform/device/device_wrapper.h"
#include "paddle/fluid/platform/device/xpu/xpu_header.h"
namespace paddle {
......@@ -364,6 +366,50 @@ struct XPUPowFunctor : public BaseActivationFunctor<T> {
  }
};

template <typename T>
struct XPUSoftPlusFunctor : public BaseActivationFunctor<T> {
  void operator()(const framework::ExecutionContext &ctx) const {
    const auto *x = ctx.Input<Tensor>("X");
    auto *y = ctx.Output<Tensor>("Out");
    const T *x_data = x->data<T>();
    T *y_data = y->mutable_data<T>(ctx.GetPlace());

    float beta = ctx.Attr<float>("beta");
    float threshold = ctx.Attr<float>("threshold");

    auto xpu_context =
        ctx.device_context<paddle::platform::XPUDeviceContext>().x_context();
    int r =
        xpu::softplus(xpu_context, x_data, y_data, x->numel(), beta, threshold);
    PADDLE_ENFORCE_XDNN_SUCCESS(r, "softplus");
  }
};

template <typename T>
struct XPUSoftPlusGradFunctor : public BaseActivationFunctor<T> {
  void operator()(const framework::ExecutionContext &ctx) const {
    const auto *x = ctx.Input<Tensor>("X");
    auto *dOut = ctx.Input<framework::Tensor>(framework::GradVarName("Out"));
    auto *dX = ctx.Output<framework::Tensor>(framework::GradVarName("X"));
    const T *x_data = x->data<T>();
    const T *y_grad = dOut->data<T>();
    T *x_grad = dX->mutable_data<T>(ctx.GetPlace());

    float beta = ctx.Attr<float>("beta");
    float threshold = ctx.Attr<float>("threshold");

    auto xpu_context =
        ctx.device_context<paddle::platform::XPUDeviceContext>().x_context();
    int r = xpu::softplus_grad(
        xpu_context, reinterpret_cast<const float *>(x_data),
        reinterpret_cast<const float *>(
            x_data),  // softplus_grad does not need y_data
        reinterpret_cast<const float *>(y_grad),
        reinterpret_cast<float *>(x_grad), dX->numel(), beta, threshold);
    PADDLE_ENFORCE_XDNN_SUCCESS(r, "softplus_grad");
  }
};

}  // namespace operators
}  // namespace paddle
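The backward functor forwards X, dOut, and the beta/threshold attributes to xpu::softplus_grad; the X pointer is passed twice because, per the in-code comment, the kernel does not consume Y. Mathematically, the expected gradient is dX = dOut * sigmoid(beta * x) in the unsaturated region and dX = dOut where beta * x > threshold, since the op behaves as the identity there. A small NumPy sketch of that expectation (an illustration of the standard softplus derivative, not the XDNN implementation itself):

```python
import numpy as np

def softplus_grad_reference(x, dout, beta=1.0, threshold=20.0):
    # d(softplus)/dx = sigmoid(beta * x); past the threshold the op is the
    # identity, so the incoming gradient passes through unchanged.
    x_beta = beta * x
    local_grad = np.where(x_beta <= threshold,
                          1.0 / (1.0 + np.exp(-x_beta)),
                          1.0)
    return dout * local_grad
```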
......@@ -388,6 +434,8 @@ REGISTER_ACTIVATION_XPU_KERNEL(sigmoid, XPUSigmoidFunctor,
XPUSigmoidGradFunctor)
REGISTER_ACTIVATION_XPU_KERNEL(sqrt, XPUSqrtFunctor, XPUSqrtGradFunctor)
REGISTER_ACTIVATION_XPU_KERNEL(square, XPUSquareFunctor, XPUSquareGradFunctor)
REGISTER_ACTIVATION_XPU_KERNEL(softplus, XPUSoftPlusFunctor,
XPUSoftPlusGradFunctor)
REGISTER_OP_XPU_KERNEL(
tanh, ops::XPUActivationKernel<ops::XPUTanhFunctor<float>>,
......
......@@ -317,6 +317,9 @@ XPUOpMap& get_kl2_ops() {
pOpKernelType(vartype::FP16, XPUPlace())})},
{"softmax_with_cross_entropy",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"softplus", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"softplus_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"split", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
pOpKernelType(vartype::INT32, XPUPlace())})},
{"squeeze2_grad",
......
......@@ -358,6 +358,59 @@ class TestXPUReciprocal(TestXPUActivation):
            self.check_grad_with_place(place, ['X'], 'Out')


@unittest.skipIf(not paddle.is_compiled_with_xpu(),
                 "core is not compiled with XPU")
class TestXPUSoftPlus(TestXPUActivation):
    def setUp(self):
        self.op_type = "softplus"
        self.init_dtype()
        self.init_config()

        beta = np.random.uniform(0, 1)
        threshold = np.random.uniform(0, 1)
        out = ref_softplus(self.x, beta, threshold)
        self.inputs = {'X': self.x}
        self.outputs = {'Out': out}
        self.attrs = {'use_xpu': True, 'beta': beta, 'threshold': threshold}

    def init_config(self):
        self.x = np.random.uniform(-1, 1, [11, 17]).astype(self.dtype)

    def test_check_grad(self):
        if paddle.is_compiled_with_xpu():
            place = paddle.XPUPlace(0)
            self.check_grad_with_place(place, ['X'], 'Out')


@unittest.skipIf(not paddle.is_compiled_with_xpu(),
                 "core is not compiled with XPU")
class TestXPUSoftPlus2(TestXPUSoftPlus):
    def init_config(self):
        self.x = np.random.uniform(-2, 2, [1024, 8]).astype(self.dtype)


@unittest.skipIf(not paddle.is_compiled_with_xpu(),
                 "core is not compiled with XPU")
class TestXPUSoftPlus3(TestXPUSoftPlus):
    def init_config(self):
        self.x = np.random.uniform(-2, 2, [4, 512, 15, 15]).astype(self.dtype)


@unittest.skipIf(not paddle.is_compiled_with_xpu(),
                 "core is not compiled with XPU")
class TestXPUSoftPlus4(TestXPUSoftPlus):
    def init_config(self):
        self.x = np.random.uniform(-2, 2, [4, 256, 22, 22]).astype(self.dtype)


def ref_softplus(x, beta=1, threshold=20):
    x_beta = beta * x
    out = np.select([x_beta <= threshold, x_beta > threshold],
                    [np.log(1 + np.exp(x_beta)) / beta, x])
    return out


if __name__ == "__main__":
    paddle.enable_static()
    unittest.main()
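For completeness, a hypothetical end-to-end usage sketch of the new kernels through the Python API; it assumes a Paddle build with XPU support and an available device 0, and is not part of this patch:

```python
import numpy as np
import paddle
import paddle.nn.functional as F

# Dispatch to the FP32 XPU kernels registered above.
paddle.set_device('xpu:0')

x = paddle.to_tensor(np.random.uniform(-2, 2, [4, 8]).astype('float32'))
x.stop_gradient = False

y = F.softplus(x, beta=1.0, threshold=20.0)  # forward -> xpu::softplus
y.sum().backward()                           # backward -> xpu::softplus_grad
print(y.shape, x.grad.shape)
```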