Unverified · Commit 3a055833 authored by procr, committed by GitHub

support mobilenet for kunlun (#29458)

Parent ec26a26a
@@ -4,7 +4,7 @@ endif()
 INCLUDE(ExternalProject)
 SET(XPU_PROJECT "extern_xpu")
-SET(XPU_URL "https://baidu-kunlun-public.su.bcebos.com/paddle_depence/xpu_2020_12_04.tar.gz" CACHE STRING "" FORCE)
+SET(XPU_URL "https://baidu-kunlun-public.su.bcebos.com/paddle_depence/xpu_2020_12_07_cdfbf0c.tar.gz" CACHE STRING "" FORCE)
 SET(XPU_SOURCE_DIR "${THIRD_PARTY_PATH}/xpu")
 SET(XPU_DOWNLOAD_DIR "${XPU_SOURCE_DIR}/src/${XPU_PROJECT}")
 SET(XPU_INSTALL_DIR "${THIRD_PARTY_PATH}/install/xpu")
......
@@ -61,13 +61,38 @@ void xpu_activation_forward(const framework::ExecutionContext &ctx,
   const T *x_data = x->data<T>();
   T *y_data = y->mutable_data<T>(ctx.GetPlace());
   int r = 0;
-  if (xpu::Activation_t::ACT_POW == type.type) {
-    type.pow_factor = ctx.Attr<float>("factor");
-  }
-  auto xpu_context = ctx.device_context<DeviceContext>().x_context();
-  r = xpu::activation_forward(xpu_context, type, x->numel(),
-                              reinterpret_cast<const float *>(x_data),
-                              reinterpret_cast<float *>(y_data));
+  auto xpu_context = ctx.device_context<DeviceContext>().x_context();
+
+  switch (type.type) {
+    case xpu::Activation_t::HARD_SWISH: {
+      float threshold = ctx.Attr<float>("threshold");
+      float scale = ctx.Attr<float>("scale");
+      float offset = ctx.Attr<float>("offset");
+      PADDLE_ENFORCE_EQ(threshold, 6.0f,
+                        platform::errors::External(
+                            "Not support threshold [%f] in XPU", threshold));
+      PADDLE_ENFORCE_EQ(
+          scale, 6.0f,
+          platform::errors::External("Not support scale [%f] in XPU", scale));
+      PADDLE_ENFORCE_EQ(
+          offset, 3.0f,
+          platform::errors::External("Not support offset [%f] in XPU", offset));
+      r = xpu::hard_swish(xpu_context, reinterpret_cast<const float *>(x_data),
+                          reinterpret_cast<float *>(y_data), x->numel());
+      break;
+    }
+    case xpu::Activation_t::ACT_POW: {
+      type.pow_factor = ctx.Attr<float>("factor");
+    }
+    default: {
+      r = xpu::activation_forward(xpu_context, type, x->numel(),
+                                  reinterpret_cast<const float *>(x_data),
+                                  reinterpret_cast<float *>(y_data));
+      break;
+    }
+  }
   PADDLE_ENFORCE_EQ(r, XPU_SUCCESS,
                     platform::errors::External(
                         "XPU API return wrong value[%d], please check whether "
@@ -90,12 +115,40 @@ void xpu_activation_backward(const framework::ExecutionContext &ctx,
   if (y != nullptr) y_data = y->data<T>();
   if (dOut != nullptr) y_grad = dOut->data<T>();
   T *x_grad = dX->mutable_data<T>(ctx.GetPlace());
+  int r = 0;
   auto xpu_context = ctx.device_context<DeviceContext>().x_context();
-  int r = xpu::activation_backward(xpu_context, type, dX->numel(),
-                                   reinterpret_cast<const float *>(x_data),
-                                   reinterpret_cast<const float *>(y_data),
-                                   reinterpret_cast<const float *>(y_grad),
-                                   reinterpret_cast<float *>(x_grad));
+
+  switch (type.type) {
+    case xpu::Activation_t::HARD_SWISH: {
+      float threshold = ctx.Attr<float>("threshold");
+      float scale = ctx.Attr<float>("scale");
+      float offset = ctx.Attr<float>("offset");
+      PADDLE_ENFORCE_EQ(threshold, 6.0f,
+                        platform::errors::External(
+                            "Not support threshold [%f] in XPU", threshold));
+      PADDLE_ENFORCE_EQ(
+          scale, 6.0f,
+          platform::errors::External("Not support scale [%f] in XPU", scale));
+      PADDLE_ENFORCE_EQ(
+          offset, 3.0f,
+          platform::errors::External("Not support offset [%f] in XPU", offset));
+      r = xpu::hard_swish_grad(xpu_context,
+                               reinterpret_cast<const float *>(x_data),
+                               reinterpret_cast<const float *>(y_data),
+                               reinterpret_cast<const float *>(y_grad),
+                               reinterpret_cast<float *>(x_grad), dX->numel());
+      break;
+    }
+    default: {
+      r = xpu::activation_backward(xpu_context, type, dX->numel(),
+                                   reinterpret_cast<const float *>(x_data),
+                                   reinterpret_cast<const float *>(y_data),
+                                   reinterpret_cast<const float *>(y_grad),
+                                   reinterpret_cast<float *>(x_grad));
+      break;
+    }
+  }
   PADDLE_ENFORCE_EQ(r, XPU_SUCCESS,
                     platform::errors::External(
                         "XPU API return wrong value[%d], please check whether "
@@ -132,6 +185,8 @@ using XPULogFunctor = XPUActivationFunc<T, xpu::Activation_t::LOG>;
 template <typename T>
 using XPUSquareFunctor = XPUActivationFunc<T, xpu::Activation_t::SQUARE>;
 template <typename T>
+using XPUHardSwishFunctor = XPUActivationFunc<T, xpu::Activation_t::HARD_SWISH>;
+template <typename T>
 using XPUSuareGradFunctor = XPUActivationGradFunc<T, xpu::Activation_t::SQUARE>;
 template <typename T>
 using XPUReluGradFunctor = XPUActivationGradFunc<T, xpu::Activation_t::RELU>;
@@ -147,6 +202,9 @@ using XPUSqrtFunctor = XPUActivationFunc<T, xpu::Activation_t::SQRT>;
 template <typename T>
 using XPUSqrtGradFunctor = XPUActivationGradFunc<T, xpu::Activation_t::SQRT>;
 template <typename T>
+using XPUHardSwishGradFunctor =
+    XPUActivationGradFunc<T, xpu::Activation_t::HARD_SWISH>;
+template <typename T>
 using XPUACTPowFunctor = XPUActivationFunc<T, xpu::Activation_t::ACT_POW>;
 template <typename T>
 using XPUABSFunctor = XPUActivationFunc<T, xpu::Activation_t::ABS>;
@@ -169,6 +227,8 @@ REGISTER_ACTIVATION_XPU_KERNEL(sigmoid, XPUSigmoidFunctor,
 REGISTER_ACTIVATION_XPU_KERNEL(gelu, XPUGeluFunctor, XPUGeluGradFunctor)
 REGISTER_ACTIVATION_XPU_KERNEL(sqrt, XPUSqrtFunctor, XPUSqrtGradFunctor)
 REGISTER_ACTIVATION_XPU_KERNEL(square, XPUSquareFunctor, XPUSuareGradFunctor)
+REGISTER_ACTIVATION_XPU_KERNEL(hard_swish, XPUHardSwishFunctor,
+                               XPUHardSwishGradFunctor)
 REGISTER_OP_XPU_KERNEL(log,
                        ops::XPUActivationKernel<ops::XPULogFunctor<float>>);
 REGISTER_OP_XPU_KERNEL(pow,
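With the functors registered, hard_swish can be dispatched to the XPU like the other activations. A hedged static-graph sketch of how the new kernel might be exercised end to end (fluid.layers.hard_swish, XPUPlace and Executor are existing Paddle APIs; the program itself is illustrative and assumes a build configured with WITH_XPU and a Kunlun card):

import numpy as np
import paddle
import paddle.fluid as fluid

paddle.enable_static()
main_prog, startup_prog = fluid.Program(), fluid.Program()
with fluid.program_guard(main_prog, startup_prog):
    x = fluid.data(name="x", shape=[11, 17], dtype="float32")
    # Defaults threshold=6, scale=6, offset=3 match what the XPU kernel accepts.
    y = fluid.layers.hard_swish(x)

place = paddle.XPUPlace(0)
exe = fluid.Executor(place)
exe.run(startup_prog)
out, = exe.run(main_prog,
               feed={"x": np.random.uniform(-4, 4, [11, 17]).astype("float32")},
               fetch_list=[y])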
......
@@ -20,6 +20,7 @@ import unittest
 import numpy as np
 import paddle.fluid.core as core
 from op_test import OpTest
+from op_test_xpu import XPUOpTest
 from scipy.special import expit, erf
 import paddle
 import paddle.fluid as fluid
@@ -30,7 +31,7 @@ from paddle.fluid import compiler, Program, program_guard
 @unittest.skipIf(not paddle.is_compiled_with_xpu(),
                  "core is not compiled with XPU")
-class TestXPUActivation(OpTest):
+class TestXPUActivation(XPUOpTest):
     def setUp(self):
         self.op_type = "exp"
         self.init_dtype()
@@ -166,6 +167,33 @@ def gelu(x, approximate):
     return y_ref.astype(x.dtype)
 
 
+@unittest.skipIf(not paddle.is_compiled_with_xpu(),
+                 "core is not compiled with XPU")
+class TestXPUHardSwish(TestXPUActivation):
+    def setUp(self):
+        self.op_type = "hard_swish"
+        self.init_dtype()
+        x = np.random.uniform(-1, 1, [11, 17]).astype(self.dtype)
+        offset = 3.0
+        threshold = 6.0
+        scale = 6.0
+        out = hard_swish(x, offset, threshold, scale)
+
+        self.inputs = {'X': x}
+        self.outputs = {'Out': out}
+        self.attrs = {'use_xpu': True}
+
+    def test_check_grad(self):
+        if paddle.is_compiled_with_xpu():
+            place = paddle.XPUPlace(0)
+            self.check_grad_with_place(place, ['X'], 'Out')
+
+
+def hard_swish(x, offset, threshold, scale):
+    y_ref = np.minimum(threshold, np.maximum(0, x + offset)) * x / scale
+    return y_ref.astype(x.dtype)
+
+
 @unittest.skipIf(not paddle.is_compiled_with_xpu(),
                  "core is not compiled with XPU")
 class TestXPULog(TestXPUActivation):
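check_grad_with_place compares the kernel's gradient against a numerically estimated one; a standalone sketch of the same idea, using central differences over the test's reference hard_swish (pure NumPy, illustrative only):

import numpy as np

def numeric_grad(f, x, eps=5e-3):
    # Central-difference estimate of d sum(f(x)) / dx, one element at a time.
    grad = np.zeros_like(x)
    for idx in np.ndindex(x.shape):
        orig = x[idx]
        x[idx] = orig + eps
        plus = f(x).sum()
        x[idx] = orig - eps
        minus = f(x).sum()
        x[idx] = orig
        grad[idx] = (plus - minus) / (2.0 * eps)
    return grad

x = np.random.uniform(-4, 4, [3, 4]).astype("float64")
hs = lambda t: np.minimum(6.0, np.maximum(0.0, t + 3.0)) * t / 6.0
est = numeric_grad(hs, x)
# est should agree with the piecewise gradient sketched after the backward hunk.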
......